30 #include <suplibxx/str.h> 32 #include "RdbStatsPercentiles.h" 36 const std::string& percent_list,
37 bool implicitquartile )
38 :
RdbStats( irdbtable, name ), the_median( 0.0 ) {
// Constructor body: turns percent_list into (column suffix, percentile)
// pairs stored in the_percentile, validates the requested values and,
// when implicitquartile is set, appends the two quartiles.
// NOTE(review): suplib::tok presumably splits percent_list on the ","
// delimiter into tokens -- confirm against the suplibxx documentation.
40 std::vector< std::string > tokens;
42 suplib::tok( tokens, percent_list,
"," );
44 const int mysize( tokens.size( ) );
// One entry per token: the suffix is "_p" followed by the token text and
// the value is the token converted by suplib::str2d.
45 for (
int ii = 0; ii < mysize; ii++ ) {
46 std::string sval = std::string(
"_p" ) + tokens[ ii ];
47 double dval = suplib::str2d( tokens[ ii ].c_str( ) );
48 the_percentile.push_back( std::pair< std::string, double >( sval, dval ) );
// Validate the user-supplied values only (the quartiles are appended later).
// A value <= 0.0 or > 100.0 raises Exception.
// NOTE(review): the message advertises the closed interval [ 0.0, 100.0 ]
// but the test below rejects 0.0 itself -- confirm which one is intended.
51 for (
int ii = 0; ii < mysize; ii++ ) {
52 if ( the_percentile[ ii ].second <= 0.0 ||
53 the_percentile[ ii ].second > 100.0 ) {
54 std::ostringstream ost;
55 ost <<
"The percentiles option (" << percent_list <<
") must be " 56 "within [ 0.0, 100.0 ]\n";
57 throw Exception( ost.str( ) );
// Optionally add the first quartile (suffix _fq, 25.0) and the last
// quartile (suffix _lq, 75.0) to the list of requested percentiles.
61 if ( implicitquartile ) {
62 the_percentile.push_back( std::pair< std::string, double >(
"_fq", 25.0 ) );
63 the_percentile.push_back( std::pair< std::string, double >(
"_lq", 75.0 ) );
69 :
RdbStats( irdbtable, name ), the_median( 0.0 ) {
// Quartile-only constructor body: the requested percentiles are exactly
// the first quartile (suffix _fq, 25.0) and the last quartile
// (suffix _lq, 75.0); the median starts at 0.0.
71 the_percentile.push_back( std::pair< std::string, double >(
"_fq", 25.0 ) );
72 the_percentile.push_back( std::pair< std::string, double >(
"_lq", 75.0 ) );
75 double RdbStatsPercentiles::calculate_median( ) {
77 std::vector< double >::iterator begin_ptr = data.begin( );
79 size_t num_size = data.size( );
80 size_t num_size_div = num_size / 2;
82 nth_element( begin_ptr, begin_ptr + num_size_div, data.end( ) );
84 if ( num_size & 0001 ) {
88 return data[ num_size_div ];
94 double tmp = data[ num_size_div ];
100 nth_element( begin_ptr, begin_ptr + num_size_div - 1,
101 begin_ptr + num_size_div );
102 tmp += data[ num_size_div - 1 ];
109 double RdbStatsPercentiles::calculate_percentile(
const double percentile ) {
111 std::vector< double >::iterator begin_ptr = data.begin( );
113 size_t n = data.size( );
114 double f = percentile / 100.0;
115 int i = ( ( n - 1.0 ) * f );
116 double delta = ( n - 1.0 ) * f - i;
118 nth_element( begin_ptr, begin_ptr + i + 1, data.end( ) );
119 double data_i_1 = data[ i + 1];
121 nth_element( begin_ptr, begin_ptr + i, data.end( ) );
122 double data_i = data[ i ];
124 double quantile = ( 1.0 - delta ) * data_i + delta * data_i_1;
// Final statistics for the current set: store the median, then one
// percentile value per requested entry, written at the same index as the
// entry it belongs to in the_percentile.
// NOTE(review): the_percentile_result is written by index here, so it must
// already hold the_percentile.size( ) elements -- confirm it is sized
// (not merely reserved) before this runs.
143 the_median = calculate_median( );
145 for (
size_t ii = 0; ii < the_percentile.size( ); ii++ )
146 the_percentile_result[ ii ] =
147 calculate_percentile( the_percentile[ ii ].second );
// Reset hook for this statistic: delegates to the base-class
// RdbStats::init( ) first.
// NOTE(review): any further per-class reset (e.g. of the collected samples)
// is not visible in this excerpt -- confirm against the full source.
152 void RdbStatsPercentiles::init( ) {
153 this->RdbStats::init( );
// Normalize the results by `norm`: the base class normalizes its own
// results first, then every stored percentile result is divided by norm.
// NOTE(review): the_median is not divided by norm in the lines visible
// here -- confirm whether that is intentional.
// NOTE(review): norm is used as a divisor without a zero check.
159 void RdbStatsPercentiles::normalize_results(
const double norm ) {
161 this->RdbStats::normalize_results( norm );
162 for (
size_t ii = 0; ii < the_percentile.size( ); ii++ )
163 the_percentile_result[ ii ] /= norm;
167 void RdbStatsPercentiles::set_output_columns( RDB& ordbtable ) {
169 this->RdbStats::set_output_columns( ordbtable );
171 const int mysize( the_percentile.size( ) );
172 the_percentile_result.reserve( mysize );
174 const char* column_name_ptr = column_name.c_str( );
177 sprintf( str,
"%s_median", column_name_ptr );
178 ordbtable.setColumn( str,
"N" );
179 RDBColumn* tmp = ordbtable.getColumn( str );
180 tmp->mapData( &the_median, 1 );
182 for (
int ii = 0; ii < mysize; ii++ ) {
184 std::ostringstream ost;
185 ost << column_name_ptr << the_percentile[ ii ].first;
186 ordbtable.setColumn( ost.str( ),
"N" );
187 RDBColumn* ptr = ordbtable.getColumn( ost.str( ) );
188 ptr->mapData( &the_percentile_result[ii], 1 );
// Append the value returned by get_value( ) to the collected samples; the
// median and percentiles are later computed from `data`.
196 data.push_back( get_value( ) );
The base class to calculate: average, maximum, minimum, num, stddev and sum.
virtual void update_statistics()
Read the column from RDB++, update the statistics for the column.
virtual void update_statistics()
Read the column from RDB++, update the statistics for the column.
RdbStatsPercentiles(RDB &irdbtable, const std::string &name, const std::string &percent_list, bool implicitquartile)
--percentiles 12,34.. and --quartile
virtual int calculate_statistics()
Perform the final statistic for the set.
virtual int calculate_statistics()
Perform the final statistic for the set.