00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include <sstream>
00028 #include <algorithm>
00029
00030 #include <suplibxx/str.h>
00031
00032 #include "RdbStatsPercentiles.h"
00033
00034
00035 RdbStatsPercentiles::RdbStatsPercentiles( RDB& irdbtable, const string& name,
00036 const string& percent_list,
00037 bool implicitquartile )
00038 throw( Exception ) try : RdbStats( irdbtable, name ), the_median( 0.0 ),
00039 the_percentile_result( NULL ) {
00040
00041 try {
00042
00043 #ifdef TRACEFCT
00044 TraceFct tf( "RdbStatsPercentiles::RdbStatsPercentiles( RDB&, "
00045 "const string&, const string& ) throw( Exception )" );
00046 #endif
00047
00048 vector< string > tokens;
00049
00050 suplib::tok( tokens, percent_list, "," );
00051
00052 const int mysize( tokens.size( ) );
00053 for ( int ii = 0; ii < mysize; ii++ ) {
00054 string sval = string( "_p" ) + tokens[ ii ];
00055 double dval = suplib::str2d( tokens[ ii ].c_str( ) );
00056 the_percentile.push_back( pair< string, double >( sval, dval ) );
00057 }
00058
00059 for ( int ii = 0; ii < mysize; ii++ ) {
00060 if ( the_percentile[ ii ].second <= 0.0 ||
00061 the_percentile[ ii ].second > 100.0 ) {
00062 ostringstream ost;
00063 ost << "The percentiles option (" << percent_list << ") must be "
00064 "within [ 0.0, 100.0 ]\n";
00065 throw Exception( ost.str( ) );
00066 }
00067 }
00068
00069 if ( implicitquartile ) {
00070 the_percentile.push_back( pair< string, double >( "_fq", 25.0 ) );
00071 the_percentile.push_back( pair< string, double >( "_lq", 75.0 ) );
00072 }
00073
00074 } catch ( Exception& e ) {
00075
00076 throw;
00077
00078 } catch ( exception& e ) {
00079
00080 throw Exception( e.what( ) );
00081
00082 }
00083
00084 } catch ( Exception& E ) {
00085
00086
00087 throw;
00088
00089 } catch ( exception& e ) {
00090
00091
00092 throw Exception( e.what( ) );
00093
00094 }
00095
00096
00097
00098 RdbStatsPercentiles::RdbStatsPercentiles( RDB& irdbtable, const string& name )
00099 throw( Exception ) try : RdbStats( irdbtable, name ), the_median( 0.0 ),
00100 the_percentile_result( NULL ) {
00101
00102 try {
00103
00104 #ifdef TRACEFCT
00105 TraceFct tf( "RdbStatsPercentiles::RdbStatsPercentiles( RDB&, "
00106 "const string&, const string& ) throw( Exception )" );
00107 #endif
00108
00109 the_percentile.push_back( pair< string, double >( "_fq", 25.0 ) );
00110 the_percentile.push_back( pair< string, double >( "_lq", 75.0 ) );
00111
00112 } catch ( Exception& e ) {
00113
00114 throw;
00115
00116 } catch ( exception& e ) {
00117
00118 throw Exception( e.what( ) );
00119
00120 }
00121
00122 } catch ( Exception& E ) {
00123
00124
00125 throw;
00126
00127 } catch ( exception& e ) {
00128
00129
00130 throw Exception( e.what( ) );
00131
00132 }
00133
00134
00135 double RdbStatsPercentiles::calculate_median( ) throw( ) {
00136
00137 #ifdef TRACEFCT
00138 TraceFct tf( "double RdbStatsPercentiles::calculate_mdeian( ) throw( )" );
00139 #endif
00140
00141 vector< double >::iterator begin_ptr = data.begin( );
00142
00143 size_t num_size = data.size( );
00144 size_t num_size_div = num_size / 2;
00145
00146 nth_element( begin_ptr, begin_ptr + num_size_div, data.end( ) );
00147
00148 if ( num_size & 0001 ) {
00149
00150
00151
00152 return data[ num_size_div ];
00153
00154 } else {
00155
00156
00157
00158 double tmp = data[ num_size_div ];
00159
00160
00161
00162
00163
00164 nth_element( begin_ptr, begin_ptr + num_size_div - 1,
00165 begin_ptr + num_size_div );
00166 tmp += data[ num_size_div - 1 ];
00167
00168 return 0.5 * tmp;
00169
00170 }
00171
00172 }
00173
00174 double RdbStatsPercentiles::calculate_percentile( const double percentile )
00175 throw( ) {
00176
00177 #ifdef TRACEFCT
00178 TraceFct tf( "double RdbStatsPercentiles::calculate_percentile( int ) "
00179 "throw( )" );
00180 #endif
00181
00182 vector< double >::iterator begin_ptr = data.begin( );
00183
00184 size_t n = data.size( );
00185 double f = percentile / 100.0;
00186 int i = ( ( n - 1.0 ) * f );
00187 double delta = ( n - 1.0 ) * f - i;
00188
00189 nth_element( begin_ptr, begin_ptr + i + 1, data.end( ) );
00190 double data_i_1 = data[ i + 1];
00191
00192 nth_element( begin_ptr, begin_ptr + i, data.end( ) );
00193 double data_i = data[ i ];
00194
00195 double quantile = ( 1.0 - delta ) * data_i + delta * data_i_1;
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206 return quantile;
00207
00208 }
00209
00210 int RdbStatsPercentiles::calculate_statistics( ) throw( ) {
00211
00212 #ifdef TRACEFCT
00213 TraceFct tf( "double RdbStatsPercentiles::calculate_statistics( ) "
00214 "throw( )" );
00215 #endif
00216
00217 if ( 0 == this->RdbStats::calculate_statistics( ) )
00218 return 0;
00219
00220 the_median = calculate_median( );
00221
00222 for ( size_t ii = 0; ii < the_percentile.size( ); ii++ )
00223 the_percentile_result[ ii ] =
00224 calculate_percentile( the_percentile[ ii ].second );
00225
00226 return num_n;
00227
00228 }
00229
00230 void RdbStatsPercentiles::init( ) throw( ) {
00231
00232 #ifdef TRACEFCT
00233 TraceFct tf( "void RdbStatsPercentiles:init( ) throw( )" );
00234 #endif
00235
00236 this->RdbStats::init( );
00237
00238
00239 data.clear( );
00240
00241 }
00242
00243 void RdbStatsPercentiles::normalize_results( const double norm ) throw( ) {
00244
00245 this->RdbStats::normalize_results( norm );
00246 for ( size_t ii = 0; ii < the_percentile.size( ); ii++ )
00247 the_percentile_result[ ii ] /= norm;
00248
00249 }
00250
00251 void RdbStatsPercentiles::set_output_columns( RDB& ordbtable )
00252 throw( Exception ) {
00253
00254 try {
00255
00256 #ifdef TRACEFCT
00257 TraceFct tf( "double RdbStatsPercentiles::set_output_columns( RDB& ) "
00258 "throw( Exception )" );
00259 #endif
00260
00261 this->RdbStats::set_output_columns( ordbtable );
00262
00263 const int mysize( the_percentile.size( ) );
00264 the_percentile_result = new double[ mysize ];
00265
00266 const char* column_name_ptr = column_name.c_str( );
00267
00268 char str[ 256 ];
00269 sprintf( str, "%s_median", column_name_ptr );
00270 ordbtable.setColumn( str, "N" );
00271 RDBColumn* tmp = ordbtable.getColumn( str );
00272 tmp->mapData( &the_median, 1 );
00273
00274 for ( int ii = 0; ii < mysize; ii++ ) {
00275
00276 ostringstream ost;
00277 ost << column_name_ptr << the_percentile[ ii ].first;
00278 ordbtable.setColumn( ost.str( ), "N" );
00279 RDBColumn* ptr = ordbtable.getColumn( ost.str( ) );
00280 ptr->mapData( the_percentile_result + ii , 1 );
00281
00282 }
00283
00284 } catch ( RDBErr& rdbe ) {
00285
00286 throw Exception( rdbe );
00287
00288 } catch( Exception& E ) {
00289
00290 throw;
00291
00292 } catch ( exception& e ) {
00293
00294 throw Exception( e.what( ) );
00295
00296 }
00297
00298 }
00299
00300 void RdbStatsPercentiles::update_statistics( ) throw( Exception ) {
00301
00302 try {
00303
00304 #ifdef TRACEFCT
00305 TraceFct tf( "double RdbStatsPercentiles::update_statistics( ) "
00306 "throw( Exception )" );
00307 #endif
00308
00309 this->RdbStats::update_statistics( );
00310
00311 data.push_back( get_value( ) );
00312
00313 } catch( Exception& e ) {
00314
00315 throw;
00316
00317 }
00318
00319 }