RdbStatsPercentiles.cc

00001 // File:  RdbStatsPercentiles.cc
00002 
00003 // --8<--8<--8<--8<--
00004 //
00005 // Copyright (C) 2006 Smithsonian Astrophysical Observatory
00006 //
00007 // This file is part of rdbstats
00008 //
00009 // rdbstats is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU General Public License
00011 // as published by the Free Software Foundation; either version 2
00012 // of the License, or (at your option) any later version.
00013 //
00014 // rdbstats is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 // GNU General Public License for more details.
00018 //
00019 // You should have received a copy of the GNU General Public License
00020 // along with this program; if not, write to the 
00021 //       Free Software Foundation, Inc. 
00022 //       51 Franklin Street, Fifth Floor
00023 //       Boston, MA  02110-1301, USA
00024 //
00025 // -->8-->8-->8-->8--
00026 
00027 #include <sstream>
00028 #include <algorithm>
00029 
00030 #include <suplibxx/str.h>
00031 
00032 #include "RdbStatsPercentiles.h"
00033 
00034 // The user specified percentiles.  For example: --percentiles 12,34,45
00035 RdbStatsPercentiles::RdbStatsPercentiles( RDB& irdbtable, const string& name,
00036                                           const string& percent_list,
00037                                           bool implicitquartile )
00038   throw( Exception ) try : RdbStats( irdbtable, name ), the_median( 0.0 ),
00039                            the_percentile_result( NULL ) {
00040 
00041     try {
00042 
00043 #ifdef TRACEFCT
00044       TraceFct tf( "RdbStatsPercentiles::RdbStatsPercentiles( RDB&, "
00045                    "const string&, const string& ) throw( Exception )" );
00046 #endif
00047 
00048       vector< string > tokens;
00049   
00050       suplib::tok( tokens, percent_list, "," );
00051 
00052       const int mysize( tokens.size( ) );
00053       for ( int ii = 0; ii < mysize; ii++ ) {
00054         string sval = string( "_p" ) + tokens[ ii ];
00055         double dval = suplib::str2d( tokens[ ii ].c_str( ) );
00056         the_percentile.push_back( pair< string, double >( sval, dval ) );
00057       }
00058 
00059       for ( int ii = 0; ii < mysize; ii++ ) {
00060         if ( the_percentile[ ii ].second <= 0.0 ||
00061              the_percentile[ ii ].second > 100.0 ) {
00062           ostringstream ost;
00063           ost << "The percentiles option (" << percent_list << ") must be "
00064             "within [ 0.0, 100.0 ]\n";
00065           throw Exception( ost.str( ) );
00066         }
00067       }
00068 
00069       if ( implicitquartile ) {
00070         the_percentile.push_back( pair< string, double >( "_fq", 25.0 ) );
00071         the_percentile.push_back( pair< string, double >( "_lq", 75.0 ) );
00072       }
00073 
00074     } catch ( Exception& e ) {
00075 
00076       throw;
00077 
00078     } catch ( exception& e ) {
00079 
00080       throw Exception( e.what( ) );
00081 
00082     }
00083 
00084   } catch ( Exception& E ) {
00085 
00086     // exception thrown by RdbStats class is caught here
00087     throw;
00088     
00089   } catch ( exception& e ) {
00090     
00091     // exception thrown by RdbStats class is caught here
00092     throw Exception( e.what( ) );
00093 
00094   }
00095 
00096 
00097 // --quartiles was entered at the command line.
00098 RdbStatsPercentiles::RdbStatsPercentiles( RDB& irdbtable, const string& name )
00099   throw( Exception ) try : RdbStats( irdbtable, name ), the_median( 0.0 ),
00100                            the_percentile_result( NULL ) {
00101 
00102     try {
00103 
00104 #ifdef TRACEFCT
00105       TraceFct tf( "RdbStatsPercentiles::RdbStatsPercentiles( RDB&, "
00106                    "const string&, const string& ) throw( Exception )" );
00107 #endif
00108 
00109       the_percentile.push_back( pair< string, double >( "_fq", 25.0 ) );
00110       the_percentile.push_back( pair< string, double >( "_lq", 75.0 ) );
00111 
00112     } catch ( Exception& e ) {
00113 
00114       throw;
00115 
00116     } catch ( exception& e ) {
00117 
00118       throw Exception( e.what( ) );
00119 
00120     }
00121 
00122   } catch ( Exception& E ) {
00123     
00124     // exception thrown by RdbStats class is caught here
00125     throw;
00126     
00127   } catch ( exception& e ) {
00128     
00129     // exception thrown by RdbStats class is caught here
00130     throw Exception( e.what( ) );
00131 
00132   }
00133 
00134 
00135 double RdbStatsPercentiles::calculate_median(  ) throw( ) {
00136 
00137 #ifdef TRACEFCT
00138     TraceFct tf( "double RdbStatsPercentiles::calculate_mdeian( ) throw( )" );
00139 #endif
00140 
00141   vector< double >::iterator begin_ptr = data.begin( );
00142 
00143   size_t num_size = data.size( );
00144   size_t num_size_div = num_size / 2;
00145 
00146   nth_element( begin_ptr, begin_ptr + num_size_div, data.end( ) );
00147 
00148   if ( num_size & 0001 ) {
00149 
00150     // num_size is odd:
00151 
00152     return data[ num_size_div ];
00153 
00154   } else {
00155 
00156     // num_size is even:
00157 
00158     double tmp = data[ num_size_div ];
00159 
00160     //
00161     // element beyond num_size_div is guarranteed
00162     // to be > then  element at data[ num_size_div ].
00163     //
00164     nth_element( begin_ptr, begin_ptr + num_size_div - 1,
00165                  begin_ptr + num_size_div );
00166     tmp += data[ num_size_div - 1 ];
00167 
00168     return 0.5 * tmp;
00169 
00170   }
00171 
00172 }
00173 
00174 double RdbStatsPercentiles::calculate_percentile( const double percentile )
00175   throw( ) {
00176 
00177 #ifdef TRACEFCT
00178     TraceFct tf( "double RdbStatsPercentiles::calculate_percentile( int ) "
00179                  "throw( )" );
00180 #endif
00181 
00182   vector< double >::iterator begin_ptr = data.begin( );
00183 
00184   size_t n        = data.size( );
00185   double f        = percentile / 100.0;
00186   int i           = ( ( n - 1.0 ) * f );
00187   double delta    = ( n - 1.0 ) * f - i; 
00188 
00189   nth_element( begin_ptr, begin_ptr + i + 1, data.end( ) );
00190   double data_i_1 = data[ i + 1];
00191 
00192   nth_element( begin_ptr, begin_ptr + i, data.end( ) );
00193   double data_i   = data[ i ];
00194 
00195   double quantile = ( 1.0 - delta ) * data_i + delta * data_i_1;
00196 
00197   /*
00198   cerr << "n:         " << n        << endl
00199        << "f:         " << f        << endl
00200        << "i:         " << i        << endl
00201        << "delta:     " << delta    << endl
00202        << "data_i:    " << data_i   << endl
00203        << "data_i_1:  " << data_i_1 << endl
00204        << "quantile:  " << quantile << endl << endl;
00205   */
00206   return quantile;
00207 
00208 }
00209 
00210 int RdbStatsPercentiles::calculate_statistics( ) throw( ) {
00211 
00212 #ifdef TRACEFCT
00213     TraceFct tf( "double RdbStatsPercentiles::calculate_statistics( ) "
00214                  "throw( )" );
00215 #endif
00216 
00217   if ( 0 == this->RdbStats::calculate_statistics( ) )
00218     return 0;
00219 
00220   the_median = calculate_median( );
00221 
00222   for ( size_t ii = 0; ii < the_percentile.size( ); ii++ )
00223     the_percentile_result[ ii ] =
00224       calculate_percentile( the_percentile[ ii ].second );
00225 
00226   return num_n;
00227 
00228 }
00229 
00230 void RdbStatsPercentiles::init( ) throw( ) {
00231 
00232 #ifdef TRACEFCT
00233   TraceFct tf( "void RdbStatsPercentiles:init( ) throw( )" );
00234 #endif
00235 
00236   this->RdbStats::init( );
00237 
00238   // Since we are, possibly, starting a new group so wipe out data.
00239   data.clear( );
00240 
00241 }
00242 
00243 void RdbStatsPercentiles::normalize_results( const double norm ) throw( ) {
00244 
00245   this->RdbStats::normalize_results( norm );
00246   for ( size_t ii = 0; ii < the_percentile.size( ); ii++ )
00247     the_percentile_result[ ii ] /= norm;
00248 
00249 }
00250 
00251 void RdbStatsPercentiles::set_output_columns( RDB& ordbtable )
00252   throw( Exception ) {
00253 
00254   try {
00255 
00256 #ifdef TRACEFCT
00257     TraceFct tf( "double RdbStatsPercentiles::set_output_columns( RDB& ) "
00258                  "throw( Exception )" );
00259 #endif
00260     
00261     this->RdbStats::set_output_columns( ordbtable );
00262 
00263     const int mysize( the_percentile.size( ) );
00264     the_percentile_result = new double[ mysize ];
00265 
00266     const char* column_name_ptr = column_name.c_str( );
00267 
00268     char str[ 256 ];
00269     sprintf( str, "%s_median", column_name_ptr );
00270     ordbtable.setColumn( str, "N" );
00271     RDBColumn* tmp = ordbtable.getColumn( str );
00272     tmp->mapData( &the_median, 1 );
00273 
00274     for ( int ii = 0; ii < mysize; ii++ ) {
00275       
00276       ostringstream ost;
00277       ost << column_name_ptr << the_percentile[ ii ].first;
00278       ordbtable.setColumn( ost.str( ), "N" );
00279       RDBColumn* ptr = ordbtable.getColumn( ost.str( ) );
00280       ptr->mapData( the_percentile_result + ii , 1 );
00281 
00282     }
00283 
00284   } catch ( RDBErr& rdbe ) {
00285 
00286     throw Exception( rdbe );
00287 
00288   } catch( Exception& E ) {
00289 
00290     throw;
00291 
00292   } catch ( exception& e ) { 
00293 
00294     throw Exception( e.what( ) );
00295 
00296   }
00297       
00298 }
00299 
00300 void RdbStatsPercentiles::update_statistics( ) throw( Exception ) {
00301 
00302   try {
00303 
00304 #ifdef TRACEFCT
00305     TraceFct tf( "double RdbStatsPercentiles::update_statistics( ) "
00306                  "throw( Exception )" );
00307 #endif
00308 
00309     this->RdbStats::update_statistics( );
00310 
00311     data.push_back( get_value( ) );
00312 
00313   } catch( Exception& e ) {
00314 
00315     throw;
00316 
00317   }
00318 
00319 }