RdbStats.cc

00001 // File:  RdbStats.cc
00002 
00003 // --8<--8<--8<--8<--
00004 //
00005 // Copyright (C) 2006 Smithsonian Astrophysical Observatory
00006 //
00007 // This file is part of rdbstats
00008 //
00009 // rdbstats is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU General Public License
00011 // as published by the Free Software Foundation; either version 2
00012 // of the License, or (at your option) any later version.
00013 //
00014 // rdbstats is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 // GNU General Public License for more details.
00018 //
00019 // You should have received a copy of the GNU General Public License
00020 // along with this program; if not, write to the 
00021 //       Free Software Foundation, Inc. 
00022 //       51 Franklin Street, Fifth Floor
00023 //       Boston, MA  02110-1301, USA
00024 //
00025 // -->8-->8-->8-->8--
00026 
00027 #include "RdbStats.h"
00028 
00029 RdbStats::~RdbStats( ) throw( ) {
00030 
00031 #ifdef TRACEFCT
00032   TraceFct tf( "RdbStats::~RdbStats(  ) throw( )" );
00033 #endif
00034   
00035   //  if ( output_stats ) {
00036     //    for ( int ii = 0; output_stats[ ii ] != NULL; ii++ )
00037     //      delete output_stats[ ii ];
00038   //    delete [] output_stats;
00039   //    output_stats = NULL;
00040   //  }
00041 
00042   // Note, input_column is not to be delete since RDB++ lib will do it.
00043 
00044 }
00045 
00046 
00047 RdbStats::RdbStats( RDB& irdbtable, const string& name )
00048   throw( Exception ) : input_column( NULL ), output_stats( ), num_n( 0 ),
00049                        column_name( name ) {
00050 
00051   try {
00052 
00053 #ifdef TRACEFCT
00054     TraceFct tf( "RdbStats::RdbStats( RDB&, const string& ) throw( Exception)" );
00055 #endif
00056 
00057     init( );
00058 
00059     input_column = (RDBColumn*) irdbtable.getColumn( name );
00060 
00061   } catch ( RDBErr& rdbe ) {
00062 
00063     throw Exception( rdbe );
00064 
00065   } catch ( Exception& e ) {
00066 
00067     throw;
00068 
00069   } catch ( exception& e ) {
00070 
00071     throw Exception( e.what( ) );
00072 
00073   }
00074 
00075 }
00076 
00077 int RdbStats::calculate_statistics( ) throw( ) {
00078 
00079 #ifdef TRACEFCT
00080   TraceFct tf( "int RdbStats::calculate_statistics(  ) throw( )" );
00081 #endif
00082 
00083   if ( 0 == num_n )
00084     return 0;
00085 
00086   if ( 1 == num_n )
00087     the_statistics[ SD ] = 0.0;
00088   else
00089     the_statistics[ SD ] = sqrt( the_statistics[ SUM_T ] / ( num_n - 1 ) );
00090 
00091   the_statistics[ SUM2 ] = sqrt( the_statistics[ SUM2 ] );
00092 
00093   return num_n;
00094 
00095 }
00096 
00097 double RdbStats::get_value( ) throw( Exception ) {
00098 
00099 #ifdef TRACEFCT
00100   TraceFct tf( "double RdbStats::get_value(  ) throw( Exception )" );
00101 #endif
00102 
00103   try {
00104 
00105     return input_column->getDataDouble( ); 
00106 
00107   } catch ( RDBErr& rdbe ) {
00108 
00109     throw Exception( rdbe );
00110 
00111   } catch ( exception& e ) { 
00112 
00113     throw Exception( e.what( ) );
00114 
00115   }
00116 
00117 }
00118 
00119 #define ARRAY_SIZE(arr)(sizeof(arr)/sizeof((arr)[0]))
00120 
00121 void RdbStats::init( ) throw( ) {
00122 
00123 #ifdef TRACEFCT
00124   TraceFct tf( "void RdbStats::init(  ) throw( )" );
00125 #endif
00126 
00127   num_n = 0;
00128 
00129   for ( int ii = 0; ii < ARRAY_SIZE( the_statistics ); ii++ )
00130     the_statistics[ ii ] = 0.0;
00131 
00132   the_statistics[ MAX ] = -DBL_MAX;
00133   the_statistics[ MIN ] = DBL_MAX;
00134 
00135   // purposedly leave input_column alone.
00136 
00137 }
00138 
00139 #undef ARRAY_SIZE
00140 
00141 void RdbStats::normalize_results( const double norm ) throw( ) {
00142 
00143   the_statistics[ SD ] /= norm;
00144   the_statistics[ MIN ] /= norm;
00145   the_statistics[ MAX ] /= norm;
00146   the_statistics[ SUM2 ] /= norm;
00147 
00148 }
00149 
00150 void RdbStats::set_output_columns( RDB& ordbtable ) throw( Exception ) {
00151 
00152   try {
00153 
00154 #ifdef TRACEFCT
00155     TraceFct tf( "void RdbStats::set_output_columns( RDB& ) throw( )" );
00156 #endif
00157 
00158     static const char* suffix[] = { "_n", "_ave", "_max", "_min", "_dev",
00159                                       "_sum", "_rss", NULL };
00160 
00161     const char* column_name_ptr = column_name.c_str( );
00162 
00163     size_t counter( 0 );
00164     while ( suffix[ counter ] )
00165       ++counter;
00166     // + 1 to allocate an extra for the NULL sentinel.
00167     output_stats.reserve( counter + 1 );
00168 
00169     char str[ 256 ];
00170     sprintf( str, "%s%s", column_name_ptr, suffix[ 0 ] );
00171     ordbtable.setColumn( str, "N" );
00172     output_stats[ 0 ] = ordbtable.getColumn( ordbtable.nColumns( ) - 1 );
00173     output_stats[ 0 ]->mapData( &num_n, 1 );
00174     for ( int ii = 1; ii < counter ; ii++ ) {
00175 
00176       int ii_1 ( ii - 1 );
00177 
00178       sprintf( str, "%s%s", column_name_ptr, suffix[ ii ] );
00179       ordbtable.setColumn( str, "N" );
00180       output_stats[ ii_1 ] = ordbtable.getColumn( ordbtable.nColumns( ) - 1 ) ;
00181       output_stats[ ii_1 ]->mapData( the_statistics + ii_1, 1 );
00182 
00183       // Make sure that the sentinel is set to NULL
00184       output_stats[ ii ] = NULL;
00185 
00186     }
00187 
00188   } catch ( RDBErr& rdbe ) {
00189 
00190     throw Exception( rdbe );
00191 
00192   } catch( Exception& E ) {
00193 
00194     throw;
00195 
00196   } catch ( exception& e ) { 
00197 
00198     throw Exception( e.what( ) );
00199 
00200   }
00201 
00202 }
00203 
00204 void RdbStats::update_statistics( ) throw( Exception ) {
00205 
00206   try {
00207 
00208 #ifdef TRACEFCT
00209     TraceFct tf( "void RdbStats::update_statistics( ) throw( Exception )" );
00210 #endif
00211 
00212     double val = get_value( );
00213 
00214     // only use value if it isn't NaN.  This is a simple check for NaN,
00215     // based upon Net Lore.
00216 
00217     if ( val == val ) {
00218 
00219         ++num_n;
00220 
00221         if ( 1 == num_n )
00222             the_statistics[ AVG ] = val;
00223         else {
00224 
00225             //  Algorithm taken from the paper titled:
00226             //    Updating Mean and Variances Estimates: An Improved Method
00227             //    by D.H.D. West
00228             //    Communications of the ACM
00229             //    September 1979 Vol 22 Number 9
00230             double tmp = ( val - the_statistics[ AVG ] );
00231             the_statistics[ SUM_T ] += tmp * tmp * ( num_n - 1 ) / num_n;
00232             the_statistics[ AVG ] += tmp / num_n;
00233 
00234         }
00235 
00236         the_statistics[ SUM2 ] += val * val;
00237         the_statistics[ SUM ] += val;
00238         the_statistics[ MAX ] = max( the_statistics[ MAX ], val );
00239         the_statistics[ MIN ] = min( the_statistics[ MIN ], val );
00240 
00241     }
00242 
00243   } catch ( Exception& e ) { 
00244 
00245     throw;
00246 
00247   }
00248 
00249 }