main.cc

00001 // File:  rdbstats.cc
00002 
00003 // --8<--8<--8<--8<--
00004 //
00005 // Copyright (C) 2006 Smithsonian Astrophysical Observatory
00006 //
00007 // This file is part of rdbstats
00008 //
00009 // rdbstats is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU General Public License
00011 // as published by the Free Software Foundation; either version 2
00012 // of the License, or (at your option) any later version.
00013 //
00014 // rdbstats is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017 // GNU General Public License for more details.
00018 //
00019 // You should have received a copy of the GNU General Public License
00020 // along with this program; if not, write to the 
00021 //       Free Software Foundation, Inc. 
00022 //       51 Franklin Street, Fifth Floor
00023 //       Boston, MA  02110-1301, USA
00024 //
00025 // -->8-->8-->8-->8--
00026 
00027 #include <iostream>
00028 #include <cstdlib>
00029 
00030 #include <rdbxx/RDB.h>
00031 #include <Exception/Exception.h>
00032 
00033 #include "Options.h"
00034 #include "usage.h"
00035 #include "RdbStats.h"
00036 #include "RdbStatsAve.h"
00037 #include "RdbStatsPercentiles.h"
00038 #include "RdbStatsPercentilesAve.h"
00039 #include "RdbStatsPercentilesMed.h"
00040 #include "Row.h"
00041 #include "SelectedCols.h"
00042 #include "StatsResult.h"
00043 #include "config.h"
00044 
00045 using namespace std;
00046 
00050 template<class Type>
00051 void rdbstats( RDB& irdbtable, Type& rdb_stats_columns,
00052                const vector< pair< long, long > >& range, RDB& ordbtable, bool grouped )
00053   throw( Exception ) {
00054 
00055 #ifdef TRACEFCT
00056   TraceFct tf( "rdbstats( RDB&, Type&, const vector< pair<long, long> >&,"
00057                "RDB&, bool ) throw( Exception )" );
00058 #endif
00059 
00060   try {
00061 
00062     StatsResult< Type >* stats_result_ptr =
00063       new StatsResult< Type >( rdb_stats_columns, ordbtable );
00064 
00065     size_t mysize = range.size( );
00066     int status = RDB::REOF;
00067 
00068     for ( int ii = 0, line_number = 1; ii < mysize; ++ii, ++line_number ) {
00069 
00070       // Skip the un-requested rows.
00071       // At the end of this loop, line_number == range[ ii ].first
00072       for ( ; line_number < range[ ii ].first; ++line_number ) {
00073         if ( RDB::REOF == (status = irdbtable.read( )) ) {
00074 
00075           stats_result_ptr->calculate_statistics( );
00076           delete stats_result_ptr;
00077           return;
00078 
00079         }
00080         //      ordbtable.autoIdx( false );
00081       }
00082 
00083       // loop through the requested range of rows.
00084       for ( ; line_number <= range[ ii ].second; ++line_number ) {
00085 
00086         if ( RDB::REOF == (status = irdbtable.read( )) ) {
00087 
00088           if ( ! grouped ) 
00089             stats_result_ptr->calculate_statistics( ); 
00090 
00091           delete stats_result_ptr;
00092 
00093           return;
00094 
00095         } 
00096 
00097         stats_result_ptr->update_statistics( );
00098 
00099         if ( RDB::REOG & status ) 
00100 
00101           stats_result_ptr->calculate_statistics( );
00102 
00103         //              ordbtable.autoIdx( false );
00104 
00105       }
00106 
00107     }
00108 
00109     delete stats_result_ptr;
00110 
00111   } catch( Exception& e ) {
00112 
00113     cerr << e << '\n';
00114     throw;
00115 
00116   }
00117 
00118 }
00119 
00123 void rdbstats( clo::parser& clo ) throw( Exception ) {
00124 
00125 #ifdef TRACEFCT
00126   TraceFct tf( "rdbstats( clo::parser& ) throw( Exception& e )" );
00127 #endif
00128 
00129   try {
00130 
00131     /*
00132      * get the struct of options from the parser class so that you can
00133      * access the option values.
00134      */
00135     const clo::options& options = clo.get_options();
00136     const std::vector<std::string>& the_files = clo.get_non_options();
00137 
00138     // The input stream shall be the standard in
00139     RDB *irdbtable = ( options.input == "stdin" ) ? ( new RDB( &cin ) ) : ( new RDB( options.input ) );
00140 
00141     // The output stream shall be the standard out
00142     //    ostream* os = &cout;
00143     //    os->precision( 15 );
00144     RDB *ordbtable = ( options.output == "stdout" ) ? ( new RDB( &cout ) ) : ( new RDB( options.output ) );
00145     ordbtable->setComment( *irdbtable );
00146 
00147     SelectedCols* selected_cols = NULL;
00148     if ( options.all || 0 == the_files.size( ) ) {
00149       selected_cols = new SelectedCols( *irdbtable, clo );
00150     } else {
00151       selected_cols = new SelectedCols( the_files, *irdbtable, clo );
00152     }
00153     // cout << *selected_cols << '\n';
00154 
00155     const vector< string >& col_name = selected_cols->get_selected_cols( );
00156     // cout << "# col_name.size( ) = " << col_name.size( ) << '\n';
00157     if ( 0 == col_name.size( ) )
00158       return;
00159 
00160     // Set the group.
00161     bool grouped = false;
00162     vector< string >::const_iterator current_group( options.group.begin( ) ),
00163       end_group( options.group.end( ) );
00164     for ( ; current_group != end_group; ++current_group ) {
00165       // --group a,b,c may have been entered, so must parse entry.
00166       vector< string > groupies;
00167       suplib::tok( groupies, *current_group, "," );
00168       for ( unsigned int ii = 0; ii < groupies.size( ); ii++ ) {
00169         ordbtable->setColumn( irdbtable->getColumn( groupies[ ii ] ) );
00170         irdbtable->setGroup( groupies[ ii ] );
00171         grouped = true;
00172       }
00173     }
00174 
00175     Row row( options.rows );
00176     // cout << '#' << row << '\n';
00177 
00178     if ( options.percentiles != "" ) {
00179 
00180       string percentiles( options.percentiles );
00181 
00182       //
00183       // This allows the possibility that the user entered :   --p 23,45 -q
00184       //
00185       bool iq = options.quartiles;
00186 
00187       switch( options.normalize ) {
00188       case clo::normalize_ave:
00189         {
00190           vector< RdbStatsPercentilesAve > rdb_stats_columns;
00191 
00192           for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00193             rdb_stats_columns.push_back( RdbStatsPercentilesAve( *irdbtable,
00194                                                                  col_name[ii],
00195                                                                  percentiles,
00196                                                                  iq ) );
00197           rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00198                     *ordbtable, grouped );
00199         }
00200         break;
00201       case clo::normalize_med:
00202         {
00203           vector< RdbStatsPercentilesMed > rdb_stats_columns;
00204           for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00205             rdb_stats_columns.push_back( RdbStatsPercentilesMed( *irdbtable,
00206                                                                  col_name[ii],
00207                                                                  percentiles,
00208                                                                  iq ) );
00209           rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00210                     *ordbtable, grouped );
00211         }
00212         break;
00213       case clo::normalize_none:
00214         {
00215           vector< RdbStatsPercentiles > rdb_stats_columns;
00216           for ( int ii = 0; ii < col_name.size( ); ii++ )
00217             rdb_stats_columns.push_back( RdbStatsPercentiles( *irdbtable,
00218                                                               col_name[ ii ],
00219                                                               percentiles,
00220                                                               iq ) );
00221           rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00222                     *ordbtable, grouped );
00223         }
00224         break;
00225       }
00226 
00227     } else if ( options.quartiles ) {
00228 
00229       switch( options.normalize ) {
00230       case clo::normalize_ave:
00231         {
00232           vector< RdbStatsPercentilesAve > rdb_stats_columns;
00233           for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00234             rdb_stats_columns.push_back( RdbStatsPercentilesAve( *irdbtable,
00235                                                                  col_name[ ii ]
00236                                                                  ) );
00237           rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00238                     *ordbtable, grouped );
00239         }
00240         break;
00241       case clo::normalize_med:
00242         {
00243           vector< RdbStatsPercentilesMed > rdb_stats_columns;
00244           for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00245             rdb_stats_columns.push_back( RdbStatsPercentilesMed( *irdbtable,
00246                                                                  col_name[ ii ]
00247                                                                  ) );
00248           rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00249                     *ordbtable, grouped );
00250         }
00251         break;
00252       case clo::normalize_none:
00253         {
00254           vector< RdbStatsPercentiles > rdb_stats_columns;
00255           for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00256             rdb_stats_columns.push_back( RdbStatsPercentiles( *irdbtable,
00257                                                               col_name[ ii ] )
00258                                          );
00259           rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00260                     *ordbtable, grouped );
00261         }
00262         break;
00263       }
00264 
00265     } else {
00266 
00267       switch( options.normalize ) {
00268       case clo::normalize_ave:
00269         {
00270           vector< RdbStatsAve > rdb_stats_columns;
00271 
00272           for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00273             rdb_stats_columns.push_back( RdbStatsAve( *irdbtable,
00274                                                       col_name[ ii ] ) );
00275           rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00276                     *ordbtable, grouped );
00277         }
00278         break;
00279       case clo::normalize_med:
00280         {
00281           // implicit --quartiles case
00282           vector< RdbStatsPercentilesMed > rdb_stats_columns;
00283           for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00284             rdb_stats_columns.push_back( RdbStatsPercentilesMed( *irdbtable,
00285                                                                  col_name[ ii ]
00286                                                                  ) );
00287           rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00288                     *ordbtable, grouped );
00289         }
00290         break;
00291       case clo::normalize_none:
00292         {
00293           vector< RdbStats > rdb_stats_columns;
00294           for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00295             rdb_stats_columns.push_back( RdbStats( *irdbtable, col_name[ ii ]
00296                                                    ) );
00297           rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00298                     *ordbtable, grouped );
00299         }
00300         break;
00301       }
00302 
00303     }
00304 
00305     delete selected_cols;
00306     delete ordbtable;
00307     delete irdbtable;
00308 
00309   } catch( RDBErr& rdbe ) {
00310 
00311     cerr << rdbe << '\n';
00312     throw Exception( rdbe );
00313 
00314   } catch( Exception& e ) {
00315 
00316     cerr << e << '\n';
00317     throw;
00318 
00319   } catch( exception& ee ) {
00320 
00321     cerr << ee.what( ) << '\n';
00322     throw Exception( ee.what( ) );
00323 
00324   } catch( ... ) {
00325 
00326     throw Exception( "Caught unknown exception, got to love C++!" );
00327 
00328   }
00329 
00330 }
00331 
00332 int main( int argc, char* argv[] ) {
00333 
00334 #ifdef TRACEFCT
00335   TraceFct tf( argv[0], 1, -1 );
00336 #endif
00337 
00338   try {
00339 
00340     clo::parser         clo;
00341     clo.parse( argc, argv );
00342 
00343     if ( 0 != clo.get_options( ).group.size( ) &&
00344          0 != clo.get_options( ).rows.size( ) )
00345       throw Exception(  "The options '--group'and '--rows' "
00346                         "cannot be used in conjunction\n" );
00347 
00348     rdbstats( clo );
00349 
00350     return EXIT_SUCCESS;
00351 
00352   } catch ( clo::autoexcept& cloe ) {
00353 
00354     switch ( cloe.get_autothrow_id() ) {
00355     case clo::autothrow_help:
00356       std::cout << "Usage: " << argv[0] << " [options]\n";
00357       std::cout << cloe.what();
00358       return EXIT_SUCCESS;
00359     case clo::autothrow_usage:
00360       usage( );
00361       return EXIT_SUCCESS;
00362     case clo::autothrow_version:
00363       std::cout << PACKAGE_NAME << '\t' << PACKAGE_VERSION << endl;
00364       return EXIT_SUCCESS;
00365     }
00366 
00367   } catch( Exception& E ) {
00368 
00369     cerr << E << endl;
00370     return EXIT_FAILURE;
00371 
00372   } catch( std::exception &e ) {
00373 
00374     std::cerr << e.what() << "\n";
00375     return EXIT_FAILURE;
00376 
00377   } catch ( ... ) {
00378 
00379     cerr << "Unknown exception caught, got to love C++\n";
00380     return EXIT_FAILURE;
00381 
00382   }
00383 
00384   return 0;
00385 
00386 }