00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include <iostream>
00028 #include <cstdlib>
00029
00030 #include <rdbxx/RDB.h>
00031 #include <Exception/Exception.h>
00032
00033 #include "Options.h"
00034 #include "usage.h"
00035 #include "RdbStats.h"
00036 #include "RdbStatsAve.h"
00037 #include "RdbStatsPercentiles.h"
00038 #include "RdbStatsPercentilesAve.h"
00039 #include "RdbStatsPercentilesMed.h"
00040 #include "Row.h"
00041 #include "SelectedCols.h"
00042 #include "StatsResult.h"
00043 #include "config.h"
00044
00045 using namespace std;
00046
00050 template<class Type>
00051 void rdbstats( RDB& irdbtable, Type& rdb_stats_columns,
00052 const vector< pair< long, long > >& range, RDB& ordbtable, bool grouped )
00053 throw( Exception ) {
00054
00055 #ifdef TRACEFCT
00056 TraceFct tf( "rdbstats( RDB&, Type&, const vector< pair<long, long> >&,"
00057 "RDB&, bool ) throw( Exception )" );
00058 #endif
00059
00060 try {
00061
00062 StatsResult< Type >* stats_result_ptr =
00063 new StatsResult< Type >( rdb_stats_columns, ordbtable );
00064
00065 size_t mysize = range.size( );
00066 int status = RDB::REOF;
00067
00068 for ( int ii = 0, line_number = 1; ii < mysize; ++ii, ++line_number ) {
00069
00070
00071
00072 for ( ; line_number < range[ ii ].first; ++line_number ) {
00073 if ( RDB::REOF == (status = irdbtable.read( )) ) {
00074
00075 stats_result_ptr->calculate_statistics( );
00076 delete stats_result_ptr;
00077 return;
00078
00079 }
00080
00081 }
00082
00083
00084 for ( ; line_number <= range[ ii ].second; ++line_number ) {
00085
00086 if ( RDB::REOF == (status = irdbtable.read( )) ) {
00087
00088 if ( ! grouped )
00089 stats_result_ptr->calculate_statistics( );
00090
00091 delete stats_result_ptr;
00092
00093 return;
00094
00095 }
00096
00097 stats_result_ptr->update_statistics( );
00098
00099 if ( RDB::REOG & status )
00100
00101 stats_result_ptr->calculate_statistics( );
00102
00103
00104
00105 }
00106
00107 }
00108
00109 delete stats_result_ptr;
00110
00111 } catch( Exception& e ) {
00112
00113 cerr << e << '\n';
00114 throw;
00115
00116 }
00117
00118 }
00119
00123 void rdbstats( clo::parser& clo ) throw( Exception ) {
00124
00125 #ifdef TRACEFCT
00126 TraceFct tf( "rdbstats( clo::parser& ) throw( Exception& e )" );
00127 #endif
00128
00129 try {
00130
00131
00132
00133
00134
00135 const clo::options& options = clo.get_options();
00136 const std::vector<std::string>& the_files = clo.get_non_options();
00137
00138
00139 RDB *irdbtable = ( options.input == "stdin" ) ? ( new RDB( &cin ) ) : ( new RDB( options.input ) );
00140
00141
00142
00143
00144 RDB *ordbtable = ( options.output == "stdout" ) ? ( new RDB( &cout ) ) : ( new RDB( options.output ) );
00145 ordbtable->setComment( *irdbtable );
00146
00147 SelectedCols* selected_cols = NULL;
00148 if ( options.all || 0 == the_files.size( ) ) {
00149 selected_cols = new SelectedCols( *irdbtable, clo );
00150 } else {
00151 selected_cols = new SelectedCols( the_files, *irdbtable, clo );
00152 }
00153
00154
00155 const vector< string >& col_name = selected_cols->get_selected_cols( );
00156
00157 if ( 0 == col_name.size( ) )
00158 return;
00159
00160
00161 bool grouped = false;
00162 vector< string >::const_iterator current_group( options.group.begin( ) ),
00163 end_group( options.group.end( ) );
00164 for ( ; current_group != end_group; ++current_group ) {
00165
00166 vector< string > groupies;
00167 suplib::tok( groupies, *current_group, "," );
00168 for ( unsigned int ii = 0; ii < groupies.size( ); ii++ ) {
00169 ordbtable->setColumn( irdbtable->getColumn( groupies[ ii ] ) );
00170 irdbtable->setGroup( groupies[ ii ] );
00171 grouped = true;
00172 }
00173 }
00174
00175 Row row( options.rows );
00176
00177
00178 if ( options.percentiles != "" ) {
00179
00180 string percentiles( options.percentiles );
00181
00182
00183
00184
00185 bool iq = options.quartiles;
00186
00187 switch( options.normalize ) {
00188 case clo::normalize_ave:
00189 {
00190 vector< RdbStatsPercentilesAve > rdb_stats_columns;
00191
00192 for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00193 rdb_stats_columns.push_back( RdbStatsPercentilesAve( *irdbtable,
00194 col_name[ii],
00195 percentiles,
00196 iq ) );
00197 rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00198 *ordbtable, grouped );
00199 }
00200 break;
00201 case clo::normalize_med:
00202 {
00203 vector< RdbStatsPercentilesMed > rdb_stats_columns;
00204 for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00205 rdb_stats_columns.push_back( RdbStatsPercentilesMed( *irdbtable,
00206 col_name[ii],
00207 percentiles,
00208 iq ) );
00209 rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00210 *ordbtable, grouped );
00211 }
00212 break;
00213 case clo::normalize_none:
00214 {
00215 vector< RdbStatsPercentiles > rdb_stats_columns;
00216 for ( int ii = 0; ii < col_name.size( ); ii++ )
00217 rdb_stats_columns.push_back( RdbStatsPercentiles( *irdbtable,
00218 col_name[ ii ],
00219 percentiles,
00220 iq ) );
00221 rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00222 *ordbtable, grouped );
00223 }
00224 break;
00225 }
00226
00227 } else if ( options.quartiles ) {
00228
00229 switch( options.normalize ) {
00230 case clo::normalize_ave:
00231 {
00232 vector< RdbStatsPercentilesAve > rdb_stats_columns;
00233 for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00234 rdb_stats_columns.push_back( RdbStatsPercentilesAve( *irdbtable,
00235 col_name[ ii ]
00236 ) );
00237 rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00238 *ordbtable, grouped );
00239 }
00240 break;
00241 case clo::normalize_med:
00242 {
00243 vector< RdbStatsPercentilesMed > rdb_stats_columns;
00244 for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00245 rdb_stats_columns.push_back( RdbStatsPercentilesMed( *irdbtable,
00246 col_name[ ii ]
00247 ) );
00248 rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00249 *ordbtable, grouped );
00250 }
00251 break;
00252 case clo::normalize_none:
00253 {
00254 vector< RdbStatsPercentiles > rdb_stats_columns;
00255 for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00256 rdb_stats_columns.push_back( RdbStatsPercentiles( *irdbtable,
00257 col_name[ ii ] )
00258 );
00259 rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00260 *ordbtable, grouped );
00261 }
00262 break;
00263 }
00264
00265 } else {
00266
00267 switch( options.normalize ) {
00268 case clo::normalize_ave:
00269 {
00270 vector< RdbStatsAve > rdb_stats_columns;
00271
00272 for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00273 rdb_stats_columns.push_back( RdbStatsAve( *irdbtable,
00274 col_name[ ii ] ) );
00275 rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00276 *ordbtable, grouped );
00277 }
00278 break;
00279 case clo::normalize_med:
00280 {
00281
00282 vector< RdbStatsPercentilesMed > rdb_stats_columns;
00283 for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00284 rdb_stats_columns.push_back( RdbStatsPercentilesMed( *irdbtable,
00285 col_name[ ii ]
00286 ) );
00287 rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00288 *ordbtable, grouped );
00289 }
00290 break;
00291 case clo::normalize_none:
00292 {
00293 vector< RdbStats > rdb_stats_columns;
00294 for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
00295 rdb_stats_columns.push_back( RdbStats( *irdbtable, col_name[ ii ]
00296 ) );
00297 rdbstats( *irdbtable, rdb_stats_columns, row.get_range( ),
00298 *ordbtable, grouped );
00299 }
00300 break;
00301 }
00302
00303 }
00304
00305 delete selected_cols;
00306 delete ordbtable;
00307 delete irdbtable;
00308
00309 } catch( RDBErr& rdbe ) {
00310
00311 cerr << rdbe << '\n';
00312 throw Exception( rdbe );
00313
00314 } catch( Exception& e ) {
00315
00316 cerr << e << '\n';
00317 throw;
00318
00319 } catch( exception& ee ) {
00320
00321 cerr << ee.what( ) << '\n';
00322 throw Exception( ee.what( ) );
00323
00324 } catch( ... ) {
00325
00326 throw Exception( "Caught unknown exception, got to love C++!" );
00327
00328 }
00329
00330 }
00331
00332 int main( int argc, char* argv[] ) {
00333
00334 #ifdef TRACEFCT
00335 TraceFct tf( argv[0], 1, -1 );
00336 #endif
00337
00338 try {
00339
00340 clo::parser clo;
00341 clo.parse( argc, argv );
00342
00343 if ( 0 != clo.get_options( ).group.size( ) &&
00344 0 != clo.get_options( ).rows.size( ) )
00345 throw Exception( "The options '--group'and '--rows' "
00346 "cannot be used in conjunction\n" );
00347
00348 rdbstats( clo );
00349
00350 return EXIT_SUCCESS;
00351
00352 } catch ( clo::autoexcept& cloe ) {
00353
00354 switch ( cloe.get_autothrow_id() ) {
00355 case clo::autothrow_help:
00356 std::cout << "Usage: " << argv[0] << " [options]\n";
00357 std::cout << cloe.what();
00358 return EXIT_SUCCESS;
00359 case clo::autothrow_usage:
00360 usage( );
00361 return EXIT_SUCCESS;
00362 case clo::autothrow_version:
00363 std::cout << PACKAGE_NAME << '\t' << PACKAGE_VERSION << endl;
00364 return EXIT_SUCCESS;
00365 }
00366
00367 } catch( Exception& E ) {
00368
00369 cerr << E << endl;
00370 return EXIT_FAILURE;
00371
00372 } catch( std::exception &e ) {
00373
00374 std::cerr << e.what() << "\n";
00375 return EXIT_FAILURE;
00376
00377 } catch ( ... ) {
00378
00379 cerr << "Unknown exception caught, got to love C++\n";
00380 return EXIT_FAILURE;
00381
00382 }
00383
00384 return 0;
00385
00386 }