rdbstats  2.0.7
main.cc
1 // File: rdbstats.cc
2 
3 // --8<--8<--8<--8<--
4 //
5 // Copyright (C) 2006 Smithsonian Astrophysical Observatory
6 //
7 // This file is part of rdbstats
8 //
9 // rdbstats is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU General Public License
11 // as published by the Free Software Foundation; either version 2
12 // of the License, or (at your option) any later version.
13 //
14 // rdbstats is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU General Public License
20 // along with this program; if not, write to the
21 // Free Software Foundation, Inc.
22 // 51 Franklin Street, Fifth Floor
23 // Boston, MA 02110-1301, USA
24 //
25 // -->8-->8-->8-->8--
26 
27 #include <iostream>
28 
29 #include <rdbxx/RDB.h>
30 #include <Exception/Exception.h>
31 
32 #include "Options.h"
33 #include "RdbStats.h"
34 #include "RdbStatsAve.h"
35 #include "RdbStatsPercentiles.h"
36 #include "RdbStatsPercentilesAve.h"
37 #include "RdbStatsPercentilesMed.h"
38 #include "Row.h"
39 #include "SelectedCols.h"
40 #include "StatsResult.h"
41 #include "config.h"
42 
46 template<class Type>
47 void rdbstats( RDB& irdbtable, Type& rdb_stats_columns,
48  const std::vector< std::pair< long, long > >& range, RDB& ordbtable, bool grouped ) {
49 
50  StatsResult< Type > stats_result( rdb_stats_columns, ordbtable );
51 
52  size_t mysize = range.size( );
53  int status = RDB::REOF;
54 
55  for ( int ii = 0, line_number = 1; ii < mysize; ++ii, ++line_number ) {
56 
57  // Skip the un-requested rows.
58  // At the end of this loop, line_number == range[ ii ].first
59  for ( ; line_number < range[ ii ].first; ++line_number ) {
60  if ( RDB::REOF == (status = irdbtable.read( )) ) {
61  stats_result.calculate_statistics( );
62  return;
63  }
64  // ordbtable.autoIdx( false );
65  }
66 
67  // loop through the requested range of rows.
68  for ( ; line_number <= range[ ii ].second; ++line_number ) {
69  if ( RDB::REOF == (status = irdbtable.read( )) ) {
70  if ( ! grouped )
71  stats_result.calculate_statistics( );
72  return;
73  }
74 
75  stats_result.update_statistics( );
76 
77  if ( RDB::REOG & status )
78  stats_result.calculate_statistics( );
79  // ordbtable.autoIdx( false );
80  }
81  }
82 }
83 
87 void rdbstats( Options& options ) {
88 
89  /*
90  * get the struct of options from the parser class so that you can
91  * access the option values.
92  */
93  // The input stream shall be the standard in
94  RDB irdbtable(
95  [&](){ return
96  options.input.empty()
97  ? RDB( &std::cin )
98  : RDB( options.input );
99  }()
100  );
101  // The output stream shall be the standard out
102  // ostream* os = &cout;
103  // os->precision( 15 );
104  RDB ordbtable(
105  [&](){ return
106  options.output.empty()
107  ? RDB( &std::cout )
108  : RDB( options.output );
109  }()
110  );
111  ordbtable.setComment( irdbtable );
112 
113  // if the all-columns option has been selected, clear any selected
114  // on the command line
115  if ( options.all )
116  options.columns.clear();
117 
118  SelectedCols selected_cols( irdbtable, options.columns, options.group, options.override );
119 
120  const std::vector< std::string >& col_name = selected_cols.get_selected_cols( );
121 
122  if ( col_name.empty() )
123  return;
124 
125  // Set the group.
126  bool grouped = false;
127  std::vector< std::string >::const_iterator current_group( options.group.begin( ) ),
128  end_group( options.group.end( ) );
129  for ( ; current_group != end_group; ++current_group ) {
130  // --group a,b,c may have been entered, so must parse entry.
131  std::vector< std::string > groupies;
132  suplib::tok( groupies, *current_group, "," );
133 
134  for ( unsigned int ii = 0; ii < groupies.size( ); ii++ ) {
135  ordbtable.setColumn( irdbtable.getColumn( groupies[ ii ] ) );
136  irdbtable.setGroup( groupies[ ii ] );
137  grouped = true;
138  }
139  }
140 
141  Row row( options.rows );
142  // cout << '#' << row << '\n';
143 
144  if ( ! options.percentiles.empty() ) {
145 
146  std::string percentiles( options.percentiles );
147 
148  //
149  // This allows the possibility that the user entered : --p 23,45 -q
150  //
151  bool iq = options.quartiles;
152 
153  switch( options.normalize ) {
154  case Normalize::Average:
155  {
156  std::vector< RdbStatsPercentilesAve > rdb_stats_columns;
157 
158  for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
159  rdb_stats_columns.push_back( RdbStatsPercentilesAve( irdbtable,
160  col_name[ii],
161  percentiles,
162  iq ) );
163  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ),
164  ordbtable, grouped );
165  }
166  break;
167  case Normalize::Median:
168  {
169  std::vector< RdbStatsPercentilesMed > rdb_stats_columns;
170  for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
171  rdb_stats_columns.push_back( RdbStatsPercentilesMed( irdbtable,
172  col_name[ii],
173  percentiles,
174  iq ) );
175  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ),
176  ordbtable, grouped );
177  }
178  break;
179  case Normalize::None:
180  {
181  std::vector< RdbStatsPercentiles > rdb_stats_columns;
182  for ( int ii = 0; ii < col_name.size( ); ii++ )
183  rdb_stats_columns.push_back( RdbStatsPercentiles( irdbtable,
184  col_name[ ii ],
185  percentiles,
186  iq ) );
187  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ),
188  ordbtable, grouped );
189  }
190  break;
191  }
192 
193  } else if ( options.quartiles ) {
194 
195  switch( options.normalize ) {
196  case Normalize::Average:
197  {
198  std::vector< RdbStatsPercentilesAve > rdb_stats_columns;
199  for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
200  rdb_stats_columns.push_back( RdbStatsPercentilesAve( irdbtable,
201  col_name[ ii ]
202  ) );
203  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ),
204  ordbtable, grouped );
205  }
206  break;
207  case Normalize::Median:
208  {
209  std::vector< RdbStatsPercentilesMed > rdb_stats_columns;
210  for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
211  rdb_stats_columns.push_back( RdbStatsPercentilesMed( irdbtable,
212  col_name[ ii ]
213  ) );
214  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ),
215  ordbtable, grouped );
216  }
217  break;
218  case Normalize::None:
219  {
220  std::vector< RdbStatsPercentiles > rdb_stats_columns;
221  for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
222  rdb_stats_columns.push_back( RdbStatsPercentiles( irdbtable,
223  col_name[ ii ] )
224  );
225  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ),
226  ordbtable, grouped );
227  }
228  break;
229  }
230 
231  } else {
232 
233  switch( options.normalize ) {
234  case Normalize::Average:
235  {
236  std::vector< RdbStatsAve > rdb_stats_columns;
237 
238  for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
239  rdb_stats_columns.push_back( RdbStatsAve( irdbtable,
240  col_name[ ii ] ) );
241  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ),
242  ordbtable, grouped );
243  }
244  break;
245  case Normalize::Median:
246  {
247  // implicit --quartiles case
248  std::vector< RdbStatsPercentilesMed > rdb_stats_columns;
249  for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
250  rdb_stats_columns.push_back( RdbStatsPercentilesMed( irdbtable,
251  col_name[ ii ]
252  ) );
253  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ),
254  ordbtable, grouped );
255  }
256  break;
257  case Normalize::None:
258  {
259  std::vector< RdbStats > rdb_stats_columns;
260  for ( unsigned int ii = 0; ii < col_name.size( ); ii++ )
261  rdb_stats_columns.push_back( RdbStats( irdbtable, col_name[ ii ]
262  ) );
263  rdbstats( irdbtable, rdb_stats_columns, row.get_range( ),
264  ordbtable, grouped );
265  }
266  break;
267  }
268 
269  }
270 }
271 
272 int main( int argc, char* argv[] ) {
273 
274 #ifdef TRACEFCT
275  TraceFct tf( argv[0], 1, -1 );
276 #endif
277 
278  try {
279 
280  Options options( argc, argv );
281 
282  if ( options.exit_requested )
283  return options.exit_value;
284 
285  rdbstats( options );
286 
287  return EXIT_SUCCESS;
288 
289  } catch( Exception& E ) {
290 
291  std::cerr << E << std::endl;
292  return EXIT_FAILURE;
293 
294  } catch( std::exception &e ) {
295 
296  std::cerr << e.what() << std::endl;
297  return EXIT_FAILURE;
298 
299  } catch ( ... ) {
300 
301  std::cerr << "Unknown exception caught" << std::endl;
302 
303  return EXIT_FAILURE;
304  }
305 
306  return 0;
307 }
The base class to calculate : average, maximum, minimum, num, stddev and sum.
Definition: RdbStats.h:39
A class to calculate the percentiles; RdbStatsPercentilesAve is a special case of the class RdbStatsPe...
A class to figure out the name of the columns for which the statistics are to be calculated.
Definition: SelectedCols.h:48
Normalizes the results with respect to the average.
Definition: RdbStatsAve.h:33
A class to calculate the percentiles; RdbStatsPercentiles is a special case of the class RdbStats.
A container class to hold the relevant data for the rdb data columns.
Definition: StatsResult.h:36
A class to calculate the percentiles; RdbStatsPercentilesMed is a special case of the class RdbStatsPe...