// --*- C++ -*------x---------------------------------------------------------
// $Id: 
//
// Program:         averager
//
// Author:          Eckart Bindewald
//
// Description:     read multi column list with numbers,
//                  return for each line average, standard deviation,
//                  average + and - standard deviation
//                  Demonstrates a little word parser
// -----------------x-------------------x-------------------x-----------------

#include <math.h>
#include <iostream>
#include <Vec.h>
#include <string>
#include <sstream>
#include <vectornumerics.h>
#include <GetArg.h>

#include <debug.h> // own routines for error checking and debugging

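// Returns the current line of the stream as a string. Consumes the trailing
// newline, but does not include it in the result.
// (Implementation below is disabled; main() still calls getLine, so the
// definition has to come from one of the included headers.)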
/*
string
getLine(istream& is)
{
  PRECOND(is);
  char c;
  string resultString; 
  is.get(c);
  while ((is) && (!is.eof()) && (c != '\n') )
    {
      resultString = resultString + c;
      is.get(c);
    }
  return resultString;
}
*/

/** returns arithmetic mean of the elements of a; a must not be empty */
double
average(const Vec<double>& a)
{
  PRECOND(a.size()>0);
  double total = 0.0;
  unsigned int idx = 0;
  // accumulate front to back
  while (idx < a.size())
    {
      total += a[idx];
      ++idx;
    }
  return total/a.size();
}

/**
 * returns standard deviation of the single values of a, using (size - 1)
 * as denominator; a must not be empty, a single value yields 0.0
 */
double
stddev(const Vec<double>& a)
{
  PRECOND(a.size()>0);
  if (a.size() < 2) {
    return 0.0; // no spread can be computed from one value
  }
  const double mean = average(a);
  double squares = 0.0; // sum of squared deviations from the mean
  for (unsigned int k = 0; k < a.size(); ++k) {
    const double diff = a[k] - mean;
    squares += diff * diff;
  }
  return sqrt(squares/(a.size()-1)); 
}

// Converts a string into a double.
// Reports an error if the input is not a number (e.g. a char or a string).
/* 
double stod( const string &s )
{
  PRECOND(s.size() > 0);
  double d;
  double temp; // temporary integer

  const char *pointer;
  pointer = s.c_str();
  istringstream ist( pointer );
  ist >> temp;
  // if ist==NULL (s is no number) then error:
  if( ist == NULL )
    {
      cerr << "\"" << s << "\" is no double!!" << endl;
      ERROR( "Wrong format" ); 
    }
  d = temp;
  return d;
}
*/

// tokenize text
/*
Vec<string> getTokens(const string& text) 
{
  istringstream ist(text.c_str());
  char* charLine = new char[text.size()+1]; // size of string
  Vec<string> v;
  string s;
  while (!ist.eof()) {
    ist >> charLine;
    s = charLine; // assignment of c-strings to string!
    //    DUMP(s);
    if (s != "") { 
      v.push_back(s);
    }
  }
  delete[] charLine;
  return v;
}
*/

/**
 * returns average distance of n'th vector to other vectors
 *
 * The distance between two vectors is computed with euclidianDistance; the
 * n'th vector itself is left out, so the sum is divided by (size - 1).
 *
 * @param n     index of the reference vector, must be smaller than data.size()
 * @param data  set of vectors, must contain at least two vectors
 * @return      mean of the distances between data[n] and every other vector
 */
double
computeAverageDistance(unsigned int n, 
		       const Vec<Vec<double> >& data)
{
  PRECOND(data.size() > 1);
  // data[n] is read below; an index beyond the end would also make the
  // divisor (size - 1) disagree with the number of summed distances
  PRECOND(n < data.size());
  double sum = 0;
  for (unsigned int i = 0; i < data.size(); ++i) {
    if (i == n) {
      continue; // skip distance of the vector to itself
    }
    sum += euclidianDistance(data[n], data[i]);
  }
  return sum/(data.size()-1);
}

/**
 * writes one line per vector of data to os: the 1-based index of the vector
 * followed by the value of computeAverageDistance for that vector
 */
void
outputDistanceToMembers(ostream &os, 
			const Vec<Vec<double> >& data)
{
  for (unsigned int row = 0; row < data.size(); ++row) {
    const double avgDist = computeAverageDistance(row, data);
    os << (row+1) << " " << avgDist << endl;
  }
}

/**
 * writes one line per row of data to os: the 1-based row index followed by
 * the absolute value of the sum of the row's elements, after every column
 * was passed through transformToZScores
 *
 * data is taken by value on purpose: the columns are overwritten in the local
 * copy, the data of the caller stays untouched. The number of columns is
 * taken from the first row.
 * NOTE(review): presumably all rows are expected to have the same length -
 * confirm against getColumn / setColumn.
 * Empty data produces no output.
 */
void
outputZScoreSums(ostream &os, 
		 Vec<Vec<double> > data)
{
  if (data.size() == 0) {
    return; // nothing to write; data[0] below must not be read for empty data
  }
  // replace every column by its transformed version
  for (unsigned int i = 0; i < data[0].size(); ++i) {
    Vec<double> col = getColumn(data, i);
    transformToZScores(col);
    setColumn(data, col, i);
  }
  for (unsigned int i = 0; i < data.size(); ++i) {
    os << (i+1) << " " << fabs(elementSum(data[i])) << endl;
  }
}

int
main(int argc, char ** argv)
{
  bool enumMode   = false;
  bool helpMode   = false;
  // bool noStatMode = false;

  int countStart = 1; 
  int outputFormat = 1;

  getArg("h", helpMode, argc, argv);
  getArg("n", enumMode, argc, argv);
  getArg("m", countStart, argc, argv, countStart);
  getArg("-of", outputFormat, argc, argv, outputFormat);

  if (helpMode) {
    cout << "The program reads from standard input."
	 << "It expects numerical data in columns."  << endl
	 << "For each row it writes mean, standard deviation,"
	 << " mean plus std-dev, and mean minus std-dev of the" 
	 << " numbers of the columns" << endl
	 << "usage: averager [-h][-n][-m number][--of 1|2] < inputfile > outputfile"
	 << endl;
    exit(0);
  }
  Vec<double> avgVec, stdVec, stdMVec, sumVec;
  Vec<Vec<double> > data;
  while ( cin && (!cin.eof()) ) {
      string line = getLine(cin);
      Vec<string> words = getTokens(line); // seperate into words
      Vec<double> aVec(words.size(),0.0);
      for (unsigned int i = 0; i < aVec.size(); ++i) {
	aVec[i] = stod(words[i]); // translate string into double
      }
      if (aVec.size() == 0) {
	continue; // skip empty line
      }
      double avg =  average(aVec);
      double standardDev = stddev(aVec); // / sqrt(static_cast<double>(aVec.size())); 
      avgVec.push_back(avg);
      sumVec.push_back(elementSum(aVec));
      stdVec.push_back(standardDev);
      stdMVec.push_back(standardDev / sqrt(static_cast<double>(aVec.size())));
      data.push_back(aVec);
  }
  double avgavg = average(avgVec);
  // double avgSum = elementSum(avgVec);
  double stddevVal = stddev(avgVec);
  Vec<double> avgZVec = avgVec;
  transformToZScores(avgZVec);
  if (outputFormat == 1) {
    cout << "# Row-mean Row-std mean+std mean-std z-score std_of_mean" << endl;
  }
  for (unsigned int i = 0; i < avgVec.size(); ++i) {
    if (enumMode) {
      if (outputFormat == 1) {
	cout << (static_cast<int>(i) + countStart) << " ";
      }
    }
    if (outputFormat == 1) {
      cout <<  avgVec[i] << " " << stdVec[i] << " "
	   << ( avgVec[i] + stdVec[i] ) << " "
	   << ( avgVec[i] - stdVec[i] ) << " "
	   << avgZVec[i] << " " << stdMVec[i] << " " << sumVec[i] << endl;
    }
  }
  double standardError = stddevVal / sqrt(static_cast<double>(avgVec.size()));
  switch (outputFormat) {
  case 1:
    cout << "############" << endl;
    cout << "# General statistics: ";
    cout << "# average of average: " << avgavg;
    // cout << "# sum of avg: " << avgSum << endl;
    cout << " +- " << stddevVal;
    cout << " (+- " << standardError;
    cout << " ) sum of line averages: " << elementSum(avgVec) 
	 << " number of lines: " << avgVec.size() << endl;
    break;
  case 2:
    cout << avgavg << endl;
    break;
  case 3:
    cout << avgavg << " +- " << stddevVal << " ( " 
	 << standardError << " ) " << endl;
    break; 
  case 4:
    outputDistanceToMembers(cout, data);
    break;
  case 5:
    outputZScoreSums(cout, data);
    break;
  case 6:
    cout << data.size() << endl;
    for (unsigned int i = 0; i < data.size(); ++i) {
      cout << (i+1) << " " << data[i];
    }
    break;
  case 7: // write property file
    cout << "average=" << avgavg << endl;
    cout << "standardDeviation=" << stddevVal << endl;
    cout << "standardError=" << standardError << endl;
    cout << "numberCases=" << avgVec.size() << endl;
    break;
  default:
    ERROR("Unknown output format id!");
  }

  return 0;
}







