// --*- C++ -*------x---------------------------------------------------------
#ifndef __BIRTHDAY_PROB__
#define __BIRTHDAY_PROB__

#include <Vec.h>
#include <Random.h>
#include <debug.h>
#include <MultiValArray.h>

#define UNDEFINED_PROB -1.0

/**
 * Probabilities for a generalized birthday problem: n trials are drawn from
 * k equally likely outcomes and we ask how many DISTINCT outcomes show up.
 *
 *  - exactProb(k, n, m): probability of observing exactly m distinct outcomes
 *    (memoized recurrence, sampling fallback if k does not fit the table).
 *  - exactP(k, n, m):    probability of observing m or fewer distinct outcomes.
 *  - computeP(k, n, m):  sampling estimate of the same "m or fewer" probability.
 */
class BirthdayProb {

 public:

  typedef int int_type;
  typedef Vec<double>::size_type size_type;

  enum { OUTCOME_MAX_DEFAULT = 400 , USE_MATRICES_MODE = 1 };

 private:

  /** Number of simulation rounds used when exactProb() has to fall back to sampling. */
  enum { FALLBACK_ITERATIONS = 10000 };

  /** Memo table for exactProb(), addressed as [k-1][n-1][m-1]. Cells holding
   * UNDEFINED_PROB have not been computed yet. Declared mutable so that the
   * const query methods can fill it lazily. */
  mutable MultiValArray matrices;

  /** Runs iterMax independent simulations of n draws from k outcomes and counts
   * the simulations whose number of distinct outcomes lies in
   * [minDistinct, maxDistinct] (both inclusive).
   * NOTE(review): assumes Random::getRand(k) yields a uniform index in [0, k) - confirm.
   * @param k number of possible outcomes (must be > 0)
   * @param n number of draws per simulation (must be > 0)
   * @param minDistinct smallest distinct-outcome count that is a hit
   * @param maxDistinct largest distinct-outcome count that is a hit
   * @param iterMax number of simulations to run
   * @return number of simulations that were hits
   */
  static int_type countSimulatedHits(int_type k, int_type n,
                                     int_type minDistinct, int_type maxDistinct,
                                     int_type iterMax) {
    ASSERT(k > 0);
    ASSERT(n > 0);
    Random& rnd = Random::getInstance();
    int_type hits = 0;
    Vec<int> outcomes(k, 0);
    for (int_type i = 0; i < iterMax; ++i) {
      for (int_type j = 0; j < k; ++j) { // reset the per-outcome counters
        outcomes[j] = 0;
      }
      int_type nonZeros = 0; // number of distinct outcomes seen in this simulation
      for (int_type j = 0; j < n; ++j) { // loop over n trials
        int index = rnd.getRand(k);
        if (outcomes[index] == 0) {
          ++nonZeros;
        } else {
          ASSERT(j > 0); // a repeat cannot happen on the very first draw
        }
        outcomes[index] = outcomes[index] + 1;
      }
      ASSERT(nonZeros > 0);
      if ((nonZeros >= minDistinct) && (nonZeros <= maxDistinct)) {
        ++hits;
      }
    }
    return hits;
  }

 public:

  /** Creates a solver whose memo table is sized for up to OUTCOME_MAX_DEFAULT outcomes. */
  BirthdayProb() : matrices(3, OUTCOME_MAX_DEFAULT) {
    ASSERT(matrices.getDimensionCount() == 3);
    ASSERT(matrices.getDimension(0) == OUTCOME_MAX_DEFAULT);
    matrices.fill(UNDEFINED_PROB ); // mark every cell as "not computed yet"
  }

  /** Creates a solver whose memo table is sized for up to outcomeMax outcomes.
   * @param outcomeMax largest k that exactProb() can answer from the table
   */
  BirthdayProb(int_type outcomeMax) : matrices(3, outcomeMax) {
    ASSERT(matrices.getDimensionCount() == 3);
    ASSERT(static_cast<int_type>(matrices.getDimension(0)) == outcomeMax);
    matrices.fill(UNDEFINED_PROB ); // mark every cell as "not computed yet"
  }

  virtual ~BirthdayProb() { }

  /** Sampling estimate: for k given possibilities (like 365 days) and n trials
   * (like n people), what is the probability to observe only m OR LESS
   * different outcomes (like m different birthdays)?
   * @param k number of possible outcomes (must be > 0)
   * @param n number of trials (must be > 0)
   * @param m largest number of distinct outcomes that counts as a success (must be <= k)
   * @param iterMax number of simulations to run
   * @param pseudoCount1 pseudocount added to the number of successes
   * @param pseudoCount2 pseudocount added to the number of simulations
   * @return (successes + pseudoCount1) / (iterMax + pseudoCount2)
   */
  static double computeP(int_type k, int_type n, int_type m, int_type iterMax = 10000,
                         int_type pseudoCount1 = 1, int_type pseudoCount2 = 1) {
    ASSERT(k > 0);
    ASSERT(n > 0);
    ASSERT(m <= k); // number successes must be smaller or equal than number of outcomes (for now)
    // every simulation has at least one distinct outcome, so a lower bound of 0 filters nothing
    int_type successes = countSimulatedHits(k, n, 0, m, iterMax);
    return static_cast<double>(successes + pseudoCount1) / (iterMax + pseudoCount2); // pseudocount: apriori: P = 1; nonZeros + 2 would mean P = 0.5 a priori
  }

  /** For k given possibilities (like 365 days) and n trials (like n people),
   * what is the probability to observe EXACTLY m different outcomes
   * (like exactly m different birthdays)?
   *
   * Uses the recurrence
   *   P(k,n,m) = (m/k) * P(k,n-1,m) + (1 - (m-1)/k) * P(k,n-1,m-1),  P(k,1,1) = 1,
   * i.e. the last trial either repeats one of the m outcomes already seen or
   * adds a new one to the m-1 seen so far. Results are memoized in the table.
   * If k does not fit the table, the value is estimated by sampling instead
   * (relative frequency of simulations with exactly m distinct outcomes).
   *
   * @param k number of possible outcomes (>= 1)
   * @param n number of trials (>= 1 and, for now, <= k)
   * @param m number of distinct outcomes asked for (>= 1)
   * @return probability in [0, 1]
   */
  virtual double exactProb(int_type k, int_type n, int_type m) const {
    PRECOND((n >= 1) && (m >= 1) && (k >= 1));
    PRECOND( n <= k); // for now restricted to no more trials than outcomes
    if (m > n) {
      return 0.0; // n trials cannot produce more than n distinct outcomes
    }
    if (k > static_cast<int_type>(matrices.getDimension(0))) {
      // Table too small for this k: estimate by sampling. The estimate must be
      // for exactly m distinct outcomes (not "m or less" as computeP reports),
      // otherwise callers that sum over m would count the same event repeatedly.
      const int_type hits = countSimulatedHits(k, n, m, m, FALLBACK_ITERATIONS);
      return static_cast<double>(hits) / FALLBACK_ITERATIONS;
    }
    double result = UNDEFINED_PROB;
    MultiValArray::index_array indices(3);
    indices[0] = k-1; // -1 because internal matrices are 0-based, external loops are 1-based
    indices[1] = n-1;
    indices[2] = m-1;
    if (USE_MATRICES_MODE) {
      // try lookup table:
      result = matrices.get(indices); // [k-1][n-1][m-1];
    }
    if (result != UNDEFINED_PROB) {
      ASSERT(result >= 0.0 && result <= 1.0);
      return result;
    }
    if ((n == 1) && (m == 1)) {
      result = 1.0; // a single trial always yields exactly one distinct outcome
    } else {
      // last trial repeats one of the m outcomes already present
      double duplicateProb = static_cast<double>(m)/k;
      result = duplicateProb * exactProb(k,n-1,m);
      if (m > 1) {
        // last trial contributes an outcome not among the m-1 seen before
        double noDuplicateProb = 1.0 - static_cast<double>(m-1.0)/k;
        result += noDuplicateProb * exactProb(k,n-1,m-1);
      }
    }
    ASSERT(result < 1.1); // rounding errors are possible but must not exceed 0.1
    if (result > 1.0) {
      result = 1.0;
    }
    ASSERT(result > -0.1); // rounding errors are possible but must not exceed 0.1
    if (result < 0.0) {
      result = 0.0;
    }
    if (USE_MATRICES_MODE) {
      matrices.set(indices, result); // [k-1][n-1][m-1] = result; // store result
    }
    POSTCOND(result >= 0.0 && (result <= 1.0));
    return result;
  }

  /** For k given possibilities (like 365 days) and n trials (like n people),
   * what is the probability to observe only m OR LESS different outcomes
   * (like m different birthdays)?
   *
   * Trivial cases are answered directly. If k fits the memo table the result
   * is the sum of exactProb(k, n, i) for i = 1..m; otherwise a single sampling
   * estimate from computeP() is returned (exactProb() is not consulted then).
   *
   * @param k number of possible outcomes
   * @param n number of trials; if n > k a warning is printed and 1.0 is returned
   * @param m largest number of distinct outcomes that still counts
   * @return probability in [0, 1]
   */
  double exactP(int_type k, int_type n, int_type m) const {
#ifndef NDEBUG
    if (k > static_cast<int_type>(matrices.getDimension(0))) {
      cout << "# Warning: number of outcomes ( " << k << " ) exceeds internal data structure size of " << matrices.getDimension(0) << " . Using approximation to estimate helix-bias P." << endl;
    }
#endif
    if (n > k) {
      cout << "# Warning: number of trials ( " << n << " ) exceeds number of outcomes ( " << k << " ) . Setting helix-bias P to 1.0." << endl;
      return 1.0;
    }
    if (m >= k) {
      return 1.0; // there are only k outcomes in total
    }
    if (m >= n) {
      return 1.0; // n trials never show more than n distinct outcomes
    }
    if (m < 1) {
      return 0.0; // at least one trial happened (n > m), so at least one outcome is seen
    }
    if (k > static_cast<int_type>(matrices.getDimension(0))) {
      // computeP already estimates the cumulative "m or less" probability, so
      // one simulation run suffices; summing per-m estimates would only add noise.
      return computeP(k, n, m);
    }
    double result = 0.0;
    for (int_type i = m; i >= 1; --i) {
      result += exactProb(k,n,i);
    }
    if (result > 1.1) {
      cout << "# WARNING: Rounding error too large in BirthdayProb : " << k << " " << n << " " << m << " : " << result << endl;
    }
    ASSERT(result < 1.5); // rounding errors are possible
    if (result > 1.0) {
      result = 1.0;
    }
    ASSERT(result > -0.5); // rounding errors are possible
    if (result < 0.0) {
      result = 0.0;
    }
    POSTCOND(result >= 0.0 && (result <= 1.0));
    return result;
  }

  /** Returns number of possible outcomes. For a classical birthday problem, this would be 365 */
  size_type size() const { return matrices.getDimension(0); }

};

#endif
