// --*- C++ -*------x---------------------------------------------------------
#ifndef __LOCORNA_MAIN__
#define __LOCORNA_MAIN__

#include <iostream>
#include <HashCorrelationFinder.h>
#include <InteractionClusterAnalyzer.h>
#include <Stem.h>

using namespace std;

// Defines the number of alignment blocks to be expected in a MAF file.
// NOTE(review): presumably used as a capacity hint when reading MAF data -- confirm at the use site.
#define MAF_RESERVE 10000000
// Program version string; this is the value returned by LocoRnaMain::getVersion().
// Keep it in sync with the newest entry of the version history below.
#define COVARNA_VERSION "1.16.0"

/* version history
 * version 0.9.3: fixed problems with --strand -1 , --strand2 -1
 * version 0.9.4: implemented --noself and --opposite
 * version 0.9.5: minor change related to option --opposite : implies --noself iff only one MAF filename is given.
 * attempted fix of converting external id to internal ids for p-values.
 * version 0.9.7: implemented options --block-min, --block-max, --block-min2, --block-max2 and --prune --search-max
 * version 0.9.8: minor changes related to error handling of sparse data 
 * version 0.9.9: added options --taboo and --require
 * version 0.9.10: minor changes in output
 * version 0.9.10: added better cluster analysis including strand prediction. Added options --emax --cluster
 * version 0.9.11: fixed bug regarding cluster analysis of second MAF file.
 * version 0.9.12: Automatic detection of search within same chromosome. Minor changes in output.
 * version 0.9.13: Revamped analysis of clusters 
 * version 0.9.14: Again revamped analysis of clusters, pre-filtering using single-linkage clustering, output of BED format file 
 * version 0.9.15: Minor change related to computing P-values 
 * version 0.9.16: Fixed problem related to segmentation fault that was due to too small stack memory (BirthdayProb class now uses heap)
 * version 0.9.17: Filter out adjacent matching columns.
 * version 0.9.18: Increased estimate of column pair matches. Leads to more conservative E-value.
 * version 0.9.19: Unsuccessful attempt to change to more conservative estimation of E-value using different multiple-testing correction 
 * version 0.9.20: Changed to more conservative estimation of E-value using different multiple-testing correction 
 * version 0.9.21: Changed default thread granularity in covarnap. Added option --granularity.
 * version 0.9.22: Substantial changes: checkNeighborFilter is new; single-linkage clustering instead of stem-3 filtering. Parallel computing fixes.
 * version 0.9.23: Taken out reclustering: fixes problem with MAF access.
 * version 0.9.24: fixed issues related to searching forward matches (as opposed to the usual reverse complement). 
 * version 1.0.0 : contains added check for conserved-stem expandability. Also contains fixes regarding extractAssemblySequence
 * version 1.0.1 : fixed issue related to finding conserved stem expandability in mode -m 0 -r 0 (forward match)
 * version 1.0.2 : better at splitting file names and --ignore
 * version 1.1.0 : using local density estimations
 * version 1.1.1 : less memory in stem-bias P estimation
 * version 1.1.2 : more conservative version of computing cluster area 
 * version 1.1.3 : yet more conservative version of computing cluster area 
 * version 1.1.4 : most conservative version of computing cluster area (back to multiplying by complete area for Bonferroni correction) 
 * version 1.1.5 : now allows clusters larger than 400nt. Previously their stem-bias P-value was set to 1 because of numerical difficulties
 * version 1.1.6 : minor change: less output warnings for larger clusters. Also: default cluster cutoff is 100 (before it was 40).
 * version 1.1.7 : redesigned computation of densities
 * version 1.1.8 : E-value computation is again of most conservative kind
 * version 1.2.0 : Fixed problem with input density
 * version 1.2.1 : Added option --multi 1|2|3: different multiple-testing correction schemes.
 * version 1.2.2 : Added option --anti 0|1|2 : different checks for "wrong" diagonals NOT being complementary
 * version 1.3.0 : First version that allows GU matches in found column pairs
 * version 1.3.1 : now skips completely conserved alignment columns; improved GU base pair handling.
 * version 1.3.2 : improved version including GU base pair matches.
 * version 1.3.3 : No writing of dummy densities.
 * version 1.3.4 : Re-introduced forward matches with -m 0 -r 0.
 * version 1.3.5 : Less verbose output to standard output. 
 * version 1.3.6 : Implemented --multi 0 : no clustering, just used for density file output
 * version 1.3.7 : Changed default values such that they are equivalent to --multi 2 --cluster 40 --anti 2
 * version 1.3.8 : Taken out too verbose statements in augmentDensities method.
 * version 1.3.9 : added option --cluster-min 1..n for specifying the minimum number of comp. base changes per cluster
 * version 1.4.0 : Fix in density estimation.
 * version 1.4.1 : Change in density estimation mode.
 * version 1.5.0 : Revamped density estimation.
 * version 1.5.1 : Implemented cluster-filter-off option.
 * version 1.5.2 : Fixes in area computation.
 * version 1.5.3 : Fixes in area computation for split areas.
 * version 1.5.4 : Fixes in alignment index computation.
 * version 1.6.0 : Improved search for highest density.
 * version 1.6.1 : More verbose output to density files.
 * version 1.6.2 : Minor change in density output for areas with no covariation.
 * version 1.6.3 : Another minor change in density output for areas with no covariation.
 * version 1.6.4 : Fix in method getMatchPairCountHash3
 * version 1.7.0 : Nontrivial change in compute searchable area: now skipping blocks whose assemblies are not subset of each other.
 * version 1.7.1 : Improved output for error in getMatchPairCountHash3
 * version 1.7.2 : Several improvements to estimation of effective search area.
 * version 1.8.0 : Vertical shuffling is now gap-pattern preserving. Implemented annotation of claimed covariation regions
 *                 (options --annotate and --annotate-out)
 * version 1.8.1 : change in augmentDensity method
 * version 1.8.3 : deactivated density estimation
 * version 1.9.0 : fix in output of area in "forward match" mode (-r 0 -m 0) 
 * version 1.9.1 : changed --stem-p 1 --emax -1 to default; also important fixes for search: searches now 4 times more for two input MAFs. 
 * version 1.9.2 : speedup using split search.
 * version 1.10.0 : bugfix in HashCorrelationFinder3 (not fixed yet: -r 0 -m 0 forward matching mode)
 * version 1.10.1 : finished bugfix in HashCorrelationFinder3 related to testing for complementarity. Did not affect past results, because they were run with allowedGuFrac=0
 * version 1.11.0 : using concurrent_vector in result data structure
 * version 1.11.1 : four tasks per thread instead of 32 
 * version 1.12.0 : --cluster-filter-off is now default; implemented --cluster-filter-on; bugfix in Correlation == operator; only unique correlations;
 * version 1.12.1 : Improved warning and help messages. Same results compared to version 1.12.0.
 * version 1.13.0 : Re-introduces single-linkage clustering as post-processing step of HashCorrelationFinder3
 * version 1.13.1 : minor change: Result in HashCorrelationFinder3 is sorted before it is returned. Bugfix in getResult related to filter settings.
 * version 1.13.2 : now --cluster-min also adjusts the minimum size of clusters in initial search.
 * version 1.13.3 : version that does not produce a memory-spike in the HashCorrelationFinder3.getResults method
 * version 1.14.0 : using concurrent_unordered_set to save memory 
 * version 1.14.1 : changed back to concurrent_vector, because concurrent_unordered_set costs too much memory
 * version 1.15.0 : implemented option --basepairs N for requiring minimum number of different types of base pairs
 * version 1.15.1 : implemented option --expand-max N for setting the maximally allowed number of consecutive covariation
 * version 1.15.2 : diagonal bias P-value computation is now deactivated by default.
 * version 1.15.3 : bugfix for boundary computation in case of minimum stem lengths greater zero
 * version 1.15.4 : taken out debug information for default verbose level 
 * version 1.16.0 : first release version. Has updated help information compared to the previous versions.
 */

/** 
 * Main class that parses command line, reads input files, performs algorithm
 * and writes output of the algorithm. 
 * @see LocoRnaParMain
*/
class LocoRnaMain {

 public:

  typedef MAFAlignment::size_type size_type;
  
  typedef Stem::index_type index_type;

  typedef CorrelationFinder::length_type length_type;
  
  typedef CorrelationFinder::result_container result_container;

 protected:
  int argc;
  char ** argv;
  char ** env;
  ostream * osp;
  int ambiguityMode;  
  string annotateInFileName; // annotate an existing BED format file
  string annotateOutFileName;
  string appendFileName;
  double assemblyPairFraction;
  size_type basepairTypeMin;
  string bedFileName;
  string bedFileName1;
  string bedFileName2;
  string bedOutFileName;
  MAFAlignment::size_type blockMin, blockMax, blockMin2, blockMax2; 
  int checkAntiNeighborMode;
  size_type clusterColMin;
  double clusterCutoff;
  bool clusterFilterActive;
  string collapseAssembly;
  bool complementMode;
  length_type corrDistMin;
  string densInFileName;
  string densOutFileName;
  double eMax;
  int expandClusterMaxAllowed;
  string filename;
  int multiTestMode;
  bool noSelfMode;
  bool oppositeMode;
  length_type outputIntervall;
  length_type padding;
  size_type pruneAfter;
  bool pvalMode;
  length_type searchColumnMax; // maximum number of columns to search with linear search
  string refAssembly;
  string requiredAssemblyNames; // default: empty; example: hg18,mm8
  bool reverseMode;
  /** if true, interactions are from same chromosome, otherwise between different chromosomes 
   * Default behavior: load two chromosomes : sameChrom is false, if only one MAF file is loaded, sameChrom is true 
   */
  bool sameChrom; 
  size_type seqMin;
  bool stemBiasPMode;
  double stemDensity; // number of expected stems of length stemLengthMin per area (sites squared). A stem of length 4 should be counted as two stems of length 2 if stemLengthMin is 2, otherwise if stemLengthMin==3, it is counted as one stem of length 3 (or 4 or 5);
  Stem::index_type stemLengthMin;
  int shuffleMode;
  double shuffleNormLimit;
  double stemPMax;
  int strandMode1;
  int strandMode2;
  string tabooAssemblyNames;
  int verbose;

 public:

  LocoRnaMain(ostream& os, int _argc, char ** _argv, char ** _env) : argc(_argc), argv(_argv), env(_env) { 
    osp = &os;
    setDefaultValues();
    parseCommandLine();
  }

  virtual ~LocoRnaMain() { }

  /** filter out only correlations that go between two alignments */
  static Vec<Correlation> filterCrossCorrelations(const Vec<Correlation>& corrs, length_type offset, length_type offsetMin);

  /** Adds computed p-value to stems */
  static void addPValue(Vec<Stem>& stems, const CorrelationFinder& finder, bool isInternal);

  /** Adds computed p-value to special "forward" stems */
  virtual void addForwardPValue(Vec<Stem>& stems, const CorrelationFinder& finder, bool isInternal);

  /** Returns version of program */
  static string getVersion() { return COVARNA_VERSION; }

  /** Convert strand directionality of stem start positions */
  static void reverseStemStarts(Vec<Stem>& stems, long _totLength);

  /** Convert strand directionality of stem stop positions */
  static void reverseStemStops(Vec<Stem>& stems, long _totLength);
  
  /** Return set of tokens; example: convert hg18,mm8 to set of strings containing hg18 and mm8 */
  static set<string> tokenizeToSet(const string& line, string delimiter);

  /** Writes individual correlations */
  virtual void writeCorrelations(ostream& os, 
				 result_container::const_iterator first,
				 result_container::const_iterator last) const;

  static void writeUsageMessage(ostream& os);

  /** Writes welcome message to output stream */
  static void writeWelcomeMessage(ostream& os);

  static void writeResults(ostream& os, const MAFAlignment& maf, const result_container& results,
			   const string& refAssembly);

  static void writeAssemblyResults(ostream& os, const result_container& results);

  /** Executes main program */
  virtual int run();

  /** Parses command line and sets member attributes accordingly. */
  virtual void parseCommandLine();

  /** Sets default values. */
  virtual void setDefaultValues() {
    ambiguityMode = 1; // equals to MAFSearchTables3::NO_AMBIGUITY
    appendFileName = "";
    assemblyPairFraction = 1.0;
    basepairTypeMin = 2; // this should be consistent with previous results: at least this many DIFFERENT base pairs f. a cov ali column pair
    bedFileName = "";
    bedFileName1 = "";
    bedFileName2 = "";
    bedOutFileName = "covarna_clusters.bed";
    blockMin = 0;     
    blockMax = 0;     
    blockMin2 = 0;     
    blockMax2 = 0;     
    checkAntiNeighborMode = 2; // 0; // 0: no checking, 2: most strict
    clusterColMin = 2; // minimum number of complementary base changes per cluster
    clusterCutoff = 40; // 100; // single linkage distance for stems
    clusterFilterActive = false; // true;
    collapseAssembly = "";
    complementMode = true; // default: search for reverse complements
    corrDistMin = 3; // minimum distance between start and stop position of base pair
    densInFileName = "";
    densOutFileName = ""; // was: "covarna_densities.dat";
    eMax = -1; // 10.0; // do not report clusters with e-values greater than this
    expandClusterMaxAllowed = 30; // maximum allowed length of consecutive covariation. Beyond that, it is probably a case of sequence duplication
    filename = "";
    multiTestMode = InteractionClusterAnalyzer::MTEST_CLUSTER_AREA; // MTEST_TOTAL_AREA;
    noSelfMode = false;
    oppositeMode = false; // combination mode: equivalent to strandMode=1, strandMode2=-1, noSelfMode=true
    outputIntervall = 100000; // progress output intervall during run-time
    padding = 0;
    pruneAfter = 0;
    pvalMode = false;
    refAssembly = ""; // "hg18"; // will be set to first sequence of first alignment block of user '-a' statement
    reverseMode = true;
    sameChrom = true; 
    seqMin = 10;
    searchColumnMax = 100000;
    shuffleMode = 0;
    shuffleNormLimit = 0.1; // 0.05;
    stemBiasPMode = false;
    stemDensity = 0.0; // if smaller or equal zero, estimate stem density from current data
    stemLengthMin = 1;
    stemPMax = 1.0; // do not report clusters with stem bias p-value greater than this
    strandMode1 = MAFAlignment::STRAND_PLUS;
    strandMode2 = MAFAlignment::STRAND_PLUS;
    verbose = 1;
  }

 protected:
  static void addStemSequence(Stem& stem,
			      const MAFAlignment& maf  // const MAFAlignment& maf2,
			      );

  static void addStemSequences(Vec<Stem>& stem,
			       const MAFAlignment& maf  // const MAFAlignment& maf2,
			       );


};

#endif
