// --*- C++ -*------x---------------------------------------------------------
#ifndef _MAF_SEARCH_TABLES_
#define _MAF_SEARCH_TABLES_

#include <queue>
#include <MAFAlignment.h>
#include <RankedSolution5.h>
#include <debug.h>
#include <SortedCompressedSequence2.h>
#include <SearchTables.h>
#include <iterator>

using namespace std;

/**
 * Search tables built from a MAF alignment.
 *
 * For pairs of assemblies and pairs of characters the class stores a
 * compressed set of column ids under a string key ("hash") of the form
 * hg18_mm8_A_C. The tables are later used by HashCorrelationFinder, which is
 * a friend of this class. In addition, sizes of pairwise set intersections
 * can be cached in intersectionSizeMap.
 *
 * Copying is not implemented: the copy constructor and the assignment
 * operator both end up in copy(), which only invokes the ERROR macro.
 */
class MAFSearchTables : public SearchTables {

 public:

  typedef MAFAlignment::length_type length_type;

  typedef Vec<length_type> set_type;

  typedef SortedCompressedSequence2 compressed_type;

  /** Maps hashes like hg18_mm8_A_C to the compressed set of column ids. */
  typedef map<string, compressed_type > table_type;

  typedef map<string, compressed_type::const_iterator > iterator_table_type;

  typedef MAFAlignment::size_type size_type;

  enum { SEP = '_' }; // separator used for hash table names

  friend class HashCorrelationFinder;

 private:

  /** What fraction of all possible assembly pairs should be stored as hash tables? Between 0 and 1. */
  double assemblyPairFraction;

  /** Alignment the tables are generated from; not owned by this class (never deleted here). May be 0. */
  MAFAlignment *maf;

  /** Maps hashes like hg18_mm8_A_C to the compressed set of column ids. */
  mutable table_type positionHashes;

  /** Caches sizes of set intersections, keyed by createDoubleHash(hash1, hash2). */
  mutable map<string, size_type> intersectionSizeMap;

  /** Minimum column index that can be part of a search result. */
  length_type searchRangeMin;

  /** Maximum column index that can be part of a search result. */
  length_type searchRangeMax;

  /** Verbosity level. */
  int verbose;

 public:

  /** Creates an object without alignment; use setMAF() before calling run(). */
  MAFSearchTables() : assemblyPairFraction(1.0), maf(0), searchRangeMin(0), searchRangeMax(0), verbose(1) {
  }

  /**
   * Creates an object for the given alignment. The search range maximum is
   * initialised with the total length of the alignment, or with 0 if _maf is 0.
   */
  MAFSearchTables(MAFAlignment * _maf) : assemblyPairFraction(1.0), maf(_maf), searchRangeMin(0), searchRangeMax(0), verbose(1) {
    if (_maf != 0) {
      searchRangeMax = _maf -> getTotalLength();
    }
  }

  /**
   * Copy construction is not implemented (see copy()). Members are still
   * given well-defined default values so that the object never carries
   * indeterminate data, whatever the ERROR macro does.
   */
  MAFSearchTables(const MAFSearchTables& other) : assemblyPairFraction(1.0), maf(0), searchRangeMin(0), searchRangeMax(0), verbose(1) {
    copy(other);
  }

  /** Assignment is not implemented (see copy()); self-assignment is a no-op. */
  MAFSearchTables& operator = (const MAFSearchTables& other) {
    if (this != &other) {
      copy(other);
    }
    return (*this);
  }

  virtual ~MAFSearchTables() { }

  /** Not implemented: invokes the ERROR macro and copies nothing. */
  void copy(const MAFSearchTables& other) {
    ERROR("Copying of search tables not implemented!");
  }

  /** Generates compressed set for internal use */
  // static compressed_type compressSet(const set_type& set);

  /** Returns the content of a compressed set as a plain vector (via toVector()). */
  static set_type uncompressSet(const compressed_type& set) {
    return set.toVector();
  }

  /** Tests compression and uncompression */
  // static void testCompressSet();

  /**
   * Builds the search hash tables for all assemblies of the alignment and
   * asserts that the result is valid. Precondition: an alignment has been set.
   */
  virtual void run() {
    PRECOND(maf != 0);
    createSearchHashTable(maf, maf->getAssemblies());
    ASSERT(validate());
  }

  /** Creates a hash table name from two assembly names and two characters (counterpart of parseHashTableHash). */
  static string createHashTableHash(const string& assembly1, const string& assembly2, char c1, char c2);

  /** Parses hash table name of form hg18_mm8_A_C to assembly1=hg18 and assembly2=mm8, c1=A, c2=C */
  static void parseHashTableHash(const string& hash, string& assembly1, string& assembly2, char& c1, char& c2);

  // static string createHashTableHash(const string& assembly1, char c1);

  static void testCreateHashTableHash();

  /** Generates the hash tables for the given alignment and assemblies (defined outside this header). */
  void createSearchHashTable(MAFAlignment *maf, const set<string>& assemblies);

  /** Returns size of intersection set */
  virtual size_type intersectionSize(const string& hash1, const string& hash2) const;

  /**
   * Returns the name in [first, last) whose set has the smallest intersection
   * with the set named by hash. Elements equal to hash are skipped, except
   * that a leading element equal to hash is kept as initial candidate with
   * its own set size. Ties are resolved in favour of the earliest element.
   * Precondition: the range is not empty.
   */
  template <typename Iterator>
  string findNextSetByIntersectionSize(const string& hash, Iterator first, Iterator last) const {
    ASSERT(first != last);
    string bestName = *first;
    // Seed with the same metric that is used for all remaining candidates.
    // Seeding with the plain set size (an upper bound of the intersection
    // size) would put the first candidate at a disadvantage.
    size_type bestSize;
    if (bestName == hash) {
      bestSize = getSet(bestName).size(); // intersectionSize() must not be called with two equal names
    } else {
      bestSize = intersectionSize(hash, bestName);
    }
    ++first;
    for (; first != last; ++first) {
      if (*first == hash) {
        continue;
      }
      const size_type sz = intersectionSize(hash, *first);
      if (sz < bestSize) {
        bestSize = sz;
        bestName = *first;
      }
    }
    return bestName;
  }

  /**
   * Returns the name in [first, last) that refers to the smallest set.
   * Elements equal to hash are skipped, except for a leading one, which
   * serves as initial candidate. Ties are resolved in favour of the earliest
   * element. Precondition: the range is not empty.
   */
  template <typename Iterator>
  string findNextSetBySize(const string& hash, Iterator first, Iterator last) const {
    ASSERT(first != last);
    size_type bestSize = getSet(*first).size();
    string bestName = *first;
    ++first;
    for (; first != last; ++first) {
      if (*first == hash) {
        continue;
      }
      const size_type sz = getSet(*first).size();
      if (sz < bestSize) {
        bestSize = sz;
        bestName = *first;
      }
    }
    return bestName;
  }

  /**
   * Returns best next set for intersection. Assumes that sequence is already sorted by set size.
   * Returns the first name whose two assemblies both differ from the two
   * assemblies encoded in hash; if there is no such name, the first element
   * of the range is returned.
   * Preconditions: the range is not empty and its first element is not hash.
   */
  template <typename Iterator>
  string findNextSetByName(const string& hash, Iterator first, Iterator last) const {
    ASSERT(first != last);
    ASSERT(*first != hash);
    string assem1, assem2, assem1b, assem2b;
    char c1, c2, c1b, c2b;
    parseHashTableHash(hash, assem1, assem2, c1, c2);
    for (Iterator it = first; it != last; ++it) {
      if (*it == hash) {
        continue;
      }
      // Parse the candidate, not the reference hash: otherwise the assembly
      // names always coincide and no candidate can ever be accepted.
      parseHashTableHash(*it, assem1b, assem2b, c1b, c2b);
      if ((assem1 != assem1b) && (assem1 != assem2b)
          && (assem2 != assem1b) && (assem2 != assem2b)) {
        return *it;
      }
    }
    return *first; // nothing suitable found, return first available set
  }

  /**
   * Returns best next set for intersection. If an intersection size with
   * hash is cached for every other element of [first, last), the choice is
   * made by intersection size; otherwise it is made by plain set size.
   */
  template <typename Iterator>
  string findNextSet(const string& hash, Iterator first, Iterator last) const {
    // check if there is a stored intersection size for all elements:
    for (Iterator it = first; it != last; ++it) {
      if (*it == hash) {
        continue;
      }
      const string doubleHash = createDoubleHash(hash, *it);
      if (intersectionSizeMap.find(doubleHash) == intersectionSizeMap.end()) {
        // at least one pair has no cached intersection size yet: decide by set size
        return findNextSetBySize(hash, first, last);
      }
    }
    return findNextSetByIntersectionSize(hash, first, last); // only use cached intersection size values
  }

  /**
   * Creates hash string from two hash strings: the lexicographically smaller
   * one followed by the other one, so that the result does not depend on the
   * argument order. Precondition: the two names differ.
   */
  static string createDoubleHash(const string& hash1, const string& hash2)  {
    ASSERT(hash1 != hash2);
    if (hash1 < hash2) {
      return hash1 + hash2;
    }
    return hash2 + hash1;
  }

  /**
   * Stores size of intersection between two sets. If a size is already
   * stored for the pair, it is asserted to be equal to sz and left unchanged.
   */
  void setIntersectionSize(const string& hash1, const string& hash2, size_type sz) {
    const string doubleHash = createDoubleHash(hash1, hash2);
    map<string, size_type>::iterator it = intersectionSizeMap.find(doubleHash);
    if (it == intersectionSizeMap.end()) {
      intersectionSizeMap[doubleHash] = sz;
    } else {
      ASSERT(it -> second == sz);
    }
  }

  /** Returns the fraction of possible assembly pairs that are stored as hash tables. */
  virtual double getAssemblyPairFraction() const { return assemblyPairFraction; }

  /** Returns compressed set with certain hash code. Precondition: hash code must exist */
  virtual const compressed_type& getSet(const string& hash) const {
    const table_type::iterator it = findPositionHash(hash); // single lookup
    PRECOND(it != positionHashes.end());
    return it -> second;
  }

  /** Estimates the potential number of hash table entries of two assemblies
   */
  virtual double estimateAssemblyPairHashSize(const string& assem1, const string& assem2) const;

  /**
   * Returns iterator to the table entry with the given name, or
   * positionHashes.end() if there is no such entry. A mutable iterator can
   * be returned from this const method because positionHashes is mutable.
   */
  virtual table_type::iterator findPositionHash(const string& hashhash) const {
    return positionHashes.find(hashhash);
  }

  /** Sets parameter that determines what fraction of possible hash tables are actually generated */
  virtual void setAssemblyPairFraction(double value) { assemblyPairFraction = value; }

  /**
   * Sets the alignment and resets the search range maximum to its total
   * length. For a null pointer the search range maximum is left unchanged.
   */
  virtual void setMAF(MAFAlignment* _maf) {
    maf = _maf;
    if (maf != 0) {
      searchRangeMax = maf->getTotalLength();
    }
  }

  /** Sets minimum values of column indices that can be part of search result */
  virtual void setSearchRangeMin(length_type n) { searchRangeMin = n; }

  /** Sets maximum values of column indices that can be part of search result */
  virtual void setSearchRangeMax(length_type n) { searchRangeMax = n; }

  /** Sets verbose level */
  virtual void setVerbose(int _verbose) { verbose = _verbose; }

  /** Returns true if properly defined, i.e. if at least one hash table exists. */
  virtual bool validate() const {
    return !positionHashes.empty(); //  && (positionHashes.size() == positionHashStarts.size());
  }

};

#endif
