#ifndef _BED_CHROM_
#define _BED_CHROM_

#include <string>
#include <Vec.h>
#include <set>
#include <map>
#include <StringTools.h>
#include <debug.h>

/** Represents interval set for one chromosome 
 * Important: here starts are 0-based and ends are 1-based as in UCSC BED format.
 * (other classes like IntervallInt and IntervallIntSet and BEDRegions used 1-based starts and 1-based ends)
 */
class BedChrom {

 public:

  typedef int index_type;
  typedef string::size_type size_type;

  enum {STRAND_PLUS = 1, STRAND_MINUS = -1, STRAND_UNKNOWN = 0,
	STRAND_UNSPECIFIED = -2 };

 private:

  string chrom;

  Vec<index_type> ends;

  int indexInterval; // used for building indices

  Vec<set<size_type> > indexSets;

  Vec<string> names;

  Vec<index_type> starts;

  Vec<int> strands; // 1 to "+", -1 for "-", 0 for missing

  Vec<double> scores;

  Vec<Vec<string> > annotations;

 public:

  BedChrom() : indexInterval(1000) { }

  BedChrom(const BedChrom& other) { copy(other);}

  virtual ~BedChrom() { }

  BedChrom& operator = (const BedChrom& other) {
    if (&other != this) {
      copy(other);
    }
    return (*this);
  }

  void add(index_type start, index_type end, int strand, string name, double score,
	   const Vec<string>& annotation) {
    PRECOND(start < end);
    PRECOND(validateStrand(strand));
    starts.push_back(start);
    ends.push_back(end);
    // if (strand > STRAND_UNSPECIFIED) {
    strands.push_back(strand);
    // }
    // if (name.size() > 0) {
    names.push_back(name);
    scores.push_back(score);
    annotations.push_back(annotation);
    // }
    POSTCOND(validate());
  }

  void buildIndices(int _indexInterval) {
    PRECOND(_indexInterval > 1);
    indexInterval = _indexInterval;
    if (size() == 0) {
      ASSERT(indexSets.size() == 0);
      indexSets.clear();
      return;
    }
    // find largest position to be indexed:
    index_type largest = ends[0];
    for (Vec<index_type>::size_type i = 1; i < ends.size(); ++i) {
      if (ends[i] > largest) {
	largest = ends[i];
      }
    }
    // largest index set that has to be generated:
    index_type maxIndices = (largest / indexInterval) + 1;
    indexSets = Vec<set<size_type> >(maxIndices);
    // loop over all intervals:
    for (size_type i = 0; i < size(); ++i) {
      index_type start = getStart(i);
      index_type end = getEnd(i);
      size_type minIndex = start/indexInterval;
      size_type maxIndex = (end-1)/indexInterval; // largest actual position of an interval is end-1
      for (size_type index = minIndex; index <= maxIndex; ++index) {
	indexSets[index].insert(i);
      }
    }
  }

  virtual void copy(const BedChrom& other) {
    chrom = other.chrom;
    ends = other.ends;
    indexInterval = other.indexInterval;
    indexSets = other.indexSets;
    names = other.names;
    starts = other.starts;
    strands = other.strands;
    scores = other.scores;
    annotations = other.annotations;
  }

  /** Return indices of intervals that are overlapping with query position 
   *
   */
  set<size_type> findAllOverlapping(index_type position) const {
    set<size_type> result;
    if (hasIndices()) {
      index_type index = position / indexInterval;
      if (index >= static_cast<index_type>(indexSets.size())) {
	return result; // beyond largest indexed position
      }
      const set<size_type>& maybe = indexSets[index]; // findPossiblyOverlapping(position);
      for (set<size_type>::const_iterator it = maybe.begin(); it != maybe.end(); it++) {
	if (isOverlapping(position, *it)) {
	  result.insert(*it);
	}
      }
    } else { // no indices were computed
      for (size_type i = 0; i < size(); ++i) {
	if (isOverlapping(position, i)) {
	  result.insert(i);
	}
      }
    }
    return result;
  }

  /** Returns indices of intervals that overlap with region specified by startPos, endPos */
  set<size_type> findAllOverlapping(index_type startPos, index_type endPos) const {
    PRECOND(indexSets.size() > 0);
    set<size_type> result;
    index_type incr = 1;
    if (hasIndices()) {
      incr = indexInterval; // important speed-up: do not have to check every adjacent nucleotide
    }
    set<size_type>::size_type firstIndexId = startPos / indexInterval;
    // last physical position to check:
    set<size_type>::size_type lastIndexId = (endPos-1) / indexInterval;
    if (lastIndexId >= indexSets.size()) {
      lastIndexId = indexSets.size() - 1;
    }
    for (size_type indexId = firstIndexId; indexId <= lastIndexId;
	 ++indexId) {
      const set<size_type>& tmpSet = indexSets[indexId];
      for (set<size_type>::const_iterator it = tmpSet.begin(); it != tmpSet.end(); it++) {
	size_type id = *it;
	ASSERT(id < size());
	if (isOverlapping(startPos, endPos, starts[id], ends[id])) {
	  result.insert(*it);
	}
      }
    }
    return result;
  }
  
  set<size_type> findPossiblyOverlapping(index_type position) const {
    PRECOND(hasIndices());
    ASSERT(indexInterval > 1);
    index_type  index = position / indexInterval;
    ASSERT(index < static_cast<index_type>(indexSets.size()));
    return indexSets[index];
  }

  const string& getChrom() const { return chrom; }
  
  double getScore(size_type n) const { return scores[n]; }

  Vec<string> getAnnotation(size_type n) const { return annotations[n]; }

  index_type getStart(size_type n) const { return starts[n]; }

  index_type getEnd(size_type n) const { return ends[n]; }

  string getName(size_type n) const { return names[n]; }

  int getStrand(size_type n) const { return strands[n]; }

  string getStrandString(size_type n) const { 
    if (strands[n] == 1) {
      return "+";
    } else if (strands[n] == -1) {
      return "-";
    }
    return "?";
  }

  bool hasIndices() const { return indexSets.size() > 0; }

  bool isOverlapping(index_type position, size_type intervalId) const {
    return ((starts[intervalId] <= position) && (position < ends[intervalId]));
  }

  static bool isDisjunct(index_type start1, index_type end1,
			 index_type start2, index_type end2) {
    PRECOND(start1 < end1);
    PRECOND(start2 < end2);
    return (start2 >= end1) || (start1 >= end2);
  }

  static bool isDisjunctWithGap(index_type start1, index_type end1,
			 index_type start2, index_type end2) {
    PRECOND(start1 < end1);
    PRECOND(start2 < end2);
    return (start2 > end1) || (start1 > end2);
  }

  static bool isOverlapping(index_type start1, index_type end1,
		     index_type start2, index_type end2) {
    return !isDisjunct(start1, end1, start2, end2);
  }

  /** Returns intersection interval */
  static bool intersection(index_type start1, index_type end1,
			   index_type start2, index_type end2,
			   index_type& rstart, index_type& rend) {
    rstart = -1;
    rend = -1;
    if (isDisjunct(start1, end1, start2, end2)) {
      return false;
    }
    // highest start:
    rstart = start1;
    if (start2 > start1) {
      rstart = start2;
    }
    rend = end1; // lowest end
    if (end2 < end1) {
      rend = end2;
    }
    return true;
  }

  /** Returns intersection interval */
  static bool merge(index_type start1, index_type end1,
		    index_type start2, index_type end2,
		    index_type& rstart, index_type& rend) {
    rstart = -1;
    rend = -1;
    if (isDisjunctWithGap(start1, end1, start2, end2)) {
      return false;
    }
    // lowest start:
    rstart = start1;
    if (start2 < start1) {
      rstart = start2;
    }
    rend = end1; // highest end
    if (end2 > end1) {
      rend = end2;
    }
    return true;
  }

  /** Returns merged intervals of provided intervals (given as starts and ends) */
  static Vec<pair<index_type, index_type> > merge(Vec<index_type> starts, Vec<index_type> ends) {
    PRECOND(starts.size() == ends.size());
    for (size_type i = 0; i < starts.size(); ++i) {
      ASSERT(starts[i] < ends[i]);
    }
    bool foundMerge = true;
    while (foundMerge) {
      foundMerge = false;
      for (size_type i = 0; i < starts.size(); ++i) {
	ASSERT(starts[i] < ends[i]);
	for (size_type j = i+1; j < starts.size(); ++j) {
	  index_type rstart = 0;
	  index_type rend = 0;
	  ASSERT(starts[j] < ends[j]);
	  if (merge(starts[i], ends[i], starts[j], ends[j],rstart,rend)) {
	    starts[i] = rstart;
	    ends[i] = rend;
            starts.erase(starts.begin() +j);
            ends.erase(ends.begin() +j);
	    foundMerge = true;
	    break;
	  }
	}
	if (foundMerge) {
	  break;
	} 
      }
    }
    Vec<pair<index_type, index_type> > results(starts.size());
    for (size_type i = 0; i < starts.size(); ++i) {
      results[i] = pair<index_type, index_type>(starts[i], ends[i]);
    }
    return results;
  }

  /** Returns merged intervals of this BedChrom object
   * tested with command ./intervops -i ../../test/fixtures/dm3_fromexons.bed -p merge --c1 1:2:3:4:6:5 
   * gave result:
   chr2R1181902811819434
   chr2R1181966811820225
   chr2R1182028811821324
   chr2R11821261182217
   chr2R1182170311822248
   chr2R1182230211822330
   chr2R1182239211822428
   chr2R1182248511824088
   chr2R11823011182568
   chr2R1182518311825384
   chr2R1182561911825772
   chr2R1182595111826483
  */
  Vec<pair<index_type, index_type> > merge() {
    PRECOND(starts.size() == ends.size());
    Vec<pair<index_type, index_type> > results;
    for (size_type i = 0; i < starts.size(); ++i) {
      ASSERT(starts[i] < ends[i]);
    }
    if (size() <= 1) {
      return results; // nothing to do if zero or one interval specified
    }
    bool foundMerge = true;
    while (foundMerge) {
      foundMerge = false;
      // cout << "# current size during merge is: " << starts.size() << endl;
      for (size_type i = 0; i < starts.size(); ++i) {
	ASSERT(starts[i] < ends[i]);
	// bool found = false;
	ASSERT(!foundMerge);
	for (size_type j = i+1; j < starts.size(); ++j) {
	  ASSERT(!foundMerge);
	  // cout << "# " << i << " " << j << " " << starts.size() << " " << ends.size() << endl;
	  index_type rstart = 0;
	  index_type rend = 0;
	  ASSERT(starts[j] < ends[j]);
 	  size_type n0 = starts.size();
	  if (merge(starts[i], ends[i], starts[j], ends[j], rstart, rend)) {
	    // cout << "# erasing interval " << (j+1) << " " << starts[j] << " " << ends[j] << " because it overlaps with " << starts[i] << " " << ends[i] << " new merged interval: " << rstart << " " << rend << endl;
	    ASSERT( n0 == starts.size());
	    foundMerge = true;
	    starts[i] = rstart;
	    ends[i] = rend;
	    size_type n1 = starts.size();
	    // cout << "# erasing !!!" << endl;
            starts.erase(starts.begin() +j);
            ends.erase(ends.begin() +j);
	    size_type n2 = starts.size();
	    ASSERT((n2 + 1) == n1);
	    // found = true;
	    break;
	  }
	}
	if (foundMerge) {
	  break;
	} else {
	  results.push_back(pair<index_type,index_type>(starts[i], ends[i]));
	  // starts.erase(starts.begin() + i);
	  // ends.erase(ends.begin() + i);
	}
      }
    }
    names = Vec<string>(size());
    strands = Vec<int>(size(), STRAND_UNSPECIFIED);
    buildIndices(indexInterval); // rebuild indices
    return results;
  }

  /** Returns merged intervals of this BedChrom object
   * tested with command
./intervops -i ../../test/fixtures/dm3_fromexons.bed -p mergename --c1 1:2:3:4:6:5
chr2R1181902811819434CG82910-
chr2R1181966811820225CG82930-
chr2R1182028811821324CG82930-
chr2R11821261182217CG26820+
chr2R1182170311822248CG82930-
chr2R1182230211822330CG82930-
chr2R1182239211822428CG82970+
chr2R1182248511823950CG82970+
chr2R11823011182568CG26820+
chr2R1182334411824088CG82950-
chr2R1182518311825384CG82950-
chr2R1182561911825772CG82950-
chr2R1182595111826483CG82950-

  */
  Vec<pair<index_type, index_type> > mergeNameFast() {
    PRECOND(starts.size() == ends.size());
    PRECOND(names.size() == ends.size());
    Vec<pair<index_type, index_type> > results;
    if (size() <= 1) {
      return results; // nothing to do if zero or one interval specified
    }
    typedef pair<index_type, index_type> region_type;
    typedef multimap<string, region_type > name_hash;
    name_hash nameHash;
    map<string, int> nameStrandHash;
    for (size_type i = 0; i < size(); ++i) {
      ASSERT(starts[i] < ends[i]);
      string name = names[i];
      nameStrandHash[name] = strands[i];
      nameHash.insert(pair<string, region_type>(name, region_type(starts[i], ends[i])));
    }
    starts.clear();
    ends.clear();
    names.clear();
    strands.clear();
    scores.clear();
    annotations.clear();
    for (map<string, int>::const_iterator it = nameStrandHash.begin(); it != nameStrandHash.end(); it++) {
      int strand = it->second;
      pair<name_hash::const_iterator, name_hash::const_iterator> regionIts = nameHash.equal_range(it->first);
      // Vec<pair<index_type, index_type> > regions = it->second;
      // pair<map_type::const_iterator, map_type::const_iterator> range = map2.equal_range(it->first);
      Vec<index_type> lstarts, lends;
      for (name_hash::const_iterator it2 = regionIts.first; it2 != regionIts.second; it2++) {
	lstarts.push_back(it2->second.first);
	lends.push_back(it2->second.second);
      }
      Vec<pair<index_type, index_type> > regions = merge(lstarts, lends);
      for (size_type i = 0; i < regions.size(); ++i) {
	starts.push_back(regions[i].first);
	ends.push_back(regions[i].second);
	names.push_back(it->first);
	strands.push_back(strand);
      }
    }
    annotations = Vec<Vec<string> >(size()); // empty annotations
    scores = Vec<double>(size(), 0.0); // set scores to zero
    buildIndices(indexInterval); // rebuild indices
    POSTCOND(validate());
    return results;
  }


  /** Returns merged intervals of this BedChrom object
   * tested with command
./intervops -i ../../test/fixtures/dm3_fromexons.bed -p mergename --c1 1:2:3:4:6:5
chr2R1181902811819434CG82910-
chr2R1181966811820225CG82930-
chr2R1182028811821324CG82930-
chr2R11821261182217CG26820+
chr2R1182170311822248CG82930-
chr2R1182230211822330CG82930-
chr2R1182239211822428CG82970+
chr2R1182248511823950CG82970+
chr2R11823011182568CG26820+
chr2R1182334411824088CG82950-
chr2R1182518311825384CG82950-
chr2R1182561911825772CG82950-
chr2R1182595111826483CG82950-

  */
  Vec<pair<index_type, index_type> > mergeNameSlow() {
    PRECOND(starts.size() == ends.size());
    Vec<pair<index_type, index_type> > results;
    for (size_type i = 0; i < starts.size(); ++i) {
      ASSERT(starts[i] < ends[i]);
    }
    if (size() <= 1) {
      return results; // nothing to do if zero or one interval specified
    }
    bool foundMerge = true;
    while (foundMerge) {
      foundMerge = false;
      // cout << "# current size during merge is: " << starts.size() << endl;
      for (size_type i = 0; i < starts.size(); ++i) {
	ASSERT(starts[i] < ends[i]);
	// bool found = false;
	ASSERT(!foundMerge);
	for (size_type j = i+1; j < starts.size(); ++j) {
	  ASSERT(!foundMerge);
	  // cout << "# " << i << " " << j << " " << starts.size() << " " << ends.size() << endl;
	  index_type rstart = 0;
	  index_type rend = 0;
	  ASSERT(starts[j] < ends[j]);
 	  size_type n0 = starts.size();
	  if (merge(starts[i], ends[i], starts[j], ends[j], rstart, rend) && (names[i] == names[j])) {
	    // cout << "# erasing interval " << (j+1) << " " << starts[j] << " " << ends[j] << " because it overlaps with " << starts[i] << " " << ends[i] << " new merged interval: " << rstart << " " << rend << endl;
	    ASSERT( n0 == starts.size());
	    foundMerge = true;
	    starts[i] = rstart;
	    ends[i] = rend;
	    size_type n1 = starts.size();
	    // cout << "# erasing !!!" << endl;
            starts.erase(starts.begin() +j);
            ends.erase(ends.begin() +j);
	    names.erase(names.begin() +j);
	    strands.erase(strands.begin() + j);
	    size_type n2 = starts.size();
	    ASSERT((n2 + 1) == n1);
	    // found = true;
	    break;
	  }
	}
	if (foundMerge) {
	  break;
	} else {
	  results.push_back(pair<index_type,index_type>(starts[i], ends[i]));
	  // starts.erase(starts.begin() + i);
	  // ends.erase(ends.begin() + i);
	}
      }
    }
    buildIndices(indexInterval); // rebuild indices
    return results;
  }



  void setChrom(const string& _chrom) { chrom = _chrom; }

  size_type size() const { return starts.size(); }

  bool validate() const {
    return ((size() > 0) 
            && (size() == starts.size())
            && (size() == ends.size())
	    && (names.size() == size())
	    && (strands.size() == size())
	    && (scores.size() == size())
	    && (annotations.size() == size()));
  }

  /** Returns true if id is valid strand specified */
  bool validateStrand(int id) {
    return (id == STRAND_PLUS) || (id == STRAND_MINUS) || (id == STRAND_UNKNOWN)
      || (id == STRAND_UNSPECIFIED);
  }

  /** Writes BED format output. Flag indicating whether strand orientation, name or score
   * should be part of the output */
  void write(ostream& os, bool writeStrand, bool writeName, bool writeScore) const {
    for (size_type i = 0; i < size(); ++i) {
      os << chrom << "\t" << starts[i] << "\t" << ends[i];
      if (writeName) {
	os << "\t" << names[i];
      }
      if (writeScore) {
	os << "\t" << scores[i];
      }
      if (writeStrand) {
	os << "\t";
	if (strands[i] == 1) {
	  os << "+";
	} else if (strands[i] == -1) {
	  os << "-";
	} else {
	  os << "?";
	}
      }
      for (size_type k = 0; k < annotations[i].size(); ++k) {
	os << "\t" << annotations[i][k];
      }
      os << endl;
    }
  }

  void writeIndexSets(ostream& os) const {
    for (Vec<set<size_type> >::size_type i = 0; i < indexSets.size(); ++i) {
      os << chrom << " " << (i + 1) << " " << (i * indexInterval) << " " 
	 << ((i+1) * indexInterval);
      for (set<size_type>::const_iterator it = indexSets[i].begin();
	   it != indexSets[i].end(); it++) {
	os << " " << (*it);
      }
      os << endl;
    }
  }

};

#endif
