// --*- C++ -*------x---------------------------------------------------------
#ifndef __NUCLEOTIDE_TOOLS__
#define __NUCLEOTIDE_TOOLS__

using namespace std;

#include <debug.h>
#include <cctype>

class NucleotideTools {

 public:
  typedef string::size_type size_type;

  enum {AMBIGUOUS_CHAR = 'N'};

 public:

  /** returns the total number of non-gap charactes in sequence */
  static size_type countNonGapsInChars(const string& s) {
    size_type n = s.size();
    size_type count = 0;
    for (size_type i = 0; i < n; ++i) {
      if (!isGap(s[i])) {
	++count;
      }
    }
    return count;
  }

  /** Returns true, if provided characters correspond to complementary nucleotides */
  static bool isComplementary(char c1, char c2, bool allowGu, bool allowGap) {
    PRECOND(!allowGap);
    if (isGap(c1) && isGap(c2)) {
      return false;
    }
    if (isGap(c1) || isGap(c2)) {
      return allowGap;
    }
    c1 = toupper(c1);
    c2 = toupper(c2);
    switch (c1) {
    case 'A': return ((c2 == 'T') || (c2 == 'U'));
    case 'C': return (c2 == 'G');
    case 'G': return (c2 == 'C') || (allowGu && ((c2 == 'T') || (c2 == 'U')));
    case 'T': return (c2 == 'A')  || (allowGu && (c2 == 'G'));
    case 'U': return (c2 == 'A')  || (allowGu && (c2 == 'G'));
    }
    return false;
  }

  /** Returns true, if provided characters correspond to G-U nucleotides */
  static bool isWobble(char c1, char c2) {
    c1 = toupper(c1);
    c2 = toupper(c2);
    switch (c1) {
    case 'G': return ((c2 == 'T') || (c2 == 'U'));
    case 'T': return (c2 == 'G');
    case 'U': return (c2 == 'G');
    }
    return false;
  }

  /** Returns true, if provided strings correspond to complementary nucleotides */
  static bool isComplementary(const string& s1, const string& s2, bool allowGu, bool allowGap) {
    PRECOND(s1.size() == s2.size());
    for (string::size_type i = 0; i < s1.size(); ++i) {
      if (!isComplementary(s1[i], s2[i], allowGu, allowGap)) {
	return false;
      }
    }
    return true;
  }

  /** Returns true, if provided strings correspond to complementary nucleotides */
  static size_type countComplementary(const string& s1, const string& s2, bool allowGu, bool allowGap,
				      const Vec<int>& permutation) {
    PRECOND(s1.size() == s2.size());
    size_type count = 0;
    for (string::size_type i = 0; i < s1.size(); ++i) {
      if (!isComplementary(s1[i], s2[i], allowGu, allowGap)) {
	++count;
      }
    }
    return count;
  }


  /** Returns true, if provided strings correspond to complementary nucleotides */
  static bool isAlmostComplementary(const string& s1, const string& s2, bool allowedGuFrac, bool allowedGapFrac, size_type basepairTypeMin=1) {
    PRECOND(s1.size() == s2.size());
    string::size_type countWc = 0;
    string::size_type countGap = 0;
    string::size_type countGU = 0;
    string::size_type n = s1.size();
    set<string> basepairTypes;
    string little = "AA";
    for (string::size_type i = 0; i < n; ++i) {
      string little2 = little; // make copy
      little2[0] = toupper(s1[i]);
      little2[1] = toupper(s2[i]);
      if (isGap(s1[i]) || isGap(s2[i])) {
	++countGap;
      } else if (isComplementary(s1[i], s2[i], false, false)) {
	++countWc;
	basepairTypes.insert(little2);
      } else if (isWobble(s1[i], s2[i])) {
	++countGU;
	basepairTypes.insert(little2);
      } else if (s1[i] == AMBIGUOUS_CHAR || s2[i] == AMBIGUOUS_CHAR) {
	++countGap;
      } else { // incompatible pair found
	return false;
      }

    }
    double nd = static_cast<double>(n);
    return ((countGU/nd) <= allowedGuFrac) && ((countGap/nd) <= allowedGapFrac) && (basepairTypes.size() >= basepairTypeMin);
  }

  /** Returns true, if provided strings correspond to complementary nucleotides */
  static bool isReverseComplementary(const string& s1, const string& s2, bool allowGu, bool allowGap) {
    PRECOND(s1.size() == s2.size());
    string::size_type n = s1.size();
    for (string::size_type i = 0; i < n; ++i) {
      if (!isComplementary(s1[i], s2[(n-i)-1], allowGu, allowGap)) {
	return false;
      }
    }
    return true;
  }

  /** Returns true, if provided strings correspond to complementary nucleotides */
  static void testIsComplementary() {
    ASSERT(isComplementary("ACGT", "TGCA", false, false));
    ASSERT(isComplementary("ACG-", "TGCA", false, true));
    ASSERT(!isComplementary("ACG-", "TGCA", false, false));
    ASSERT(!isComplementary("ACGT", "TGCT", false, false));
  }

  static string dnaComplement(const string& s) {
    string result = s;
    string::size_type n = s.size();
    for (string::size_type i = 0; i < n; ++i) {
      switch (result[i]) {
      case 'A': result[i] = 'T'; break;
      case 'C': result[i] = 'G'; break;
      case 'G': result[i] = 'C'; break;
      case 'T': result[i] = 'A'; break;
	// otherwise leave unchanged
      }
    }
    return result;
  }

  static void testDnaComplement() {
    ASSERT(dnaComplement("ACGT") == "TGCA");
    ASSERT(dnaComplement("AAAA") == "TTTT");
    ASSERT(dnaComplement("A-AA") == "T-TT");
    ASSERT(dnaComplement("A-NA") == "T-NT");
  }

  static bool isGap(char c) {
    return (c == '.') || (c == '-');
  }

  /** Returns true if alignment column is conserved (not ignoring gaps) */
  static bool isConserved(const string& s) {
    char found = ' ';
    char foundOrig = found;
    string::size_type n = s.size();
    for (string::size_type i = 0; i < n; ++i) {
      char c = toupper(s[i]);
      if (!isGap(c)) {
	if (found != foundOrig) {
	  if (c != found) {
	    return false; // second character that is not gap found
	  }
	} else {
	  found = c;
	}
      }
    }
    if (found == foundOrig) {
      return false; // only gaps found
    }
    return true;
  }

  /** Returns true if alignment column is conserved (not counting gaps) */
  static size_type nongapCharacterCount(const string& s) {
    set<char> charSet;
    string::size_type n = s.size();
    for (string::size_type i = 0; i < n; ++i) {
      if (!isGap(s[i])) {
	charSet.insert(toupper(s[i]));
      }
    }
    return charSet.size();
  }

  /** Returns true if alignment column is conserved (not counting gaps) */
  static void testNongapCharacterCount() {
    ASSERT(nongapCharacterCount("ACGT") == 4);
    ASSERT(nongapCharacterCount("ACGT-") == 4);
    ASSERT(nongapCharacterCount("ACGA") == 3);
    ASSERT(nongapCharacterCount("ACGa") == 3);
    ASSERT(nongapCharacterCount("AAaAa") == 1);
    ASSERT(nongapCharacterCount("--") == 0);
  }

  /** Tests method isConserved */
  static void testIsConserved() {
    ASSERT(isConserved("AAAAAAA"));
    ASSERT(!isConserved("AAAGAAAAA"));
    ASSERT(isConserved("-AAA-AA.AA"));
  }

  /** Runs all tests */
  static void testAll() {
    testIsConserved();
    testDnaComplement();
    testNongapCharacterCount();
    testIsComplementary();
  }
  
};


#endif
