#include <MAFAlignmentTest.h>

/**
 * Reads the chrM MAF fixture with hg18 as reference assembly and extracts a
 * short stretch of the reference sequence by assembly coordinates.
 *
 * The requested segment lies in the second MAF block of chrM, which reads:
 *   s hg18.chrM               226 70 +   16571 AGGACATAATAATAACAATTGAATGTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCC
 *   s ornAna1.Contig3497    32985 67 +   39338 AGGACACAGTTCAAGCAATGG--Tatttactgagt-gcttaccctgtgcacagcactgtactagcattgt
 *
 * The extracted sequence is only printed next to the expected value; the
 * assertions on length and content are currently disabled (see the
 * commented-out code at the end of the function).
 */
void
MAFAlignmentTest::testExtractAssemblySequence() {
  string methodName = "testExtractAssemblySequence";
  cout << "Starting " << methodName << endl;
  string filename = "../../test/fixtures/chrM.maf";
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Error opening file " + filename);
  MAFAlignment maf;
  string refAssembly = "hg18";
  maf.setRefAssembly(refAssembly);
  maf.setVerbose(3);
  maf.read(ifs);
  ifs.close();
  ASSERT(maf.size()  > 0);
  // Zero-based MAF coordinates. The block above starts at 226 with 'A', so
  // positions 227..232 spell "GGACAT". Both ends are counted (see len below).
  // NOTE(review): the original comment quoted UCSC coordinates 228-232;
  // confirm the intended end convention before re-enabling the assertions.
  length_type minCoord = 227;
  length_type maxCoord = 232;
  length_type len = maxCoord - minCoord + 1;
  cout << "Successfully read " << maf.size() << " maf blocks from file " << filename << endl;
  string extracted = maf.extractAssemblySequence(minCoord, maxCoord);
  string desired = "GGACAT";
  cout << "Extracted sequence using coordinates " << refAssembly << " " << minCoord << " " << maxCoord << " : "
       << extracted << " should have length " << len << " found length: " << extracted.size()
       << " correct content would be: " << desired << endl;
  // Disabled checks, kept for reference:
//   ASSERT(len == static_cast<length_type>(extracted.size()));
//   ASSERT(extracted == desired);
//   length_type minCoord2 = 32986;
//   length_type maxCoord2 = 32991;
//   string refAssembly2 = "ornAna1"; // currently only works for reference assembly
//   string desired2 = "GGACAC";
//   string extracted2 = maf.extractAssemblySequence(minCoord2, maxCoord2);
//   cout << "Extracted sequence using coordinates " << refAssembly2 << " " << minCoord2 << " " << maxCoord2 << " : "
//        << extracted2 << " should have length " << len << " found length: " << extracted2.size()
//        << " correct content would be: " << desired2 << endl;
//   ASSERT(len == static_cast<length_type>(extracted2.size()));
//   ASSERT(extracted2 == desired2);

  cout << "Finished " << methodName << endl;
}

/**
 * Extracts the complete hg18 sequence from the chrM alignment, one residue
 * at a time.
 *
 * Walks every reference position between getRefAssemblyBegin() and
 * getRefAssemblyEnd(), requires each single-position extraction to yield
 * exactly one character, spot-checks three positions against known residues
 * and finally checks the total number of extracted residues.
 */
void
MAFAlignmentTest::testExtractCompleteAssemblySequence() {
  string methodName = "testExtractCompleteAssemblySequence";
  cout << "Starting " << methodName << endl;
  string filename = "../../test/fixtures/chrM.maf";
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Error opening file " + filename);
  MAFAlignment maf;
  string refAssembly = "hg18";
  maf.setRefAssembly(refAssembly);
  maf.setVerbose(1);
  maf.read(ifs);
  ifs.close();
  ASSERT(maf.size()  > 0);
  cout << "Successfully read " << maf.size() << " maf blocks from file " << filename << endl;
  cout << "Defined assembly coordinate (1-based): "
       << (maf.getRefAssemblyBegin() + 1)<< " - "
       << maf.getRefAssemblyEnd() << endl;
  length_type minCoord =maf.getRefAssemblyBegin();
  length_type maxCoord = maf.getRefAssemblyEnd();
  ASSERT((minCoord+1) == 194); // internal coordinates are 0-based (UCSC: 194)
  // length_type len = maxCoord - minCoord;
  length_type count = 0; // number of positions that yielded a residue
  for (length_type i = minCoord; i < maxCoord; ++i) {
    string seq = maf.extractAssemblySequence(i, i);
    if (seq.size() > 0) {
      ASSERT(seq.size() == 1);
      cout << (i+1) << "\t" << seq << endl;
      if (i == minCoord) {
        ASSERT(seq == "C"); // first letter of hg18 chrM alignment
      } else if ((i+1) == 1166) { // some verified example; compare UCSC Genome Browser website for hg18/chrM:1166 !
        ASSERT(seq == "T");
      } else if ((i+1) == 16571) { // some verified example; compare UCSC Genome Browser website for hg18/chrM:16571 !
        ASSERT(seq == "G"); // corresponds to last character
      }
      ++count;
    } else {
      // An empty extraction means the reference has no residue here.
      cout << (i+1) << "\t" << "-" << endl;
      ERROR("This chrM example should not contain hg18 gaps!");
    }
  }
  cout << "Found " << count << " defined sequence characters." << endl;
  ERROR_IF(count != 16378,
           "Expected 16378 defined hg18 residues in chrM example!");
  cout << "Finished " << methodName << endl;
}

/**
  Extracts complete hg18 sequence from chrM alignment 
 */
void
MAFAlignmentTest::testExtractCompleteChimpAssemblySequence() {
  string methodName = "testExtractCompleteChimpAssemblySequence";
  cout << "Starting " << methodName << endl;
  string filename = "../../test/fixtures/chrM.maf";
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Error opening file " + filename);
  MAFAlignment maf;
  string refAssembly = "panTro2"; // "panTro2";
  maf.setRefAssembly(refAssembly);
  maf.setVerbose(1);
  maf.read(ifs);
  ifs.close();
  ASSERT(maf.size()  > 0);
  cout << "Succesfully read " << maf.size() << "maf blocks from file " << filename << endl;
  cout << "Defined assembly coordinate (1-based): "
       << (maf.getRefAssemblyBegin() + 1)<< " - "
       << maf.getRefAssemblyEnd() << endl;
  length_type minCoord =maf.getRefAssemblyBegin();
  length_type maxCoord = maf.getRefAssemblyEnd();
  ASSERT(minCoord == 0); // internal coordinates, 0-based (UCSC: 1)
  // length_type len = maxCoord - minCoord; 
  length_type count = 0;
  for (length_type i = minCoord; i < maxCoord; ++i) {
    string seq = maf.extractAssemblySequence(i, i);
    if (seq.size() > 0) {
      ASSERT(seq.size() == 1);
      cout << (i+1) << "\t" << seq << endl;
      if (i == minCoord) {
	ASSERT(seq == "G"); // first letter of hg18 chrM alignment
      } else if ((i+1) == 1166) { // some verified example; compare UCSC Genome Browser website for panTro2/chrM:1166 !
	ASSERT(seq == "G");
      } else if ((i+1) == 15983) { // some verified example; compare UCSC Genome Browser website for panTro2/chrM:15983 !
	ASSERT(seq == "A"); // corresponds to last character
      }
      ++count;
    } else {
      cout << (i+1) << "\t" << "-" << endl;
      ERROR("This chrM example should not contain hg18 gaps!");
    }
  } 
  cout << "Found " << count << " defined sequence characters." << endl;
  ERROR_IF(count != 15983,
	   "Expected 15983 defined hg18 residues in chrM example!");
  cout << "Finished " << methodName << endl;
}


void
MAFAlignmentTest::testRead() {
  string methodName = "testRead";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf;
  maf.setRefAssembly("hg18");
  maf.setVerbose(2);
  string filename = "../../test/fixtures/chr16_18S.maf";
  cout << "Reading MAF alignment from file " << filename << endl;
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  maf.read(ifs);
  ifs.close();
  cout << "Number of alignment blocks: " << maf.size() << endl;
  cout << maf << endl;
  ASSERT(maf.size() == 5); // there should be 5 individual alignments
  cout << "Detected reference assembly: " << maf.getRefAssembly() << endl;
  ASSERT(maf.getRefAssembly() == "hg18");
  cout << "Converting between internal column ids and assembly positions:" << endl;
  for (length_type i = 0; i < maf.getTotalLength(); ++i) {
    length_type asmPos = maf.getAssemblyPosition(i, "hg18");
    length_type convColId = maf.convertAssemblyPositionToColumnId(asmPos); // converted back to column id
    cout << (i + 1) << " " << maf.getAssemblyPosition(i, "hg18") << " " 
	 << (convColId + 1) << endl;
    ASSERT(i >= convColId); // not in all cases equal (gaps)
  }
  cout << "Finished " << methodName << endl;
}

void
MAFAlignmentTest::testReadSmall() {
  string methodName = "testReadSmall";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf;
  maf.setRefAssembly("dm3");
  maf.setVerbose(2);
  string filename = "../../test/fixtures/dm3_synth_GC2_8_2b.maf";
  cout << "Reading MAF alignment from file " << filename << endl;
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  maf.read(ifs);
  ifs.close();
  cout << "Number of alignment blocks: " << maf.size() << endl;
  cout << maf << endl;
  ASSERT(maf.size() == 2); // there should be 2 individual alignments
  cout << "Detected reference assembly: " << maf.getRefAssembly() << endl;
  ASSERT(maf.getRefAssembly() == "dm3");
  cout << "Converting between internal column ids and assembly positions:" << endl;
  for (length_type i = 0; i < maf.getTotalLength(); ++i) {
    length_type asmPos = maf.getAssemblyPosition(i, "dm3");

    length_type convColId = maf.convertAssemblyPositionToColumnId(asmPos); // converted back to column id
    length_type convColId2 = maf.convertAssemblyPositionToColumnIdSlow(asmPos); // converted back to column id
    length_type convColId3 = maf.convertAssemblyPositionToColumnIdVerySlow(asmPos); // converted back to column id

   
    cout << (i + 1) << " " << maf.getAssemblyPosition(i, "dm3") << " " 
	 << convColId << " " << convColId2 <<  " " << convColId3 << endl;
    ASSERT(i >= convColId); // not in all cases equal (gaps)
    ASSERT(convColId == convColId2);
    ASSERT(convColId == convColId3);
  }
  cout << "Finished " << methodName << endl;
}

void
MAFAlignmentTest::testAppendSmall() {
  string methodName = "testAppendSmall";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf;
  MAFAlignment maf2;
  maf.setRefAssembly("dm3");
  maf.setVerbose(2);
  maf2.setRefAssembly("dm3");
  maf2.setVerbose(2);
  string filename = "../../test/fixtures/dm3_synth_GC2_8_2b.maf";
  cout << "Reading MAF alignment from file " << filename << endl;
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  maf.read(ifs);
  ifs.close();
  cout << "Number of alignment blocks: " << maf.size() << endl;
  cout << maf << endl;
  ASSERT(maf.size() == 2); // there should be 2 individual alignments
  cout << "Detected reference assembly: " << maf.getRefAssembly() << endl;
  ASSERT(maf.getRefAssembly() == "dm3");

  string filename2 = "../../test/fixtures/dm3_synth_GC2_8_2c2.maf";
  cout << "Reading MAF alignment from file " << filename2 << endl;
  ifstream ifs2(filename2.c_str());
  ERROR_IF(!ifs2, "Could not find filename " + filename2);
  maf2.read(ifs2);
  ifs2.close();
  cout << "Number of alignment blocks: " << maf2.size() << endl;

  maf.append(maf2); // make concatenation
  cout << "Converting between internal column ids and assembly positions:" << endl;
  for (length_type i = 0; i < maf.getTotalLength(); ++i) {
    length_type asmPos = maf.getAssemblyPosition(i, "dm3");

    length_type convColId = maf.convertAssemblyPositionToColumnId(asmPos); // converted back to column id
    length_type convColId2 = maf.convertAssemblyPositionToColumnIdSlow(asmPos); // converted back to column id
    length_type convColId3 = maf.convertAssemblyPositionToColumnIdVerySlow(asmPos); // converted back to column id

   
    cout << (i + 1) << " " << maf.getAssemblyPosition(i, "dm3") << " " 
	 << convColId << " " << convColId2 <<  " " << convColId3 << endl;
    ASSERT(i >= convColId); // not in all cases equal (gaps)
    ASSERT(convColId == convColId2);
    ASSERT(convColId == convColId3);
  }
  cout << "Finished " << methodName << endl;
}

/**
 * Runs filterMAF() on a MAF fixture using a single-region BED fixture.
 *
 * The BED regions are read and validated first. filterMAF() is then called
 * with the MAF input stream, an output stream to "<methodName>.tmp.maf" in
 * the current working directory, and the BED regions. Afterwards the test
 * checks that the MAFAlignment object itself holds no blocks and still
 * reports the configured reference assembly.
 */
void
MAFAlignmentTest::testFilterMAF() {
  string methodName = "testFilterMAF";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf;
  string currAssembly = "hg17";
  maf.setRefAssembly(currAssembly);
  maf.setVerbose(6);
  BEDRegions bed;
  bed.setAssembly(currAssembly);
  ASSERT(bed.getAssembly() == currAssembly);
  string filename = "../../test/fixtures/reversecompl_orig2.maf"; // "../../test/fixtures/chr16_18S.maf";
  string bedfilename = "../../test/fixtures/reversecompl_orig2.bed";
  cout << "Reading BED format regions from file " << bedfilename << " : " << bed.getAssembly() << endl;
  ifstream ifsBed(bedfilename.c_str());
  ERROR_IF(!ifsBed, "Could not find BED format filename " + bedfilename);
  bed.read(ifsBed);
  ifsBed.close();
  cout << "Read BED data file: " << bed.getAssembly() << endl << bed << endl;
  ASSERT(bed.size() == 1);
  ASSERT(bed.validate());
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  string ofilename = methodName + ".tmp.maf";
  ofstream ofs(ofilename.c_str());
  ERROR_IF(!ofs, "Could not open output file " + ofilename);
  maf.filterMAF(ifs, ofs, bed);
  ifs.close();
  ofs.close();

  cout << "Number of alignment blocks: " << maf.size() << endl;
  cout << maf << endl;
  ASSERT(maf.size() == 0); // there should be no individual alignments read
  cout << "Detected reference assembly: " << maf.getRefAssembly() << endl;
  ASSERT(maf.getRefAssembly() == "hg17");

  cout << "Finished " << methodName << endl;
}

/**
 * Similar to testFilterMAF, but uses a BED file whose chromosomes do not
 * match the MAF data, so no block is expected to pass the filter.
 *
 * The number of blocks reported by filterMAF() is printed only.
 * NOTE(review): the expectation "no output" is not asserted; consider
 * checking the returned count once the intended value is confirmed.
 */
void
MAFAlignmentTest::testFilterMAF2() {
  // Use this test's own name so that log lines and the temporary output file
  // are not confused with those of testFilterMAF.
  string methodName = "testFilterMAF2";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf;
  string currAssembly = "hg17";
  maf.setRefAssembly(currAssembly);
  maf.setVerbose(6);
  BEDRegions bed;
  bed.setAssembly(currAssembly);
  ASSERT(bed.getAssembly() == currAssembly);
  string filename = "../../test/fixtures/reversecompl_orig2.maf";
  string bedfilename = "../../test/fixtures/reversecompl_orig2b.bed";
  cout << "Reading BED format regions from file " << bedfilename << " : " << bed.getAssembly() << endl;
  ifstream ifsBed(bedfilename.c_str());
  ERROR_IF(!ifsBed, "Could not find BED format filename " + bedfilename);
  bed.read(ifsBed);
  ifsBed.close();
  cout << "Read BED data file: " << bed.getAssembly() << endl << bed << endl;
  ASSERT(bed.size() == 1);
  ASSERT(bed.validate());
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  string ofilename = methodName + ".tmp.maf";
  ofstream ofs(ofilename.c_str());
  ERROR_IF(!ofs, "Could not open output file " + ofilename);
  MAFAlignment::size_type filtResult = maf.filterMAF(ifs, ofs, bed);
  ifs.close();
  ofs.close();

  cout << "Number of alignment blocks that passed filter: " << filtResult << endl;

  cout << "Finished " << methodName << endl;
}

/**
 * Reads a MAF fixture (hg17 reference, properties kept), applies
 * shuffleHorizontal() and writes the result in MAF format to
 * "<methodName>.tmp.maf" in the current working directory.
 *
 * Only successful reading (at least one block) is checked; the shuffled
 * output itself is not verified.
 */
void
MAFAlignmentTest::testShuffleHorizontal() {
  string methodName = "testShuffleHorizontal";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf;
  string currAssembly = "hg17";
  maf.setRemovePropertiesMode(false);
  maf.setRefAssembly(currAssembly);
  maf.setVerbose(6);
  string filename = "../../test/fixtures/reversecompl_orig2.maf"; // "../../test/fixtures/chr16_18S.maf";
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  maf.setVerbose(3); // replaces the level set above before anything is read
  maf.read(ifs);
  REMARK "Read " << maf.size() << " MAF blocks from file " << filename << endl;
  ERROR_IF(maf.size() == 0, "No MAF blocks were read!");
  string ofilename = methodName + ".tmp.maf";
  ofstream ofs(ofilename.c_str());
  ERROR_IF(!ofs, "Could not open output file " + ofilename);
  maf.shuffleHorizontal();
  maf.writeMAF(ofs);
  // ofs << maf << endl;
  ifs.close();
  ofs.close();
  cout << "Finished " << methodName << endl;
}

/**
 * Reads a MAF fixture (hg17 reference, properties kept), applies
 * shuffleRows() and writes the result in MAF format to
 * "<methodName>.tmp.maf" in the current working directory.
 *
 * Only successful reading (at least one block) is checked; the shuffled
 * output itself is not verified.
 */
void
MAFAlignmentTest::testShuffleRows() {
  string methodName = "testShuffleRows";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf;
  string currAssembly = "hg17";
  maf.setRemovePropertiesMode(false);
  maf.setRefAssembly(currAssembly);
  maf.setVerbose(6);
  string filename = "../../test/fixtures/reversecompl_orig2.maf"; // "../../test/fixtures/chr16_18S.maf";
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  maf.setVerbose(3); // replaces the level set above before anything is read
  maf.read(ifs);
  REMARK "Read " << maf.size() << " MAF blocks from file " << filename << endl;
  ERROR_IF(maf.size() == 0, "No MAF blocks were read!");
  string ofilename = methodName + ".tmp.maf";
  ofstream ofs(ofilename.c_str());
  ERROR_IF(!ofs, "Could not open output file " + ofilename);
  maf.shuffleRows();
  maf.writeMAF(ofs);
  // ofs << maf << endl;
  ifs.close();
  ofs.close();
  cout << "Finished " << methodName << endl;
}

/**
 * Reads a MAF fixture (hg17 reference, properties kept), applies
 * shuffleVertical() and writes the result in MAF format to
 * "<methodName>.tmp.maf" in the current working directory.
 *
 * Only successful reading (at least one block) is checked; the shuffled
 * output itself is not verified.
 */
void
MAFAlignmentTest::testShuffleVertical() {
  string methodName = "testShuffleVertical";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf;
  string currAssembly = "hg17";
  maf.setRemovePropertiesMode(false);
  maf.setRefAssembly(currAssembly);
  maf.setVerbose(6);
  string filename = "../../test/fixtures/reversecompl_orig2.maf"; // "../../test/fixtures/chr16_18S.maf";
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  maf.setVerbose(3); // replaces the level set above before anything is read
  maf.read(ifs);
  REMARK "Read " << maf.size() << " MAF blocks from file " << filename << endl;
  ERROR_IF(maf.size() == 0, "No MAF blocks were read!");
  string ofilename = methodName + ".tmp.maf";
  ofstream ofs(ofilename.c_str());
  ERROR_IF(!ofs, "Could not open output file " + ofilename);
  maf.shuffleVertical();
  maf.writeMAF(ofs);
  // ofs << maf << endl;
  ifs.close();
  ofs.close();

  cout << "Finished " << methodName << endl;
}


void
MAFAlignmentTest::testReverseComplement() {
  string methodName = "testReverseComplement";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf;
  maf.setRefAssembly("hg17");
  maf.setStrandMode(MAFAlignment::STRAND_MINUS); // convert all Blocks such that hg17 is minus strand
  // maf.setStrandMode(MAFAlignment::STRAND_PLUS);
  maf.setVerbose(3);
  string filename = "../../test/fixtures/reversecompl_orig.maf"; // from Galaxy doc of reverse complement
  cout << "Reading MAF alignment from file and converting everything to minus strand for " << maf.getRefAssembly()
       << " : " << filename << endl;
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  maf.read(ifs);
  ifs.close();
  cout << "Number of alignment blocks: " << maf.size() << endl;
  cout << maf << endl;
  ASSERT(maf.size() == 1); // there should be 5 individual alignments
  cout << "Detected reference assembly: " << maf.getRefAssembly() << endl;
  ASSERT(maf.getRefAssembly() == "hg17");
  cout << "Output of alignment: " << endl;
  for (MAFAlignment::size_type i = 0; i < maf.size(); ++i) {
    cout << "MAF Block " << (i+1) << " : " << endl << maf[i] << endl;
    maf[i].writeProperties(cout);
  }
  cout << "Finished " << methodName << endl;
}


void
MAFAlignmentTest::testReadFASTA() {
  string methodName = "testReadFASTA";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf;
  maf.setRefAssembly("K01553");
  string filename = "../../test/fixtures/anticodon_simple2.fa";
  cout << "Reading FASTA alignment from file " << filename << endl;
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  maf.readFASTA(ifs);
  ifs.close();
  cout << "Number of alignment blocks: " << maf.size() << endl;
  cout << maf << endl;
  
  ASSERT(maf.size() == 1); // there should be 5 individual alignments
  cout << maf[0] << endl;
  ASSERT(maf.getTotalLength() == 17); // number of columns
  cout << "Finished " << methodName << endl;
}

/**
 * Reads two MAF fixtures (chr16_18S and chrM, both with hg18 as reference),
 * shifts the chromosome starts of the second alignment by a large offset
 * and appends it to the first.
 *
 * Checks that the last chromStart of the first alignment lies before the
 * first chromStart of the shifted second alignment, that the block count
 * after append() is the sum of both, and that the appended alignment keeps
 * its own block count. Finally prints the hg18 position of every column.
 */
void
MAFAlignmentTest::testAppend() {
  string methodName = "testAppend";
  cout << "Starting " << methodName << endl;
  MAFAlignment maf, maf2;
  maf.setRefAssembly("hg18");
  maf2.setRefAssembly("hg18");

  string filename = "../../test/fixtures/chr16_18S.maf";
  string filename2 = "../../test/fixtures/chrM.maf";
  cout << "Reading MAF alignment from file " << filename << endl;
  ifstream ifs(filename.c_str());
  ERROR_IF(!ifs, "Could not find filename " + filename);
  maf.read(ifs);
  ifs.close();
  cout << "Number of alignment blocks: " << maf.size() << endl;
  cout << maf << endl;
  ASSERT(maf.size() == 5); // there should be 5 individual alignments
  cout << "Reading MAF alignment 2 from file " << filename2 << endl;
  ifstream ifs2(filename2.c_str());
  ERROR_IF(!ifs2, "Could not find filename " + filename2);
  maf2.read(ifs2);
  ifs2.close();
  cout << "Number of alignment blocks of second alignment: " << maf2.size() << endl;
  ASSERT(maf.size() == 5); // the first alignment still holds its 5 blocks
  // The second alignment must be non-empty: its block 0 is queried below.
  ASSERT(maf2.size() > 0);
  MAFAlignment::size_type maf1OrigSize = maf.size();
  MAFAlignment::size_type maf2OrigSize = maf2.size();
  cout << "Chromstarts of end of first MAF and beginning of second MAF: "
       << maf.getChromStart(maf.size()-1, "hg18") << " " << maf2.getChromStart(0, "hg18") << endl;
  // Shift the second alignment so that its chromStarts lie after those of the
  // first alignment; this ordering is asserted right after the shift.
  length_type offset = 100000000;
  cout << "Adding offset of " << offset << " to chromStarts of second MAF..." << endl;
  maf2.addChromStartOffset(offset, "hg18");
  cout << "Chromstarts of end of first MAF and beginning of second MAF: "
       << maf.getChromStart(maf.size()-1, "hg18") << " " << maf2.getChromStart(0, "hg18") << endl;
  ASSERT(maf.getChromStart(maf.size()-1, "hg18") < maf2.getChromStart(0, "hg18"));
  maf.append(maf2);
  ASSERT(maf.size() == maf1OrigSize + maf2.size());
  ASSERT(maf2.size() == maf2OrigSize); // append must leave the source untouched

  cout << "Converting between internal column ids and assembly positions:" << endl;
  for (length_type i = 0; i < maf.getTotalLength(); ++i) {
    cout << (i + 1) << " " << maf.getAssemblyPosition(i, "hg18") << endl;
  }
  cout << "Finished " << methodName << endl;
}
