def test20131113Debugging(self):
    """
    Regression case built from an alignment inspected by hand on
    2013-11-13.

    The HSP is given with a subject start (2339751) greater than its
    subject end (2339365), the matched read sequence contains a gap
    character ('-'), and the read length passed to normalizeHSP is 396.
    """
    hitSequence = (
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGTGTCCGGCA'
        'AGGTTGCCAAGGAGCAGATCGACATCGATAACGCCAAGCACACCAAGTGATGCACTGA'
        'CGACGGGTGAGGCCCAGATTCCTACGGCCTGGGCCTCTGTCTGCGTCGGGATGCCATT'
        'AGGCCGGTAGGATCGGTCACATGATCGATCCCAAGCTCCTGCGAACGGATCCGGACGC'
        'CGTTCGTCGCTCCCAGGCCGCCCGCGGCGAGGACTCCTCGGTTGTGGACGACGTTGTC'
        'GCCGCAGATGAGGCTCGTCGTGAGGCTATTGCTGCCCATGAGAACCTGCGTGCAGAAC'
        'AGAAGGGACTCGGCAAGCGAATCGCTAAAGCATCCGGTG')
    readSequence = (
        'GTC-AGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGTGTCCGGCA'
        'AGGTTGCCAAGGAGCAGATCGACATCGATAACGCCAAGCACACCAAGTGATGCACTGA'
        'CGACGGGTGAGGCCCAGATTCCTACGGCCTGGGCCTCTGTCTGCGTCGGGATGCCATT'
        'AGGCCGCTAGGATCGGTCACATGATCGATCCCAAGCTCCTGCGAACGGATCCGGACGC'
        'CGTTCGTCGCTCCCAGGCCGCCCGCGGCGAGGACTCCTCGGTTGTGGACGACGTTGTC'
        'GCCGCAGATGAGGCTCGTCGTGAGGCTATTGCTGCCCATGAGAACCTGCGTGCAGAAC'
        'AGAAGGGACTCGGCAAGCGAATCGCTAAAGCATCCGGTG')
    fakeHsp = FakeHSP(subjectStart=2339751, subjectEnd=2339365,
                      readStart=1, readEnd=386, frame=self.frame,
                      hit=hitSequence, read=readSequence)
    expected = {
        'subjectStart': 2339364,
        'subjectEnd': 2339751,
        'readStart': 0,
        'readEnd': 386,
        'readStartInSubject': 2339354,
        'readEndInSubject': 2339751,
    }
    self.assertEqual(expected, normalizeHSP(fakeHsp, 396, 'blastn'))
def _dictToAlignments(self, blastDict, read): """ Take a dict (made by XMLRecordsReader._convertBlastRecordToDict) and convert it to a list of alignments. @param blastDict: A C{dict}, from convertBlastRecordToDict. @param read: A C{Read} instance, containing the read that BLAST used to create this record. @raise ValueError: If the query id in the BLAST dictionary does not match the id of the read. @return: A C{list} of L{dark.alignment.Alignment} instances. """ if (blastDict['query'] != read.id and blastDict['query'].split()[0] != read.id): raise ValueError( 'The reads you have provided do not match the BLAST output: ' 'BLAST record query id (%s) does not match the id of the ' 'supposedly corresponding read (%s).' % (blastDict['query'], read.id)) alignments = [] getScore = itemgetter('bits' if self._hspClass is HSP else 'expect') for blastAlignment in blastDict['alignments']: alignment = Alignment(blastAlignment['length'], blastAlignment['title']) alignments.append(alignment) for blastHsp in blastAlignment['hsps']: score = getScore(blastHsp) normalized = normalizeHSP(blastHsp, len(read), self.application) hsp = self._hspClass( score, readStart=normalized['readStart'], readEnd=normalized['readEnd'], readStartInSubject=normalized['readStartInSubject'], readEndInSubject=normalized['readEndInSubject'], readFrame=blastHsp['frame'][0], subjectStart=normalized['subjectStart'], subjectEnd=normalized['subjectEnd'], subjectFrame=blastHsp['frame'][1], readMatchedSequence=blastHsp['query'], subjectMatchedSequence=blastHsp['sbjct'], # Use blastHsp.get on identicalCount and positiveCount # because they were added in version 2.0.3 and will not # be present in any of our JSON output generated before # that. Those values will be None for those JSON files, # but that's much better than no longer being able to # read all that data. identicalCount=blastHsp.get('identicalCount'), positiveCount=blastHsp.get('positiveCount')) alignment.addHsp(hsp) return alignments
def check(self, templateStr):
    """
    Build a L{Template} from C{templateStr}, normalize the HSP it
    produces and compare the result with the offsets and lengths stored
    on the template.

    @param templateStr: The value passed to the L{Template} constructor.
    """
    tmpl = Template(templateStr)
    actual = normalizeHSP(tmpl.hsp(), tmpl.readLen, 'blastn')

    hitMatchStart = tmpl.hitMatchStart
    matchLen = tmpl.matchLen
    readMatchStart = tmpl.readMatchStart
    readResultStart = tmpl.readResultStart

    expected = {
        'subjectStart': hitMatchStart,
        'subjectEnd': hitMatchStart + matchLen,
        'readStart': readMatchStart,
        'readEnd': readMatchStart + matchLen,
        'readStartInSubject': readResultStart,
        'readEndInSubject': readResultStart + tmpl.readLen,
    }
    self.assertEqual(expected, actual)
def check(self, templateStr):
    """
    Build a L{Template} from C{templateStr}, normalize the HSP it
    produces and compare the result with the offsets and lengths stored
    on the template.

    @param templateStr: The value passed to the L{Template} constructor.
    """
    tmpl = Template(templateStr)
    actual = normalizeHSP(tmpl.hsp(), tmpl.readLen, 'blastn')

    hitMatchStart = tmpl.hitMatchStart
    matchLen = tmpl.matchLen
    readMatchStart = tmpl.readMatchStart
    readResultStart = tmpl.readResultStart

    expected = {
        'subjectStart': hitMatchStart,
        'subjectEnd': hitMatchStart + matchLen,
        'readStart': readMatchStart,
        'readEnd': readMatchStart + matchLen,
        'readStartInSubject': readResultStart,
        'readEndInSubject': readResultStart + tmpl.readLen,
    }
    self.assertEqual(expected, actual)
def _dictToAlignments(self, blastDict, read): """ Take a dict (made by XMLRecordsReader._convertBlastRecordToDict) and convert it to a list of alignments. @param blastDict: A C{dict}, from convertBlastRecordToDict. @param read: A C{Read} instance, containing the read that BLAST used to create this record. @raise ValueError: If the query id in the BLAST dictionary does not match the id of the read. @return: A C{list} of L{dark.alignment.Alignment} instances. """ if (blastDict['query'] != read.id and blastDict['query'].split()[0] != read.id): raise ValueError( 'The reads you have provided do not match the BLAST output: ' 'BLAST record query id (%s) does not match the id of the ' 'supposedly corresponding read (%s).' % (blastDict['query'], read.id)) alignments = [] getScore = itemgetter('bits' if self._hspClass is HSP else 'expect') for blastAlignment in blastDict['alignments']: alignment = Alignment(blastAlignment['length'], blastAlignment['title']) alignments.append(alignment) for blastHsp in blastAlignment['hsps']: score = getScore(blastHsp) normalized = normalizeHSP(blastHsp, len(read), self.application) hsp = self._hspClass( score, readStart=normalized['readStart'], readEnd=normalized['readEnd'], readStartInSubject=normalized['readStartInSubject'], readEndInSubject=normalized['readEndInSubject'], readFrame=blastHsp['frame'][0], subjectStart=normalized['subjectStart'], subjectEnd=normalized['subjectEnd'], subjectFrame=blastHsp['frame'][1], readMatchedSequence=blastHsp['query'], subjectMatchedSequence=blastHsp['sbjct']) alignment.addHsp(hsp) return alignments
def testHitExtendsLeft(self):
    """
    The hit extends beyond the read on the left.

    Layout in subject offsets (s = hit, q = read):

        ssssss
          qqqq

    The HSP covers the whole 4-base read, which is expected to occupy
    subject offsets 2 to 6.
    """
    expected = {
        'subjectStart': 2,
        'subjectEnd': 6,
        'readStart': 0,
        'readEnd': 4,
        'readStartInSubject': 2,
        'readEndInSubject': 6,
    }
    fakeHsp = FakeHSP(subjectStart=3, subjectEnd=6, readStart=1,
                      readEnd=4, frame=self.frame)
    self.assertEqual(expected, normalizeHSP(fakeHsp, 4, 'blastn'))
def testReadExtendsRight2(self):
    """
    The read sticks out to the right of the hit.

    Layout in subject offsets (s = hit, q = read):

        ssss
        qqqqqq

    The HSP is given with a subject start (2) greater than its subject
    end (1) and matches read positions 5 to 6 of a 6-base read. The read
    is expected to span subject offsets 0 to 6, with the match at
    subject offsets 0 to 2.
    """
    expected = {
        'subjectStart': 0,
        'subjectEnd': 2,
        'readStart': 4,
        'readEnd': 6,
        'readStartInSubject': 0,
        'readEndInSubject': 6,
    }
    fakeHsp = FakeHSP(subjectStart=2, subjectEnd=1, readStart=5,
                      readEnd=6, frame=self.frame)
    self.assertEqual(expected, normalizeHSP(fakeHsp, 6, 'blastn'))
def testReadExtendsRightAndLeft(self):
    """
    The read extends past the hit on both the right and the left.

    Layout in subject offsets (s = hit, q = read):

         ssss
        qqqqqqq

    The HSP is given with a subject start (4) greater than its subject
    end (1) and matches read positions 3 to 6 of a 7-base read. The read
    is expected to span subject offsets -1 to 6.
    """
    expected = {
        'subjectStart': 0,
        'subjectEnd': 4,
        'readStart': 2,
        'readEnd': 6,
        'readStartInSubject': -1,
        'readEndInSubject': 6,
    }
    fakeHsp = FakeHSP(subjectStart=4, subjectEnd=1, readStart=3,
                      readEnd=6, frame=self.frame)
    self.assertEqual(expected, normalizeHSP(fakeHsp, 7, 'blastn'))
def testIdentical(self):
    """
    The hit and the read start and end at the same positions.

    Layout in subject offsets (s = hit, q = read):

        ssss
        qqqq

    Both the subject and read ranges are given as 1 to 4 and are expected
    to come back as offsets 0 to 4.
    """
    expected = {
        'subjectStart': 0,
        'subjectEnd': 4,
        'readStart': 0,
        'readEnd': 4,
        'readStartInSubject': 0,
        'readEndInSubject': 4,
    }
    fakeHsp = FakeHSP(subjectStart=1, subjectEnd=4, readStart=1,
                      readEnd=4, frame=self.frame)
    self.assertEqual(expected, normalizeHSP(fakeHsp, 4, 'blastn'))
def testHitExtendsRightAndLeft(self):
    """
    The hit extends past the read on both the right and the left.

    Layout in subject offsets (s = hit, q = read):

        sssssss
         qqqq

    The HSP is given with a subject start (5) greater than its subject
    end (2) and covers the whole 4-base read, which is expected to occupy
    subject offsets 1 to 5.
    """
    expected = {
        'subjectStart': 1,
        'subjectEnd': 5,
        'readStart': 0,
        'readEnd': 4,
        'readStartInSubject': 1,
        'readEndInSubject': 5,
    }
    fakeHsp = FakeHSP(subjectStart=5, subjectEnd=2, readStart=1,
                      readEnd=4, frame=self.frame)
    self.assertEqual(expected, normalizeHSP(fakeHsp, 4, 'blastn'))
def test20131115Debugging(self):
    """
    Regression case built from an alignment inspected by hand for BM on
    2013-11-15.

    The matched read sequence contains one gap character ('-') and the
    match runs up to the last base of the 149-base read.
    """
    readSequence = 'CTCTTGCA-CCTTAGGTACC'
    hitSequence = 'CTCTAGCAGCCTTAGGTACC'
    expected = {
        'subjectStart': 1775,
        'subjectEnd': 1795,
        'readStart': 130,
        'readEnd': 149,
        'readStartInSubject': 1775,
        'readEndInSubject': 1925,
    }
    fakeHsp = FakeHSP(subjectStart=1776, subjectEnd=1795, readStart=131,
                      readEnd=149, frame=self.frame, read=readSequence,
                      hit=hitSequence)
    self.assertEqual(expected, normalizeHSP(fakeHsp, 149, 'blastn'))
def testReadExtendsLeft(self):
    """
    The read sticks out to the left of the hit.

    Layout in subject offsets (s = hit, q = read):

          ssss
        qqqqqq

    The HSP is given with a subject start (4) greater than its subject
    end (1) and matches read positions 1 to 4 of a 6-base read. The read
    is expected to span subject offsets -2 to 4.
    """
    expected = {
        'subjectStart': 0,
        'subjectEnd': 4,
        'readStart': 0,
        'readEnd': 4,
        'readStartInSubject': -2,
        'readEndInSubject': 4,
    }
    fakeHsp = FakeHSP(subjectStart=4, subjectEnd=1, readStart=1,
                      readEnd=4, frame=self.frame)
    self.assertEqual(expected, normalizeHSP(fakeHsp, 6, 'blastn'))
def test20131115Debugging(self):
    """
    Regression case built from an alignment inspected by hand for Barbara
    on 2013-11-15.

    The matched read sequence contains one gap character ('-') and the
    match runs up to the last base of the 149-base read. The read is
    expected to start at a negative subject offset (-75).
    """
    readSequence = 'TTCTTTTTGCATTTGATAGT-TTGCTACAAG'
    hitSequence = 'TTCTTTTTGCAATAGTCAGTCTTGCTAAAAG'
    expected = {
        'subjectStart': 44,
        'subjectEnd': 75,
        'readStart': 119,
        'readEnd': 149,
        'readStartInSubject': -75,
        'readEndInSubject': 75,
    }
    fakeHsp = FakeHSP(subjectStart=45, subjectEnd=75, readStart=120,
                      readEnd=149, frame=self.frame, read=readSequence,
                      hit=hitSequence)
    self.assertEqual(expected, normalizeHSP(fakeHsp, 149, 'blastn'))
def testReadExtendsRight2(self):
    """
    The read sticks out to the right of the hit.

    Layout in subject offsets (s = hit, q = read):

        ssss
        qqqqqq

    The HSP is given with a subject start (2) greater than its subject
    end (1) and matches read positions 5 to 6 of a 6-base read. The read
    is expected to span subject offsets 0 to 6, with the match at
    subject offsets 0 to 2.
    """
    fakeHsp = FakeHSP(subjectStart=2, subjectEnd=1, readStart=5,
                      readEnd=6, frame=self.frame)
    actual = normalizeHSP(fakeHsp, 6, 'blastn')
    expected = {
        'subjectStart': 0,
        'subjectEnd': 2,
        'readStart': 4,
        'readEnd': 6,
        'readStartInSubject': 0,
        'readEndInSubject': 6,
    }
    self.assertEqual(expected, actual)
def testHitExtendsRightAndLeft(self):
    """
    The hit extends past the read on both the right and the left.

    Layout in subject offsets (s = hit, q = read):

        sssssss
         qqqq

    The HSP is given with a subject start (5) greater than its subject
    end (2) and covers the whole 4-base read, which is expected to occupy
    subject offsets 1 to 5.
    """
    fakeHsp = FakeHSP(subjectStart=5, subjectEnd=2, readStart=1,
                      readEnd=4, frame=self.frame)
    actual = normalizeHSP(fakeHsp, 4, 'blastn')
    expected = {
        'subjectStart': 1,
        'subjectEnd': 5,
        'readStart': 0,
        'readEnd': 4,
        'readStartInSubject': 1,
        'readEndInSubject': 5,
    }
    self.assertEqual(expected, actual)
def testHitExtendsLeft(self):
    """
    The hit extends beyond the read on the left.

    Layout in subject offsets (s = hit, q = read):

        ssssss
          qqqq

    The HSP covers the whole 4-base read, which is expected to occupy
    subject offsets 2 to 6.
    """
    fakeHsp = FakeHSP(subjectStart=3, subjectEnd=6, readStart=1,
                      readEnd=4, frame=self.frame)
    actual = normalizeHSP(fakeHsp, 4, 'blastn')
    expected = {
        'subjectStart': 2,
        'subjectEnd': 6,
        'readStart': 0,
        'readEnd': 4,
        'readStartInSubject': 2,
        'readEndInSubject': 6,
    }
    self.assertEqual(expected, actual)
def testReadExtendsRightAndLeft(self):
    """
    The read extends past the hit on both the right and the left.

    Layout in subject offsets (s = hit, q = read):

         ssss
        qqqqqqq

    The HSP is given with a subject start (4) greater than its subject
    end (1) and matches read positions 3 to 6 of a 7-base read. The read
    is expected to span subject offsets -1 to 6.
    """
    fakeHsp = FakeHSP(subjectStart=4, subjectEnd=1, readStart=3,
                      readEnd=6, frame=self.frame)
    actual = normalizeHSP(fakeHsp, 7, 'blastn')
    expected = {
        'subjectStart': 0,
        'subjectEnd': 4,
        'readStart': 2,
        'readEnd': 6,
        'readStartInSubject': -1,
        'readEndInSubject': 6,
    }
    self.assertEqual(expected, actual)
def testIdentical(self):
    """
    The hit and the read start and end at the same positions.

    Layout in subject offsets (s = hit, q = read):

        ssss
        qqqq

    Both the subject and read ranges are given as 1 to 4 and are expected
    to come back as offsets 0 to 4.
    """
    fakeHsp = FakeHSP(subjectStart=1, subjectEnd=4, readStart=1,
                      readEnd=4, frame=self.frame)
    actual = normalizeHSP(fakeHsp, 4, 'blastn')
    expected = {
        'subjectStart': 0,
        'subjectEnd': 4,
        'readStart': 0,
        'readEnd': 4,
        'readStartInSubject': 0,
        'readEndInSubject': 4,
    }
    self.assertEqual(expected, actual)
def testReadExtendsLeft(self):
    """
    The read sticks out to the left of the hit.

    Layout in subject offsets (s = hit, q = read):

          ssss
        qqqqqq

    The HSP is given with a subject start (4) greater than its subject
    end (1) and matches read positions 1 to 4 of a 6-base read. The read
    is expected to span subject offsets -2 to 4.
    """
    fakeHsp = FakeHSP(subjectStart=4, subjectEnd=1, readStart=1,
                      readEnd=4, frame=self.frame)
    actual = normalizeHSP(fakeHsp, 6, 'blastn')
    expected = {
        'subjectStart': 0,
        'subjectEnd': 4,
        'readStart': 0,
        'readEnd': 4,
        'readStartInSubject': -2,
        'readEndInSubject': 4,
    }
    self.assertEqual(expected, actual)
def test20130721Debugging(self):
    """
    Regression case built from an alignment inspected by hand on
    2013-07-21.

    The hit and read strings are invented: they were added on 2013-11-21
    to introduce 5 gaps in the read, because normalizeHSP had started
    checking its input more rigorously. The HSP is given with a subject
    start (9018) greater than its subject end (8764).
    """
    hitSequence = (
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'AAAAA')
    readSequence = (
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        '-----')
    fakeHsp = FakeHSP(subjectStart=9018, subjectEnd=8764, readStart=66,
                      readEnd=315, frame=self.frame, hit=hitSequence,
                      read=readSequence)
    actual = normalizeHSP(fakeHsp, 316, 'blastn')
    expected = {
        'subjectStart': 8763,
        'subjectEnd': 9018,
        'readStart': 65,
        'readEnd': 315,
        'readStartInSubject': 8762,
        'readEndInSubject': 9083,
    }
    self.assertEqual(expected, actual)
def test20131115Debugging(self):
    """
    Regression case built from an alignment inspected by hand for BM on
    2013-11-15.

    The matched read sequence contains one gap character ('-') and the
    match runs up to the last base of the 149-base read.
    """
    readSequence = 'CTCTTGCA-CCTTAGGTACC'
    hitSequence = 'CTCTAGCAGCCTTAGGTACC'
    fakeHsp = FakeHSP(subjectStart=1776, subjectEnd=1795, readStart=131,
                      readEnd=149, frame=self.frame, read=readSequence,
                      hit=hitSequence)
    actual = normalizeHSP(fakeHsp, 149, 'blastn')
    expected = {
        'subjectStart': 1775,
        'subjectEnd': 1795,
        'readStart': 130,
        'readEnd': 149,
        'readStartInSubject': 1775,
        'readEndInSubject': 1925,
    }
    self.assertEqual(expected, actual)
def test20131113Debugging(self):
    """
    Regression case built from an alignment inspected by hand on
    2013-11-13.

    The HSP is given with a subject start (2339751) greater than its
    subject end (2339365), the matched read sequence contains a gap
    character ('-'), and the read length passed to normalizeHSP is 396.
    """
    hitSequence = (
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGTGTCCGGCA'
        'AGGTTGCCAAGGAGCAGATCGACATCGATAACGCCAAGCACACCAAGTGATGCACTGA'
        'CGACGGGTGAGGCCCAGATTCCTACGGCCTGGGCCTCTGTCTGCGTCGGGATGCCATT'
        'AGGCCGGTAGGATCGGTCACATGATCGATCCCAAGCTCCTGCGAACGGATCCGGACGC'
        'CGTTCGTCGCTCCCAGGCCGCCCGCGGCGAGGACTCCTCGGTTGTGGACGACGTTGTC'
        'GCCGCAGATGAGGCTCGTCGTGAGGCTATTGCTGCCCATGAGAACCTGCGTGCAGAAC'
        'AGAAGGGACTCGGCAAGCGAATCGCTAAAGCATCCGGTG')
    readSequence = (
        'GTC-AGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGTGTCCGGCA'
        'AGGTTGCCAAGGAGCAGATCGACATCGATAACGCCAAGCACACCAAGTGATGCACTGA'
        'CGACGGGTGAGGCCCAGATTCCTACGGCCTGGGCCTCTGTCTGCGTCGGGATGCCATT'
        'AGGCCGCTAGGATCGGTCACATGATCGATCCCAAGCTCCTGCGAACGGATCCGGACGC'
        'CGTTCGTCGCTCCCAGGCCGCCCGCGGCGAGGACTCCTCGGTTGTGGACGACGTTGTC'
        'GCCGCAGATGAGGCTCGTCGTGAGGCTATTGCTGCCCATGAGAACCTGCGTGCAGAAC'
        'AGAAGGGACTCGGCAAGCGAATCGCTAAAGCATCCGGTG')
    fakeHsp = FakeHSP(subjectStart=2339751, subjectEnd=2339365,
                      readStart=1, readEnd=386, frame=self.frame,
                      hit=hitSequence, read=readSequence)
    actual = normalizeHSP(fakeHsp, 396, 'blastn')
    expected = {
        'subjectStart': 2339364,
        'subjectEnd': 2339751,
        'readStart': 0,
        'readEnd': 386,
        'readStartInSubject': 2339354,
        'readEndInSubject': 2339751,
    }
    self.assertEqual(expected, actual)
def test20130721Debugging(self):
    """
    Regression case built from an alignment inspected by hand on
    2013-07-21.

    The hit and read strings are invented: they were added on 2013-11-21
    to introduce 5 gaps in the read, because normalizeHSP had started
    checking its input more rigorously. The HSP is given with a subject
    start (9018) greater than its subject end (8764).
    """
    hitSequence = (
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'AAAAA')
    readSequence = (
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        'GTCGAGAAGATCAAGATTGGTAAGGAGGCCGTGCAGGACACCGAGACCGT'
        '-----')
    expected = {
        'subjectStart': 8763,
        'subjectEnd': 9018,
        'readStart': 65,
        'readEnd': 315,
        'readStartInSubject': 8762,
        'readEndInSubject': 9083,
    }
    fakeHsp = FakeHSP(subjectStart=9018, subjectEnd=8764, readStart=66,
                      readEnd=315, frame=self.frame, hit=hitSequence,
                      read=readSequence)
    self.assertEqual(expected, normalizeHSP(fakeHsp, 316, 'blastn'))
def test20131115Debugging(self):
    """
    Regression case built from an alignment inspected by hand for Barbara
    on 2013-11-15.

    The matched read sequence contains one gap character ('-') and the
    match runs up to the last base of the 149-base read. The read is
    expected to start at a negative subject offset (-75).
    """
    readSequence = 'TTCTTTTTGCATTTGATAGT-TTGCTACAAG'
    hitSequence = 'TTCTTTTTGCAATAGTCAGTCTTGCTAAAAG'
    fakeHsp = FakeHSP(subjectStart=45, subjectEnd=75, readStart=120,
                      readEnd=149, frame=self.frame, read=readSequence,
                      hit=hitSequence)
    actual = normalizeHSP(fakeHsp, 149, 'blastn')
    expected = {
        'subjectStart': 44,
        'subjectEnd': 75,
        'readStart': 119,
        'readEnd': 149,
        'readStartInSubject': -75,
        'readEndInSubject': 75,
    }
    self.assertEqual(expected, actual)