def testSecondExonScheme(self):
    """
    Check the size and start of exons written under the SECOND_EXON scheme.

    Feeds self.readLines through processBED and, for every non-blank line
    written, parses the line with parseBEDString and verifies that:

      * len(element) equals
        (readEnd - readStart) - (firstChromEnd - globalReadStart) - 1,
        where globalReadStart = firstChromStart + readStart; and
      * element.start equals the matching entry in self.secondChromStarts.
    """
    # run the code...
    infh = DummyInputStream(self.readLines)
    outfh = DummyOutputStream()
    processBED(infh, outfh, SECOND_EXON)

    # see what we get; blank lines are ignored
    stripped = [line.strip() for line in outfh.itemsWritten()]
    outlines = [line for line in stripped if line != ""]
    for out in outlines:
        e2 = parseBEDString(out)
        gotAnswer = len(e2)

        # The fixture dictionaries are keyed by the output name without its
        # last two characters (presumably a suffix appended by processBED --
        # TODO confirm).
        key = e2.name[:-2]
        r_len = self.readEnds[key] - self.readStarts[key]
        glob_s = self.firstChromStarts[key] + self.readStarts[key]
        expectedAns = r_len - (self.firstChromEnds[key] - glob_s) - 1

        # assertEqual reports both operands when the check fails
        self.assertEqual(gotAnswer, expectedAns)
        self.assertEqual(e2.start, self.secondChromStarts[key])
def testFirstExonScheme(self):
    """
    Check the size and end of exons written under the FIRST_EXON scheme.

    Feeds self.readLines through processBED and, for every non-blank line
    written, parses the line with parseBEDString and verifies that:

      * len(element) equals firstChromEnd - globalReadStart + 1, where
        globalReadStart = firstChromStart + readStart; and
      * element.end equals the matching entry in self.firstChromEnds.
    """
    # run the code...
    infh = DummyInputStream(self.readLines)
    outfh = DummyOutputStream()
    processBED(infh, outfh, FIRST_EXON)

    # see what we get; blank lines are ignored
    stripped = [line.strip() for line in outfh.itemsWritten()]
    outlines = [line for line in stripped if line != ""]
    for out in outlines:
        e1 = parseBEDString(out)
        gotAnswer = len(e1)

        # The fixture dictionaries are keyed by the output name without its
        # last two characters (presumably a suffix appended by processBED --
        # TODO confirm).
        key = e1.name[:-2]
        read_start_global = self.firstChromStarts[key] + self.readStarts[key]
        expectedAns = self.firstChromEnds[key] - read_start_global + 1

        # assertEqual reports both operands when the check fails
        self.assertEqual(gotAnswer, expectedAns)
        self.assertEqual(e1.end, self.firstChromEnds[key])
def testFirstExonScheme(self):
    """
    Check the size and end of exons written under the FIRST_EXON scheme.

    Feeds self.readLines through processBED and, for every non-blank line
    written, parses the line with parseBEDString and verifies that:

      * len(element) equals firstChromEnd - globalReadStart + 1, where
        globalReadStart = firstChromStart + readStart; and
      * element.end equals the matching entry in self.firstChromEnds.
    """
    # run the code...
    infh = DummyInputStream(self.readLines)
    outfh = DummyOutputStream()
    processBED(infh, outfh, FIRST_EXON)

    # see what we get; blank lines are ignored
    stripped = [line.strip() for line in outfh.itemsWritten()]
    outlines = [line for line in stripped if line != ""]
    for out in outlines:
        e1 = parseBEDString(out)
        gotAnswer = len(e1)

        # The fixture dictionaries are keyed by the output name without its
        # last two characters (presumably a suffix appended by processBED --
        # TODO confirm).
        key = e1.name[:-2]
        read_start_global = self.firstChromStarts[key] + self.readStarts[key]
        expectedAns = self.firstChromEnds[key] - read_start_global + 1

        # assertEqual reports both operands when the check fails
        self.assertEqual(gotAnswer, expectedAns)
        self.assertEqual(e1.end, self.firstChromEnds[key])
def testBEDIteratorDropAfter(self):
    """
    Make sure we can drop everything after a certain field in a BED file
    without disturbing the fields that are kept.

    Three eight-field, tab-separated records are read through
    BEDIterator(..., dropAfter=6); str() of each element yielded is expected
    to be the first six fields of the corresponding input line.
    """
    debug = False
    keep = 6  # value passed as dropAfter; also the number of fields expected back

    # one tuple per input line, one item per tab-separated field
    records = [
        ("chr12", "83810028", "83810066", "SRR189775.10000", "9", "-",
         "TTTTTTTTTTTTTTTAAATTCTTCGAATGCCGTTTTCT",
         "]&(2-'+0'+:34J########################"),
        ("chr5", "177570573", "177570611", "SRR189775.10000001", "3", "+",
         "TCACCTTTTTTTCACCTTTTAATTTTATATTATTTATC",
         "K79:77:79797:7797<;>BC979:77B?997:79:7"),
        ("chr4", "78174772", "78174810", "SRR189775.10000009", "0", "+",
         "TTTTATTTTATTTTATTTTTTTACCCTTCCTCAAACAC",
         "G77:797:77977<TS;:9:9:9:9:977<;7@?@=97"),
    ]
    infs = "".join("\t".join(fields) + "\n" for fields in records)
    expectOut = ["\t".join(fields[:keep]) for fields in records]

    ifh = DummyInputStream(infs)
    ofh = DummyOutputStream()
    for e in BEDIterator(ifh, dropAfter=keep):
        ofh.write(str(e) + "\n")
    gotOutput = [x.strip() for x in ofh.itemsWritten()]

    if debug:
        sys.stderr.write("expected -------\n")
        for e in expectOut:
            sys.stderr.write(e + "\n")
        sys.stderr.write("got ------------\n")
        for e in gotOutput:
            sys.stderr.write(e + "\n")

    # assertEqual reports the differing items when the check fails
    self.assertEqual(gotOutput, expectOut)
def testSimpleKeys(self):
    """
    Run process() with self.exonCounts as the input stream and self.genes
    as the reference stream, and check that the items written to the output
    stream equal self.expectedAns.

    Set the local ``debug`` flag to True to pass debug=True to process() and
    to print the expected and actual output before the comparison.
    """
    debug = False
    infh = DummyInputStream(self.exonCounts)
    inref = DummyInputStream(self.genes)
    outfh = DummyOutputStream()
    process(infh, inref, outfh, verbose=False, debug=debug)

    gotOutput = outfh.itemsWritten()
    if debug:
        # A single-argument print(...) produces the same output whether it is
        # parsed as a Python 2 print statement or a Python 3 function call.
        print("expected -------")
        for e in self.expectedAns:
            print(e)
        print("got ------------")
        for e in gotOutput:
            print(e)
    assert self.expectedAns == gotOutput
def testBEDIteratorDropAfter(self):
    """
    Make sure we can drop everything after a certain field in a BED file
    without disturbing the fields that are kept.

    Three eight-field, tab-separated records are read through
    BEDIterator(..., dropAfter=6); str() of each element yielded is expected
    to be the first six fields of the corresponding input line.
    """
    debug = False
    keep = 6  # value passed as dropAfter; also the number of fields expected back

    # one tuple per input line, one item per tab-separated field
    records = [
        ("chr12", "83810028", "83810066", "SRR189775.10000", "9", "-",
         "TTTTTTTTTTTTTTTAAATTCTTCGAATGCCGTTTTCT",
         "]&(2-'+0'+:34J########################"),
        ("chr5", "177570573", "177570611", "SRR189775.10000001", "3", "+",
         "TCACCTTTTTTTCACCTTTTAATTTTATATTATTTATC",
         "K79:77:79797:7797<;>BC979:77B?997:79:7"),
        ("chr4", "78174772", "78174810", "SRR189775.10000009", "0", "+",
         "TTTTATTTTATTTTATTTTTTTACCCTTCCTCAAACAC",
         "G77:797:77977<TS;:9:9:9:9:977<;7@?@=97"),
    ]
    infs = "".join("\t".join(fields) + "\n" for fields in records)
    expectOut = ["\t".join(fields[:keep]) for fields in records]

    ifh = DummyInputStream(infs)
    ofh = DummyOutputStream()
    for e in BEDIterator(ifh, dropAfter=keep):
        ofh.write(str(e) + "\n")
    gotOutput = [x.strip() for x in ofh.itemsWritten()]

    if debug:
        sys.stderr.write("expected -------\n")
        for e in expectOut:
            sys.stderr.write(e + "\n")
        sys.stderr.write("got ------------\n")
        for e in gotOutput:
            sys.stderr.write(e + "\n")

    # assertEqual reports the differing items when the check fails
    self.assertEqual(gotOutput, expectOut)