Exemplo n.º 1
0
  def testSecondExonScheme(self):
    """
    Check the exons written by processBED under the SECOND_EXON scheme.

    Every non-blank output line is parsed with parseBEDString; the length
    of the parsed element and its start index are compared with values
    derived from the fixture dictionaries stored on the test instance.
    """

    # run the code...
    infh = DummyInputStream(self.readLines)
    outfh = DummyOutputStream()
    processBED(infh, outfh, SECOND_EXON)

    # see what we get, skipping blank lines
    stripped = (line.strip() for line in outfh.itemsWritten())
    outlines = [line for line in stripped if line != ""]

    for out in outlines:
      e2 = parseBEDString(out)
      gotAnswer = len(e2)

      # the fixtures are keyed by the element name without its last two
      # characters (presumably a suffix appended by processBED -- confirm)
      key = e2.name[:-2]

      r_len = self.readEnds[key] - self.readStarts[key]
      glob_s = self.firstChromStarts[key] + self.readStarts[key]
      expectedAns = r_len - (self.firstChromEnds[key] - glob_s) - 1

      # assertEqual reports both values on failure, unlike assertTrue(a == b)
      self.assertEqual(gotAnswer, expectedAns)
      self.assertEqual(e2.start, self.secondChromStarts[key])
Exemplo n.º 2
0
  def testFirstExonScheme(self):
    """
    Check the exons written by processBED under the FIRST_EXON scheme.

    Every non-blank output line is parsed with parseBEDString; the length
    of the parsed element and its end index are compared with values
    derived from the fixture dictionaries stored on the test instance.
    """

    # run the code...
    infh = DummyInputStream(self.readLines)
    outfh = DummyOutputStream()
    processBED(infh, outfh, FIRST_EXON)

    # see what we get, skipping blank lines
    stripped = (line.strip() for line in outfh.itemsWritten())
    outlines = [line for line in stripped if line != ""]

    for out in outlines:
      e1 = parseBEDString(out)
      gotAnswer = len(e1)

      # the fixtures are keyed by the element name without its last two
      # characters (presumably a suffix appended by processBED -- confirm)
      key = e1.name[:-2]

      read_start_global = self.firstChromStarts[key] + self.readStarts[key]
      expectedAns = self.firstChromEnds[key] - read_start_global + 1

      # assertEqual reports both values on failure, unlike assertTrue(a == b)
      self.assertEqual(gotAnswer, expectedAns)
      self.assertEqual(e1.end, self.firstChromEnds[key])
Exemplo n.º 3
0
    def testSecondExonScheme(self):
        """
        Check the exons written by processBED under the SECOND_EXON scheme.

        Every non-blank output line is parsed with parseBEDString; the
        length of the parsed element and its start index are compared with
        values derived from the fixture dictionaries stored on the test
        instance.
        """

        # run the code...
        infh = DummyInputStream(self.readLines)
        outfh = DummyOutputStream()
        processBED(infh, outfh, SECOND_EXON)

        # see what we get, skipping blank lines
        stripped = (line.strip() for line in outfh.itemsWritten())
        outlines = [line for line in stripped if line != ""]

        for out in outlines:
            e2 = parseBEDString(out)
            gotAnswer = len(e2)

            # the fixtures are keyed by the element name without its last
            # two characters (presumably a suffix appended by processBED
            # -- confirm)
            key = e2.name[:-2]

            r_len = self.readEnds[key] - self.readStarts[key]
            glob_s = self.firstChromStarts[key] + self.readStarts[key]
            expectedAns = r_len - (self.firstChromEnds[key] - glob_s) - 1

            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b)
            self.assertEqual(gotAnswer, expectedAns)
            self.assertEqual(e2.start, self.secondChromStarts[key])
Exemplo n.º 4
0
    def testFirstExonScheme(self):
        """
        Check the exons written by processBED under the FIRST_EXON scheme.

        Every non-blank output line is parsed with parseBEDString; the
        length of the parsed element and its end index are compared with
        values derived from the fixture dictionaries stored on the test
        instance.
        """

        # run the code...
        infh = DummyInputStream(self.readLines)
        outfh = DummyOutputStream()
        processBED(infh, outfh, FIRST_EXON)

        # see what we get, skipping blank lines
        stripped = (line.strip() for line in outfh.itemsWritten())
        outlines = [line for line in stripped if line != ""]

        for out in outlines:
            e1 = parseBEDString(out)
            gotAnswer = len(e1)

            # the fixtures are keyed by the element name without its last
            # two characters (presumably a suffix appended by processBED
            # -- confirm)
            key = e1.name[:-2]

            read_start_global = (self.firstChromStarts[key] +
                                 self.readStarts[key])
            expectedAns = self.firstChromEnds[key] - read_start_global + 1

            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b)
            self.assertEqual(gotAnswer, expectedAns)
            self.assertEqual(e1.end, self.firstChromEnds[key])
Exemplo n.º 5
0
    def testBEDIteratorDropAfter(self):
        """
        Make sure we can drop everything after a certain field in a BED
        file without disturbing the fields that are kept.

        Three eight-field lines are fed to BEDIterator with dropAfter=6;
        the string form of each element it yields must equal the first six
        tab-separated fields of the corresponding input line.
        """
        debug = False

        # one tuple per input line, holding its eight tab-separated fields
        records = [
            ("chr12", "83810028", "83810066", "SRR189775.10000", "9", "-",
             "TTTTTTTTTTTTTTTAAATTCTTCGAATGCCGTTTTCT",
             "]&(2-'+0'+:34J########################"),
            ("chr5", "177570573", "177570611", "SRR189775.10000001", "3", "+",
             "TCACCTTTTTTTCACCTTTTAATTTTATATTATTTATC",
             "K79:77:79797:7797<;>BC979:77B?997:79:7"),
            ("chr4", "78174772", "78174810", "SRR189775.10000009", "0", "+",
             "TTTTATTTTATTTTATTTTTTTACCCTTCCTCAAACAC",
             "G77:797:77977<TS;:9:9:9:9:977<;7@?@=97"),
        ]
        infs = "".join("\t".join(fields) + "\n" for fields in records)
        # only the first six fields should survive dropAfter=6
        expectOut = ["\t".join(fields[:6]) for fields in records]

        ifh = DummyInputStream(infs)
        ofh = DummyOutputStream()
        for e in BEDIterator(ifh, dropAfter=6):
            ofh.write(str(e) + "\n")
        gotOutput = [x.strip() for x in ofh.itemsWritten()]

        if debug:
            sys.stderr.write("expected -------\n")
            for e in expectOut:
                sys.stderr.write(e + "\n")
            sys.stderr.write("got ------------\n")
            for e in gotOutput:
                sys.stderr.write(e + "\n")

        # assertEqual shows a diff of the two lists when they differ
        self.assertEqual(gotOutput, expectOut)
Exemplo n.º 6
0
  def testSimpleKeys(self):
    """
    Run process() over the exon-count and gene fixtures held on the test
    instance and check that the items written to the output stream equal
    self.expectedAns.
    """
    debug = False
    infh = DummyInputStream(self.exonCounts)
    inref = DummyInputStream(self.genes)
    outfh = DummyOutputStream()

    process(infh, inref, outfh, verbose=False, debug=debug)
    gotOutput = outfh.itemsWritten()

    if debug:
      # single-argument print(...) produces the same output whether it is
      # parsed as a Python 2 print statement or the Python 3 print function
      print("expected -------")
      for e in self.expectedAns:
        print(e)
      print("got ------------")
      for e in gotOutput:
        print(e)

    # include both values in the failure message so a mismatch can be
    # diagnosed without flipping the debug switch
    assert self.expectedAns == gotOutput, \
        "expected %r but got %r" % (self.expectedAns, gotOutput)
Exemplo n.º 7
0
  def testBEDIteratorDropAfter(self):
    """
    Make sure we can drop everything after a certain field in a BED file
    without disturbing the fields that are kept.

    Three eight-field lines are fed to BEDIterator with dropAfter=6; the
    string form of each element it yields must equal the first six
    tab-separated fields of the corresponding input line.
    """
    debug = False

    # one tuple per input line, holding its eight tab-separated fields
    records = [
      ("chr12", "83810028", "83810066", "SRR189775.10000", "9", "-",
       "TTTTTTTTTTTTTTTAAATTCTTCGAATGCCGTTTTCT",
       "]&(2-'+0'+:34J########################"),
      ("chr5", "177570573", "177570611", "SRR189775.10000001", "3", "+",
       "TCACCTTTTTTTCACCTTTTAATTTTATATTATTTATC",
       "K79:77:79797:7797<;>BC979:77B?997:79:7"),
      ("chr4", "78174772", "78174810", "SRR189775.10000009", "0", "+",
       "TTTTATTTTATTTTATTTTTTTACCCTTCCTCAAACAC",
       "G77:797:77977<TS;:9:9:9:9:977<;7@?@=97"),
    ]
    infs = "".join("\t".join(fields) + "\n" for fields in records)
    # only the first six fields should survive dropAfter=6
    expectOut = ["\t".join(fields[:6]) for fields in records]

    ifh = DummyInputStream(infs)
    ofh = DummyOutputStream()
    for e in BEDIterator(ifh, dropAfter=6):
      ofh.write(str(e) + "\n")
    gotOutput = [x.strip() for x in ofh.itemsWritten()]

    if debug:
      sys.stderr.write("expected -------\n")
      for e in expectOut:
        sys.stderr.write(e + "\n")
      sys.stderr.write("got ------------\n")
      for e in gotOutput:
        sys.stderr.write(e + "\n")

    # assertEqual shows a diff of the two lists when they differ
    self.assertEqual(gotOutput, expectOut)