Exemplo n.º 1
0
    def test_split_by_contigs_presplit(self):
        # Consumes too much memory for Jenkins

        # Test to make sure the result of a split by contigs has an appropriate
        # number of records (make sure filters are appropriately aggressive)
        ds2 = DataSet(data.getXml(14))
        bams = ds2.externalResources.resourceIds
        assert len(bams) == 2
        refwindows = ds2.refWindows
        assert refwindows == [(0, 0, 224992)]
        res1 = openIndexedAlignmentFile(bams[0][7:])
        res2 = openIndexedAlignmentFile(bams[1][7:])

        def count(iterable):
            count = 0
            for _ in iterable:
                count += 1
            return count

        assert count(res1.readsInRange(*refwindows[0])) == 1409
        assert count(res2.readsInRange(*refwindows[0])) == 1375
        assert count(ds2.readsInRange(*refwindows[0])) == 2784
        assert count(ds2.records) == 2784
        ds2.disableFilters()
        assert count(ds2.records) == 53552
        assert ds2.countRecords() == 53552
Exemplo n.º 2
0
    def test_split_by_contigs_presplit(self):
        # Consumes too much memory for Jenkins

        # Test to make sure the result of a split by contigs has an appropriate
        # number of records (make sure filters are appropriately aggressive)
        ds2 = DataSet(data.getXml(15))
        bams = ds2.externalResources.resourceIds
        self.assertEqual(len(bams), 2)
        refwindows = ds2.refWindows
        self.assertEqual(refwindows, [(0, 0, 224992)])
        res1 = openIndexedAlignmentFile(bams[0][7:])
        res2 = openIndexedAlignmentFile(bams[1][7:])

        def count(iterable):
            count = 0
            for _ in iterable:
                count += 1
            return count

        self.assertEqual(count(res1.readsInRange(*refwindows[0])), 1409)
        self.assertEqual(count(res2.readsInRange(*refwindows[0])), 1375)
        self.assertEqual(count(ds2.readsInRange(*refwindows[0])), 2784)
        self.assertEqual(count(ds2.records), 2784)
        ds2.disableFilters()
        self.assertEqual(count(ds2.records), 53552)
        self.assertEqual(ds2.countRecords(), 53552)
Exemplo n.º 3
0
    def test_reads_in_range(self):
        ds = DataSet(data.getBam())
        refNames = ds.refNames

        rn = refNames[15]
        reads = ds.readsInRange(rn, 10, 100)
        self.assertEqual(len(list(reads)), 10)

        ds2 = DataSet(data.getBam(0))
        reads = ds2.readsInRange("E.faecalis.1", 0, 1400)
        self.assertEqual(len(list(reads)), 20)

        lengths = ds.refLengths
        for rname, rId in ds.refInfo('ID'):
            rn = ds._idToRname(rId)
            self.assertEqual(rname, rn)
            rlen = lengths[rn]
            self.assertEqual(len(list(ds.readsInReference(rn))),
                             len(list(ds.readsInReference(rId))))
            self.assertEqual(len(list(ds.readsInRange(rn, 0, rlen))),
                             len(list(ds.readsInRange(rId, 0, rlen))))
Exemplo n.º 4
0
    def test_reads_in_range(self):
        ds = DataSet(data.getBam())
        refNames = ds.refNames

        rn = refNames[15]
        reads = ds.readsInRange(rn, 10, 100)
        self.assertEqual(len(list(reads)), 10)

        ds2 = DataSet(data.getBam(0))
        reads = ds2.readsInRange("E.faecalis.1", 0, 1400)
        self.assertEqual(len(list(reads)), 20)

        lengths = ds.refLengths
        for rname, rId in ds.refInfo('ID'):
            rn = ds._idToRname(rId)
            self.assertEqual(rname, rn)
            rlen = lengths[rn]
            self.assertEqual(len(list(ds.readsInReference(rn))),
                             len(list(ds.readsInReference(rId))))
            self.assertEqual(len(list(ds.readsInRange(rn, 0, rlen))),
                             len(list(ds.readsInRange(rId, 0, rlen))))
Exemplo n.º 5
0
    def test_staggered_reads_in_range(self):
        ds = DataSet(data.getXml(8))
        refNames = ds.refNames

        rn = 'B.vulgatus.5'
        reads = list(ds.readsInRange(rn, 0, 10000))
        ds2 = DataSet(data.getXml(11))
        reads2 = list(ds2.readsInRange(rn, 0, 10000))
        dsBoth = DataSet(data.getXml(8), data.getXml(11))
        readsBoth = list(dsBoth.readsInRange(rn, 0, 10000))
        self.assertEqual(len(reads), 2)
        self.assertEqual(len(reads2), 5)
        self.assertEqual(len(readsBoth), 7)
        read_starts = (0, 1053)
        for read, start in zip(reads, read_starts):
            self.assertEqual(read.tStart, start)
        read2_starts = (0, 0, 3, 3, 4)
        for read, start in zip(reads2, read2_starts):
            self.assertEqual(read.tStart, start)
        readboth_starts = (0, 0, 0, 3, 3, 4, 1053)
        for read, start in zip(readsBoth, readboth_starts):
            self.assertEqual(read.tStart, start)
Exemplo n.º 6
0
    def test_staggered_reads_in_range(self):
        ds = DataSet(data.getXml(8))
        refNames = ds.refNames

        rn = 'B.vulgatus.5'
        reads = list(ds.readsInRange(rn, 0, 10000))
        ds2 = DataSet(data.getXml(11))
        reads2 = list(ds2.readsInRange(rn, 0, 10000))
        dsBoth = DataSet(data.getXml(8), data.getXml(11))
        readsBoth = list(dsBoth.readsInRange(rn, 0, 10000))
        self.assertEqual(len(reads), 2)
        self.assertEqual(len(reads2), 5)
        self.assertEqual(len(readsBoth), 7)
        read_starts = (0, 1053)
        for read, start in zip(reads, read_starts):
            self.assertEqual(read.tStart, start)
        read2_starts = (0, 0, 3, 3, 4)
        for read, start in zip(reads2, read2_starts):
            self.assertEqual(read.tStart, start)
        readboth_starts = (0, 0, 0, 3, 3, 4, 1053)
        for read, start in zip(readsBoth, readboth_starts):
            self.assertEqual(read.tStart, start)
Exemplo n.º 7
0
    def test_split_by_contigs_presplit(self):
        # Consumes too much memory for Jenkins

        # Test to make sure the result of a split by contigs has an appropriate
        # number of records (make sure filters are appropriately aggressive)
        ds2 = DataSet(data.getXml(15))
        bams = ds2.externalResources.resourceIds
        self.assertEqual(len(bams), 2)
        refwindows = ds2.refWindows
        self.assertEqual(refwindows, [(0, 0, 224992)])
        res1 = openIndexedAlignmentFile(bams[0][7:])
        res2 = openIndexedAlignmentFile(bams[1][7:])
        def count(iterable):
            count = 0
            for _ in iterable:
                count += 1
            return count
        self.assertEqual(count(res1.readsInRange(*refwindows[0])), 1409)
        self.assertEqual(count(res2.readsInRange(*refwindows[0])), 1375)
        self.assertEqual(count(ds2.readsInRange(*refwindows[0])), 2784)
        self.assertEqual(count(ds2.records), 2784)
        ds2.disableFilters()
        self.assertEqual(count(ds2.records), 53552)
        self.assertEqual(ds2.countRecords(), 53552)