Пример #1
0
    def testConvertRegion(self):
        '''
        Convert region/junction coordinates and verify the known reads.

        Runs bam_convertregion on the module-level input file, then checks
        every read named A, B or C found in the output against its expected
        reference name, position and CIGAR. A read named Z, if present,
        must be flagged as unmapped. Reads with other names are ignored.
        '''
        ngsutils.bam.convertregion.bam_convertregion(
            infname, outfname, chromsizes, quiet=True)

        # qname -> (position, CIGAR string); positions are 0-based in BAM,
        # 1-based in SAM.
        expected = {
            'A': (109, '50M'),
            'B': (174, '26M100N24M'),
            'C': (197, '3M100N47M'),
        }

        bam = ngsutils.bam.bam_open(outfname)
        for read in ngsutils.bam.bam_iter(bam):
            name = read.qname
            if name in expected:
                want_pos, want_cigar = expected[name]
                self.assertEqual(bam.getrname(read.tid), 'chr1')
                self.assertEqual(read.pos, want_pos)
                self.assertEqual(read.cigar,
                                 ngsutils.bam.cigar_fromstr(want_cigar))
            elif name == 'Z':
                self.assertTrue(read.is_unmapped)
Пример #2
0
    def testConvertRegionOverlap(self):
        '''
        Convert region/junction coordinates, require overlap with junction

        With enforce_overlap=True only read B is expected in the output
        (and it must keep its chr1 / 174 / 26M100N24M alignment); reads
        A, C and Z must not appear at all.
        '''
        ngsutils.bam.convertregion.bam_convertregion(
            infname, outfname, chromsizes, enforce_overlap=True, quiet=True)

        # Names of every read encountered in the converted file.
        seen = set()

        bam = ngsutils.bam.bam_open(outfname)
        for read in ngsutils.bam.bam_iter(bam):
            name = read.qname
            if name == 'B':
                self.assertEqual(bam.getrname(read.tid), 'chr1')
                self.assertEqual(read.pos, 174)
                self.assertEqual(read.cigar,
                                 ngsutils.bam.cigar_fromstr('26M100N24M'))
            seen.add(name)

        self.assertFalse('A' in seen)
        self.assertTrue('B' in seen)
        self.assertFalse('C' in seen)
        self.assertFalse('Z' in seen)
Пример #3
0
    def testConvertRegionOverlap(self):
        '''
        Convert region/junction coordinates, require overlap with junction

        Only read B should be written when enforce_overlap=True; it is
        checked for reference chr1, position 174 and CIGAR 26M100N24M.
        Reads A, C and Z must be absent from the output.
        '''
        ngsutils.bam.convertregion.bam_convertregion(
            infname, outfname, chromsizes, enforce_overlap=True, quiet=True)

        # Collect the qname of each read present in the output file.
        found = set()

        bam = ngsutils.bam.bam_open(outfname)
        for read in ngsutils.bam.bam_iter(bam):
            qname = read.qname
            if qname == 'B':
                self.assertEqual(bam.getrname(read.tid), 'chr1')
                self.assertEqual(read.pos, 174)
                self.assertEqual(read.cigar, ngsutils.bam.cigar_fromstr('26M100N24M'))
            found.add(qname)

        self.assertFalse('A' in found)
        self.assertTrue('B' in found)
        self.assertFalse('C' in found)
        self.assertFalse('Z' in found)
Пример #4
0
    def testMerge(self):
        '''
        Merge test.bam and test2.bam to tmp.bam

        All three files live next to this test module. After merging,
        each known read found in tmp.bam is checked against the reference
        it is expected to be assigned to; read Z must remain unmapped.
        '''
        here = os.path.dirname(__file__)
        fname1 = os.path.join(here, 'test.bam')
        fname2 = os.path.join(here, 'test2.bam')
        outfname = os.path.join(here, 'tmp.bam')

        ngsutils.bam.merge.bam_merge(outfname, [fname1, fname2], quiet=True)

        # qname -> expected reference name (annotations kept from the
        # original assertions).
        expected_ref = {
            'A': 'chr2',  # AS chr2
            'B': 'chr1',  # AS tie
            'C': 'chr1',  # AS tie
            'D': 'chr1',  # AS tie
            'E': 'chr2',  # AS chr2
            'F': 'chr1',  # AS chr1
        }

        bam = ngsutils.bam.bam_open(outfname)
        for read in ngsutils.bam.bam_iter(bam):
            name = read.qname
            if name in expected_ref:
                self.assertEqual(bam.getrname(read.tid), expected_ref[name])
            elif name == 'Z':  # still unmapped
                self.assertTrue(read.is_unmapped)
Пример #5
0
    def testMerge(self):
        '''
        Merge test.bam and test2.bam to tmp.bam

        Input and output paths are resolved relative to this module's
        directory. Every known read present in the merged file must map
        to its expected reference, and read Z must still be unmapped.
        '''
        base_dir = os.path.dirname(__file__)
        fname1 = os.path.join(base_dir, 'test.bam')
        fname2 = os.path.join(base_dir, 'test2.bam')
        outfname = os.path.join(base_dir, 'tmp.bam')

        ngsutils.bam.merge.bam_merge(outfname, [fname1, fname2], quiet=True)

        # Expected reference per read name (original annotations retained).
        chrom_for = {
            'A': 'chr2',  # AS chr2
            'B': 'chr1',  # AS tie
            'C': 'chr1',  # AS tie
            'D': 'chr1',  # AS tie
            'E': 'chr2',  # AS chr2
            'F': 'chr1',  # AS chr1
        }

        bam = ngsutils.bam.bam_open(outfname)
        for read in ngsutils.bam.bam_iter(bam):
            qname = read.qname
            if qname in chrom_for:
                self.assertEqual(bam.getrname(read.tid), chrom_for[qname])
            elif qname == 'Z':  # still unmapped
                self.assertTrue(read.is_unmapped)
Пример #6
0
    def testConvertRegion(self):
        '''
        Convert region/junction coordinates

        Each read named A, B or C in the converted file is compared with
        its expected reference (chr1), position and CIGAR. A read named Z
        must be unmapped. Other reads are not inspected.
        '''
        ngsutils.bam.convertregion.bam_convertregion(
            infname, outfname, chromsizes, quiet=True)

        # qname -> (position, CIGAR); 0-based in BAM, 1-based in SAM
        checks = {
            'A': (109, '50M'),
            'B': (174, '26M100N24M'),
            'C': (197, '3M100N47M'),
        }

        bam = ngsutils.bam.bam_open(outfname)
        for read in ngsutils.bam.bam_iter(bam):
            qname = read.qname
            if qname in checks:
                pos, cigar = checks[qname]
                self.assertEqual(bam.getrname(read.tid), 'chr1')
                self.assertEqual(read.pos, pos)
                self.assertEqual(read.cigar, ngsutils.bam.cigar_fromstr(cigar))
            elif qname == 'Z':
                self.assertTrue(read.is_unmapped)
Пример #7
0
def find_nearest(bam, bed, maxdist=100000, out=sys.stdout):
    """Write, for each mapped read, the distance to its nearest BED region.

    For every read yielded by ``ngsutils.bam.bam_iter(bam)`` that is not
    unmapped, ``bed.fetch`` is queried with the read's reference name, a
    window of ``maxdist`` around the read (clamped to 0 and to
    ``bam.lengths[read.tid]``) and the read's strand. One line is written
    to ``out`` per read: ``<qname>\\t<distance>``, or ``<qname>\\t*`` when
    the query returns no regions.

    The distance is 0 when the read start or end lies inside a region or
    the read spans it. Otherwise it is the gap to the closest region,
    negated when that region is 'down' of a forward read or 'up' of a
    reverse read.

    Args:
        bam: open BAM handle providing ``getrname`` and ``lengths``.
        bed: object providing ``fetch(chrom, start, end, strand)``.
        maxdist: how far on each side of the read to search.
        out: writable file-like object receiving the report.
    """
    for read in ngsutils.bam.bam_iter(bam):
        if read.is_unmapped:
            continue

        chrom = bam.getrname(read.tid)
        strand = '-' if read.is_reverse else '+'
        start = max(0, read.pos - maxdist)
        end = min(read.aend + maxdist, bam.lengths[read.tid])

        # Tuples of (absolute distance, 'up'/'down'/''), with the direction
        # expressed relative to the + strand.
        candidates = []
        for region in bed.fetch(chrom, start, end, strand):
            overlaps = (
                region.start <= read.pos <= region.end  # start w/in region
                or region.start <= read.aend <= region.end  # end w/in region
                # read spans the entire region
                or read.pos <= region.start <= region.end <= read.aend
            )
            if overlaps:
                candidates.append((0, ''))
            elif region.end < read.pos:
                candidates.append((read.pos - region.end, 'up'))
            else:
                candidates.append((region.start - read.aend, 'down'))

        if not candidates:
            out.write('%s\t*\n' % (read.qname,))
            continue

        # Smallest tuple == closest region (ties broken by the label).
        distance, orient = min(candidates)

        # Flip the sign when the direction label matches the one that is
        # negated for this read's strand.
        if distance > 0 and orient == ('up' if read.is_reverse else 'down'):
            distance = -distance

        out.write('%s\t%s\n' % (read.qname, distance))
Пример #8
0
def find_nearest(bam, bed, maxdist=100000, out=sys.stdout):
    """Report the distance from each mapped read to the closest BED region.

    Unmapped reads are skipped without output. For each remaining read,
    regions are fetched from ``bed`` on the read's reference and strand
    within ``maxdist`` of the read (the window is clamped to
    ``[0, bam.lengths[read.tid]]``). A tab-separated line of read name and
    distance is written to ``out``; ``*`` replaces the distance when no
    region was fetched.

    A region that contains the read start or end, or that is spanned by
    the read, counts as distance 0. For other regions the gap is measured
    from the read start ('up') or read end ('down'); the reported value is
    negated for forward reads with a 'down' region and for reverse reads
    with an 'up' region.

    Args:
        bam: open BAM handle providing ``getrname`` and ``lengths``.
        bed: object providing ``fetch(chrom, start, end, strand)``.
        maxdist: search radius around the read.
        out: writable file-like object receiving the report.
    """
    for read in ngsutils.bam.bam_iter(bam):
        if read.is_unmapped:
            continue

        chrom = bam.getrname(read.tid)
        strand = '-' if read.is_reverse else '+'
        start = max(0, read.pos - maxdist)
        end = min(read.aend + maxdist, bam.lengths[read.tid])

        # List of (abs_val of the distance, 'up'/'down') tuples, with the
        # direction given with respect to the + strand.
        hits = []
        for region in bed.fetch(chrom, start, end, strand):
            start_inside = region.start <= read.pos <= region.end
            if (start_inside
                    or region.start <= read.aend <= region.end
                    or read.pos <= region.start <= region.end <= read.aend):
                # read start/end is w/in the region, or read spans it
                hits.append((0, ''))
            elif region.end < read.pos:
                hits.append((read.pos - region.end, 'up'))
            else:
                hits.append((region.start - read.aend, 'down'))

        if not hits:
            out.write('%s\t*\n' % (read.qname, ))
            continue

        # The minimum tuple is the nearest region.
        distance, orient = min(hits)

        negate_when = 'up' if read.is_reverse else 'down'
        if distance > 0 and orient == negate_when:
            distance = -distance

        out.write('%s\t%s\n' % (read.qname, distance))