def testConvertRegion(self):
    '''
    Convert region/junction coordinates and check where each read lands.

    Reads 'A', 'B' and 'C' must be reported on chr1 with the start position
    and CIGAR listed below; read 'Z' must be flagged as unmapped.
    '''
    ngsutils.bam.convertregion.bam_convertregion(infname, outfname, chromsizes, quiet=True)

    # qname -> (start position, CIGAR string).
    # Positions are 0-based in BAM (1-based in SAM).
    placements = {
        'A': (109, '50M'),
        'B': (174, '26M100N24M'),
        'C': (197, '3M100N47M'),
    }

    bam = ngsutils.bam.bam_open(outfname)
    for read in ngsutils.bam.bam_iter(bam):
        name = read.qname
        if name in placements:
            want_pos, want_cigar = placements[name]
            self.assertEqual(bam.getrname(read.tid), 'chr1')
            self.assertEqual(read.pos, want_pos)
            self.assertEqual(read.cigar, ngsutils.bam.cigar_fromstr(want_cigar))
        elif name == 'Z':
            self.assertTrue(read.is_unmapped)
def testConvertRegionOverlap(self):
    '''
    Convert region/junction coordinates, require overlap with junction

    With enforce_overlap=True only read 'B' may appear in the output (and is
    checked for reference, position and CIGAR); reads 'A', 'C' and 'Z' must
    not be present at all.
    '''
    ngsutils.bam.convertregion.bam_convertregion(
        infname, outfname, chromsizes, enforce_overlap=True, quiet=True)

    # Names of every read encountered in the converted file.
    seen = set()

    bam = ngsutils.bam.bam_open(outfname)
    for read in ngsutils.bam.bam_iter(bam):
        name = read.qname
        if name == 'B':
            # Validate the placement before recording the read as found.
            self.assertEqual(bam.getrname(read.tid), 'chr1')
            self.assertEqual(read.pos, 174)
            self.assertEqual(read.cigar, ngsutils.bam.cigar_fromstr('26M100N24M'))
        seen.add(name)

    self.assertFalse('A' in seen)
    self.assertTrue('B' in seen)
    self.assertFalse('C' in seen)
    self.assertFalse('Z' in seen)
def testMerge(self):
    '''
    Merge test.bam and test2.bam to tmp.bam

    After merging, each named read must be reported on the reference listed
    below, and read 'Z' must remain unmapped.
    '''
    here = os.path.dirname(__file__)
    first = os.path.join(here, 'test.bam')
    second = os.path.join(here, 'test2.bam')
    merged = os.path.join(here, 'tmp.bam')

    ngsutils.bam.merge.bam_merge(merged, [first, second], quiet=True)

    # qname -> reference name expected in the merged output
    expected_ref = {
        'A': 'chr2',  # AS chr2
        'B': 'chr1',  # AS tie
        'C': 'chr1',  # AS tie
        'D': 'chr1',  # AS tie
        'E': 'chr2',  # AS chr2
        'F': 'chr1',  # AS chr1
    }

    bam = ngsutils.bam.bam_open(merged)
    for read in ngsutils.bam.bam_iter(bam):
        name = read.qname
        if name in expected_ref:
            self.assertEqual(bam.getrname(read.tid), expected_ref[name])
        elif name == 'Z':
            # still unmapped
            self.assertTrue(read.is_unmapped)
def find_nearest(bam, bed, maxdist=100000, out=sys.stdout):
    '''
    Report, for every mapped read in `bam`, the distance to the nearest region in `bed`.

    Candidate regions are those returned by
    `bed.fetch(chrom, start, end, strand)` for the read's reference and
    strand, where the window is the read extended by `maxdist` on each side
    (clipped at 0 and at `bam.lengths[read.tid]`).

    One line is written to `out` per mapped read:
        <qname>\\t<distance>   when at least one region was fetched
        <qname>\\t*            when none was

    The distance is 0 when the read start or end falls inside a region, or
    when the read spans a whole region. Otherwise the sign is flipped to
    negative for a forward read whose nearest region is 'down' and for a
    reverse read whose nearest region is 'up'.
    '''
    for read in ngsutils.bam.bam_iter(bam):
        if read.is_unmapped:
            continue

        chrom = bam.getrname(read.tid)
        strand = '-' if read.is_reverse else '+'
        window_start = max(0, read.pos - maxdist)
        window_end = min(read.aend + maxdist, bam.lengths[read.tid])

        # Tuples of (absolute distance, 'up'/'down'/''), relative to the + strand.
        candidates = []
        for region in bed.fetch(chrom, window_start, window_end, strand):
            overlaps = (
                region.start <= read.pos <= region.end        # start is w/in region
                or region.start <= read.aend <= region.end    # end is w/in region
                or read.pos <= region.start <= region.end <= read.aend  # read spans region
            )
            if overlaps:
                candidates.append((0, ''))
            elif region.end < read.pos:
                candidates.append((read.pos - region.end, 'up'))
            else:
                candidates.append((region.start - read.aend, 'down'))

        if not candidates:
            out.write('%s\t*\n' % (read.qname,))
            continue

        # Smallest tuple == first element after sorting.
        distance, orient = min(candidates)
        if distance > 0:
            negate_for = 'up' if read.is_reverse else 'down'
            if orient == negate_for:
                distance = -distance
        out.write('%s\t%s\n' % (read.qname, distance))
def find_nearest(bam, bed, maxdist=100000, out=sys.stdout):
    '''
    For each mapped read, write the signed distance to its closest BED region.

    Unmapped reads are skipped. For the others, regions are looked up with
    `bed.fetch()` on the read's reference and strand, inside a window that
    reaches `maxdist` beyond both ends of the read (bounded below by 0 and
    above by `bam.lengths[read.tid]`).

    Output written to `out` is tab-delimited: the read name, then either
    the distance or '*' if the lookup returned no regions. A distance of 0
    means the read touches the region; a non-zero distance is negated when
    the region is 'down' of a forward read or 'up' of a reverse read.
    '''

    def _classify(read, region):
        # Returns (abs distance, orientation) with respect to the + strand.
        pos, aend = read.pos, read.aend
        if region.start <= pos <= region.end:
            return (0, '')  # read start lies within the region
        if region.start <= aend <= region.end:
            return (0, '')  # read end lies within the region
        if pos <= region.start <= region.end <= aend:
            return (0, '')  # read covers the entire region
        if region.end < pos:
            return (pos - region.end, 'up')
        return (region.start - aend, 'down')

    for read in ngsutils.bam.bam_iter(bam):
        if read.is_unmapped:
            continue

        chrom = bam.getrname(read.tid)
        strand = '-' if read.is_reverse else '+'
        lo = max(0, read.pos - maxdist)
        hi = min(read.aend + maxdist, bam.lengths[read.tid])

        dists = [_classify(read, region) for region in bed.fetch(chrom, lo, hi, strand)]

        if dists:
            nearest = sorted(dists)[0]
            distance = nearest[0]
            orient = nearest[1]
            forward_down = (not read.is_reverse) and orient == 'down'
            reverse_up = read.is_reverse and orient == 'up'
            if distance > 0 and (forward_down or reverse_up):
                distance = -distance
            out.write('%s\t%s\n' % (read.qname, distance))
        else:
            out.write('%s\t*\n' % (read.qname, ))