def anchor(args):
    """
    %prog anchor map.bed markers.blast > anchored.bed

    Anchor scaffolds based on map.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so implementation notes are kept in comments rather than the docstring.
    #
    # `args` is the list of command-line arguments. Exactly two positionals
    # are required (map BED file, marker BLAST file); otherwise the help text
    # is printed and the process exits.
    from jcvi.formats.blast import bed

    p = OptionParser(anchor.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    mapbed, blastfile = args

    # presumably `bed()` converts the BLAST hits into a BED file and returns
    # its path -- confirm against jcvi.formats.blast.
    bedfile = bed([blastfile])
    markersbed = Bed(bedfile)
    # Lookup keyed by marker name; each value unpacks into a pair whose second
    # item carries seqid/start/end of the marker hit.
    markers = markersbed.order

    mapbed = Bed(mapbed, sorted=False)
    for b in mapbed:
        m = b.accn
        # Map entries without a located marker are dropped from the output.
        if m not in markers:
            continue

        _, mb = markers[m]
        # Replace the marker name with the location of its hit.
        b.accn = "{0}:{1}-{2}".format(mb.seqid, mb.start, mb.end)

        # Single-argument parenthesised print: identical output under the
        # Python 2 print statement and valid as a Python 3 function call.
        print(b)
def rename(args):
    """
    %prog rename map markers.blast > renamed.map

    Rename markers according to the new mapping locations.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so implementation notes are kept in comments rather than the docstring.
    #
    # `args` is the list of command-line arguments. Exactly two positionals
    # are required (MSTMap file, marker BLAST file); otherwise the help text
    # is printed and the process exits.
    from jcvi.formats.blast import bed

    p = OptionParser(rename.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    mstmap, blastfile = args

    # presumably `bed()` converts the BLAST hits into a BED file and returns
    # its path -- confirm against jcvi.formats.blast.
    bedfile = bed([blastfile])
    markersbed = Bed(bedfile)
    # Lookup keyed by marker name; each value unpacks into a pair whose second
    # item carries seqid/start of the marker hit.
    markers = markersbed.order

    data = MSTMap(mstmap)
    header = data.header
    # Insert the two new location columns right after the first header field.
    header = [header[0]] + ["seqid", "start"] + header[1:]

    # Single-argument parenthesised print: identical output under the
    # Python 2 print statement and valid as a Python 3 function call.
    print("\t".join(header))

    for b in data:
        m, geno = b.id, b.genotype
        # Markers without a located hit are dropped from the output.
        if m not in markers:
            continue

        _, mb = markers[m]
        # Genotype calls are emitted as individual tab-separated columns.
        fields = (m, mb.seqid, mb.start, "\t".join(geno))
        print("\t".join(str(x) for x in fields))
def breakpoint(args):
    """
    %prog breakpoint blastfile bedfile

    Identify breakpoints where collinearity ends. `blastfile` contains mapping
    from markers (query) to scaffolds (subject). `bedfile` contains marker
    locations in the related species.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so implementation notes are kept in comments rather than the docstring.
    #
    # `args` is the list of command-line arguments. Exactly two positionals
    # are required (BLAST file, BED file); otherwise the help text is printed
    # and the process exits.
    from jcvi.formats.blast import bed
    from jcvi.utils.range import range_interleave

    p = OptionParser(breakpoint.__doc__)
    p.add_option("--xdist", type="int", default=20,
                 help="xdist (in related genome) cutoff [default: %default]")
    p.add_option("--ydist", type="int", default=200000,
                 help="ydist (in current genome) cutoff [default: %default]")
    p.add_option("-n", type="int", default=5,
                 help="number of markers in a block [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    blastfile, bedfile = args

    order = Bed(bedfile).order
    # presumably `bed()` converts the BLAST hits into a BED file and returns
    # its path -- confirm against jcvi.formats.blast.
    blastbedfile = bed([blastfile])
    bbed = Bed(blastbedfile)

    for scaffold, bs in bbed.sub_beds():
        blocks = get_blocks(scaffold, bs, order,
                            xdist=opts.xdist, ydist=opts.ydist, N=opts.n)

        # Reduce every block to the span of its second coordinates.
        sblocks = []
        for block in blocks:
            # Each block element is a pair; only the second members are used.
            _, yy = zip(*block)
            sblocks.append((scaffold, min(yy), max(yy)))

        # presumably yields the gaps between consecutive block spans --
        # confirm against jcvi.utils.range.range_interleave.
        iblocks = range_interleave(sblocks)
        for ch, start, end in iblocks:
            # The start is shifted down by one on output (looks like a
            # conversion to 0-based BED starts -- TODO confirm).
            # Single-argument parenthesised print: identical output under the
            # Python 2 print statement and valid as a Python 3 function call.
            print("{0}\t{1}\t{2}".format(ch, start - 1, end))