def estimate(args):
    """
    %prog estimate gaps.bed all.spans.bed all.mates

    Estimate gap sizes based on mate positions and library insert sizes.
    """
    # The docstring above is handed to OptionParser as the usage text, so it
    # is kept verbatim; implementation notes live in comments.
    #
    # args: list of command-line tokens (three positional paths plus an
    #       optional --minlinks).
    # Side effects: writes `gaps.linkage.bed` in the current directory and
    #       prints well-supported gaps to stdout. Returns None; exits via
    #       sys.exit() when the positional count is wrong.
    #
    # NOTE(review): a second `estimate` is defined later in this file and
    # shadows this one - confirm which copy is meant to survive.
    from collections import defaultdict
    from jcvi.formats.bed import intersectBed_wao
    from jcvi.formats.posmap import MatesFile

    p = OptionParser(estimate.__doc__)
    p.add_option("--minlinks", default=3, type="int",
                 help="Minimum number of links to place [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    gapsbed, spansbed, matesfile = args
    # The loaded mates are not consulted anywhere below; the call is kept in
    # case loading has side effects (e.g. input validation) - TODO confirm.
    MatesFile(matesfile)
    order = Bed(gapsbed).order

    # gap name -> set of names of the spans that overlap that gap
    gap2mate = defaultdict(set)
    for a, b in intersectBed_wao(gapsbed, spansbed):
        # Only gaps whose span is exactly 100 are processed (presumably the
        # placeholder size for gaps of unknown length - verify).
        if a.span != 100:
            continue

        gapname = a.accn
        if b is None:
            # No overlapping span: still record the gap, with zero links.
            gap2mate[gapname] = set()
            continue

        gap2mate[gapname].add(b.accn)

    omgapsbed = "gaps.linkage.bed"
    # Context manager guarantees the handle is closed even if a lookup in
    # `order` raises part-way through.
    with open(omgapsbed, "w") as fw:
        for gapname, mates in sorted(gap2mate.items()):
            _, b = order[gapname]
            nmates = len(mates)
            if nmates < opts.minlinks:
                # Under-supported gap: write the gap's BED record + count.
                print("{0}\t{1}".format(b, nmates), file=fw)
                continue
            print(gapname, mates)
def estimate(args):
    """
    %prog estimate gaps.bed all.spans.bed all.mates

    Estimate gap sizes based on mate positions and library insert sizes.
    """
    # The docstring above doubles as the OptionParser usage text, so its
    # wording is left untouched.
    #
    # args: command-line tokens; exactly three positional paths are required
    #       (gaps BED, spans BED, mates file), optionally --minlinks N.
    # Output: gaps with fewer than --minlinks overlapping spans are written
    #       to `gaps.linkage.bed`; all other gaps are printed to stdout.
    #
    # NOTE(review): an `estimate` function is also defined immediately above
    # in this file; this later definition is the one that takes effect.
    from collections import defaultdict
    from jcvi.formats.bed import intersectBed_wao
    from jcvi.formats.posmap import MatesFile

    p = OptionParser(estimate.__doc__)
    p.add_option("--minlinks", default=3, type="int",
                 help="Minimum number of links to place [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    gapsbed, spansbed, matesfile = args
    # Result is unused below; the call is retained in case constructing the
    # object has side effects such as validating the file - TODO confirm.
    MatesFile(matesfile)
    bed = Bed(gapsbed)
    order = bed.order

    # Maps each gap name to the set of span names overlapping it.
    gap2mate = defaultdict(set)
    for a, b in intersectBed_wao(gapsbed, spansbed):
        gapsize = a.span
        # Gaps of any size other than 100 are skipped (presumably 100 marks
        # an unknown-size gap - verify against the gap-calling step).
        if gapsize != 100:
            continue

        gapname = a.accn
        if b is None:
            # Gap without any overlapping span: keep it with an empty set so
            # it is still reported with a link count of 0.
            gap2mate[gapname] = set()
            continue

        gap2mate[gapname].add(b.accn)

    omgapsbed = "gaps.linkage.bed"
    # `with` closes the file on every exit path, including exceptions.
    with open(omgapsbed, "w") as fw:
        for gapname, mates in sorted(gap2mate.items()):
            _, gaprecord = order[gapname]
            nmates = len(mates)
            if nmates < opts.minlinks:
                # print() function form, matching the rest of the file; the
                # Python 2 `print >> fw, ...` statement fails on Python 3.
                print("{0}\t{1}".format(gaprecord, nmates), file=fw)
                continue
            print(gapname, mates)
def validate(args):
    """
    %prog validate diploid.napus.fractionation cds.bed

    Check whether [S] intervals overlap with CDS.
    """
    # The docstring above is used as the OptionParser usage text and is kept
    # verbatim.
    #
    # args: command-line tokens; two positional paths are required (the
    #       fractionation table and a CDS BED file).
    # Side effects: writes `S.bed` in the current directory and
    #       `<fractionation>.validated` next to the input. Returns None.
    # Raises: AssertionError if a row whose second column matched a CDS does
    #       not carry an "[S]" third column; ValueError if a row does not
    #       split into exactly three whitespace-separated fields.
    from jcvi.formats.bed import intersectBed_wao

    p = OptionParser(validate.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fractionation, cdsbed = args

    # Pass 1: dump every "[S]" interval into a BED file keyed by column 2.
    sbed = "S.bed"
    with open(fractionation) as fp, open(sbed, "w") as fw:
        for row in fp:
            _, b, c = row.split()
            if not c.startswith("[S]"):
                continue

            _, (seqid, start, end) = get_tag(c, None)
            # start - 1: presumably converts a 1-based start to the 0-based
            # start used by BED - confirm against get_tag().
            fields = (seqid, start - 1, end, b)
            print("\t".join(str(x) for x in fields), file=fw)

    # Pass 2: map each [S] interval name to the CDS it overlaps. When one
    # interval overlaps several CDS the last one reported wins, as before.
    pairs = {
        a.accn: b.accn
        for a, b in intersectBed_wao(sbed, cdsbed)
        if b is not None
    }

    # Pass 3: rewrite the table, replacing the "[S]" tag with the CDS name
    # wherever an overlap was found. The input is re-opened rather than
    # rewound so that both handles are closed deterministically.
    validated = fractionation + ".validated"
    fixed = 0
    with open(fractionation) as fp, open(validated, "w") as fw:
        for row in fp:
            a, b, c = row.split()
            if b in pairs:
                assert c.startswith("[S]")
                c = pairs[b]
                fixed += 1
            print("\t".join((a, b, c)), file=fw)

    logging.debug("Fixed {0} [S] cases in `{1}`.".format(fixed, validated))
def gffselect(args):
    """
    %prog gffselect gmaplocation.bed expectedlocation.bed translated.ids tag

    Try to match up the expected location and gmap locations for particular
    genes. translated.ids was generated by fasta.translate --ids. tag must be
    one of "complete|pseudogene|partial".
    """
    # The docstring above is used as the OptionParser usage text; its wording
    # is unchanged.
    #
    # args: command-line tokens; four positionals are required (gmap BED,
    #       expected BED, ids file, tag).
    # Side effects: writes `<expected minus last extension>.ids` containing
    #       one selected gmap record name per line. Returns None.
    # Raises: KeyError if an overlapping gmap record name is absent from the
    #       ids file (lookup in `completeness`).
    from jcvi.formats.bed import intersectBed_wao

    p = OptionParser(gffselect.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 4:
        sys.exit(not p.print_help())

    gmapped, expected, idsfile, tag = args
    data = get_tags(idsfile)
    # Key on the "path" flavour of each id, since lookups below use the
    # names found in the gmap BED.
    completeness = {a.replace("mrna", "path"): c for (a, _, c) in data}

    seen = set()
    # From here on `idsfile` names the OUTPUT file, not the input ids file.
    idsfile = expected.rsplit(".", 1)[0] + ".ids"
    cnt = 0
    # Context manager so the handle is closed even if the completeness
    # lookup raises KeyError mid-loop.
    with open(idsfile, "w") as fw:
        for a, b in intersectBed_wao(expected, gmapped):
            if b is None:
                continue

            aname, bbname = a.accn, b.accn
            # Name up to the first "." is compared with the expected name.
            bname = bbname.split(".")[0]
            if completeness[bbname] != tag:
                continue

            # Keep only the first gmap hit that lands on its own expected
            # location.
            if aname != bname or bname in seen:
                continue

            seen.add(bname)
            print(bbname, file=fw)
            cnt += 1

    logging.debug("Total {0} records written to `{1}`.".format(cnt, idsfile))