def scaffold(args): """ %prog scaffold ctgfasta reads1.fasta mapping1.bed reads2.fasta mapping2.bed ... Run BAMBUS on set of contigs, reads and read mappings. """ from jcvi.formats.base import FileMerger from jcvi.formats.bed import mates from jcvi.formats.contig import frombed from jcvi.formats.fasta import join from jcvi.utils.iter import grouper p = OptionParser(scaffold.__doc__) p.add_option("--conf", help="BAMBUS configuration file [default: %default]") p.add_option( "--prefix", default=False, action="store_true", help="Only keep links between IDs with same prefix [default: %default]" ) opts, args = p.parse_args(args) nargs = len(args) if nargs < 3 or nargs % 2 != 1: sys.exit(not p.print_help()) ctgfasta = args[0] duos = list(grouper(2, args[1:])) trios = [] for fastafile, bedfile in duos: prefix = bedfile.rsplit(".", 1)[0] matefile = prefix + ".mates" matebedfile = matefile + ".bed" if need_update(bedfile, [matefile, matebedfile]): matesopt = [bedfile, "--lib", "--nointra"] if opts.prefix: matesopt += ["--prefix"] matefile, matebedfile = mates(matesopt) trios.append((fastafile, matebedfile, matefile)) # Merge the readfasta, bedfile and matefile bbfasta, bbbed, bbmate = "bambus.reads.fasta", "bambus.bed", "bambus.mates" for files, outfile in zip(zip(*trios), (bbfasta, bbbed, bbmate)): FileMerger(files, outfile=outfile).merge(checkexists=True) ctgfile = "bambus.contig" idsfile = "bambus.ids" frombedInputs = [bbbed, ctgfasta, bbfasta] if need_update(frombedInputs, ctgfile): frombed(frombedInputs) inputfasta = "bambus.contigs.fasta" singletonfasta = "bambus.singletons.fasta" cmd = "faSomeRecords {0} {1} ".format(ctgfasta, idsfile) sh(cmd + inputfasta) sh(cmd + singletonfasta + " -exclude") # Run bambus prefix = "bambus" cmd = "goBambus -c {0} -m {1} -o {2}".format(ctgfile, bbmate, prefix) if opts.conf: cmd += " -C {0}".format(opts.conf) sh(cmd) cmd = "untangle -e {0}.evidence.xml -s {0}.out.xml -o {0}.untangle.xml".\ format(prefix) sh(cmd) final = "final" cmd = "printScaff -e {0}.evidence.xml -s {0}.untangle.xml -l {0}.lib " \ "-merge -detail -oo -sum -o {1}".format(prefix, final) sh(cmd) oofile = final + ".oo" join([inputfasta, "--oo={0}".format(oofile)])
def coverage(args): """ %prog coverage fastafile ctg bedfile1 bedfile2 .. Plot coverage from a set of BED files that contain the read mappings. The paired read span will be converted to a new bedfile that contain the happy mates. ctg is the chr/scf/ctg that you want to plot the histogram on. If the bedfiles already contain the clone spans, turn on --spans. """ from jcvi.formats.bed import mates, bedpe p = OptionParser(coverage.__doc__) p.add_option("--ymax", default=None, type="int", help="Limit ymax [default: %default]") p.add_option("--spans", default=False, action="store_true", help="BED files already contain clone spans [default: %default]") opts, args, iopts = p.set_image_options(args, figsize="8x5") if len(args) < 3: sys.exit(not p.print_help()) fastafile, ctg = args[0:2] bedfiles = args[2:] sizes = Sizes(fastafile) size = sizes.mapping[ctg] plt.figure(1, (iopts.w, iopts.h)) ax = plt.gca() bins = 100 # smooth the curve lines = [] legends = [] not_covered = [] yy = .9 for bedfile, c in zip(bedfiles, "rgbcky"): if not opts.spans: pf = bedfile.rsplit(".", 1)[0] matesfile = pf + ".mates" if need_update(bedfile, matesfile): matesfile, matesbedfile = mates([bedfile, "--lib"]) bedspanfile = pf + ".spans.bed" if need_update(matesfile, bedspanfile): bedpefile, bedspanfile = bedpe([bedfile, "--span", "--mates={0}".format(matesfile)]) bedfile = bedspanfile bedsum = Bed(bedfile).sum(seqid=ctg) notcoveredbases = size - bedsum legend = bedfile.split(".")[0] msg = "{0}: {1} bp not covered".format(legend, thousands(notcoveredbases)) not_covered.append(msg) print >> sys.stderr, msg ax.text(.1, yy, msg, color=c, size=9, transform=ax.transAxes) yy -= .08 cov = Coverage(bedfile, sizes.filename) x, y = cov.get_plot_data(ctg, bins=bins) line, = ax.plot(x, y, '-', color=c, lw=2, alpha=.5) lines.append(line) legends.append(legend) leg = ax.legend(lines, legends, shadow=True, fancybox=True) leg.get_frame().set_alpha(.5) ylabel = "Average depth per {0}Kb".format(size / bins / 1000) ax.set_xlim(0, size) ax.set_ylim(0, opts.ymax) ax.set_xlabel(ctg) ax.set_ylabel(ylabel) set_human_base_axis(ax) figname ="{0}.{1}.pdf".format(fastafile, ctg) savefig(figname, dpi=iopts.dpi, iopts=iopts)
def coverage(args): """ %prog coverage fastafile ctg bedfile1 bedfile2 .. Plot coverage from a set of BED files that contain the read mappings. The paired read span will be converted to a new bedfile that contain the happy mates. ctg is the chr/scf/ctg that you want to plot the histogram on. If the bedfiles already contain the clone spans, turn on --spans. """ from jcvi.formats.bed import mates, bedpe p = OptionParser(coverage.__doc__) p.add_option("--ymax", default=None, type="int", help="Limit ymax [default: %default]") p.add_option( "--spans", default=False, action="store_true", help="BED files already contain clone spans [default: %default]") opts, args, iopts = p.set_image_options(args, figsize="8x5") if len(args) < 3: sys.exit(not p.print_help()) fastafile, ctg = args[0:2] bedfiles = args[2:] sizes = Sizes(fastafile) size = sizes.mapping[ctg] plt.figure(1, (iopts.w, iopts.h)) ax = plt.gca() bins = 100 # smooth the curve lines = [] legends = [] not_covered = [] yy = .9 for bedfile, c in zip(bedfiles, "rgbcky"): if not opts.spans: pf = bedfile.rsplit(".", 1)[0] matesfile = pf + ".mates" if need_update(bedfile, matesfile): matesfile, matesbedfile = mates([bedfile, "--lib"]) bedspanfile = pf + ".spans.bed" if need_update(matesfile, bedspanfile): bedpefile, bedspanfile = bedpe( [bedfile, "--span", "--mates={0}".format(matesfile)]) bedfile = bedspanfile bedsum = Bed(bedfile).sum(seqid=ctg) notcoveredbases = size - bedsum legend = bedfile.split(".")[0] msg = "{0}: {1} bp not covered".format(legend, thousands(notcoveredbases)) not_covered.append(msg) print >> sys.stderr, msg ax.text(.1, yy, msg, color=c, size=9, transform=ax.transAxes) yy -= .08 cov = Coverage(bedfile, sizes.filename) x, y = cov.get_plot_data(ctg, bins=bins) line, = ax.plot(x, y, '-', color=c, lw=2, alpha=.5) lines.append(line) legends.append(legend) leg = ax.legend(lines, legends, shadow=True, fancybox=True) leg.get_frame().set_alpha(.5) ylabel = "Average depth per {0}Kb".format(size / bins / 1000) ax.set_xlim(0, size) ax.set_ylim(0, opts.ymax) ax.set_xlabel(ctg) ax.set_ylabel(ylabel) set_human_base_axis(ax) figname = "{0}.{1}.pdf".format(fastafile, ctg) savefig(figname, dpi=iopts.dpi, iopts=iopts)
def scaffold(args): """ %prog scaffold ctgfasta reads1.fasta mapping1.bed reads2.fasta mapping2.bed ... Run BAMBUS on set of contigs, reads and read mappings. """ from jcvi.formats.base import FileMerger from jcvi.formats.bed import mates from jcvi.formats.contig import frombed from jcvi.formats.fasta import join from jcvi.utils.iter import grouper p = OptionParser(scaffold.__doc__) p.set_rclip(rclip=1) p.add_option("--conf", help="BAMBUS configuration file [default: %default]") p.add_option("--prefix", default=False, action="store_true", help="Only keep links between IDs with same prefix [default: %default]") opts, args = p.parse_args(args) nargs = len(args) if nargs < 3 or nargs % 2 != 1: sys.exit(not p.print_help()) rclip = opts.rclip ctgfasta = args[0] duos = list(grouper(args[1:], 2)) trios = [] for fastafile, bedfile in duos: prefix = bedfile.rsplit(".", 1)[0] matefile = prefix + ".mates" matebedfile = matefile + ".bed" if need_update(bedfile, [matefile, matebedfile]): matesopt = [bedfile, "--lib", "--nointra", "--rclip={0}".format(rclip), "--cutoff={0}".format(opts.cutoff)] if opts.prefix: matesopt += ["--prefix"] matefile, matebedfile = mates(matesopt) trios.append((fastafile, matebedfile, matefile)) # Merge the readfasta, bedfile and matefile bbfasta, bbbed, bbmate = "bambus.reads.fasta", "bambus.bed", "bambus.mates" for files, outfile in zip(zip(*trios), (bbfasta, bbbed, bbmate)): FileMerger(files, outfile=outfile).merge(checkexists=True) ctgfile = "bambus.contig" idsfile = "bambus.ids" frombedInputs = [bbbed, ctgfasta, bbfasta] if need_update(frombedInputs, ctgfile): frombed(frombedInputs) inputfasta = "bambus.contigs.fasta" singletonfasta = "bambus.singletons.fasta" cmd = "faSomeRecords {0} {1} ".format(ctgfasta, idsfile) sh(cmd + inputfasta) sh(cmd + singletonfasta + " -exclude") # Run bambus prefix = "bambus" cmd = "goBambus -c {0} -m {1} -o {2}".format(ctgfile, bbmate, prefix) if opts.conf: cmd += " -C {0}".format(opts.conf) sh(cmd) cmd = "untangle -e {0}.evidence.xml -s {0}.out.xml -o {0}.untangle.xml".\ format(prefix) sh(cmd) final = "final" cmd = "printScaff -e {0}.evidence.xml -s {0}.untangle.xml -l {0}.lib " \ "-merge -detail -oo -sum -o {1}".format(prefix, final) sh(cmd) oofile = final + ".oo" join([inputfasta, "--oo={0}".format(oofile)])