def blastplot(
    ax,
    blastfile,
    qsizes,
    ssizes,
    qbed,
    sbed,
    style="dot",
    sampleN=None,
    baseticks=False,
    insetLabels=False,
    stripNames=False,
    highlights=None,
):
    """Draw a dot plot of BLAST hits on `ax`, query on x and subject on y.

    Args:
        ax: axes-like object; `plot`, `scatter`, `text`, `add_patch`,
            tick and limit methods are called on it.
        blastfile: path of the file whose lines are parsed with `BlastLine`.
        qsizes, ssizes: size tables for the query / subject side. They must
            provide `get_position(seqid, pos)`, `get_breaks()`, `totalsize`
            and `len()`.
        qbed, sbed: optional objects with an `order` mapping from a hit name
            to `(index, feature)`. When given, hits whose name is missing from
            the mapping are skipped and the feature's `seqid`/`start`/`end`
            replace the coordinates found in the BLAST line.
        style: one of `DotStyles`; "line", "circle" and "dot" are drawn.
        sampleN: when truthy and more hits than this were read, only a
            `sample(data, sampleN)` subset is plotted.
        baseticks: when true, denser tick marks are drawn outside the axes.
        insetLabels: when true, x labels are written inside `ax` and y labels
            are omitted; otherwise both go on the global `root` canvas.
        stripNames: when true, everything after the last "." is removed from
            query and subject names before lookup.
        highlights: either a sequence of `BedLine` (highlighted on the subject
            axis) or a pair `(query_highlights, subject_highlights)`.

    Returns:
        None. When no hit survives filtering, an error is logged and the
        function returns before drawing anything.

    Note:
        `root` is neither a parameter nor a local; it must exist as a global
        when labels or base ticks are drawn on the outer canvas.
    """
    assert style in DotStyles

    qorder = qbed.order if qbed else None
    sorder = sbed.order if sbed else None

    data = []
    # Use a context manager so the BLAST file is closed even if parsing fails.
    with open(blastfile) as fp:
        for row in fp:
            b = BlastLine(row)
            query, subject = b.query, b.subject

            if stripNames:
                query = query.rsplit(".", 1)[0]
                subject = subject.rsplit(".", 1)[0]

            if qorder:
                if query not in qorder:
                    continue
                qi, q = qorder[query]
                query = q.seqid
                qstart, qend = q.start, q.end
            else:
                qstart, qend = b.qstart, b.qstop

            if sorder:
                if subject not in sorder:
                    continue
                si, s = sorder[subject]
                subject = s.seqid
                sstart, send = s.start, s.end
            else:
                sstart, send = b.sstart, b.sstop

            qi = qsizes.get_position(query, qstart)
            qj = qsizes.get_position(query, qend)
            si = ssizes.get_position(subject, sstart)
            sj = ssizes.get_position(subject, send)

            # Only the start positions decide whether the hit is placeable.
            if None in (qi, si):
                continue
            data.append(((qi, qj), (si, sj)))

    if sampleN:
        if len(data) > sampleN:
            data = sample(data, sampleN)

    if not data:
        return logging.error("no blast data imported")

    xsize, ysize = qsizes.totalsize, ssizes.totalsize
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))

    if style == "line":
        # One segment per hit, from (start, start) to (end, end).
        for xs, ys in data:
            ax.plot(xs, ys, "ro-", mfc="w", mec="r", ms=3)
    else:
        # Point styles only use the start coordinate of each hit.
        points = [(qpos[0], spos[0]) for qpos, spos in data]
        x, y = zip(*points)
        if style == "circle":
            ax.plot(x, y, "mo", mfc="w", mec="m", ms=3)
        elif style == "dot":
            ax.scatter(x, y, s=3, lw=0)

    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    ignore = True  # tag to mark whether to plot chr name (skip small ones)
    # With a threshold of 0 no sequence is ever considered too small.
    ignore_size_x = ignore_size_y = 0

    # plot the chromosome breaks
    logging.debug("xbreaks={0} ybreaks={1}".format(len(qsizes), len(ssizes)))
    for (seqid, beg, end) in qsizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_x
        if ignore:
            continue
        seqid = rename_seqid(seqid)

        xchr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot([end, end], ylim, "-", lw=1, color="grey")

    for (seqid, beg, end) in ssizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_y
        if ignore:
            continue
        seqid = rename_seqid(seqid)

        ychr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot(xlim, [end, end], "-", lw=1, color="grey")

    # plot the chromosome labels
    for label, pos, ignore in xchr_labels:
        if not ignore:
            if insetLabels:
                ax.text(pos, 0, label, size=8, ha="center", va="top", color="grey")
            else:
                # Map base position into the 0.1-0.9 band of the root canvas.
                pos = 0.1 + pos * 0.8 / xsize
                root.text(
                    pos,
                    0.91,
                    label,
                    size=10,
                    ha="center",
                    va="bottom",
                    rotation=45,
                    color="grey",
                )

    # remember y labels are inverted
    for label, pos, ignore in ychr_labels:
        if not ignore:
            if insetLabels:
                continue
            pos = 0.9 - pos * 0.8 / ysize
            root.text(0.91, pos, label, size=10, va="center", color="grey")

    # Highlight regions based on a list of BedLine
    qhighlights = shighlights = None
    if highlights:
        if isinstance(highlights[0], BedLine):
            shighlights = highlights
        elif len(highlights) == 2:
            qhighlights, shighlights = highlights

    if qhighlights:
        for hl in qhighlights:
            hls = qsizes.get_position(hl.seqid, hl.start)
            ax.add_patch(Rectangle((hls, 0), hl.span, ysize, fc="r", alpha=0.2, lw=0))
    if shighlights:
        for hl in shighlights:
            hls = ssizes.get_position(hl.seqid, hl.start)
            ax.add_patch(Rectangle((0, hls), xsize, hl.span, fc="r", alpha=0.2, lw=0))

    if baseticks:

        def increaseDensity(a, ratio=4):
            # Rebuild the tick array from 0 with a step `ratio` times smaller.
            assert len(a) > 1
            stepsize = a[1] - a[0]
            newstepsize = int(stepsize / ratio)
            return np.arange(0, a[-1], newstepsize)

        # Increase the density of the ticks
        xticks = ax.get_xticks()
        yticks = ax.get_yticks()
        xticks = increaseDensity(xticks, ratio=2)
        yticks = increaseDensity(yticks, ratio=2)
        ax.set_xticks(xticks)

        # Plot outward ticklines
        for pos in xticks[1:]:
            if pos > xsize:
                continue
            pos = 0.1 + pos * 0.8 / xsize
            root.plot((pos, pos), (0.08, 0.1), "-", color="grey", lw=2)

        for pos in yticks[1:]:
            if pos > ysize:
                continue
            pos = 0.9 - pos * 0.8 / ysize
            root.plot((0.09, 0.1), (pos, pos), "-", color="grey", lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_base_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="gray", size=10)
    plt.setp(ax.get_yticklabels(), rotation=90)
def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings. The
    paired read span will be converted to a new bedfile that contain the happy
    mates. ctg is the chr/scf/ctg that you want to plot the histogram on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is kept verbatim; implementation notes live in comments instead.
    #
    # args is the command-line argument list: fastafile, ctg, then one or
    # more BED files. The figure is written to "<fastafile>.<ctg>.pdf".
    from jcvi.formats.bed import mates, bedpe

    p = OptionParser(coverage.__doc__)
    p.add_option(
        "--ymax", default=None, type="int", help="Limit ymax [default: %default]"
    )
    p.add_option(
        "--spans",
        default=False,
        action="store_true",
        help="BED files already contain clone spans [default: %default]",
    )
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) < 3:
        sys.exit(not p.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    lines = []
    legends = []
    yy = 0.9
    # Cycle through the palette so that BED files beyond the sixth are still
    # plotted instead of being silently dropped by zip().
    palette = "rgbcky"
    for idx, bedfile in enumerate(bedfiles):
        c = palette[idx % len(palette)]
        if not opts.spans:
            # Derive the span BED file, regenerating intermediates only when
            # need_update() reports them as stale.
            pf = bedfile.rsplit(".", 1)[0]
            matesfile = pf + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, _matesbedfile = mates([bedfile, "--lib"])

            bedspanfile = pf + ".spans.bed"
            if need_update(matesfile, bedspanfile):
                _bedpefile, bedspanfile = bedpe(
                    [bedfile, "--span", "--mates={0}".format(matesfile)]
                )
            bedfile = bedspanfile

        bedsum = Bed(bedfile).sum(seqid=ctg)
        notcoveredbases = size - bedsum

        legend = bedfile.split(".")[0]
        msg = "{0}: {1} bp not covered".format(legend, thousands(notcoveredbases))
        # sys.stderr.write works on both Python 2 and 3, unlike `print >>`.
        sys.stderr.write(msg + "\n")
        ax.text(0.1, yy, msg, color=c, size=9, transform=ax.transAxes)
        yy -= 0.08

        cov = Coverage(bedfile, sizes.filename)
        x, y = cov.get_plot_data(ctg, bins=bins)
        (line,) = ax.plot(x, y, "-", color=c, lw=2, alpha=0.5)
        lines.append(line)
        legends.append(legend)

    leg = ax.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(0.5)

    ylabel = "Average depth per {0}Kb".format(size / bins / 1000)
    ax.set_xlim(0, size)
    # opts.ymax defaults to None when --ymax is not given.
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel(ylabel)
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
def qc(args):
    """
    %prog qc prefix

    Expects data files including:
    1. `prefix.bedpe` draws Bezier curve between paired reads
    2. `prefix.sizes` draws length of the contig/scaffold
    3. `prefix.gaps.bed` mark the position of the gaps in sequence
    4. `prefix.bed.coverage` plots the base coverage
    5. `prefix.pairs.bed.coverage` plots the clone coverage

    See assembly.coverage.posmap() for the generation of these files.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is kept verbatim; implementation notes live in comments instead.
    #
    # args is the command-line argument list holding exactly one prefix,
    # which is also used as the scaffold name. Output goes to "<prefix>.pdf".
    from jcvi.graphics.glyph import Bezier

    p = OptionParser(qc.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        # Exit non-zero on a usage error, as coverage() does.
        sys.exit(not p.print_help())

    (prefix,) = args
    scf = prefix

    # All these files *must* be present in the current folder
    bedpefile = prefix + ".bedpe"
    fastafile = prefix + ".fasta"
    sizesfile = prefix + ".sizes"
    gapsbedfile = prefix + ".gaps.bed"
    bedfile = prefix + ".bed"
    pairsbedfile = prefix + ".pairs.bed"

    sizes = Sizes(fastafile).mapping
    size = sizes[scf]

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    # the scaffold
    root.add_patch(Rectangle((0.1, 0.15), 0.8, 0.03, fc="k"))

    # basecoverage and matecoverage
    ax = fig.add_axes([0.1, 0.45, 0.8, 0.45])

    bins = 200  # Smooth the curve
    basecoverage = Coverage(bedfile, sizesfile)
    matecoverage = Coverage(pairsbedfile, sizesfile)

    x, y = basecoverage.get_plot_data(scf, bins=bins)
    (baseline,) = ax.plot(x, y, "g-")
    x, y = matecoverage.get_plot_data(scf, bins=bins)
    (mateline,) = ax.plot(x, y, "r-")
    legends = ("Base coverage", "Mate coverage")
    leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(0.5)
    ax.set_xlim(0, size)

    # draw the read pairs
    pairs = []
    with open(bedpefile) as fp:
        for row in fp:
            # Each row must have exactly seven whitespace-separated fields.
            # The scaffold columns are read into throwaway names so that
            # `scf` keeps naming the scaffold being plotted.
            _ascf, astart, aend, _bscf, bstart, bend, _clonename = row.split()
            astart, bstart = int(astart), int(bstart)
            aend, bend = int(aend), int(bend)
            start = min(astart, bstart) + 1
            end = max(aend, bend)
            pairs.append((start, end))

    bpratio = 0.8 / size
    cutoff = 1000  # inserts smaller than this are not plotted

    def pos(x):
        # this convert from base => x-coordinate
        return 0.1 + x * bpratio

    ypos = 0.15 + 0.03
    for start, end in pairs:
        dist = end - start

        if dist < cutoff:
            continue

        dist = min(dist, 10000)
        # 10Kb == .25 canvas height
        height = 0.25 * dist / 10000
        xstart = pos(start)
        xend = pos(end)
        p0 = (xstart, ypos)
        p1 = (xstart, ypos + height)
        p2 = (xend, ypos + height)
        p3 = (xend, ypos)
        Bezier(root, p0, p1, p2, p3)

    # gaps on the scaffold
    with open(gapsbedfile) as fp:
        for row in fp:
            b = BedLine(row)
            start, end = b.start, b.end
            xstart = pos(start)
            xend = pos(end)
            # White patches over the black scaffold bar mark the gaps.
            root.add_patch(Rectangle((xstart, 0.15), xend - xstart, 0.03, fc="w"))

    warn_msg = "Only the inserts > {0}bp are shown".format(cutoff)
    root.text(0.5, 0.1, scf, color="b", ha="center")
    root.text(0.5, 0.05, warn_msg, color="gray", ha="center")

    # clean up and output
    set_human_base_axis(ax)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = prefix + ".pdf"
    savefig(figname, dpi=300)
def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings. The
    paired read span will be converted to a new bedfile that contain the happy
    mates. ctg is the chr/scf/ctg that you want to plot the histogram on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    # NOTE: the docstring doubles as the OptionParser usage text and is
    # therefore left untouched.
    #
    # Expected args: fastafile, ctg, followed by at least one BED file.
    # The plot is saved as "<fastafile>.<ctg>.pdf".
    from jcvi.formats.bed import mates, bedpe

    p = OptionParser(coverage.__doc__)
    p.add_option("--ymax", default=None, type="int",
                 help="Limit ymax [default: %default]")
    p.add_option("--spans", default=False, action="store_true",
                 help="BED files already contain clone spans [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) < 3:
        sys.exit(not p.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    lines = []
    legends = []
    yy = .9
    # Reuse colours once the palette is exhausted; zipping against the
    # six-letter palette used to discard every BED file after the sixth.
    colors = "rgbcky"
    for i, bedfile in enumerate(bedfiles):
        c = colors[i % len(colors)]
        if not opts.spans:
            # Build (or reuse, if up to date) the mates and span files.
            pf = bedfile.rsplit(".", 1)[0]
            matesfile = pf + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, _matesbedfile = mates([bedfile, "--lib"])

            bedspanfile = pf + ".spans.bed"
            if need_update(matesfile, bedspanfile):
                _bedpefile, bedspanfile = bedpe(
                    [bedfile, "--span", "--mates={0}".format(matesfile)])
            bedfile = bedspanfile

        bedsum = Bed(bedfile).sum(seqid=ctg)
        notcoveredbases = size - bedsum

        legend = bedfile.split(".")[0]
        msg = "{0}: {1} bp not covered".format(legend, thousands(notcoveredbases))
        # `print >> sys.stderr` is Python 2 only; write() is portable.
        sys.stderr.write(msg + "\n")
        ax.text(.1, yy, msg, color=c, size=9, transform=ax.transAxes)
        yy -= .08

        cov = Coverage(bedfile, sizes.filename)
        x, y = cov.get_plot_data(ctg, bins=bins)
        line, = ax.plot(x, y, '-', color=c, lw=2, alpha=.5)
        lines.append(line)
        legends.append(legend)

    leg = ax.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)

    ylabel = "Average depth per {0}Kb".format(size / bins / 1000)
    ax.set_xlim(0, size)
    # ymax stays None (the option default) unless --ymax was supplied.
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel(ylabel)
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
def blastplot(ax, blastfile, qsizes, ssizes, qbed, sbed,
              style="dot", proportional=False, sampleN=None,
              baseticks=False, insetLabels=False, stripNames=False,
              highlights=None):
    """Plot BLAST hits as a dot plot on `ax` (query on x, subject on y).

    Args:
        ax: axes-like object that receives the hits, break lines, inset
            labels and highlight patches.
        blastfile: path to the file read line by line through `BlastLine`.
        qsizes, ssizes: query / subject size tables exposing
            `get_position(seqid, pos)`, `get_breaks()`, `totalsize`, `len()`.
        qbed, sbed: optional objects whose `order` mapping translates a hit
            name into `(index, feature)`; unknown names are skipped and the
            feature's `seqid`/`start`/`end` are used as coordinates.
        style: a member of `DotStyles`; "line", "circle", "dot" are drawn.
        proportional: accepted for interface compatibility; this function
            body does not read it.
        sampleN: when truthy, caps the number of plotted hits by taking
            `sample(data, sampleN)`.
        baseticks: draw extra, denser tick marks on the `root` canvas.
        insetLabels: write x labels inside `ax` and skip y labels, instead of
            writing both on `root`.
        stripNames: drop the part after the last "." from hit names.
        highlights: a sequence of `BedLine` (subject-axis highlights) or a
            two-item `(query_highlights, subject_highlights)` sequence.

    Returns:
        None. If no hit is retained, an error is logged and nothing is drawn.

    Note:
        `root` is resolved as a global at call time; it is not passed in.
    """
    assert style in DotStyles

    qorder = qbed.order if qbed else None
    sorder = sbed.order if sbed else None

    data = []
    # `with` guarantees the file handle is released; it was previously
    # opened and never closed.
    with open(blastfile) as fp:
        for row in fp:
            b = BlastLine(row)
            query, subject = b.query, b.subject

            if stripNames:
                query = query.rsplit(".", 1)[0]
                subject = subject.rsplit(".", 1)[0]

            if qorder:
                if query not in qorder:
                    continue
                qi, q = qorder[query]
                query = q.seqid
                qstart, qend = q.start, q.end
            else:
                qstart, qend = b.qstart, b.qstop

            if sorder:
                if subject not in sorder:
                    continue
                si, s = sorder[subject]
                subject = s.seqid
                sstart, send = s.start, s.end
            else:
                sstart, send = b.sstart, b.sstop

            qi = qsizes.get_position(query, qstart)
            qj = qsizes.get_position(query, qend)
            si = ssizes.get_position(subject, sstart)
            sj = ssizes.get_position(subject, send)

            # Hits whose start cannot be placed on either axis are dropped.
            if None in (qi, si):
                continue
            data.append(((qi, qj), (si, sj)))

    if sampleN:
        if len(data) > sampleN:
            data = sample(data, sampleN)

    if not data:
        return logging.error("no blast data imported")

    xsize, ysize = qsizes.totalsize, ssizes.totalsize
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))

    if style == "line":
        # Each hit becomes a segment joining its start and end positions.
        for qspan, sspan in data:
            ax.plot(qspan, sspan, 'ro-', mfc="w", mec="r", ms=3)
    else:
        # Dots and circles are placed at the start position of each hit.
        starts = [(qspan[0], sspan[0]) for qspan, sspan in data]
        x, y = zip(*starts)
        if style == "circle":
            ax.plot(x, y, 'mo', mfc="w", mec="m", ms=3)
        elif style == "dot":
            ax.scatter(x, y, s=3, lw=0)

    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    ignore = True  # tag to mark whether to plot chr name (skip small ones)
    # Threshold 0 means no break is ever skipped for being too small.
    ignore_size_x = ignore_size_y = 0

    # plot the chromosome breaks
    logging.debug("xbreaks={0} ybreaks={1}".format(len(qsizes), len(ssizes)))
    for (seqid, beg, end) in qsizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_x
        if ignore:
            continue
        seqid = rename_seqid(seqid)

        xchr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot([end, end], ylim, "-", lw=1, color="grey")

    for (seqid, beg, end) in ssizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_y
        if ignore:
            continue
        seqid = rename_seqid(seqid)

        ychr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot(xlim, [end, end], "-", lw=1, color="grey")

    # plot the chromosome labels
    for label, pos, ignore in xchr_labels:
        if not ignore:
            if insetLabels:
                ax.text(pos, 0, label, size=8,
                        ha="center", va="top", color="grey")
            else:
                # Convert base position to root-canvas coordinates (.1 to .9).
                pos = .1 + pos * .8 / xsize
                root.text(pos, .91, label, size=10,
                          ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, ignore in ychr_labels:
        if not ignore:
            if insetLabels:
                continue
            pos = .9 - pos * .8 / ysize
            root.text(.91, pos, label, size=10,
                      va="center", color="grey")

    # Highlight regions based on a list of BedLine
    qhighlights = shighlights = None
    if highlights:
        if isinstance(highlights[0], BedLine):
            shighlights = highlights
        elif len(highlights) == 2:
            qhighlights, shighlights = highlights

    if qhighlights:
        for hl in qhighlights:
            hls = qsizes.get_position(hl.seqid, hl.start)
            ax.add_patch(Rectangle((hls, 0), hl.span, ysize,
                                   fc="r", alpha=.2, lw=0))
    if shighlights:
        for hl in shighlights:
            hls = ssizes.get_position(hl.seqid, hl.start)
            ax.add_patch(Rectangle((0, hls), xsize, hl.span,
                                   fc="r", alpha=.2, lw=0))

    if baseticks:

        def increaseDensity(a, ratio=4):
            # New tick positions start at 0 and use a step reduced by `ratio`.
            assert len(a) > 1
            stepsize = a[1] - a[0]
            newstepsize = int(stepsize / ratio)
            return np.arange(0, a[-1], newstepsize)

        # Increase the density of the ticks
        xticks = ax.get_xticks()
        yticks = ax.get_yticks()
        xticks = increaseDensity(xticks, ratio=2)
        yticks = increaseDensity(yticks, ratio=2)
        # Only the x ticks are applied to `ax`; the denser y ticks are used
        # solely for the outward tick lines drawn below.
        ax.set_xticks(xticks)

        # Plot outward ticklines
        for pos in xticks[1:]:
            if pos > xsize:
                continue
            pos = .1 + pos * .8 / xsize
            root.plot((pos, pos), (.08, .1), '-', color="grey", lw=2)

        for pos in yticks[1:]:
            if pos > ysize:
                continue
            pos = .9 - pos * .8 / ysize
            root.plot((.09, .1), (pos, pos), '-', color="grey", lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_base_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray', size=10)
    plt.setp(ax.get_yticklabels(), rotation=90)