def main():
    """Command-line entry point: draw a synteny figure and save it to disk.

    Exactly three positional arguments are required (data file, BED file and
    layout file); otherwise the help text is printed and the process exits.
    The image is named after the data file, with its last extension replaced
    by the selected image format.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "--switch",
        help="Rename the seqid with two-column file [default: %default]",
    )
    parser.add_option(
        "--tree",
        help="Display trees on the bottom of the figure [default: %default]",
    )
    parser.add_option("--extra", help="Extra features in BED format")
    parser.add_option(
        "--scalebar",
        default=False,
        action="store_true",
        help="Add scale bar to the plot",
    )
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    canvas = fig.add_axes([0, 0, 1, 1])
    Synteny(
        fig, canvas, datafile, bedfile, layoutfile,
        switch=opts.switch,
        tree=opts.tree,
        extra_features=opts.extra,
        scalebar=opts.scalebar,
    )

    # The canvas spans the unit square and shows no axis frame
    canvas.set_xlim(0, 1)
    canvas.set_ylim(0, 1)
    canvas.set_axis_off()

    savefig(prefix + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def composite_qc(df_orig, size=(16, 12)):
    """
    Plot composite QC figures

    Six panels (A-F) are laid out on a 2x7 grid: age and mean-coverage
    histograms plus count plots for gender, ethnicity, sequencing chemistry
    and cohort. `df_orig` is not modified; a renamed copy is plotted.
    """
    display_names = {
        "hli_calc_age_sample_taken": "Age",
        "hli_calc_gender": "Gender",
        "eth7_max": "Ethnicity",
        "MeanCoverage": "Mean coverage",
        "Chemistry": "Sequencing chemistry",
        "Release Client": "Cohort",
    }
    df = df_orig.rename(columns=display_names)

    fig = plt.figure(1, size)
    grid = (2, 7)
    # (row, column, column span) of each panel, in reading order
    cells = ((0, 0, 2), (0, 2, 2), (0, 4, 3), (1, 0, 2), (1, 2, 2), (1, 4, 3))
    panels = [
        plt.subplot2grid(grid, (row, col), rowspan=1, colspan=span)
        for row, col, span in cells
    ]
    ax1, ax2, ax3, ax4, ax5, ax6 = panels

    sns.distplot(df["Age"].dropna(), kde=False, ax=ax1)
    sns.countplot(x="Gender", data=df, ax=ax2)
    sns.countplot(x="Ethnicity", data=df, ax=ax3,
                  order=df['Ethnicity'].value_counts().index)
    sns.distplot(df["Mean coverage"].dropna(), kde=False, ax=ax4)
    ax4.set_xlim(0, 100)
    sns.countplot(x="Sequencing chemistry", data=df, ax=ax5)
    sns.countplot(x="Cohort", data=df, ax=ax6,
                  order=df['Cohort'].value_counts().index)

    # Anonymize the cohorts: only "Spector" (shown as "TwinsUK") and
    # "Health Nucleus" keep a recognisable name, the rest become C<rank>
    anonymized = []
    for rank, tick in enumerate(ax6.get_xticklabels()):
        cohort = tick.get_text()
        if cohort == "Spector":
            anonymized.append("TwinsUK")
        elif cohort == "Health Nucleus":
            # Left untouched, so the existing tick label object is reused
            anonymized.append(tick)
        else:
            anonymized.append("C{}".format(rank + 1))
    ax6.set_xticklabels(anonymized)
    ax6.set_xticklabels(ax6.get_xticklabels(), ha="right", rotation=30)

    # Promote each x-axis label to the panel title
    for panel in panels:
        panel.set_title(panel.get_xlabel())
        panel.set_xlabel("")

    plt.tight_layout()

    # Transparent full-figure overlay that carries the panel letters
    overlay = fig.add_axes((0, 0, 1, 1))
    letters = ((.02, .96, "A"), (.3, .96, "B"), (.6, .96, "C"),
               (.02, .52, "D"), (.3, .52, "E"), (.6, .52, "F"))
    panel_labels(overlay, letters)
    overlay.set_xlim(0, 1)
    overlay.set_ylim(0, 1)
    overlay.set_axis_off()
def main():
    """Command-line entry point: plot coverage tracks for one chromosome.

    Exactly three positional arguments are required (chromosome name, sizes
    file and data directory); otherwise the help text is printed and the
    process exits. The image is written to `<chromosome>.<format>`.
    """
    p = OptionParser(__doc__)
    p.add_option("--order",
                 help="The order to plot the tracks, comma-separated")
    opts, args, iopts = p.set_image_options()

    if len(args) != 3:
        sys.exit(not p.print_help())

    # `chrom` rather than `chr`, to avoid shadowing the builtin
    chrom, sizes, datadir = args
    order = opts.order
    if order:
        order = order.split(",")

    # No --hlsuffix option is registered in this function, so plain
    # attribute access on `opts` can raise AttributeError. Fall back to None
    # unless the parser supplied a value.
    hlsuffix = getattr(opts, "hlsuffix", None)

    sizes = Sizes(sizes)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    canvas = (.12, .35, .8, .35)
    chr_size = sizes.get_size(chrom)
    Coverage(fig, root, canvas, chrom, (0, chr_size), datadir,
             order=order, hlsuffix=hlsuffix)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chrom + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def birch(args):
    """
    %prog birch seqids layout

    Plot birch macro-synteny, with an embedded phylogenetic tree to the right.
    """
    # The docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(birch.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x6")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqids, layout = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    karyotype = Karyotype(fig, root, seqids, layout)
    tracks = karyotype.layout

    xs = .79
    plain = dict(rectangle=False, circle=False)

    # Tree tips share one x position; each takes the y of its layout track
    tips = ("Amborella", "Vitis", "Prunus", "Betula", "Populus", "Arabidopsis")
    coords = {}
    for idx, taxon in enumerate(tips):
        coords[taxon] = (xs, tracks[idx].y)

    # Internal nodes, joined bottom-up so both children exist beforehand
    internal = (
        ("fabids", "Prunus", "Betula"),
        ("malvids", "Populus", "Arabidopsis"),
        ("rosids", "fabids", "malvids"),
        ("eudicots", "rosids", "Vitis"),
        ("angiosperm", "eudicots", "Amborella"),
    )
    for node, first, second in internal:
        coords[node] = join_nodes(root, coords, first, second, xs, **plain)

    # Branch length annotations: (child, parent, text, text alignment)
    branches = (
        ("Amborella", "angiosperm", ">160.0", {}),
        ("eudicots", "angiosperm", ">78.2", dict(va="top")),
        ("Vitis", "eudicots", "138.5", {}),
        ("rosids", "eudicots", "19.8", dict(va="top")),
        ("Prunus", "fabids", "104.2", dict(ha="right", va="top")),
        ("Arabidopsis", "malvids", "110.2", dict(va="top")),
        ("fabids", "rosids", "19.8", dict(ha="right", va="top")),
        ("malvids", "rosids", "8.5", dict(va="top")),
    )
    for child, parent, text, alignment in branches:
        branch_length(root, coords[child], coords[parent], text, **alignment)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig("birch" + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def pomegranate(args):
    """
    %prog pomegranate seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pomegranate genome. The script calls both
    graphics.karyotype and graphics.synteny.
    """
    # The docstring above doubles as the command-line usage text, so it must
    # name this subcommand (it previously said `cotton` / "pineapple").
    p = OptionParser(pomegranate.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    # Five positional arguments are mandatory; otherwise show help and exit
    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Both plots are drawn onto the same full-figure axes
    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .42, .52, .48)

    labels = ((.04, .96, 'A'), (.04, .52, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pomegranate-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def epoch(args):
    """
    %prog epoch

    Illustrate the methods used in Maggie's epoch paper, in particular, how to
    classify S/G/F/FB/FN for the genes.
    """
    # Use this function's own usage text and parse the arguments that were
    # passed in, rather than the module docstring and the process-wide
    # sys.argv.
    p = OptionParser(epoch.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (6, 4))
    root = fig.add_axes([0, 0, 1, 1])

    # Separators: one horizontal divider, three cells above, and dashed
    # centre lines for the cells below
    linestyle = dict(lw=2, color="b", alpha=.2, zorder=2)
    root.plot((0, 1), (.5, .5), "--", **linestyle)
    for i in (1./3, 2./3):
        root.plot((i, i), (.5, 1), "--", **linestyle)
    for i in (1./6, 3./6, 5./6):
        root.plot((i, i), (0, .5), "--", **linestyle)

    # Diagrams
    plot_diagram(root, 1./6, 3./4, "S", "syntenic")
    plot_diagram(root, 3./6, 3./4, "F", "missing, with both flankers")
    plot_diagram(root, 5./6, 3./4, "G", "missing, with one flanker")
    plot_diagram(root, 2./6, 1./4, "FB", "has non-coding matches")
    plot_diagram(root, 4./6, 1./4, "FN", "syntenic region has gap")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    # The docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(resample.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    catfish_file, medicago_file = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    left = fig.add_axes([.1, .18, .32, .64])
    right = fig.add_axes([.6, .18, .32, .64])

    catfish = import_data(catfish_file)
    medicago = import_data(medicago_file)

    # Both panels share axis labels and legend entries; only the data and
    # the title differ
    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    legend = ("anchor rate", "runtime")
    panels = ((left, catfish, "Yellow catfish"), (right, medicago, "Medicago"))
    for ax, data, title in panels:
        subplot_twinx(ax, data, xlabel, ylabels, title=title, legend=legend)

    panel_labels(root, ((.04, .92, "A"), (.54, .92, "B")))

    normalize_axes(root)
    savefig("resample." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def _draw_trees(trees, nrow=1, ncol=1, rmargin=.3, iopts=None, outdir=".",
                shfile=None, **kwargs):
    """
    Draw one or multiple trees on one plot.

    `trees` maps a name to the tree handed to `draw_tree`. Trees are placed
    on an `nrow` x `ncol` grid, one image per full grid. With a 1x1 grid each
    image is named after its tree (last extension replaced); otherwise the
    images are named `trees<page>.<format>`. Images are written to `outdir`.

    `iopts` supplies figure size, format and dpi; when it is None a 5x5 PDF
    at 300 dpi is used. `shfile`, if given, is loaded as a tab-delimited
    DictFile and the entry for each tree name is passed on as `SH`.
    Additional keyword arguments are forwarded to `draw_tree`.
    """
    from jcvi.graphics.tree import draw_tree

    # Always bind SHs so that the lookup below never depends on a NameError
    # being swallowed when no shfile was given
    SHs = DictFile(shfile, delimiter="\t") if shfile else {}

    # Materialize the keys once; indexing `trees.keys()` directly only works
    # on Python 2 and rebuilt the list for every tree
    names = list(trees.keys())
    ntrees = len(trees)
    n = nrow * ncol

    figsize = (iopts.w, iopts.h) if iopts else (5, 5)
    fmt = iopts.format if iopts else "pdf"
    dpi = iopts.dpi if iopts else 300

    # Grid geometry is the same on every page: cells are filled row by row
    # from the top-left corner
    xiv = 1. / ncol
    yiv = 1. / nrow
    xstart = list(np.arange(0, 1, xiv)) * nrow
    ystart = list(chain(*zip(*[list(np.arange(0, 1, yiv))[::-1]] * ncol)))

    npages = int(ceil(float(ntrees) / n))
    for x in range(npages):
        fig = plt.figure(1, figsize)
        root = fig.add_axes([0, 0, 1, 1])

        # The last page may be only partially filled
        for i in range(n * x, min(n * (x + 1), ntrees)):
            slot = i % n
            ax = fig.add_axes([xstart[slot], ystart[slot], xiv, yiv])
            f = names[i]
            tree = trees[f]
            try:
                SH = SHs[f]
            except LookupError:
                # No SH entry recorded for this tree
                SH = None
            draw_tree(ax, tree, rmargin=rmargin, reroot=False,
                      supportcolor="r", SH=SH, **kwargs)

        root.set_xlim(0, 1)
        root.set_ylim(0, 1)
        root.set_axis_off()

        if n == 1:
            image_name = f.rsplit(".", 1)[0] + "." + fmt
        else:
            image_name = "trees{0}.{1}".format(x, fmt)
        image_name = op.join(outdir, image_name)
        savefig(image_name, dpi=dpi, iopts=iopts)
        # Figure number 1 is reused for the next page
        plt.clf()
def report(args):
    """
    %prog report ksfile

    generate a report given a Ks result file (as produced by synonymous_calc.py).
    describe the median Ks, Ka values, as well as the distribution in stem-leaf plot
    """
    # The docstring above is handed to OptionParser as the usage text.
    from jcvi.utils.cbook import SummaryStats
    from jcvi.graphics.histogram import stem_leaf_plot

    parser = OptionParser(report.__doc__)
    parser.add_option(
        "--pdf", default=False, action="store_true",
        help="Generate graphic output for the histogram [default: %default]")
    parser.add_option(
        "--components", default=1, type="int",
        help="Number of components to decompose peaks [default: %default]")
    add_plot_options(parser)
    opts, args, iopts = parser.set_image_options(args, figsize="5x5")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    ks_file, = args
    records = read_ks_file(ks_file)
    ks_min, ks_max = opts.vmin, opts.vmax
    bins = opts.bins

    # Text report: one stem-leaf plot per field whose name contains "ks"
    # (the first entry of `fields` is skipped)
    for field in fields.split(",")[1:]:
        values = [getattr(rec, field) for rec in records]
        if "ks" not in field:
            continue

        values = [v for v in values if ks_min <= v <= ks_max]
        stats = SummaryStats(values)
        title = "".join((
            "{0} ({1}): ".format(descriptions[field], ks_file),
            "Median:{0:.3f} (1Q:{1:.3f}|3Q:{2:.3f}||".format(
                stats.median, stats.firstq, stats.thirdq),
            "Mean:{0:.3f}|Std:{1:.3f}||N:{2})".format(
                stats.mean, stats.sd, stats.size),
        ))

        # Show one more digit when the bins are narrower than 0.1
        digit = 2 if (ks_max * 1. / bins) < .1 else 1
        stem_leaf_plot(values, 0, ks_max, bins, digit=digit, title=title)

    if not opts.pdf:
        return

    # Graphic report: histogram of the ng_ks values within [ks_min, ks_max]
    ng_ks = [rec.ng_ks for rec in records]
    ng_ks = [v for v in ng_ks if ks_min <= v <= ks_max]

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([.12, .1, .8, .8])
    plot = KsPlot(ax, ks_max, opts.bins, legendp=opts.legendp)
    plot.add_data(ng_ks, opts.components, fill=opts.fill)
    plot.draw(title=opts.title)
def oropetium(args):
    """
    %prog oropetium mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    # The docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(oropetium.__doc__)
    parser.add_option("--extra", help="Extra features in BED format")
    opts, args, iopts = parser.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not parser.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, slayout,
            switch=switch, extra_features=opts.extra)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .4, .57, .74, text=True, repeat=True)

    # On the left panel, make a species tree
    fc = 'lightslategrey'
    xs, xp = .16, .03
    tips = (("oropetium", .7), ("setaria", .6), ("sorghum", .5),
            ("rice", .4), ("brachypodium", .3))
    coords = dict((species, (xs, y)) for species, y in tips)

    # Each level of the tree sits one step `xp` further to the left;
    # BEP and PACMAD share a level
    xs -= xp
    coords["Panicoideae"] = join_nodes(root, coords, "setaria", "sorghum", xs)
    xs -= xp
    coords["BEP"] = join_nodes(root, coords, "rice", "brachypodium", xs)
    coords["PACMAD"] = join_nodes(root, coords, "oropetium", "Panicoideae", xs)
    xs -= xp
    coords["Poaceae"] = join_nodes(root, coords, "BEP", "PACMAD", xs)

    # Names of the internal nodes: (tag, vertical nudge, vertical alignment)
    captions = (("BEP", -.02, "top"),
                ("Poaceae", -.02, "top"),
                ("PACMAD", .02, "bottom"))
    for tag, nudge, valign in captions:
        nx, ny = coords[tag]
        root.text(nx - .005, ny + nudge, tag, rotation=90,
                  ha="right", va=valign, color=fc)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig("oropetium" + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def composite(df, sameGenderMZ, sameGenderDZ, size=(16, 24)):
    """Embed both absdiff figures and heritability figures.

    The top row holds four absolute-difference panels (A-D); the two rows
    below hold the four paired-value panels (E-H). Each trait is read from
    `df` keyed by "Sample name" and split into MZ and DZ twin values.
    """
    fig = plt.figure(1, size)
    grid = (6, 4)
    # Top row: four single-column panels
    ax1a, ax2a, ax3a, ax4a = [
        plt.subplot2grid(grid, (0, col), rowspan=2, colspan=1)
        for col in range(4)
    ]
    # Lower rows: four double-width panels
    ax1b, ax2b, ax3b, ax4b = [
        plt.subplot2grid(grid, cell, rowspan=2, colspan=2)
        for cell in ((2, 0), (2, 2), (4, 0), (4, 2))
    ]

    # One entry per trait:
    # (column, label, absdiff axes, paired axes, twin gender filter,
    #  DZ low-value cutoff, gender caption for the paired plot)
    traits = (
        # Telomeres
        ("telomeres.Length", "Telomere length",
         ax1a, ax1b, None, None, None),
        # CCNX
        ("ccn.chrX", "ChrX copy number",
         ax2a, ax2b, "Female", 1.75, "Female only"),
        # CCNY
        ("ccn.chrY", "ChrY copy number",
         ax3a, ax3b, "Male", .75, "Male only"),
        # TRA
        ("TRA.PPM", "TCR-$\\alpha$ deletions",
         ax4a, ax4b, None, None, None),
    )
    for column, label, diff_ax, pair_ax, gender, cutoff, caption in traits:
        trait = extract_trait(df, "Sample name", column)

        # Keyword arguments are only passed where a value is specified
        twin_kw = {} if gender is None else {"gender": gender}
        mz = extract_twin_values(sameGenderMZ, trait, **twin_kw)
        dz = extract_twin_values(sameGenderDZ, trait, **twin_kw)
        if cutoff is not None:
            dz = filter_low_values(dz, cutoff)

        pair_kw = {} if caption is None else {"gender": caption}
        plot_paired_values(pair_ax, mz, dz, label=label, **pair_kw)
        plot_abs_diff(diff_ax, mz, dz, label=label)

    plt.tight_layout()

    overlay = fig.add_axes((0, 0, 1, 1))
    # ABCD absdiff, EFGH heritability
    letters = ((.03, .99, 'A'), (.27, .99, 'B'),
               (.53, .99, 'C'), (.77, .99, 'D'),
               (.03, .67, 'E'), (.53, .67, 'F'),
               (.03, .34, 'G'), (.53, .34, 'H'))
    panel_labels(overlay, letters)
    overlay.set_xlim(0, 1)
    overlay.set_ylim(0, 1)
    overlay.set_axis_off()
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    # The docstring above is handed to OptionParser as the usage text.
    # Each benchmark file gets one row of Venn diagrams, one diagram per
    # whitespace-separated line of `prog pcounts tcounts shared`.
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)
    # Files are paired with these tags in order; zip() stops at the shorter
    # of the two sequences
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        data = []
        # Context manager so the benchmark file is always closed
        with open(bc) as fp:
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            # Exclusive counts on either side plus the intersection
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            # Same output as the Python 2 `print >> sys.stderr`, but valid
            # on Python 3 as well
            sys.stderr.write(message + "\n")
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def scenario(args):
    """
    %prog scenario

    Illustration of the two-step genome merger process for B. rapa companion paper.
    """
    # Use this function's own usage text and parse the arguments that were
    # passed in, rather than the module docstring and the process-wide
    # sys.argv.
    p = OptionParser(scenario.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (5, 5))
    root = fig.add_axes([0, 0, 1, 1])

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    # Layout format: (x, y, label, (chr lengths))
    anc = (.5, .9, "Ancestor", (1,))
    s1 = (.2, .6, "Genome I", (1,))
    s2 = (.5, .6, "Genome II", (1,))
    s3 = (.8, .6, "Genome III", (1,))
    tetra = (.35, .4, "Tetraploid I / II", (.5, .9))
    hexa = (.5, .1, "Hexaploid I / II / III", (.36, .46, .9))
    labels = (anc, s1, s2, s3, tetra, hexa)
    connections = ((anc, s1), (anc, s2), (anc, s3),
                   (s1, tetra), (s2, tetra),
                   (tetra, hexa), (s3, hexa))

    xinterval = .02
    yratio = .05
    for xx, yy, label, chrl in labels:
        root.text(xx, yy, label, ha="center", va="center")
        # Chromosomes start to the left of the label, shifted in
        # proportion to the label length
        offset = len(label) * .012
        for i, c in enumerate(chrl):
            ya = yy + yratio * c
            yb = yy - yratio * c
            Chromosome(root, xx - offset + i * xinterval, ya, yb, width=.01)

    # Comments
    comments = ((.15, .33, "II dominant"),
                (.25, .03, "III dominant"))
    for xx, yy, c in comments:
        root.text(xx, yy, c, size=9, ha="center", va="center")

    # Branches: each line stops short of the labels at both ends
    tip = .04
    for a, b in connections:
        xa, ya, la, chra = a
        xb, yb, lb, chrb = b
        plt.plot((xa, xb), (ya - tip, yb + 2 * tip), 'k-', lw=2, alpha=.5)

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
def composite_correlation(df, size=(12, 8)):
    """
    Plot composite correlation figure

    Four scatter panels (A-D) of a measurement against chronological age
    (`hli_calc_age_sample_taken`), with points coloured by the `Chemistry`
    column of `df`.
    """
    from matplotlib.lines import Line2D

    fig = plt.figure(1, size)
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0))
    ax4 = plt.subplot2grid((2, 2), (1, 1))

    # NaN is mapped too, so it gets a colour of its own
    chemistry = ["V1", "V2", "V2.5", float("nan")]
    colors = sns.color_palette("Set2", 8)
    color_map = dict(zip(chemistry, colors))
    # Identical for every panel, so compute once
    point_colors = df["Chemistry"].map(color_map)
    age = df["hli_calc_age_sample_taken"]
    age_label = "Chronological age (yr)"

    # (axes, column, y-limits, y-label)
    panels = (
        (ax1, "teloLength", (0, 15), "Telomere length (Kb)"),
        (ax2, "ccn.chrX", (1.8, 2.1), "ChrX copy number"),
        (ax4, "ccn.chrY", (0.8, 1.1), "ChrY copy number"),
        (ax3, "TRA.PPM", (0, 250),
         "$TCR-\\alpha$ deletions (count per million reads)"),
    )
    for ax, column, (ymin, ymax), ylabel in panels:
        ax.scatter(age, df[column], s=10, marker='.', color=point_colors)
        ax.set_ylim(ymin, ymax)
        ax.set_ylabel(ylabel)

    # Legend lists only the three named chemistries. zip() is wrapped in
    # list() because its result cannot be sliced on Python 3.
    legend_elements = [
        Line2D([0], [0], marker='.', color='w', label=chem,
               markerfacecolor=color, markersize=16)
        for (chem, color) in list(zip(chemistry, colors))[:3]
    ]
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlabel(age_label)
        ax.legend(handles=legend_elements, loc="upper right")

    plt.tight_layout()

    # Full-figure overlay that carries the panel letters
    root = fig.add_axes((0, 0, 1, 1))
    labels = ((.02, .98, "A"),
              (.52, .98, "B"),
              (.02, .5, "C"),
              (.52, .5, "D"))
    panel_labels(root, labels)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def litchi(args):
    """
    %prog litchi mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    # The docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(litchi.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not parser.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, slayout, switch=switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .4, .7, .82)

    # On the left panel, make a species tree
    fc = 'lightslategrey'
    xs, xp = .16, .03
    tips = (("lychee", .37), ("clementine", .5), ("cacao", .6),
            ("strawberry", .7), ("grape", .8))
    coords = dict((species, (xs, y)) for species, y in tips)

    # Internal nodes, innermost first; every join moves one step `xp` to
    # the left. Only the outermost join passes circle=False.
    joins = (
        ("Sapindales", "clementine", "lychee", {}),
        ("Rosid-II", "cacao", "Sapindales", {}),
        ("Rosid", "strawberry", "Rosid-II", {}),
        ("crown", "grape", "Rosid", dict(circle=False)),
    )
    for node, first, second, extra in joins:
        xs -= xp
        coords[node] = join_nodes(root, coords, first, second, xs, **extra)

    # Names of the internal nodes
    for tag in ("Rosid", "Rosid-II", "Sapindales"):
        nx, ny = coords[tag]
        root.text(nx - .01, ny - .02, tag, rotation=90,
                  ha="right", va="top", color=fc)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig("litchi" + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def snpplot(args):
    """
    %prog snpplot counts.cdt

    Illustrate the histogram per SNP site.
    """
    # The docstring above is handed to OptionParser as the usage text.
    # Each data row becomes one horizontal line, split into coloured
    # segments whose lengths are the row's values normalized to sum to 1.
    p = OptionParser(snpplot.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args

    # Read in CDT file; the first two lines and the first four columns of
    # every row are skipped
    nval = 0  # number of value columns in the last row read
    data = []
    with open(datafile) as fp:
        next(fp, None)
        next(fp, None)
        for row in fp:
            atoms = row.split()[4:]
            nval = len(atoms)
            values = [float(x) for x in atoms]
            # normalize; a row summing to zero is left as is, since it
            # cannot be scaled
            total = sum(values)
            if total:
                values = [x * 1. / total for x in values]
            data.append(values)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    xmin, xmax = .1, .9
    ymin, ymax = .1, .9
    # Guard against a file with no data rows
    yinterval = (ymax - ymin) / len(data) if data else 0
    # With more than three columns, the first one is drawn in light gray
    colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg")
    ystart = ymax
    for d in data:
        xstart = xmin
        for dd, c in zip(d, colors):
            xend = xstart + (xmax - xmin) * dd
            root.plot((xstart, xend), (ystart, ystart), "-", color=c)
            xstart = xend
        ystart -= yinterval

    root.text(.05, .5, "{0} LMD50 SNPs".format(len(data)),
              ha="center", va="center", rotation=90, color="lightslategray")

    for x, t, c in zip((.3, .5, .7), ("REF", "ALT", "HET"), "rbg"):
        root.text(x, .95, t, color=c, ha="center", va="center")
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def dotplot_main(anchorfile, qbed, sbed, image_name, iopts,
                 vmin=0, vmax=1, is_self=False, synteny=False,
                 cmap_text=None, cmap="copper", genomenames=None,
                 sample_number=10000, minfont=5, palette=None,
                 chrlw=.01, title=None):
    """Create a figure, draw a dot plot on it and save it as `image_name`.

    The figure size and dpi come from `iopts`. All keyword parameters are
    forwarded unchanged to `dotplot`.
    """
    fig = plt.figure(1, (iopts.w, iopts.h))
    whole_canvas = fig.add_axes([0, 0, 1, 1])
    plot_area = fig.add_axes([.1, .1, .8, .8])

    # Options handed straight through to dotplot()
    forwarded = dict(
        vmin=vmin, vmax=vmax, is_self=is_self, synteny=synteny,
        cmap_text=cmap_text, cmap=cmap, genomenames=genomenames,
        sample_number=sample_number, minfont=minfont, palette=palette,
        chrlw=chrlw, title=title,
    )
    dotplot(anchorfile, qbed, sbed, fig, whole_canvas, plot_area, **forwarded)

    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def ploidy(args):
    """
    %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both graphics.karyotype
    and graphics.synteny.
    """
    # The docstring above doubles as the command-line usage text, so it must
    # name this subcommand (it previously said `cotton`).
    p = OptionParser(ploidy.__doc__)
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    # Five positional arguments are mandatory; otherwise show help and exit
    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Both plots are drawn onto the same full-figure axes
    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .27, .37, .52)

    # annotate the WGD events
    fc = 'lightslategrey'
    x = .09
    radius = .012
    events = ((.825, r'$\tau$'), (.8, r'$\sigma$'), (.72, r'$\rho$'))
    for ypos, symbol in events:
        TextCircle(root, x, ypos, symbol, radius=radius, fc=fc)
    for ypos, symbol in events:
        root.text(.12, ypos, r"$\times2$", color=fc, ha="center", va="center")
    # Dotted vertical segments drawn at the same x as the event circles
    root.plot([x, x], [.85, .775], ":", color=fc, lw=2)
    root.plot([x, x], [.75, .675], ":", color=fc, lw=2)

    labels = ((.04, .96, 'A'), (.04, .54, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pineapple-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main(tx=None):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.
    """
    # The docstring above is handed to OptionParser as the usage text.
    # When `tx` is supplied it is drawn as given and the image prefix is
    # "demo"; otherwise the tree text is read from the positional data file.
    p = OptionParser(main.__doc__)
    p.add_option("--outgroup", help="Root the tree using the outgroup. " +
                 "Use comma to separate multiple taxa.")
    p.add_option("--rmargin", default=.3, type="float",
                 help="Set blank rmargin to the right [default: %default]")
    p.add_option("--gffdir", default=None,
                 help="The directory that contain GFF files [default: %default]")
    p.add_option("--sizes", default=None,
                 help="The FASTA file or the sizes file [default: %default]")

    opts, args, iopts = set_image_options(p, figsize="8x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    outgroup = None
    if opts.outgroup:
        outgroup = opts.outgroup.split(",")
    pf = datafile.rsplit(".", 1)[0]
    if tx:
        pf = "demo"
    else:
        # Context manager so the tree file is closed once it has been read
        with open(datafile) as fp:
            tx = fp.read()
        logging.debug("Load tree file `{0}`.".format(datafile))

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    draw_tree(root, tx, rmargin=opts.rmargin,
              outgroup=outgroup, gffdir=opts.gffdir, sizes=opts.sizes)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def gff(args):
    """
    %prog gff *.gff

    Draw exons for genes based on gff files. Each gff file should contain only
    one gene, and only the "mRNA" and "CDS" feature will be drawn on the canvas.
    """
    # The docstring above is handed to OptionParser as the usage text.
    align_choices = ("left", "center", "right")
    p = OptionParser(gff.__doc__)
    p.add_option("--align", default="left", choices=align_choices,
                 help="Horizontal alignment {0} [default: %default]".
                 format("|".join(align_choices)))
    p.add_option("--noUTR", default=False, action="store_true",
                 help="Do not plot UTRs [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    gffiles = args
    ngenes = len(gffiles)

    # `canvas` must be a local: it is both passed to get_setups() and used
    # below to space the genes. It was previously referenced without ever
    # being defined in this function.
    canvas = .6
    setups, ratio = get_setups(gffiles, canvas=canvas, noUTR=opts.noUTR)

    align = opts.align
    xs = .2 if align == "left" else .8
    yinterval = canvas / ngenes
    ys = .8
    tip = .01
    for genename, mrnabed, cdsbeds in setups:
        ExonGlyph(root, xs, ys, mrnabed, cdsbeds, ratio=ratio, align=align)
        genename = _(genename)
        # The gene name goes on the outer side of the glyph; no name is
        # drawn for "center" alignment
        if align == "left":
            root.text(xs - tip, ys, genename, ha="right", va="center")
        elif align == "right":
            root.text(xs + tip, ys, genename, ha="left", va="center")
        ys -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "exons.pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
def amborella(args):
    """
    %prog amborella seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure that calls graphics.karyotype and graphics.synteny.
    """
    # The docstring above is handed to OptionParser as the usage text.
    p = OptionParser(amborella.__doc__)
    p.add_option("--tree",
                 help="Display trees on the bottom of the figure [default: %default]")
    p.add_option("--switch",
                 help="Rename the seqid with two-column file [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    # Five positional arguments are mandatory; otherwise show help and exit
    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args
    switch = opts.switch
    tree = opts.tree

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Both plots are drawn onto the same full-figure axes
    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=switch, tree=tree)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .5, .68, .5)

    # annotate the WGD events
    fc = 'lightslategrey'
    x = .05
    radius = .012
    # Raw strings: "\g" and "\e" are not valid escape sequences, so the
    # non-raw form triggers an invalid-escape warning on Python 3. The
    # string values are unchanged.
    TextCircle(root, x, .86, r'$\gamma$', radius=radius)
    TextCircle(root, x, .95, r'$\epsilon$', radius=radius)
    root.plot([x, x], [.83, .9], ":", color=fc, lw=2)
    # Dotted cap over the angles -70..249 degrees, centred on (x, .95)
    pts = plot_cap((x, .95), np.radians(range(-70, 250)), .02)
    cap_x, cap_y = zip(*pts)
    root.plot(cap_x, cap_y, ":", color=fc, lw=2)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "amborella"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def ploidy(args):
    """
    %prog ploidy seqids layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of B. napus genome.
    """
    # The docstring above is handed to OptionParser as the usage text.
    p = OptionParser(ploidy.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 2:
        sys.exit(not p.print_help())

    seqidsfile, klayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)

    fc = "darkslategrey"
    radius = .012
    ot = -.05  # use this to adjust vertical position of the left panel
    centered = dict(ha="center", va="top", color=fc)

    TextCircle(root, .1, .9 + ot, r'$\gamma$', radius=radius, fc=fc)
    root.text(.1, .88 + ot, r"$\times3$", **centered)
    TextCircle(root, .08, .79 + ot, r'$\alpha$', radius=radius, fc=fc)
    TextCircle(root, .12, .79 + ot, r'$\beta$', radius=radius, fc=fc)
    root.text(.1, .77 + ot, r"$\times3\times2\times2$", **centered)
    root.text(.1, .67 + ot, r"Brassica triplication", size=11, **centered)
    root.text(.1, .65 + ot, r"$\times3\times2\times2\times3$", **centered)
    root.text(.1, .42 + ot, r"Allo-tetraploidy", size=11, **centered)
    root.text(.1, .4 + ot, r"$\times3\times2\times2\times3\times2$",
              **centered)

    bb = dict(boxstyle="round,pad=.5", fc="w", ec="0.5", alpha=0.5)
    # Both halves are raw strings: "\m" is not a valid escape sequence, so
    # the non-raw form triggers an invalid-escape warning on Python 3. The
    # resulting text is unchanged.
    root.text(.5, .2 + ot, r"\noindent\textit{Brassica napus}\\"
              r"(A$\mathsf{_n}$C$\mathsf{_n}$ genome)", ha="center",
              size=16, color="k", bbox=bb)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "napus"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def mtdotplots(args):
    """
    %prog mtdotplots Mt3.5 Mt4.0 medicago.medicago.lifted.1x1.anchors

    Plot Mt3.5 and Mt4.0 side-by-side. This is essentially combined from two
    graphics.dotplot() function calls as panel A and B.
    """
    # The docstring above is handed to OptionParser as the usage text.
    from jcvi.graphics.dotplot import check_beds, dotplot

    parser = OptionParser(mtdotplots.__doc__)
    parser.set_beds()
    opts, args, iopts = parser.set_image_options(args, figsize="16x8", dpi=90)

    if len(args) != 3:
        sys.exit(not parser.print_help())

    dir_a, dir_b, anchors_name = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    left_canvas = fig.add_axes([0, 0, .5, 1])
    right_canvas = fig.add_axes([.5, 0, .5, 1])
    left_plot = fig.add_axes([.05, .1, .4, .8])
    right_plot = fig.add_axes([.55, .1, .4, .8])

    # (directory holding the anchors file, canvas, plot axes, genome names)
    panels = (
        (dir_a, left_canvas, left_plot, "Mt3.5_Mt3.5"),
        (dir_b, right_canvas, right_plot, "Mt4.0_Mt4.0"),
    )
    for idx, (folder, canvas, ax, names) in enumerate(panels):
        if idx:
            # Clear the BED options before the second panel is processed
            opts.qbed = opts.sbed = None
        anchorfile = op.join(folder, anchors_name)
        qbed, sbed, _qorder, _sorder, is_self = \
            check_beds(anchorfile, parser, opts)
        dotplot(anchorfile, qbed, sbed, fig, canvas, ax,
                is_self=is_self, genomenames=names)

    # Panel letters in the top-left corner of each half
    for xpos, letter in ((.03, "A"), (.53, "B")):
        root.text(xpos, .95, letter, ha="center", va="center", size=36)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig("mtdotplots" + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def movieframe(args):
    """
    %prog movieframe tour test.clm contigs.ref.anchors

    Draw heatmap and synteny in the same plot.
    """
    # The docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(movieframe.__doc__)
    parser.add_option("--label", help="Figure title")
    parser.set_beds()
    parser.set_outfile(outfile=None)
    opts, args, iopts = parser.set_image_options(
        args, figsize="16x8", style="white", cmap="coolwarm",
        format="png", dpi=120)

    if len(args) != 3:
        sys.exit(not parser.print_help())

    tour, clmfile, anchorsfile = args
    tour = tour.split(",")

    # Without --outfile the image is "movieframe.<format>"; without --label
    # the title is the image file name minus its last extension
    image_name = opts.outfile or ("movieframe." + iopts.format)
    label = opts.label or op.basename(image_name).rsplit(".", 1)[0]

    clm = CLMFile(clmfile)
    totalbins, bins, breaks = make_bins(tour, clm.tig_to_size)
    matrix = read_clm(clm, totalbins, bins)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])              # whole canvas
    heatmap_ax = fig.add_axes([.05, .1, .4, .8])   # heatmap
    dotplot_ax = fig.add_axes([.55, .1, .4, .8])   # dot plot
    dotplot_canvas = fig.add_axes([.5, 0, .5, 1])  # dot plot canvas

    # Left axis: heatmap
    plot_heatmap(heatmap_ax, matrix, breaks, iopts)

    # Right axis: synteny
    qbed, sbed, _qorder, _sorder, _is_self = \
        check_beds(anchorsfile, parser, opts, sorted=False)
    dotplot(anchorsfile, qbed, sbed, fig, dotplot_canvas, dotplot_ax,
            sep=False, title="")

    # Two centred header lines: the CLM name, then the label
    root.text(.5, .98, clm.name, color="g", ha="center", va="center")
    root.text(.5, .95, label, color="darkslategray",
              ha="center", va="center")
    normalize_axes(root)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def multireport(args):
    """
    %prog multireport layoutfile

    Generate several Ks value distributions in the same figure. If the layout
    file is missing then a template file listing all ks files will be written.

    The layout file contains the Ks file, number of components, colors, and labels:

    # Ks file, ncomponents, label, color, marker
    LAP.sorghum.ks, 1, LAP-sorghum, r, o
    SES.sorghum.ks, 1, SES-sorghum, g, +
    MOL.sorghum.ks, 1, MOL-sorghum, m, ^

    If color or marker is missing, then a random one will be assigned.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(multireport.__doc__)
    p.set_outfile(outfile="Ks_plot.pdf")
    add_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="5x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    layoutfile, = args
    ks_min = opts.vmin
    ks_max = opts.vmax
    bins = opts.bins
    fill = opts.fill
    layout = Layout(layoutfile)

    # Echo the parsed layout to stderr. Written with sys.stderr.write() so it
    # works on Python 2 and 3 alike; the old `print >> sys.stderr, layout`
    # statement raises a TypeError at runtime on Python 3.
    sys.stderr.write(str(layout) + "\n")

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([.12, .1, .8, .8])

    kp = KsPlot(ax, ks_max, bins, legendp=opts.legendp)
    for lo in layout:
        # Keep only the Ks values inside the requested [vmin, vmax] window
        data = KsFile(lo.ksfile)
        data = [x.ng_ks for x in data]
        data = [x for x in data if ks_min <= x <= ks_max]
        kp.add_data(data, lo.components, label=lo.label,
                    color=lo.color, marker=lo.marker,
                    fill=fill, fitted=opts.fit)

    kp.draw(title=opts.title, filename=opts.outfile)
def multireport(args):
    """
    %prog multireport layoutfile

    Generate several Ks value distributions in the same figure. The layout file
    contains the Ks file to plot, number of components, colors, labels. For example:

    # Ks file, ncomponents, label, color, marker
    LAP.sorghum.ks, 1, LAP-sorghum, r, o
    SES.sorghum.ks, 1, SES-sorghum, g, +
    MOL.sorghum.ks, 1, MOL-sorghum, m, ^
    """
    from jcvi.graphics.base import plt

    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(multireport.__doc__)
    p.add_option(
        "--nofit",
        default=False,
        action="store_true",
        help="Do not plot fitted lines [default: %default]",
    )
    add_plot_options(p)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (layoutfile,) = args
    ks_min, ks_max = opts.vmin, opts.vmax
    bins, fill = opts.bins, opts.fill
    fitted = not opts.nofit  # --nofit switches the fitted curves off

    layout = Layout(layoutfile)

    fig = plt.figure(1, (5, 5))
    ax = fig.add_axes([0.12, 0.1, 0.8, 0.8])
    kp = KsPlot(ax, ks_max, bins, legendp=opts.legendp)

    for lo in layout:
        # Pull the ng_ks value of every record, then keep those in range
        ks_values = [rec.ng_ks for rec in read_ks_file(lo.ksfile)]
        in_range = [ks for ks in ks_values if ks_min <= ks <= ks_max]
        kp.add_data(
            in_range,
            lo.components,
            label=lo.label,
            color=lo.color,
            marker=lo.marker,
            fill=fill,
            fitted=fitted,
        )

    kp.draw(title=opts.title)
def main():
    """Parse the command line, draw a Karyotype and save it as karyotype.<format>."""
    # The module docstring serves as the usage text.
    parser = OptionParser(__doc__)
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqidsfile, layoutfile = args

    # A single axes covering the whole figure is the drawing canvas
    fig = plt.figure(1, (iopts.w, iopts.h))
    canvas = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, canvas, seqidsfile, layoutfile)

    # Unit-square coordinates with the axis decorations switched off
    canvas.set_xlim(0, 1)
    canvas.set_ylim(0, 1)
    canvas.set_axis_off()

    image_name = ".".join(("karyotype", iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def utricularia(args):
    """Draw a Synteny figure framed by two rounded boxes, with zoom labels
    and a gene-orientation legend. Usage text is borrowed from synteny.main."""
    from jcvi.graphics.synteny import main as synteny_main

    p = OptionParser(synteny_main.__doc__)
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 3:
        sys.exit(not p.print_help())

    datafile, bedfile, layoutfile = args
    # Output prefix: data file name minus its last extension
    pf = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    s = Synteny(
        fig,
        root,
        datafile,
        bedfile,
        layoutfile,
        loc_label=False,
        switch=opts.switch,
    )

    # Frames around the upper and the lower part of the figure
    light = "lightslategrey"
    for corner, height in (((0.02, 0.69), 0.24), ((0.02, 0.09), 0.48)):
        RoundRect(root, corner, 0.96, height, fill=False, lw=2, ec=light)

    # Zoom level, only annotated when it differs from 1
    za, zb = s.layout[1].ratio, s.layout[-1].ratio
    for zoom, ypos in ((za, 0.89), (zb, 0.12)):
        if zoom == 1:
            continue
        caption = "{}x zoom".format(zoom).replace(".0x", "x")
        root.text(0.96, ypos, caption, color=light,
                  ha="right", va="center", size=14)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.22, 0.3, 0.64, text=True)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = ".".join((pf, iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def plot_one_scaffold(scaffoldID, ssizes, sbed, trios, imagename, iopts,
                      highlights=None):
    """Draw one scaffolding() panel per entry of `trios`, side by side, with
    a title giving the scaffold ID and size, and save it to `imagename`.

    Each trio unpacks to (blastf, qsizes, qbed). Figure 1 is reused and
    cleared on entry.
    """
    npanels = len(trios)

    fig = plt.figure(1, (14, 8))
    plt.cla()
    plt.clf()

    root = fig.add_axes([0, 0, 1, 1])
    # One subplot per trio on a single row (subplot indices are 1-based)
    axes = [fig.add_subplot(1, npanels, idx + 1) for idx in range(npanels)]
    scafsize = ssizes.get_size(scaffoldID)

    for (blastf, qsizes, qbed), ax in zip(trios, axes):
        scaffolding(ax, scaffoldID, blastf, qsizes, ssizes, qbed, sbed,
                    highlights=highlights)

    title = "{0} (size={1})".format(scaffoldID, thousands(scafsize))
    root.text(0.5, 0.95, title, size=18, ha="center", color="b")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(imagename, dpi=iopts.dpi, iopts=iopts)
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # 2 x 2 grid of panels
    A = fig.add_axes([.12, .62, .35, .35])
    B = fig.add_axes([.62, .62, .35, .35])
    C = fig.add_axes([.12, .12, .35, .35])
    D = fig.add_axes([.62, .12, .35, .35])

    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)

    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=.5)
    # The last legend entry needs a literal backslash before each percent
    # sign. A raw string yields exactly that without relying on the invalid
    # escape sequence "\%", which newer Python versions warn about.
    subplot(B, dataB, "Translocation error rate", "Accuracy", xlim=.5,
            legend=("intra-chromosomal", "inter-chromosomal",
                    r"75\% intra + 25\% inter"))
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)

    labels = ((.03, .97, "A"), (.53, .97, "B"),
              (.03, .47, "C"), (.53, .47, "D"))
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def demo(args):
    """
    %prog demo

    Draw sample gene features to illustrate the various fates of duplicate
    genes - to be used in a book chapter.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(demo.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    panel_space = 0.23
    dup_space = 0.025
    # y baselines of the three panels below the ancestral gene
    row1, row2, row3 = [0.9 - k * panel_space for k in (1, 2, 3)]

    # Draw a gene and two regulatory elements at these arbitrary locations
    locs = [
        (0.5, 0.9),  # ancestral gene
        (0.5, row1 + dup_space),  # identical copies
        (0.5, row1 - dup_space),
        (0.5, row2 + dup_space),  # degenerate copies
        (0.5, row2 - dup_space),
        (0.2, row3 + dup_space),  # sub-functionalization
        (0.2, row3 - dup_space),
        (0.5, row3 + dup_space),  # neo-functionalization
        (0.5, row3 - dup_space),
        (0.8, row3 + dup_space),  # non-functionalization
        (0.8, row3 - dup_space),
    ]

    # One two-letter code per location: the face colors of the two
    # regulatory elements; "w" means the element is not drawn.
    default_regulator = "gm"
    regulators = [
        default_regulator, default_regulator, default_regulator,
        "wm", default_regulator,
        "wm", "gw",
        "wb", default_regulator,
        "ww", default_regulator,
    ]

    width = 0.24
    rotation = 30
    tip = 0.02
    for i, ((xx, yy), regulator) in enumerate(zip(locs, regulators)):
        # Full extent of the locus
        x1, x2 = xx - 0.5 * width, xx + 0.5 * width
        Glyph(root, x1, x2, yy)
        if i == 9:  # upper copy for non-functionalization
            continue

        # coding region
        x1, x2 = xx - 0.16 * width, xx + 0.45 * width
        Glyph(root, x1, x2, yy, fc="k")

        # two regulatory elements
        x1, x2 = xx - 0.4 * width, xx - 0.28 * width
        for rx, fc in zip((x1, x2), regulator):
            if fc != "w":
                DoubleCircle(root, rx, yy, fc=fc)

        # Name the regulatory elements on two of the loci
        ya = yy + tip
        if i == 0:
            root.text(x1, ya, "Flower", rotation=rotation, va="bottom")
            root.text(x2, ya, "Root", rotation=rotation, va="bottom")
        elif i == 7:
            root.text(x2, ya, "Leaf", rotation=rotation, va="bottom")

    # Draw arrows between panels (center)
    arrow_dist = 0.08
    for ar_ypos in (0.3, 0.53, 0.76):
        root.annotate(" ", (0.5, ar_ypos), (0.5, ar_ypos + arrow_dist),
                      arrowprops=arrowprops)

    # Arrows fanning out from the center towards the left and right fates
    ar_ypos = 0.3
    for ar_xpos in (0.2, 0.8):
        root.annotate(" ", (ar_xpos, ar_ypos), (0.5, ar_ypos + arrow_dist),
                      arrowprops=arrowprops)

    # Duplication, Degeneration
    for yy, process in ((0.76, "Duplication"), (0.53, "Degeneration")):
        root.text(0.6, yy + 0.02, process, fontweight="bold")

    # Label of fates
    fates = (
        (0.2, "Subfunctionalization"),
        (0.5, "Neofunctionalization"),
        (0.8, "Nonfunctionalization"),
    )
    for xx, fate in fates:
        RoundLabel(root, xx, 0.05, fate)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig("demo.pdf", dpi=300)
def main():
    """Draw a 3 x 3 grid: rows are inversion / indel / duplication, columns
    are the PairwiseAlign, ReadAlign and OpticalMapAlign renderings."""
    # The module docstring serves as the usage text.
    parser = OptionParser(__doc__)
    opts, args, iopts = parser.set_image_options(figsize="9x7")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    (mode,) = args
    assert mode == "demo"

    a, b = 30, 70  # interval passed to invert / delete / duplicate
    pad = 0.08  # margin on the left and at the top
    w = 0.31  # side length of one grid cell

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Row separators
    yy = 1 - pad
    for _ in range(3):
        root.plot((0, 1), (yy, yy), "-", lw=2, color="lightgray")
        yy -= w

    # Row headers
    xx = pad * 0.6
    yy = 1 - pad - 0.5 * w
    for title in ("Inversion", "Indel", "Duplication"):
        root.text(xx, yy, title, ha="center", va="center")
        yy -= w

    # Column headers
    xx = pad + 0.5 * w
    yy = 1 - pad / 2
    for title in ("Assembly alignment", "Read alignment",
                  "Optical map alignment"):
        root.text(xx, yy, title, ha="center", va="center")
        xx += w

    # Panels are drawn column by column, top row first. Only the pairwise
    # alignment passes an explicit gap to duplicate().
    top, middle, bottom = 2 * w, w, 0
    columns = (
        (PairwiseAlign, pad, {"gap": 5}),
        (ReadAlign, pad + w, {}),
        (OpticalMapAlign, pad + 2 * w, {}),
    )
    for aligner, xstart, dup_kwargs in columns:
        panel = aligner(fig, [xstart, top, w, w])
        panel.invert(a, b)
        panel.draw()

        panel = aligner(fig, [xstart, middle, w, w])
        panel.delete(a, b)
        panel.draw()

        panel = aligner(fig, [xstart, bottom, w, w])
        panel.duplicate(a, b, **dup_kwargs)
        panel.draw()

    normalize_axes(root)

    image_name = mode + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main(args):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(main.__doc__)
    p.add_option(
        "--outgroup",
        help="Outgroup for rerooting the tree. "
        + "Use comma to separate multiple taxa.",
    )
    p.add_option("--noreroot", default=False, action="store_true",
                 help="Don't reroot the input tree")
    p.add_option("--rmargin", default=0.2, type="float",
                 help="Set blank rmargin to the right")
    p.add_option("--gffdir", default=None,
                 help="The directory that contain GFF files")
    p.add_option("--sizes", default=None,
                 help="The FASTA file or the sizes file")
    p.add_option("--SH", default=None, type="string", help="SH test p-value")

    group = p.add_option_group("Node style")
    group.add_option("--leafcolor", default="k",
                     help="Font color for the OTUs")
    group.add_option("--leaffont", default=12, help="Font size for the OTUs")
    group.add_option(
        "--leafinfo",
        help="CSV file specifying the leaves: name,color,new_name")
    group.add_option("--scutoff", default=0, type="int",
                     help="cutoff for displaying node support, 0-100")
    group.add_option("--no_support", dest="support", default=True,
                     action="store_false",
                     help="Do not print node support values")
    group.add_option("--no_internal", dest="internal", default=True,
                     action="store_false",
                     help="Do not show internal nodes")

    group = p.add_option_group("Edge style")
    group.add_option(
        "--dashedoutgroup", default=False, action="store_true",
        help="Gray out the edges connecting outgroup and non-outgroup")

    group = p.add_option_group("Additional annotations")
    group.add_option("--geoscale", default=False, action="store_true",
                     help="Plot geological scale")
    group.add_option(
        "--wgdinfo",
        help="CSV specifying the position and style of WGD events")
    group.add_option(
        "--groups",
        help="Group names from top to bottom, to the right of the tree. "
        "Each distinct color in --leafinfo is considered part of the same group. "
        "Separate the names with comma, such as 'eudicots,,monocots,'. "
        "Empty names will be ignored for that specific group. ",
    )

    opts, args, iopts = p.set_image_options(args, figsize="10x7")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (datafile,) = args
    outgroup = opts.outgroup.split(",") if opts.outgroup else None
    reroot = not opts.noreroot

    hpd = None
    if datafile != "demo":
        logging.debug("Load tree file `{0}`".format(datafile))
        t, hpd = parse_tree(datafile)
    else:
        # Built-in example tree, used when the argument is literally "demo"
        t = Tree(
            "(((Os02g0681100:0.1151,Sb04g031800:0.11220)1.0:0.0537, "
            "(Os04g0578800:0.04318,Sb06g026210:0.04798)-1.0:0.08870)1.0:0.06985, "
            "((Os03g0124100:0.08845,Sb01g048930:0.09055)1.0:0.05332, "
            "(Os10g0534700:0.06592,Sb01g030630:0.04824)-1.0:0.07886):0.09389);"
        )

    # Output prefix: input name minus its last extension
    pf = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # --no_support is passed on as a missing support color
    supportcolor = "k" if opts.support else None
    margin = 0.1  # used for both the left margin and the y margin
    rmargin = opts.rmargin

    # Optional annotation files are loaded only when given
    leafinfo = None
    if opts.leafinfo:
        leafinfo = LeafInfoFile(opts.leafinfo).cache
    wgdinfo = None
    if opts.wgdinfo:
        wgdinfo = WGDInfoFile(opts.wgdinfo).cache
    groups = opts.groups.split(",") if opts.groups else []

    draw_tree(
        root,
        t,
        hpd=hpd,
        margin=margin,
        rmargin=rmargin,
        ymargin=margin,
        supportcolor=supportcolor,
        internal=opts.internal,
        outgroup=outgroup,
        dashedoutgroup=opts.dashedoutgroup,
        reroot=reroot,
        gffdir=opts.gffdir,
        sizes=opts.sizes,
        SH=opts.SH,
        scutoff=opts.scutoff,
        leafcolor=opts.leafcolor,
        leaffont=opts.leaffont,
        leafinfo=leafinfo,
        wgdinfo=wgdinfo,
        geoscale=opts.geoscale,
        groups=groups,
    )

    normalize_axes(root)

    image_name = ".".join((pf, iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def fig4(args):
    """
    %prog fig4 layout data

    Napus Figure 4A displays an example deleted region for quartet chromosomes,
    showing read alignments from high GL and low GL lines.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(fig4.__doc__)
    p.add_option(
        "--gauge_step",
        default=200000,
        type="int",
        help="Step size for the base scale",
    )
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 2:
        sys.exit(not p.print_help())

    layout, datadir = args
    layout = F4ALayout(layout, datadir=datadir)
    gs = opts.gauge_step

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Input file names for the synteny panel are fixed
    block, napusbed, slayout = "r28.txt", "all.bed", "r28.layout"
    s = Synteny(fig, root, block, napusbed, slayout, chr_label=False)
    synteny_exts = [(region.xstart, region.xend) for region in s.rr]

    h = 0.1  # height of each coverage track
    order = ["bzh", "yudal"]
    labels = (
        r"\textit{B. napus} A$\mathsf{_n}$2",
        r"\textit{B. rapa} A$\mathsf{_r}$2",
        r"\textit{B. oleracea} C$\mathsf{_o}$2",
        r"\textit{B. napus} C$\mathsf{_n}$2",
    )

    for t in layout:
        # Horizontal extent comes from every other synteny region
        xstart, xend = synteny_exts[2 * t.i]
        canvas = [xstart, t.y, xend - xstart, h]
        root.text(xstart - h, t.y + h / 2, labels[t.i],
                  ha="center", va="center")

        # box_region looks like "<chr>:<start>-<end>"; both ends become
        # the vlines handed to Coverage
        ch, ab = t.box_region.split(":")
        a, b = ab.split("-")
        vlines = [int(a), int(b)]

        Coverage(
            fig,
            root,
            canvas,
            t.seqid,
            (t.start, t.end),
            datadir,
            order=order,
            gauge="top",
            plot_chr_label=False,
            gauge_step=gs,
            palette="gray",
            cap=40,
            hlsuffix="regions.forhaibao",
            vlines=vlines,
        )

    # Highlight GSL biosynthesis genes
    for gid in ((3, "Bra029311"), (5, "Bo2g161590")):
        start, end = s.gg[gid]
        xstart, ystart = start
        xend, yend = end
        x = (xstart + xend) / 2
        # Arrow at the gene's midpoint, from below towards the gene
        arrow = FancyArrowPatch(
            posA=(x, ystart - 0.04),
            posB=(x, ystart - 0.005),
            arrowstyle="fancy,head_width=6,head_length=8",
            lw=3,
            fc="k",
            ec="k",
            zorder=20,
        )
        root.add_patch(arrow)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "napus-fig4." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def wheel(args):
    """
    %prog wheel datafile.csv groups.csv

    Wheel plot that shows continous data in radial axes.
    """
    from math import cos, sin

    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(wheel.__doc__)
    p.add_option(
        "--column",
        default="score",
        choices=("score", "percentile"),
        help="Which column to extract from `datafile.csv`",
    )
    opts, args, iopts = p.set_image_options(args, figsize="5x5", format="png")

    if len(args) != 2:
        sys.exit(not p.print_help())

    datafile, groupsfile = args
    column = opts.column
    linecolor = "#d6d6d6"
    df = parse_data(datafile, score_column=opts.column)
    groups = parse_groups(groupsfile)

    # Keep only the labels present in both inputs, in the groups' order
    labels = [g for g in groups if g in df]
    print(labels)
    df = [df[g] for g in labels]
    print(df)
    groups = [groups[g] for g in labels]
    print(groups)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    categories = len(df)
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)

    brewer = [
        "#FF3B30", "#DD43A0", "#5856D6", "#007AFE",
        "#56BDEC", "#4CD8BA", "#4CD864", "#B0F457",
        "#FEF221", "#FFCC01", "#FF9500", "#FF3B30",
    ]

    # Baseline: one angle per category, plus a default-resolution sweep of
    # the same angular range for drawing full circles
    theta = np.linspace(1.5 * np.pi, 3.5 * np.pi, endpoint=False,
                        num=categories)
    _theta = np.linspace(1.5 * np.pi, 3.5 * np.pi)
    R = max(max(df), 10)
    if column == "score":
        xlim = (-R, R)
        plim = (-R / 2, R)
        ci = (-0.5, 2)
    else:
        xlim = (-100, 100)
        plim = (0, 100)
        ci = (10, 90)

    # Grid
    if column == "score":
        for t in theta:
            ax.plot([t, t], plim, color=linecolor)
    ax.axis("off")

    # Contours
    for t in plim:
        ax.plot(_theta, [t] * len(_theta), color=linecolor)

    # Sectors (groupings): runs of consecutive identical group names
    collapsed_groups = []
    gg = []
    for group, members in groupby(enumerate(groups), lambda x: x[1]):
        collapsed_groups.append(group)
        gg.append([idx for idx, _ in members])

    show_sector = False
    if show_sector:
        theta_interval = 2 * np.pi / categories
        theta_pad = theta_interval / 2 * 0.9
        for color, group in zip(brewer, gg):
            tmin, tmax = min(group), max(group)
            sector(ax, theta[tmin], theta[tmax], theta_pad, R * 0.95,
                   ls="-", color=color, lw=2)

    # Data
    r = df
    closed_plot(ax, theta, r, color="lightslategray", alpha=0.25)
    lo, hi = ci
    for color, group in zip(brewer, gg):
        # Points inside vs. outside the [lo, hi] interval; both sets are
        # currently drawn with the same alpha
        hidden_data = [(theta[x], r[x]) for x in group if lo <= r[x] <= hi]
        shown_data = [(theta[x], r[x]) for x in group
                      if r[x] < lo or r[x] > hi]
        for alpha, data in ((1, hidden_data), (1, shown_data)):
            if data:
                color_theta, color_r = zip(*data)
                ax.plot(color_theta, color_r, "o", color=color, alpha=alpha)

    # Print out data
    diseaseNames, risks = labels, df
    theta_text = ",".join("{:.1f}".format(degrees(x)) for x in theta)
    print("var theta = [{}]".format(theta_text))
    print("var risks = [{}]".format(",".join(str(x) for x in risks)))
    names_text = ",".join(['"{}"'.format(x) for x in diseaseNames])
    print("var diseaseNames = [{}]".format(names_text))

    # Labels
    r = 0.5
    for tl, label in zip(theta, labels):
        x, y = 0.5 + r * cos(tl), 0.5 + r * sin(tl)
        d = degrees(tl)
        if 90 < d % 360 < 270:  # On the left quadrants
            d -= 180
        root.text(x, y, label, size=4, rotation=d,
                  ha="center", va="center", color=linecolor)
        print(x, y, label)

    # Add baseline
    baseline = 0 if column == "score" else 50
    _r = len(_theta) * [baseline]
    closed_plot(ax, _theta, _r, "k:", lw=1, ms=4)

    # Add confidence interval
    if column == "percentile":
        barcolor = "#eeeeee"
        ax.bar([0], [hi - lo], width=2 * np.pi, bottom=lo, fc=barcolor)

    ax.set_rmin(xlim[0])
    ax.set_rmax(xlim[1])

    normalize_axes(root)

    image_name = pf + "-" + column + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A: scatter data with the fitted curve `spl` on top
    xstart, ystart = 0.15, 0.65
    w, h = 0.7, 0.3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    if rho < 0:
        # Negative correlation: flip the y axis
        ax.invert_yaxis()

    # Panel B: the second curve `spld`, sampled along the chromosome
    ystart -= 0.28
    h = 0.25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(
        ax,
        xlim=chrsize,
        ylim=25 * 1e-6,
        xfactor=1e-6,
        xlabel="Physical position (Mb)",
        yfactor=1000000,
        ylabel="Recomb. rate\n(cM / Mb)",
    )
    ax.xaxis.grid(False)

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    sizes = dict(
        (x.component_id,
         (x.object_beg, x.object_end, x.component_span, x.orientation))
        for x in g.agp
        if not x.is_gap
    )
    a_beg, a_end, asize, ao = sizes[a]
    b_beg, b_end, bsize, bo = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = 0.6 / total_size
    y = 0.16
    pad = 0.03
    pb_ratio = w / chrsize

    # Zoom
    lsg = "lightslategray"
    root.plot((0.15 + pb_ratio * a_beg, 0.2),
              (ystart, ystart - 0.14), ":", color=lsg)
    root.plot((0.15 + pb_ratio * b_end, 0.3),
              (ystart, ystart - 0.08), ":", color=lsg)

    # Draw the two scaffolds and one marker on each
    ends = []
    for tag, size, marker, beg in zip(
        (a, b),
        (asize, bsize),
        (49213, 81277),
        (0.2, 0.2 + (asize + gapsize) * ratio),
    ):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag),
                  ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=0.025, fc="gainsboro")

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: the label needs a literal backslash before Leftrightarrow,
    # and "\L" is an invalid escape sequence in a normal string literal.
    root.text(
        sum(markers) / 2,
        ypos + pad,
        r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
        **fontprop
    )

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    # Dotted line across the gap between the two scaffolds
    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(
        sum(markers) / 2,
        ypos - 3 * pad,
        r"$\textit{Estimated gap size: 96,433bp}$",
        color="r",
        ha="center",
        va="center",
    )

    labels = ((0.05, 0.95, "A"), (0.05, 0.6, "B"), (0.05, 0.27, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def heatmap(args):
    """
    %prog heatmap input.npy genome.json

    Plot heatmap based on .npy data file. The .npy stores a square matrix with
    bins of genome, and cells inside the matrix represent number of links
    between bin i and bin j. The `genome.json` contains the offsets of each
    contig/chr so that we know where to draw boundary lines, or extract per
    contig/chromosome heatmap.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--resolution", default=500000, type="int",
                 help="Resolution when counting the links")
    p.add_option("--chr", help="Plot this contig/chr only")
    p.add_option("--nobreaks", default=False, action="store_true",
                 help="Do not plot breaks (esp. if contigs are small)")
    opts, args, iopts = p.set_image_options(args, figsize="10x10",
                                            style="white", cmap="coolwarm",
                                            format="png", dpi=120)

    if len(args) != 2:
        sys.exit(not p.print_help())

    npyfile, jsonfile = args
    contig = opts.chr

    # Load contig/chromosome starts and sizes; the context manager makes
    # sure the file handle is closed
    with open(jsonfile) as fp:
        header = json.load(fp)
    # A resolution stored in the JSON header overrides --resolution
    resolution = header.get("resolution", opts.resolution)
    logging.debug("Resolution set to {}".format(resolution))

    # Load the matrix
    A = np.load(npyfile)

    # Select specific submatrix
    if contig:
        contig_start = header["starts"][contig]
        contig_size = header["sizes"][contig]
        contig_end = contig_start + contig_size
        A = A[contig_start: contig_end, contig_start: contig_end]

    # Several concerns in practice:
    # The diagonal counts may be too strong, this can either be resolved by
    # masking them. Or perform a log transform on the entire heatmap.
    B = A.astype("float64")
    B += 1.0
    B = np.log(B)
    vmin, vmax = 1, 7
    B[B < vmin] = vmin
    B[B > vmax] = vmax
    # Function-call form is valid on Python 2 and 3 (`print B` is not)
    print(B)
    logging.debug("Matrix log-transformation and thresholding ({}-{}) done"
                  .format(vmin, vmax))

    # Canvas
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # whole canvas
    ax = fig.add_axes([.05, .05, .9, .9])  # just the heatmap

    # dict.values() is a view on Python 3, so build a real list before
    # appending the total number of bins
    breaks = list(header["starts"].values())
    breaks.append(header["total_bins"])  # This is actually discarded
    breaks = sorted(breaks)[1:]  # drop the smallest entry
    if contig or opts.nobreaks:
        breaks = []
    plot_heatmap(ax, B, breaks, iopts, binsize=resolution)

    # Title
    pf = npyfile.rsplit(".", 1)[0]
    title = pf
    if contig:
        title += "-{}".format(contig)
    root.text(.5, .98, title, color="darkslategray", size=18,
              ha="center", va="center")

    normalize_axes(root)
    image_name = title + "." + iopts.format
    # macOS sometimes has way too verbose output
    logging.getLogger().setLevel(logging.CRITICAL)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def cotton(args):
    """
    %prog cotton seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure that calls graphics.karyotype and graphic.synteny.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(cotton.__doc__)
    p.add_option("--depthfile", help="Use depth info in this file")
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 5:
        # `not print_help()` gives a non-zero exit status on a usage error,
        # matching the other entry points (print_help() itself returns None,
        # which sys.exit would treat as success)
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args
    switch = opts.switch
    depthfile = opts.depthfile

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    kt = Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=switch)

    light = "lightslategrey"
    # Show the dup depth along the cotton chromosomes
    if depthfile:
        ymin, ymax = 0.9, 0.95
        root.text(0.11, 0.96, "Cotton duplication level",
                  color="gray", size=10)
        root.plot([0.1, 0.95], [ymin, ymin], color="gray")
        root.text(0.96, 0.9, "1x", color="gray", va="center")
        root.plot([0.1, 0.95], [ymax, ymax], color="gray")
        root.text(0.96, 0.95, "6x", color="gray", va="center")

        track = kt.tracks[0]  # Cotton
        depths = []
        # Each row has three whitespace-separated fields; the first is
        # looked up on the track and the third is the integer depth
        with open(depthfile) as fp:
            for row in fp:
                a, b, depth = row.split()
                depth = int(depth)
                try:
                    coords = track.get_coords(a)
                except KeyError:
                    # Not found on this track: skip the row
                    continue
                depths.append((coords, depth))

        # Nothing matched the track: leave the gauge empty instead of
        # failing on zip(*[])
        if depths:
            depths.sort(key=lambda x: (x[0], -x[1]))
            xx, yy = zip(*depths)
            # Depth 1 sits on ymin; every extra copy adds 0.01
            yy = [ymin + 0.01 * (x - 1) for x in yy]
            root.plot(xx, yy, "-", color=light)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.5, 0.68, 0.5)

    # Zoom
    xpos = 0.835
    ytop = 0.9
    xmin, xmax = 0.18, 0.82
    ymin, ymax = ytop, 0.55
    lc = "k"
    kwargs = dict(lw=3, color=lc, mec=lc, mfc="w", zorder=3)
    root.plot((xpos, xpos), (ymax, 0.63), ":o", **kwargs)
    root.plot((xpos, xmin), (ymax, ymin), ":o", **kwargs)
    root.plot((xpos, xmax), (ymax, ymin), ":o", **kwargs)
    RoundRect(root, (0.06, 0.17), 0.92, 0.35, fill=False, lw=2, ec=light)

    # Panels
    root.text(0.05, 0.95, "a", size=20, fontweight="bold")
    root.text(0.1, 0.45, "b", size=20, fontweight="bold")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "cotton"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def composite(args):
    """
    %prog composite fastafile chr1

    Combine line plots, feature bars and alt-bars, different data types
    specified in options. Inputs must be BED-formatted. Three types of viz are
    currently supported:

    --lines: traditional line plots, useful for plotting feature freq
    --bars: show where the extent of features are
    --altbars: similar to bars, yet in two alternating tracks, e.g. scaffolds
    """
    from jcvi.graphics.chromosome import HorizontalChromosome

    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(composite.__doc__)
    p.add_option("--lines",
                 help="Features to plot in lineplot [default: %default]")
    p.add_option("--bars",
                 help="Features to plot in bars [default: %default]")
    p.add_option("--altbars",
                 help="Features to plot in alt-bars [default: %default]")
    p.add_option("--fatten", default=False, action="store_true",
                 help="Help visualize certain narrow features [default: %default]")
    p.add_option("--mode", default="span",
                 choices=("span", "count", "score"),
                 help="Accumulate feature based on [default: %default]")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, seqid = args
    window, shift, subtract = check_window_options(opts)
    fatten = opts.fatten

    # Each option holds a comma-separated list; a missing option means an
    # empty list of beds
    linebeds = get_beds(opts.lines.split(",")) if opts.lines else []
    barbeds = get_beds(opts.bars.split(",")) if opts.bars else []
    altbarbeds = get_beds(opts.altbars.split(",")) if opts.altbars else []

    linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode)

    margin = 0.12
    clen = Sizes(fastafile).mapping[seqid]
    nbins = get_nbins(clen, shift)

    # Zero-length major ticks
    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    root.text(0.5, 0.95, seqid, ha="center", color="darkslategray")

    xstart, xend = margin, 1 - margin
    xlen = xend - xstart
    ratio = xlen / clen

    def to_canvas(pos):
        """Map a sequence position to a canvas x coordinate."""
        return xstart + ratio * pos

    # Line plots
    ax = fig.add_axes([xstart, 0.6, xlen, 0.3])
    lineplot(ax, linebins, nbins, seqid, window, shift)

    # Bar plots
    yy = 0.5
    yinterval = 0.08
    r = 0.01  # half-height of a bar
    fattend = 0.0025  # minimum bar width when --fatten is on
    for bedname in barbeds:
        root.text(xend + 0.01, yy, bedname.split(".")[0], va="center")
        HorizontalChromosome(root, xstart, xend, yy, height=0.02)
        for feat in Bed(bedname):
            start, end = to_canvas(feat.start), to_canvas(feat.end)
            span = end - start
            if fatten and span < fattend:
                span = fattend
            root.add_patch(Rectangle((start, yy - r), span, 2 * r,
                                     lw=0, fc="darkslategray"))
        yy -= yinterval

    # Alternative bar plots
    offset = r / 2
    for bedname in altbarbeds:
        root.text(xend + 0.01, yy, bedname.split(".")[0], va="center")
        for feat in Bed(bedname):
            start, end = to_canvas(feat.start), to_canvas(feat.end)
            span = end - start
            if span < 0.0001:
                continue
            # Flip sides for every feature that is actually drawn
            offset = -offset
            root.add_patch(Rectangle((start, yy + offset), span, 0.003,
                                     lw=0, fc="darkslategray"))
        yy -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def astat(args):
    """
    %prog astat coverage.log

    Create coverage-rho scatter plot.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(astat.__doc__)
    p.add_option("--cutoff", default=1000, type="int", help="Length cutoff")
    p.add_option("--genome", default="", help="Genome name")
    p.add_option(
        "--arrDist",
        default=False,
        action="store_true",
        help="Use arrDist instead",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (covfile,) = args
    cutoff = opts.cutoff
    genome = opts.genome
    plot_arrDist = opts.arrDist

    # Rows passing the cutoff are cached in `<covfile>.<cutoff>`; when that
    # file is up to date it is read instead of the full input
    suffix = ".{0}".format(cutoff)
    small_covfile = covfile + suffix
    update_covfile = need_update(covfile, small_covfile)
    fw = None
    if update_covfile:
        fw = open(small_covfile, "w")
    else:
        logging.debug("Found `{0}`, will use this one".format(small_covfile))
        covfile = small_covfile

    data = []
    msg = "{0} tigs scanned ..."
    tigID = 0  # keeps the final progress message valid with no data rows
    try:
        with open(covfile) as fp:
            header = next(fp)
            if fw is not None:
                fw.write(header)

            for row in fp:
                tigID, rho, covStat, arrDist = row.split()
                tigID = int(tigID)
                if tigID % 1000000 == 0:
                    sys.stderr.write(msg.format(tigID) + "\r")

                rho, covStat, arrDist = [
                    float(x) for x in (rho, covStat, arrDist)
                ]
                if rho < cutoff:
                    continue

                if fw is not None:
                    fw.write(row)
                data.append((tigID, rho, covStat, arrDist))
    finally:
        # Always close the cache file so buffered rows reach the disk
        if fw is not None:
            fw.close()

    print(msg.format(tigID), file=sys.stderr)

    if not data:
        # Nothing passed the cutoff; zip(*data) below would fail
        logging.error("No tigs with rho >= {0}, nothing to plot."
                      .format(cutoff))
        return

    from jcvi.graphics.base import plt, savefig

    logging.debug("Plotting {0} data points.".format(len(data)))
    tigID, rho, covStat, arrDist = zip(*data)

    y = arrDist if plot_arrDist else covStat
    ytag = "arrDist" if plot_arrDist else "covStat"

    fig = plt.figure(1, (7, 7))
    ax = fig.add_axes([0.12, 0.1, 0.8, 0.8])
    ax.plot(rho, y, ".", color="lightslategrey")

    xtag = "rho"
    info = (genome, xtag, ytag)
    title = "{0} {1} vs. {2}".format(*info)
    ax.set_title(title)
    ax.set_xlabel(xtag)
    ax.set_ylabel(ytag)

    if plot_arrDist:
        ax.set_yscale("log")

    imagename = "{0}.png".format(".".join(info))
    savefig(imagename, dpi=150)
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    # NOTE: the docstring above is the command-line usage text (passed to
    # OptionParser); implementation notes are kept in comments.
    #
    # Layout, top to bottom: gauge, one stack plot for the chromosome with a
    # legend on its right, one heatmap strip per --heatmaps feature, and
    # optional markers read from the --meres BED file. The figure is saved as
    # `<chr>.<format>`.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--heatmaps",
                 default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
                 help="Features to plot in heatmaps [default: %default]")
    p.add_option("--meres", default=None,
                 help="Extra centromere / telomere features [default: %default]")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chrom = args
    window, shift, subtract = check_window_options(opts)

    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = get_beds(stacks)
    heatmapbeds = get_beds(heatmaps)
    stackbins = get_binfiles(stackbeds, fastafile, shift, subtract=subtract)
    heatmapbins = get_binfiles(heatmapbeds, fastafile, shift, subtract=subtract)

    margin = .06
    inner = .015
    clen = Sizes(fastafile).mapping[chrom]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = .3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio
    cc = chrom
    if "_" in chrom:
        # Label "abc_12" as "A12": first letter upper-cased + second part.
        ca, cb = chrom.split("_")
        cc = ca[0].upper() + cb

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    nbins = get_nbins(clen, shift)

    # Widen the window to ~1% of the chromosome, rounded down to a whole
    # number of shifts. Floor division is required: with true division
    # `owindow / shift * shift` is just `owindow` as a float.
    owindow = clen // 100
    if owindow > window:
        window = owindow // shift * shift

    stackplot(ax, stackbins, nbins, palette, chrom, window, shift)
    ax.text(.1, .9, cc, va="top", zorder=100, transform=ax.transAxes,
            bbox=dict(boxstyle="round", fc="w", alpha=.5))

    # Legends
    xx += xlen + .01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for label, color in zip(stacks, palette):
        label = label.replace("_", " ")
        label = Registration.get(label, label)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        root.text(xx + 1.5 * inner, yy, label, size=10)

    yh = .05  # Heatmap height
    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for label, binfile in zip(heatmaps, heatmapbins):
        label = label.replace("_", " ")
        label = Registration.get(label, label)

        yy -= yh
        m = stackarray(binfile, chrom, window, shift)

        # Two identical rows so imshow has a 2-D array to stretch into a strip.
        Y = np.array([m, m])
        root.imshow(Y, extent=(xx, xx + xlen, yy, yy + yh - inner),
                    interpolation="nearest", aspect="auto")
        root.text(xx + xlen + .01, yy, label, size=10)

    yy -= yh

    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != chrom:
                continue
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=.01, fc="m", ec="m"))
            root.text(xx + .014, yy, accn, va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chrom + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main(args):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.

    With --barcode a mapping file can be provided to convert seq names to
    eg. species names, useful in unified tree display. This file should have
    distinctive barcodes in column1 and new names in column2, tab delimited.
    """
    # NOTE: the docstring above is the command-line usage text (passed to
    # OptionParser); implementation notes are kept in comments.
    p = OptionParser(main.__doc__)
    p.add_option("--outgroup",
                 help="Outgroup for rerooting the tree. "
                      "Use comma to separate multiple taxa.")
    p.add_option("--noreroot", default=False, action="store_true",
                 help="Don't reroot the input tree [default: %default]")
    p.add_option("--rmargin", default=.3, type="float",
                 help="Set blank rmargin to the right [default: %default]")
    p.add_option("--gffdir", default=None,
                 help="The directory that contain GFF files [default: %default]")
    p.add_option("--sizes", default=None,
                 help="The FASTA file or the sizes file [default: %default]")
    p.add_option("--SH", default=None, type="string",
                 help="SH test p-value [default: %default]")
    p.add_option("--scutoff", default=0, type="int",
                 help="cutoff for displaying node support, 0-100 [default: %default]")
    p.add_option("--barcode", default=None,
                 help="path to seq names barcode mapping file: "
                      "barcode<tab>new_name [default: %default]")
    p.add_option("--leafcolor", default="k",
                 help="Font color for the OTUs, or path to a file "
                      "containing color mappings: leafname<tab>color [default: %default]")
    p.add_option("--leaffont", default=12, help="Font size for the OTUs")
    p.add_option("--geoscale", default=False, action="store_true",
                 help="Plot geological scale")

    opts, args, iopts = p.set_image_options(args, figsize="8x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    outgroup = None
    reroot = not opts.noreroot
    if opts.outgroup:
        outgroup = opts.outgroup.split(",")

    if datafile == "demo":
        # Built-in example tree, used when the positional argument is "demo".
        tx = (
            "(((Os02g0681100:0.1151,Sb04g031800:0.11220)1.0:0.0537, "
            "(Os04g0578800:0.04318,Sb06g026210:0.04798)-1.0:0.08870)1.0:0.06985, "
            "((Os03g0124100:0.08845,Sb01g048930:0.09055)1.0:0.05332, "
            "(Os10g0534700:0.06592,Sb01g030630:0.04824)-1.0:0.07886):0.09389);"
        )
    else:
        logging.debug("Load tree file `{0}`.".format(datafile))
        # Context manager so the handle is closed as soon as the text is read.
        with open(datafile) as fp:
            tx = fp.read()

    pf = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    if opts.geoscale:
        draw_geoscale(root)
    else:
        # --leafcolor is either a color spec or a path to a mapping file;
        # an existing file wins and the fallback color becomes black.
        if op.isfile(opts.leafcolor):
            leafcolor = "k"
            leafcolorfile = opts.leafcolor
        else:
            leafcolor = opts.leafcolor
            leafcolorfile = None

        draw_tree(root, tx, rmargin=opts.rmargin, leafcolor=leafcolor,
                  outgroup=outgroup, reroot=reroot, gffdir=opts.gffdir,
                  sizes=opts.sizes, SH=opts.SH, scutoff=opts.scutoff,
                  barcodefile=opts.barcode, leafcolorfile=leafcolorfile,
                  leaffont=opts.leaffont)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.
    """
    # NOTE: the docstring above is the command-line usage text (passed to
    # OptionParser); implementation notes are kept in comments.
    #
    # Draws three panels (A: a jittered marker scatter with LIS/LDS runs
    # highlighted; B and C: two scaffold arrangements with their LMS values)
    # and saves the figure as `lms.<format>`. Panel A uses random jitter, so
    # the output is not reproducible run to run.
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    w, h = 0.7, 0.35
    ax = fig.add_axes([0.15, 0.6, w, h])

    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]
    # zip() is a one-shot iterator on Python 3; materialize it because the
    # pairs are sliced and indexed below.
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(
        xlis,
        ylis,
        "r-",
        lw=12,
        alpha=0.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(
        xlds,
        ylds,
        "g-",
        lw=12,
        alpha=0.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, 0.15, 0.15 + w, 0.57, height=0.02, lw=2)
    root.text(0.15 + w / 2, 0.55, "Chromosome location (bp)", ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax, xlim=110, ylim=110)

    # Panel B
    w = 0.37
    breaks = (0, 45, 75, 110)  # scaffold boundaries in data coordinates
    ax = fig.add_axes([0.1, 0.12, w, h])
    xdata = list(range(10, 110, 10))
    ydata = ydata_orig = list(range(10, 110, 10))
    # Rebinding builds a new list, so ydata_orig keeps the unshuffled order
    # that Panel C needs.
    ydata = ydata[:4] + ydata[7:] + ydata[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(
        xlis,
        ylis,
        "r-",
        lw=12,
        alpha=0.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    patch = [0.1 + w * x / 110.0 for x in breaks]
    HorizontalChromosome(root, 0.1, 0.1 + w, 0.09, patch=patch, height=0.02, lw=2)
    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.1 + w / 2, 0.04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C
    ax = fig.add_axes([0.6, 0.12, w, h])
    patch = [0.6 + w * x / 110.0 for x in breaks]
    ydata = ydata_orig
    ax.plot(
        xdata,
        ydata,
        "r-",
        lw=12,
        alpha=0.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    HorizontalChromosome(root, 0.6, 0.6 + w, 0.09, patch=patch, height=0.02, lw=2)
    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.6 + w / 2, 0.04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((0.05, 0.95, "A"), (0.05, 0.48, "B"), (0.55, 0.48, "C"))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    """
    Draw a log-scaled heatmap of a CSV matrix, with optional column-group
    labels (--groups) and bracketed row groups (--rowgroups).

    Takes one positional argument, the data file, and writes
    `<prefix>.<cmap>.<format>`. Usage text comes from the module docstring.
    """
    p = OptionParser(__doc__)
    p.add_option("--groups", default=False, action="store_true",
                 help="The first row contains group info [default: %default]")
    p.add_option("--rowgroups", help="Row groupings [default: %default]")
    p.add_option("--horizontalbar", default=False, action="store_true",
                 help="Horizontal color bar [default: vertical]")
    opts, args, iopts = p.set_image_options(figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    rowgroups = opts.rowgroups

    groups, rows, cols, data = parse_csv(datafile, vmin=1, groups=opts.groups)
    cols = [x.replace("ay ", "") for x in cols]

    rgroups = []
    if rowgroups:
        # Each line: "<group name> <row1,row2,...>"; store the name with the
        # first and last row index of the group. The file is closed on exit
        # from the with-block.
        with open(rowgroups) as fp:
            for row in fp:
                a, b = row.split()
                irows = [rows.index(x) for x in b.split(",")]
                rgroups.append((a, min(irows), max(irows)))

    plt.rcParams["axes.linewidth"] = 0

    xstart = .18
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([xstart, .15, .7, .7])

    im = ax.matshow(data, cmap=iopts.cmap,
                    norm=mpl.colors.LogNorm(vmin=1, vmax=10000))
    nrows, ncols = len(rows), len(cols)

    xinterval = .7 / ncols
    yinterval = .7 / max(nrows, ncols)

    plt.xticks(range(ncols), cols, rotation=45, size=10, ha="center")
    plt.yticks(range(nrows), rows, size=10)

    for x in ax.get_xticklines() + ax.get_yticklines():
        x.set_visible(False)

    ax.set_xlim(-.5, ncols - .5)

    t = [1, 10, 100, 1000, 10000]
    pad = .06
    if opts.horizontalbar:
        ypos = .5 * (1 - nrows * yinterval) - pad
        axcolor = fig.add_axes([.3, ypos, .4, .02])
        orientation = "horizontal"
    else:
        axcolor = fig.add_axes([.9, .3, .02, .4])
        orientation = "vertical"
    fig.colorbar(im, cax=axcolor, ticks=t, orientation=orientation)

    if groups:
        # Collapse consecutive equal group names into (name, run length).
        groups = [(key, len(list(nn))) for key, nn in groupby(groups)]
        yy = .5 + .5 * nrows / ncols * .7 + .06
        e = .005
        sep = -.5
        for k, kl in groups:
            # Separator in the array area
            sep += kl
            ax.plot([sep, sep], [-.5, nrows - .5], "w-", lw=2)
            # Group labels on the top
            kl *= xinterval
            root.plot([xstart + e, xstart + kl - e], [yy, yy], "-",
                      color="gray", lw=2)
            root.text(xstart + .5 * kl, yy + e, k, ha="center", color="gray")
            xstart += kl

    if rowgroups:
        from jcvi.graphics.glyph import TextCircle

        xpos = .04
        tip = .015
        assert rgroups
        ystart = 1 - .5 * (1 - nrows * yinterval)
        for gname, start, end in rgroups:
            start = ystart - start * yinterval
            end = ystart - (end + 1) * yinterval
            start -= tip / 3
            end += tip / 3

            # Bracket the groups
            root.plot((xpos, xpos + tip), (start, start), "k-", lw=2)
            root.plot((xpos, xpos), (start, end), "k-", lw=2)
            root.plot((xpos, xpos + tip), (end, end), "k-", lw=2)
            TextCircle(root, xpos, .5 * (start + end), gname)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + opts.cmap + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def report(args):
    '''
    %prog report ksfile

    generate a report given a Ks result file (as produced by synonymous_calc.py).
    describe the median Ks, Ka values, as well as the distribution in stem-leaf plot
    '''
    # NOTE: the docstring above is the command-line usage text (passed to
    # OptionParser); implementation notes are kept in comments.
    from jcvi.utils.cbook import SummaryStats
    from jcvi.graphics.histogram import stem_leaf_plot

    p = OptionParser(report.__doc__)
    p.add_option(
        "--pdf", default=False, action="store_true",
        help="Generate graphic output for the histogram [default: %default]")
    p.add_option(
        "--components", default=1, type="int",
        help="Number of components to decompose peaks [default: %default]")
    add_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="5x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    ks_file, = args
    data = KsFile(ks_file)
    ks_min = opts.vmin
    ks_max = opts.vmax
    bins = opts.bins

    # Identical for every reported column, so computed once. Only "ks"
    # columns are reported; the former non-ks bin spec (0, .6, 10) could
    # never be reached because non-ks fields are skipped.
    tbins = (0, ks_max, bins)
    digit = 2 if (ks_max * 1. / bins) < .1 else 1

    for f in fields.split(",")[1:]:
        if "ks" not in f:
            continue

        columndata = [getattr(x, f) for x in data]
        columndata = [x for x in columndata if ks_min <= x <= ks_max]

        st = SummaryStats(columndata)
        title = "{0} ({1}): ".format(descriptions[f], ks_file)
        title += "Median:{0:.3f} (1Q:{1:.3f}|3Q:{2:.3f}||".\
                format(st.median, st.firstq, st.thirdq)
        title += "Mean:{0:.3f}|Std:{1:.3f}||N:{2})".\
                format(st.mean, st.sd, st.size)

        stem_leaf_plot(columndata, *tbins, digit=digit, title=title)

    if not opts.pdf:
        return

    components = opts.components
    data = [x.ng_ks for x in data]
    data = [x for x in data if ks_min <= x <= ks_max]

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([.12, .1, .8, .8])
    kp = KsPlot(ax, ks_max, opts.bins, legendp=opts.legendp)
    kp.add_data(data, components, fill=opts.fill, fitted=opts.fit)
    kp.draw(title=opts.title)
def dotplot_main(args):
    """
    Command-line entry point for the dot plot.

    Takes one positional argument, the anchor file. A file ending in `.ks` is
    first converted to `<file>.anchors` and, unless --cmaptext is given, the
    colormap box is labelled "*Ks* values". The image is written to --outfile
    or `<anchorfile prefix>.<format>`. Usage text comes from the module
    docstring.
    """
    p = OptionParser(__doc__)
    p.set_beds()
    p.add_option(
        "--synteny",
        default=False,
        action="store_true",
        help="Run a fast synteny scan and display blocks",
    )
    p.add_option("--cmaptext", help="Draw colormap box on the bottom-left corner")
    p.add_option(
        "--vmin",
        dest="vmin",
        type="float",
        default=0,
        help="Minimum value in the colormap",
    )
    p.add_option(
        "--vmax",
        dest="vmax",
        type="float",
        default=2,
        help="Maximum value in the colormap",
    )
    p.add_option(
        "--nmax",
        dest="sample_number",
        type="int",
        default=10000,
        help="Maximum number of data points to plot",
    )
    p.add_option(
        "--minfont",
        type="int",
        default=4,
        help="Do not render labels with size smaller than",
    )
    p.add_option("--colormap", help="Two column file, block id to color mapping")
    p.add_option(
        "--colororientation",
        action="store_true",
        default=False,
        help="Color the blocks based on orientation, similar to mummerplot",
    )
    p.add_option(
        "--nosort",
        default=False,
        action="store_true",
        help="Do not sort the seqids along the axes",
    )
    p.add_option("--nosep", default=False, action="store_true",
                 help="Do not add contig lines")
    p.add_option("--title", help="Title of the dot plot")
    p.set_dotplot_opts()
    p.set_outfile(outfile=None)
    opts, args, iopts = p.set_image_options(args, figsize="9x9", style="dark",
                                            dpi=90, cmap="copper")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (anchorfile,) = args
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts,
                                                     sorted=(not opts.nosort))

    palette = opts.colormap
    if palette:
        palette = Palette(palettefile=palette)
    elif opts.colororientation:
        palette = Palette.from_block_orientation(anchorfile, qbed, sbed)

    cmaptext = opts.cmaptext
    if anchorfile.endswith(".ks"):
        from jcvi.apps.ks import KsFile

        logging.debug("Anchors contain Ks values")
        cmaptext = cmaptext or "*Ks* values"
        anchorksfile = anchorfile + ".anchors"
        if need_update(anchorfile, anchorksfile):
            ksfile = KsFile(anchorfile)
            ksfile.print_to_anchors(anchorksfile)
        anchorfile = anchorksfile

    if opts.skipempty:
        # Keep only the sequences that actually carry an anchor.
        ac = AnchorFile(anchorfile)
        if is_self:
            # Self comparison: one shared set for both axes.
            qseqids = sseqids = set()
        else:
            qseqids, sseqids = set(), set()

        for pair in ac.iter_pairs():
            q, s = pair[:2]
            qi, q = qorder[q]
            si, s = sorder[s]
            qseqids.add(q.seqid)
            sseqids.add(s.seqid)

        if is_self:
            qbed = sbed = subset_bed(qbed, qseqids)
        else:
            qbed = subset_bed(qbed, qseqids)
            sbed = subset_bed(sbed, sseqids)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # the dot plot

    dotplot(
        anchorfile,
        qbed,
        sbed,
        fig,
        root,
        ax,
        vmin=opts.vmin,
        vmax=opts.vmax,
        is_self=is_self,
        synteny=opts.synteny,
        # Pass the resolved label, not opts.cmaptext, so the "*Ks* values"
        # default chosen above for .ks input is not thrown away.
        cmap_text=cmaptext,
        cmap=iopts.cmap,
        genomenames=opts.genomenames,
        sample_number=opts.sample_number,
        minfont=opts.minfont,
        palette=palette,
        sep=(not opts.nosep),
        sepcolor=set1[int(opts.theme)],
        title=opts.title,
        stdpf=(not opts.nostdpf),
        chpf=(not opts.nochpf),
    )

    image_name = opts.outfile or (op.splitext(anchorfile)[0] + "." + opts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    fig.clear()
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and repetitive
    sequences along the chromosomes.
    """
    # NOTE: the docstring above is the command-line usage text (passed to
    # OptionParser); implementation notes are kept in comments.
    #
    # Draws one stack plot per sequence for the first --top sequences of the
    # FASTA file, a gauge on top and a color legend at the bottom, then saves
    # `<fasta prefix>.<format>`.
    p = OptionParser(stack.__doc__)
    p.add_option("--top", default=10, type="int",
                 help="Draw the first N chromosomes [default: %default]")
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--switch",
                 help="Change chr names based on two-column file [default: %default]")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    top = opts.top
    window, shift, subtract = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    stacks = opts.stacks.split(",")
    bedfiles = get_beds(stacks)
    binfiles = get_binfiles(bedfiles, fastafile, shift, subtract=subtract)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = .08
    inner = .02   # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for chrom, clen in s:
        yy -= yinterval
        xlen = clen / ratio
        cc = chrom
        if "_" in chrom:
            # Label "abc_12" as "A12": first letter upper-cased + second part.
            ca, cb = chrom.split("_")
            cc = ca[0].upper() + cb

        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        # Number of bins = ceil(clen / shift). Must stay an integer; true
        # division would make it fractional on Python 3.
        nbins = clen // shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, chrom, window, shift)
        root.text(xx - .04, yy + .5 * (yinterval - inner), cc,
                  ha="center", va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for bedfile, color in zip(bedfiles, palette):
        label = bedfile.rsplit(".", 1)[0].replace("_", " ")
        label = Registration.get(label, label)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        xx += 2 * inner
        root.text(xx, yy, label, size=13)
        # Advance by a rough per-character width so labels do not overlap.
        xx += len(label) * .012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    """
    %prog bedfile id_mappings

    Takes a bedfile that contains the coordinates of features to plot on the
    chromosomes, and `id_mappings` file that map the ids to certain class. Each
    class will get assigned a unique color. `id_mappings` file is optional (if
    omitted, will not paint the chromosome features, except the centromere).
    """
    # NOTE: the docstring above is the command-line usage text (passed to
    # OptionParser); implementation notes are kept in comments.
    p = OptionParser(main.__doc__)
    p.add_option("--title", default="Medicago truncatula v3.5",
                 help="title of the image [default: `%default`]")
    p.add_option("--gauge", default=False, action="store_true",
                 help="draw a gauge with size label [default: %default]")
    p.add_option(
        "--imagemap", default=False, action="store_true",
        help="generate an HTML image map associated with the image [default: %default]")
    p.add_option(
        "--winsize", default=50000, type="int",
        help="if drawing an imagemap, specify the window size (bases) of each map element "
             "[default: %default bp]")
    p.add_option("--empty", help="Write legend for unpainted region")
    opts, args, iopts = p.set_image_options(figsize="6x6", dpi=300)

    if len(args) not in (1, 2):
        # print_help() returns None; negate it so a usage error exits with a
        # non-zero status, like the other entry points.
        sys.exit(not p.print_help())

    bedfile = args[0]
    mappingfile = None
    if len(args) == 2:
        mappingfile = args[1]

    winsize = opts.winsize
    imagemap = opts.imagemap
    w, h = iopts.w, iopts.h
    dpi = iopts.dpi

    prefix = bedfile.rsplit(".", 1)[0]
    figname = prefix + "." + opts.format
    if imagemap:
        imgmapfile = prefix + '.map'
        mapfh = open(imgmapfile, "w")
        print('<map id="' + prefix + '">', file=mapfh)

    if mappingfile:
        mappings = DictFile(mappingfile, delimiter="\t")
        classes = sorted(set(mappings.values()))
        logging.debug("A total of {0} classes found: {1}".format(
            len(classes), ','.join(classes)))
    else:
        mappings = {}
        classes = []
        logging.debug("No classes registered (no id_mappings given).")

    # zip() stops at the shorter input, so only the first six classes
    # receive a color.
    mycolors = "rgbymc"
    class_colors = dict(zip(classes, mycolors))

    bed = Bed(bedfile)
    chr_lens = {}
    centromeres = {}
    for seqid, blines in groupby(bed, key=(lambda x: x.seqid)):
        blines = list(blines)
        maxlen = max(x.end for x in blines)
        chr_lens[seqid] = maxlen

    # Record centromere starts and replace each accession by its class
    # ('-' when the accession has no mapping).
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = '-'

    chr_number = len(chr_lens)
    if centromeres:
        assert chr_number == len(centromeres)

    fig = plt.figure(1, (w, h))
    root = fig.add_axes([0, 0, 1, 1])

    r = .7  # width and height of the whole chromosome set
    xstart, ystart = .15, .85
    xinterval = r / chr_number
    xwidth = xinterval * .5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (chrom, clen) in enumerate(sorted(chr_lens.items())):
        xx = xstart + a * xinterval + .5 * xwidth
        root.text(xx, ystart + .01, chrom, ha="center")
        if centromeres:
            yy = ystart - centromeres[chrom] * ratio
            ChromosomeWithCentromere(root, xx, ystart, yy,
                                     ystart - clen * ratio, width=xwidth)
        else:
            Chromosome(root, xx, ystart, ystart - clen * ratio, width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = .75
    # color the regions
    for chrom in sorted(chr_lens.keys()):
        excess = 0
        bac_list = []
        for b in bed.sub_bed(chrom):
            idx = chr_idxs[chrom]
            klass = b.accn
            start = b.start
            end = b.end
            xx = xstart + idx * xinterval
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle((xx, yystart), xwidth, yyend - yystart,
                          fc=class_colors.get(klass, "w"), lw=0, alpha=alpha))

            if imagemap:
                # `segment` : size of current BAC being investigated + `excess`
                # `excess`  : left-over bases from the previous BAC, as a
                #             result of iterating over `winsize` regions of
                #             `segment`
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        # Less than one window left: carry it to the next BAC.
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly = xx, (1 - ystart) + segment_start * ratio
                    brx, bry = xx + xwidth, (1 - ystart) + segment_end * ratio
                    print('\t' + write_ImageMapLine(
                        tlx, tly, brx, bry, w, h, dpi,
                        chrom + ":" + ",".join(bac_list),
                        segment_start, segment_end), file=mapfh)

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        if imagemap and excess > 0:
            # Flush the final partial window of this chromosome. `b`, `end`
            # and `xx` still hold the values of its last feature.
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly = xx, (1 - ystart) + segment_start * ratio
            brx, bry = xx + xwidth, (1 - ystart) + segment_end * ratio
            print('\t' + write_ImageMapLine(
                tlx, tly, brx, bry, w, h, dpi,
                chrom + ":" + ",".join(bac_list),
                segment_start, segment_end), file=mapfh)

    if imagemap:
        print('</map>', file=mapfh)
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if opts.gauge:
        xstart, ystart = .9, .85
        Gauge(root, xstart, ystart - r, ystart, max_chr_len)

    # class legends, four in a row
    xstart = .1
    xinterval = .2
    xwidth = .04
    yy = .08
    for klass, cc in sorted(class_colors.items()):
        if klass == '-':
            continue
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0, alpha=alpha))
        root.text(xstart + xwidth + .01, yy, klass, fontsize=10)
        xstart += xinterval

    empty = opts.empty
    if empty:
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fill=False, lw=1))
        root.text(xstart + xwidth + .01, yy, empty, fontsize=10)

    root.text(.5, .95, opts.title, fontstyle="italic", ha="center", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(figname, dpi=dpi, iopts=iopts)
def bites(args):
    """
    %prog bites

    Illustrate the pipeline for automated bite discovery.
    """
    # NOTE: the docstring above is the command-line usage text (passed to
    # OptionParser); implementation notes are kept in comments.
    #
    # Draws three stacked track pairs (all HSPs, collinear HSPs only, paired
    # gaps), numbers the gaps, lists four example bites in a 2x2 grid and
    # saves `<fname()>.pdf`.
    p = OptionParser(bites.__doc__)
    # Parse the arguments handed to this function rather than sys.argv.
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (6, 6))
    root = fig.add_axes([0, 0, 1, 1])

    # HSP pairs
    hsps = (
        ((50, 150), (60, 180)),
        ((190, 250), (160, 235)),
        ((300, 360), (270, 330)),
        ((430, 470), (450, 490)),
        ((570, 620), (493, 543)),
        ((540, 555), (370, 385)),  # non-collinear hsps
    )

    titlepos = (0.9, 0.65, 0.4)
    titles = ("Compare orthologous region", "Find collinear HSPs", "Scan paired gaps")
    ytip = 0.01
    mrange = 650.0

    def m(x):
        # Map a sequence coordinate onto the canvas x range [0.1, 0.8].
        return x / mrange * 0.7 + 0.1

    for i, (ya, title) in enumerate(zip(titlepos, titles)):
        yb = ya - 0.1
        plt.plot((0.1, 0.8), (ya, ya), "-", color="gray", lw=2, zorder=1)
        plt.plot((0.1, 0.8), (yb, yb), "-", color="gray", lw=2, zorder=1)
        RoundLabel(root, 0.5, ya + 4 * ytip, title)
        root.text(0.9, ya, "A. thaliana", ha="center", va="center")
        root.text(0.9, yb, "B. rapa", ha="center", va="center")
        myhsps = hsps
        if i >= 1:
            # From the second panel on, drop the non-collinear HSP.
            myhsps = hsps[:-1]
        for (a, b), (c, d) in myhsps:
            a, b, c, d = [m(x) for x in (a, b, c, d)]
            r1 = Rectangle((a, ya - ytip), b - a, 2 * ytip, fc="r", lw=0, zorder=2)
            r2 = Rectangle((c, yb - ytip), d - c, 2 * ytip, fc="r", lw=0, zorder=2)
            r3 = Rectangle((a, ya - ytip), b - a, 2 * ytip, fill=False, zorder=3)
            r4 = Rectangle((c, yb - ytip), d - c, 2 * ytip, fill=False, zorder=3)
            r5 = Polygon(
                ((a, ya - ytip), (c, yb + ytip), (d, yb + ytip), (b, ya - ytip)),
                fc="r",
                alpha=0.2,
            )
            rr = (r1, r2, r3, r4, r5)
            if i == 2:
                # Last panel: no connecting wedge between the two tracks.
                rr = rr[:-1]
            for r in rr:
                root.add_patch(r)

    # Gap pairs (myhsps is the collinear set left over from the last panel)
    hspa, hspb = zip(*myhsps)
    gapa = [(b + 1, c - 1) for (_, b), (c, _) in pairwise(hspa)]
    gapb = [(b + 1, c - 1) for (_, b), (c, _) in pairwise(hspb)]
    gaps = zip(gapa, gapb)
    tpos = titlepos[-1]

    yy = tpos - 0.05
    for number, ((a, b), (c, d)) in enumerate(gaps, start=1):
        a, b, c, d = [m(x) for x in (a, b, c, d)]
        xx = (a + b + c + d) / 4
        TextCircle(root, xx, yy, str(number))

    # Bites
    ystart = 0.24
    ytip = 0.05
    # Named bite_labels, not bites: a local called `bites` would shadow this
    # function and break the `bites.__doc__` lookup above.
    bite_labels = (
        ("Bite(40=>-15)", True),
        ("Bite(50=>35)", False),
        ("Bite(70=>120)", False),
        ("Bite(100=>3)", True),
    )
    for i, (bite, selected) in enumerate(bite_labels):
        xx = 0.15 if (i % 2 == 0) else 0.55
        # Two entries per row: floor division keeps both on the same line.
        yy = ystart - i // 2 * ytip
        TextCircle(root, xx, yy, str(i + 1))
        color = "k" if selected else "gray"
        root.text(xx + ytip, yy, bite, size=10, color=color, va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
def waterlilyGOM(args):
    """
    %prog mcmctree.tre table.csv

    Customized figure to plot phylogeny and related infographics.
    """
    # NOTE: the docstring above is the command-line usage text (passed to
    # OptionParser); implementation notes are kept in comments.
    #
    # Composes a dated tree (leaf and WGD annotations are read from
    # `leafinfo.csv` and `wgdinfo.csv` in the working directory), a two-entry
    # WGD legend at the bottom right and a CSV table at the top left.
    from jcvi.graphics.tree import (
        LeafInfoFile,
        WGDInfoFile,
        draw_tree,
        parse_tree,
        draw_wgd_xy,
    )
    from jcvi.graphics.table import CsvTable, draw_table

    parser = OptionParser(waterlilyGOM.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="12x9")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    treefile, tablefile = args

    logging.debug("Load tree file `{0}`".format(treefile))
    tree, hpd = parse_tree(treefile)

    prefix = treefile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    canvas = fig.add_axes([0, 0, 1, 1])

    # Left and right margin
    margin = 0.15
    rmargin = 0.19

    leafinfo = LeafInfoFile("leafinfo.csv").cache
    wgdinfo = WGDInfoFile("wgdinfo.csv").cache
    group_names = "Monocots,Eudicots,ANA-grade,Gymnosperms".split(",")

    draw_tree(
        canvas,
        tree,
        hpd=hpd,
        margin=margin,
        rmargin=rmargin,
        supportcolor=None,
        internal=False,
        outgroup=["ginkgo"],
        leafinfo=leafinfo,
        wgdinfo=wgdinfo,
        geoscale=True,
        groups=group_names,
    )

    # Bottom right show legends for the WGD circles
    pad = 0.02
    ypad = 0.04
    xstart = 1 - rmargin + pad
    ystart = 0.2
    legend_entries = (
        ("waterlily", "Nymphaealean WGD", 2),
        ("banana", "Other known WGDs", 3),
    )
    for species, caption, row in legend_entries:
        # The first WGD line of the species supplies the marker and color.
        wgdline = wgdinfo[species][0]
        ypos = ystart - row * ypad
        draw_wgd_xy(canvas, xstart, ypos, wgdline)
        canvas.text(
            xstart + pad,
            ypos,
            caption,
            color=wgdline.color,
            va="center",
        )

    # Top left draw the comparison table
    draw_table(
        canvas,
        CsvTable(tablefile),
        extent=(0.02, 0.44, 0.55, 0.985),
        stripe_color="lavender",
        yinflation=iopts.w / iopts.h,
    )

    normalize_axes(canvas)

    savefig(prefix + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def main():
    """
    Command-line entry point for the synteny figure.

    Expects three positional arguments (data file, BED file, layout file),
    forwards the display options to `Synteny` and saves the canvas as
    `<data file prefix>.<format>`. Usage text comes from the module docstring.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "--switch",
        help="Rename the seqid with two-column file [default: %default]",
    )
    parser.add_option(
        "--tree",
        help="Display trees on the bottom of the figure [default: %default]",
    )
    parser.add_option("--extra", help="Extra features in BED format")
    parser.add_option(
        "--gene_style",
        default="Rectangle",
        help="Default <Rectangle> to plot genes as rectangle. Accept <Arrow> to add orientation of genes.",
    )
    parser.add_option(
        "--scalebar",
        default=False,
        action="store_true",
        help="Add scale bar to the plot",
    )
    parser.add_option(
        "--add_gene_legend",
        default=False,
        action="store_true",
        help="Add forward and reverse strand gene legend to the plot",
    )
    parser.add_option(
        "--add_gene_label",
        default=False,
        action="store_true",
        help="Add gene names to the plot",
    )
    parser.add_option(
        "--shadestyle",
        default="curve",
        choices=Shade.Styles,
        help="Style of syntenic wedges",
    )
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    canvas = fig.add_axes([0, 0, 1, 1])

    # Display options passed straight through from the command line.
    synteny_options = dict(
        switch=opts.switch,
        tree=opts.tree,
        extra_features=opts.extra,
        scalebar=opts.scalebar,
        shadestyle=opts.shadestyle,
        gene_legend=opts.add_gene_legend,
        add_gene_label=opts.add_gene_label,
        gene_style=opts.gene_style,
    )
    Synteny(fig, canvas, datafile, bedfile, layoutfile, **synteny_options)

    canvas.set_xlim(0, 1)
    canvas.set_ylim(0, 1)
    canvas.set_axis_off()

    savefig(prefix + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def expr(args):
    """
    %prog expr block exp layout napus.bed

    Plot a composite figure showing synteny and the expression level between
    homeologs in two tissues - total 4 lists of values. block file contains the
    gene pairs between AN and CN.
    """
    from jcvi.graphics.base import red_purple as default_cm

    p = OptionParser(expr.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 4:
        sys.exit(not p.print_help())

    block, exp, layout, napusbed = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    s = Synteny(fig, root, block, napusbed, layout)

    # Import the expression values.
    # Each row holds three whitespace-separated columns: gene id, leaf value,
    # root value. The file is closed as soon as it has been read, and blank
    # rows are skipped instead of failing on unpacking.
    data = {}
    with open(exp) as fp:
        for row in fp:
            if not row.strip():
                continue
            gid, lf, rt = row.split()
            data[gid] = (float(lf), float(rt))

    rA, rB = s.rr
    gA = [x.accn for x in rA.genes]
    gC = [x.accn for x in rB.genes]

    # Genes without expression data default to (0, 0). After the transpose
    # each matrix has one row per tissue and one column per gene.
    A = np.transpose(np.array([data.get(x, (0, 0)) for x in gA]))
    C = np.transpose(np.array([data.get(x, (0, 0)) for x in gC]))

    d, h = .01, .1
    lsg = "lightslategrey"
    coords = s.gg  # Coordinates of the genes
    axes = []
    for j, (y, gg) in enumerate(((.79, gA), (.24, gC))):
        r = s.rr[j]
        x = r.xstart
        w = r.xend - r.xstart
        ax = fig.add_axes([x, y, w, h])
        axes.append(ax)
        root.add_patch(
            Rectangle((x - h, y - d), w + h + d, h + 2 * d,
                      fill=False, ec=lsg, lw=1))
        root.text(x - d, y + 3 * h / 4, "root", ha="right", va="center")
        root.text(x - d, y + h / 4, "leaf", ha="right", va="center")
        # Connector lines end on the heatmap edge that faces the synteny track
        ty = y - 2 * d if y > .5 else y + h + 2 * d
        nrows = len(gg)
        for i, g in enumerate(gg):
            start, end = coords[(j, g)]
            sx, sy = start
            ex, ey = end
            assert sy == ey
            sy = sy + 2 * d if sy > .5 else sy - 2 * d
            # Dotted line from the gene midpoint to the centre of its column
            root.plot(((sx + ex) / 2, x + w * (i + .5) / nrows), (sy, ty),
                      lw=1, ls=":", color="k", alpha=.2)

    axA, axC = axes
    axA.pcolormesh(A, cmap=default_cm)
    # The colorbar below is bound to the second (C) mesh, as before.
    mesh = axC.pcolormesh(C, cmap=default_cm)
    axA.set_xlim(0, len(gA))
    axC.set_xlim(0, len(gC))

    x, y, w, h = .35, .1, .3, .05
    ax_colorbar = fig.add_axes([x, y, w, h])
    fig.colorbar(mesh, cax=ax_colorbar, orientation='horizontal')
    root.text(x - d, y + h / 2, "RPKM", ha="right", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)

    for axis in (axA, axC, root):
        axis.set_axis_off()

    image_name = "napusf4b." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def depth(args):
    """
    %prog depth anchorfile --qbed qbedfile --sbed sbedfile

    Calculate the depths in the two genomes in comparison, given in --qbed and
    --sbed. The synteny blocks will be layered on the genomes, and the
    multiplicity will be summarized to stderr.
    """
    from jcvi.utils.range import range_depth

    p = OptionParser(depth.__doc__)
    p.add_option("--depthfile",
                 help="Generate file with gene and depth [default: %default]")
    p.add_option("--histogram", default=False, action="store_true",
                 help="Plot histograms in PDF")
    p.add_option("--xmax", type="int",
                 help="x-axis maximum to display in plot")
    p.add_option("--title", default=None,
                 help="Title to display in plot")
    p.add_option("--quota", help="Force to use this quota, e.g. 1:1, 1:2 ...")
    p.set_beds()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    anchorfile, = args
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts)
    depthfile = opts.depthfile
    ac = AnchorFile(anchorfile)

    # Collapse every synteny block into one (min, max) rank range per genome.
    qranges = []
    sranges = []
    for ib in ac.blocks:
        q, s, _ = zip(*ib)
        q = [qorder[x] for x in q]
        s = [sorder[x] for x in s]
        qrange = (min(q)[0], max(q)[0])
        srange = (min(s)[0], max(s)[0])
        qranges.append(qrange)
        sranges.append(srange)
        if is_self:
            # Self comparison: both sides of a block layer on the same genome
            qranges.append(srange)

    qgenome = op.basename(qbed.filename).split(".")[0]
    sgenome = op.basename(sbed.filename).split(".")[0]

    qtag = "Genome {0} depths".format(qgenome)
    print("{}:".format(qtag), file=sys.stderr)
    dsq, details = range_depth(qranges, len(qbed))

    # The depth file is closed on every exit path, including the
    # self-comparison early return and any exception raised while writing.
    fw = open(depthfile, "w") if depthfile else None
    try:
        if fw:
            write_details(fw, details, qbed)

        if not is_self:
            stag = "Genome {0} depths".format(sgenome)
            print("{}:".format(stag), file=sys.stderr)
            dss, details = range_depth(sranges, len(sbed))
            if fw:
                write_details(fw, details, sbed)
    finally:
        if fw:
            fw.close()

    if fw:
        logging.debug("Depth written to `{0}`.".format(depthfile))

    if is_self or not opts.histogram:
        return

    from jcvi.graphics.base import plt, quickplot_ax, savefig, normalize_axes

    # Plot two histograms one for query genome, one for subject genome
    # NOTE(review): plt.subplots() opens its own figure, so the (6, 3) size
    # requested on figure 1 may not apply to the saved plot - confirm.
    plt.figure(1, (6, 3))
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    # Dict views cannot be added with `+` on Python 3; build one list of all
    # observed depths. The seed value 4 is the minimum x-axis extent.
    xmax = opts.xmax or max([4] + list(dsq) + list(dss))
    if opts.quota:
        speak, qpeak = opts.quota.split(":")
        qpeak, speak = int(qpeak), int(speak)
    else:
        qpeak = find_peak(dsq)
        speak = find_peak(dss)

    qtag = "# of {} blocks per {} gene".format(sgenome, qgenome)
    stag = "# of {} blocks per {} gene".format(qgenome, sgenome)
    quickplot_ax(ax1, dss, 0, xmax, stag, ylabel="Percentage of genome",
                 highlight=range(1, speak + 1))
    quickplot_ax(ax2, dsq, 0, xmax, qtag, ylabel=None,
                 highlight=range(1, qpeak + 1))

    title = opts.title or "{} vs {} syntenic depths\n{}:{} pattern"\
                    .format(qgenome, sgenome, speak, qpeak)
    root = f.add_axes([0, 0, 1, 1])
    # First line is the headline, anything after the first newline is the
    # highlighted pattern. partition() also accepts a user-supplied --title
    # that has no newline (the pattern line is then empty).
    vs, _, pattern = title.partition('\n')
    root.text(.5, .97, vs, ha="center", va="center", color="darkslategray")
    root.text(.5, .925, pattern, ha="center", va="center",
              color="tomato", size=16)
    print(title, file=sys.stderr)

    normalize_axes(root)

    pf = anchorfile.rsplit(".", 1)[0] + ".depth"
    image_name = pf + ".pdf"
    savefig(image_name)
def deletion(args):
    """
    %prog deletion [deletion-genes|deletion-bases] C2-deletions boleracea.bed

    Plot histogram for napus deletions. Can plot deletion-genes or
    deletion-bases. The three largest segmental deletions will be highlighted
    along with a drawing of the C2 chromosome.
    """
    import math
    from jcvi.formats.bed import Bed
    from jcvi.graphics.chromosome import HorizontalChromosome
    from jcvi.graphics.base import kb_formatter

    p = OptionParser(deletion.__doc__)
    opts, args, iopts = p.set_image_options(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    deletion_genes, deletions, bed = args

    # One integer per line; the handle is closed right after reading and
    # blank lines are ignored.
    with open(deletion_genes) as fp:
        dg = [int(x) for x in fp if x.strip()]
    lsg = "lightslategray"

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([.1, .1, .8, .8])

    # NOTE(review): gene mode is selected by the literal file name
    # "deleted-genes", while the usage text above says "deletion-genes" -
    # confirm which spelling callers actually use.
    gene_mode = deletion_genes == "deleted-genes"
    minval = 2 if gene_mode else 2048
    # 16 log-spaced bin edges from minval up to the largest observation
    bins = np.logspace(math.log(minval, 10), math.log(max(dg), 10), 16)
    ax.hist(dg, bins=bins, fc=lsg, alpha=.75)
    # NOTE(review): `basex` is the spelling used by older Matplotlib releases
    # (newer ones call it `base`) - verify against the pinned version.
    ax.set_xscale('log', basex=2)
    if gene_mode:
        ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
        ax.set_xlabel('No. of deleted genes in each segment')
    else:
        ax.xaxis.set_major_formatter(kb_formatter)
        ax.set_xlabel('No. of deleted bases in each segment')
    ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
    ax.set_ylabel('No. of segments')
    ax.patch.set_alpha(0.1)

    # Draw chromosome C2
    na, nb = .45, .85
    root.text((na + nb) / 2, .54, "ChrC02", ha="center")
    HorizontalChromosome(root, na, nb, .5, height=.025,
                         fc=lsg, fill=True)

    order = Bed(bed).order

    def scale(x):
        # Map a base position onto the drawn chromosome span [na, nb];
        # 52886895 is presumably the length of C2 in bp - TODO confirm.
        return na + x * (nb - na) / 52886895

    with open(deletions) as fp:
        for i, row in enumerate(fp, 1):
            _, genes = row.split()
            genes = genes.split("|")
            _, a = order[genes[0]]
            # The last gene is looked up (so an unknown id still fails here)
            # but is not used for the drawn extent.
            # NOTE(review): the rectangle spans only the first gene
            # (a.start..a.end); a segment extent would be a.start..last.end -
            # confirm the intent before changing.
            _, last = order[genes[-1]]
            mi, mx = scale(a.start), scale(a.end)
            root.add_patch(Rectangle((mi, .475), mx - mi, .05,
                           fc="red", ec="red"))
            if i == 1:   # offset between two adjacent regions for aesthetics
                mi -= .015
            elif i == 2:
                mi += .015
            TextCircle(root, mi, .44, str(i), fc="red")

    # Matching numbered markers placed inside the histogram area
    for i, mi in zip(range(1, 4), (.83, .78, .73)):
        TextCircle(root, mi, .2, str(i), fc="red")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = deletion_genes + ".pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def histogram(args):
    """
    %prog histogram meryl.histogram species K

    Plot the histogram based on meryl K-mer distribution, species and N are
    only used to annotate the graphic.
    """
    p = OptionParser(histogram.__doc__)
    p.add_option("--vmin", dest="vmin", default=1, type="int",
            help="minimum value, inclusive [default: %default]")
    p.add_option("--vmax", dest="vmax", default=100, type="int",
            help="maximum value, inclusive [default: %default]")
    p.add_option("--pdf", default=False, action="store_true",
            help="Print PDF instead of ASCII plot [default: %default]")
    p.add_option("--coverage", default=0, type="int",
            help="Kmer coverage [default: auto]")
    p.add_option("--nopeaks", default=False, action="store_true",
            help="Do not annotate K-mer peaks")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    histfile, species, N = args
    use_ascii = not opts.pdf
    peaks = not opts.nopeaks
    N = int(N)

    # A meryl index is converted to a histogram file first
    if histfile.rsplit(".", 1)[-1] in ("mcdat", "mcidx"):
        logging.debug("CA kmer index found")
        histfile = merylhistogram(histfile)

    ks = KmerSpectrum(histfile)
    ks.analyze(K=N)

    Total_Kmers = int(ks.totalKmers)
    coverage = opts.coverage
    # --coverage (when non-zero) overrides the automatically detected peak
    Kmer_coverage = ks.max2 if not coverage else coverage
    Genome_size = int(round(Total_Kmers * 1. / Kmer_coverage))

    Total_Kmers_msg = "Total {0}-mers: {1}".format(N, thousands(Total_Kmers))
    Kmer_coverage_msg = "{0}-mer coverage: {1}".format(N, Kmer_coverage)
    Genome_size_msg = "Estimated genome size: {0:.1f}Mb".\
                        format(Genome_size / 1e6)
    Repetitive_msg = ks.repetitive
    SNPrate_msg = ks.snprate

    # Use the print function (as elsewhere in this file); the Python 2
    # `print >> stream` form fails at runtime under the print function.
    for msg in (Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg):
        print(msg, file=sys.stderr)

    x, y = ks.get_xy(opts.vmin, opts.vmax)
    title = "{0} {1}-mer histogram".format(species, N)

    if use_ascii:
        asciiplot(x, y, title=title)
        return Genome_size

    plt.figure(1, (6, 6))
    plt.plot(x, y, 'g-', lw=2, alpha=.5)
    ax = plt.gca()

    if peaks:
        t = (ks.min1, ks.max1, ks.min2, ks.max2, ks.min3)
        tcounts = [(cov, cnt) for cov, cnt in ks.counts if cov in t]
        if tcounts:
            x, y = zip(*tcounts)
            tcounts = dict(tcounts)
            plt.plot(x, y, 'ko', lw=2, mec='k', mfc='w')
            # Label a peak only when its coverage value is present in the
            # counts, instead of raising KeyError on a missing one.
            peak_labels = (
                (ks.max1, "SNP peak", {"va": "top"}),
                (ks.max2, "Main peak", {}),
            )
            for peak, text, text_kwargs in peak_labels:
                if peak in tcounts:
                    ax.text(peak, tcounts[peak], text, **text_kwargs)

    messages = [Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg,
                Repetitive_msg, SNPrate_msg]
    write_messages(ax, messages)

    # Leave headroom above the curve for the message block
    ymin, ymax = ax.get_ylim()
    ymax = ymax * 7 / 6

    ax.set_title(markup(title))
    ax.set_ylim((ymin, ymax))

    xlabel, ylabel = "Coverage (X)", "Counts"
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    set_human_axis(ax)

    imagename = histfile.split(".")[0] + ".pdf"
    savefig(imagename, dpi=100)

    return Genome_size
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    from jcvi.formats.bed import Bed

    p = OptionParser(fig3.__doc__)
    p.add_option("--gauge_step", default=10000000, type="int",
                help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    # diverge unpacks into two colours, used below as rr and gg
    diverge = iopts.diverge
    rr, gg = diverge
    # One single-chromosome track per comma-separated name
    chrs = [[x] for x in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    # NOTE(review): `gap` and `template_f3a` are not defined in this function;
    # presumably module-level names - confirm.
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    K = Karyotype(fig, root, seqidsfile, klayout, gap=gap,
                  height=height, lw=2, generank=False, sizes=sizes,
                  heightpad=r, roundrect=True, plot_label=False)

    # Chromosome labels
    for kl in K.layout:
        if kl.empty:
            continue
        lx, ly = kl.xstart, kl.y
        # Tracks starting close to the left edge get their label shifted
        # right and up
        if lx < .11:
            lx += .1
            ly += .06
        label = kl.label
        root.text(lx - .015, ly, label, fontsize=15,
                  ha="right", va="center")

    # Inset with datafiles
    # One data file per track, paired with K.tracks by position via zip()
    datafiles = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                 "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, x) for x in datafiles]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for t, datafile in zip(tracks, datafiles):
        ax = make_affix_axis(fig, t, -r, height=2 * r)
        xy_axes.append(ax)
        chr = t.seqids[0]
        xy = XYtrack(ax, datafile, color="lightslategray")
        start, end = 0, t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, chr, diverge=diverge)
        xy.draw()
        ax.set_xlim(start, end)
        # A second affixed axis at the same offset carries only the bottom
        # spine, used as the base-pair gauge
        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    # Attached to the two middle tracks (index 1 and 2)
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    order = Bed(bedfile).order
    # NOTE(review): the conversion files are read from the literal relative
    # path "data/", not from the `datadir` argument - confirm this is intended.
    # Each file is drawn twice, once per asterisk setting.
    for asterisk in (False, True):
        conversion_track(order, "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         0, "A02", ax_Ar, rr, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         1, "C2", ax_Co, gg, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         0, "A02", ax_Ar, gg, ypos=1, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         1, "C2", ax_Co, rr, ypos=1, asterisk=asterisk)

    # Text annotations on the two middle histogram axes:
    # (axis, "gene1 gene2", horizontal alignment, label)
    Ar, Co = xy_axes[1:3]
    annotations = ((Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
                   (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
                   (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
                   (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
                   (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
                   (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
                   (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
                   (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
                   (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
                   (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"))

    for ax, genes, ha, label in annotations:
        g1, g2 = genes.split()
        x1, x2 = order[g1][1].start, order[g2][1].start
        # The anchor x depends on the alignment: scaled midpoint for
        # "center", second gene for "left", first gene otherwise
        if ha == "center":
            x = (x1 + x2) / 2 * .8
        elif ha == "left":
            x = x2
        else:
            x = x1
        label = r"\textit{{{0}}}".format(label)
        # Labels containing "+" use the first diverge colour, others the second
        color = rr if "+" in label else gg
        ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Plot coverage in resequencing lines
    gstep = 5000000
    # `order` is rebound here: from the BED gene order above to the list of
    # line names passed to Coverage
    order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]

    # Top coverage canvas, horizontally aligned with the first track
    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    c = Coverage(fig, root, canvas1, chr1, (0, s1), datadir,
                 order=order, gauge=None, plot_chr_label=False,
                 gauge_step=gstep, palette="gray", cap=40, hlsuffix=hlsuffix,
                 labels_dict=labels_dict, diverge=diverge)
    yys = c.yys
    x1, x2 = .37, .72
    tip = .02
    # Arrow annotations on the top canvas: (x, y, dx, dy, label); the yys
    # index picks which coverage row the arrow points at
    annotations = ((x1, yys[2] + .3 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[5] + .6 * tip, tip, tip / 2, "FLC"),
                   (x2, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[4] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"))

    # NOTE(review): verify that the `frac` key is still accepted by the
    # Matplotlib version in use.
    arrowprops = dict(facecolor='black', shrink=.05, frac=.5,
                      width=1, headwidth=4)
    for x, y, dx, dy, label in annotations:
        label = r"\textit{{{0}}}".format(label)
        root.annotate(label, xy=(x, y), xytext=(x + dx, y + dy),
                      arrowprops=arrowprops, color=rr, fontsize=9,
                      ha="center", va="center")

    # Bottom coverage canvas, horizontally aligned with the last track
    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig, root, canvas2, chr2, (0, s2), datadir,
                 order=order, gauge=None, plot_chr_label=False,
                 gauge_step=gstep, palette="gray", cap=40, hlsuffix=hlsuffix,
                 labels_dict=labels_dict, diverge=diverge)

    # Panel letters A/B/C
    pad = .03
    labels = ((.1, .67, "A"),
              (t1.xstart - 3 * pad, .95 + pad, "B"),
              (t2.xstart - 3 * pad, .25 + pad, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def multihistogram(args):
    """
    %prog multihistogram *.histogram species

    Plot the histogram based on a set of K-mer histograms. The method is based
    on Star et al.'s method (Atlantic Cod genome paper).
    """
    p = OptionParser(multihistogram.__doc__)
    p.add_option("--kmin", default=15, type="int",
            help="Minimum K-mer size, inclusive")
    p.add_option("--kmax", default=30, type="int",
            help="Maximum K-mer size, inclusive")
    p.add_option("--vmin", default=2, type="int",
            help="Minimum value, inclusive")
    p.add_option("--vmax", default=100, type="int",
            help="Maximum value, inclusive")
    opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300)

    # At least one histogram file plus the trailing species name is required;
    # a lone argument would leave nothing to plot.
    if len(args) < 2:
        sys.exit(not p.print_help())

    histfiles = args[:-1]
    species = args[-1]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([.08, .12, .38, .76])
    B = fig.add_axes([.58, .12, .38, .76])

    lines = []
    legends = []
    genomesizes = []
    for histfile in histfiles:
        ks = KmerSpectrum(histfile)
        x, y = ks.get_xy(opts.vmin, opts.vmax)
        # K is parsed from the file name: the part before the first "." and
        # after the last "-"
        K = get_number(op.basename(histfile).split(".")[0].split("-")[-1])
        if not opts.kmin <= K <= opts.kmax:
            continue

        line, = A.plot(x, y, '-', lw=1)
        lines.append(line)
        legends.append("K = {0}".format(K))
        ks.analyze(K=K)
        genomesizes.append((K, ks.genomesize / 1e6))

    # Fail with a clear message rather than an opaque unpacking error below
    if not genomesizes:
        logging.error("No histogram with K in [{0}, {1}]".
                      format(opts.kmin, opts.kmax))
        sys.exit(1)

    leg = A.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)

    title = "{0} genome K-mer histogram".format(species)
    A.set_title(markup(title))
    xlabel, ylabel = "Coverage (X)", "Counts"
    A.set_xlabel(xlabel)
    A.set_ylabel(ylabel)
    set_human_axis(A)

    title = "{0} genome size estimate".format(species)
    B.set_title(markup(title))
    x, y = zip(*genomesizes)
    B.plot(x, y, "ko", mfc='w')
    # Quadratic trend of estimated genome size against K
    t = np.linspace(opts.kmin - .5, opts.kmax + .5, 100)
    trend = np.poly1d(np.polyfit(x, y, 2))
    B.plot(t, trend(t), "r:")
    xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)"
    B.set_xlabel(xlabel)
    B.set_ylabel(ylabel)
    set_ticklabels_helvetica(B)

    labels = ((.04, .96, 'A'), (.54, .96, 'B'))
    panel_labels(root, labels)

    normalize_axes(root)
    imagename = species + ".multiK.pdf"
    savefig(imagename, dpi=iopts.dpi, iopts=iopts)
def plot(args):
    """
    %prog plot tagged.new.bed chr1

    Plot gene identifiers along a particular chromosome, often to illustrate the
    gene id assignment procedure.
    """
    from jcvi.graphics.base import plt, savefig
    from jcvi.graphics.chromosome import ChromosomeMap

    p = OptionParser(plot.__doc__)
    p.add_option("--firstn", type="int", help="Only plot the first N genes")
    p.add_option("--ymax", type="int", help="Y-axis max value")
    p.add_option("--log", action="store_true",
                help="Write plotting data")
    opts, args, iopts = p.set_image_options(args, figsize="6x4")

    if len(args) != 2:
        sys.exit(not p.print_help())

    taggedbed, seqid = args
    bed = Bed(taggedbed)
    beds = list(bed.sub_bed(seqid))

    # Split genes into pre-existing and newly assigned ids. Each entry is
    # (position among the kept genes, rank parsed from the accession).
    old, new = [], []
    i = 0
    for b in beds:
        accn = b.extra[0]
        if "te" in accn:
            continue

        accn, tag = accn.split("|")
        if tag == "OVERLAP":
            continue

        _, r = atg_name(accn)
        if tag == "NEW":
            new.append((i, r))
        else:
            old.append((i, r))
        i += 1

    ngenes = i

    logging.debug("Imported {0} ranks on {1}.".format(ngenes, seqid))
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    xstart, xend = 0.2, 0.8
    ystart, yend = 0.2, 0.8
    pad = 0.02

    ngenes = opts.firstn or ngenes
    ymax = opts.ymax or 500000

    title = "Assignment of Medtr identifiers"
    # The "first N genes" wording describes the --firstn window, so key the
    # subtitle on that option rather than on --ymax.
    if opts.firstn:
        subtitle = "{0}, first {1} genes".format(seqid, ngenes)
    else:
        subtitle = "{0}, {1} genes ({2} new)".format(seqid, ngenes, len(new))

    chr_map = ChromosomeMap(fig, root, xstart, xend, ystart, yend, pad, 0,
                        ymax, 5, title, subtitle)

    ax = chr_map.axes

    if opts.log:
        from jcvi.utils.table import write_csv
        header = ["x", "y"]
        write_csv(header, new, filename=seqid + ".new")
        write_csv(header, old, filename=seqid + ".old")

    # Either category may be empty (e.g. no NEW ids on this chromosome);
    # skip it instead of failing on an empty zip().
    for points, style in ((new, "b,"), (old, "r,")):
        if points:
            x, y = zip(*points)
            ax.plot(x, y, style)

    # Legends
    ymid = (ystart + yend) / 2
    y = ymid + pad
    root.plot([0.2], [y], "r.", lw=2)
    root.text(0.2 + pad, y, "Existing Medtr ids", va="center", size=10)
    y = ymid - pad
    root.plot([0.2], [y], "b.", lw=2)
    root.text(0.2 + pad, y, "Newly instantiated ids", va="center", size=10)

    ax.set_xlim(0, ngenes)
    ax.set_ylim(0, ymax)
    ax.set_axis_off()

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + ".identifiers." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    """
    Command-line entry point: parse options, render the synteny plot for the
    (datafile, bedfile, layoutfile) arguments and write the image to disk.
    """
    parser = OptionParser(__doc__)
    add = parser.add_option

    # Plain string options
    add("--switch", help="Rename the seqid with two-column file")
    add("--tree", help="Display trees on the bottom of the figure")
    add("--extra", help="Extra features in BED format")

    # Gene label font size; 0 keeps labels off
    add(
        "--genelabelsize",
        default=0,
        type="int",
        help="Show gene labels at this font size, useful for debugging. "
        "However, plot may appear visually crowded. "
        "Reasonably good values are 2 to 6 [Default: disabled]",
    )
    add("--scalebar", default=False, action="store_true",
        help="Add scale bar to the plot")

    # Appearance choices, restricted to the values the drawing classes expose
    add("--glyphstyle", default="box", choices=Glyph.Styles,
        help="Style of feature glyphs")
    add("--glyphcolor", default="orientation", choices=Glyph.Palette,
        help="Glyph coloring based on")
    add("--shadestyle", default="curve", choices=Shade.Styles,
        help="Style of syntenic wedges")

    opts, args, iopts = parser.set_image_options(figsize="8x7")

    # Anything other than three positional arguments prints help and exits
    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    # Image file shares the datafile name minus its last extension
    stem = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    canvas = fig.add_axes([0, 0, 1, 1])

    drawing_options = {
        "switch": opts.switch,
        "tree": opts.tree,
        "extra_features": opts.extra,
        "genelabelsize": opts.genelabelsize,
        "scalebar": opts.scalebar,
        "shadestyle": opts.shadestyle,
        "glyphstyle": opts.glyphstyle,
        "glyphcolor": opts.glyphcolor,
    }
    Synteny(fig, canvas, datafile, bedfile, layoutfile, **drawing_options)

    # Unit-square canvas with no visible frame
    canvas.set_xlim(0, 1)
    canvas.set_ylim(0, 1)
    canvas.set_axis_off()

    savefig(stem + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)