def scaffold(args):
    """
    %prog scaffold scaffold.fasta synteny.blast synteny.sizes synteny.bed
                         physicalmap.blast physicalmap.sizes physicalmap.bed

    As evaluation of scaffolding, visualize external line of evidences:
    * Plot synteny to an external genome
    * Plot alignments to physical map
    * Plot alignments to genetic map (TODO)

    Each trio defines one panel to be plotted. blastfile defines the matchings
    between the evidences vs scaffolds. Then the evidence sizes, and evidence
    bed to plot dot plots.

    This script will plot a dot in the dot plot in the corresponding location
    the plots are one contig/scaffold per plot.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so maintenance notes live in comments rather than in the docstring.
    #
    # args: command-line tokens (one scaffold FASTA followed by one or more
    # blast/sizes/bed trios). One image is written per scaffold that is at
    # least --cutoff in size.
    from jcvi.graphics.base import set_image_options
    from jcvi.utils.iter import grouper

    p = OptionParser(scaffold.__doc__)
    p.add_option("--cutoff", type="int", default=1000000,
                 help="Plot scaffolds with size larger than [default: %default]")
    p.add_option("--highlights",
                 help="A set of regions in BED format to highlight [default: %default]")
    opts, args, iopts = set_image_options(p, args, figsize="14x8", dpi=150)

    # Need the scaffold file plus at least one complete trio.
    if len(args) < 4 or len(args) % 3 != 1:
        sys.exit(not p.print_help())

    highlights = opts.highlights
    scafsizes = Sizes(args[0])
    trios = [(a, Sizes(b), Bed(c)) for a, b, c in grouper(3, args[1:])]
    hlbed = Bed(highlights) if highlights else None

    for scaffoldID, scafsize in scafsizes.iter_sizes():
        if scafsize < opts.cutoff:
            continue
        logging.debug("Loading {0} (size={1})".format(scaffoldID,
                                                      thousands(scafsize)))

        # Write a single-entry sizes file for this scaffold; the context
        # manager guarantees the handle is closed even if the write fails.
        tmpname = scaffoldID + ".sizes"
        with open(tmpname, "w") as tmp:
            tmp.write("{0}\t{1}".format(scaffoldID, scafsize))

        tmpsizes = Sizes(tmpname)
        tmpsizes.close(clean=True)

        # Fix: always bind `subhighlights`. Previously it was only assigned
        # when --highlights was supplied, so the call below raised NameError
        # in the default (no highlights) case.
        # NOTE(review): assumes plot_one_scaffold accepts highlights=None as
        # "nothing to highlight" -- confirm against its signature.
        if hlbed is not None:
            subhighlights = list(hlbed.sub_bed(scaffoldID))
        else:
            subhighlights = None

        imagename = ".".join((scaffoldID, opts.format))
        plot_one_scaffold(scaffoldID, tmpsizes, None, trios, imagename, iopts,
                          highlights=subhighlights)
def main(tx=None):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.
    """
    # NOTE: the docstring above is used as the command-line usage text.
    #
    # tx: optional tree text. When given, it is drawn instead of the contents
    # of the tree file and the output image prefix becomes "demo"; a single
    # positional argument is still required by the argument check below.
    p = OptionParser(main.__doc__)
    p.add_option("--outgroup", help="Root the tree using the outgroup. "
                 "Use comma to separate multiple taxa.")
    p.add_option("--rmargin", default=.3, type="float",
                 help="Set blank rmargin to the right [default: %default]")
    p.add_option("--gffdir", default=None,
                 help="The directory that contain GFF files [default: %default]")
    p.add_option("--sizes", default=None,
                 help="The FASTA file or the sizes file [default: %default]")

    opts, args, iopts = set_image_options(p, figsize="8x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    outgroup = None
    if opts.outgroup:
        outgroup = opts.outgroup.split(",")

    pf = datafile.rsplit(".", 1)[0]
    if tx:
        pf = "demo"
    else:
        # Fix: read through a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(datafile) as fp:
            tx = fp.read()
        logging.debug("Load tree file `{0}`.".format(datafile))

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    draw_tree(root, tx, rmargin=opts.rmargin,
              outgroup=outgroup, gffdir=opts.gffdir, sizes=opts.sizes)

    # The root axes are used as a unit-square canvas with no visible frame.
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def main(tx=None):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.
    """
    # NOTE: the docstring above is used as the command-line usage text.
    #
    # tx: optional tree text. When given, it is drawn instead of the contents
    # of the tree file and the output image prefix becomes "demo"; a single
    # positional argument is still required by the argument check below.
    p = OptionParser(main.__doc__)
    p.add_option("--outgroup", help="Root the tree using the outgroup. "
                 "Use comma to separate multiple taxa.")
    p.add_option("--rmargin", default=.3, type="float",
                 help="Set blank rmargin to the right [default: %default]")
    p.add_option("--gffdir", default=None,
                 help="The directory that contain GFF files [default: %default]")
    p.add_option("--sizes", default=None,
                 help="The FASTA file or the sizes file [default: %default]")

    opts, args, iopts = set_image_options(p, figsize="8x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    outgroup = None
    if opts.outgroup:
        outgroup = opts.outgroup.split(",")

    pf = datafile.rsplit(".", 1)[0]
    if tx:
        pf = "demo"
    else:
        # Fix: read through a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(datafile) as fp:
            tx = fp.read()
        logging.debug("Load tree file `{0}`.".format(datafile))

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    draw_tree(root, tx, rmargin=opts.rmargin,
              outgroup=outgroup, gffdir=opts.gffdir, sizes=opts.sizes)

    # The root axes are used as a unit-square canvas with no visible frame.
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def main():
    """
    %prog bedfile id_mappings

    Takes a bedfile that contains the coordinates of features to plot on the
    chromosomes, and `id_mappings` file that map the ids to certain class. Each
    class will get assigned a unique color. `id_mappings` file is optional (if
    omitted, will not paint the chromosome features, except the centromere).
    """
    # NOTE: the docstring above is used as the command-line usage text.
    p = OptionParser(main.__doc__)
    p.add_option("--title", default="Medicago truncatula v3.5",
                 help="title of the image [default: `%default`]")
    p.add_option("--gauge", default=False, action="store_true",
                 help="draw a gauge with size label [default: %default]")
    p.add_option("--imagemap", default=False, action="store_true",
                 help="generate an HTML image map associated with the image [default: %default]")
    p.add_option("--winsize", default=50000, type="int",
                 help="if drawing an imagemap, specify the window size (bases) of each map element "
                      "[default: %default bp]")
    opts, args, iopts = set_image_options(p, figsize="6x6", dpi=300)

    if len(args) not in (1, 2):
        # Fix: use the same `not p.print_help()` idiom as the other commands
        # so a usage error is reported through the exit status.
        sys.exit(not p.print_help())

    bedfile = args[0]
    mappingfile = args[1] if len(args) == 2 else None

    winsize = opts.winsize
    imagemap = opts.imagemap
    w, h = iopts.w, iopts.h
    dpi = iopts.dpi

    prefix = bedfile.rsplit(".", 1)[0]
    figname = prefix + "." + opts.format
    if imagemap:
        imgmapfile = prefix + '.map'
        mapfh = open(imgmapfile, "w")
        print >> mapfh, '<map id="' + prefix + '">'

    if mappingfile:
        # Fix: close the mapping file once it has been consumed.
        with open(mappingfile) as fp:
            mappings = dict(x.split() for x in fp)
        classes = sorted(set(mappings.values()))
        logging.debug("A total of {0} classes found: {1}".format(len(classes),
                      ','.join(classes)))
    else:
        mappings = {}
        classes = []
        logging.debug("No classes registered (no id_mappings given).")

    # zip() truncates, so only as many classes as there are color codes
    # receive a color; the rest fall back to "w" at lookup time below.
    mycolors = "wrgbymc"
    class_colors = dict(zip(classes, mycolors))

    bed = Bed(bedfile)
    chr_lens = {}
    centromeres = {}
    # Chromosome length is taken as the largest feature end on that seqid.
    # groupby only merges adjacent records.
    # NOTE(review): presumably Bed iterates grouped by seqid -- confirm.
    for b, blines in groupby(bed, key=(lambda x: x.seqid)):
        blines = list(blines)
        maxlen = max(x.end for x in blines)
        chr_lens[b] = maxlen

    # Record centromere positions and replace each accession by its class
    # ('-' when the accession has no class).
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = '-'

    chr_number = len(chr_lens)
    assert chr_number == len(centromeres), \
        "every chromosome in the bedfile needs a `centromere` feature"

    fig = plt.figure(1, (w, h))
    root = fig.add_axes([0, 0, 1, 1])

    r = .7  # width and height of the whole chromosome set
    xstart, ystart = .15, .85
    xinterval = r / chr_number
    xwidth = xinterval * .5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (chr, cent_position) in enumerate(sorted(centromeres.items())):
        clen = chr_lens[chr]
        xx = xstart + a * xinterval + .5 * xwidth
        yy = ystart - cent_position * ratio
        root.text(xx, ystart + .01, _(chr), ha="center")
        ChromosomeWithCentromere(root, xx, ystart, yy,
                                 ystart - clen * ratio, width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = .75
    # color the regions
    for chr in sorted(chr_lens.keys()):
        excess = 0
        bac_list = []
        # The column position only depends on the chromosome, so compute it
        # once per chromosome rather than once per feature.
        idx = chr_idxs[chr]
        xx = xstart + idx * xinterval
        for b in bed.sub_bed(chr):
            klass = b.accn
            start = b.start
            end = b.end
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(Rectangle((xx, yystart), xwidth, yyend - yystart,
                           fc=class_colors.get(klass, "w"), lw=0, alpha=alpha))

            if imagemap:
                # `segment` : size of current BAC being investigated + `excess`
                # `excess`  : left-over bases from the previous BAC, as a result
                #             of iterating over `winsize` regions of `segment`
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                        xx + xwidth, (1 - ystart) + segment_end * ratio
                    print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry,
                            w, h, dpi, chr + ":" + ",".join(bac_list),
                            segment_start, segment_end)

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        # Flush the last, partially filled window of this chromosome.
        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                xx + xwidth, (1 - ystart) + segment_end * ratio
            print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry,
                    w, h, dpi, chr + ":" + ",".join(bac_list),
                    segment_start, segment_end)

    if imagemap:
        print >> mapfh, '</map>'
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if opts.gauge:
        tip = .008  # the ticks on the gauge bar
        extra = .006  # the offset for the unit label
        xstart, ystart = .9, .85
        yy = ystart
        gauge = int(ceil(max_chr_len / 1e6))
        mb = ratio * 1e6
        yinterval = 2 * mb
        root.plot([xstart, xstart], [yy, yy - r], 'b-', lw=2)
        # One tick every 2 Mb; every 10 Mb gets a longer tick and a label.
        for x in xrange(0, gauge, 2):
            if x % 10:
                root.plot([xstart, xstart + tip], [yy, yy], "b-")
            else:
                root.plot([xstart - tip, xstart + tip], [yy, yy], 'b-', lw=2)
                root.text(xstart + tip + extra, yy, _(x), color="gray", va="center")
            yy -= yinterval
        root.text(xstart, yy - .03, _("Mb"), color="gray", va="center")

    # class legends, four in a row
    xstart = .1
    xinterval = .2
    xwidth = .04
    yy = .08
    for klass, cc in sorted(class_colors.items()):
        if klass == '-':
            continue
        root.add_patch(Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0, alpha=alpha))
        root.text(xstart + xwidth + .01, yy, _(klass), fontsize=9)
        xstart += xinterval

    root.text(.5, .95, opts.title, fontstyle="italic", ha="center", va="center")
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    plt.savefig(figname, dpi=dpi)
    logging.debug("Figure saved to `{0}` {1}".format(figname, iopts))
help="Style of the dots, one of {0} [default: %default]".\ format("|".join(DotStyles))) p.add_option( "--proportional", default=False, action="store_true", help="Make image width:height equal to seq ratio [default: %default]") p.add_option("--stripNames", default=False, action="store_true", help="Remove trailing .? from gene names [default: %default]") p.add_option("--sample", default=None, type="int", help="Only plot maximum of N dots [default: %default]") opts, args, iopts = set_image_options(p, figsize="8x8", dpi=150) qsizes, ssizes = opts.qsizes, opts.ssizes qbed, sbed = opts.qbed, opts.sbed proportional = opts.proportional if len(args) != 1: sys.exit(not p.print_help()) if qbed: qsizes = qsizes or sizes([qbed]) qbed = Bed(qbed) if sbed: ssizes = ssizes or sizes([sbed]) sbed = Bed(sbed)
def main():
    """
    %prog bedfile id_mappings

    Takes a bedfile that contains the coordinates of features to plot on the
    chromosomes, and `id_mappings` file that map the ids to certain class. Each
    class will get assigned a unique color. `id_mappings` file is optional (if
    omitted, will not paint the chromosome features, except the centromere).
    """
    # NOTE: the docstring above is used as the command-line usage text.
    p = OptionParser(main.__doc__)
    p.add_option("--title", default="Medicago truncatula v3.5",
                 help="title of the image [default: `%default`]")
    p.add_option("--gauge", default=False, action="store_true",
                 help="draw a gauge with size label [default: %default]")
    p.add_option(
        "--imagemap", default=False, action="store_true",
        help="generate an HTML image map associated with the image [default: %default]")
    p.add_option(
        "--winsize", default=50000, type="int",
        help="if drawing an imagemap, specify the window size (bases) of each map element "
             "[default: %default bp]")
    opts, args, iopts = set_image_options(p, figsize="6x6", dpi=300)

    if len(args) not in (1, 2):
        # Fix: use the same `not p.print_help()` idiom as the other commands
        # so a usage error is reported through the exit status.
        sys.exit(not p.print_help())

    bedfile = args[0]
    mappingfile = args[1] if len(args) == 2 else None

    winsize = opts.winsize
    imagemap = opts.imagemap
    w, h = iopts.w, iopts.h
    dpi = iopts.dpi

    prefix = bedfile.rsplit(".", 1)[0]
    figname = prefix + "." + opts.format
    if imagemap:
        imgmapfile = prefix + '.map'
        mapfh = open(imgmapfile, "w")
        print >> mapfh, '<map id="' + prefix + '">'

    if mappingfile:
        # Fix: close the mapping file once it has been consumed.
        with open(mappingfile) as fp:
            mappings = dict(x.split() for x in fp)
        classes = sorted(set(mappings.values()))
        logging.debug("A total of {0} classes found: {1}".format(
            len(classes), ','.join(classes)))
    else:
        mappings = {}
        classes = []
        logging.debug("No classes registered (no id_mappings given).")

    # zip() truncates, so only as many classes as there are color codes
    # receive a color; the rest fall back to "w" at lookup time below.
    mycolors = "wrgbymc"
    class_colors = dict(zip(classes, mycolors))

    bed = Bed(bedfile)
    chr_lens = {}
    centromeres = {}
    # Chromosome length is taken as the largest feature end on that seqid.
    # groupby only merges adjacent records.
    # NOTE(review): presumably Bed iterates grouped by seqid -- confirm.
    for b, blines in groupby(bed, key=(lambda x: x.seqid)):
        blines = list(blines)
        maxlen = max(x.end for x in blines)
        chr_lens[b] = maxlen

    # Record centromere positions and replace each accession by its class
    # ('-' when the accession has no class).
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = '-'

    chr_number = len(chr_lens)
    assert chr_number == len(centromeres), \
        "every chromosome in the bedfile needs a `centromere` feature"

    fig = plt.figure(1, (w, h))
    root = fig.add_axes([0, 0, 1, 1])

    r = .7  # width and height of the whole chromosome set
    xstart, ystart = .15, .85
    xinterval = r / chr_number
    xwidth = xinterval * .5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (chr, cent_position) in enumerate(sorted(centromeres.items())):
        clen = chr_lens[chr]
        xx = xstart + a * xinterval + .5 * xwidth
        yy = ystart - cent_position * ratio
        root.text(xx, ystart + .01, _(chr), ha="center")
        ChromosomeWithCentromere(root, xx, ystart, yy,
                                 ystart - clen * ratio, width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = .75
    # color the regions
    for chr in sorted(chr_lens.keys()):
        excess = 0
        bac_list = []
        # The column position only depends on the chromosome, so compute it
        # once per chromosome rather than once per feature.
        idx = chr_idxs[chr]
        xx = xstart + idx * xinterval
        for b in bed.sub_bed(chr):
            klass = b.accn
            start = b.start
            end = b.end
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle((xx, yystart), xwidth, yyend - yystart,
                          fc=class_colors.get(klass, "w"), lw=0, alpha=alpha))

            if imagemap:
                # `segment` : size of current BAC being investigated + `excess`
                # `excess`  : left-over bases from the previous BAC, as a result
                #             of iterating over `winsize` regions of `segment`
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                        xx + xwidth, (1 - ystart) + segment_end * ratio
                    print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry,
                            w, h, dpi, chr + ":" + ",".join(bac_list),
                            segment_start, segment_end)

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        # Flush the last, partially filled window of this chromosome.
        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                xx + xwidth, (1 - ystart) + segment_end * ratio
            print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry,
                    w, h, dpi, chr + ":" + ",".join(bac_list),
                    segment_start, segment_end)

    if imagemap:
        print >> mapfh, '</map>'
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if opts.gauge:
        tip = .008  # the ticks on the gauge bar
        extra = .006  # the offset for the unit label
        xstart, ystart = .9, .85
        yy = ystart
        gauge = int(ceil(max_chr_len / 1e6))
        mb = ratio * 1e6
        yinterval = 2 * mb
        root.plot([xstart, xstart], [yy, yy - r], 'b-', lw=2)
        # One tick every 2 Mb; every 10 Mb gets a longer tick and a label.
        for x in xrange(0, gauge, 2):
            if x % 10:
                root.plot([xstart, xstart + tip], [yy, yy], "b-")
            else:
                root.plot([xstart - tip, xstart + tip], [yy, yy], 'b-', lw=2)
                root.text(xstart + tip + extra, yy, _(x), color="gray", va="center")
            yy -= yinterval
        root.text(xstart, yy - .03, _("Mb"), color="gray", va="center")

    # class legends, four in a row
    xstart = .1
    xinterval = .2
    xwidth = .04
    yy = .08
    for klass, cc in sorted(class_colors.items()):
        if klass == '-':
            continue
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0, alpha=alpha))
        root.text(xstart + xwidth + .01, yy, _(klass), fontsize=9)
        xstart += xinterval

    root.text(.5, .95, opts.title, fontstyle="italic", ha="center", va="center")
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    plt.savefig(figname, dpi=dpi)
    logging.debug("Figure saved to `{0}` {1}".format(figname, iopts))
def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings. The
    paired read span will be converted to a new bedfile that contain the happy
    mates. ctg is the chr/scf/ctg that you want to plot the histogram on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    # NOTE: the docstring above is used as the command-line usage text.
    from jcvi.formats.bed import mates, bedpe

    parser = OptionParser(coverage.__doc__)
    parser.add_option("--ymax", default=None, type="int",
                      help="Limit ymax [default: %default]")
    parser.add_option("--spans", default=False, action="store_true",
                      help="BED files already contain clone spans [default: %default]")
    opts, args, iopts = set_image_options(parser, args, figsize="8x5")

    if len(args) < 3:
        sys.exit(not parser.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    handles = []
    labels = []
    text_y = .9
    # zip() stops at the shorter input, so at most one track per color code
    # is drawn.
    for bedfile, color in zip(bedfiles, "rgbcky"):
        if not opts.spans:
            # Derive the clone-span BED from the read mappings, rebuilding
            # the intermediate files only when need_update() asks for it.
            stem = bedfile.rsplit(".", 1)[0]
            matesfile = stem + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, unused_matesbed = mates([bedfile, "--lib"])

            spansfile = stem + ".spans.bed"
            if need_update(matesfile, spansfile):
                unused_bedpe, spansfile = bedpe(
                    [bedfile, "--span", "--mates={0}".format(matesfile)])
            bedfile = spansfile

        uncovered = size - Bed(bedfile).sum(seqid=ctg)

        label = _(bedfile.split(".")[0])
        msg = "{0}: {1} bp not covered".format(label, thousands(uncovered))
        print >> sys.stderr, msg
        ax.text(.1, text_y, msg, color=color, size=9, transform=ax.transAxes)
        text_y -= .08

        xs, ys = Coverage(bedfile, sizes.filename).get_plot_data(ctg, bins=bins)
        curve, = ax.plot(xs, ys, '-', color=color, lw=2, alpha=.5)
        handles.append(curve)
        labels.append(label)

    legend_box = ax.legend(handles, labels, shadow=True, fancybox=True)
    legend_box.get_frame().set_alpha(.5)

    ax.set_xlim(0, size)
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel("Average depth per {0}Kb".format(size / bins / 1000))
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    plt.savefig(figname, dpi=iopts.dpi)
    logging.debug("Figure saved to `{0}` {1}.".format(figname, iopts))
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    # NOTE: the docstring above is used as the command-line usage text.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--heatmaps",
                 default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
                 help="Features to plot in heatmaps [default: %default]")
    p.add_option("--meres", default=None,
                 help="Extra centromere / telomere features [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    # Parsed once; the original repeated this call further down with the
    # same `opts`, which was redundant.
    window, shift = check_window_options(opts)

    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = [x + ".bed" for x in stacks]
    heatmapbeds = [x + ".bed" for x in heatmaps]
    stackbins = get_binfiles(stackbeds, fastafile, shift)
    heatmapbins = get_binfiles(heatmapbeds, fastafile, shift)

    margin = .06
    inner = .015
    clen = Sizes(fastafile).mapping[chr]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = .3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio

    # Fix: default the label to the raw name. Previously `cc` was only bound
    # for names containing "_", so any other name raised NameError when the
    # label was drawn.
    cc = chr
    if "_" in chr:
        ca, cb = chr.split("_")
        cc = ca[0].upper() + cb

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    # Number of bins needed to cover the chromosome, rounded up.
    nbins = clen / shift
    if clen % shift:
        nbins += 1

    # Widen the window to roughly 1/100 of the chromosome (kept a multiple
    # of `shift`) when the requested window is smaller than that.
    owindow = clen / 100
    if owindow > window:
        window = owindow / shift * shift

    stackplot(ax, stackbins, nbins, palette, chr, window, shift)
    root.text(xx + inner, yy + yinterval - 2 * inner, cc, va="top")

    # Legends
    xx += xlen + .01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for s, color in zip(stacks, palette):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        root.text(xx + 1.5 * inner, yy, s, size=10)

    yh = .05  # Heatmap height
    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for s, binfile in zip(heatmaps, heatmapbins):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy -= yh
        m = stackarray(binfile, chr, window, shift)

        # The track is repeated twice to form the two-row array given to
        # imshow.
        Y = np.array([m, m])
        root.imshow(Y, extent=(xx, xx + xlen, yy, yy + yh - inner),
                    interpolation="nearest", aspect="auto")
        root.text(xx + xlen + .01, yy, s, size=10)

    yy -= yh
    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != chr:
                continue
            # Mark the midpoint of each extra feature on this chromosome.
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=.01, fc="m", ec="m"))
            root.text(xx + .014, yy, _(accn), va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chr + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and repetitive
    sequences along the chromosomes.
    """
    # NOTE: the docstring above is used as the command-line usage text.
    p = OptionParser(stack.__doc__)
    p.add_option("--top", default=10, type="int",
                 help="Draw the first N chromosomes [default: %default]")
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--switch",
                 help="Change chr names based on two-column file [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    top = opts.top
    window, shift = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    bedfiles = [x + ".bed" for x in opts.stacks.split(",")]
    binfiles = get_binfiles(bedfiles, fastafile, shift)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = .08
    inner = .02   # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for chr, clen in s:
        yy -= yinterval
        xlen = clen / ratio

        # Fix: start every iteration from the raw name. Previously `cc` was
        # only assigned for names containing "_", so other names either
        # raised NameError (first iteration) or silently reused the label
        # of the previous chromosome.
        cc = chr
        if "_" in chr:
            ca, cb = chr.split("_")
            cc = ca[0].upper() + cb

        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        # Number of bins needed to cover the chromosome, rounded up.
        nbins = clen / shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, chr, window, shift)
        root.text(xx - .04, yy + .5 * (yinterval - inner), cc,
                  ha="center", va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for b, color in zip(bedfiles, palette):
        b = b.rsplit(".", 1)[0].replace("_", " ")
        b = Registration.get(b, b)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        xx += 2 * inner
        root.text(xx, yy, _(b), size=13)
        # Advance by an estimate of the label width before the next swatch.
        xx += len(b) * .012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def main():
    """Draw the matrix in a CSV file as a log-scaled heatmap image.

    Options and the single data-file argument are parsed by
    ``set_image_options``. The image is saved as ``<prefix>.<cmap>.<format>``.
    """
    # The usage text comes from the module docstring, not from this function.
    p = OptionParser(__doc__)
    p.add_option("--groups", default=False, action="store_true",
                 help="The first row contains group info [default: %default]")
    p.add_option("--rowgroups", help="Row groupings [default: %default]")
    p.add_option("--horizontalbar", default=False, action="store_true",
                 help="Horizontal color bar [default: vertical]")
    p.add_option("--cmap", default="jet",
                 help="Use this color map [default: %default]")
    opts, args, iopts = set_image_options(p, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    rowgroups = opts.rowgroups

    groups, rows, cols, data = parse_csv(datafile, vmin=1, groups=opts.groups)
    cols = [x.replace("ay ", "") for x in cols]

    if rowgroups:
        # Each line is "<group name> <comma-separated row labels>"; a group
        # is stored as the span between its first and last row index.
        rgroups = []
        # Fix: close the file when done and tolerate blank lines, which
        # previously crashed the two-value unpack below.
        with open(rowgroups) as fp:
            for row in fp:
                if not row.strip():
                    continue
                a, b = row.split()
                irows = [rows.index(x) for x in b.split(",")]
                rgroups.append((a, min(irows), max(irows)))

    plt.rcParams["axes.linewidth"] = 0

    xstart = .18
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([xstart, .15, .7, .7])

    default_cm = cm.get_cmap(opts.cmap)
    im = ax.matshow(data, cmap=default_cm, norm=LogNorm(vmin=1, vmax=10000))
    nrows, ncols = len(rows), len(cols)

    xinterval = .7 / ncols
    yinterval = .7 / max(nrows, ncols)

    plt.xticks(range(ncols), cols, rotation=45, size=10, ha="center")
    plt.yticks(range(nrows), rows, size=10)

    for x in ax.get_xticklines() + ax.get_yticklines():
        x.set_visible(False)

    ax.set_xlim(-.5, ncols - .5)

    t = [1, 10, 100, 1000, 10000]
    pad = .06
    if opts.horizontalbar:
        ypos = .5 * (1 - nrows * yinterval) - pad
        axcolor = fig.add_axes([.3, ypos, .4, .02])
        orientation = "horizontal"
    else:
        axcolor = fig.add_axes([.9, .3, .02, .4])
        orientation = "vertical"
    fig.colorbar(im, cax=axcolor, ticks=t, format=_("%d"),
                 orientation=orientation)

    if groups:
        # Collapse consecutive identical group names into (name, run length).
        groups = [(key, len(list(nn))) for key, nn in groupby(groups)]
        yy = .5 + .5 * nrows / ncols * .7 + .06
        e = .005
        sep = -.5
        for k, kl in groups:
            # Separator in the array area
            sep += kl
            ax.plot([sep, sep], [-.5, nrows - .5], "w-", lw=2)
            # Group labels on the top
            kl *= xinterval
            root.plot([xstart + e, xstart + kl - e], [yy, yy], "-",
                      color="gray", lw=2)
            root.text(xstart + .5 * kl, yy + e, k, ha="center", color="gray")
            xstart += kl

    if rowgroups:
        from jcvi.graphics.glyph import TextCircle
        xpos = .04
        tip = .015
        assert rgroups
        ystart = 1 - .5 * (1 - nrows * yinterval)
        for gname, start, end in rgroups:
            start = ystart - start * yinterval
            end = ystart - (end + 1) * yinterval
            start -= tip / 3
            end += tip / 3

            # Bracket the groups
            root.plot((xpos, xpos + tip), (start, start), "k-", lw=2)
            root.plot((xpos, xpos), (start, end), "k-", lw=2)
            root.plot((xpos, xpos + tip), (end, end), "k-", lw=2)
            TextCircle(root, xpos, .5 * (start + end), gname)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + opts.cmap + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
p.add_option("--qbed", help="Path to qbed") p.add_option("--sbed", help="Path to sbed") p.add_option("--qselect", default=0, type="int", help="Minimum size of query contigs to select [default: %default]") p.add_option("--sselect", default=0, type="int", help="Minimum size of subject contigs to select [default: %default]") p.add_option("--style", default="dot", choices=DotStyles, help="Style of the dots, one of {0} [default: %default]".\ format("|".join(DotStyles))) p.add_option("--proportional", default=False, action="store_true", help="Make image width:height equal to seq ratio [default: %default]") p.add_option("--stripNames", default=False, action="store_true", help="Remove trailing .? from gene names [default: %default]") p.add_option("--sample", default=None, type="int", help="Only plot maximum of N dots [default: %default]") opts, args, iopts = set_image_options(p, figsize="8x8", dpi=150) qsizes, ssizes = opts.qsizes, opts.ssizes qbed, sbed = opts.qbed, opts.sbed proportional = opts.proportional if len(args) != 1: sys.exit(not p.print_help()) if qbed: qsizes = qsizes or sizes([qbed]) qbed = Bed(qbed) if sbed: ssizes = ssizes or sizes([sbed]) sbed = Bed(sbed)
if __name__ == "__main__":
    # Script entry point: build the option parser, read the single anchor
    # file argument, then delegate the drawing to dotplot().
    p = OptionParser(__doc__)
    add_beds(p)
    p.add_option(
        "--synteny", default=False, action="store_true",
        help="Run a fast synteny scan and display blocks [default: %default]")
    p.add_option(
        "--cmap", default="Synonymous substitutions (Ks)",
        help="Draw colormap box on the bottom-left corner "
             "[default: `%default`]")
    p.add_option(
        "--vmin", dest="vmin", type="float", default=0,
        help="Minimum value in the colormap [default: %default]")
    p.add_option(
        "--vmax", dest="vmax", type="float", default=1,
        help="Maximum value in the colormap [default: %default]")

    opts, args, iopts = set_image_options(p, sys.argv[1:],
                                          figsize="8x8", dpi=90)

    # Exactly one positional argument (the anchor file) is expected.
    if len(args) != 1:
        sys.exit(not p.print_help())

    qbed, sbed, qorder, sorder, is_self = check_beds(p, opts)

    synteny = opts.synteny
    vmin = opts.vmin
    vmax = opts.vmax
    cmap_text = opts.cmap

    # The image is named after the anchor file, with its extension replaced
    # by the requested image format.
    anchorfile = args[0]
    image_name = ".".join((op.splitext(anchorfile)[0], opts.format))
    dotplot(anchorfile, qbed, sbed, image_name, vmin, vmax, iopts,
            is_self=is_self, synteny=synteny, cmap_text=cmap_text)
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and repetitive
    sequences along the chromosomes.
    """
    # NOTE: the docstring above is used as the command-line usage text.
    p = OptionParser(stack.__doc__)
    p.add_option("--top", default=10, type="int",
                 help="Draw the first N chromosomes [default: %default]")
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option(
        "--switch",
        help="Change chr names based on two-column file [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    top = opts.top
    window, shift, subtract = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    bedfiles = [x + ".bed" for x in opts.stacks.split(",")]
    binfiles = get_binfiles(bedfiles, fastafile, shift, subtract)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = .08
    inner = .02   # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for chr, clen in s:
        yy -= yinterval
        xlen = clen / ratio

        # Fix: start every iteration from the raw name. Previously `cc` was
        # only assigned for names containing "_", so other names either
        # raised NameError (first iteration) or silently reused the label
        # of the previous chromosome.
        cc = chr
        if "_" in chr:
            ca, cb = chr.split("_")
            cc = ca[0].upper() + cb

        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        # Number of bins needed to cover the chromosome, rounded up.
        nbins = clen / shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, chr, window, shift)
        root.text(xx - .04, yy + .5 * (yinterval - inner), cc,
                  ha="center", va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for b, color in zip(bedfiles, palette):
        b = b.rsplit(".", 1)[0].replace("_", " ")
        b = Registration.get(b, b)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        xx += 2 * inner
        root.text(xx, yy, _(b), size=13)
        # Advance by an estimate of the label width before the next swatch.
        xx += len(b) * .012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    # NOTE: the docstring above is used as the command-line usage text.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--heatmaps",
                 default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
                 help="Features to plot in heatmaps [default: %default]")
    p.add_option(
        "--meres", default=None,
        help="Extra centromere / telomere features [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    window, shift, subtract = check_window_options(opts)

    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = [x + ".bed" for x in stacks]
    heatmapbeds = [x + ".bed" for x in heatmaps]
    stackbins = get_binfiles(stackbeds, fastafile, shift, subtract)
    heatmapbins = get_binfiles(heatmapbeds, fastafile, shift, subtract)

    margin = .06
    inner = .015
    clen = Sizes(fastafile).mapping[chr]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = .3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio

    # Fix: default the label to the raw name. Previously `cc` was only bound
    # for names containing "_", so any other name raised NameError when the
    # label was drawn.
    cc = chr
    if "_" in chr:
        ca, cb = chr.split("_")
        cc = ca[0].upper() + cb

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    # Number of bins needed to cover the chromosome, rounded up.
    nbins = clen / shift
    if clen % shift:
        nbins += 1

    # Widen the window to roughly 1/100 of the chromosome (kept a multiple
    # of `shift`) when the requested window is smaller than that.
    owindow = clen / 100
    if owindow > window:
        window = owindow / shift * shift

    stackplot(ax, stackbins, nbins, palette, chr, window, shift)
    root.text(xx + inner, yy + yinterval - 2 * inner, cc, va="top")

    # Legends
    xx += xlen + .01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for s, color in zip(stacks, palette):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        root.text(xx + 1.5 * inner, yy, s, size=10)

    yh = .05  # Heatmap height
    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for s, binfile in zip(heatmaps, heatmapbins):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy -= yh
        m = stackarray(binfile, chr, window, shift)

        # The track is repeated twice to form the two-row array given to
        # imshow.
        Y = np.array([m, m])
        root.imshow(Y, extent=(xx, xx + xlen, yy, yy + yh - inner),
                    interpolation="nearest", aspect="auto")
        root.text(xx + xlen + .01, yy, s, size=10)

    yy -= yh
    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != chr:
                continue
            # Mark the midpoint of each extra feature on this chromosome.
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=.01, fc="m", ec="m"))
            root.text(xx + .014, yy, _(accn), va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chr + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()