Пример #1
0
def wgd(args):
    """
    %prog wgd vplanifoliaA_blocks.bed vplanifoliaA.sizes

    Create a wgd figure.
    """
    import sys

    from jcvi.graphics.chromosome import draw_chromosomes

    # Use this command's own docstring as the usage text; it previously
    # borrowed the usage text of `synteny`.
    p = OptionParser(wgd.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    # Two positional arguments are required. Print usage and exit rather than
    # failing on the tuple unpacking below.
    if len(args) != 2:
        sys.exit(not p.print_help())

    bedfile, sizesfile = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # A single axes covering the whole figure.
    ax1 = fig.add_axes([0, 0, 1, 1])

    title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$"
    draw_chromosomes(
        ax1,
        bedfile,
        sizes=sizesfile,
        iopts=iopts,
        mergedist=200000,
        winsize=50000,
        imagemap=False,
        gauge=True,
        legend=False,
        title=title,
    )

    normalize_axes([ax1])

    # Output name is fixed; it does not follow iopts.format.
    image_name = "wgd.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #2
0
def main(args):
    """
    %prog table.csv

    Render a table on canvas. Input is a CSV file.
    """
    parser = OptionParser(main.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="7x7")

    # Exactly one positional argument (the CSV path) is accepted.
    if len(args) != 1:
        sys.exit(not parser.print_help())

    csvfile = args[0]
    # Output prefix: the input path with its last extension stripped.
    prefix = csvfile.rsplit(".", 1)[0]

    canvas = plt.figure(1, (iopts.w, iopts.h))
    root = canvas.add_axes([0, 0, 1, 1])

    draw_table(root, CsvTable(csvfile))
    normalize_axes(root)

    savefig(prefix + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
Пример #3
0
 def draw(self):
     # Draw both members (om1, om2) before touching the axis limits.
     self.om1.draw()
     self.om2.draw()

     # x runs from 0 to om1.amax; y is fixed to [-8, 8].
     sax = self.sax
     sax.set_xlim(0, self.om1.amax)
     sax.set_ylim(-8, 8)

     normalize_axes(self.ax)
     sax.set_axis_off()
Пример #4
0
 def draw(self, width=0.03):
     ax, sax = self.ax, self.sax
     xpad, ypad = self.xpad, self.ypad

     # Horizontal bar, placed 0.05 below ypad, 1.5x as thick as `width`.
     HorizontalChromosome(
         ax, xpad, 1 - xpad, ypad - 0.05,
         height=width * 1.5, patch=self.apatch, lw=2,
     )
     # Vertical bar, placed 0.05 left of xpad.
     Chromosome(
         ax, xpad - 0.05, ypad, 1 - ypad,
         width=width, patch=self.bpatch, patchcolor=self.bpatchcolor, lw=2,
     )

     # One line per paired entry of self.a / self.b.
     for xs, ys in zip(self.a, self.b):
         sax.plot(xs, ys, "-", color="darkslategrey", lw=2)

     # Blank tick labels on both axes, then clamp the limits.
     sax.set_xticklabels([])
     sax.set_yticklabels([])
     sax.set_xlim((1, self.amax))
     sax.set_ylim((1, self.bmax))
     normalize_axes(ax)
Пример #5
0
 def draw(self):
     # Draw both members (om1, om2) before touching the axis limits.
     self.om1.draw()
     self.om2.draw()

     # x runs from 0 to om1.amax; y is fixed to [-8, 8].
     sax = self.sax
     sax.set_xlim(0, self.om1.amax)
     sax.set_ylim(-8, 8)

     normalize_axes(self.ax)
     sax.set_axis_off()
Пример #6
0
def allelefreq(args):
    """
    %prog allelefreq HD,DM1,SCA1,SCA17

    Plot the allele frequencies of some STRs.
    """
    parser = OptionParser(allelefreq.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="10x10")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    loci = args[0]
    fig, grid = plt.subplots(ncols=2, nrows=2, figsize=(iopts.w, iopts.h))
    (ax1, ax2), (ax3, ax4) = grid
    plt.tight_layout(pad=4)

    # Only the second value returned by read_treds() is used here.
    _, table = read_treds()
    table = table.set_index(["abbreviation"])

    # One locus per panel; zip() stops at the shorter of the two sequences.
    for ax, locus in zip((ax1, ax2, ax3, ax4), loci.split(",")):
        plot_allelefreq(ax, table, locus)

    # Full-figure overlay axes carrying the panel letters.
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    half = 1 / 2.
    corner_labels = (
        (pad / 2, 1 - pad, "A"),
        (half, 1 - pad, "B"),
        (pad / 2, half, "C"),
        (half, half, "D"),
    )
    panel_labels(root, corner_labels)
    normalize_axes(root)

    savefig("allelefreq." + iopts.format, dpi=iopts.dpi, iopts=iopts)
Пример #7
0
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    parser = OptionParser(resample.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    fileA, fileB = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    # Overlay axes for the panel letters, plus one axes per dataset.
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.1, 0.18, 0.32, 0.64])
    B = fig.add_axes([0.6, 0.18, 0.32, 0.64])
    dataA = import_data(fileA)
    dataB = import_data(fileB)

    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    legend = ("anchor rate", "runtime")
    # Both panels share the same axis labels and legend; only data/title differ.
    for panel, data, title in (
        (A, dataA, "Yellow catfish"),
        (B, dataB, "Medicago"),
    ):
        subplot_twinx(panel, data, xlabel, ylabels, title=title, legend=legend)

    panel_labels(root, ((0.04, 0.92, "A"), (0.54, 0.92, "B")))

    normalize_axes(root)
    savefig("resample." + iopts.format, dpi=iopts.dpi, iopts=iopts)
Пример #8
0
def likelihood2(args):
    """
    %prog likelihood2 100_20.json

    Plot the likelihood surface and marginal distributions.
    """
    from matplotlib import gridspec

    parser = OptionParser(likelihood2.__doc__)
    opts, args, iopts = parser.set_image_options(
        args, figsize="10x5", style="white", cmap="coolwarm"
    )

    if len(args) != 1:
        sys.exit(not parser.print_help())

    jsonfile = args[0]
    fig = plt.figure(figsize=(iopts.w, iopts.h))

    # 2x2 grid: the left column is one tall axes, the right column is split.
    grid = gridspec.GridSpec(2, 2)
    left_ax = fig.add_subplot(grid[:, 0])
    top_right_ax = fig.add_subplot(grid[0, 1])
    bottom_right_ax = fig.add_subplot(grid[1, 1])
    plt.tight_layout(pad=3)

    # plot_panel's return value becomes part of the output file name.
    pf = plot_panel(jsonfile, left_ax, top_right_ax, bottom_right_ax, opts.cmap)

    root = fig.add_axes([0, 0, 1, 1])
    normalize_axes(root)

    savefig(
        "likelihood2.{}.".format(pf) + iopts.format,
        dpi=iopts.dpi,
        iopts=iopts,
    )
Пример #9
0
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    parser = OptionParser(resample.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    fileA, fileB = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    # Overlay axes for the panel letters, plus one axes per dataset.
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([.1, .18, .32, .64])
    B = fig.add_axes([.6, .18, .32, .64])
    dataA = import_data(fileA)
    dataB = import_data(fileB)

    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    legend = ("anchor rate", "runtime")
    # Both panels share the same axis labels and legend; only data/title differ.
    for panel, data, title in (
        (A, dataA, "Yellow catfish"),
        (B, dataB, "Medicago"),
    ):
        subplot_twinx(panel, data, xlabel, ylabels, title=title, legend=legend)

    panel_labels(root, ((.04, .92, "A"), (.54, .92, "B")))

    normalize_axes(root)
    savefig("resample." + iopts.format, dpi=iopts.dpi, iopts=iopts)
Пример #10
0
def compare2(args):
    """
    %prog compare2

    Compare performances of various variant callers on simulated STR datasets.
    """
    parser = OptionParser(compare2.__doc__)
    parser.add_option('--maxinsert',
                      default=300,
                      type="int",
                      help="Maximum number of repeats")
    add_simulate_options(parser)
    opts, args, iopts = parser.set_image_options(args, figsize="10x5")

    # This command takes options only, no positional arguments.
    if len(args) != 0:
        sys.exit(not parser.print_help())

    depth, readlen, distance = opts.depth, opts.readlen, opts.distance
    max_insert = opts.maxinsert
    fig, (ax1, ax2) = plt.subplots(ncols=2,
                                   nrows=1,
                                   figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=2)

    # ax1: lobSTR vs TREDPARSE with haploid model
    # ax2: lobSTR vs TREDPARSE with diploid model (parsed with exclude=20)
    panels = (
        (ax1, SIMULATED_HAPLOID,
         "lobstr_results_homo.txt", "tredparse_results_homo.txt", {}),
        (ax2, SIMULATED_DIPLOID,
         "lobstr_results_het.txt", "tredparse_results_het.txt",
         {"exclude": 20}),
    )
    for ax, heading, lobstr_file, tredparse_file, parse_kwargs in panels:
        lobstr_results = parse_results(lobstr_file, **parse_kwargs)
        tredparse_results = parse_results(tredparse_file, **parse_kwargs)
        title = heading + \
                r" ($D=%s\times, L=%dbp, V=%dbp$)" % (depth, readlen, distance)
        plot_compare(ax,
                     title,
                     tredparse_results,
                     lobstr_results,
                     max_insert=max_insert)

    # Same square limits on both panels.
    for ax in (ax1, ax2):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B")))
    normalize_axes(root)

    savefig("tredparse." + iopts.format, dpi=iopts.dpi, iopts=iopts)
Пример #11
0
def compare(args):
    """
    %prog compare Evaluation.csv

    Compare performances of various variant callers on simulated STR datasets.
    """
    p = OptionParser(compare.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    # Output prefix: the input path with its last extension stripped.
    pf = datafile.rsplit(".", 1)[0]
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2,
                                                 nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    bbox = {'facecolor': 'tomato', 'alpha': .2, 'ec': 'w'}
    # Offset (in data units) between a threshold line and its text label.
    text_pad = 2

    # Read benchmark data from the file given on the command line. The path
    # was previously hard-coded to "Evaluation.csv", ignoring `datafile`.
    df = pd.read_csv(datafile)
    truth = df["Truth"]
    # Right edge for the threshold labels; loop-invariant, so computed once.
    label_x = max(truth) - text_pad
    axes = (ax1, ax2, ax3, ax4)
    progs = ("Manta", "Isaac", "GATK", "lobSTR")
    markers = ("bx-", "yo-", "md-", "c+-")

    for ax, prog, marker in zip(axes, progs, markers):
        ax.plot(truth, df[prog], marker)
        ax.plot(truth, truth, 'k--')  # to show diagonal
        # infected_thr / ref_thr are defined outside this function.
        ax.axhline(infected_thr, color='tomato')
        ax.text(label_x,
                infected_thr + text_pad,
                'Risk threshold',
                bbox=bbox,
                ha="right")
        ax.axhline(ref_thr, color='tomato')
        ax.text(label_x,
                ref_thr - text_pad,
                'Reference repeat count',
                bbox=bbox,
                ha="right",
                va="top")
        ax.set_title(SIMULATED_HAPLOID)
        ax.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
        ax.set_ylabel('Num of CAG repeats called')
        ax.legend([prog, 'Truth'], loc='best')

    # Full-figure overlay axes carrying the panel letters.
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B"),
                        (pad / 2, 1 / 2., "C"), (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #12
0
def snpplot(args):
    """
    %prog counts.cdt

    Illustrate the histogram per SNP site.
    """
    p = OptionParser(snpplot.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (datafile,) = args
    # Read in CDT file; the context manager closes the handle when done.
    data = []
    with open(datafile) as fp:
        # Skip the first two lines of the file.
        next(fp)
        next(fp)
        for row in fp:
            # Only columns from index 4 onward are used.
            atoms = row.split()[4:]
            nval = len(atoms)
            values = [float(x) for x in atoms]
            # normalize; the row total is computed once, not per element
            total = sum(values)
            data.append([x * 1.0 / total for x in values])

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    xmin, xmax = 0.1, 0.9
    ymin, ymax = 0.1, 0.9
    yinterval = (ymax - ymin) / len(data)
    # Colour set is chosen from the column count of the last row read.
    colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg")
    ystart = ymax
    for d in data:
        # One stacked horizontal bar per row; segments laid end to end.
        xstart = xmin
        for dd, c in zip(d, colors):
            xend = xstart + (xmax - xmin) * dd
            root.plot((xstart, xend), (ystart, ystart), "-", color=c)
            xstart = xend
        ystart -= yinterval

    root.text(
        0.05,
        0.5,
        "{0} LMD50 SNPs".format(len(data)),
        ha="center",
        va="center",
        rotation=90,
        color="lightslategray",
    )

    for x, t, c in zip((0.3, 0.5, 0.7), ("REF", "ALT", "HET"), "rbg"):
        root.text(x, 0.95, t, color=c, ha="center", va="center")
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #13
0
 def draw(self, width=.03):
     xpad = self.xpad
     # Horizontal bar, shifted down by half of `width`, 1.5x as thick.
     HorizontalChromosome(
         self.ax, xpad, 1 - xpad, self.ypad - width / 2,
         height=width * 1.5, patch=self.apatch, lw=2,
     )

     # Each read draws itself onto the secondary axes.
     sax = self.sax
     for read in self.reads:
         read.draw(sax)

     sax.set_xlim((1, self.amax))
     sax.set_ylim((-1, self.ymax))
     normalize_axes(self.ax)
     sax.set_axis_off()
Пример #14
0
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    # Each benchmark file gets one horizontal band of the figure.
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    # zip() pairs files with tags, so at most len(tags) files are drawn.
    for bc, tag in zip(bcs, tags):
        data = []
        # Context manager closes each benchmark file after parsing.
        with open(bc) as fp:
            for row in fp:
                # Four whitespace-separated fields per line.
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))
        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            # Replaces the Python 2 `print >> sys.stderr` statement, which
            # raises TypeError on Python 3; write() works on both.
            sys.stderr.write(message + "\n")
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    # Row letters and row titles are hard-coded for three rows.
    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                  (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
Пример #15
0
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    # Each benchmark file gets one horizontal band of the figure.
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    # zip() pairs files with tags, so at most len(tags) files are drawn.
    for bc, tag in zip(bcs, tags):
        data = []
        # Context manager closes each benchmark file after parsing.
        with open(bc) as fp:
            for row in fp:
                # Four whitespace-separated fields per line.
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))
        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            print(message, file=sys.stderr)
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    # Row letters and row titles are hard-coded for three rows.
    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                  (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
Пример #16
0
def snpplot(args):
    """
    %prog counts.cdt

    Illustrate the histogram per SNP site.
    """
    p = OptionParser(snpplot.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    # Read in CDT file; the context manager closes the handle when done.
    data = []
    with open(datafile) as fp:
        # Skip the first two lines of the file.
        next(fp)
        next(fp)
        for row in fp:
            # Only columns from index 4 onward are used.
            atoms = row.split()[4:]
            nval = len(atoms)
            values = [float(x) for x in atoms]
            # normalize; the row total is computed once, not per element
            total = sum(values)
            data.append([x * 1. / total for x in values])

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    xmin, xmax = .1, .9
    ymin, ymax = .1, .9
    yinterval = (ymax - ymin) / len(data)
    # Colour set is chosen from the column count of the last row read.
    colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg")
    ystart = ymax
    for d in data:
        # One stacked horizontal bar per row; segments laid end to end.
        xstart = xmin
        for dd, c in zip(d, colors):
            xend = xstart + (xmax - xmin) * dd
            root.plot((xstart, xend), (ystart, ystart), "-", color=c)
            xstart = xend
        ystart -= yinterval

    root.text(.05, .5, "{0} LMD50 SNPs".format(len(data)),
              ha="center", va="center", rotation=90, color="lightslategray")

    for x, t, c in zip((.3, .5, .7), ("REF", "ALT", "HET"), "rbg"):
        root.text(x, .95, t, color=c, ha="center", va="center")
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #17
0
 def draw(self, width=.03):
     xpad = self.xpad
     # Horizontal bar, shifted down by half of `width`, 1.5x as thick.
     HorizontalChromosome(
         self.ax, xpad, 1 - xpad, self.ypad - width / 2,
         height=width * 1.5, patch=self.apatch, lw=2,
     )

     # Each read draws itself onto the secondary axes.
     sax = self.sax
     for read in self.reads:
         read.draw(sax)

     sax.set_xlim((1, self.amax))
     sax.set_ylim((-1, self.ymax))
     normalize_axes(self.ax)
     sax.set_axis_off()
Пример #18
0
 def draw(self, width=.03):
     ax, sax = self.ax, self.sax
     xpad, ypad = self.xpad, self.ypad

     # Horizontal bar, placed .05 below ypad, 1.5x as thick as `width`.
     HorizontalChromosome(
         ax, xpad, 1 - xpad, ypad - .05,
         height=width * 1.5, patch=self.apatch, lw=2,
     )
     # Vertical bar, placed .05 left of xpad.
     Chromosome(
         ax, xpad - .05, ypad, 1 - ypad,
         width=width, patch=self.bpatch, patchcolor=self.bpatchcolor, lw=2,
     )

     # One line per paired entry of self.a / self.b.
     for xs, ys in zip(self.a, self.b):
         sax.plot(xs, ys, "-", color="darkslategrey", lw=2)

     # Blank tick labels on both axes, then clamp the limits.
     sax.set_xticklabels([])
     sax.set_yticklabels([])
     sax.set_xlim((1, self.amax))
     sax.set_ylim((1, self.bmax))
     normalize_axes(ax)
Пример #19
0
def movieframe(args):
    """
    %prog movieframe tour test.clm contigs.ref.anchors

    Draw heatmap and synteny in the same plot.
    """
    parser = OptionParser(movieframe.__doc__)
    parser.add_option("--label", help="Figure title")
    parser.set_beds()
    parser.set_outfile(outfile=None)
    opts, args, iopts = parser.set_image_options(
        args,
        figsize="16x8",
        style="white",
        cmap="coolwarm",
        format="png",
        dpi=120,
    )

    if len(args) != 3:
        sys.exit(not parser.print_help())

    tour, clmfile, anchorsfile = args
    # The tour arrives as one comma-separated string.
    tour = tour.split(",")
    # Fall back to a default file name, and derive the label from that name
    # when --label is not given.
    image_name = opts.outfile or ("movieframe." + iopts.format)
    label = opts.label or op.basename(image_name).rsplit(".", 1)[0]

    clm = CLMFile(clmfile)
    totalbins, bins, breaks = make_bins(tour, clm.tig_to_size)
    M = read_clm(clm, totalbins, bins)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # whole canvas
    heatmap_ax = fig.add_axes([.05, .1, .4, .8])  # heatmap
    dot_ax = fig.add_axes([.55, .1, .4, .8])  # dot plot
    dot_root = fig.add_axes([.5, 0, .5, 1])  # dot plot canvas

    # Left axis: heatmap
    plot_heatmap(heatmap_ax, M, breaks, iopts)

    # Right axis: synteny. Only the two bed objects are used below.
    qbed, sbed, _qorder, _sorder, _is_self = check_beds(
        anchorsfile, parser, opts, sorted=False
    )
    dotplot(anchorsfile, qbed, sbed, fig, dot_root, dot_ax, sep=False, title="")

    # Two centered header lines on the whole-canvas axes.
    root.text(.5, .98, clm.name, color="g", ha="center", va="center")
    root.text(.5, .95, label, color="darkslategray", ha="center", va="center")
    normalize_axes(root)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #20
0
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    # Overlay axes for the panel letters, plus a 2x2 arrangement of panels.
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.12, 0.62, 0.35, 0.35])
    B = fig.add_axes([0.62, 0.62, 0.35, 0.35])
    C = fig.add_axes([0.12, 0.12, 0.35, 0.35])
    D = fig.add_axes([0.62, 0.12, 0.35, 0.35])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)
    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=0.5)
    subplot(
        B,
        dataB,
        "Translocation error rate",
        "Accuracy",
        xlim=0.5,
        # Raw string: "\%" is not a valid escape sequence in a normal string
        # literal. The resulting text is unchanged (backslash + percent).
        legend=("intra-chromosomal", "inter-chromosomal", r"75\% intra + 25\% inter"),
    )
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)

    labels = (
        (0.03, 0.97, "A"),
        (0.53, 0.97, "B"),
        (0.03, 0.47, "C"),
        (0.53, 0.47, "D"),
    )
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #21
0
def movieframe(args):
    """
    %prog movieframe tour test.clm contigs.ref.anchors

    Draw heatmap and synteny in the same plot.
    """
    parser = OptionParser(movieframe.__doc__)
    parser.add_option("--label", help="Figure title")
    parser.set_beds()
    parser.set_outfile(outfile=None)
    opts, args, iopts = parser.set_image_options(
        args,
        figsize="16x8",
        style="white",
        cmap="coolwarm",
        format="png",
        dpi=120,
    )

    if len(args) != 3:
        sys.exit(not parser.print_help())

    tour, clmfile, anchorsfile = args
    # The tour arrives as one comma-separated string.
    tour = tour.split(",")
    # Fall back to a default file name, and derive the label from that name
    # when --label is not given.
    image_name = opts.outfile or ("movieframe." + iopts.format)
    label = opts.label or op.basename(image_name).rsplit(".", 1)[0]

    clm = CLMFile(clmfile)
    totalbins, bins, breaks = make_bins(tour, clm.tig_to_size)
    M = read_clm(clm, totalbins, bins)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # whole canvas
    heatmap_ax = fig.add_axes([.05, .1, .4, .8])  # heatmap
    dot_ax = fig.add_axes([.55, .1, .4, .8])  # dot plot
    dot_root = fig.add_axes([.5, 0, .5, 1])  # dot plot canvas

    # Left axis: heatmap
    plot_heatmap(heatmap_ax, M, breaks, iopts)

    # Right axis: synteny. Only the two bed objects are used below.
    qbed, sbed, _qorder, _sorder, _is_self = check_beds(
        anchorsfile, parser, opts, sorted=False
    )
    dotplot(anchorsfile, qbed, sbed, fig, dot_root, dot_ax, sep=False, title="")

    # Two centered header lines on the whole-canvas axes.
    root.text(.5, .98, clm.name, color="g", ha="center", va="center")
    root.text(.5, .95, label, color="darkslategray", ha="center", va="center")
    normalize_axes(root)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #22
0
def tree(args):
    """
    %prog tree treefile

    Create a tree figure.
    """
    import sys

    from jcvi.graphics.tree import parse_tree, LeafInfoFile, WGDInfoFile, draw_tree

    p = OptionParser(tree.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x8")

    # One positional argument is required. Print usage and exit rather than
    # failing on the tuple unpacking below.
    if len(args) != 1:
        sys.exit(not p.print_help())

    (datafile,) = args
    logging.debug("Load tree file `{0}`".format(datafile))
    t, hpd = parse_tree(datafile)

    fig = plt.figure(1, (iopts.w, iopts.h))
    # A single axes covering the whole figure.
    ax1 = fig.add_axes([0, 0, 1, 1])

    margin, rmargin = 0.1, 0.2  # Left and right margin
    # Side-car files are read from fixed names in the working directory.
    leafinfo = LeafInfoFile("leafinfo.csv").cache
    wgdinfo = WGDInfoFile("wgdinfo.csv").cache
    outgroup = "ginkgo"

    # Panel A
    draw_tree(
        ax1,
        t,
        hpd=hpd,
        margin=margin,
        rmargin=rmargin,
        # None is passed explicitly; the unused local supportcolor = "k"
        # that used to sit above was never forwarded and has been removed.
        supportcolor=None,
        internal=False,
        outgroup=outgroup,
        reroot=False,
        leafinfo=leafinfo,
        wgdinfo=wgdinfo,
        geoscale=True,
    )

    normalize_axes([ax1])
    # Output name is fixed; it does not follow iopts.format.
    image_name = "tree.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #23
0
def synteny(args):
    """
    %prog synteny vplanifoliaA_blocks.bed vplanifoliaA.sizes \
        b1.blocks all.bed b1.layout

    Create a composite figure with (A) wgd and (B) microsynteny.
    """
    import sys

    from jcvi.graphics.chromosome import draw_chromosomes

    p = OptionParser(synteny.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="12x12")

    # Five positional arguments are required. Print usage and exit rather
    # than failing on the tuple unpacking below.
    if len(args) != 5:
        sys.exit(not p.print_help())

    bedfile, sizesfile, blocksfile, allbedfile, blockslayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Overlay axes for the panel letters; ax1 is the top half of the figure,
    # ax2 the bottom half (inset 0.02 from the left).
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0.5, 1, 0.5])
    ax2 = fig.add_axes([0.02, 0, 0.98, 0.5])

    # Panel A
    title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$"
    draw_chromosomes(
        ax1,
        bedfile,
        sizes=sizesfile,
        iopts=iopts,
        mergedist=200000,
        winsize=50000,
        imagemap=False,
        gauge=True,
        legend=False,
        title=title,
    )

    # Panel B
    draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout)

    normalize_axes([root, ax1, ax2])
    labels = ((0.05, 0.95, "A"), (0.05, 0.5, "B"))
    panel_labels(root, labels)

    # Output name is fixed; it does not follow iopts.format.
    image_name = "synteny.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #24
0
def microsynteny(args):
    """
    %prog microsynteny b1.blocks all.bed b1.layout

    Create a microsynteny figure.
    """
    import sys

    # Use this command's own docstring as the usage text; it previously
    # borrowed the usage text of `synteny`.
    p = OptionParser(microsynteny.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="12x6")

    # Three positional arguments are required. Print usage and exit rather
    # than failing on the tuple unpacking below.
    if len(args) != 3:
        sys.exit(not p.print_help())

    blocksfile, allbedfile, blockslayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    # A single axes covering the whole figure.
    ax2 = fig.add_axes([0, 0, 1, 1])

    draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout)

    normalize_axes([ax2])

    # Output name is fixed; it does not follow iopts.format.
    image_name = "microsynteny.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #25
0
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    # Overlay axes for the panel letters, plus a 2x2 arrangement of panels.
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([.12, .62, .35, .35])
    B = fig.add_axes([.62, .62, .35, .35])
    C = fig.add_axes([.12, .12, .35, .35])
    D = fig.add_axes([.62, .12, .35, .35])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)
    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=.5)
    # Raw string below: "\%" is not a valid escape sequence in a normal string
    # literal. The resulting text is unchanged (backslash + percent).
    subplot(B, dataB, "Translocation error rate", "Accuracy", xlim=.5,
            legend=("intra-chromosomal", "inter-chromosomal",
                    r"75\% intra + 25\% inter"))
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)

    labels = ((.03, .97, "A"), (.53, .97, "B"),
              (.03, .47, "C"), (.53, .47, "D"))
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #26
0
def likelihood3(args):
    """
    %prog likelihood3 200_20.json 200_100.json

    Plot the likelihood surface and marginal distributions for two settings.
    """
    # NOTE: the docstring above is the usage text shown by the parser; it
    # previously named the `likelihood2` subcommand by mistake.
    from matplotlib import gridspec

    p = OptionParser(likelihood3.__doc__)
    opts, args, iopts = p.set_image_options(args,
                                            figsize="10x10",
                                            style="white",
                                            cmap="coolwarm")
    if len(args) != 2:
        sys.exit(not p.print_help())

    jsonfile1, jsonfile2 = args
    fig = plt.figure(figsize=(iopts.w, iopts.h))
    # 9-row grid: rows 0-3 hold the first setting, rows 5-8 the second;
    # row 4 is left empty between them.
    gs = gridspec.GridSpec(9, 2)
    ax1 = fig.add_subplot(gs[:4, 0])
    ax2 = fig.add_subplot(gs[:2, 1])
    ax3 = fig.add_subplot(gs[2:4, 1])
    ax4 = fig.add_subplot(gs[5:, 0])
    ax5 = fig.add_subplot(gs[5:7, 1])
    ax6 = fig.add_subplot(gs[7:, 1])
    plt.tight_layout(pad=2)

    plot_panel(jsonfile1, ax1, ax2, ax3, opts.cmap)
    plot_panel(jsonfile2, ax4, ax5, ax6, opts.cmap)

    # Full-figure overlay axes carrying the panel letters.
    root = fig.add_axes([0, 0, 1, 1])
    pad = .02
    panel_labels(root, ((pad, 1 - pad, "A"), (pad, 4. / 9, "B")))
    normalize_axes(root)

    image_name = "likelihood3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #27
0
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.
    """
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Keyword arguments shared by every thick translucent highlight line.
    band = dict(lw=12, alpha=.3,
                solid_capstyle="round", solid_joinstyle="round")

    # Panel A
    w, h = .7, .35
    ax = fig.add_axes([.15, .6, w, h])

    # Ten points near the diagonal with +/-3 jitter; the points at
    # indices 3..6 are then reversed in y.
    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]
    # zip() returns a one-shot iterator on Python 3; materialize it so it can
    # be sliced and indexed below.
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(xlis, ylis, "r-", **band)
    ax.plot(xlds, ylds, "g-", **band)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, .15, .15 + w, .57, height=.02, lw=2)
    root.text(.15 + w / 2, .55, "Chromosome location (bp)", ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax)

    # Panel B
    w = .37
    # x positions of the vertical dividers (named separately from the parser
    # `p`, which the original rebinding shadowed).
    breaks = (0, 45, 75, 110)
    ax = fig.add_axes([.1, .12, w, h])
    xdata = list(range(10, 110, 10))
    ydata_orig = list(range(10, 110, 10))
    # Move the block at indices 4..6 to the end, reversed.
    ydata = ydata_orig[:4] + ydata_orig[7:] + ydata_orig[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(xlis, ylis, "r-", **band)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax)
    # Map divider positions from data units (0..110) to figure coordinates.
    patch = [.1 + w * x / 110. for x in breaks]
    HorizontalChromosome(root, .1, .1 + w, .09, patch=patch,
                         height=.02, lw=2)
    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        # Center each label between consecutive dividers.
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.1 + w / 2, .04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C
    ax = fig.add_axes([.6, .12, w, h])
    patch = [.6 + w * x / 110. for x in breaks]
    ydata = ydata_orig
    ax.plot(xdata, ydata, "r-", **band)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax)
    HorizontalChromosome(root, .6, .6 + w, .09, patch=patch,
                         height=.02, lw=2)
    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.6 + w / 2, .04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((.05, .95, 'A'), (.05, .48, 'B'), (.55, .48, 'C'))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #28
0
def wheel(args):
    """
    %prog wheel datafile.csv groups.csv

    Wheel plot that shows continous data in radial axes.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser,
    # so it is kept verbatim.
    from math import cos, sin

    p = OptionParser(wheel.__doc__)
    p.add_option("--column", default="score", choices=("score", "percentile"),
                 help="Which column to extract from `datafile.csv`")
    opts, args, iopts = p.set_image_options(args, figsize="5x5", format="png")

    if len(args) != 2:
        sys.exit(not p.print_help())

    datafile, groupsfile = args
    column = opts.column
    linecolor = "#d6d6d6"
    df = parse_data(datafile, score_column=column)
    groups = parse_groups(groupsfile)

    # Keep only the entries of `groups` that are also present in the data;
    # after this, `labels`, `df` and `groups` are parallel lists.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    labels = [g for g in groups if g in df]
    print(labels)
    df = [df[g] for g in labels]
    print(df)
    groups = [groups[g] for g in labels]
    print(groups)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    categories = len(df)
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)

    brewer = [
        "#FF3B30", "#DD43A0", "#5856D6",
        "#007AFE", "#56BDEC", "#4CD8BA",
        "#4CD864", "#B0F457", "#FEF221",
        "#FFCC01", "#FF9500", "#FF3B30",
    ]

    # Baseline: one angle per category, plus a dense sweep for drawing circles
    theta = np.linspace(1.5 * np.pi, 3.5 * np.pi, endpoint=False, num=categories)
    _theta = np.linspace(1.5 * np.pi, 3.5 * np.pi)
    R = max(max(df), 10)
    xlim = (-R, R) if column == "score" else (-100, 100)
    plim = (-R / 2, R) if column == "score" else (0, 100)
    ci = (-.5, 2) if column == "score" else (10, 90)

    # Grid
    if column == "score":
        for t in theta:
            ax.plot([t, t], plim, color=linecolor)
    ax.axis('off')

    # Contours
    for t in plim:
        ax.plot(_theta, [t] * len(_theta), color=linecolor)

    # Sectors (groupings): indices of consecutive entries sharing a group
    gg = [[idx for idx, _ in members]
          for _, members in groupby(enumerate(groups), lambda x: x[1])]

    # Disabled by default. The flag is deliberately NOT called `sector`, so it
    # no longer shadows the `sector(...)` callable used inside the branch
    # (presumably a module-level helper).
    draw_sectors = False
    if draw_sectors:
        theta_interval = 2 * np.pi / categories
        theta_pad = theta_interval / 2 * .9
        for color, group in zip(brewer, gg):
            tmin, tmax = min(group), max(group)
            sector(ax, theta[tmin], theta[tmax], theta_pad, R * .95,
                   "-", color=color, lw=2)

    # Data
    r = df
    closed_plot(ax, theta, r, color="lightslategray", alpha=.25)
    for color, group in zip(brewer, gg):
        # Points inside / outside the [ci[0], ci[1]] band; both are currently
        # drawn with the same alpha.
        hidden_data = [(theta[x], r[x]) for x in group
                       if ci[0] <= r[x] <= ci[1]]
        shown_data = [(theta[x], r[x]) for x in group
                      if r[x] < ci[0] or r[x] > ci[1]]
        for alpha, data in zip((1, 1), (hidden_data, shown_data)):
            if not data:
                continue
            color_theta, color_r = zip(*data)
            ax.plot(color_theta, color_r, "o", color=color, alpha=alpha)

    # Print out data
    diseaseNames, risks = labels, df
    print("var theta = [{}]".format(
        ",".join("{:.1f}".format(degrees(x)) for x in theta)))
    print("var risks = [{}]".format(",".join(str(x) for x in risks)))
    print("var diseaseNames = [{}]".format(
        ",".join(['"{}"'.format(x) for x in diseaseNames])))

    # Labels
    r = .5
    for tl, label in zip(theta, labels):
        x, y = .5 + r * cos(tl), .5 + r * sin(tl)
        d = degrees(tl)
        if 90 < d % 360 < 270:  # On the left quadrants
            d -= 180
        root.text(x, y, label, size=4, rotation=d,
                  ha="center", va="center", color=linecolor)
        # Space-separated, like the former `print x, y, label` statement
        print("{0} {1} {2}".format(x, y, label))

    # Add baseline
    baseline = 0 if column == "score" else 50
    _r = len(_theta) * [baseline]
    closed_plot(ax, _theta, _r, "k:", lw=1, ms=4)

    # Add confidence interval
    if column == "percentile":
        barcolor = "#eeeeee"
        ax.bar([0], [ci[1] - ci[0]], width=2 * np.pi, bottom=ci[0], fc=barcolor)

    ax.set_rmin(xlim[0])
    ax.set_rmax(xlim[1])

    normalize_axes(root)

    image_name = pf + "-" + column + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #29
0
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser,
    # so it is kept verbatim.  Returns the list of Seed objects detected.
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    pngfile, = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != '.':
        mkdir(outdir)
    if calib:
        # From here on `calib` is the parsed JSON object, not the file name.
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)

    resizefile, mainfile, labelfile, exif = convert_image(
        pngfile, pf, outdir=outdir,
        rotate=opts.rotate,
        rows=rows, cols=cols,
        labelrows=labelrows, labelcols=labelcols)

    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, nrows=1,
                                             figsize=(iopts.w, iopts.h))

    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    else:
        # Previously an unknown filter fell through and failed below with an
        # unbound `edges`; fail here with an explicit message instead.
        raise ValueError("Unknown edge filter: {0}".format(ff))
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance, threshold_rel=.05, indices=False)
        coordinates = peak_local_max(distance, threshold_rel=.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering: --minsize/--maxsize are percentages of the canvas
    w, h = img_gray.shape
    canvas_size = w * h
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug("Find objects with pixels between {0} ({1}%) and {2} ({3}%)"
                  .format(min_size, opts.minsize, max_size, opts.maxsize))

    # Plotting
    ax1.set_title('Original picture')
    ax1.imshow(oimg)

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    # The resulting text is unchanged.
    params = r"{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title('Edge detection\n({0})'.format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], 'g.')
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title('Object detection')
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    for i, props in enumerate(rp, start=1):  # seed numbers are 1-based
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        # Cross marking the two axes of the region
        ax2.plot((x0 - major_dx, x0 + major_dx),
                 (y0 + major_dy, y0 - major_dy), 'r-')
        ax2.plot((x0 - minor_dx, x0 + minor_dx),
                 (y0 - minor_dy, y0 + minor_dy), 'r-')

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * .35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d):(y0d + d), (x0d - d):(x0d + d)]
        pixels = [px for row in square for px in row]
        logging.debug("Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%"
                      .format(i, npixels, len(pixels),
                              100. * npixels / canvas_size))

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle((minc, minr), maxc - minc, maxr - minr,
                         fill=False, ec='w', lw=1)
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc, mr, "{0}".format(i), color='w',
                 ha="center", va="center", size=6)

    # img_gray.shape is (rows, cols): x spans `h`, y spans `w` (top-down)
    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(.1, .92, "File: {0}".format(latex(filename)), color='g')
    ax4.text(.1, .86, "Label: {0}".format(latex(accession)), color='m')
    yy = .8
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        if i > 7:  # every seed is written to the file; only the first 7 are drawn
            continue
        ax4.text(.01, yy, str(i), va="center", bbox=dict(fc='none', ec='k'))
        ax4.text(.1, yy, o.pixeltag, va="center")
        yy -= .04
        ax4.add_patch(Rectangle((.1, yy - .025), .12, .05, lw=0,
                                fc=rgb_to_hex(o.rgb)))
        ax4.text(.27, yy, o.hashtag, va="center")
        yy -= .06
    ax4.text(.1, yy, "(A total of {0} objects displayed)".format(nb_labels),
             color="darkslategrey")
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family='Helvetica', size=8)
        ax.set_yticklabels(yticklabels, family='Helvetica', size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
Пример #30
0
def compare(args):
    """
    %prog compare Evaluation.csv

    Compare performances of various variant callers on simulated STR datasets.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser
    # (this function's docstring, not the module's), so it is kept verbatim.
    p = OptionParser(compare.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="15x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,
                                        nrows=1,
                                        figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=2)

    # Huntington risk allele
    infected_thr = 40
    ref_thr = 19

    # ax1: Multiple callers at lower range.
    # Read the file given on the command line rather than a hard-coded name.
    df = pd.read_csv(datafile)
    truth = df["Truth"]

    ax1.plot(truth, df["Manta"], 'bx-')
    ax1.plot(truth, df["Isaac"], 'yo-')
    ax1.plot(truth, df["GATK"], 'md-')
    ax1.plot(truth, df["lobSTR"], 'c+-')
    ax1.plot(truth, truth, 'k--')  # to show diagonal

    bbox = {'facecolor': 'tomato', 'alpha': .2, 'ec': 'w'}
    pad = 2
    ax1.axhline(infected_thr, color='tomato')
    ax1.text(max(truth) - pad,
             infected_thr + pad,
             'Risk threshold',
             bbox=bbox,
             ha="right")
    ax1.axhline(ref_thr, color='tomato')
    ax1.text(max(truth) - pad,
             ref_thr - pad,
             'Reference repeat count',
             bbox=bbox,
             ha="right",
             va="top")

    ax1.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
    ax1.set_ylabel('Num of CAG repeats called')
    ax1.set_title(r'Simulated haploid $\mathit{h}$')
    ax1.legend(['Manta', 'Isaac', 'GATK', 'lobSTR', 'Truth'], loc='best')

    # ax2 / ax3: lobSTR vs TREDPARSE under the haploid and diploid models.
    # Both panels share the layout; only input files, parse options and title
    # differ.
    max_insert = 120
    pad *= 2  # larger label offset for the wider x range of these panels
    truth_range = range(10, max_insert + 1)
    panels = (
        (ax2, "lobstr_results_homo.txt", "tredparse_results_homo.txt",
         {}, r'Simulated haploid $\mathit{h}$'),
        (ax3, "lobstr_results_het.txt", "tredparse_results_het.txt",
         {"exclude": 20}, r'Simulated diploid $\mathit{20/h}$'),
    )
    for ax, lobstr_file, tredparse_file, parse_kwargs, title in panels:
        lobstr_results = parse_results(lobstr_file, **parse_kwargs)
        tredparse_results = parse_results(tredparse_file, **parse_kwargs)
        lx, ly = zip(*lobstr_results)
        tx, ty = zip(*tredparse_results)

        ax.plot(lx, ly, 'c+-')
        ax.plot(tx, ty, 'gx-')
        ax.plot(truth_range, truth_range, 'k--')  # diagonal

        ax.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
        ax.set_ylabel('Num of CAG repeats called')
        ax.set_title(title)
        # Legend is created before the threshold line so that its three
        # labels apply to the three curves plotted above.
        ax.legend(['lobSTR', 'TREDPARSE', 'Truth'], loc='best')

        ax.axhline(infected_thr, color='tomato')
        ax.text(max(truth_range) - pad,
                infected_thr + pad,
                'Risk threshold',
                bbox=bbox,
                ha="right")
        ax.set_xlim(10, max_insert)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 3., 1 - pad, "B"),
                        (2 / 3., 1 - pad, "C")))
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #31
0
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser,
    # so it is kept verbatim.
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Shared styling for the thick translucent lines that highlight a run
    highlight = dict(
        lw=12,
        alpha=0.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )

    # Panel A
    w, h = 0.7, 0.35
    ax = fig.add_axes([0.15, 0.6, w, h])

    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]  # reverse the middle stretch
    # list(...) is required: zip() returns a non-subscriptable iterator on
    # Python 3, and the result is sliced/indexed below.
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xlds, ylds, "g-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, 0.15, 0.15 + w, 0.57, height=0.02, lw=2)
    root.text(0.15 + w / 2, 0.55, "Chromosome location (bp)", ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax, xlim=110, ylim=110)

    # Panel B
    w = 0.37
    breaks = (0, 45, 75, 110)  # scaffold boundaries in data coordinates
    ax = fig.add_axes([0.1, 0.12, w, h])
    xdata = list(range(10, 110, 10))
    ydata_orig = list(range(10, 110, 10))
    # Slicing builds a new list, so ydata_orig stays untouched for Panel C
    ydata = ydata_orig[:4] + ydata_orig[7:] + ydata_orig[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    # Same boundaries rescaled from data units (0..110) to canvas fractions
    patch = [0.1 + w * x / 110.0 for x in breaks]
    HorizontalChromosome(root, 0.1, 0.1 + w, 0.09, patch=patch, height=0.02, lw=2)
    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2  # midpoint of the scaffold
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.1 + w / 2, 0.04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C
    ax = fig.add_axes([0.6, 0.12, w, h])
    patch = [0.6 + w * x / 110.0 for x in breaks]
    ydata = ydata_orig
    ax.plot(xdata, ydata, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    HorizontalChromosome(root, 0.6, 0.6 + w, 0.09, patch=patch, height=0.02, lw=2)
    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.6 + w / 2, 0.04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((0.05, 0.95, "A"), (0.05, 0.48, "B"), (0.55, 0.48, "C"))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #32
0
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser
    # below, so it is kept verbatim.
    from jcvi.graphics.base import plt, savefig, normalize_axes, \
                set2, panel_labels
    from jcvi.graphics.chromosome import Chromosome, GeneticMap, \
                HorizontalChromosome

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    # Companion files are derived from the input name with its extension removed
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)
    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    # Keep the mlgs whose count reaches the --links threshold; keep halving the
    # threshold until at least one qualifies.
    # NOTE(review): if s.mlg_counts is empty nothing can ever qualify and this
    # loop does not terminate -- confirm callers guarantee at least one entry.
    mlgs = [k for k, v in s.mlg_counts.items() if v >= links]
    while not mlgs:
        links /= 2
        logging.error("No markers to plot, --links reset to {0}".format(links))
        mlgs = [k for k, v in s.mlg_counts.items() if v >= links]

    # Size of each mlg = largest value of `function` over its markers
    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        mlgsize = max(function(x) for x in mm)
        mlgsizes[mlg] = mlgsize

    # root spans the whole canvas; ax1 is the left half, ax2 the right half
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])
    ax2 = fig.add_axes([.5, 0, .5, 1])

    # Find the layout first
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette: one color per map name, then each mlg takes the color of the
    # map named by the part of its id before the first "-"
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        # Only markers located on the requested seqid enter the correlation
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        mx, my = zip(*xy)
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        flip = rho < 0  # negative correlation: ask GeneticMap to flip

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        # Put the label on the outer side of the map relative to the center
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2, label, color=colors[mlg],
                 ha=ha, va="center", rotation=90)
        marker_pos.update(g.marker_pos)

    # Restrict the AGP records to the requested seqid
    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    # Position -> y coordinate, running from ystart downwards. The lambda reads
    # `ystart` and `ratio` when called; it is only called before either name is
    # reassigned further down.
    f = lambda x: (ystart - ratio * x)
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        # Markers that were not placed on any drawn map are skipped
        if marker_name not in marker_pos:
            continue

        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))
    # Position -> x coordinate; uses the `ratio` computed above (it is called
    # on the next lines, before `ratio` is reassigned)
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2, xstart, xstop, ystop,
                         height=2 * tip, patch=patchstart, lw=2)

    gap = .03
    # From here on `ratio` is the panel height per unit of mlg size, after
    # reserving one `gap` per mlg plus `tip`
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    # One scatter panel per mlg, stacked from the top downwards
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        ystart -= height  # bottom edge of this panel
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap  # spacing before the next panel
        sd = scatter_data[mlg]
        xx, yy = zip(*sd)  # `xx` is reused: now the scatter x values
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5, 1 - .4 * gap / height, r"$\rho$={0:.3f}".format(rho),
                    ha="center", va="top", transform=ax.transAxes, color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        while height / len(ax.get_yticks()) < .03 and len(ax.get_yticks()) >= 2:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        if rho < 0:
            ax.invert_yaxis()

    # Panel names down the middle of the canvas; with more than four panels
    # the horizontal alignment alternates between left and right
    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90,
                      ha=ha, va="center")

    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    normalize_axes((ax1, ax2, root))
    # Output file is named after the seqid, not the input prefix
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)
Пример #33
0
def compare3(args):
    """
    %prog compare3

    Compare performances of various variant callers on simulated STR datasets.
    This compares the power of various evidence types.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser,
    # so it is kept verbatim.
    p = OptionParser(compare3.__doc__)
    p.add_option('--maxinsert',
                 default=300,
                 type="int",
                 help="Maximum number of repeats")
    add_simulate_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 0:
        sys.exit(not p.print_help())

    max_insert = opts.maxinsert
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2,
                                                 nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    color = "lightslategray"

    def draw_panel(ax, results, submodel):
        # All four panels share the plot settings; only the data and the
        # sub-model caption differ. Building the title here keeps the
        # " (caption)" suffix formatted identically on every panel.
        title = SIMULATED_DIPLOID + " (" + submodel + ")"
        plot_compare(ax,
                     title,
                     results,
                     None,
                     color=color,
                     max_insert=max_insert,
                     risk=False)

    # ax1: Spanning (parsed without `exclude`, as before)
    tredparse_results = parse_results("tredparse_results_het-spanning.txt")
    draw_panel(ax1, tredparse_results, "Sub-model 1: Spanning reads")

    # ax2: Partial
    tredparse_results = parse_results("tredparse_results_het-partial.txt",
                                      exclude=20)
    draw_panel(ax2, tredparse_results, "Sub-model 2: Partial reads")

    # ax3: Repeat
    tredparse_results = parse_results("tredparse_results_het-repeat.txt",
                                      exclude=20)
    # HACK (repeat reads won't work under 50)
    tredparse_results = [x for x in tredparse_results if x[0] > 50]
    draw_panel(ax3, tredparse_results, "Sub-model 3: Repeat-only reads")

    # ax4: Pair
    tredparse_results = parse_results("tredparse_results_het-pair.txt",
                                      exclude=20)
    draw_panel(ax4, tredparse_results, "Sub-model 4: Paired-end reads")

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B"),
                        (pad / 2, 1 / 2., "C"), (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = "tredparse." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #34
0
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
        is_self=False, synteny=False, cmap_text=None, cmap="copper",
        genomenames=None, sample_number=10000, minfont=5, palette=None,
        chrlw=.1, title=None, sep=True, sepcolor="g", stdpf=True):
    """
    Draw a dot plot of the pairs listed in `anchorfile` onto `ax`.

    anchorfile: path to a text file. A line starting with "#" opens a new
        block; any other line needs at least two whitespace-separated
        columns (query, subject). The last column is used as the value
        when `cmap_text` is set.
    qbed, sbed: objects for the x / y axis that provide `.order` (name ->
        (index, entry)), `.filename`, `.get_breaks()` and `len()`.
    fig, root, ax: figure, canvas-level axes (title, legends) and the axes
        that receives the scatter.
    vmin, vmax: with `cmap_text`, pairs whose value falls outside
        [vmin, vmax] are dropped; also the color scale limits.
    is_self: mirror every pair and draw a diagonal (self comparison).
    synteny: draw boxes around the clusters found by `batch_scan`.
    cmap_text: if set, color by value and draw a colormap legend with
        this text.
    genomenames: "x_y" string with the axis names; defaults to names
        derived from the bed file names.
    sample_number: passed to `downsample`.
    palette: optional object with `.get(block_id, default)` and `.colors`
        that assigns a color to each block.
    title: figure title; built automatically when None.
    minfont, chrlw, sep, sepcolor, stdpf: passed through to
        `plot_breaks_and_labels`.
    """
    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    gx, gy = markup(gx), markup(gy)

    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Capping values within [{0:.1f}, {1:.1f}]"
                      .format(vmin, vmax))

    block_id = 0
    # The block color must persist across the rows of a block. It used to be
    # reset to None on every row, so the palette color set on the "#" header
    # line never reached the data rows.
    block_color = None
    with open(anchorfile) as fp:  # close the handle once parsing is done
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                block_color = palette.get(block_id, "k") if palette else None
                continue

            # first two columns are query and subject, and an optional third
            # column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            if cmap_text:
                try:
                    value = float(value)
                except ValueError:
                    value = vmax  # non-numeric values count as the maximum

                if value < vmin:
                    continue
                if value > vmax:
                    continue
            else:
                value = 0

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, q = qorder[query]
            si, s = sorder[subject]

            nv = value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    npairs = downsample(data, sample_number=sample_number)
    x, y, c = zip(*data)

    if palette:
        # `c` holds explicit colors here, so no colormap is applied
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=cmap,
                vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=cmap)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    qbreaks = qbed.get_breaks()
    sbreaks = sbed.get_breaks()
    xlim, ylim = plot_breaks_and_labels(fig, root, ax, gx, gy, xsize, ysize,
                           qbreaks, sbreaks, sep=sep, chrlw=chrlw,
                           sepcolor=sepcolor, minfont=minfont, stdpf=stdpf)

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if title is None:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            # Every pair was added twice (mirrored) above.
            # NOTE(review): this is true division on Python 3, so `npairs`
            # becomes a float -- confirm `thousands` accepts that.
            npairs /= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(title, x=.5, y=.96, color="k")
    if title:
        logging.debug("Dot plot title: {}".format(title))
    normalize_axes(root)
Пример #35
0
def multihistogram(args):
    """
    %prog multihistogram *.histogram species

    Plot the histogram based on a set of K-mer histograms. The method is based
    on Star et al.'s method (Atlantic Cod genome paper).
    """
    import sys

    p = OptionParser(multihistogram.__doc__)
    p.add_option("--kmin", default=15, type="int", help="Minimum K-mer size, inclusive")
    p.add_option("--kmax", default=30, type="int", help="Maximum K-mer size, inclusive")
    p.add_option("--vmin", default=2, type="int", help="Minimum value, inclusive")
    p.add_option("--vmax", default=100, type="int", help="Maximum value, inclusive")
    opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300)

    # At least one histogram file plus the trailing species name is required
    if len(args) < 2:
        sys.exit(not p.print_help())

    histfiles = args[:-1]
    species = args[-1]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.08, 0.12, 0.38, 0.76])  # left panel: K-mer spectra
    B = fig.add_axes([0.58, 0.12, 0.38, 0.76])  # right panel: size estimates

    lines = []
    legends = []
    genomesizes = []
    for histfile in histfiles:
        # K is parsed from the file name: the part before the first ".",
        # then the last "-"-separated token of that part.
        K = get_number(op.basename(histfile).split(".")[0].split("-")[-1])
        if not opts.kmin <= K <= opts.kmax:
            continue  # filter on K before loading the spectrum

        ks = KmerSpectrum(histfile)
        x, y = ks.get_xy(opts.vmin, opts.vmax)
        line, = A.plot(x, y, "-", lw=1)
        lines.append(line)
        legends.append("K = {0}".format(K))
        ks.analyze(K=K)
        genomesizes.append((K, ks.genomesize / 1e6))

    # Without this guard the zip(*genomesizes) below fails with an opaque
    # unpacking error when every file was filtered out.
    if not genomesizes:
        sys.exit("No histogram file with K in [{0}, {1}]"
                 .format(opts.kmin, opts.kmax))

    leg = A.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(0.5)

    title = "{0} genome K-mer histogram".format(species)
    A.set_title(markup(title))
    xlabel, ylabel = "Coverage (X)", "Counts"
    A.set_xlabel(xlabel)
    A.set_ylabel(ylabel)
    set_human_axis(A)

    title = "{0} genome size estimate".format(species)
    B.set_title(markup(title))
    x, y = zip(*genomesizes)
    B.plot(x, y, "ko", mfc="w")
    # Quadratic trend of estimated genome size against K
    t = np.linspace(opts.kmin - 0.5, opts.kmax + 0.5, 100)
    fit = np.poly1d(np.polyfit(x, y, 2))
    B.plot(t, fit(t), "r:")

    xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)"
    B.set_xlabel(xlabel)
    B.set_ylabel(ylabel)
    set_ticklabels_helvetica(B)

    labels = ((0.04, 0.96, "A"), (0.54, 0.96, "B"))
    panel_labels(root, labels)

    normalize_axes(root)
    imagename = species + ".multiK.pdf"
    savefig(imagename, dpi=iopts.dpi, iopts=iopts)
Пример #36
0
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    # Marker position accessor handed to both Map and GapEstimator
    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = .15, .65
    w, h = .7, .3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(.05, .95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    # Flip the y axis when the two coordinates are negatively correlated
    if rho < 0:
        ax.invert_yaxis()

    # Panel B
    ystart -= .28
    h = .25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(ax, xlim=chrsize, ylim=25 * 1e-6,
                       xfactor=1e-6, xlabel="Physical position (Mb)",
                       yfactor=1000000, ylabel="Recomb. rate\n(cM / Mb)")

    # Panel C (specific to JMMale-1): the scaffold names, marker offsets and
    # the printed distances below are hard-coded for that example.
    a, b = "scaffold_1076", "scaffold_861"
    sizes = dict((x.component_id, (x.object_beg, x.object_end,
                                   x.component_span, x.orientation))
                 for x in g.agp if not x.is_gap)
    # Only the begin of `a`, the end of `b` and the two spans are used
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = .6 / total_size     # canvas units per bp in the zoomed view
    y = .16
    pad = .03
    pb_ratio = w / chrsize      # canvas units per bp in panel B

    # Zoom
    lsg = "lightslategray"
    root.plot((.15 + pb_ratio * a_beg, .2),
              (ystart, ystart - .14), ":", color=lsg)
    root.plot((.15 + pb_ratio * b_end, .3),
              (ystart, ystart - .08), ":", color=lsg)
    ends = []
    for tag, size, marker, beg in zip((a, b), (asize, bsize), (49213, 81277),
                              (.2, .2 + (asize + gapsize) * ratio)):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag),
                  ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=.025, fc='gainsboro')

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: "\L" is not a valid escape sequence in a normal literal
    root.text(sum(markers) / 2, ypos + pad,
              r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)", **fontprop)

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(sum(markers) / 2, ypos - 3 * pad, r"$\textit{Estimated gap size: 96,433bp}$",
                                  color="r", ha="center", va="center")

    labels = ((.05, .95, 'A'), (.05, .6, 'B'), (.05, .27, 'C'))
    panel_labels(root, labels)
    normalize_axes(root)

    # The output name is fixed and does not depend on the input prefix
    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #37
0
def heatmap(args):
    """
    %prog heatmap input.npy genome.json

    Plot heatmap based on .npy data file. The .npy stores a square matrix with
    bins of genome, and cells inside the matrix represent number of links
    between bin i and bin j. The `genome.json` contains the offsets of each
    contig/chr so that we know where to draw boundary lines, or extract per
    contig/chromosome heatmap.
    """
    p = OptionParser(heatmap.__doc__)
    p.add_option("--resolution", default=500000, type="int",
                 help="Resolution when counting the links")
    p.add_option("--chr", help="Plot this contig/chr only")
    opts, args, iopts = p.set_image_options(args, figsize="10x10",
                                            style="white", cmap="coolwarm",
                                            format="png", dpi=120)

    if len(args) != 2:
        sys.exit(not p.print_help())

    npyfile, jsonfile = args
    contig = opts.chr
    # Load contig/chromosome starts and sizes; close the handle promptly
    with open(jsonfile) as fp:
        header = json.load(fp)
    # Load the matrix
    A = np.load(npyfile)

    # Select specific submatrix
    if contig:
        contig_start = header["starts"][contig]
        contig_size = header["sizes"][contig]
        contig_end = contig_start + contig_size
        A = A[contig_start: contig_end, contig_start: contig_end]

    # Several concerns in practice:
    # The diagonal counts may be too strong, this can either be resolved by
    # masking them. Or perform a log transform on the entire heatmap.
    B = A.astype("float64")
    B += 1.0
    B = np.log(B)
    vmin, vmax = 1, 7
    B[B < vmin] = vmin
    B[B > vmax] = vmax
    # Call form prints the same text under Python 2 and Python 3
    print(B)
    logging.debug("Matrix log-transformation and thresholding ({}-{}) done"
                  .format(vmin, vmax))

    # Canvas
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])       # whole canvas
    ax = fig.add_axes([.05, .05, .9, .9])   # just the heatmap

    if contig:
        # A single-contig plot has no internal boundaries
        breaks = []
    else:
        # list() is required: dict views cannot be extended on Python 3
        breaks = list(header["starts"].values())
        breaks.append(header["total_bins"])   # This is actually discarded
        breaks = sorted(breaks)[1:]           # drop the smallest start
    plot_heatmap(ax, B, breaks, iopts, binsize=opts.resolution)

    # Title
    pf = npyfile.rsplit(".", 1)[0]
    title = pf
    if contig:
        title += "-{}".format(contig)
    root.text(.5, .98, title, color="darkslategray", size=18,
              ha="center", va="center")

    normalize_axes(root)
    image_name = title + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #38
0
def draw_tree(ax, tx, rmargin=.3, leafcolor="k", supportcolor="k",
              outgroup=None, reroot=True, gffdir=None, sizes=None,
              trunc_name=None, SH=None, scutoff=0, barcodefile=None,
              leafcolorfile=None, leaffont=12):
    """
    Main function for drawing a phylogenetic tree onto `ax`.

    ax: axes to draw on; coordinates are used in the 0-1 range.
    tx: tree description handed to `Tree`.
    rmargin: fraction of the width kept free on the right of the tree.
    leafcolor: colour of leaf labels without an entry in `leafcolorfile`.
    supportcolor: colour of the branch support values.
    outgroup: leaf names whose common ancestor becomes the outgroup; when
        empty and `reroot` is set, the midpoint outgroup is used.
    reroot: whether to re-root the tree at all.
    gffdir: directory searched for `*.gff*` files to draw exon structures.
    sizes: sizes file; the size (bp) is printed as amino acids per leaf.
    trunc_name: rule passed to `truncate_name` for leaf labels.
    SH: value printed as the "SH test against ref tree" note, if not None.
    scutoff: support values (percent) at or below this are not printed.
    barcodefile: tab-delimited file used by `decode_name` on leaf labels.
    leafcolorfile: tab-delimited file, leaf name to colour; a colour given
        as "R,G,B" is converted to a tuple of floats.
    leaffont: font size for all text.
    """

    t = Tree(tx)
    if reroot:
        if outgroup:
            R = t.get_common_ancestor(*outgroup)
        else:
            # Calculate the midpoint node
            R = t.get_midpoint_outgroup()

        if R != t:
            t.set_outgroup(R)

    _, max_dist = t.get_farthest_leaf()

    margin = .05
    xstart = margin
    ystart = 1 - margin
    canvas = 1 - rmargin - 2 * margin
    tip = .005
    # scale the tree
    scale = canvas / max_dist

    num_leaves = len(t.get_leaf_names())
    yinterval = canvas / (num_leaves + 1)

    # get exons structures, if any
    structures = {}
    if gffdir:
        gffiles = glob("{0}/*.gff*".format(gffdir))
        setups, ratio = get_setups(gffiles, canvas=rmargin / 2, noUTR=True)
        structures = dict((a, (b, c)) for a, b, c in setups)

    if sizes:
        sizes = Sizes(sizes).mapping

    if barcodefile:
        barcodemap = DictFile(barcodefile, delimiter="\t")

    # Always bind the lookup table so a missing file is an ordinary miss
    # instead of a NameError hidden by a broad except.
    leafcolors = DictFile(leafcolorfile, delimiter="\t") if leafcolorfile \
        else {}

    coords = {}
    i = 0
    # Postorder guarantees children are placed before their parent
    for n in t.traverse("postorder"):
        dist = n.get_distance(t)
        xx = xstart + scale * dist

        if n.is_leaf():
            yy = ystart - i * yinterval
            i += 1

            if trunc_name:
                name = truncate_name(n.name, rule=trunc_name)
            else:
                name = n.name

            if barcodefile:
                name = decode_name(name, barcodemap)

            sname = name.replace("_", "-")

            try:
                lc = leafcolors[n.name]
            except KeyError:
                lc = leafcolor
            else:
                # if color is given as "R,G,B"; a tuple (not a lazy map
                # object) is needed for the colour to be usable on Python 3
                if "," in lc:
                    lc = tuple(float(v) for v in lc.split(","))

            ax.text(xx + tip, yy, sname, va="center",
                    fontstyle="italic", size=leaffont, color=lc)

            gname = n.name.split("_")[0]
            if gname in structures:
                mrnabed, cdsbeds = structures[gname]
                ExonGlyph(ax, 1 - rmargin / 2, yy, mrnabed, cdsbeds,
                          align="right", ratio=ratio)
            if sizes and gname in sizes:
                size = sizes[gname]
                # base pair converted to amino acid; floor division keeps
                # the label an integer on Python 3 as well
                size = size // 3 - 1
                size = "{0}aa".format(size)
                ax.text(1 - rmargin / 2 + tip, yy, size, size=leaffont)

        else:
            children = [coords[x] for x in n.get_children()]
            children_x, children_y = zip(*children)
            min_y, max_y = min(children_y), max(children_y)
            # plot the vertical bar
            ax.plot((xx, xx), (min_y, max_y), "k-")
            # plot the horizontal bar
            for cx, cy in children:
                ax.plot((xx, cx), (cy, cy), "k-")
            yy = sum(children_y) * 1. / len(children_y)
            support = n.support
            # supports above 1 are taken as percentages
            if support > 1:
                support = support / 100.
            if not n.is_root():
                if support > scutoff / 100.:
                    ax.text(xx, yy+.005, "{0:d}".format(int(abs(support * 100))),
                        ha="right", size=leaffont, color=supportcolor)

        coords[n] = (xx, yy)

    # scale bar
    br = .1
    x1 = xstart + .1
    x2 = x1 + br * scale
    yy = ystart - i * yinterval
    ax.plot([x1, x1], [yy - tip, yy + tip], "k-")
    ax.plot([x2, x2], [yy - tip, yy + tip], "k-")
    ax.plot([x1, x2], [yy, yy], "k-")
    ax.text((x1 + x2) / 2, yy - tip, "{0:g}".format(br),
            va="top", ha="center", size=leaffont)

    if SH is not None:
        xs = x1
        ys = (margin + yy) / 2.
        ax.text(xs, ys, "SH test against ref tree: {0}"
                .format(SH), ha="left", size=leaffont, color="g")

    normalize_axes(ax)
Пример #39
0
def wheel(args):
    """
    %prog wheel datafile.csv groups.csv

    Wheel plot that shows continuous data in radial axes.
    """
    from math import cos, sin

    p = OptionParser(wheel.__doc__)
    p.add_option("--column",
                 default="score",
                 choices=("score", "percentile"),
                 help="Which column to extract from `datafile.csv`")
    opts, args, iopts = p.set_image_options(args, figsize="5x5", format="png")

    if len(args) != 2:
        sys.exit(not p.print_help())

    datafile, groupsfile = args
    column = opts.column
    linecolor = "#d6d6d6"
    df = parse_data(datafile, score_column=opts.column)
    groups = parse_groups(groupsfile)
    # Keep only labels present in both files; df/groups become parallel lists
    labels = [g for g in groups if g in df]
    print(labels)
    df = [df[g] for g in labels]
    print(df)
    groups = [groups[g] for g in labels]
    print(groups)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    categories = len(df)
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)

    brewer = [
        "#FF3B30",
        "#DD43A0",
        "#5856D6",
        "#007AFE",
        "#56BDEC",
        "#4CD8BA",
        "#4CD864",
        "#B0F457",
        "#FEF221",
        "#FFCC01",
        "#FF9500",
        "#FF3B30",
    ]

    # Baseline
    theta = np.linspace(1.5 * np.pi,
                        3.5 * np.pi,
                        endpoint=False,
                        num=categories)
    _theta = np.linspace(1.5 * np.pi, 3.5 * np.pi)
    R = max(max(df), 10)
    xlim = (-R, R) if column == "score" else (-100, 100)
    plim = (-R / 2, R) if column == "score" else (0, 100)
    ci = (-.5, 2) if column == "score" else (10, 90)

    # Grid
    if column == "score":
        for t in theta:
            ax.plot([t, t], plim, color=linecolor)
    ax.axis('off')

    # Contours
    for t in plim:
        ax.plot(_theta, [t] * len(_theta), color=linecolor)

    # Sectors (groupings): collapse runs of identical group names into
    # lists of indices
    collapsed_groups = []
    gg = []
    for group, c in groupby(enumerate(groups), lambda x: x[1]):
        c = [x[0] for x in list(c)]
        collapsed_groups.append(group)
        gg.append(c)

    # Sector outlines are disabled.
    # NOTE(review): the flag has the same name as the callable invoked in
    # the branch, so enabling it requires renaming the flag first.
    sector = False
    if sector:
        theta_interval = 2 * np.pi / categories
        theta_pad = theta_interval / 2 * .9
        for color, group in zip(brewer, gg):
            tmin, tmax = min(group), max(group)
            sector(ax,
                   theta[tmin],
                   theta[tmax],
                   theta_pad,
                   R * .95,
                   "-",
                   color=color,
                   lw=2)

    # Data
    r = df
    closed_plot(ax, theta, r, color="lightslategray", alpha=.25)
    for color, group in zip(brewer, gg):
        # Points inside / outside the confidence interval; both are
        # currently drawn with the same alpha.
        hidden_data = [(theta[x], r[x]) for x in group
                       if ci[0] <= r[x] <= ci[1]]
        shown_data = [(theta[x], r[x]) for x in group
                      if r[x] < ci[0] or r[x] > ci[1]]
        for alpha, data in zip((1, 1), (hidden_data, shown_data)):
            if not data:
                continue
            color_theta, color_r = zip(*data)
            ax.plot(color_theta, color_r, "o", color=color, alpha=alpha)

    # Print out data
    diseaseNames, risks = labels, df
    print("var theta = [{}]".format(",".join("{:.1f}".format(degrees(x))
                                             for x in theta)))
    print("var risks = [{}]".format(",".join(str(x) for x in risks)))
    print("var diseaseNames = [{}]".format(",".join(
                    ['"{}"'.format(x) for x in diseaseNames])))

    # Labels
    r = .5
    for i, label in enumerate(labels):
        tl = theta[i]
        x, y = .5 + r * cos(tl), .5 + r * sin(tl)
        d = degrees(tl)
        if 90 < d % 360 < 270:  # On the left quadrants
            d -= 180
        root.text(x,
                  y,
                  label,
                  size=4,
                  rotation=d,
                  ha="center",
                  va="center",
                  color=linecolor)
        print(x, y, label)

    # Add baseline
    baseline = 0 if column == "score" else 50
    _r = len(_theta) * [baseline]
    closed_plot(ax, _theta, _r, "k:", lw=1, ms=4)

    # Add confidence interval
    if column == "percentile":
        barcolor = "#eeeeee"
        ax.bar([0], [ci[1] - ci[0]],
               width=2 * np.pi,
               bottom=ci[0],
               fc=barcolor)

    ax.set_rmin(xlim[0])
    ax.set_rmax(xlim[1])

    normalize_axes(root)

    image_name = pf + "-" + column + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #40
0
def main():
    """
    Draw a 3x3 demo grid: rows are inversion, indel and duplication; columns
    are assembly, read and optical map alignments. The single positional
    argument must be "demo"; the image is written as demo.<format>.
    """
    p = OptionParser(__doc__)
    opts, args, iopts = p.set_image_options(figsize="9x7")

    if len(args) != 1:
        sys.exit(not p.print_help())

    mode, = args
    assert mode == "demo"

    a, b = 30, 70   # interval that each rearrangement acts on
    pad = .08
    w = .31         # width and height of one grid cell
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Row separators
    yy = 1 - pad
    for _ in range(3):
        root.plot((0, 1), (yy, yy), "-", lw=2, color="lightgray")
        yy -= w

    # Row headers
    xx = pad * .6
    yy = 1 - pad - .5 * w
    for title in ("Inversion", "Indel", "Duplication"):
        root.text(xx, yy, title, ha="center", va="center")
        yy -= w

    # Column headers
    xx = pad + .5 * w
    yy = 1 - pad / 2
    for title in ("Assembly alignment", "Read alignment",
                  "Optical map alignment"):
        root.text(xx, yy, title, ha="center", va="center")
        xx += w

    # One column per aligner, drawn top to bottom: invert, delete, duplicate
    for col, aligner in enumerate((PairwiseAlign, ReadAlign, OpticalMapAlign)):
        xorigin = pad + col * w

        panel = aligner(fig, [xorigin, 2 * w, w, w])
        panel.invert(a, b)
        panel.draw()

        panel = aligner(fig, [xorigin, w, w, w])
        panel.delete(a, b)
        panel.draw()

        panel = aligner(fig, [xorigin, 0, w, w])
        if aligner is PairwiseAlign:
            # Only the assembly alignment passes an explicit gap
            panel.duplicate(a, b, gap=5)
        else:
            panel.duplicate(a, b)
        panel.draw()

    normalize_axes(root)

    image_name = mode + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #41
0
def compare4(args):
    """
    %prog compare4

    Compare performances of various variant callers on simulated STR datasets.
    Adds coverage comparisons as panel C and D.
    """
    p = OptionParser(compare4.__doc__)
    p.add_option('--maxinsert',
                 default=300,
                 type="int",
                 help="Maximum number of repeats")
    add_simulate_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 0:
        sys.exit(not p.print_help())

    depth = opts.depth
    max_insert = opts.maxinsert
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2,
                                                 nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    # (axes, lobSTR results, TREDPARSE results, model title, depth shown in
    # the title, extra parse_results keywords)
    panels = (
        # ax1: lobSTR vs TREDPARSE with haploid model
        (ax1, "lobstr_results_homo-20x-150bp-500bp.txt",
         "tredparse_results_homo-20x-150bp-500bp.txt",
         SIMULATED_HAPLOID, depth, {}),
        # ax2: lobSTR vs TREDPARSE with diploid model (depth=20x)
        (ax2, "lobstr_results_het-20x-150bp-500bp.txt",
         "tredparse_results_het-20x-150bp-500bp.txt",
         SIMULATED_DIPLOID, depth, {"exclude": 20}),
        # ax3: lobSTR vs TREDPARSE with diploid model (depth=5x)
        (ax3, "lobstr_results_het-5x-150bp-500bp.txt",
         "tredparse_results_het-5x-150bp-500bp.txt",
         SIMULATED_DIPLOID, 5, {"exclude": 20}),
        # ax4: lobSTR vs TREDPARSE with diploid model (depth=80x)
        (ax4, "lobstr_results_het-80x-150bp-500bp.txt",
         "tredparse_results_het-80x-150bp-500bp.txt",
         SIMULATED_DIPLOID, 80, {"exclude": 20}),
    )
    for ax, lobstr_file, tredparse_file, model, shown_depth, kwargs in panels:
        lobstr_results = parse_results(lobstr_file, **kwargs)
        tredparse_results = parse_results(tredparse_file, **kwargs)
        # Every title closes its math segment with "$"; the haploid title
        # used to leave it open.
        title = model + r" ($Depth=%s\times$)" % shown_depth
        plot_compare(ax,
                     title,
                     tredparse_results,
                     lobstr_results,
                     max_insert=max_insert)

    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B"),
                        (pad / 2, 1 / 2., "C"), (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = "tredparse." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #42
0
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    from jcvi.formats.bed import Bed

    p = OptionParser(fig3.__doc__)
    p.add_option("--gauge_step",
                 default=10000000,
                 type="int",
                 help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    rr, gg = diverge    # the two colours of the diverging pair
    chrs = [[x] for x in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    K = Karyotype(fig,
                  root,
                  seqidsfile,
                  klayout,
                  gap=gap,
                  height=height,
                  lw=2,
                  generank=False,
                  sizes=sizes,
                  heightpad=r,
                  roundrect=True,
                  plot_label=False)

    # Chromosome labels
    for kl in K.layout:
        if kl.empty:
            continue
        lx, ly = kl.xstart, kl.y
        # Labels too close to the left edge are nudged right and up
        if lx < .11:
            lx += .1
            ly += .06
        label = kl.label
        root.text(lx - .015, ly, label, fontsize=15, ha="right", va="center")

    # Inset with datafiles
    datafiles = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                 "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, x) for x in datafiles]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for t, datafile in zip(tracks, datafiles):
        ax = make_affix_axis(fig, t, -r, height=2 * r)
        xy_axes.append(ax)
        seqid = t.seqids[0]
        xy = XYtrack(ax, datafile, color="lightslategray")
        start, end = 0, t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, seqid, diverge=diverge)
        xy.draw()
        ax.set_xlim(start, end)
        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    # Read the conversion tables from `datadir` like every other input,
    # rather than from a hard-coded "data/" directory.
    atoc_file = op.join(datadir, "Genes.Converted.seuil.0.6.AtoC.txt")
    ctoa_file = op.join(datadir, "Genes.Converted.seuil.0.6.CtoA.txt")

    order = Bed(bedfile).order
    for asterisk in (False, True):
        conversion_track(order, atoc_file, 0, "A02", ax_Ar, rr,
                         asterisk=asterisk)
        conversion_track(order, atoc_file, 1, "C2", ax_Co, gg,
                         asterisk=asterisk)
        conversion_track(order, ctoa_file, 0, "A02", ax_Ar, gg,
                         ypos=1, asterisk=asterisk)
        conversion_track(order, ctoa_file, 1, "C2", ax_Co, rr,
                         ypos=1, asterisk=asterisk)

    # Region labels: (axes, "gene1 gene2", horizontal alignment, label)
    Ar, Co = xy_axes[1:3]
    gene_annotations = (
        (Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
        (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
        (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
        (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
        (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
        (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
        (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
        (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
        (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
        (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"),
    )

    for ax, genes, ha, label in gene_annotations:
        g1, g2 = genes.split()
        gx1, gx2 = order[g1][1].start, order[g2][1].start
        if ha == "center":
            x = (gx1 + gx2) / 2 * .8
        elif ha == "left":
            x = gx2
        else:
            x = gx1
        label = r"\textit{{{0}}}".format(label)
        # "+" in the label selects the first colour, otherwise the second
        color = rr if "+" in label else gg
        ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Plot coverage in resequencing lines
    gstep = 5000000
    accessions = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]

    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    c = Coverage(fig,
                 root,
                 canvas1,
                 chr1, (0, s1),
                 datadir,
                 order=accessions,
                 gauge=None,
                 plot_chr_label=False,
                 gauge_step=gstep,
                 palette="gray",
                 cap=40,
                 hlsuffix=hlsuffix,
                 labels_dict=labels_dict,
                 diverge=diverge)
    yys = c.yys
    flc_x, gsl_x = .37, .72
    tip = .02
    # Arrow annotations: (x, y, dx, dy, label); text sits at (x+dx, y+dy)
    arrow_annotations = (
        (flc_x, yys[2] + .3 * tip, tip, tip / 2, "FLC"),
        (flc_x, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
        (flc_x, yys[5] + .6 * tip, tip, tip / 2, "FLC"),
        (gsl_x, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
        (gsl_x, yys[4] + .9 * tip, -1.2 * tip, 0, "GSL"),
        (gsl_x, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"),
    )

    arrowprops = dict(facecolor='black',
                      shrink=.05,
                      frac=.5,
                      width=1,
                      headwidth=4)
    for x, y, dx, dy, label in arrow_annotations:
        label = r"\textit{{{0}}}".format(label)
        root.annotate(label,
                      xy=(x, y),
                      xytext=(x + dx, y + dy),
                      arrowprops=arrowprops,
                      color=rr,
                      fontsize=9,
                      ha="center",
                      va="center")

    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig,
             root,
             canvas2,
             chr2, (0, s2),
             datadir,
             order=accessions,
             gauge=None,
             plot_chr_label=False,
             gauge_step=gstep,
             palette="gray",
             cap=40,
             hlsuffix=hlsuffix,
             labels_dict=labels_dict,
             diverge=diverge)

    pad = .03
    labels = ((.1, .67, "A"), (t1.xstart - 3 * pad, .95 + pad, "B"),
              (t2.xstart - 3 * pad, .25 + pad, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #43
0
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    # NOTE: the docstring above is handed to OptionParser and therefore doubles
    # as the command-line usage text; keep it in sync with the options.
    #
    # Workflow: convert/crop the image, run edge detection, label the filled
    # regions (optionally with watershed), filter the regions by area, then
    # draw a 4-panel summary figure and write one line per seed to
    # opts.outfile. Returns the list of Seed objects that were recorded
    # (at most opts.count of them).
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (pngfile, ) = args
    # Output prefix: explicit --prefix, else the input file name sans extension
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != ".":
        mkdir(outdir)
    if calib:
        # The calibration file is JSON with "PixelCMratio" and "RGBtransform"
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)
    nbcolor = opts.changeBackground
    pngfile = convert_background(pngfile, nbcolor)
    resizefile, mainfile, labelfile, exif = convert_image(
        pngfile,
        pf,
        outdir=outdir,
        rotate=opts.rotate,
        rows=rows,
        cols=cols,
        labelrows=labelrows,
        labelcols=labelcols,
    )
    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4,
                                             nrows=1,
                                             figsize=(iopts.w, iopts.h))
    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    # NOTE(review): `edges` stays unbound if `ff` is none of the three filters
    # below; presumably the option parser restricts the choices - confirm.
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    # A kernel of 0 disables the morphological closing step
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance,
                                    threshold_rel=0.05,
                                    indices=False)
        # Peak coordinates are only needed for the green dots on panel 2
        coordinates = peak_local_max(distance, threshold_rel=0.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering
    # (w, h) simply name the two entries of img_gray.shape; they are used
    # consistently below as the y-limit and x-limit respectively.
    w, h = img_gray.shape
    canvas_size = w * h
    # --minsize / --maxsize are percentages of the canvas area
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug(
        "Find objects with pixels between {0} ({1}%) and {2} ({3}%)".format(
            min_size, opts.minsize, max_size, opts.maxsize))

    # Plotting
    ax1.set_title("Original picture")
    ax1.imshow(oimg)

    # Raw string: "\s" is not a valid escape sequence in a normal literal and
    # triggers a warning on modern Python; the resulting text is unchanged.
    params = r"{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title("Edge detection\n({0})".format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], "g.")
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title("Object detection")
    ax3.imshow(img)

    filename = op.basename(pngfile)
    # The accession comes from the label crop when there is one, else the prefix
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    for i, props in enumerate(rp):
        i += 1  # seed numbers are 1-based
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        # Half-axis offsets used to draw the major/minor axes as a red cross
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        ax2.plot((x0 - major_dx, x0 + major_dx),
                 (y0 + major_dy, y0 - major_dy), "r-")
        ax2.plot((x0 - minor_dx, x0 + minor_dx),
                 (y0 - minor_dy, y0 + minor_dy), "r-")

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * 0.35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d):(y0d + d), (x0d - d):(x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug("Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".format(
            i, npixels, len(pixels), 100.0 * npixels / canvas_size))

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        # Outline the seed's bounding box and number it on the third panel
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle((minc, minr),
                         maxc - minc,
                         maxr - minr,
                         fill=False,
                         ec="w",
                         lw=1)
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc,
                 mr,
                 "{0}".format(i),
                 color="w",
                 ha="center",
                 va="center",
                 size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(0.1, 0.92, "File: {0}".format(latex(filename)), color="g")
    ax4.text(0.1, 0.86, "Label: {0}".format(latex(accession)), color="m")
    yy = 0.8
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        # Every seed is written to the output, but only seeds numbered 1-7
        # are listed on the summary panel.
        if i > 7:
            continue
        ax4.text(0.01, yy, str(i), va="center", bbox=dict(fc="none", ec="k"))
        ax4.text(0.1, yy, o.pixeltag, va="center")
        yy -= 0.04
        ax4.add_patch(
            Rectangle((0.1, yy - 0.025),
                      0.12,
                      0.05,
                      lw=0,
                      fc=rgb_to_hex(o.rgb)))
        ax4.text(0.27, yy, o.hashtag, va="center")
        yy -= 0.06
    ax4.text(
        0.1,
        yy,
        "(A total of {0} objects displayed)".format(nb_labels),
        color="darkslategray",
    )
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family="Helvetica", size=8)
        ax.set_yticklabels(yticklabels, family="Helvetica", size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
Пример #44
0
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    #
    # Draws three panels: (A) marker scatter with the fitted curve g.spl,
    # (B) the curve g.spld labelled as recombination rate, (C) a zoom on two
    # hard-coded scaffolds with the gap between them. The figure is saved as
    # "estimategaps.<format>".
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = 0.15, 0.65
    w, h = 0.7, 0.3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    # NOTE(review): assumes spearmanr returns a plain number (compared to 0
    # below) rather than a (rho, pvalue) pair - confirm which one is imported.
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize, ylabel="Genetic distance (cM)")
    # Flip the y-axis when the correlation is negative
    if rho < 0:
        ax.invert_yaxis()

    # Panel B
    ystart -= 0.28
    h = 0.25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(
        ax,
        xlim=chrsize,
        ylim=25 * 1e-6,
        xfactor=1e-6,
        xlabel="Physical position (Mb)",
        yfactor=1000000,
        ylabel="Recomb. rate\n(cM / Mb)",
    )
    ax.xaxis.grid(False)

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    # component_id -> (object_beg, object_end, component_span, orientation)
    # for every non-gap AGP line
    sizes = {
        x.component_id: (x.object_beg, x.object_end, x.component_span, x.orientation)
        for x in g.agp
        if not x.is_gap
    }
    # Only the start of `a`, the end of `b` and both spans are needed
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    # The zoomed region (scaffold + gap + scaffold) spans 0.6 of the canvas
    ratio = 0.6 / total_size
    y = 0.16
    pad = 0.03
    pb_ratio = w / chrsize

    # Zoom
    lsg = "lightslategray"
    root.plot((0.15 + pb_ratio * a_beg, 0.2), (ystart, ystart - 0.14), ":", color=lsg)
    root.plot((0.15 + pb_ratio * b_end, 0.3), (ystart, ystart - 0.08), ":", color=lsg)
    ends = []
    # The marker offsets (49213, 81277) are hard-coded for this example
    for tag, size, marker, beg in zip(
        (a, b), (asize, bsize), (49213, 81277), (0.2, 0.2 + (asize + gapsize) * ratio)
    ):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag), ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=0.025, fc="gainsboro")

    # Transpose the (beg, end, marker) triples into three parallel tuples
    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: "\L" is not a valid escape sequence in a normal literal and
    # triggers a warning on modern Python; the rendered text is unchanged.
    root.text(
        sum(markers) / 2,
        ypos + pad,
        r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
        **fontprop
    )

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    # Dotted line across the gap between the two scaffolds
    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(
        sum(markers) / 2,
        ypos - 3 * pad,
        r"$\textit{Estimated gap size: 96,433bp}$",
        color="r",
        ha="center",
        va="center",
    )

    labels = ((0.05, 0.95, "A"), (0.05, 0.6, "B"), (0.05, 0.27, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    # The output name is fixed; the `pf` taken from args is not reused here
    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #45
0
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    from jcvi.formats.bed import Bed

    # The docstring is reused verbatim as the command-line usage text.
    p = OptionParser(fig3.__doc__)
    p.add_option(
        "--gauge_step",
        default=10000000,
        type="int",
        help="Step size for the base scale",
    )
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    rr, gg = diverge
    # One single-chromosome group per comma-separated name
    chrs = [[name] for name in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=0.05)
    height = 0.07
    r = height / 4
    K = Karyotype(
        fig,
        root,
        seqidsfile,
        klayout,
        gap=gap,
        height=height,
        lw=2,
        generank=False,
        sizes=sizes,
        heightpad=r,
        roundrect=True,
        plot_label=False,
    )

    # Chromosome labels
    for entry in K.layout:
        if entry.empty:
            continue
        lx, ly = entry.xstart, entry.y
        # Nudge labels that would start too close to the left edge
        if lx < 0.11:
            lx += 0.1
            ly += 0.06
        root.text(lx - 0.015, ly, entry.label, fontsize=15, ha="right", va="center")

    # Inset with datafiles
    datafiles = [
        op.join(datadir, name)
        for name in (
            "chrA02.bzh.forxmgr",
            "parent.A02.per10kb.forxmgr",
            "parent.C2.per10kb.forxmgr",
            "chrC02.bzh.forxmgr",
        )
    ]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for track, datafile in zip(tracks, datafiles):
        inset_ax = make_affix_axis(fig, track, -r, height=2 * r)
        xy_axes.append(inset_ax)
        seqid = track.seqids[0]
        xy = XYtrack(inset_ax, datafile, color="lightslategray")
        start, end = 0, track.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, seqid, diverge=diverge)
        xy.draw()
        inset_ax.set_xlim(start, end)
        # A second axis on the same track carries only the base-pair gauge
        gauge_ax = make_affix_axis(fig, track, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    gene_order = Bed(bedfile).order
    atoc = "data/Genes.Converted.seuil.0.6.AtoC.txt"
    ctoa = "data/Genes.Converted.seuil.0.6.CtoA.txt"
    for asterisk in (False, True):
        conversion_track(gene_order, atoc, 0, "A02", ax_Ar, rr, asterisk=asterisk)
        conversion_track(gene_order, atoc, 1, "C2", ax_Co, gg, asterisk=asterisk)
        conversion_track(
            gene_order, ctoa, 0, "A02", ax_Ar, gg, ypos=1, asterisk=asterisk
        )
        conversion_track(
            gene_order, ctoa, 1, "C2", ax_Co, rr, ypos=1, asterisk=asterisk
        )

    # Region labels: (axis, "gene1 gene2", horizontal alignment, label text)
    Ar, Co = xy_axes[1:3]
    region_labels = (
        (Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
        (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
        (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
        (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
        (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
        (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
        (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
        (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
        (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
        (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"),
    )

    for target_ax, genes, ha, text in region_labels:
        first, second = genes.split()
        left = gene_order[first][1].start
        right = gene_order[second][1].start
        if ha == "center":
            xpos = (left + right) / 2 * 0.8
        elif ha == "left":
            xpos = right
        else:
            xpos = left
        text = r"\textit{{{0}}}".format(text)
        # A "+" in the label selects the first diverge color, else the second
        color = rr if "+" in text else gg
        target_ax.text(xpos, 30, text, color=color, fontsize=9, ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Plot coverage in resequencing lines
    gstep = 5000000
    line_order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]

    canvas1 = (t1.xstart, 0.75, t1.xend - t1.xstart, 0.2)
    c = Coverage(
        fig,
        root,
        canvas1,
        chr1,
        (0, s1),
        datadir,
        order=line_order,
        gauge=None,
        plot_chr_label=False,
        gauge_step=gstep,
        palette="gray",
        cap=40,
        hlsuffix=hlsuffix,
        labels_dict=labels_dict,
        diverge=diverge,
    )
    yys = c.yys
    flc_x, gsl_x = 0.37, 0.72
    tip = 0.02
    # Arrow annotations as (x, y, dx, dy, label); FLC entries come first
    flc_marks = [
        (flc_x, yys[row] + factor * tip, tip, tip / 2, "FLC")
        for row, factor in ((2, 0.3), (3, 0.6), (5, 0.6))
    ]
    gsl_marks = [
        (gsl_x, yys[row] + 0.9 * tip, -1.2 * tip, 0, "GSL") for row in (0, 4, 6)
    ]

    arrowprops = {
        "facecolor": "black",
        "shrink": 0.05,
        "frac": 0.5,
        "width": 1,
        "headwidth": 4,
    }
    for x, y, dx, dy, text in flc_marks + gsl_marks:
        root.annotate(
            r"\textit{{{0}}}".format(text),
            xy=(x, y),
            xytext=(x + dx, y + dy),
            arrowprops=arrowprops,
            color=rr,
            fontsize=9,
            ha="center",
            va="center",
        )

    canvas2 = (t2.xstart, 0.05, t2.xend - t2.xstart, 0.2)
    Coverage(
        fig,
        root,
        canvas2,
        chr2,
        (0, s2),
        datadir,
        order=line_order,
        gauge=None,
        plot_chr_label=False,
        gauge_step=gstep,
        palette="gray",
        cap=40,
        hlsuffix=hlsuffix,
        labels_dict=labels_dict,
        diverge=diverge,
    )

    pad = 0.03
    panel_marks = (
        (0.1, 0.67, "A"),
        (t1.xstart - 3 * pad, 0.95 + pad, "B"),
        (t2.xstart - 3 * pad, 0.25 + pad, "C"),
    )
    panel_labels(root, panel_marks)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #46
0
def diagram(args):
    """
    %prog diagram

    Plot the predictive power of various evidences.
    """
    # The docstring is reused verbatim as the command-line usage text.
    p = OptionParser(diagram.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x4")

    if len(args) != 0:
        sys.exit(not p.print_help())

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge on top, this is log-scale
    lsg = "lightslategray"
    yy = 0.7
    yinterval = 0.1
    height = 0.05
    yp = yy - yinterval - height
    canvas = 0.95
    xstart = 0.025

    def convert(x):
        # Map a repeat length onto the canvas x-coordinate
        return xstart + x * canvas / 1200

    # Symbols
    root.text(
        0.5,
        0.9,
        r"$L$: Read length, $F$: Flank size, $V$: Pair distance",
        ha="center",
    )
    root.text(0.5, 0.85, r"ex. $L=150bp, F=9bp, V=500bp$", ha="center")
    root.text(
        xstart + canvas,
        yy - height,
        "STR repeat length",
        ha="center",
        color=lsg,
        size=10,
    )

    # Mark the key events
    pad = 0.02
    arrowlen = canvas * 1.05
    arrow = FancyArrow(
        xstart,
        yy,
        arrowlen,
        0,
        shape="right",
        length_includes_head=True,
        width=0.01,
        fc=lsg,
        lw=0,
        head_length=arrowlen * 0.12,
        head_width=0.04,
    )
    root.add_patch(arrow)

    ppad = 30
    # (tick position, label position, row of the matching range bar, label);
    # a negative row means no dotted guide line is drawn
    keyevents = (
        (0, 0, -1, r"$0$"),
        (150 - 18, 150 - 18 - ppad, 0, r"$L - 2F$"),
        (150 - 9, 150 - 9, 1, r"$L - F$"),
        (150, 150 + ppad, 2, r"$L$"),
        (500 - 9, 500 - 9, 3, r"$V - F$"),
        (500 * 2 - 18, 500 * 2 - 18, 2, r"$2(V - F)$"),
    )
    for event, pos, row, text in keyevents:
        tick_x = convert(event)
        text_x = convert(pos)
        root.plot(
            (tick_x, tick_x), (yy - height / 4, yy + height / 4), "-", color="k"
        )
        root.text(text_x, yy + pad, text, rotation=45, va="bottom", size=8)
        if row >= 0:
            ystart = yp - row * yinterval
            root.plot((tick_x, tick_x), (ystart, yy - height / 4), ":", color=lsg)

    # Range on bottom. These are simple 4 rectangles, with the range indicating
    # the predictive range.
    CLOSED, OPEN = 0, 1
    ranges = (
        (0, 150 - 18, CLOSED, "Spanning reads"),
        (9, 150 - 9, OPEN, "Partial reads"),
        (150, 500 * 2 - 18, CLOSED, "Repeat reads"),
        (0, 500 - 9, CLOSED, "Paired-end reads"),
    )
    for start, end, starttag, text in ranges:
        bar_start = convert(start)
        bar_end = convert(end)
        # The 2x2 gradient runs one way for OPEN starts, the other for CLOSED
        if starttag == OPEN:
            data = [[0.0, 1.0], [0.0, 1.0]]
        else:
            data = [[1.0, 0.0], [1.0, 0.0]]
        root.imshow(
            data,
            interpolation="bicubic",
            cmap=plt.cm.Greens,
            extent=[bar_start, bar_end, yp, yp + height],
        )
        root.text(bar_end + pad, yp + height / 2, text, va="center")
        # Each subsequent bar sits one interval lower
        yp -= yinterval

    normalize_axes(root)

    image_name = "diagram." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #47
0
def main():
    # Draws a 3x3 grid of alignment cartoons: rows are the variant types
    # (inversion, indel, duplication), columns are the alignment types
    # (assembly, read, optical map). The only accepted mode is "demo", and
    # the figure is saved as "<mode>.<format>".
    # The usage text comes from the module docstring, not from this function.
    p = OptionParser(__doc__)
    opts, args, iopts = p.set_image_options(figsize="9x7")

    if len(args) != 1:
        sys.exit(not p.print_help())

    mode, = args
    assert mode == "demo"

    # (a, b) is the interval every variant operation below is applied to
    a, b = 30, 70
    pad = .08
    w = .31
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Row separators
    yy = 1 - pad
    # `range` instead of the Python 2-only `xrange`, which is undefined on
    # Python 3; for three iterations the two are equivalent.
    for _ in range(3):
        root.plot((0, 1), (yy, yy), "-", lw=2, color="lightgray")
        yy -= w

    # Row headers
    xx = pad * .6
    yy = 1 - pad - .5 * w
    for title in ("Inversion", "Indel", "Duplication"):
        root.text(xx, yy, title, ha="center", va="center")
        yy -= w

    # Column headers
    xx = pad + .5 * w
    yy = 1 - pad / 2
    for title in ("Assembly alignment", "Read alignment", "Optical map alignment"):
        root.text(xx, yy, title, ha="center", va="center")
        xx += w

    # Column 1: assembly alignments (top to bottom: invert, delete, duplicate)
    p = PairwiseAlign(fig, [pad, 2 * w, w, w])
    p.invert(a, b)
    p.draw()

    p = PairwiseAlign(fig, [pad, w, w, w])
    p.delete(a, b)
    p.draw()

    p = PairwiseAlign(fig, [pad, 0, w, w])
    p.duplicate(a, b, gap=5)
    p.draw()

    # Column 2: read alignments
    p = ReadAlign(fig, [pad + w, 2 * w, w, w])
    p.invert(a, b)
    p.draw()

    p = ReadAlign(fig, [pad + w, w, w, w])
    p.delete(a, b)
    p.draw()

    p = ReadAlign(fig, [pad + w, 0, w, w])
    p.duplicate(a, b)
    p.draw()

    # Column 3: optical map alignments
    p = OpticalMapAlign(fig, [pad + 2 * w, 2 * w, w, w])
    p.invert(a, b)
    p.draw()

    p = OpticalMapAlign(fig, [pad + 2 * w, w, w, w])
    p.delete(a, b)
    p.draw()

    p = OpticalMapAlign(fig, [pad + 2 * w, 0, w, w])
    p.duplicate(a, b)
    p.draw()

    normalize_axes(root)

    image_name = mode + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Пример #48
0
def multihistogram(args):
    """
    %prog multihistogram *.histogram species

    Plot the histogram based on a set of K-mer histograms. The method is based
    on Star et al.'s method (Atlantic Cod genome paper).
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    #
    # Panel A overlays the K-mer spectra of every input histogram whose K lies
    # within [--kmin, --kmax]; panel B plots the per-K genome size estimate
    # with a quadratic fit. Output is written to "<species>.multiK.pdf".
    p = OptionParser(multihistogram.__doc__)
    p.add_option("--kmin",
                 default=15,
                 type="int",
                 help="Minimum K-mer size, inclusive")
    p.add_option("--kmax",
                 default=30,
                 type="int",
                 help="Maximum K-mer size, inclusive")
    p.add_option("--vmin",
                 default=2,
                 type="int",
                 help="Minimum value, inclusive")
    p.add_option("--vmax",
                 default=100,
                 type="int",
                 help="Maximum value, inclusive")
    opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300)

    # At least one histogram file plus the trailing species name are required;
    # a single argument would leave no histogram to plot.
    if len(args) < 2:
        sys.exit(not p.print_help())

    histfiles = args[:-1]
    species = args[-1]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.08, 0.12, 0.38, 0.76])
    B = fig.add_axes([0.58, 0.12, 0.38, 0.76])

    lines = []
    legends = []
    genomesizes = []
    for histfile in histfiles:
        ks = KmerSpectrum(histfile)
        x, y = ks.get_xy(opts.vmin, opts.vmax)
        # K is parsed from the file name: the part before the first "."
        # and after the last "-" of the base name
        K = get_number(op.basename(histfile).split(".")[0].split("-")[-1])
        if not opts.kmin <= K <= opts.kmax:
            continue

        (line, ) = A.plot(x, y, "-", lw=1)
        lines.append(line)
        legends.append("K = {0}".format(K))
        ks.analyze(K=K, method="allpaths")
        # Genome size is recorded in units of 1e6 (plotted as Mb)
        genomesizes.append((K, ks.genomesize / 1e6))

    # Fail with a clear message instead of an opaque unpacking error further
    # down when every histogram was filtered out by --kmin/--kmax.
    if not genomesizes:
        sys.exit("No histogram with K-mer size between {0} and {1}".format(
            opts.kmin, opts.kmax))

    leg = A.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(0.5)

    title = "{0} genome K-mer histogram".format(species)
    A.set_title(markup(title))
    xlabel, ylabel = "Coverage (X)", "Counts"
    A.set_xlabel(xlabel)
    A.set_ylabel(ylabel)
    set_human_axis(A)

    title = "{0} genome size estimate".format(species)
    B.set_title(markup(title))
    x, y = zip(*genomesizes)
    B.plot(x, y, "ko", mfc="w")
    t = np.linspace(opts.kmin - 0.5, opts.kmax + 0.5, 100)
    # Quadratic trend line through the (K, genome size) points
    fit = np.poly1d(np.polyfit(x, y, 2))
    B.plot(t, fit(t), "r:")

    xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)"
    B.set_xlabel(xlabel)
    B.set_ylabel(ylabel)
    set_ticklabels_helvetica(B)

    labels = ((0.04, 0.96, "A"), (0.54, 0.96, "B"))
    panel_labels(root, labels)

    normalize_axes(root)
    imagename = species + ".multiK.pdf"
    savefig(imagename, dpi=iopts.dpi, iopts=iopts)
Пример #49
0
def depth(args):
    """
    %prog depth anchorfile --qbed qbedfile --sbed sbedfile

    Calculate the depths in the two genomes in comparison, given in --qbed and
    --sbed. The synteny blocks will be layered on the genomes, and the
    multiplicity will be summarized to stderr.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    #
    # With --depthfile the per-gene details are also written to that file, and
    # with --histogram a two-panel figure is saved next to the anchor file as
    # "<anchor prefix>.depth.pdf". For a self comparison only the query depths
    # are computed and the function returns before any plotting.
    from jcvi.utils.range import range_depth

    p = OptionParser(depth.__doc__)
    p.add_option("--depthfile",
                 help="Generate file with gene and depth [default: %default]")
    p.add_option("--histogram", default=False, action="store_true",
                 help="Plot histograms in PDF")
    p.add_option("--xmax", type="int", help="x-axis maximum to display in plot")
    p.add_option("--title", default=None, help="Title to display in plot")
    p.add_option("--quota", help="Force to use this quota, e.g. 1:1, 1:2 ...")
    p.set_beds()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    anchorfile, = args
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts)
    depthfile = opts.depthfile
    ac = AnchorFile(anchorfile)
    qranges = []
    sranges = []
    blocks = ac.blocks
    for ib in blocks:
        q, s, t = zip(*ib)
        q = [qorder[x] for x in q]
        s = [sorder[x] for x in s]
        # Each block contributes the span between its smallest and largest
        # first element of the order entries, on either genome
        qrange = (min(q)[0], max(q)[0])
        srange = (min(s)[0], max(s)[0])
        qranges.append(qrange)
        sranges.append(srange)
        # In a self comparison both sides of a block lie on the same genome
        if is_self:
            qranges.append(srange)

    qgenome = op.basename(qbed.filename).split(".")[0]
    sgenome = op.basename(sbed.filename).split(".")[0]
    qtag = "Genome {0} depths".format(qgenome)
    print("{}:".format(qtag), file=sys.stderr)
    dsq, details = range_depth(qranges, len(qbed))
    if depthfile:
        fw = open(depthfile, "w")
        write_details(fw, details, qbed)

    if is_self:
        # Close the depth file on this early exit as well; it was left open.
        if depthfile:
            fw.close()
        return

    stag = "Genome {0} depths".format(sgenome)
    print("{}:".format(stag), file=sys.stderr)
    dss, details = range_depth(sranges, len(sbed))
    if depthfile:
        write_details(fw, details, sbed)
        fw.close()
        logging.debug("Depth written to `{0}`.".format(depthfile))

    if not opts.histogram:
        return

    from jcvi.graphics.base import plt, quickplot_ax, savefig, normalize_axes

    # Plot two histograms one for query genome, one for subject genome
    # NOTE(review): plt.subplots() below opens its own figure, so the (6, 3)
    # size requested here presumably does not apply to the saved plot - confirm.
    plt.figure(1, (6, 3))
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    # Key views cannot be concatenated with `+` on Python 3, so build lists
    xmax = opts.xmax or max(4, max(list(dsq.keys()) + list(dss.keys())))
    if opts.quota:
        # The quota reads "<subject>:<query>"
        speak, qpeak = opts.quota.split(":")
        qpeak, speak = int(qpeak), int(speak)
    else:
        qpeak = find_peak(dsq)
        speak = find_peak(dss)

    qtag = "# of {} blocks per {} gene".format(sgenome, qgenome)
    stag = "# of {} blocks per {} gene".format(qgenome, sgenome)
    quickplot_ax(ax1, dss, 0, xmax, stag, ylabel="Percentage of genome",
                 highlight=range(1, speak + 1))
    quickplot_ax(ax2, dsq, 0, xmax, qtag, ylabel=None,
                 highlight=range(1, qpeak + 1))

    title = opts.title or "{} vs {} syntenic depths\n{}:{} pattern"\
                    .format(qgenome, sgenome, speak, qpeak)
    root = f.add_axes([0, 0, 1, 1])
    # First line is the heading, the remainder the pattern line. partition()
    # also copes with a user-supplied --title that has no newline, which
    # previously failed to unpack.
    vs, _, pattern = title.partition("\n")
    root.text(.5, .97, vs, ha="center", va="center", color="darkslategray")
    root.text(.5, .925, pattern, ha="center", va="center",
                                 color="tomato", size=16)
    print(title, file=sys.stderr)

    normalize_axes(root)

    pf = anchorfile.rsplit(".", 1)[0] + ".depth"
    image_name = pf + ".pdf"
    savefig(image_name)