Пример #1
0
def ortholog(args):
    """
    %prog ortholog species_a species_b

    Run a sensitive pipeline to find orthologs between two species a and b.
    The pipeline runs LAST and generate .lifted.anchors.

    `--full` mode would assume 1-to-1 quota synteny blocks as the backbone of
    such predictions. Extra orthologs will be recruited from reciprocal best
    match (RBH).
    """
    from jcvi.apps.align import last as last_main
    from jcvi.compara.blastfilter import main as blastfilter_main
    from jcvi.compara.quota import main as quota_main
    from jcvi.compara.synteny import scan, mcscan, liftover
    from jcvi.formats.blast import cscore, filter

    p = OptionParser(ortholog.__doc__)
    p.add_option(
        "--dbtype",
        default="nucl",
        choices=("nucl", "prot"),
        help="Molecule type of subject database",
    )
    p.add_option(
        "--full",
        default=False,
        action="store_true",
        help="Run in full 1x1 mode, including blocks and RBH",
    )
    p.add_option("--cscore", default=0.7, type="float", help="C-score cutoff")
    p.add_option(
        "--dist", default=20, type="int", help="Extent of flanking regions to search"
    )
    p.add_option(
        "-n",
        "--min_size",
        dest="n",
        type="int",
        default=4,
        help="minimum number of anchors in a cluster",
    )
    p.add_option("--quota", help="Quota align parameter")
    p.add_option(
        "--no_strip_names",
        default=False,
        action="store_true",
        help="Do not strip alternative splicing (e.g. At5g06540.1 -> At5g06540)",
    )
    p.set_cpus()
    p.set_dotplot_opts()

    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    a, b = args
    dbtype = opts.dbtype
    suffix = ".cds" if dbtype == "nucl" else ".pep"
    abed, afasta = a + ".bed", a + suffix
    bbed, bfasta = b + ".bed", b + suffix
    ccscore = opts.cscore
    quota = opts.quota
    dist = "--dist={0}".format(opts.dist)
    minsize_flag = "--min_size={}".format(opts.n)
    cpus_flag = "--cpus={}".format(opts.cpus)

    aprefix = afasta.split(".")[0]
    bprefix = bfasta.split(".")[0]
    pprefix = ".".join((aprefix, bprefix))
    qprefix = ".".join((bprefix, aprefix))
    last = pprefix + ".last"
    if need_update((afasta, bfasta), last):
        last_main([bfasta, afasta, cpus_flag], dbtype)

    if a == b:
        lastself = last + ".P98L0.inverse"
        if need_update(last, lastself):
            filter([last, "--hitlen=0", "--pctid=98", "--inverse", "--noself"])
        last = lastself

    filtered_last = last + ".filtered"
    if need_update(last, filtered_last):
        if opts.no_strip_names:
            blastfilter_main([last, "--cscore={0}".format(ccscore), "--no_strip_names"])
        else:
            blastfilter_main([last, "--cscore={0}".format(ccscore)])

    anchors = pprefix + ".anchors"
    lifted_anchors = pprefix + ".lifted.anchors"
    pdf = pprefix + ".pdf"
    if not opts.full:
        if need_update(filtered_last, lifted_anchors):
            dargs = [
                filtered_last,
                anchors,
                minsize_flag,
                dist,
                "--liftover={0}".format(last),
            ]
            if opts.no_strip_names:
                dargs += [
                    "--no_strip_names",
                ]
            scan(dargs)
        if quota:
            quota_main([lifted_anchors, "--quota={0}".format(quota), "--screen"])
        if need_update(anchors, pdf):
            from jcvi.graphics.dotplot import dotplot_main

            dargs = [anchors]
            if opts.nostdpf:
                dargs += ["--nostdpf"]
            if opts.nochpf:
                dargs += ["--nochpf"]
            if opts.skipempty:
                dargs += ["--skipempty"]
            if opts.genomenames:
                dargs += ["--genomenames", opts.genomenames]
            if opts.theme:
                dargs += ["--theme", opts.theme]
            dotplot_main(dargs)
        return

    if need_update(filtered_last, anchors):
        if opts.no_strip_names:
            scan([filtered_last, anchors, dist, "--no_strip_names"])
        else:
            scan([filtered_last, anchors, dist])

    ooanchors = pprefix + ".1x1.anchors"
    if need_update(anchors, ooanchors):
        quota_main([anchors, "--quota=1:1", "--screen"])

    lifted_anchors = pprefix + ".1x1.lifted.anchors"
    if need_update((last, ooanchors), lifted_anchors):
        if opts.no_strip_names:
            liftover([last, ooanchors, dist, "--no_strip_names"])
        else:
            liftover([last, ooanchors, dist])

    pblocks = pprefix + ".1x1.blocks"
    qblocks = qprefix + ".1x1.blocks"
    if need_update(lifted_anchors, [pblocks, qblocks]):
        mcscan([abed, lifted_anchors, "--iter=1", "-o", pblocks])
        mcscan([bbed, lifted_anchors, "--iter=1", "-o", qblocks])

    rbh = pprefix + ".rbh"
    if need_update(last, rbh):
        cscore([last, "-o", rbh])

    portho = pprefix + ".ortholog"
    qortho = qprefix + ".ortholog"
    if need_update([pblocks, qblocks, rbh], [portho, qortho]):
        make_ortholog(pblocks, rbh, portho)
        make_ortholog(qblocks, rbh, qortho)
Пример #2
0
def dotplot_main(args):
    p = OptionParser(__doc__)
    p.set_beds()
    p.add_option(
        "--synteny",
        default=False,
        action="store_true",
        help="Run a fast synteny scan and display blocks",
    )
    p.add_option("--cmaptext",
                 help="Draw colormap box on the bottom-left corner")
    p.add_option(
        "--vmin",
        dest="vmin",
        type="float",
        default=0,
        help="Minimum value in the colormap",
    )
    p.add_option(
        "--vmax",
        dest="vmax",
        type="float",
        default=2,
        help="Maximum value in the colormap",
    )
    p.add_option(
        "--nmax",
        dest="sample_number",
        type="int",
        default=10000,
        help="Maximum number of data points to plot",
    )
    p.add_option(
        "--minfont",
        type="int",
        default=4,
        help="Do not render labels with size smaller than",
    )
    p.add_option("--colormap",
                 help="Two column file, block id to color mapping")
    p.add_option(
        "--nosort",
        default=False,
        action="store_true",
        help="Do not sort the seqids along the axes",
    )
    p.add_option("--nosep",
                 default=False,
                 action="store_true",
                 help="Do not add contig lines")
    p.add_option("--title", help="Title of the dot plot")
    p.set_dotplot_opts()
    p.set_outfile(outfile=None)
    opts, args, iopts = p.set_image_options(args,
                                            figsize="9x9",
                                            style="dark",
                                            dpi=90,
                                            cmap="copper")

    if len(args) != 1:
        sys.exit(not p.print_help())

    palette = opts.colormap
    if palette:
        palette = Palette(palette)

    (anchorfile, ) = args
    cmaptext = opts.cmaptext
    if anchorfile.endswith(".ks"):
        from jcvi.apps.ks import KsFile

        logging.debug("Anchors contain Ks values")
        cmaptext = cmaptext or "*Ks* values"
        anchorksfile = anchorfile + ".anchors"
        if need_update(anchorfile, anchorksfile):
            ksfile = KsFile(anchorfile)
            ksfile.print_to_anchors(anchorksfile)
        anchorfile = anchorksfile

    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile,
                                                     p,
                                                     opts,
                                                     sorted=(not opts.nosort))

    if opts.skipempty:
        ac = AnchorFile(anchorfile)
        if is_self:
            qseqids = sseqids = set()
        else:
            qseqids, sseqids = set(), set()

        for pair in ac.iter_pairs():
            q, s = pair[:2]
            qi, q = qorder[q]
            si, s = sorder[s]
            qseqids.add(q.seqid)
            sseqids.add(s.seqid)

        if is_self:
            qbed = sbed = subset_bed(qbed, qseqids)
        else:
            qbed = subset_bed(qbed, qseqids)
            sbed = subset_bed(sbed, sseqids)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # the dot plot

    dotplot(
        anchorfile,
        qbed,
        sbed,
        fig,
        root,
        ax,
        vmin=opts.vmin,
        vmax=opts.vmax,
        is_self=is_self,
        synteny=opts.synteny,
        cmap_text=opts.cmaptext,
        cmap=iopts.cmap,
        genomenames=opts.genomenames,
        sample_number=opts.sample_number,
        minfont=opts.minfont,
        palette=palette,
        sep=(not opts.nosep),
        sepcolor=set1[int(opts.theme)],
        title=opts.title,
        stdpf=(not opts.nostdpf),
        chpf=(not opts.nochpf),
    )

    image_name = opts.outfile or (op.splitext(anchorfile)[0] + "." +
                                  opts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    fig.clear()