def ortholog(args):
    """
    %prog ortholog species_a species_b

    Run a sensitive pipeline to find orthologs between two species a and b.
    The pipeline runs LAST and generate .lifted.anchors.

    `--full` mode would assume 1-to-1 quota synteny blocks as the backbone of
    such predictions. Extra orthologs will be recruited from reciprocal best
    match (RBH).
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser below), so it is kept user-facing.
    #
    # `args` is the argument vector: options plus exactly two species
    # prefixes. For each prefix the pipeline reads `<prefix>.bed` and
    # `<prefix>.cds` (or `<prefix>.pep` with --dbtype=prot). Most steps are
    # guarded by need_update() (presumably a staleness check on the output
    # file -- confirm against its definition), so reruns can skip finished
    # work. Nothing is returned; results are files named after the prefixes.
    from jcvi.apps.align import last as last_main
    from jcvi.compara.blastfilter import main as blastfilter_main
    from jcvi.compara.quota import main as quota_main
    from jcvi.compara.synteny import scan, mcscan, liftover

    # Aliased so the imported function does not shadow the builtin `filter`.
    from jcvi.formats.blast import cscore, filter as blast_filter

    p = OptionParser(ortholog.__doc__)
    p.add_option(
        "--dbtype",
        default="nucl",
        choices=("nucl", "prot"),
        help="Molecule type of subject database",
    )
    p.add_option(
        "--full",
        default=False,
        action="store_true",
        help="Run in full 1x1 mode, including blocks and RBH",
    )
    p.add_option("--cscore", default=0.7, type="float", help="C-score cutoff")
    p.add_option(
        "--dist", default=20, type="int", help="Extent of flanking regions to search"
    )
    p.add_option(
        "-n",
        "--min_size",
        dest="n",
        type="int",
        default=4,
        help="minimum number of anchors in a cluster",
    )
    p.add_option("--quota", help="Quota align parameter")
    p.add_option(
        "--no_strip_names",
        default=False,
        action="store_true",
        help="Do not strip alternative splicing (e.g. At5g06540.1 -> At5g06540)",
    )
    p.set_cpus()
    p.set_dotplot_opts()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    a, b = args
    dbtype = opts.dbtype
    suffix = ".cds" if dbtype == "nucl" else ".pep"
    abed, afasta = a + ".bed", a + suffix
    bbed, bfasta = b + ".bed", b + suffix
    ccscore = opts.cscore
    quota = opts.quota
    dist = "--dist={0}".format(opts.dist)
    minsize_flag = "--min_size={}".format(opts.n)
    cpus_flag = "--cpus={}".format(opts.cpus)
    # Appended to every sub-command that accepts it; empty when names should
    # be stripped (the default).
    strip_flags = ["--no_strip_names"] if opts.no_strip_names else []

    # Output files are named after the part of each FASTA name before the
    # first dot.
    aprefix = afasta.split(".")[0]
    bprefix = bfasta.split(".")[0]
    pprefix = ".".join((aprefix, bprefix))
    qprefix = ".".join((bprefix, aprefix))

    # Step 1: LAST alignment.
    last = pprefix + ".last"
    if need_update((afasta, bfasta), last):
        last_main([bfasta, afasta, cpus_flag], dbtype)

    # Self-comparison: run the extra BLAST filter (--inverse/--noself) and
    # continue with its output instead of the raw LAST file.
    if a == b:
        lastself = last + ".P98L0.inverse"
        if need_update(last, lastself):
            blast_filter([last, "--hitlen=0", "--pctid=98", "--inverse", "--noself"])
        last = lastself

    # Step 2: C-score filtering of the hits.
    filtered_last = last + ".filtered"
    if need_update(last, filtered_last):
        blastfilter_main([last, "--cscore={0}".format(ccscore)] + strip_flags)

    anchors = pprefix + ".anchors"
    lifted_anchors = pprefix + ".lifted.anchors"
    pdf = pprefix + ".pdf"

    if not opts.full:
        # Default mode: synteny scan with liftover, optional quota screen,
        # then a dot plot of the anchors.
        if need_update(filtered_last, lifted_anchors):
            scan(
                [
                    filtered_last,
                    anchors,
                    minsize_flag,
                    dist,
                    "--liftover={0}".format(last),
                ]
                + strip_flags
            )
        if quota:
            quota_main([lifted_anchors, "--quota={0}".format(quota), "--screen"])
        if need_update(anchors, pdf):
            from jcvi.graphics.dotplot import dotplot_main

            # Forward the dot plot options parsed here to the plotting command.
            dargs = [anchors]
            if opts.nostdpf:
                dargs += ["--nostdpf"]
            if opts.nochpf:
                dargs += ["--nochpf"]
            if opts.skipempty:
                dargs += ["--skipempty"]
            if opts.genomenames:
                dargs += ["--genomenames", opts.genomenames]
            if opts.theme:
                dargs += ["--theme", opts.theme]
            dotplot_main(dargs)
        return

    # --full mode: anchors -> 1x1 quota screen -> liftover -> blocks, plus
    # RBH, combined into the ortholog tables.
    if need_update(filtered_last, anchors):
        # Pass the user's --min_size here as well; previously it was only
        # honored in the default mode and silently ignored under --full.
        scan([filtered_last, anchors, minsize_flag, dist] + strip_flags)

    ooanchors = pprefix + ".1x1.anchors"
    if need_update(anchors, ooanchors):
        quota_main([anchors, "--quota=1:1", "--screen"])

    lifted_anchors = pprefix + ".1x1.lifted.anchors"
    if need_update((last, ooanchors), lifted_anchors):
        liftover([last, ooanchors, dist] + strip_flags)

    # One block file per direction (a-vs-b and b-vs-a).
    pblocks = pprefix + ".1x1.blocks"
    qblocks = qprefix + ".1x1.blocks"
    if need_update(lifted_anchors, [pblocks, qblocks]):
        mcscan([abed, lifted_anchors, "--iter=1", "-o", pblocks])
        mcscan([bbed, lifted_anchors, "--iter=1", "-o", qblocks])

    rbh = pprefix + ".rbh"
    if need_update(last, rbh):
        cscore([last, "-o", rbh])

    portho = pprefix + ".ortholog"
    qortho = qprefix + ".ortholog"
    if need_update([pblocks, qblocks, rbh], [portho, qortho]):
        make_ortholog(pblocks, rbh, portho)
        make_ortholog(qblocks, rbh, qortho)
def dotplot_main(args):
    """Parse dot plot options from *args*, draw the plot and save the image.

    *args* is an argument vector holding options plus exactly one positional
    argument, the anchor file. If that file name ends with ``.ks`` it is
    first converted to ``<file>.anchors`` and the colormap label defaults to
    "*Ks* values" unless ``--cmaptext`` was given.

    The image is written to ``--outfile`` when set, otherwise to the anchor
    file name with its extension replaced by the chosen image format.
    Prints the help text and exits when the positional argument count is
    not one.
    """
    p = OptionParser(__doc__)
    p.set_beds()
    p.add_option(
        "--synteny",
        default=False,
        action="store_true",
        help="Run a fast synteny scan and display blocks",
    )
    p.add_option("--cmaptext", help="Draw colormap box on the bottom-left corner")
    p.add_option(
        "--vmin",
        dest="vmin",
        type="float",
        default=0,
        help="Minimum value in the colormap",
    )
    p.add_option(
        "--vmax",
        dest="vmax",
        type="float",
        default=2,
        help="Maximum value in the colormap",
    )
    p.add_option(
        "--nmax",
        dest="sample_number",
        type="int",
        default=10000,
        help="Maximum number of data points to plot",
    )
    p.add_option(
        "--minfont",
        type="int",
        default=4,
        help="Do not render labels with size smaller than",
    )
    p.add_option("--colormap", help="Two column file, block id to color mapping")
    p.add_option(
        "--nosort",
        default=False,
        action="store_true",
        help="Do not sort the seqids along the axes",
    )
    p.add_option(
        "--nosep", default=False, action="store_true", help="Do not add contig lines"
    )
    p.add_option("--title", help="Title of the dot plot")
    p.set_dotplot_opts()
    p.set_outfile(outfile=None)
    opts, args, iopts = p.set_image_options(
        args, figsize="9x9", style="dark", dpi=90, cmap="copper"
    )

    if len(args) != 1:
        sys.exit(not p.print_help())

    palette = opts.colormap
    if palette:
        palette = Palette(palette)

    (anchorfile,) = args
    cmaptext = opts.cmaptext
    if anchorfile.endswith(".ks"):
        from jcvi.apps.ks import KsFile

        logging.debug("Anchors contain Ks values")
        # Fall back to a generic label when the user did not supply one.
        cmaptext = cmaptext or "*Ks* values"
        anchorksfile = anchorfile + ".anchors"
        if need_update(anchorfile, anchorksfile):
            ksfile = KsFile(anchorfile)
            ksfile.print_to_anchors(anchorksfile)
        # From here on, work with the converted anchors file.
        anchorfile = anchorksfile

    qbed, sbed, qorder, sorder, is_self = check_beds(
        anchorfile, p, opts, sorted=(not opts.nosort)
    )

    if opts.skipempty:
        # Keep only the seqids that occur in at least one anchor pair.
        ac = AnchorFile(anchorfile)
        if is_self:
            # Self comparison: both axes deliberately share one set.
            qseqids = sseqids = set()
        else:
            qseqids, sseqids = set(), set()

        for pair in ac.iter_pairs():
            q, s = pair[:2]
            _, q = qorder[q]
            _, s = sorder[s]
            qseqids.add(q.seqid)
            sseqids.add(s.seqid)

        if is_self:
            qbed = sbed = subset_bed(qbed, qseqids)
        else:
            qbed = subset_bed(qbed, qseqids)
            sbed = subset_bed(sbed, sseqids)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # the dot plot

    dotplot(
        anchorfile,
        qbed,
        sbed,
        fig,
        root,
        ax,
        vmin=opts.vmin,
        vmax=opts.vmax,
        is_self=is_self,
        synteny=opts.synteny,
        # Use the resolved label (with the Ks fallback applied above), not the
        # raw option value, which dropped the fallback.
        cmap_text=cmaptext,
        cmap=iopts.cmap,
        genomenames=opts.genomenames,
        sample_number=opts.sample_number,
        minfont=opts.minfont,
        palette=palette,
        sep=(not opts.nosep),
        sepcolor=set1[int(opts.theme)],
        title=opts.title,
        stdpf=(not opts.nostdpf),
        chpf=(not opts.nochpf),
    )

    image_name = opts.outfile or (op.splitext(anchorfile)[0] + "." + opts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    fig.clear()