示例#1
0
def _run_wham(inputs, background_bams):
    """Call structural variants with WHAM (whamg) on inputs plus background BAMs.

    The bgzipped VCF is written into the SV work directory of the first input
    and is named after that input's sample. whamg is only invoked when that
    output file does not exist yet.

    inputs -- sample data dictionaries; each one provides an "align_bam" entry
    background_bams -- list of extra BAM paths appended after the input BAMs

    Returns the result of vcfutils.bgzip_and_index for the output file.
    """
    lead = inputs[0]
    sv_dir = _sv_workdir(lead)
    out_file = os.path.join(sv_dir, "%s-wham.vcf.gz" % dd.get_sample_name(lead))
    if not utils.file_exists(out_file):
        with file_transaction(lead, out_file) as tx_out_file:
            num_cores = dd.get_cores(lead)
            reference = dd.get_ref_file(lead)
            # Only contigs accepted by chromhacks.is_autosomal_or_x are handed to whamg via -c
            chroms = ",".join(contig.name for contig in ref.file_contigs(reference)
                              if chromhacks.is_autosomal_or_x(contig.name))
            bam_paths = [item["align_bam"] for item in inputs] + background_bams
            template = ("whamg -x {cores} -a {ref_file} -f {all_bams} -c {include_chroms} "
                        "| bgzip -c > {tx_out_file}")
            # Name every placeholder explicitly instead of relying on the local namespace
            command = template.format(cores=num_cores,
                                      ref_file=reference,
                                      all_bams=",".join(bam_paths),
                                      include_chroms=chroms,
                                      tx_out_file=tx_out_file)
            sample_names = ", ".join(dd.get_sample_name(d) for d in inputs)
            do.run(command, "WHAM SV caller: %s" % sample_names)
    return vcfutils.bgzip_and_index(out_file, lead["config"])
示例#2
0
def _run_titancna(cn_file, het_file, ploidy, num_clusters, work_dir, data):
    """Run the titanCNA.R wrapper script for one ploidy / cluster-count pair.

    The script runs inside a transactional temporary directory. Afterwards
    every file there whose name starts with "<sample>_cluster<NN>" is moved
    into "run_ploidy<ploidy>" under work_dir, along with Rplots.pdf when
    one was produced. Nothing is run when utils.file_uptodate reports the
    expected ".titan.txt" output as current relative to cn_file.

    cn_file -- copy number input, passed as --cnFile
    het_file -- heterozygous sites input, passed as --hetFile
    ploidy -- ploidy value, passed as --ploidy and used in the directory name
    num_clusters -- cluster count, passed as --numClusters
    work_dir -- parent directory for the per-ploidy output directory
    data -- sample data dictionary

    Returns the per-ploidy output directory (as returned by utils.safe_makedir).
    """
    sample = dd.get_sample_name(data)
    cores = dd.get_num_cores(data)
    export_cmd = utils.get_R_exports()
    ploidy_dir = utils.safe_makedir(os.path.join(work_dir, "run_ploidy%s" % ploidy))

    cluster_dir = "%s_cluster%02d" % (sample, num_clusters)
    titan_out = os.path.join(ploidy_dir, cluster_dir) + ".titan.txt"
    if utils.file_uptodate(titan_out, cn_file):
        return ploidy_dir

    with tx_tmpdir(data) as tmp_dir:
        with utils.chdir(tmp_dir):
            template = ("{export_cmd} && titanCNA.R --id {sample} --hetFile {het_file} --cnFile {cn_file} "
                        "--numClusters {num_clusters} --ploidy {ploidy} --numCores {cores} --outDir {tmp_dir} "
                        "--libdir None")
            # Quoted contig names with any "chr" removed; X is always included
            chroms = []
            for contig in ref.file_contigs(dd.get_ref_file(data)):
                if chromhacks.is_autosomal_or_x(contig.name):
                    chroms.append("'%s'" % contig.name.replace("chr", ""))
            if "'X'" not in chroms:
                chroms.append("'X'")
            # Use UCSC style naming for human builds to support BSgenome
            build = dd.get_genome_build(data)
            genome_build = "hg19" if build in ["GRCh37", "hg19"] else build
            template += """ --chrs "c(%s)" """ % ",".join(chroms)
            template += " --genomeBuild {genome_build}"
            if data["genome_build"] in ("hg19", "hg38"):
                template += " --genomeStyle UCSC"
            if data["genome_build"] in ["hg38"]:
                # The cytoband file is looked up in a "data" directory two levels above
                # the resolved location of titanCNA.R, which is searched next to Rscript
                rscript_dir = os.path.dirname(utils.Rscript_cmd())
                titan_script = os.path.realpath(os.path.join(rscript_dir, "titanCNA.R"))
                data_dir = os.path.normpath(os.path.join(os.path.dirname(titan_script),
                                                         os.pardir, os.pardir, "data"))
                cytoband_file = os.path.join(data_dir, "cytoBand_hg38.txt")
                assert os.path.exists(cytoband_file), cytoband_file
                template += " --cytobandFile %s" % cytoband_file
            # TitanCNA's model is influenced by the variance in read coverage data
            # and data type: set reasonable defaults for non-WGS runs
            # (see https://github.com/gavinha/TitanCNA/tree/master/scripts/R_scripts)
            if dd.get_coverage_interval(data) != "genome":
                template += " --alphaK=2500 --alphaKHigh=2500"
            # Placeholders are filled only after all options are appended
            command = template.format(export_cmd=export_cmd,
                                      sample=sample,
                                      het_file=het_file,
                                      cn_file=cn_file,
                                      num_clusters=num_clusters,
                                      ploidy=ploidy,
                                      cores=cores,
                                      tmp_dir=tmp_dir,
                                      genome_build=genome_build)
            do.run(command, "TitanCNA CNV detection: ploidy %s, cluster %s" % (ploidy, num_clusters))
        # Collect results out of the temporary directory before it goes away
        for fname in glob.glob(os.path.join(tmp_dir, cluster_dir + "*")):
            shutil.move(fname, ploidy_dir)
        rplots = os.path.join(tmp_dir, "Rplots.pdf")
        if os.path.exists(rplots):
            shutil.move(rplots, os.path.join(ploidy_dir, "%s.Rplots.pdf" % cluster_dir))
    return ploidy_dir
示例#3
0
def _run_wham(inputs, background_bams):
    """Run the WHAM structural variant caller (whamg) over a batch of samples.

    Output goes to "<sample>-wham.vcf.gz" inside the SV work directory of
    the first input, where <sample> is that input's sample name. An
    existing output file is reused and whamg is not run again.

    inputs -- sample data dictionaries; each one provides an "align_bam" entry
    background_bams -- list of extra BAM paths appended after the input BAMs

    Returns the result of vcfutils.bgzip_and_index for the output file.
    """
    first = inputs[0]
    work_dir = _sv_workdir(first)
    out_file = os.path.join(work_dir,
                            "%s-wham.vcf.gz" % dd.get_sample_name(first))
    if not utils.file_exists(out_file):
        with file_transaction(first, out_file) as tx_out_file:
            n_cores = dd.get_cores(first)
            ref_path = dd.get_ref_file(first)
            # Keep only the contigs accepted by chromhacks.is_autosomal_or_x
            kept_contigs = []
            for contig in ref.file_contigs(ref_path):
                if chromhacks.is_autosomal_or_x(contig.name):
                    kept_contigs.append(contig.name)
            sample_bams = [entry["align_bam"] for entry in inputs]
            bam_arg = ",".join(sample_bams + background_bams)
            cmd = (
                "whamg -x {cores} -a {ref_file} -f {all_bams} -c {include_chroms} "
                "| bgzip -c > {tx_out_file}")
            # Fill each placeholder by name rather than through locals()
            rendered = cmd.format(
                cores=n_cores,
                ref_file=ref_path,
                all_bams=bam_arg,
                include_chroms=",".join(kept_contigs),
                tx_out_file=tx_out_file)
            description = "WHAM SV caller: %s" % ", ".join(
                dd.get_sample_name(d) for d in inputs)
            do.run(rendered, description)
    return vcfutils.bgzip_and_index(out_file, first["config"])