Exemplo n.º 1
0
def _run_purecn(paired, work_dir):
    """Invoke the PureCN.R wrapper on CNVkit-segmented tumor bins.

    The run is skipped when the "rds" output is already up to date relative
    to the normalized bin file. In both cases the second value returned by
    _get_purecn_files is handed back to the caller.
    """
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    tumor = paired.tumor_data
    cnr_file = tz.get_in(["depth", "bins", "normalized"], tumor)
    if utils.file_uptodate(out["rds"], cnr_file):
        return out

    # Segment the normalized bins with CNVkit under <work_dir>/cnvkit/<sample>
    cnvkit_dir = utils.safe_makedir(os.path.join(work_dir, "cnvkit"))
    cnvkit_base = os.path.join(cnvkit_dir, dd.get_sample_name(tumor))
    seg_file = cnvkit.segment_from_cnr(cnr_file, tumor, cnvkit_base)

    from bcbio import heterogeneity
    # Only the first variant entry is passed to PureCN
    vcf_file = heterogeneity.get_variants(tumor)[0]["vrn_file"]

    with file_transaction(tumor, out_base) as tx_out_base:
        cmd = ["PureCN.R", "--seed", "42"]
        cmd += ["--out", tx_out_base]
        cmd += ["--rds", "%s.rds" % tx_out_base]
        cmd += ["--sampleid", dd.get_sample_name(tumor)]
        cmd += ["--genome", dd.get_genome_build(tumor)]
        cmd += ["--vcf", vcf_file]
        cmd += ["--tumor", cnr_file]
        cmd += ["--segfile", seg_file]
        cmd += ["--funsegmentation", "none"]
        do.run(cmd, "PureCN copy number calling")
        # Relocate each expected output from the transaction directory
        tx_dir = os.path.dirname(tx_out_base)
        final_dir = os.path.dirname(out_base)
        for fname in all_files:
            shutil.move(os.path.join(tx_dir, fname),
                        os.path.join(final_dir, fname))
    return out
Exemplo n.º 2
0
def run(items):
    """Run TitanCNA for a paired batch and attach the result to the tumor.

    Returns ``items`` untouched when no paired sample is found or when
    _should_run rejects the heterozygous input file. Otherwise returns a
    list with the normal sample (when present) followed by the tumor sample,
    whose "sv" list has the finalized TitanCNA output appended.
    """
    from bcbio import heterogeneity
    paired = vcfutils.get_paired(items)
    if not paired:
        sample_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping TitanCNA; no somatic tumor calls in batch: %s" %
                    sample_names)
        return items

    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    cn_file = _titan_cn_file(dd.get_normalized_depth(tumor), work_dir, tumor)
    het_file = _titan_het_file(heterogeneity.get_variants(tumor), work_dir,
                               paired)
    if not _should_run(het_file):
        sample_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping TitanCNA; not enough input data: %s" %
                    sample_names)
        return items

    ploidy_outdirs = []
    for ploidy in (2, 3, 4):
        for num_clusters in (1, 2, 3):
            out_dir = _run_titancna(cn_file, het_file, ploidy, num_clusters,
                                    work_dir, tumor)
        # One entry per ploidy, using the directory from the last cluster run
        ploidy_outdirs.append((ploidy, out_dir))
    solution_file = _run_select_solution(ploidy_outdirs, work_dir, tumor)

    out = [paired.normal_data] if paired.normal_data else []
    if "sv" not in tumor:
        tumor["sv"] = []
    tumor["sv"].append(_finalize_sv(solution_file, tumor))
    out.append(tumor)
    return out
Exemplo n.º 3
0
def _run_purecn(paired, work_dir):
    """Invoke the PureCN.R wrapper on pre-segmented CNVkit or GATK4 inputs.

    The segmentation helper is picked by cnvkit.bin_approach for the tumor
    sample. Nothing is run when the "rds" output is already up to date
    relative to the normalized bin file. Returns the second value from
    _get_purecn_files.
    """
    segfns = {"cnvkit": _segment_normalized_cnvkit,
              "gatk-cnv": _segment_normalized_gatk}
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    tumor = paired.tumor_data
    cnr_file = tz.get_in(["depth", "bins", "normalized"], tumor)
    if utils.file_uptodate(out["rds"], cnr_file):
        return out

    segment = segfns[cnvkit.bin_approach(tumor)]
    cnr_file, seg_file = segment(cnr_file, work_dir, paired)

    from bcbio import heterogeneity
    variants = heterogeneity.get_variants(tumor, include_germline=False)
    vcf_file = germline.filter_to_pass_and_reject(variants[0]["vrn_file"], paired,
                                                  out_dir=work_dir)

    with file_transaction(tumor, out_base) as tx_out_base:
        # Use UCSC style naming for human builds to support BSgenome
        build = dd.get_genome_build(tumor)
        genome = "hg19" if build in ["GRCh37", "hg19"] else build
        cmd = ["PureCN.R", "--seed", "42",
               "--out", tx_out_base,
               "--rds", "%s.rds" % tx_out_base,
               "--sampleid", dd.get_sample_name(tumor),
               "--genome", genome,
               "--vcf", vcf_file,
               "--tumor", cnr_file,
               "--segfile", seg_file,
               "--funsegmentation", "Hclust",
               "--maxnonclonal", "0.3"]
        num_cores = dd.get_num_cores(tumor)
        if num_cores > 1:
            cmd.extend(["--cores", str(num_cores)])
        do.run(cmd, "PureCN copy number calling")
        # Relocate each expected output from the transaction directory
        tx_dir = os.path.dirname(tx_out_base)
        final_dir = os.path.dirname(out_base)
        for fname in all_files:
            shutil.move(os.path.join(tx_dir, fname),
                        os.path.join(final_dir, fname))
    return out
Exemplo n.º 4
0
def _run_purecn(paired, work_dir):
    """Run PureCN.R wrapper with pre-segmented CNVkit or GATK4 inputs.

    The run is skipped when the "rds" output is up to date relative to the
    normalized bin file, or when a ``<out_base>-failed.log`` marker from an
    earlier attempt exists. A ``subprocess.CalledProcessError`` accepted by
    _allowed_errors is recorded in that marker file and the sample is
    skipped; any other failure is logged with its traceback and re-raised.

    Returns the output mapping from
    ``_get_purecn_files(..., require_exist=True)`` when its "rds" file
    exists on disk, otherwise None.
    """
    segfns = {
        "cnvkit": _segment_normalized_cnvkit,
        "gatk-cnv": _segment_normalized_gatk
    }
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    failed_file = out_base + "-failed.log"
    cnr_file = tz.get_in(["depth", "bins", "normalized"], paired.tumor_data)
    if not utils.file_uptodate(
            out["rds"], cnr_file) and not utils.file_exists(failed_file):
        cnr_file, seg_file = segfns[cnvkit.bin_approach(paired.tumor_data)](
            cnr_file, work_dir, paired)
        from bcbio import heterogeneity
        vcf_file = heterogeneity.get_variants(
            paired.tumor_data, include_germline=False)[0]["vrn_file"]
        vcf_file = germline.filter_to_pass_and_reject(vcf_file,
                                                      paired,
                                                      out_dir=work_dir)
        with file_transaction(paired.tumor_data, out_base) as tx_out_base:
            sample_name = dd.get_sample_name(paired.tumor_data)
            # Use UCSC style naming for human builds to support BSgenome
            build = dd.get_genome_build(paired.tumor_data)
            genome = "hg19" if build in ["GRCh37", "hg19"] else build
            cmd = [
                "PureCN.R", "--seed", "42", "--out", tx_out_base, "--rds",
                "%s.rds" % tx_out_base, "--sampleid", sample_name,
                "--genome", genome,
                "--vcf", vcf_file, "--tumor", cnr_file, "--segfile", seg_file,
                "--funsegmentation", "Hclust", "--maxnonclonal", "0.3"
            ]
            num_cores = dd.get_num_cores(paired.tumor_data)
            if num_cores > 1:
                cmd += ["--cores", str(num_cores)]
            try:
                # Run through a shell string so the R environment exports apply
                shell_cmd = "export R_LIBS_USER=%s && %s && %s" % (
                    utils.R_sitelib(), utils.get_R_exports(), " ".join(
                        [str(x) for x in cmd]))
                do.run(shell_cmd, "PureCN copy number calling")
            except subprocess.CalledProcessError as exc:
                if _allowed_errors(str(exc)):
                    logger.info(
                        "PureCN failed to find solution for %s: skipping" %
                        sample_name)
                    # Marker file prevents re-running this sample next time
                    with open(failed_file, "w") as out_handle:
                        out_handle.write(str(exc))
                else:
                    # Logger.exception requires a message; calling it bare
                    # raises TypeError and hides the real failure.
                    logger.exception("PureCN copy number calling failed for %s",
                                     sample_name)
                    raise
            # Move whichever expected outputs were actually produced
            tx_dir = os.path.dirname(tx_out_base)
            final_dir = os.path.dirname(out_base)
            for f in all_files:
                if os.path.exists(os.path.join(tx_dir, f)):
                    shutil.move(os.path.join(tx_dir, f),
                                os.path.join(final_dir, f))
    out = _get_purecn_files(paired, work_dir, require_exist=True)[1]
    return out if (out.get("rds") and os.path.exists(out["rds"])) else None
Exemplo n.º 5
0
def run(items):
    """Prepare PURPLE inputs for a tumor/normal batch.

    Builds the AMBER heterozygous file and the COBALT depth file, prints
    both paths, and returns ``items`` unchanged. Batches without a paired
    tumor or without a normal name are skipped with a log message.
    """
    paired = vcfutils.get_paired(items)
    if not (paired and paired.normal_name):
        sample_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info(
            "Skipping PURPLE; need tumor/normal somatic calls in batch: %s" %
            sample_names)
        return items

    work_dir = _sv_workdir(paired.tumor_data)
    from bcbio import heterogeneity
    variants = heterogeneity.get_variants(paired.tumor_data)
    het_file = _amber_het_file(variants, work_dir, paired)
    depth_file = _run_cobalt(paired, work_dir)
    print(het_file, depth_file)
    return items
Exemplo n.º 6
0
def _compatible_small_variants(data, items):
    """Retrieve small variant (SNP, indel) VCFs compatible with CNVkit.

    Returns a list of ``VarFile(name, sample, normal)`` tuples, one per entry
    from heterogeneity.get_variants. For a paired batch the tumor and normal
    names are used and germline calls are excluded; otherwise the sample name
    of ``data`` is used with no normal and germline calls are included.
    """
    from bcbio import heterogeneity
    VarFile = collections.namedtuple("VarFile", ["name", "sample", "normal"])
    paired = vcfutils.get_paired(items)
    variants = heterogeneity.get_variants(data, include_germline=not paired)
    # Sample and normal names are the same for every variant file
    if paired:
        sample, normal = paired.tumor_name, paired.normal_name
    else:
        sample, normal = dd.get_sample_name(data), None
    return [VarFile(v["vrn_file"], sample, normal) for v in variants]
Exemplo n.º 7
0
def _compatible_small_variants(data, items):
    """Retrieve small variant (SNP, indel) VCFs compatible with CNVkit.

    Each entry from heterogeneity.get_variants becomes a
    ``VarFile(name, sample, normal)`` tuple. Paired batches use the tumor and
    normal names and exclude germline calls; unpaired input uses the sample
    name of ``data``, a normal of None, and includes germline calls.
    """
    from bcbio import heterogeneity
    VarFile = collections.namedtuple("VarFile", ["name", "sample", "normal"])
    paired = vcfutils.get_paired(items)
    out = []
    for v in heterogeneity.get_variants(data, include_germline=not paired):
        if paired:
            out.append(VarFile(v["vrn_file"], paired.tumor_name, paired.normal_name))
        else:
            out.append(VarFile(v["vrn_file"], dd.get_sample_name(data), None))
    return out
Exemplo n.º 8
0
def _run_purecn(paired, work_dir):
    """Invoke the PureCN.R wrapper on CNVkit-segmented tumor bins.

    Before segmentation the normalized bin file is passed through
    chromhacks.bed_to_standardonly and _remove_overlaps. The run is skipped
    when the "rds" output is already up to date relative to the normalized
    bin file. Returns the second value from _get_purecn_files.
    """
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    tumor = paired.tumor_data
    cnr_file = tz.get_in(["depth", "bins", "normalized"], tumor)
    if utils.file_uptodate(out["rds"], cnr_file):
        return out

    cnvkit_base = os.path.join(
        utils.safe_makedir(os.path.join(work_dir, "cnvkit")),
        dd.get_sample_name(tumor))
    cnvkit_dir = os.path.dirname(cnvkit_base)
    cnr_file = chromhacks.bed_to_standardonly(cnr_file, tumor,
                                              headers="chromosome",
                                              include_sex_chroms=True,
                                              out_dir=cnvkit_dir)
    cnr_file = _remove_overlaps(cnr_file, cnvkit_dir, tumor)
    seg_file = cnvkit.segment_from_cnr(cnr_file, tumor, cnvkit_base)

    from bcbio import heterogeneity
    variants = heterogeneity.get_variants(tumor, include_germline=False)
    vcf_file = germline.filter_to_pass_and_reject(variants[0]["vrn_file"],
                                                  paired, out_dir=work_dir)

    with file_transaction(tumor, out_base) as tx_out_base:
        # Use UCSC style naming for human builds to support BSgenome
        build = dd.get_genome_build(tumor)
        genome = "hg19" if build in ["GRCh37", "hg19"] else build
        cmd = ["PureCN.R", "--seed", "42",
               "--out", tx_out_base,
               "--rds", "%s.rds" % tx_out_base,
               "--sampleid", dd.get_sample_name(tumor),
               "--genome", genome,
               "--vcf", vcf_file,
               "--tumor", cnr_file,
               "--segfile", seg_file,
               "--funsegmentation", "Hclust",
               "--maxnonclonal", "0.3"]
        num_cores = dd.get_num_cores(tumor)
        if num_cores > 1:
            cmd.extend(["--cores", str(num_cores)])
        do.run(cmd, "PureCN copy number calling")
        # Relocate each expected output from the transaction directory
        tx_dir = os.path.dirname(tx_out_base)
        final_dir = os.path.dirname(out_base)
        for fname in all_files:
            shutil.move(os.path.join(tx_dir, fname),
                        os.path.join(final_dir, fname))
    return out
Exemplo n.º 9
0
def _run_purecn(paired, work_dir):
    """Run PureCN.R wrapper with pre-segmented CNVkit or GATK4 inputs.

    Skips the run when the "rds" output is up to date relative to the
    normalized bin file, or when a ``<out_base>-failed.log`` marker from a
    previous attempt exists. A ``subprocess.CalledProcessError`` accepted by
    _allowed_errors is written to that marker and the sample is skipped;
    other failures are logged with their traceback and re-raised.

    Returns the output mapping from
    ``_get_purecn_files(..., require_exist=True)`` when its "rds" file
    exists on disk, otherwise None.
    """
    segfns = {"cnvkit": _segment_normalized_cnvkit, "gatk-cnv": _segment_normalized_gatk}
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    failed_file = out_base + "-failed.log"
    cnr_file = tz.get_in(["depth", "bins", "normalized"], paired.tumor_data)
    if not utils.file_uptodate(out["rds"], cnr_file) and not utils.file_exists(failed_file):
        cnr_file, seg_file = segfns[cnvkit.bin_approach(paired.tumor_data)](cnr_file, work_dir, paired)
        from bcbio import heterogeneity
        vcf_file = heterogeneity.get_variants(paired.tumor_data, include_germline=False)[0]["vrn_file"]
        vcf_file = germline.filter_to_pass_and_reject(vcf_file, paired, out_dir=work_dir)
        with file_transaction(paired.tumor_data, out_base) as tx_out_base:
            sample_name = dd.get_sample_name(paired.tumor_data)
            # Use UCSC style naming for human builds to support BSgenome
            build = dd.get_genome_build(paired.tumor_data)
            genome = "hg19" if build in ["GRCh37", "hg19"] else build
            cmd = ["PureCN.R", "--seed", "42", "--out", tx_out_base, "--rds", "%s.rds" % tx_out_base,
                   "--sampleid", sample_name,
                   "--genome", genome,
                   "--vcf", vcf_file, "--tumor", cnr_file,
                   "--segfile", seg_file, "--funsegmentation", "Hclust", "--maxnonclonal", "0.3"]
            num_cores = dd.get_num_cores(paired.tumor_data)
            if num_cores > 1:
                cmd += ["--cores", str(num_cores)]
            try:
                # Run through a shell string so the R environment exports apply
                shell_cmd = "export R_LIBS_USER=%s && %s && %s" % (utils.R_sitelib(), utils.get_R_exports(),
                                                                   " ".join([str(x) for x in cmd]))
                do.run(shell_cmd, "PureCN copy number calling")
            except subprocess.CalledProcessError as exc:
                if _allowed_errors(str(exc)):
                    logger.info("PureCN failed to find solution for %s: skipping" % sample_name)
                    # Marker file prevents re-running this sample next time
                    with open(failed_file, "w") as out_handle:
                        out_handle.write(str(exc))
                else:
                    # Logger.exception requires a message; calling it bare
                    # raises TypeError and hides the real failure.
                    logger.exception("PureCN copy number calling failed for %s", sample_name)
                    raise
            # Move whichever expected outputs were actually produced
            tx_dir = os.path.dirname(tx_out_base)
            final_dir = os.path.dirname(out_base)
            for f in all_files:
                if os.path.exists(os.path.join(tx_dir, f)):
                    shutil.move(os.path.join(tx_dir, f), os.path.join(final_dir, f))
    out = _get_purecn_files(paired, work_dir, require_exist=True)[1]
    return out if (out.get("rds") and os.path.exists(out["rds"])) else None
Exemplo n.º 10
0
def run(items):
    """Run PURPLE on a tumor/normal batch and attach output to the tumor.

    Batches without a paired tumor or without a normal name are returned
    unchanged after a log message. Otherwise the AMBER het file, COBALT depth
    file and PURPLE output are produced in that order, and the PURPLE output
    is appended to the tumor sample's "sv" list. Returns the normal sample
    (when present) followed by the tumor sample.
    """
    paired = vcfutils.get_paired(items)
    if not (paired and paired.normal_name):
        sample_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info("Skipping PURPLE; need tumor/normal somatic calls in batch: %s" % sample_names)
        return items

    tumor = paired.tumor_data
    work_dir = _sv_workdir(tumor)
    from bcbio import heterogeneity
    vrn_files = heterogeneity.get_variants(tumor, include_germline=False)
    het_file = _amber_het_file("pon", vrn_files, work_dir, paired)
    depth_file = _run_cobalt(paired, work_dir)
    purple_out = _run_purple(paired, het_file, depth_file, vrn_files, work_dir)

    out = [paired.normal_data] if paired.normal_data else []
    if "sv" not in tumor:
        tumor["sv"] = []
    tumor["sv"].append(purple_out)
    out.append(tumor)
    return out
Exemplo n.º 11
0
def run(items):
    """Run the PURPLE workflow for a tumor/normal pair.

    When the batch has no paired tumor or no normal name, a message is logged
    and ``items`` is returned as-is. Otherwise the AMBER het file, COBALT
    depth file and PURPLE output are generated in turn, the PURPLE output is
    added to the tumor sample's "sv" list, and the normal (if any) plus tumor
    samples are returned.
    """
    paired = vcfutils.get_paired(items)
    has_normal = bool(paired) and bool(paired.normal_name)
    if not has_normal:
        batch_names = " ".join([dd.get_sample_name(d) for d in items])
        logger.info(
            "Skipping PURPLE; need tumor/normal somatic calls in batch: %s" %
            batch_names)
        return items

    tumor_data = paired.tumor_data
    work_dir = _sv_workdir(tumor_data)
    from bcbio import heterogeneity
    vrn_files = heterogeneity.get_variants(tumor_data,
                                           include_germline=False)
    het_file = _amber_het_file("pon", vrn_files, work_dir, paired)
    depth_file = _run_cobalt(paired, work_dir)
    purple_out = _run_purple(paired, het_file, depth_file, vrn_files, work_dir)

    results = []
    if paired.normal_data:
        results.append(paired.normal_data)
    if "sv" not in tumor_data:
        tumor_data["sv"] = []
    tumor_data["sv"].append(purple_out)
    results.append(tumor_data)
    return results