示例#1
0
def _run_purecn(paired, work_dir):
    """Call copy number with the PureCN.R wrapper on CNVkit-segmented bins.

    The normalized bin file is read from the tumor sample
    (``depth -> bins -> normalized``), segmented with
    ``cnvkit.segment_from_cnr`` and passed to ``PureCN.R`` together with the
    first variant file returned by ``heterogeneity.get_variants``. PureCN's own
    segmentation is switched off (``--funsegmentation none``). Nothing is run
    when ``utils.file_uptodate`` reports the ``rds`` output as current
    relative to the normalized bins.

    Args:
        paired: paired sample object; only ``paired.tumor_data`` is used here.
        work_dir: directory passed to ``_get_purecn_files`` and under which
            the ``cnvkit`` segmentation folder is created.

    Returns:
        The output mapping obtained from ``_get_purecn_files`` (it holds at
        least an ``"rds"`` entry).
    """
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    tumor = paired.tumor_data
    cnr_file = tz.get_in(["depth", "bins", "normalized"], tumor)
    if utils.file_uptodate(out["rds"], cnr_file):
        return out

    # CNVkit segmentation output uses <work_dir>/cnvkit/<sample name> as prefix.
    cnvkit_dir = utils.safe_makedir(os.path.join(work_dir, "cnvkit"))
    sample_name = dd.get_sample_name(tumor)
    seg_file = cnvkit.segment_from_cnr(
        cnr_file, tumor, os.path.join(cnvkit_dir, sample_name))

    # NOTE(review): function-level import, presumably to avoid a circular
    # import with bcbio.heterogeneity -- confirm before moving it.
    from bcbio import heterogeneity
    variants = heterogeneity.get_variants(tumor)
    vcf_file = variants[0]["vrn_file"]

    with file_transaction(tumor, out_base) as tx_out_base:
        cmd = ["PureCN.R", "--seed", "42",
               "--out", tx_out_base,
               "--rds", "%s.rds" % tx_out_base,
               "--sampleid", sample_name,
               "--genome", dd.get_genome_build(tumor),
               "--vcf", vcf_file,
               "--tumor", cnr_file,
               "--segfile", seg_file,
               "--funsegmentation", "none"]
        do.run(cmd, "PureCN copy number calling")
        # Relocate every expected output from the transactional directory to
        # the final one while still inside the transaction block.
        tx_dir = os.path.dirname(tx_out_base)
        final_dir = os.path.dirname(out_base)
        for fname in all_files:
            shutil.move(os.path.join(tx_dir, fname),
                        os.path.join(final_dir, fname))
    return out
示例#2
0
def _segment_normalized_cnvkit(cnr_file, work_dir, paired):
    """Prepare normalized bins and segment them with CNVkit.

    The input bins are filtered with ``chromhacks.bed_to_standardonly``
    (called with ``include_sex_chroms=True``), passed through
    ``_remove_overlaps`` and finally segmented with
    ``cnvkit.segment_from_cnr``. All outputs use ``<work_dir>/cnvkit`` and the
    tumor sample name as prefix.

    Args:
        cnr_file: normalized bin file to segment.
        work_dir: directory under which the ``cnvkit`` folder is created.
        paired: paired sample object; only ``paired.tumor_data`` is used here.

    Returns:
        Tuple of (prepared bin file, segmentation file).
    """
    tumor = paired.tumor_data
    cnvkit_dir = utils.safe_makedir(os.path.join(work_dir, "cnvkit"))
    cnvkit_base = os.path.join(cnvkit_dir, dd.get_sample_name(tumor))
    out_dir = os.path.dirname(cnvkit_base)

    standard_cnr = chromhacks.bed_to_standardonly(
        cnr_file, tumor, headers="chromosome", include_sex_chroms=True,
        out_dir=out_dir)
    prepared_cnr = _remove_overlaps(standard_cnr, out_dir, tumor)
    seg_file = cnvkit.segment_from_cnr(prepared_cnr, tumor, cnvkit_base)
    return prepared_cnr, seg_file
示例#3
0
def _segment_normalized_cnvkit(cnr_file, work_dir, paired):
    """Prepare normalized bins and segment them with CNVkit.

    The input bins are filtered with ``chromhacks.bed_to_standardonly``
    (called with ``include_sex_chroms=True``), passed through
    ``_remove_overlaps`` and finally segmented with
    ``cnvkit.segment_from_cnr``. All outputs use ``<work_dir>/cnvkit`` and the
    tumor sample name as prefix.

    Args:
        cnr_file: normalized bin file to segment.
        work_dir: directory under which the ``cnvkit`` folder is created.
        paired: paired sample object; only ``paired.tumor_data`` is used here.

    Returns:
        Tuple of (prepared bin file, segmentation file).
    """
    tumor = paired.tumor_data
    cnvkit_dir = utils.safe_makedir(os.path.join(work_dir, "cnvkit"))
    cnvkit_base = os.path.join(cnvkit_dir, dd.get_sample_name(tumor))
    out_dir = os.path.dirname(cnvkit_base)

    standard_cnr = chromhacks.bed_to_standardonly(
        cnr_file, tumor, headers="chromosome", include_sex_chroms=True,
        out_dir=out_dir)
    prepared_cnr = _remove_overlaps(standard_cnr, out_dir, tumor)
    seg_file = cnvkit.segment_from_cnr(prepared_cnr, tumor, cnvkit_base)
    return prepared_cnr, seg_file
示例#4
0
def _run_purecn(paired, work_dir):
    """Call copy number with the PureCN.R wrapper on CNVkit-segmented bins.

    The normalized bin file is read from the tumor sample
    (``depth -> bins -> normalized``), filtered with
    ``chromhacks.bed_to_standardonly`` (sex chromosomes included), passed
    through ``_remove_overlaps`` and segmented with
    ``cnvkit.segment_from_cnr``. The first variant file returned by
    ``heterogeneity.get_variants`` (germline calls excluded) is processed by
    ``germline.filter_to_pass_and_reject`` before being handed to
    ``PureCN.R``. Nothing is run when ``utils.file_uptodate`` reports the
    ``rds`` output as current relative to the normalized bins.

    Args:
        paired: paired sample object; ``paired.tumor_data`` supplies the
            sample configuration and ``paired`` itself is passed to the
            variant filter.
        work_dir: directory passed to ``_get_purecn_files``, used as output
            directory for the filtered variants and under which the
            ``cnvkit`` folder is created.

    Returns:
        The output mapping obtained from ``_get_purecn_files`` (it holds at
        least an ``"rds"`` entry).
    """
    out_base, out, all_files = _get_purecn_files(paired, work_dir)
    tumor = paired.tumor_data
    raw_cnr = tz.get_in(["depth", "bins", "normalized"], tumor)
    if utils.file_uptodate(out["rds"], raw_cnr):
        return out

    # CNVkit outputs use <work_dir>/cnvkit/<sample name> as prefix.
    cnvkit_dir = utils.safe_makedir(os.path.join(work_dir, "cnvkit"))
    sample_name = dd.get_sample_name(tumor)
    cnvkit_base = os.path.join(cnvkit_dir, sample_name)
    prep_dir = os.path.dirname(cnvkit_base)

    standard_cnr = chromhacks.bed_to_standardonly(
        raw_cnr, tumor, headers="chromosome", include_sex_chroms=True,
        out_dir=prep_dir)
    cnr_file = _remove_overlaps(standard_cnr, prep_dir, tumor)
    seg_file = cnvkit.segment_from_cnr(cnr_file, tumor, cnvkit_base)

    # NOTE(review): function-level import, presumably to avoid a circular
    # import with bcbio.heterogeneity -- confirm before moving it.
    from bcbio import heterogeneity
    variants = heterogeneity.get_variants(tumor, include_germline=False)
    vcf_file = germline.filter_to_pass_and_reject(
        variants[0]["vrn_file"], paired, out_dir=work_dir)

    with file_transaction(tumor, out_base) as tx_out_base:
        # Use UCSC style naming for human builds to support BSgenome
        genome = dd.get_genome_build(tumor)
        if genome in ("GRCh37", "hg19"):
            genome = "hg19"
        cmd = ["PureCN.R", "--seed", "42",
               "--out", tx_out_base,
               "--rds", "%s.rds" % tx_out_base,
               "--sampleid", sample_name,
               "--genome", genome,
               "--vcf", vcf_file,
               "--tumor", cnr_file,
               "--segfile", seg_file,
               "--funsegmentation", "Hclust",
               "--maxnonclonal", "0.3"]
        # Only request parallel execution when more than one core is set.
        cores = dd.get_num_cores(tumor)
        if cores > 1:
            cmd.extend(["--cores", str(cores)])
        do.run(cmd, "PureCN copy number calling")
        # Relocate every expected output from the transactional directory to
        # the final one while still inside the transaction block.
        tx_dir = os.path.dirname(tx_out_base)
        final_dir = os.path.dirname(out_base)
        for fname in all_files:
            shutil.move(os.path.join(tx_dir, fname),
                        os.path.join(final_dir, fname))
    return out