Exemplo n.º 1
0
def calculate_sv_bins(*items):
    """Attach target/antitarget bin BED files to samples for CNV coverage work.

    Samples are grouped by batch and CNV method. Every sample that uses the
    general SV bins receives ``data["regions"]["bins"]`` holding its target
    BED, antitarget BED and the index of its group (as a string). Bins come
    from the sample's background CNV reference when one is configured, and
    otherwise from the group's region and access files together with the
    group's memoized bin size calculation.

    Returns a list of single-element lists, one per sample. When no sample
    uses the general SV bins the inputs are returned in that shape untouched.
    Raises AssertionError if the number of outputs differs from the inputs.
    """
    from bcbio.structural import cnvkit
    items = [utils.to_single_data(x) for x in cwlutils.handle_combined_input(items)]
    if not any(cnvkit.use_general_sv_bins(x) for x in items):
        return [[d] for d in items]
    out = []
    cnv_groups = _group_by_cnv_method(multi.group_by_batch(items, False))
    for group_index, cnv_group in enumerate(cnv_groups):
        sizes = MemoizedSizes(cnv_group.region_file, cnv_group.items)
        size_calc_fn = sizes.get_target_antitarget_bin_sizes
        for data in cnv_group.items:
            if not cnvkit.use_general_sv_bins(data):
                # Samples outside the shared binning approach pass through as-is.
                out.append([data])
                continue
            if dd.get_background_cnv_reference(data):
                beds = cnvkit.targets_from_background(dd.get_background_cnv_reference(data),
                                                      cnv_group.work_dir, data)
            else:
                beds = cnvkit.targets_w_bins(cnv_group.region_file, cnv_group.access_file,
                                             size_calc_fn, cnv_group.work_dir, data)
            target_bed, anti_bed = beds
            if not data.get("regions"):
                data["regions"] = {}
            data["regions"]["bins"] = {"target": target_bed,
                                       "antitarget": anti_bed,
                                       "group": str(group_index)}
            out.append([data])
    if len(out) != len(items):
        names_out = sorted([dd.get_sample_name(utils.to_single_data(x)) for x in out])
        names_in = sorted([dd.get_sample_name(x) for x in items])
        raise AssertionError("Inconsistent samples in and out of SV bin calculation:\nout: %s\nin : %s" %
                             (names_out, names_in))
    return out
Exemplo n.º 2
0
def calculate_sv_bins(*items):
    """Attach target/antitarget bins and GC annotation to samples for CNV calling.

    Samples are grouped by batch and CNV method. For each sample using the
    general SV bins, the bin preparation function matching the sample's bin
    approach ("cnvkit" or "gatk-cnv") is called with the sample, its group and
    the group's memoized bin size calculation. Its three results are stored in
    ``data["regions"]["bins"]`` as "target", "antitarget" and "gcannotated",
    alongside the group index (as a string) under "group".

    Returns a list of single-element lists, one per sample. Raises
    AssertionError if the number of outputs differs from the inputs.
    """
    bin_builders = {"cnvkit": _calculate_sv_bins_cnvkit, "gatk-cnv": _calculate_sv_bins_gatk}
    from bcbio.structural import cnvkit
    items = [utils.to_single_data(x) for x in cwlutils.handle_combined_input(items)]
    if not any(cnvkit.use_general_sv_bins(x) for x in items):
        return [[d] for d in items]
    out = []
    cnv_groups = _group_by_cnv_method(multi.group_by_batch(items, False))
    for group_index, cnv_group in enumerate(cnv_groups):
        size_calc_fn = MemoizedSizes(cnv_group.region_file,
                                     cnv_group.items).get_target_antitarget_bin_sizes
        for data in cnv_group.items:
            if not cnvkit.use_general_sv_bins(data):
                # Samples outside the shared binning approach pass through as-is.
                out.append([data])
                continue
            build_bins = bin_builders[cnvkit.bin_approach(data)]
            target_bed, anti_bed, gcannotated_tsv = build_bins(data, cnv_group, size_calc_fn)
            if not data.get("regions"):
                data["regions"] = {}
            data["regions"]["bins"] = {
                "target": target_bed,
                "antitarget": anti_bed,
                "group": str(group_index),
                "gcannotated": gcannotated_tsv,
            }
            out.append([data])
    if len(out) != len(items):
        names_out = sorted([dd.get_sample_name(utils.to_single_data(x)) for x in out])
        names_in = sorted([dd.get_sample_name(x) for x in items])
        raise AssertionError("Inconsistent samples in and out of SV bin calculation:\nout: %s\nin : %s" %
                             (names_out, names_in))
    return out
Exemplo n.º 3
0
def calculate_sv_bins(*items):
    """Attach target/antitarget bin BED files to every sample for CNV coverage.

    When no input uses the general SV bins the inputs are returned exactly as
    received. Otherwise samples are grouped by batch and CNV method, and each
    sample in a group gets ``data["regions"]["bins"]`` with "target" and
    "antitarget" BED files built from the group's region and access files
    using the group's memoized bin size calculation.

    Returns a list of single-element lists, one per sample. Raises
    AssertionError if the number of outputs differs from the inputs.
    """
    from bcbio.structural import cnvkit
    if not any(cnvkit.use_general_sv_bins(utils.to_single_data(x)) for x in items):
        return items
    items = [utils.to_single_data(x) for x in items]
    out = []
    for cnv_group in _group_by_cnv_method(multi.group_by_batch(items, False)):
        sizes = MemoizedSizes(cnv_group.region_file, cnv_group.items)
        size_calc_fn = sizes.get_target_antitarget_bin_sizes
        for data in cnv_group.items:
            beds = cnvkit.targets_w_bins(cnv_group.region_file, cnv_group.access_file,
                                         size_calc_fn, cnv_group.work_dir, data)
            target_bed, anti_bed = beds
            if not data.get("regions"):
                data["regions"] = {}
            data["regions"]["bins"] = {"target": target_bed, "antitarget": anti_bed}
            out.append([data])
    if len(out) != len(items):
        names_out = sorted([dd.get_sample_name(utils.to_single_data(x)) for x in out])
        names_in = sorted([dd.get_sample_name(x) for x in items])
        raise AssertionError("Inconsistent samples in and out of SV bin calculation:\nout: %s\nin : %s" %
                             (names_out, names_in))
    return out
Exemplo n.º 4
0
def calculate_sv_coverage(data):
    """Calculate coverage within bins for downstream CNV calling.

    For a sample that uses the general SV bins, runs mosdepth over the target
    and antitarget bins, annotates the resulting regions with genes and writes
    ``<sample>-target-coverage.cnn`` and ``<sample>-antitarget-coverage.cnn``
    into the sample's ``structural/<sample>/bins`` work directory. The
    calculation is skipped when both outputs already exist or when the sample
    has neither an aligned nor a work BAM.

    When the target coverage file exists, both paths are stored in
    ``data["depth"]["bins"]``; the "depth" mapping is created if the sample
    does not have one yet.

    Returns ``[[data]]``.
    """
    from bcbio.variation import coverage
    from bcbio.structural import annotate, cnvkit
    data = utils.to_single_data(data)
    if not cnvkit.use_general_sv_bins(data):
        return [[data]]
    sample_name = dd.get_sample_name(data)
    work_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data), "structural",
                                               sample_name, "bins"))
    out_target_file = os.path.join(work_dir, "%s-target-coverage.cnn" % sample_name)
    out_anti_file = os.path.join(work_dir, "%s-antitarget-coverage.cnn" % sample_name)
    have_outputs = utils.file_exists(out_target_file) and utils.file_exists(out_anti_file)
    if not have_outputs and (dd.get_align_bam(data) or dd.get_work_bam(data)):
        # mosdepth
        target_cov = coverage.run_mosdepth(data, "target", tz.get_in(["regions", "bins", "target"], data))
        anti_cov = coverage.run_mosdepth(data, "antitarget", tz.get_in(["regions", "bins", "antitarget"], data))
        target_cov_genes = annotate.add_genes(target_cov.regions, data, max_distance=0)
        anti_cov_genes = annotate.add_genes(anti_cov.regions, data, max_distance=0)
        out_target_file = _add_log2_depth(target_cov_genes, out_target_file, data)
        out_anti_file = _add_log2_depth(anti_cov_genes, out_anti_file, data)
        # TODO: Correct for GC bias
    if os.path.exists(out_target_file):
        # Avoid a KeyError for samples that have no depth information yet.
        if not data.get("depth"):
            data["depth"] = {}
        data["depth"]["bins"] = {"target": out_target_file, "antitarget": out_anti_file}
    return [[data]]
Exemplo n.º 5
0
def calculate_sv_coverage(data):
    """Compute per-bin coverage inputs for downstream CNV calling.

    For a sample that uses the general SV bins, the coverage function matching
    its bin approach ("cnvkit" or "gatk-cnv") is run in the sample's
    ``structural/<sample>/bins`` work directory and yields target and
    antitarget outputs; both are reset to None when the target output does
    not exist on disk. When "seq2c" is among the sample's SV callers, the
    seq2c precall result is recorded too.

    Returns ``[[data]]`` with ``data["depth"]["bins"]`` set to a dictionary
    with "target", "antitarget" and "seq2c" entries (None where unavailable).
    """
    calcfns = {
        "cnvkit": _calculate_sv_coverage_cnvkit,
        "gatk-cnv": _calculate_sv_coverage_gatk
    }
    from bcbio.structural import cnvkit
    data = utils.to_single_data(data)
    out_target_file, out_anti_file = (None, None)
    if cnvkit.use_general_sv_bins(data):
        bins_dir = os.path.join(dd.get_work_dir(data), "structural",
                                dd.get_sample_name(data), "bins")
        work_dir = utils.safe_makedir(bins_dir)
        calc_coverage = calcfns[cnvkit.bin_approach(data)]
        out_target_file, out_anti_file = calc_coverage(data, work_dir)
        if not os.path.exists(out_target_file):
            out_target_file, out_anti_file = (None, None)
    seq2c_target = None
    if "seq2c" in dd.get_svcaller(data):
        from bcbio.structural import seq2c
        seq2c_target = seq2c.precall(data)

    # Make sure the nested depth/bins path exists before assigning into it.
    if not tz.get_in(["depth", "bins"], data):
        data = tz.update_in(data, ["depth", "bins"], lambda _: {})
    bins = {}
    bins["target"] = out_target_file
    bins["antitarget"] = out_anti_file
    bins["seq2c"] = seq2c_target
    data["depth"]["bins"] = bins
    return [[data]]
Exemplo n.º 6
0
def normalize_sv_coverage(*items):
    """Normalize CNV coverage, providing flexible point for multiple methods.

    Normalization only runs when the first sample lists "cnvkit" or
    "gatk-cnv" among its SV callers and at least one sample uses the general
    SV bins; otherwise (for example when running purecn alone) the samples
    are passed through unchanged.

    Samples are collected by their ``regions -> bins -> group`` value and the
    normalization function matching the bin approach of the group's first
    input is run per group. Samples with results get "background" and
    "normalized" entries added to ``data["depth"]["bins"]``.

    Returns a list of single-element lists, one per sample (an empty list for
    empty input). Raises AssertionError when a group has no input samples.
    """
    items = [utils.to_single_data(x) for x in cwlutils.handle_combined_input(items)]
    # The caller lookup below indexes the first sample, so handle no samples first.
    if not items:
        return []
    from bcbio.structural import get_svcallers
    sv_callers = get_svcallers(items[0])
    if "gatk-cnv" not in sv_callers and "cnvkit" not in sv_callers:
        return [[d] for d in items]
    calcfns = {"cnvkit": _normalize_sv_coverage_cnvkit, "gatk-cnv": _normalize_sv_coverage_gatk}
    from bcbio.structural import cnvkit
    from bcbio.structural import shared as sshared
    if all(not cnvkit.use_general_sv_bins(x) for x in items):
        return [[d] for d in items]
    # Collect samples per bin group in first-seen order. Grouping by key, not by
    # adjacency, keeps a group together when its samples are not consecutive.
    grouped = {}
    group_order = []
    for item in items:
        gid = tz.get_in(["regions", "bins", "group"], item)
        if gid not in grouped:
            grouped[gid] = []
            group_order.append(gid)
        grouped[gid].append(item)
    out_files = {}
    back_files = {}
    for group_id in group_order:
        # No CNVkit calling for this particular set of samples
        if group_id is None:
            continue
        gitems = grouped[group_id]
        inputs, backgrounds = sshared.find_case_control(gitems)
        if not inputs:
            # Explicit raise so the check survives running Python with -O.
            raise AssertionError("Did not find inputs for sample batch: %s" %
                                 (" ".join(dd.get_sample_name(x) for x in gitems)))
        work_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(inputs[0]), "structural",
                                                   dd.get_sample_name(inputs[0]), "bins"))
        normalize_fn = calcfns[cnvkit.bin_approach(inputs[0])]
        back_files, out_files = normalize_fn(group_id, inputs, backgrounds, work_dir,
                                             back_files, out_files)
    out = []
    for data in items:
        sample_name = dd.get_sample_name(data)
        if sample_name in out_files:
            data["depth"]["bins"]["background"] = back_files[sample_name]
            data["depth"]["bins"]["normalized"] = out_files[sample_name]
        out.append([data])
    return out
Exemplo n.º 7
0
def normalize_sv_coverage(*items):
    """Normalize CNV coverage, providing flexible point for multiple methods.

    When no sample uses the general SV bins the samples are passed through
    unchanged. Otherwise samples are collected by their
    ``regions -> bins -> group`` value and the normalization function matching
    the bin approach of the group's first input ("cnvkit" or "gatk-cnv") is
    run per group. Samples with results get "background" and "normalized"
    entries added to ``data["depth"]["bins"]``.

    Returns a list of single-element lists, one per sample. Raises
    AssertionError when a group has no input samples.
    """
    calcfns = {"cnvkit": _normalize_sv_coverage_cnvkit, "gatk-cnv": _normalize_sv_coverage_gatk}
    from bcbio.structural import cnvkit
    from bcbio.structural import shared as sshared
    items = [utils.to_single_data(x) for x in cwlutils.handle_combined_input(items)]
    if all(not cnvkit.use_general_sv_bins(x) for x in items):
        return [[d] for d in items]
    # Collect samples per bin group in first-seen order. Grouping by key, not by
    # adjacency, keeps a group together when its samples are not consecutive.
    grouped = {}
    group_order = []
    for item in items:
        gid = tz.get_in(["regions", "bins", "group"], item)
        if gid not in grouped:
            grouped[gid] = []
            group_order.append(gid)
        grouped[gid].append(item)
    out_files = {}
    back_files = {}
    for group_id in group_order:
        # No CNVkit calling for this particular set of samples
        if group_id is None:
            continue
        gitems = grouped[group_id]
        inputs, backgrounds = sshared.find_case_control(gitems)
        if not inputs:
            # Explicit raise so the check survives running Python with -O.
            raise AssertionError("Did not find inputs for sample batch: %s" %
                                 (" ".join(dd.get_sample_name(x) for x in gitems)))
        work_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(inputs[0]), "structural",
                                                   dd.get_sample_name(inputs[0]), "bins"))
        normalize_fn = calcfns[cnvkit.bin_approach(inputs[0])]
        back_files, out_files = normalize_fn(group_id, inputs, backgrounds, work_dir,
                                             back_files, out_files)
    out = []
    for data in items:
        sample_name = dd.get_sample_name(data)
        if sample_name in out_files:
            data["depth"]["bins"]["background"] = back_files[sample_name]
            data["depth"]["bins"]["normalized"] = out_files[sample_name]
        out.append([data])
    return out
Exemplo n.º 8
0
def calculate_sv_coverage(data):
    """Prepare binned coverage files used by downstream CNV callers.

    Samples using the general SV bins have coverage computed by the function
    registered for their bin approach ("cnvkit" or "gatk-cnv") inside the
    ``structural/<sample>/bins`` work directory. If the resulting target file
    is missing on disk, both target and antitarget are recorded as None. A
    seq2c precall result is added when "seq2c" is one of the sample's SV
    callers.

    Returns ``[[data]]`` where ``data["depth"]["bins"]`` holds the "target",
    "antitarget" and "seq2c" values.
    """
    calcfns = {"cnvkit": _calculate_sv_coverage_cnvkit, "gatk-cnv": _calculate_sv_coverage_gatk}
    from bcbio.structural import cnvkit
    data = utils.to_single_data(data)
    target_cov = anti_cov = None
    if cnvkit.use_general_sv_bins(data):
        work_dir = utils.safe_makedir(
            os.path.join(dd.get_work_dir(data), "structural", dd.get_sample_name(data), "bins"))
        target_cov, anti_cov = calcfns[cnvkit.bin_approach(data)](data, work_dir)
        if not os.path.exists(target_cov):
            # Without a target file neither output is usable downstream.
            target_cov = anti_cov = None
    seq2c_target = None
    if "seq2c" in dd.get_svcaller(data):
        from bcbio.structural import seq2c
        seq2c_target = seq2c.precall(data)

    if not tz.get_in(["depth", "bins"], data):
        data = tz.update_in(data, ["depth", "bins"], lambda _: {})
    data["depth"]["bins"] = {
        "target": target_cov,
        "antitarget": anti_cov,
        "seq2c": seq2c_target,
    }
    return [[data]]
Exemplo n.º 9
0
def calculate_sv_bins(*items):
    """Determine bin sizes and regions to use for samples.

    Unified approach to prepare regional bins for coverage calculations across
    multiple CNV callers. Splits into target and antitarget regions allowing
    callers to take advantage of both. Provides consistent target/anti-target
    bin sizes across batches.

    Uses callable_regions as the access BED file and mosdepth regions in
    variant_regions to estimate depth for bin sizes.

    Bins are only prepared when the first sample lists "cnvkit" or "gatk-cnv"
    among its SV callers and at least one sample uses the general SV bins;
    otherwise the samples are passed through unchanged. Prepared samples get
    ``data["regions"]["bins"]`` with "target", "antitarget", "group" (group
    index as a string) and "gcannotated" entries.

    Returns a list of single-element lists, one per sample (an empty list for
    empty input). Raises AssertionError if the number of outputs differs from
    the inputs.
    """
    calcfns = {"cnvkit": _calculate_sv_bins_cnvkit, "gatk-cnv": _calculate_sv_bins_gatk}
    from bcbio.structural import cnvkit
    items = [utils.to_single_data(x) for x in cwlutils.handle_combined_input(items)]
    # The caller lookup below indexes the first sample, so handle no samples first.
    if not items:
        return []

    from bcbio.structural import get_svcallers
    sv_callers = get_svcallers(items[0])
    has_cnvkit_gatkcnv = bool(set(sv_callers) & set(["cnvkit", "gatk-cnv"]))

    if all(not cnvkit.use_general_sv_bins(x) for x in items) or not has_cnvkit_gatkcnv:
        return [[d] for d in items]

    out = []
    for i, cnv_group in enumerate(_group_by_cnv_method(multi.group_by_batch(items, False))):
        size_calc_fn = MemoizedSizes(cnv_group.region_file, cnv_group.items).get_target_antitarget_bin_sizes
        for data in cnv_group.items:
            if cnvkit.use_general_sv_bins(data):
                target_bed, anti_bed, gcannotated_tsv = calcfns[cnvkit.bin_approach(data)](data, cnv_group,
                                                                                           size_calc_fn)
                if not data.get("regions"):
                    data["regions"] = {}
                data["regions"]["bins"] = {"target": target_bed, "antitarget": anti_bed, "group": str(i),
                                           "gcannotated": gcannotated_tsv}
            out.append([data])
    if len(out) != len(items):
        raise AssertionError("Inconsistent samples in and out of SV bin calculation:\nout: %s\nin : %s" %
                             (sorted([dd.get_sample_name(utils.to_single_data(x)) for x in out]),
                              sorted([dd.get_sample_name(x) for x in items])))
    return out
Exemplo n.º 10
0
def calculate_sv_bins(*items):
    """Attach per-batch target/antitarget bin BED files to samples.

    When no input uses the general SV bins the inputs are returned exactly as
    received. Otherwise samples are grouped by batch. For every batch a work
    directory ``structural/bins/<batch>`` is created under the first sample's
    work directory, the access file is read from the first batch sample's
    ``config -> algorithm -> callable_regions`` and base CNV regions are
    prepared from that sample. Target and antitarget bin sizes are computed
    from the CNV regions and all samples, then used to build BED files stored
    under ``data["regions"]["bins"]`` for each sample in the batch.

    Returns a list of single-element lists, one per sample. Raises
    AssertionError if the number of outputs differs from the inputs.
    """
    from bcbio.structural import cnvkit
    if not any(cnvkit.use_general_sv_bins(utils.to_single_data(x)) for x in items):
        return items
    items = [utils.to_single_data(x) for x in items]
    out = []
    for batch, batch_items in multi.group_by_batch(items, False).items():
        batch_dir = os.path.join(dd.get_work_dir(items[0]), "structural", "bins", batch)
        work_dir = utils.safe_makedir(batch_dir)
        first_in_batch = batch_items[0]
        access_file = tz.get_in(["config", "algorithm", "callable_regions"], first_in_batch)
        cnv_file = get_base_cnv_regions(first_in_batch, work_dir, "transcripts100",
                                        include_gene_names=False)
        target_bin, anti_bin = _get_target_antitarget_bin_sizes(cnv_file, items)
        for data in batch_items:
            beds = cnvkit.targets_w_bins(cnv_file, access_file, target_bin, anti_bin, work_dir, data)
            target_bed, anti_bed = beds
            if not data.get("regions"):
                data["regions"] = {}
            data["regions"]["bins"] = {"target": target_bed, "antitarget": anti_bed}
            out.append([data])
    if len(out) != len(items):
        names_out = sorted([dd.get_sample_name(utils.to_single_data(x)) for x in out])
        names_in = sorted([dd.get_sample_name(x) for x in items])
        raise AssertionError(
            "Inconsistent samples in and out of SV bin calculation:\nout: %s\nin : %s"
            % (names_out, names_in))
    return out
Exemplo n.º 11
0
def normalize_sv_coverage(*items):
    """Normalize CNV coverage depths by GC, repeats and background.

    Provides normalized output based on CNVkit approaches, provides a
    point for providing additional methods in the future:

    - reference: calculates reference backgrounds from normals and pools
      including GC and repeat information
    - fix: Uses background to normalize coverage estimations
    http://cnvkit.readthedocs.io/en/stable/pipeline.html#fix

    When no sample uses the general SV bins the original inputs are returned
    unchanged. Otherwise samples are collected by their
    ``regions -> bins -> group`` value; per group a background is built and
    each input with target coverage is normalized against it, with the result
    stored in ``data["depth"]["bins"]["normalized"]``.

    Returns a list of single-element lists, one per sample. Raises
    AssertionError when a group has no input samples.
    """
    from bcbio.structural import cnvkit
    from bcbio.structural import shared as sshared
    orig_items = items
    items = [utils.to_single_data(x) for x in cwlutils.handle_combined_input(items)]
    if all(not cnvkit.use_general_sv_bins(x) for x in items):
        return orig_items
    # Collect samples per bin group in first-seen order. Grouping by key, not by
    # adjacency, keeps a group together when its samples are not consecutive.
    grouped = {}
    group_order = []
    for item in items:
        gid = tz.get_in(["regions", "bins", "group"], item)
        if gid not in grouped:
            grouped[gid] = []
            group_order.append(gid)
        grouped[gid].append(item)
    out_files = {}
    for group_id in group_order:
        # Samples without a bin group were not prepared for CNV binning.
        if group_id is None:
            continue
        gitems = grouped[group_id]
        inputs, backgrounds = sshared.find_case_control(gitems)
        if not inputs:
            # Explicit raise so the check survives running Python with -O.
            raise AssertionError("Did not find inputs for sample batch: %s" %
                                 (" ".join(dd.get_sample_name(x) for x in gitems)))
        # Flat list of target/antitarget coverage files from background samples.
        cnns = []
        for back in backgrounds:
            cnns.append(tz.get_in(["depth", "bins", "target"], back))
            cnns.append(tz.get_in(["depth", "bins", "antitarget"], back))
        # Reset per group so values from a previous group never leak into this one.
        target_bed = None
        antitarget_bed = None
        for d in inputs:
            if tz.get_in(["depth", "bins", "target"], d):
                target_bed = tz.get_in(["depth", "bins", "target"], d)
                antitarget_bed = tz.get_in(["depth", "bins", "antitarget"], d)
        work_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(inputs[0]), "structural",
                                                   dd.get_sample_name(inputs[0]), "bins"))
        # NOTE(review): target_bed/antitarget_bed stay None when no input has
        # target coverage; confirm cnvkit_background accepts that.
        back_file = cnvkit.cnvkit_background(cnns, os.path.join(work_dir, "background-%s-cnvkit.cnn" % (group_id)),
                                             backgrounds or inputs, target_bed, antitarget_bed)
        for data in inputs:
            work_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data), "structural",
                                                       dd.get_sample_name(data), "bins"))
            if tz.get_in(["depth", "bins", "target"], data):
                fix_file = cnvkit.run_fix(tz.get_in(["depth", "bins", "target"], data),
                                          tz.get_in(["depth", "bins", "antitarget"], data),
                                          back_file,
                                          os.path.join(work_dir, "%s-normalized.cnr" % (dd.get_sample_name(data))),
                                          data)
                out_files[dd.get_sample_name(data)] = fix_file
    out = []
    for data in items:
        if dd.get_sample_name(data) in out_files:
            data["depth"]["bins"]["normalized"] = out_files[dd.get_sample_name(data)]
        out.append([data])
    return out
Exemplo n.º 12
0
def calculate_sv_coverage(data):
    """Prepare per-caller coverage inputs for a single sample.

    Binned target/antitarget coverage is computed only when the sample uses
    the general SV bins and lists "cnvkit" or "gatk-cnv" among its SV callers;
    the function for the sample's bin approach runs in the sample's
    ``structural/<sample>/bins`` work directory, and both outputs are recorded
    as None when the target file does not exist. seq2c and purecn coverage are
    added when those callers are configured. For purecn, a batch containing
    "pon_build" also sets ``config -> algorithm -> purecn_pon_build`` to True.

    Returns ``[[data]]`` where ``data["depth"]["bins"]`` holds the "target",
    "antitarget", "seq2c" and "purecn" values (None where unavailable).
    """
    calcfns = {"cnvkit": _calculate_sv_coverage_cnvkit, "gatk-cnv": _calculate_sv_coverage_gatk}
    from bcbio.structural import cnvkit
    data = utils.to_single_data(data)

    from bcbio.structural import get_svcallers
    sv_callers = get_svcallers(data)
    bin_callers = set(["cnvkit", "gatk-cnv"]) & set(sv_callers)
    has_cnvkit_or_gatkcnv = len(bin_callers) > 0

    out_target_file, out_anti_file = (None, None)
    if cnvkit.use_general_sv_bins(data) and has_cnvkit_or_gatkcnv:
        bins_dir = os.path.join(dd.get_work_dir(data), "structural",
                                dd.get_sample_name(data), "bins")
        work_dir = utils.safe_makedir(bins_dir)
        calc_coverage = calcfns[cnvkit.bin_approach(data)]
        out_target_file, out_anti_file = calc_coverage(data, work_dir)
        if not os.path.exists(out_target_file):
            out_target_file, out_anti_file = (None, None)

    seq2c_target = None
    if "seq2c" in dd.get_svcaller(data):
        from bcbio.structural import seq2c
        seq2c_target = seq2c.precall(data)

    purecn_target = None
    if "purecn" in dd.get_svcaller(data):
        # Flag panel-of-normals builds before computing coverage.
        batches = dd.get_batch(data)
        if batches and "pon_build" in dd.get_batch(data):
            data["config"]["algorithm"]["purecn_pon_build"] = True
        from bcbio.structural import purecn
        # Coverage is calculated even without a PON build, for tumor-only analysis.
        purecn_target = purecn.get_coverage(data)

    if not tz.get_in(["depth", "bins"], data):
        data = tz.update_in(data, ["depth", "bins"], lambda _: {})
    data["depth"]["bins"] = {
        "target": out_target_file,
        "antitarget": out_anti_file,
        "seq2c": seq2c_target,
        "purecn": purecn_target,
    }
    return [[data]]
Exemplo n.º 13
0
def calculate_sv_coverage(data):
    """Calculate coverage within bins for downstream CNV calling.

    Creates corrected cnr files with log2 ratios and depths.

    Samples that do not use the general SV bins are returned unchanged.
    Otherwise the coverage function matching the sample's bin approach
    ("cnvkit" or "gatk-cnv") runs in the sample's ``structural/<sample>/bins``
    work directory. When the target output exists, both outputs are stored in
    ``data["depth"]["bins"]``; the "depth" mapping is created if the sample
    does not have one yet.

    Returns ``[[data]]``.
    """
    calcfns = {
        "cnvkit": _calculate_sv_coverage_cnvkit,
        "gatk-cnv": _calculate_sv_coverage_gatk
    }
    from bcbio.structural import cnvkit
    data = utils.to_single_data(data)
    if not cnvkit.use_general_sv_bins(data):
        return [[data]]
    work_dir = utils.safe_makedir(
        os.path.join(dd.get_work_dir(data), "structural",
                     dd.get_sample_name(data), "bins"))
    out_target_file, out_anti_file = calcfns[cnvkit.bin_approach(data)](
        data, work_dir)
    if os.path.exists(out_target_file):
        # Avoid a KeyError for samples that have no depth information yet.
        if not data.get("depth"):
            data["depth"] = {}
        data["depth"]["bins"] = {
            "target": out_target_file,
            "antitarget": out_anti_file
        }
    return [[data]]
Exemplo n.º 14
0
def normalize_sv_coverage(*items):
    """Normalize CNV coverage depths by GC, repeats and background.

    Provides normalized output based on CNVkit approaches, provides a
    point for providing additional methods in the future:

    - reference: calculates reference backgrounds from normals and pools
      including GC and repeat information
    - fix: Uses background to normalize coverage estimations
    http://cnvkit.readthedocs.io/en/stable/pipeline.html#fix

    When no sample uses the general SV bins the samples are passed through
    unchanged. Otherwise samples are collected by their
    ``regions -> bins -> group`` value. Per group the background is the single
    background CNV reference configured on the inputs, or one built with
    CNVkit when none is configured. Inputs with target coverage are then
    normalized in parallel, and get "background" and "normalized" entries in
    ``data["depth"]["bins"]``.

    Returns a list of single-element lists, one per sample. Raises
    AssertionError when a group has no input samples or its inputs configure
    more than one background reference.
    """
    from bcbio.structural import cnvkit
    from bcbio.structural import shared as sshared
    items = [utils.to_single_data(x) for x in cwlutils.handle_combined_input(items)]
    if all(not cnvkit.use_general_sv_bins(x) for x in items):
        return [[d] for d in items]
    # Collect samples per bin group in first-seen order. Grouping by key, not by
    # adjacency, keeps a group together when its samples are not consecutive.
    grouped = {}
    group_order = []
    for item in items:
        gid = tz.get_in(["regions", "bins", "group"], item)
        if gid not in grouped:
            grouped[gid] = []
            group_order.append(gid)
        grouped[gid].append(item)
    out_files = {}
    back_files = {}
    for group_id in group_order:
        # No CNVkit calling for this particular set of samples
        if group_id is None:
            continue
        gitems = grouped[group_id]
        inputs, backgrounds = sshared.find_case_control(gitems)
        if not inputs:
            # Explicit raise so the check survives running Python with -O.
            raise AssertionError("Did not find inputs for sample batch: %s" %
                                 (" ".join(dd.get_sample_name(x) for x in gitems)))
        # Flat list of target/antitarget coverage files from background samples.
        cnns = []
        for back in backgrounds:
            cnns.append(tz.get_in(["depth", "bins", "target"], back))
            cnns.append(tz.get_in(["depth", "bins", "antitarget"], back))
        # Reset per group so values from a previous group never leak into this one.
        target_bed = None
        antitarget_bed = None
        for d in inputs:
            if tz.get_in(["depth", "bins", "target"], d):
                target_bed = tz.get_in(["depth", "bins", "target"], d)
                antitarget_bed = tz.get_in(["depth", "bins", "antitarget"], d)
        work_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(inputs[0]), "structural",
                                                   dd.get_sample_name(inputs[0]), "bins"))
        input_backs = set(b for b in (dd.get_background_cnv_reference(d) for d in inputs)
                          if b is not None)
        if input_backs:
            if len(input_backs) != 1:
                raise AssertionError("Multiple backgrounds in group: %s" % list(input_backs))
            back_file = list(input_backs)[0]
        else:
            # NOTE(review): target_bed/antitarget_bed stay None when no input has
            # target coverage; confirm cnvkit_background accepts that.
            back_file = cnvkit.cnvkit_background(cnns,
                                                 os.path.join(work_dir, "background-%s-cnvkit.cnn" % (group_id)),
                                                 backgrounds or inputs, target_bed, antitarget_bed)
        fix_cmd_inputs = []
        for data in inputs:
            work_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data), "structural",
                                                       dd.get_sample_name(data), "bins"))
            if tz.get_in(["depth", "bins", "target"], data):
                fix_file = os.path.join(work_dir, "%s-normalized.cnr" % (dd.get_sample_name(data)))
                fix_cmd_inputs.append((tz.get_in(["depth", "bins", "target"], data),
                                       tz.get_in(["depth", "bins", "antitarget"], data),
                                       back_file, fix_file, data))
                out_files[dd.get_sample_name(data)] = fix_file
                back_files[dd.get_sample_name(data)] = back_file
        parallel = {"type": "local", "cores": dd.get_cores(inputs[0]), "progs": ["cnvkit"]}
        run_multicore(cnvkit.run_fix_parallel, fix_cmd_inputs, inputs[0]["config"], parallel)

    out = []
    for data in items:
        if dd.get_sample_name(data) in out_files:
            data["depth"]["bins"]["background"] = back_files[dd.get_sample_name(data)]
            data["depth"]["bins"]["normalized"] = out_files[dd.get_sample_name(data)]
        out.append([data])
    return out