Пример #1
0
def parallel_callable_loci(in_bam, ref_file, data):
    """Build the callable-regions BED for ``in_bam`` with a local parallel run.

    A deep copy of ``data["config"]`` is used for the run. The work item is
    split with ``shared.process_bam_by_chromosome`` (``remove_alts=True``) and
    dispatched through ``parallel_split_combine`` using the
    ``calc_callable_loci`` and ``combine_bed`` task names.

    Args:
        in_bam: BAM file placed under the ``work_bam`` key of the work item.
        ref_file: Not used by this function; the reference is taken from
            ``data["reference"]``.
        data: Sample dictionary providing ``config`` and ``reference``; also
            handed to ``dd.get_work_dir`` and ``dd.get_sample_name`` to
            locate the output directory.

    Returns:
        The ``callable_bed`` value of the first item in the first result.
    """
    cfg = copy.deepcopy(data["config"])
    cores = cfg["algorithm"].get("num_cores", 1)
    # Output goes to <work_dir>/align/<sample_name>, created if needed.
    sample_dir = os.path.join(dd.get_work_dir(data), "align",
                              dd.get_sample_name(data))
    made_dir = utils.safe_makedir(sample_dir)
    work_item = {"work_bam": in_bam,
                 "config": cfg,
                 "reference": data["reference"],
                 "dirs": {"out": made_dir}}
    local_parallel = {"type": "local", "cores": cores,
                      "module": "bcbio.distributed"}
    work_items = [[work_item]]
    with prun.start(local_parallel, work_items, cfg,
                    multiplier=int(cores)) as run_parallel:
        splitter = shared.process_bam_by_chromosome(
            "-callable.bed", "work_bam", remove_alts=True)
        combined = parallel_split_combine(
            work_items, splitter, run_parallel,
            "calc_callable_loci", "combine_bed", "callable_bed", ["config"])
        first_group = combined[0]
    return first_group[0]["callable_bed"]
Пример #2
0
def parallel_callable_loci(in_bam, ref_file, config):
    """Build the callable-regions BED for ``in_bam`` with a local parallel run.

    The core count is read from ``config["algorithm"]["num_cores"]``
    (default 1) and a deep copy of ``config`` is used for the run. The work
    item is split with ``shared.process_bam_by_chromosome`` and dispatched
    through ``parallel_split_combine`` using the ``calc_callable_loci`` and
    ``combine_bed`` task names.

    Args:
        in_bam: BAM file placed under the ``work_bam`` key of the work item.
        ref_file: Reference path stored at ``reference -> fasta -> base``.
        config: Configuration dictionary with an ``algorithm`` section.

    Returns:
        The ``callable_bed`` value of the first item in the first result.
    """
    cores = config["algorithm"].get("num_cores", 1)
    run_config = copy.deepcopy(config)
    work_item = {"work_bam": in_bam,
                 "config": run_config,
                 "reference": {"fasta": {"base": ref_file}}}
    local_parallel = {"type": "local", "cores": cores,
                      "module": "bcbio.distributed"}
    work_items = [[work_item]]
    with prun.start(local_parallel, work_items, run_config,
                    multiplier=int(cores)) as run_parallel:
        splitter = shared.process_bam_by_chromosome("-callable.bed",
                                                    "work_bam")
        combined = parallel_split_combine(
            work_items, splitter, run_parallel,
            "calc_callable_loci", "combine_bed", "callable_bed", ["config"])
        first_group = combined[0]
    return first_group[0]["callable_bed"]
Пример #3
0
def parallel_callable_loci(in_bam, ref_file, config):
    """Build the callable-regions BED for ``in_bam`` with a local runner.

    A runner is obtained from ``parallel_runner`` with a local setup sized
    by ``config["algorithm"]["num_cores"]`` (default 1). The single work item
    is split with ``shared.process_bam_by_chromosome`` and dispatched through
    ``parallel_split_combine`` using the ``calc_callable_loci`` and
    ``combine_bed`` task names.

    Args:
        in_bam: BAM file placed under the ``work_bam`` key of the work item.
        ref_file: Reference file placed under the ``sam_ref`` key.
        config: Configuration dictionary with an ``algorithm`` section.

    Returns:
        The ``callable_bed`` value of the first item in the first result.
    """
    cores = config["algorithm"].get("num_cores", 1)
    work_item = {
        "work_bam": in_bam,
        "sam_ref": ref_file,
        "config": config,
    }
    local_parallel = {
        "type": "local",
        "cores": cores,
        "module": "bcbio.distributed",
    }
    run_parallel = parallel_runner(local_parallel, {}, config)
    splitter = shared.process_bam_by_chromosome("-callable.bed", "work_bam")
    combined = parallel_split_combine(
        [[work_item]], splitter, run_parallel,
        "calc_callable_loci", "combine_bed", "callable_bed", ["config"])
    first_group = combined[0]
    return first_group[0]["callable_bed"]
Пример #4
0
def parallel_callable_loci(in_bam, ref_file, data):
    """Build the callable-regions BED for ``in_bam`` with a local parallel run.

    A deep copy of ``data["config"]`` is used for the run. The work item is
    split with ``shared.process_bam_by_chromosome`` (``remove_alts=True``) and
    dispatched through ``parallel_split_combine`` using the
    ``calc_callable_loci`` and ``combine_bed`` task names.

    Args:
        in_bam: BAM file placed under the ``work_bam`` key of the work item.
        ref_file: Not used by this function; the reference is taken from
            ``data["reference"]``.
        data: Sample dictionary providing ``config`` and ``reference``.

    Returns:
        The ``callable_bed`` value of the first item in the first result.
    """
    cfg = copy.deepcopy(data["config"])
    cores = cfg["algorithm"].get("num_cores", 1)
    work_item = {
        "work_bam": in_bam,
        "config": cfg,
        "reference": data["reference"],
    }
    local_parallel = {
        "type": "local",
        "cores": cores,
        "module": "bcbio.distributed",
    }
    work_items = [[work_item]]
    with prun.start(local_parallel, work_items, cfg,
                    multiplier=int(cores)) as run_parallel:
        splitter = shared.process_bam_by_chromosome(
            "-callable.bed", "work_bam", remove_alts=True)
        combined = parallel_split_combine(
            work_items, splitter, run_parallel,
            "calc_callable_loci", "combine_bed", "callable_bed", ["config"])
        first_group = combined[0]
    return first_group[0]["callable_bed"]
Пример #5
0
def parallel_callable_loci(in_bam, ref_file, config):
    """Build the callable-regions BED for ``in_bam`` with a local parallel run.

    The run uses a deep copy of ``config`` whose ``algorithm`` section gets a
    ``memory_adjust`` entry of ``{"direction": "decrease", "magnitude": 2}``;
    the caller's ``config`` is left without that entry. The work item is
    split with ``shared.process_bam_by_chromosome`` and dispatched through
    ``parallel_split_combine`` using the ``calc_callable_loci`` and
    ``combine_bed`` task names.

    Args:
        in_bam: BAM file placed under the ``work_bam`` key of the work item.
        ref_file: Reference file placed under the ``sam_ref`` key.
        config: Configuration dictionary with an ``algorithm`` section.

    Returns:
        The ``callable_bed`` value of the first item in the first result.
    """
    cores = config["algorithm"].get("num_cores", 1)
    run_config = copy.deepcopy(config)
    run_config["algorithm"]["memory_adjust"] = {
        "direction": "decrease",
        "magnitude": 2,
    }
    work_item = {
        "work_bam": in_bam,
        "sam_ref": ref_file,
        "config": run_config,
    }
    local_parallel = {
        "type": "local",
        "cores": cores,
        "module": "bcbio.distributed",
    }
    work_items = [[work_item]]
    with prun.start(local_parallel, work_items, run_config) as run_parallel:
        splitter = shared.process_bam_by_chromosome("-callable.bed",
                                                    "work_bam")
        combined = parallel_split_combine(
            work_items, splitter, run_parallel,
            "calc_callable_loci", "combine_bed", "callable_bed", ["config"])
        first_group = combined[0]
    return first_group[0]["callable_bed"]
Пример #6
0
def parallel_callable_loci(in_bam, ref_file, config):
    """Build the callable-regions BED for ``in_bam`` with a local parallel run.

    The run uses a deep copy of ``config`` whose ``algorithm`` section gets a
    ``memory_adjust`` entry of ``{"direction": "decrease", "magnitude": 2}``.
    ``prun.start`` is given ``multiplier=int(num_cores)``. The work item is
    split with ``shared.process_bam_by_chromosome`` and dispatched through
    ``parallel_split_combine`` using the ``calc_callable_loci`` and
    ``combine_bed`` task names.

    Args:
        in_bam: BAM file placed under the ``work_bam`` key of the work item.
        ref_file: Reference file placed under the ``sam_ref`` key.
        config: Configuration dictionary with an ``algorithm`` section.

    Returns:
        The ``callable_bed`` value of the first item in the first result.
    """
    cores = config["algorithm"].get("num_cores", 1)
    run_config = copy.deepcopy(config)
    memory_adjust = {"direction": "decrease", "magnitude": 2}
    run_config["algorithm"]["memory_adjust"] = memory_adjust
    work_item = {
        "work_bam": in_bam,
        "sam_ref": ref_file,
        "config": run_config,
    }
    local_parallel = {
        "type": "local",
        "cores": cores,
        "module": "bcbio.distributed",
    }
    work_items = [[work_item]]
    with prun.start(local_parallel, work_items, run_config,
                    multiplier=int(cores)) as run_parallel:
        splitter = shared.process_bam_by_chromosome("-callable.bed",
                                                    "work_bam")
        combined = parallel_split_combine(
            work_items, splitter, run_parallel,
            "calc_callable_loci", "combine_bed", "callable_bed", ["config"])
        first_group = combined[0]
    return first_group[0]["callable_bed"]
Пример #7
0
def parallel_callable_loci(in_bam, ref_file, config):
    """Build the callable-regions BED for ``in_bam`` with a local runner.

    A runner is obtained from ``parallel_runner`` with a local setup sized
    by ``config["algorithm"]["num_cores"]`` (default 1). The single work item
    is split with ``shared.process_bam_by_chromosome`` and dispatched through
    ``parallel_split_combine`` using the ``calc_callable_loci`` and
    ``combine_bed`` task names.

    Args:
        in_bam: BAM file placed under the ``work_bam`` key of the work item.
        ref_file: Reference file placed under the ``sam_ref`` key.
        config: Configuration dictionary with an ``algorithm`` section.

    Returns:
        The ``callable_bed`` value of the first item in the first result.
    """
    cores = config["algorithm"].get("num_cores", 1)
    work_item = {"work_bam": in_bam, "sam_ref": ref_file, "config": config}
    local_parallel = {"type": "local", "cores": cores,
                      "module": "bcbio.distributed"}
    run_parallel = parallel_runner(local_parallel, {}, config)
    splitter = shared.process_bam_by_chromosome("-callable.bed", "work_bam")
    combined = parallel_split_combine(
        [[work_item]], splitter, run_parallel,
        "calc_callable_loci", "combine_bed", "callable_bed", ["config"])
    first_group = combined[0]
    return first_group[0]["callable_bed"]
Пример #8
0
def parallel_realign_sample(sample_info, parallel_fn):
    """Realign samples, splitting the work over individual chromosomes.

    Each entry of ``sample_info`` is indexed as ``entry[0]["config"]``; the
    ``realign`` flag in its ``algorithm`` section (default ``True``) decides
    whether the entry is realigned or passed through unchanged.

    Args:
        sample_info: Iterable of sample entries.
        parallel_fn: Parallel runner forwarded to ``parallel_split_combine``.

    Returns:
        A list holding the passed-through entries first, followed by the
        results of ``parallel_split_combine`` for the realigned ones.
    """
    pending, done = [], []
    for entry in sample_info:
        wants_realign = entry[0]["config"]["algorithm"].get("realign", True)
        bucket = pending if wants_realign else done
        bucket.append(entry)
    if not pending:
        return done
    bam_key = "work_bam"
    splitter = process_bam_by_chromosome(
        "-realign.bam", bam_key, default_targets=["nochr"])
    realigned = parallel_split_combine(
        pending, splitter, parallel_fn,
        "realign_sample", "combine_bam", bam_key, ["config"])
    done.extend(realigned)
    return done
Пример #9
0
def parallel_variantcall(sample_info, parallel_fn):
    """Genotype samples, splitting the work over individual chromosomes.

    Each entry of ``sample_info`` is indexed as ``entry[0]["config"]``; a
    truthy ``snpcall`` value in its ``algorithm`` section sends the entry
    through ``_handle_multiple_variantcallers`` and on to variant calling,
    otherwise the entry is passed through unchanged. The ``snpcall`` key is
    required.

    Args:
        sample_info: Iterable of sample entries.
        parallel_fn: Parallel runner forwarded to ``parallel_split_combine``.

    Returns:
        A list holding the passed-through entries first, followed by the
        results of ``parallel_split_combine`` for the genotyped ones.
    """
    pending, done = [], []
    for entry in sample_info:
        if not entry[0]["config"]["algorithm"]["snpcall"]:
            done.append(entry)
            continue
        pending.extend(_handle_multiple_variantcallers(entry))
    if not pending:
        return done
    splitter = process_bam_by_chromosome(
        "-variants.vcf", "work_bam", dir_ext_fn=_get_variantcaller)
    called = parallel_split_combine(
        pending, splitter, parallel_fn,
        "variantcall_sample", "combine_variant_files",
        "vrn_file", ["sam_ref", "config"])
    done.extend(called)
    return done
Пример #10
0
def parallel_variantcall(sample_info, parallel_fn):
    """Genotype samples, splitting the work over individual chromosomes.

    An entry is queued for variant calling when ``get_variantcaller`` returns
    a truthy value for its first element; queued entries are expanded with
    ``handle_multiple_variantcallers``. Other entries are passed through
    unchanged. Queued work is grouped with ``multi.group_batches`` and run
    through ``grouped_parallel_split_combine``.

    Args:
        sample_info: Iterable of sample entries.
        parallel_fn: Parallel runner forwarded to
            ``grouped_parallel_split_combine``.

    Returns:
        A list holding the passed-through entries first, followed by the
        results for the genotyped ones.
    """
    pending, done = [], []
    for entry in sample_info:
        if not get_variantcaller(entry[0]):
            done.append(entry)
            continue
        pending.extend(handle_multiple_variantcallers(entry))
    if not pending:
        return done
    splitter = process_bam_by_chromosome(
        "-variants.vcf", "work_bam", dir_ext_fn=get_variantcaller)
    called = grouped_parallel_split_combine(
        pending, splitter, multi.group_batches, parallel_fn,
        "variantcall_sample", "split_variants_by_sample",
        "combine_variant_files", "vrn_file", ["sam_ref", "config"])
    done.extend(called)
    return done
Пример #11
0
def parallel_variantcall(sample_info, parallel_fn):
    """Genotype samples, splitting the work over individual chromosomes.

    An entry is queued for variant calling when ``get_variantcaller`` returns
    a truthy value for its first element; queued entries are expanded with
    ``handle_multiple_variantcallers``. Other entries are passed through
    unchanged. Queued work is split using the ``-variants.vcf.gz`` suffix,
    grouped with ``multi.group_batches`` and run through
    ``grouped_parallel_split_combine``.

    Args:
        sample_info: Iterable of sample entries.
        parallel_fn: Parallel runner forwarded to
            ``grouped_parallel_split_combine``.

    Returns:
        A list holding the passed-through entries first, followed by the
        results for the genotyped ones.
    """
    queued = []
    untouched = []
    for entry in sample_info:
        if get_variantcaller(entry[0]):
            queued.extend(handle_multiple_variantcallers(entry))
            continue
        untouched.append(entry)
    if queued:
        splitter = process_bam_by_chromosome(
            "-variants.vcf.gz", "work_bam", dir_ext_fn=get_variantcaller)
        untouched.extend(grouped_parallel_split_combine(
            queued, splitter, multi.group_batches, parallel_fn,
            "variantcall_sample", "split_variants_by_sample",
            "combine_variant_files", "vrn_file", ["sam_ref", "config"]))
    return untouched
Пример #12
0
def parallel_realign_sample(sample_info, parallel_fn):
    """Realign samples, splitting the work over individual chromosomes.

    Each entry of ``sample_info`` is indexed as ``entry[0]["config"]``; the
    ``realign`` flag in its ``algorithm`` section (default ``True``) decides
    whether the entry is realigned or passed through unchanged.

    Args:
        sample_info: Iterable of sample entries.
        parallel_fn: Parallel runner forwarded to ``parallel_split_combine``.

    Returns:
        A list holding the passed-through entries first, followed by the
        results of ``parallel_split_combine`` for the realigned ones.
    """
    queued = []
    untouched = []
    for entry in sample_info:
        algorithm = entry[0]["config"]["algorithm"]
        if not algorithm.get("realign", True):
            untouched.append(entry)
            continue
        queued.append(entry)
    if queued:
        bam_key = "work_bam"
        splitter = process_bam_by_chromosome(
            "-realign.bam", bam_key, default_targets=["nochr"])
        untouched.extend(parallel_split_combine(
            queued, splitter, parallel_fn,
            "realign_sample", "combine_bam", bam_key, ["config"]))
    return untouched
Пример #13
0
def parallel_write_recal_bam(xs, parallel_fn):
    """Rewrite recalibrated BAM files in parallel, one chromosome at a time.

    Each entry of ``xs`` is indexed as ``entry[0]["config"]``; the
    ``recalibrate`` flag in its ``algorithm`` section (default ``True``)
    decides whether the entry is rewritten or passed through unchanged.

    Args:
        xs: Iterable of sample entries.
        parallel_fn: Parallel runner forwarded to ``parallel_split_combine``.

    Returns:
        A list holding the passed-through entries first, followed by the
        results of ``parallel_split_combine`` for the rewritten ones.
    """
    pending, done = [], []
    for entry in xs:
        wants_recal = entry[0]["config"]["algorithm"].get("recalibrate", True)
        bucket = pending if wants_recal else done
        bucket.append(entry)
    if not pending:
        return done
    bam_key = "work_bam"
    splitter = process_bam_by_chromosome(
        "-gatkrecal.bam", bam_key, default_targets=["nochr"])
    rewritten = parallel_split_combine(
        pending, splitter, parallel_fn,
        "write_recal_bam", "combine_bam", bam_key, ["config"])
    done.extend(rewritten)
    # NOTE: reclaiming disk space from the original BAM after recalibration
    # (a save_diskspace call) is currently disabled here.
    return done
Пример #14
0
def parallel_write_recal_bam(xs, parallel_fn):
    """Rewrite recalibrated BAM files in parallel, one chromosome at a time.

    Each entry of ``xs`` is indexed as ``entry[0]["config"]``; the
    ``recalibrate`` flag in its ``algorithm`` section (default ``True``)
    decides whether the entry is rewritten or passed through unchanged.

    Args:
        xs: Iterable of sample entries.
        parallel_fn: Parallel runner forwarded to ``parallel_split_combine``.

    Returns:
        A list holding the passed-through entries first, followed by the
        results of ``parallel_split_combine`` for the rewritten ones.
    """
    queued = []
    untouched = []
    for entry in xs:
        algorithm = entry[0]["config"]["algorithm"]
        if not algorithm.get("recalibrate", True):
            untouched.append(entry)
            continue
        queued.append(entry)
    if queued:
        bam_key = "work_bam"
        splitter = process_bam_by_chromosome(
            "-gatkrecal.bam", bam_key, default_targets=["nochr"])
        untouched.extend(parallel_split_combine(
            queued, splitter, parallel_fn,
            "write_recal_bam", "combine_bam", bam_key, ["config"]))
        # NOTE: reclaiming disk space from the original BAM after
        # recalibration (a save_diskspace call) is currently disabled here.
    return untouched