def parallel_variantcall_region(samples, run_parallel):
    """Perform variant calling and post-analysis on samples by region.

    Each sample is expanded with ``genotype.handle_multiple_variantcallers``
    and every item produced is queued for region-split calling. A sample that
    produces nothing is either collected for ``group_combine_parts`` (when
    its first element has a "combine" mapping whose first key is also a key
    of that element) or passed through unchanged.

    Args:
        samples: iterable of sample groupings; each grouping is indexable and
            its first element supports ``in`` and item lookup.
        run_parallel: handed to ``grouped_parallel_split_combine`` as-is.

    Returns:
        One list: passed-through samples, then the output of
        ``group_combine_parts`` (if anything was collected), then the output
        of ``grouped_parallel_split_combine``.
    """
    to_process = []
    extras = []
    to_group = []
    for x in samples:
        expanded = list(genotype.handle_multiple_variantcallers(x))
        if expanded:
            to_process.extend(expanded)
            continue
        head = x[0]
        # next(iter(mapping)) gives the first key on both Python 2 and 3; the
        # previous ``.keys()[0]`` fails on Python 3, where keys() is a view
        # that cannot be indexed. An empty mapping yields None, so the sample
        # falls through to ``extras`` instead of raising.
        combine_key = next(iter(head["combine"]), None) if "combine" in head else None
        if combine_key is not None and combine_key in head:
            # Internal invariant: a combine entry stands alone in its grouping.
            assert len(x) == 1
            to_group.append(head)
        else:
            extras.append(x)
    split_fn = _split_by_ready_regions("-variants.vcf.gz", "work_bam",
                                       genotype.get_variantcaller)
    if to_group:
        extras += group_combine_parts(to_group)
    return extras + grouped_parallel_split_combine(
        to_process, split_fn, multi.group_batches, run_parallel,
        "variantcall_sample", "split_variants_by_sample",
        "concat_variant_files", "vrn_file", ["region", "sam_ref", "config"])
def parallel_variantcall_region(samples, run_parallel):
    """Perform variant calling and post-analysis on samples by region.

    Samples are sorted into three buckets:

    * items produced by ``genotype.handle_multiple_variantcallers`` are
      queued for region-split calling;
    * a sample producing no items whose first element carries a "combine"
      mapping, with that mapping's first key also present on the element, is
      collected for ``group_combine_parts``;
    * every other sample is passed through unchanged.

    Args:
        samples: iterable of sample groupings; each grouping is indexable and
            its first element supports ``in`` and item lookup.
        run_parallel: handed to ``grouped_parallel_split_combine`` as-is.

    Returns:
        One list: passed-through samples, then the output of
        ``group_combine_parts`` (if anything was collected), then the output
        of ``grouped_parallel_split_combine``.
    """
    to_process = []
    extras = []
    to_group = []
    for x in samples:
        expanded = list(genotype.handle_multiple_variantcallers(x))
        if expanded:
            to_process.extend(expanded)
            continue
        head = x[0]
        # Use next(iter(...)) rather than ``.keys()[0]``: on Python 3 keys()
        # returns a view that does not support indexing. The None default
        # keeps an empty "combine" mapping from raising; such a sample is
        # simply passed through.
        combine_key = next(iter(head["combine"]), None) if "combine" in head else None
        if combine_key is not None and combine_key in head:
            # Internal invariant: a combine entry stands alone in its grouping.
            assert len(x) == 1
            to_group.append(head)
        else:
            extras.append(x)
    split_fn = _split_by_ready_regions("-variants.vcf", "work_bam",
                                       genotype.get_variantcaller)
    if to_group:
        extras += group_combine_parts(to_group)
    return extras + grouped_parallel_split_combine(
        to_process, split_fn, multi.group_batches, run_parallel,
        "variantcall_sample", "split_variants_by_sample",
        "concat_variant_files", "vrn_file", ["region", "sam_ref", "config"])
def parallel_variantcall_region(samples, run_parallel):
    """Run region-split variant calling over samples and collapse the output.

    Every item produced by ``handle_multiple_variantcallers`` for a sample is
    queued for calling; a sample that produces nothing is passed through
    untouched. Queued items go through ``grouped_parallel_split_combine`` and
    the result is handed to ``_collapse_by_bam_variantcaller``.

    Returns the passed-through samples followed by the collapsed results.
    """
    pending = []
    passthrough = []
    for sample in samples:
        expanded = list(handle_multiple_variantcallers(sample))
        if expanded:
            pending.extend(expanded)
        else:
            passthrough.append(sample)

    splitter = _split_by_ready_regions(".vcf.gz", "work_bam", get_variantcaller)
    shared_keys = ["region", "sam_ref", "config"]
    called = grouped_parallel_split_combine(
        pending, splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "concat_variant_files", "vrn_file", shared_keys)
    return passthrough + _collapse_by_bam_variantcaller(called)
def parallel_variantcall_region(samples, run_parallel):
    """Run region-split variant calling over all samples.

    Builds a splitter with ``_split_by_ready_regions`` and delegates the
    whole sample list to ``grouped_parallel_split_combine``, whose result is
    returned unchanged.
    """
    region_splitter = _split_by_ready_regions(
        "-variants.vcf", "work_bam", genotype.get_variantcaller)
    shared_keys = ["sam_ref", "config"]
    return grouped_parallel_split_combine(
        samples,
        region_splitter,
        multi.group_batches,
        run_parallel,
        "variantcall_sample",
        "split_variants_by_sample",
        "combine_variant_files",
        "vrn_file",
        shared_keys,
    )
def parallel_variantcall_region(samples, run_parallel):
    """Run region-split variant calling over samples and collapse the output.

    ``_dup_samples_by_variantcaller`` separates the input into items to call
    and items to pass through. The former go through
    ``grouped_parallel_split_combine`` and then
    ``_collapse_by_bam_variantcaller``.

    Returns the passed-through items followed by the collapsed results.
    """
    pending, passthrough = _dup_samples_by_variantcaller(samples)
    splitter = _split_by_ready_regions(".vcf.gz", "work_bam", get_variantcaller)
    shared_keys = ["region", "sam_ref", "config"]
    called = grouped_parallel_split_combine(
        pending, splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "concat_variant_files", "vrn_file", shared_keys)
    collapsed = _collapse_by_bam_variantcaller(called)
    return passthrough + collapsed
def parallel_variantcall_region(samples, run_parallel):
    """Run region-split variant calling over the expanded sample list.

    Each sample is expanded with ``genotype.handle_multiple_variantcallers``;
    the flattened items are handed to ``grouped_parallel_split_combine`` and
    its result is returned unchanged.
    """
    expanded = [
        item
        for sample in samples
        for item in genotype.handle_multiple_variantcallers(sample)
    ]
    region_splitter = _split_by_ready_regions(
        "-variants.vcf", "work_bam", genotype.get_variantcaller)
    return grouped_parallel_split_combine(
        expanded, region_splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "split_variants_by_sample",
        "combine_variant_files", "vrn_file", ["sam_ref", "config"])
def square_off(samples, run_parallel):
    """Perform joint calling at all variants within a batch.

    Only the first element of each sample grouping is considered. It is
    queued for processing when both ``config -> algorithm -> jointcaller``
    and ``metadata -> batch`` are truthy; otherwise it is passed through.

    Returns the output of ``_combine_to_jointcaller`` on the processed items,
    followed by the passed-through items (each wrapped in a one-item list).
    """
    firsts = [entry[0] for entry in samples]
    joint_ready = []
    passthrough = []
    for data in firsts:
        caller = tz.get_in(("config", "algorithm", "jointcaller"), data)
        batch = tz.get_in(("metadata", "batch"), data)
        bucket = joint_ready if (caller and batch) else passthrough
        bucket.append([data])

    shared_keys = ["region", "sam_ref", "config"]
    squared = grouped_parallel_split_combine(
        joint_ready, _split_by_callable_region, multi.group_batches_joint,
        run_parallel, "square_batch_region", "concat_variant_files",
        "vrn_file", shared_keys)
    return _combine_to_jointcaller(squared) + passthrough
def square_off(samples, run_parallel):
    """Perform joint calling at all variants within a batch.

    The first element of every sample grouping is inspected. Elements with
    both a truthy ``config -> algorithm -> jointcaller`` and a truthy
    ``metadata -> batch`` are sent through
    ``grouped_parallel_split_combine``; all others skip processing.

    Returns the ``_combine_to_jointcaller`` output followed by the skipped
    elements, each wrapped in a one-item list.
    """
    queued = []
    skipped = []
    for data in [group[0] for group in samples]:
        jointcaller = tz.get_in(("config", "algorithm", "jointcaller"), data)
        batch = tz.get_in(("metadata", "batch"), data)
        if not (jointcaller and batch):
            skipped.append([data])
            continue
        queued.append([data])

    result = grouped_parallel_split_combine(
        queued,
        _split_by_callable_region,
        multi.group_batches_joint,
        run_parallel,
        "square_batch_region",
        "concat_variant_files",
        "vrn_file",
        ["region", "sam_ref", "config"],
    )
    return _combine_to_jointcaller(result) + skipped
def parallel_variantcall_region(samples, run_parallel):
    """Run region-split variant calling over samples.

    Items produced by ``genotype.handle_multiple_variantcallers`` for a
    sample are queued for calling; a sample that produces nothing is passed
    through untouched.

    Returns the passed-through samples followed by the output of
    ``grouped_parallel_split_combine``.
    """
    pending = []
    passthrough = []
    for sample in samples:
        expanded = list(genotype.handle_multiple_variantcallers(sample))
        if not expanded:
            passthrough.append(sample)
            continue
        pending.extend(expanded)

    region_splitter = _split_by_ready_regions(
        "-variants.vcf", "work_bam", genotype.get_variantcaller)
    called = grouped_parallel_split_combine(
        pending, region_splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "split_variants_by_sample",
        "concat_variant_files", "vrn_file", ["region", "sam_ref", "config"])
    return passthrough + called
def square_off(samples, run_parallel):
    """Perform joint calling at all variants within a batch.

    Only the first element of each sample grouping is considered. When it
    has a truthy ``metadata -> batch``, it is expanded with
    ``genotype.handle_multiple_callers(data, "jointcaller")`` and every item
    accepted by ``_is_jointcaller_compatible`` is queued. An element that
    contributes no queued item is passed through instead.

    Returns the ``_combine_to_jointcaller`` output followed by the
    passed-through elements, each wrapped in a one-item list.
    """
    firsts = [entry[0] for entry in samples]
    joint_ready = []
    passthrough = []
    for data in firsts:
        accepted = []
        if tz.get_in(("metadata", "batch"), data):
            accepted = [
                [candidate]
                for candidate in genotype.handle_multiple_callers(data, "jointcaller")
                if _is_jointcaller_compatible(candidate)
            ]
        if accepted:
            joint_ready.extend(accepted)
        else:
            passthrough.append([data])

    squared = grouped_parallel_split_combine(
        joint_ready, _split_by_callable_region, multi.group_batches_joint,
        run_parallel, "square_batch_region", "concat_variant_files",
        "vrn_file", ["region", "sam_ref", "config"])
    return _combine_to_jointcaller(squared) + passthrough
def parallel_variantcall(sample_info, parallel_fn):
    """Provide sample genotyping, running in parallel over individual chromosomes.

    A sample whose first element gives a falsy ``get_variantcaller`` result
    is treated as already finished. Every other sample is expanded with
    ``handle_multiple_variantcallers`` and queued. When nothing is queued,
    no splitting or parallel work is started.

    Returns the finished samples followed by the output of
    ``grouped_parallel_split_combine`` (if it ran).
    """
    pending = []
    done = []
    for sample in sample_info:
        if not get_variantcaller(sample[0]):
            done.append(sample)
            continue
        pending.extend(handle_multiple_variantcallers(sample))

    # Nothing to call: return early without building the splitter.
    if not pending:
        return done

    chrom_splitter = process_bam_by_chromosome(
        "-variants.vcf", "work_bam", dir_ext_fn=get_variantcaller)
    called = grouped_parallel_split_combine(
        pending, chrom_splitter, multi.group_batches, parallel_fn,
        "variantcall_sample", "split_variants_by_sample",
        "combine_variant_files", "vrn_file", ["sam_ref", "config"])
    done.extend(called)
    return done
def square_off(samples, run_parallel):
    """Perform joint calling at all variants within a batch.

    For the first element of each sample grouping: if ``metadata -> batch``
    is truthy, the element is expanded with
    ``genotype.handle_multiple_callers(data, "jointcaller")`` and each item
    passing ``_is_jointcaller_compatible`` is queued. Elements that queue
    nothing skip processing.

    Returns the ``_combine_to_jointcaller`` output followed by the skipped
    elements, each wrapped in a one-item list.
    """
    queued = []
    skipped = []
    for data in [group[0] for group in samples]:
        size_before = len(queued)
        if tz.get_in(("metadata", "batch"), data):
            for candidate in genotype.handle_multiple_callers(data, "jointcaller"):
                if _is_jointcaller_compatible(candidate):
                    queued.append([candidate])
        # No growth means this element contributed nothing to the queue.
        if len(queued) == size_before:
            skipped.append([data])

    result = grouped_parallel_split_combine(
        queued,
        _split_by_callable_region,
        multi.group_batches_joint,
        run_parallel,
        "square_batch_region",
        "concat_variant_files",
        "vrn_file",
        ["region", "sam_ref", "config"],
    )
    return _combine_to_jointcaller(result) + skipped
def parallel_variantcall_region(samples, run_parallel):
    """Run region-split variant calling over samples and collapse the output.

    Only the first element of each sample grouping is considered. It is
    expanded with ``handle_multiple_callers(data, "variantcaller", "gatk")``
    and each item produced is queued, wrapped in a one-item list. An element
    producing nothing is passed through, wrapped the same way.
    NOTE(review): the meaning of the "gatk" argument is not visible here;
    presumably a default caller, to be confirmed.

    Returns the passed-through elements followed by the output of
    ``_collapse_by_bam_variantcaller`` on the called items.
    """
    firsts = [entry[0] for entry in samples]
    pending = []
    passthrough = []
    for data in firsts:
        expanded = [[item] for item in handle_multiple_callers(data, "variantcaller", "gatk")]
        if expanded:
            pending.extend(expanded)
        else:
            passthrough.append([data])

    splitter = _split_by_ready_regions(".vcf.gz", "work_bam", get_variantcaller)
    called = grouped_parallel_split_combine(
        pending, splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "concat_variant_files", "vrn_file",
        ["region", "sam_ref", "config"])
    return passthrough + _collapse_by_bam_variantcaller(called)