def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Genotype one region of a sample; used as a parallel entry point.

    Calling is skipped when ``out_file`` is already present on disk.
    Otherwise the caller named by ``config["algorithm"]["variantcaller"]``
    (falling back to "gatk") writes to a "-raw" sibling of ``out_file``,
    the result is optionally passed through ``phasing.read_backed_phasing``
    when ``phasing`` is set to "gatk", and the final file is handed to
    ``utils.symlink_plus`` together with ``out_file``.

    Returns a one-element list containing ``data``, updated in place with
    ``vrn_file`` and, when ``region`` is truthy, ``region``.
    """
    already_called = (out_file is not None
                      and os.path.exists(out_file)
                      and os.path.lexists(out_file))
    if not already_called:
        utils.safe_makedir(os.path.dirname(out_file))
        ref = data["sam_ref"]
        cfg = data["config"]
        available = get_variantcallers()
        run_caller = available[cfg["algorithm"].get("variantcaller", "gatk")]
        # One BAM means a single sample; several BAMs must map 1:1 onto the
        # original items of the batch.
        items = [data] if len(align_bams) == 1 else multi.get_orig_items(data)
        assert len(items) == len(align_bams)
        raw_target = "%s-raw%s" % utils.splitext_plus(out_file)
        called = run_caller(align_bams, items, ref,
                            data["genome_resources"]["variation"],
                            region, raw_target)
        wants_gatk_phasing = data["config"]["algorithm"].get("phasing", False) == "gatk"
        if wants_gatk_phasing:
            called = phasing.read_backed_phasing(called, align_bams, ref,
                                                 region, cfg)
        utils.symlink_plus(called, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Genotype one region of a sample; used as a parallel entry point.

    When ``out_file`` is already on disk nothing is run. Otherwise every
    input BAM is passed to ``bam.index`` and the caller named by
    ``config["algorithm"]["variantcaller"]`` is invoked; whatever path the
    caller returns replaces ``out_file``.

    Returns a one-element list containing ``data``, updated in place with
    ``vrn_file`` and, when ``region`` is truthy, ``region``.
    """
    have_output = (out_file is not None
                   and os.path.exists(out_file)
                   and os.path.lexists(out_file))
    if not have_output:
        utils.safe_makedir(os.path.dirname(out_file))
        reference = dd.get_ref_file(data)
        cfg = data["config"]
        available = get_variantcallers()
        run_caller = available[cfg["algorithm"].get("variantcaller")]
        # One BAM means a single sample; several BAMs must map 1:1 onto the
        # original items of the batch.
        items = [data] if len(align_bams) == 1 else multi.get_orig_items(data)
        assert len(items) == len(align_bams)
        # Any falsy lookup result (missing key, None, empty) becomes a dict.
        variation_resources = tz.get_in(("genome_resources", "variation"), data, {}) or {}
        for bam_path in align_bams:
            bam.index(bam_path, data["config"], check_timestamp=False)
        out_file = run_caller(align_bams, items, reference,
                              variation_resources, region, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
# variantcall_sample: parallel entry point that genotypes one region of a sample.
# The guarded section is entered when `out_file` is None or is not found on disk. It
# looks up the caller under config["algorithm"]["variantcaller"] (default "gatk"),
# passes each BAM to bam.index, and calls into a "-raw" sibling of `out_file` only when
# a truthy "phasing" option is configured (otherwise directly into `out_file`).
# phasing.read_backed_phasing is applied when phasing == "gatk".
# The function records data["vrn_file"] = out_file (and data["region"] when `region`
# is truthy) and returns [data].
# NOTE(review): this definition is collapsed onto a single line, so nesting cannot be
# read from indentation. In particular it is ambiguous whether
# utils.symlink_plus(call_file, out_file) belongs inside the `if do_phasing == "gatk"`
# branch or directly after it -- confirm against version control before reformatting.
def variantcall_sample(data, region=None, align_bams=None, out_file=None): """Parallel entry point for doing genotyping of a region of a sample. """ if out_file is None or not os.path.exists(out_file) or not os.path.lexists(out_file): utils.safe_makedir(os.path.dirname(out_file)) sam_ref = data["sam_ref"] config = data["config"] caller_fns = get_variantcallers() caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")] if len(align_bams) == 1: items = [data] else: items = multi.get_orig_items(data) assert len(items) == len(align_bams) assoc_files = tz.get_in(("genome_resources", "variation"), data, {}) if not assoc_files: assoc_files = {} for bam_file in align_bams: bam.index(bam_file, data["config"], check_timestamp=False) do_phasing = data["config"]["algorithm"].get("phasing", False) call_file = "%s-raw%s" % utils.splitext_plus(out_file) if do_phasing else out_file call_file = caller_fn(align_bams, items, sam_ref, assoc_files, region, call_file) if do_phasing == "gatk": call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref, region, config) utils.symlink_plus(call_file, out_file) if region: data["region"] = region data["vrn_file"] = out_file return [data]
def _get_orig_items(data): """Retrieve original items in a batch, handling CWL and standard cases. """ if isinstance(data, dict): if dd.get_align_bam(data) and tz.get_in(["metadata", "batch"], data): return vmulti.get_orig_items(data) else: return [data] else: return data
def _get_orig_items(data): """Retrieve original items in a batch, handling CWL and standard cases. """ if isinstance(data, dict): if tz.get_in(["metadata", "batch"], data): return vmulti.get_orig_items(data) else: return [data] else: return data
def _get_validate(data): """Retrieve items to validate, from single samples or from combined joint calls. """ if data.get("vrn_file") and "validate" in data["config"]["algorithm"]: return data elif "group_orig" in data: for sub in multi.get_orig_items(data): if "validate" in sub["config"]["algorithm"]: return sub return None
def _get_validate(data): """Retrieve items to validate, from single samples or from combined joint calls. """ if data.get("vrn_file") and "validate" in data["config"]["algorithm"]: return data elif "group_orig" in data: for sub in multi.get_orig_items(data): if "validate" in sub["config"]["algorithm"]: sub_val = utils.deepish_copy(sub) sub_val["vrn_file"] = data["vrn_file"] return sub_val return None
def _get_validate(data): """Retrieve items to validate, from single samples or from combined joint calls. """ if data.get("vrn_file") and tz.get_in(["config", "algorithm", "validate"], data): return data elif "group_orig" in data: for sub in multi.get_orig_items(data): if "validate" in sub["config"]["algorithm"]: sub_val = utils.deepish_copy(sub) sub_val["vrn_file"] = data["vrn_file"] return sub_val return None
def extract(data):
    """Attach a germline VCF to samples that report a paired phenotype.

    Samples without a paired phenotype are returned unchanged. Otherwise
    the germline file comes from ``_extract_germline`` when the sample has
    batches and more than one original item, or from
    ``_remove_prioritization`` if not. The file is passed through
    ``vcfutils.bgzip_and_index`` and stored under
    ``data["vrn_file_plus"]["germline"]``.
    """
    if not vcfutils.get_paired_phenotype(data):
        return data
    has_partner = dd.get_batches(data) and len(vmulti.get_orig_items(data)) > 1
    make_germline = _extract_germline if has_partner else _remove_prioritization
    raw_vcf = make_germline(data["vrn_file"], data)
    prepared_vcf = vcfutils.bgzip_and_index(raw_vcf, data["config"])
    data["vrn_file_plus"] = {"germline": prepared_vcf}
    return data
def extract(data):
    """Attach a germline VCF to samples that report a paired phenotype.

    Samples without a paired phenotype are returned unchanged. With one,
    ``_extract_germline`` is used when the sample has batches and more than
    one original item, and ``_remove_prioritization`` is used if not. The
    outcome is run through ``vcfutils.bgzip_and_index`` and recorded as
    ``data["vrn_file_plus"]["germline"]``.
    """
    if vcfutils.get_paired_phenotype(data):
        if dd.get_batches(data) and len(vmulti.get_orig_items(data)) > 1:
            unpacked = _extract_germline(data["vrn_file"], data)
        else:
            unpacked = _remove_prioritization(data["vrn_file"], data)
        data["vrn_file_plus"] = {
            "germline": vcfutils.bgzip_and_index(unpacked, data["config"]),
        }
    return data
def _do_prioritize(data):
    """Decide whether prioritization applies: tumor present, no normal.

    Returns ``None`` when ``data`` has no paired phenotype. Otherwise the
    phenotypes of the original batch items (or of ``data`` alone when it
    has no ``metadata -> batch`` entry) are inspected, and the result is
    True only if a "tumor" is present and no "normal" is.
    """
    if not vcfutils.get_paired_phenotype(data):
        return None
    if tz.get_in(["metadata", "batch"], data):
        members = vmulti.get_orig_items(data)
    else:
        members = [data]
    # Looked up once per item; presumably a pure read -- TODO confirm.
    phenotypes = [vcfutils.get_paired_phenotype(member) for member in members]
    return "tumor" in phenotypes and "normal" not in phenotypes
def _do_prioritize(data):
    """Decide whether prioritization applies: tumor present, no normal.

    Returns ``None`` when ``data`` has no paired phenotype. Otherwise each
    original batch item (or ``data`` alone when it has no
    ``metadata -> batch`` entry) is checked, and the result is True only if
    some item is a "tumor" and none is a "normal".
    """
    if not vcfutils.get_paired_phenotype(data):
        return None
    if tz.get_in(["metadata", "batch"], data):
        members = vmulti.get_orig_items(data)
    else:
        members = [data]
    saw_tumor = saw_normal = False
    for member in members:
        if vcfutils.get_paired_phenotype(member) == "tumor":
            saw_tumor = True
            continue
        if vcfutils.get_paired_phenotype(member) == "normal":
            saw_normal = True
    return saw_tumor and not saw_normal