def split_somatic(items):
    """Add germline calling targets for normals in tumor/normal batches.

    Items are partitioned with ``vcfutils.somatic_batches``. For every somatic
    group whose paired normal carries a dictionary variantcaller specification
    with a ``germline`` key, a copy of that normal is renamed to
    ``<description>-germline``, has its batch removed, is marked with the
    ``germline`` phenotype and is configured with the germline caller(s).
    Each germline name is added at most once. Somatic items whose
    variantcaller dictionary has a ``somatic`` key are switched, in place,
    to that somatic-only specification.

    Enables separate germline calling of samples using shared alignments.

    Returns the non-somatic items, followed by the somatic items, followed
    by the newly created germline targets.
    """
    somatic_groups, somatic, non_somatic = vcfutils.somatic_batches(items)

    # Build germline targets from the normal sample of each tumor/normal pair.
    seen_names = set()
    germline_targets = []
    for group in somatic_groups:
        pair = vcfutils.get_paired(group)
        if not pair or not pair.normal_data:
            continue
        # All edits below are applied to the copy returned by deepish_copy.
        normal = utils.deepish_copy(pair.normal_data)
        callers = dd.get_variantcaller(normal)
        if not isinstance(callers, dict) or "germline" not in callers:
            continue
        germline_name = "%s-germline" % normal["description"]
        normal["description"] = germline_name
        if germline_name in seen_names:
            # The same normal can appear in several groups; emit it only once.
            continue
        seen_names.add(germline_name)
        normal["rgnames"]["sample"] = germline_name
        del normal["metadata"]["batch"]
        normal["metadata"]["phenotype"] = "germline"
        # NOTE(review): presumably drops alignment/QC steps for the germline
        # copy -- helper is defined elsewhere, confirm.
        normal = remove_align_qc_tools(normal)
        normal["config"]["algorithm"]["variantcaller"] = callers["germline"]
        germline_targets.append(normal)

    # Restrict somatic samples to their somatic callers; every somatic item
    # is kept, whether or not its specification needed rewriting.
    somatic_targets = []
    for sample in somatic:
        callers = dd.get_variantcaller(sample)
        uses_split_spec = isinstance(callers, dict) and "somatic" in callers
        if uses_split_spec:
            sample["config"]["algorithm"]["variantcaller"] = callers["somatic"]
        somatic_targets.append(sample)

    return non_somatic + somatic_targets + germline_targets
def split_somatic(items):
    """Add germline calling targets for normals in tumor/normal batches.

    Every item is first passed through ``_clean_flat_variantcaller`` and the
    results are partitioned with ``vcfutils.somatic_batches``. For every
    somatic group whose paired normal carries a dictionary variantcaller
    specification with a ``germline`` key, a copy of that normal keeps its
    description as sample name, is placed in its own ``<description>-germline``
    batch, is marked with the ``germline`` phenotype and is configured with
    the germline caller(s). Each normal description is added at most once.
    Somatic items whose variantcaller dictionary has a ``somatic`` key are
    switched, in place, to that somatic-only specification.

    Enables separate germline calling of samples using shared alignments.

    Returns the non-somatic items, followed by the somatic items, followed
    by the newly created germline targets.
    """
    items = list(map(_clean_flat_variantcaller, items))
    somatic_groups, somatic, non_somatic = vcfutils.somatic_batches(items)

    # Build germline targets from the normal sample of each tumor/normal pair.
    seen_names = set()
    germline_targets = []
    for group in somatic_groups:
        pair = vcfutils.get_paired(group)
        if not pair or not pair.normal_data:
            continue
        # All edits below are applied to the copy returned by deepish_copy.
        normal = utils.deepish_copy(pair.normal_data)
        callers = dd.get_variantcaller(normal)
        if not isinstance(callers, dict) or "germline" not in callers:
            continue
        sample_name = normal["description"]
        if sample_name in seen_names:
            # The same normal can appear in several groups; emit it only once.
            continue
        seen_names.add(sample_name)
        normal["rgnames"]["sample"] = sample_name
        normal["metadata"]["batch"] = "%s-germline" % sample_name
        normal["metadata"]["phenotype"] = "germline"
        # NOTE(review): presumably drops alignment/QC steps for the germline
        # copy -- helper is defined elsewhere, confirm.
        normal = remove_align_qc_tools(normal)
        normal["config"]["algorithm"]["variantcaller"] = callers["germline"]
        germline_targets.append(normal)

    # Restrict somatic samples to their somatic callers; every somatic item
    # is kept, whether or not its specification needed rewriting.
    somatic_targets = []
    for sample in somatic:
        callers = dd.get_variantcaller(sample)
        uses_split_spec = isinstance(callers, dict) and "somatic" in callers
        if uses_split_spec:
            sample["config"]["algorithm"]["variantcaller"] = callers["somatic"]
        somatic_targets.append(sample)

    return non_somatic + somatic_targets + germline_targets