def run_gvcfgenotyper(data, orig_region, vrn_files, out_file):
    """Merge strelka2 and Illumina compatible gVCFs with gvcfgenotyper.

    https://github.com/Illumina/gvcfgenotyper

    Also need to explore GLnexus (https://github.com/dnanexus-rnd/GLnexus)

    Nothing is recomputed when ``out_file`` already exists. Otherwise the
    region is divided into blocks found from the first gVCF: a single block is
    genotyped straight into the transactional output, several blocks are
    genotyped one by one and then concatenated into it.

    Returns the result of ``vcfutils.bgzip_and_index`` on ``out_file``.
    """
    if not utils.file_exists(out_file):
        with file_transaction(data, out_file) as tx_out_file:
            first_gvcf = vrn_files[0]
            gatk_region = bamprep.region_to_gatk(orig_region)
            tx_dir = os.path.dirname(tx_out_file)
            blocks = _find_gvcf_blocks(first_gvcf, gatk_region, tx_dir)
            if len(blocks) == 1:
                _run_gvcfgenotyper(data, blocks[0], vrn_files, tx_out_file)
            else:
                block_outs = []
                for block in blocks:
                    # Per-block outputs are named after the final output, with
                    # the region separators made filename friendly.
                    out_base = utils.splitext_plus(out_file)[0]
                    block_tag = block.replace(":", "_").replace("-", "_")
                    block_out = "%s-%s.vcf.gz" % (out_base, block_tag)
                    block_outs.append(_run_gvcfgenotyper(data, block, vrn_files, block_out))
                vcfutils.concat_variant_files(block_outs, tx_out_file, blocks,
                                              dd.get_ref_file(data), data["config"])
    return vcfutils.bgzip_and_index(out_file, data["config"])
def gatk_rnaseq_calling(data):
    """Use GATK to perform gVCF variant calling on RNA-seq data.

    Operates on a copy of ``data``: "gvcf" is appended to the enabled tools and
    the joint callers are set to "<variantcaller>-joint" for each configured
    variant caller. Unless the per-sample output already exists, every split
    region is called separately with ``gatk.haplotype_caller`` and the
    per-region files are concatenated.

    Returns the copied ``data`` with the variant file set to the output path.
    """
    # Aliased so the function body does not shadow the builtin ``callable``.
    from bcbio.bam import callable as bam_callable
    data = utils.deepish_copy(data)
    tools_on = dd.get_tools_on(data) or []
    tools_on.append("gvcf")
    data = dd.set_tools_on(data, tools_on)
    joint_callers = ["%s-joint" % caller for caller in dd.get_variantcaller(data)]
    data = dd.set_jointcaller(data, joint_callers)
    caller_dir = os.path.join(dd.get_work_dir(data), "variation", "rnaseq", "gatk-haplotype")
    out_dir = utils.safe_makedir(caller_dir)
    data = _setup_variant_regions(data, out_dir)
    out_file = os.path.join(out_dir, "%s-gatk-haplotype.vcf.gz" % dd.get_sample_name(data))
    if not utils.file_exists(out_file):
        region_files = []
        regions = []
        split_regions = bam_callable.get_split_regions(dd.get_variant_regions(data), data)
        for cur_region in split_regions:
            region_tag = "_".join(str(part) for part in cur_region)
            region_dir = utils.safe_makedir(
                os.path.join(dd.get_work_dir(data), "variation", "rnaseq",
                             "gatk-haplotype", "regions"))
            region_name = "%s-%s-gatk-haplotype.vcf.gz" % (dd.get_sample_name(data), region_tag)
            region_vcf = gatk.haplotype_caller(
                [dd.get_split_bam(data)], [data], dd.get_ref_file(data), {},
                region=cur_region, out_file=os.path.join(region_dir, region_name))
            region_files.append(region_vcf)
            regions.append(cur_region)
        out_file = vcfutils.concat_variant_files(region_files, out_file, regions,
                                                 dd.get_ref_file(data), data["config"])
    return dd.set_vrn_file(data, out_file)
def _run_wham(inputs, background_bams):
    """Run WHAM on a defined set of inputs and targets.

    The output is named after the first input's sample and placed in its SV
    work directory. When it is missing, ``_run_wham_coords`` is run through
    ``run_multicore`` once per autosomal/X coordinate and the per-coordinate
    files are concatenated in coordinate order.

    Returns the path to the combined output file.
    """
    first = inputs[0]
    out_file = os.path.join(_sv_workdir(first), "%s-wham.vcf.gz" % dd.get_sample_name(first))
    if utils.file_exists(out_file):
        return out_file
    with file_transaction(first, out_file) as tx_out_file:
        coords = chromhacks.autosomal_or_x_coords(dd.get_ref_file(first))
        parallel = {"type": "local", "cores": dd.get_cores(first), "progs": []}
        jobs = [(inputs, background_bams, coord, out_file) for coord in coords]
        results = run_multicore(_run_wham_coords, jobs, first["config"], parallel)
        # Index results by coordinate so they can be emitted in `coords` order.
        vcf_by_coord = {}
        for coord, fname in results:
            vcf_by_coord[coord] = fname
        ordered_vcfs = [vcf_by_coord[c] for c in coords]
        vcfutils.concat_variant_files(ordered_vcfs, tx_out_file, coords,
                                      dd.get_ref_file(first), first["config"])
    return out_file
def run_gvcfgenotyper(data, orig_region, vrn_files, out_file):
    """Merge strelka2 and Illumina compatible gVCFs with gvcfgenotyper.

    https://github.com/Illumina/gvcfgenotyper

    Also need to explore GLnexus (https://github.com/dnanexus-rnd/GLnexus)

    An existing ``out_file`` is reused as is. Otherwise gVCF blocks are located
    within the requested region using the first input file and genotyped,
    either directly (one block) or per block followed by concatenation.

    Returns the result of ``vcfutils.bgzip_and_index`` on ``out_file``.
    """
    if utils.file_exists(out_file):
        return vcfutils.bgzip_and_index(out_file, data["config"])

    with file_transaction(data, out_file) as tx_out_file:
        regions = _find_gvcf_blocks(vrn_files[0],
                                    bamprep.region_to_gatk(orig_region),
                                    os.path.dirname(tx_out_file))
        if len(regions) != 1:
            split_outs = []
            for region in regions:
                # Region strings become part of the file name, so ":" and "-"
                # are swapped for underscores.
                region_out = "%s-%s.vcf.gz" % (
                    utils.splitext_plus(out_file)[0],
                    region.replace(":", "_").replace("-", "_"))
                split_outs.append(_run_gvcfgenotyper(data, region, vrn_files, region_out))
            vcfutils.concat_variant_files(split_outs, tx_out_file, regions,
                                          dd.get_ref_file(data), data["config"])
        else:
            _run_gvcfgenotyper(data, regions[0], vrn_files, tx_out_file)
    return vcfutils.bgzip_and_index(out_file, data["config"])
def concat_batch_variantcalls(items):
    """CWL entry point: combine variant calls from regions into single VCF.

    Regions and per-region variant files are read from the CWL extras split
    off from ``items``. The combined file is named after the batch and placed
    under the work directory in a folder named for the batch variant caller.

    Returns a dictionary with the combined file under "vrn_file".
    """
    items, cwl_extras = split_data_cwl_items(items)
    batch_name = _get_batch_name(items)
    variantcaller = _get_batch_variantcaller(items)
    first = items[0]
    out_file = os.path.join(dd.get_work_dir(first), variantcaller,
                            "%s.vcf.gz" % (batch_name))
    utils.safe_makedir(os.path.dirname(out_file))
    regions = []
    for region in cwl_extras["region"]:
        regions.append(_region_to_coords(region))
    region_vcfs = cwl_extras["vrn_file_region"]
    combined = vcfutils.concat_variant_files(region_vcfs, out_file, regions,
                                             dd.get_ref_file(first), first["config"])
    return {"vrn_file": combined}
def concat_batch_variantcalls(items):
    """CWL entry point: combine variant calls from regions into single VCF.

    Each item is converted with ``utils.to_single_data``. Regions and
    per-region variant files are taken from the first item.

    Returns a dictionary with the combined file under "vrn_file".
    """
    items = [utils.to_single_data(item) for item in items]
    batch_name = _get_batch_name(items)
    variantcaller = _get_batch_variantcaller(items)
    first = items[0]
    out_file = os.path.join(dd.get_work_dir(first), variantcaller,
                            "%s.vcf.gz" % (batch_name))
    utils.safe_makedir(os.path.dirname(out_file))
    regions = []
    for region in first["region"]:
        regions.append(_region_to_coords(region))
    region_vcfs = first["vrn_file_region"]
    combined = vcfutils.concat_variant_files(region_vcfs, out_file, regions,
                                             dd.get_ref_file(first), first["config"])
    return {"vrn_file": combined}
def concat_batch_variantcalls(items, region_block=True, skip_jointcheck=False):
    """CWL entry point: combine variant calls from regions into single VCF.

    With ``region_block`` set, the first entry of every "region_block" group
    on the first item is used as the region; otherwise the first item's
    "region" values are used directly. ``skip_jointcheck`` is passed through
    to ``_get_batch_name``.

    Returns a dictionary with the combined file under "vrn_file".
    """
    items = [utils.to_single_data(item) for item in items]
    batch_name = _get_batch_name(items, skip_jointcheck)
    variantcaller = _get_batch_variantcaller(items)
    first = items[0]
    out_file = os.path.join(dd.get_work_dir(first), variantcaller,
                            "%s.vcf.gz" % (batch_name))
    utils.safe_makedir(os.path.dirname(out_file))
    regions = ([_region_to_coords(block[0]) for block in first["region_block"]]
               if region_block
               else [_region_to_coords(region) for region in first["region"]])
    region_vcfs = first["vrn_file_region"]
    combined = vcfutils.concat_variant_files(region_vcfs, out_file, regions,
                                             dd.get_ref_file(first), first["config"])
    return {"vrn_file": combined}
def concat_batch_variantcalls(items):
    """CWL entry point: combine variant calls from regions into single VCF.

    Regions and per-region variant files come from the CWL extras when both
    "region" and "vrn_file_region" are present there; otherwise they are
    collected from each item.

    Returns a dictionary with the combined file under "vrn_file".
    """
    items, cwl_extras = split_data_cwl_items(items)
    batch_name = _get_batch_name(items)
    variantcaller = _get_batch_variantcaller(items)
    first = items[0]
    out_file = os.path.join(dd.get_work_dir(first), variantcaller,
                            "%s.vcf.gz" % (batch_name))
    utils.safe_makedir(os.path.dirname(out_file))
    have_extras = all(key in cwl_extras for key in ("region", "vrn_file_region"))
    if have_extras:
        raw_regions = cwl_extras["region"]
        region_vcfs = cwl_extras["vrn_file_region"]
    else:
        raw_regions = [item["region"] for item in items]
        region_vcfs = [item["vrn_file_region"] for item in items]
    regions = [_region_to_coords(region) for region in raw_regions]
    combined = vcfutils.concat_variant_files(region_vcfs, out_file, regions,
                                             dd.get_ref_file(first), first["config"])
    return {"vrn_file": combined}
def concat_batch_variantcalls(items, region_block=True, skip_jointcheck=False):
    """CWL entry point: combine variant calls from regions into single VCF.

    When no batch variant caller is found and every item already carries a
    "vrn_file", the first item's file is returned without concatenation.
    Otherwise regions come from the first item: the first entry of each
    "region_block" group when ``region_block`` is set, else its "region"
    values. ``skip_jointcheck`` is passed through to ``_get_batch_name``.

    Returns a dictionary with the resulting file under "vrn_file".
    """
    items = [utils.to_single_data(item) for item in items]
    batch_name = _get_batch_name(items, skip_jointcheck)
    variantcaller = _get_batch_variantcaller(items)
    # Pre-called input variant files
    if not variantcaller and all(item.get("vrn_file") for item in items):
        return {"vrn_file": items[0]["vrn_file"]}
    first = items[0]
    out_file = os.path.join(dd.get_work_dir(first), variantcaller,
                            "%s.vcf.gz" % (batch_name))
    utils.safe_makedir(os.path.dirname(out_file))
    regions = []
    if region_block:
        for block in first["region_block"]:
            regions.append(_region_to_coords(block[0]))
    else:
        for region in first["region"]:
            regions.append(_region_to_coords(region))
    region_vcfs = first["vrn_file_region"]
    combined = vcfutils.concat_variant_files(region_vcfs, out_file, regions,
                                             dd.get_ref_file(first), first["config"])
    return {"vrn_file": combined}
def concat_variant_files(*args):
    """Forward all positional arguments to ``vcfutils.concat_variant_files``.

    Returns whatever the wrapped function returns.
    """
    combined = vcfutils.concat_variant_files(*args)
    return combined