示例#1
0
def run_gvcfgenotyper(data, orig_region, vrn_files, out_file):
    """Merge strelka2 and Illumina compatible gVCFs with gvcfgenotyper.

    https://github.com/Illumina/gvcfgenotyper

    The requested region is converted with ``bamprep.region_to_gatk`` and
    split into blocks by ``_find_gvcf_blocks`` using the first gVCF. A single
    block is genotyped straight into the transactional output; several blocks
    are genotyped into per-block files (named from ``out_file`` plus the block
    with ":" and "-" replaced by "_") and then concatenated.

    Returns the result of ``vcfutils.bgzip_and_index`` on ``out_file``.

    Also need to explore GLnexus (https://github.com/dnanexus-rnd/GLnexus)
    """
    if not utils.file_exists(out_file):
        with file_transaction(data, out_file) as tx_out_file:
            first_gvcf = vrn_files[0]
            gatk_region = bamprep.region_to_gatk(orig_region)
            tx_dir = os.path.dirname(tx_out_file)
            blocks = _find_gvcf_blocks(first_gvcf, gatk_region, tx_dir)
            if len(blocks) != 1:
                split_outs = []
                for block in blocks:
                    out_base = utils.splitext_plus(out_file)[0]
                    block_tag = block.replace(":", "_").replace("-", "_")
                    block_out = "%s-%s.vcf.gz" % (out_base, block_tag)
                    split_outs.append(
                        _run_gvcfgenotyper(data, block, vrn_files, block_out))
                vcfutils.concat_variant_files(split_outs, tx_out_file, blocks,
                                              dd.get_ref_file(data),
                                              data["config"])
            else:
                # Only one block: write directly to the transactional output.
                _run_gvcfgenotyper(data, blocks[0], vrn_files, tx_out_file)
    return vcfutils.bgzip_and_index(out_file, data["config"])
示例#2
0
def gatk_rnaseq_calling(data):
    """Use GATK to perform gVCF variant calling on RNA-seq data.

    Works on a copy of ``data``: adds "gvcf" to ``tools_on``, sets the joint
    callers to "<variantcaller>-joint", then runs ``gatk.haplotype_caller``
    on the split BAM once per region from ``callable.get_split_regions`` and
    concatenates the per-region files. An existing output file is reused.

    Returns ``data`` with the variant file set through ``dd.set_vrn_file``.
    """
    from bcbio.bam import callable as bam_callable
    data = utils.deepish_copy(data)
    tools_on = dd.get_tools_on(data) or []
    tools_on.append("gvcf")
    data = dd.set_tools_on(data, tools_on)
    joint_callers = ["%s-joint" % caller for caller in dd.get_variantcaller(data)]
    data = dd.set_jointcaller(data, joint_callers)
    base_dir = os.path.join(dd.get_work_dir(data),
                            "variation", "rnaseq", "gatk-haplotype")
    out_dir = utils.safe_makedir(base_dir)
    data = _setup_variant_regions(data, out_dir)
    out_name = "%s-gatk-haplotype.vcf.gz" % dd.get_sample_name(data)
    out_file = os.path.join(out_dir, out_name)
    if not utils.file_exists(out_file):
        region_files = []
        regions = []
        split_regions = bam_callable.get_split_regions(dd.get_variant_regions(data), data)
        for cur_region in split_regions:
            region_tag = "_".join(str(part) for part in cur_region)
            regions_dir = utils.safe_makedir(
                os.path.join(dd.get_work_dir(data),
                             "variation", "rnaseq", "gatk-haplotype", "regions"))
            region_name = "%s-%s-gatk-haplotype.vcf.gz" % (dd.get_sample_name(data), region_tag)
            region_target = os.path.join(regions_dir, region_name)
            called = gatk.haplotype_caller([dd.get_split_bam(data)], [data],
                                           dd.get_ref_file(data), {},
                                           region=cur_region, out_file=region_target)
            region_files.append(called)
            regions.append(cur_region)
        out_file = vcfutils.concat_variant_files(region_files, out_file, regions,
                                                 dd.get_ref_file(data), data["config"])
    return dd.set_vrn_file(data, out_file)
示例#3
0
def _run_wham(inputs, background_bams):
    """Run WHAM on a defined set of inputs and targets.

    One ``_run_wham_coords`` job is dispatched through ``run_multicore`` for
    each coordinate from ``chromhacks.autosomal_or_x_coords``. The per-coordinate
    outputs are then concatenated, in coordinate order, into a single
    "<sample>-wham.vcf.gz" file in the SV work directory of the first input.
    An existing output file is returned untouched.
    """
    lead = inputs[0]
    out_file = os.path.join(_sv_workdir(lead), "%s-wham.vcf.gz" % dd.get_sample_name(lead))
    if utils.file_exists(out_file):
        return out_file
    with file_transaction(lead, out_file) as tx_out_file:
        coords = chromhacks.autosomal_or_x_coords(dd.get_ref_file(lead))
        parallel = {"type": "local", "cores": dd.get_cores(lead), "progs": []}
        jobs = [(inputs, background_bams, coord, out_file) for coord in coords]
        results = run_multicore(_run_wham_coords, jobs, lead["config"], parallel)
        # Index the results so they can be concatenated in coordinate order.
        by_coord = {}
        for coord, fname in results:
            by_coord[coord] = fname
        ordered_files = [by_coord[c] for c in coords]
        vcfutils.concat_variant_files(ordered_files, tx_out_file, coords,
                                      dd.get_ref_file(lead), lead["config"])
    return out_file
示例#4
0
def _run_wham(inputs, background_bams):
    """Run WHAM on a defined set of inputs and targets.

    When the "<sample>-wham.vcf.gz" output is missing, runs
    ``_run_wham_coords`` through ``run_multicore`` for every coordinate from
    ``chromhacks.autosomal_or_x_coords`` and concatenates the results in that
    coordinate order. Returns the output file path.
    """
    first = inputs[0]
    sample_vcf = "%s-wham.vcf.gz" % dd.get_sample_name(first)
    out_file = os.path.join(_sv_workdir(first), sample_vcf)
    if not utils.file_exists(out_file):
        with file_transaction(first, out_file) as tx_out_file:
            ref_file = dd.get_ref_file(first)
            coords = chromhacks.autosomal_or_x_coords(ref_file)
            parallel = {"type": "local", "cores": dd.get_cores(first), "progs": []}
            coord_args = []
            for coord in coords:
                coord_args.append((inputs, background_bams, coord, out_file))
            coord_outs = run_multicore(_run_wham_coords, coord_args,
                                       first["config"], parallel)
            fname_by_coord = {coord: fname for (coord, fname) in coord_outs}
            vcfutils.concat_variant_files([fname_by_coord[c] for c in coords],
                                          tx_out_file, coords,
                                          dd.get_ref_file(first), first["config"])
    return out_file
示例#5
0
def gatk_rnaseq_calling(data):
    """Use GATK to perform gVCF variant calling on RNA-seq data.

    Operates on a copy of ``data`` with "gvcf" appended to ``tools_on`` and
    the joint callers set to "<variantcaller>-joint". If the combined
    "<sample>-gatk-haplotype.vcf.gz" file is absent, each split region is
    called with ``gatk.haplotype_caller`` and the per-region files are
    concatenated. Returns ``data`` updated through ``dd.set_vrn_file``.
    """
    from bcbio.bam import callable as bam_callable
    data = utils.deepish_copy(data)
    enabled_tools = dd.get_tools_on(data)
    if not enabled_tools:
        enabled_tools = []
    enabled_tools.append("gvcf")
    data = dd.set_tools_on(data, enabled_tools)
    data = dd.set_jointcaller(data, ["%s-joint" % vc for vc in dd.get_variantcaller(data)])
    out_dir = utils.safe_makedir(
        os.path.join(dd.get_work_dir(data), "variation", "rnaseq", "gatk-haplotype"))
    data = _setup_variant_regions(data, out_dir)
    out_file = os.path.join(out_dir, "%s-gatk-haplotype.vcf.gz" % dd.get_sample_name(data))
    if utils.file_exists(out_file):
        # Combined calls already exist; just record them on the sample.
        return dd.set_vrn_file(data, out_file)
    per_region_files = []
    called_regions = []
    for cur_region in bam_callable.get_split_regions(dd.get_variant_regions(data), data):
        region_label = "_".join([str(field) for field in cur_region])
        regions_dir = utils.safe_makedir(
            os.path.join(dd.get_work_dir(data),
                         "variation", "rnaseq", "gatk-haplotype", "regions"))
        region_file = os.path.join(
            regions_dir,
            "%s-%s-gatk-haplotype.vcf.gz" % (dd.get_sample_name(data), region_label))
        region_file = gatk.haplotype_caller([dd.get_split_bam(data)], [data],
                                            dd.get_ref_file(data), {},
                                            region=cur_region, out_file=region_file)
        per_region_files.append(region_file)
        called_regions.append(cur_region)
    out_file = vcfutils.concat_variant_files(per_region_files, out_file, called_regions,
                                             dd.get_ref_file(data), data["config"])
    return dd.set_vrn_file(data, out_file)
示例#6
0
def run_gvcfgenotyper(data, orig_region, vrn_files, out_file):
    """Merge strelka2 and Illumina compatible gVCFs with gvcfgenotyper.

    https://github.com/Illumina/gvcfgenotyper

    ``_find_gvcf_blocks`` splits the GATK-formatted region into blocks based
    on the first gVCF. One block is genotyped directly into the transactional
    output; multiple blocks are genotyped separately and concatenated.
    Returns the result of ``vcfutils.bgzip_and_index`` on ``out_file``.

    Also need to explore GLnexus (https://github.com/dnanexus-rnd/GLnexus)
    """
    def _block_out(region):
        # Per-block output name: base of out_file plus a filesystem-safe region tag.
        return "%s-%s.vcf.gz" % (utils.splitext_plus(out_file)[0],
                                 region.replace(":", "_").replace("-", "_"))

    already_done = utils.file_exists(out_file)
    if not already_done:
        with file_transaction(data, out_file) as tx_out_file:
            regions = _find_gvcf_blocks(vrn_files[0],
                                        bamprep.region_to_gatk(orig_region),
                                        os.path.dirname(tx_out_file))
            single_block = len(regions) == 1
            if single_block:
                _run_gvcfgenotyper(data, regions[0], vrn_files, tx_out_file)
            else:
                split_outs = [_run_gvcfgenotyper(data, region, vrn_files, _block_out(region))
                              for region in regions]
                vcfutils.concat_variant_files(split_outs, tx_out_file, regions,
                                              dd.get_ref_file(data), data["config"])
    return vcfutils.bgzip_and_index(out_file, data["config"])
示例#7
0
def concat_batch_variantcalls(items):
    """CWL entry point: combine variant calls from regions into single VCF.

    Splits ``items`` into data items and CWL extras, then concatenates the
    per-region files in ``cwl_extras["vrn_file_region"]`` into
    "<work_dir>/<variantcaller>/<batch_name>.vcf.gz", using the regions in
    ``cwl_extras["region"]`` converted with ``_region_to_coords``.

    Returns a dictionary with the combined file under "vrn_file".
    """
    items, cwl_extras = split_data_cwl_items(items)
    batch_name = _get_batch_name(items)
    variantcaller = _get_batch_variantcaller(items)
    batch_vcf = "%s.vcf.gz" % (batch_name)
    out_file = os.path.join(dd.get_work_dir(items[0]), variantcaller, batch_vcf)
    utils.safe_makedir(os.path.dirname(out_file))
    regions = []
    for raw_region in cwl_extras["region"]:
        regions.append(_region_to_coords(raw_region))
    combined = vcfutils.concat_variant_files(cwl_extras["vrn_file_region"], out_file, regions,
                                             dd.get_ref_file(items[0]), items[0]["config"])
    return {"vrn_file": combined}
示例#8
0
def concat_batch_variantcalls(items):
    """CWL entry point: combine variant calls from regions into single VCF.

    Each item is normalized with ``utils.to_single_data``. The regions and
    per-region variant files are read from the first item ("region" and
    "vrn_file_region") and concatenated into
    "<work_dir>/<variantcaller>/<batch_name>.vcf.gz".

    Returns a dictionary with the combined file under "vrn_file".
    """
    items = [utils.to_single_data(item) for item in items]
    batch_name = _get_batch_name(items)
    variantcaller = _get_batch_variantcaller(items)
    target_dir = os.path.join(dd.get_work_dir(items[0]), variantcaller)
    out_file = os.path.join(target_dir, "%s.vcf.gz" % (batch_name))
    utils.safe_makedir(os.path.dirname(out_file))
    regions = []
    for raw_region in items[0]["region"]:
        regions.append(_region_to_coords(raw_region))
    vrn_file_regions = items[0]["vrn_file_region"]
    combined = vcfutils.concat_variant_files(vrn_file_regions, out_file, regions,
                                             dd.get_ref_file(items[0]), items[0]["config"])
    return {"vrn_file": combined}
示例#9
0
def concat_batch_variantcalls(items, region_block=True, skip_jointcheck=False):
    """CWL entry point: combine variant calls from regions into single VCF.

    With ``region_block`` set, the regions are the first entry of every block
    in the first item's "region_block"; otherwise they come from its "region"
    list. Each is converted with ``_region_to_coords``. ``skip_jointcheck`` is
    passed through to ``_get_batch_name``.

    Returns a dictionary with the combined file under "vrn_file".
    """
    items = [utils.to_single_data(item) for item in items]
    batch_name = _get_batch_name(items, skip_jointcheck)
    variantcaller = _get_batch_variantcaller(items)
    out_file = os.path.join(dd.get_work_dir(items[0]), variantcaller,
                            "%s.vcf.gz" % (batch_name))
    utils.safe_makedir(os.path.dirname(out_file))
    if region_block:
        # Lazy generator keeps the per-element evaluation order of the lookup
        # and the coordinate conversion.
        raw_regions = (block[0] for block in items[0]["region_block"])
    else:
        raw_regions = items[0]["region"]
    regions = [_region_to_coords(raw) for raw in raw_regions]
    vrn_file_regions = items[0]["vrn_file_region"]
    combined = vcfutils.concat_variant_files(vrn_file_regions, out_file, regions,
                                             dd.get_ref_file(items[0]), items[0]["config"])
    return {"vrn_file": combined}
示例#10
0
def concat_batch_variantcalls(items):
    """CWL entry point: combine variant calls from regions into single VCF.

    Regions and per-region variant files come from the CWL extras when both
    "region" and "vrn_file_region" are present there; otherwise they are
    collected from every item. Regions are converted with
    ``_region_to_coords`` before concatenation.

    Returns a dictionary with the combined file under "vrn_file".
    """
    items, cwl_extras = split_data_cwl_items(items)
    batch_name = _get_batch_name(items)
    variantcaller = _get_batch_variantcaller(items)
    out_file = os.path.join(dd.get_work_dir(items[0]), variantcaller,
                            "%s.vcf.gz" % (batch_name))
    utils.safe_makedir(os.path.dirname(out_file))
    extras_complete = all(key in cwl_extras for key in ("region", "vrn_file_region"))
    if not extras_complete:
        raw_regions = [item["region"] for item in items]
        vrn_file_regions = [item["vrn_file_region"] for item in items]
    else:
        raw_regions = cwl_extras["region"]
        vrn_file_regions = cwl_extras["vrn_file_region"]
    regions = [_region_to_coords(raw) for raw in raw_regions]
    combined = vcfutils.concat_variant_files(vrn_file_regions, out_file, regions,
                                             dd.get_ref_file(items[0]), items[0]["config"])
    return {"vrn_file": combined}
示例#11
0
def concat_batch_variantcalls(items, region_block=True, skip_jointcheck=False):
    """CWL entry point: combine variant calls from regions into single VCF.

    When no batch variant caller is found and every item already has a
    "vrn_file", the first item's "vrn_file" is returned unchanged. Otherwise
    the first item's per-region files are concatenated into
    "<work_dir>/<variantcaller>/<batch_name>.vcf.gz", with regions taken from
    "region_block" (first entry of each block) or "region" depending on
    ``region_block``.

    Returns a dictionary with the resulting file under "vrn_file".
    """
    items = [utils.to_single_data(item) for item in items]
    batch_name = _get_batch_name(items, skip_jointcheck)
    variantcaller = _get_batch_variantcaller(items)
    # Pre-called input variant files
    if not variantcaller:
        if all(item.get("vrn_file") for item in items):
            return {"vrn_file": items[0]["vrn_file"]}
    out_file = os.path.join(dd.get_work_dir(items[0]), variantcaller,
                            "%s.vcf.gz" % (batch_name))
    utils.safe_makedir(os.path.dirname(out_file))
    regions = []
    if region_block:
        for block in items[0]["region_block"]:
            regions.append(_region_to_coords(block[0]))
    else:
        for raw_region in items[0]["region"]:
            regions.append(_region_to_coords(raw_region))
    vrn_file_regions = items[0]["vrn_file_region"]
    combined = vcfutils.concat_variant_files(vrn_file_regions, out_file, regions,
                                             dd.get_ref_file(items[0]), items[0]["config"])
    return {"vrn_file": combined}
示例#12
0
def concat_variant_files(*args):
    """Forward all positional arguments to ``vcfutils.concat_variant_files``.

    Returns whatever the wrapped function returns.
    """
    combined = vcfutils.concat_variant_files(*args)
    return combined
示例#13
0
def concat_variant_files(*args):
    """Thin pass-through to ``vcfutils.concat_variant_files``.

    Accepts positional arguments only and returns the wrapped call's result.
    """
    result = vcfutils.concat_variant_files(*args)
    return result