Пример #1
0
def postprocess_variants(data):
    """Annotate, filter and prioritize the variant calls of one sample.

    Work is only done when ``data`` has truthy ``align_bam`` and ``vrn_file``
    entries.  In that case ``data["vrn_file"]`` is successively replaced by
    the output of effects annotation (``effects.snpeff_effects`` or
    ``effects.run_vep``, selected through ``effects.get_type``),
    ``variant_filtration`` and ``prioritize.handle_vcf_calls``.

    Returns:
        ``[[data]]`` -- the (possibly updated) sample dictionary nested in
        two lists.

    Raises:
        ValueError: if ``effects.get_type`` yields a truthy value other than
            ``"snpeff"`` or ``"vep"``.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    # Without both an alignment and a call file there is nothing to do.
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    effect_tool = effects.get_type(data)
    if effect_tool:
        if effect_tool not in ("snpeff", "vep"):
            raise ValueError(
                "Unexpected variant effects configuration: %s" %
                effect_tool)
        if effect_tool == "snpeff":
            annotated = effects.snpeff_effects(data)
        else:
            annotated = effects.run_vep(data)
        # A falsy result leaves the current call file in place.
        if annotated:
            data["vrn_file"] = annotated

    logger.info("Filtering for %s" % label)
    current_vcf = data["vrn_file"]
    ref_file = data["sam_ref"]
    variation = tz.get_in(("genome_resources", "variation"), data, {})
    data["vrn_file"] = variant_filtration(current_vcf, ref_file, variation,
                                          data)

    logger.info("Prioritization for %s" % label)
    data["vrn_file"] = prioritize.handle_vcf_calls(data["vrn_file"], data)
    return [[data]]
Пример #2
0
def finalize_genotyper(call_file, ref_file, config):
    """Filter raw calls and evaluate the filtered result.

    Looks up the configured variation files for ``ref_file``, runs
    ``variant_filtration`` on ``call_file`` and passes the filtered file
    together with the ``dbsnp`` attribute of those variation files to
    ``_eval_genotyper``.

    Returns:
        The value returned by ``variant_filtration``.
    """
    variation = configured_vrn_files(config, ref_file)
    filtered = variant_filtration(call_file, ref_file, variation, config)
    _eval_genotyper(filtered, ref_file, variation.dbsnp, config)
    return filtered
Пример #3
0
def postprocess_variants(items):
    """Annotate, filter and prioritize the variant calls of a batch.

    ``items`` is either a dict or a sequence; a sequence is normalised with
    ``utils.to_single_data`` and, when its first element has a
    ``vrn_file_joint`` key, that key is processed instead of ``vrn_file``.
    A representative ``data`` is chosen by ``_get_batch_representative``.

    When ``data`` has a call file under the selected key, these steps run in
    order, each updating ``data``:

    1. ``effects.add_to_vcf`` (statistics are stored as ``vrn_stats``).
    2. ``annotation.finalize_vcf``.
    3. RNA editing annotation when the analysis name contains ``rna-seq``.
    4. Population annotation when ``cwlutils.is_cwl_run`` is true.
    5. ``variant_filtration``.
    6. ``prioritize.handle_vcf_calls``; ``germline.extract`` follows only if
       prioritization returned a different file.
    7. ``damage.run_filter`` when an alignment BAM is available.

    Finally, if the resulting file is the same file on disk as the input
    (``os.path.samefile``), the original input path is put back.

    Returns:
        ``[[data]]``.
    """
    if isinstance(items, dict):
        vrn_key = "vrn_file"
    else:
        items = [utils.to_single_data(x) for x in items]
        vrn_key = ("vrn_file_joint" if "vrn_file_joint" in items[0]
                   else "vrn_file")
    data, items = _get_batch_representative(items, vrn_key)
    items = cwlutils.unpack_tarballs(items, data)
    data = cwlutils.unpack_tarballs(data, data)
    label = "%s, %s" % (dd.get_sample_name(data), get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    input_vcf = data.get(vrn_key)
    for key_path in ([vrn_key], ["config", "algorithm", "variant_regions"]):
        data = _symlink_to_workdir(data, key_path)

    if data.get(vrn_key):
        logger.info("Calculating variation effects for %s" % label)
        annotated, stats = effects.add_to_vcf(data[vrn_key], data)
        if annotated:
            data[vrn_key] = annotated
        if stats:
            data["vrn_stats"] = stats

        originals = _get_orig_items(items)
        logger.info("Annotate VCF file: %s" % label)
        data[vrn_key] = annotation.finalize_vcf(
            data[vrn_key], get_variantcaller(data), originals)

        # NOTE(review): the two vcfanno steps below read their input through
        # dd.get_vrn_file(data) rather than data[vrn_key] -- confirm this is
        # intended when vrn_key is "vrn_file_joint".
        if "rna-seq" in dd.get_analysis(data).lower():
            logger.info("Annotate RNA editing sites")
            rnaedit_vcf = vcfanno.run_vcfanno(
                dd.get_vrn_file(data), ["rnaedit"], data)
            if rnaedit_vcf:
                data[vrn_key] = rnaedit_vcf
        if cwlutils.is_cwl_run(data):
            logger.info("Annotate with population level variation data")
            population_vcf = population.run_vcfanno(
                dd.get_vrn_file(data), data, population.do_db_build([data]))
            if population_vcf:
                data[vrn_key] = population_vcf

        logger.info("Filtering for %s" % label)
        data[vrn_key] = variant_filtration(
            data[vrn_key],
            dd.get_ref_file(data),
            tz.get_in(("genome_resources", "variation"), data, {}),
            data,
            originals)

        logger.info("Prioritization for %s" % label)
        prioritized = prioritize.handle_vcf_calls(
            data[vrn_key], data, originals)
        # Germline extraction only follows when prioritization returned a
        # file different from the one it was given.
        if prioritized != data[vrn_key]:
            data[vrn_key] = prioritized
            logger.info("Germline extraction for %s" % label)
            data = germline.extract(data, originals)

        if dd.get_align_bam(data):
            data = damage.run_filter(
                data[vrn_key], dd.get_align_bam(data), dd.get_ref_file(data),
                data, originals)

    # Report the caller's own path when the file on disk is still the input.
    if input_vcf:
        if os.path.samefile(data[vrn_key], input_vcf):
            data[vrn_key] = input_vcf
    return [[data]]
Пример #4
0
def postprocess_variants(items):
    """Annotate, filter and prioritize the variant calls of a batch.

    A representative ``data`` is taken from ``items`` with
    ``_get_batch_representative``.  When it has truthy ``align_bam`` and
    ``vrn_file`` entries, ``data["vrn_file"]`` goes through
    ``effects.add_to_vcf`` (statistics are stored as ``vrn_stats``),
    ``annotation.finalize_vcf``, ``variant_filtration`` and
    ``prioritize.handle_vcf_calls``, after which ``germline.extract`` is
    applied to ``data``.

    If the final ``vrn_file`` is the same file on disk as the one initially
    present (``os.path.samefile``), the initial path is put back.

    Returns:
        ``[[data]]``.
    """
    data = _get_batch_representative(items, "vrn_file")
    label = "%s, %s" % (dd.get_sample_name(data), get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    input_vcf = data.get("vrn_file")
    for key_path in (["vrn_file"], ["config", "algorithm", "variant_regions"]):
        data = _symlink_to_workdir(data, key_path)

    if data.get("align_bam") and data.get("vrn_file"):
        logger.info("Calculating variation effects for %s" % label)
        annotated, stats = effects.add_to_vcf(data["vrn_file"], data)
        if annotated:
            data["vrn_file"] = annotated
        if stats:
            data["vrn_stats"] = stats

        originals = _get_orig_items(items)
        logger.info("Annotate VCF file: %s" % label)
        data["vrn_file"] = annotation.finalize_vcf(
            data["vrn_file"], get_variantcaller(data), originals)

        logger.info("Filtering for %s" % label)
        data["vrn_file"] = variant_filtration(
            data["vrn_file"],
            dd.get_ref_file(data),
            tz.get_in(("genome_resources", "variation"), data, {}),
            data,
            originals)

        logger.info("Prioritization for %s" % label)
        data["vrn_file"] = prioritize.handle_vcf_calls(
            data["vrn_file"], data, originals)

        logger.info("Germline extraction for %s" % label)
        data = germline.extract(data, originals)

    # Report the caller's own path when the file on disk is still the input.
    if input_vcf:
        if os.path.samefile(data["vrn_file"], input_vcf):
            data["vrn_file"] = input_vcf
    return [[data]]
Пример #5
0
def postprocess_variants(items):
    """Annotate, filter and prioritize the variant calls of a batch.

    A representative ``data`` is taken from ``items`` with
    ``_get_batch_representative``.  When it has truthy ``align_bam`` and
    ``vrn_file`` entries, ``data["vrn_file"]`` goes through
    ``effects.add_to_vcf`` (statistics are stored as ``vrn_stats``),
    ``variant_filtration`` and ``prioritize.handle_vcf_calls``, after which
    ``germline.extract`` is applied to ``data``.

    If the final ``vrn_file`` is the same file on disk as the one initially
    present (``os.path.samefile``), the initial path is put back.

    Returns:
        ``[[data]]``.
    """
    data = _get_batch_representative(items, "vrn_file")
    label = "%s, %s" % (dd.get_sample_name(data), get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    input_vcf = data.get("vrn_file")
    for key_path in (["vrn_file"], ["config", "algorithm", "variant_regions"]):
        data = _symlink_to_workdir(data, key_path)

    if data.get("align_bam") and data.get("vrn_file"):
        logger.info("Calculating variation effects for %s" % label)
        annotated, stats = effects.add_to_vcf(data["vrn_file"], data)
        if annotated:
            data["vrn_file"] = annotated
        if stats:
            data["vrn_stats"] = stats

        logger.info("Filtering for %s" % label)
        originals = _get_orig_items(items)
        data["vrn_file"] = variant_filtration(
            data["vrn_file"],
            dd.get_ref_file(data),
            tz.get_in(("genome_resources", "variation"), data, {}),
            data,
            originals)

        logger.info("Prioritization for %s" % label)
        data["vrn_file"] = prioritize.handle_vcf_calls(
            data["vrn_file"], data, originals)

        logger.info("Germline extraction for %s" % label)
        data = germline.extract(data, originals)

    # Report the caller's own path when the file on disk is still the input.
    if input_vcf:
        if os.path.samefile(data["vrn_file"], input_vcf):
            data["vrn_file"] = input_vcf
    return [[data]]
Пример #6
0
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Annotate (for selected callers) and filter a call file.

    The caller name is read from ``config["algorithm"]["variantcaller"]``
    and defaults to ``"gatk"``.  For freebayes, cortex, samtools,
    gatk-haplotype and varscan the calls are first passed through
    ``annotation.annotate_nongatk_vcf``; every caller's output is then run
    through ``variant_filtration``.

    Returns:
        The value returned by ``variant_filtration``.
    """
    variation = configured_vrn_files(config, ref_file)
    caller = config["algorithm"].get("variantcaller", "gatk")
    annotated_callers = ("freebayes", "cortex", "samtools", "gatk-haplotype",
                         "varscan")
    if caller in annotated_callers:
        call_file = annotation.annotate_nongatk_vcf(
            call_file, bam_file, variation.dbsnp, ref_file, config)
    return variant_filtration(call_file, ref_file, variation, config)
Пример #7
0
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Annotate (for selected callers), filter, phase and evaluate calls.

    The caller name is read from ``config["algorithm"]["variantcaller"]``
    and defaults to ``"gatk"``.  For freebayes and cortex the calls are first
    passed through ``freebayes.postcall_annotate``.  The calls are then
    filtered with ``variant_filtration``, phased with
    ``phasing.read_backed_phasing`` and handed to ``_eval_genotyper``.

    Returns:
        The value returned by ``phasing.read_backed_phasing``.
    """
    variation = configured_vrn_files(config, ref_file)
    caller = config["algorithm"].get("variantcaller", "gatk")
    if caller in ("freebayes", "cortex"):
        call_file = freebayes.postcall_annotate(
            call_file, ref_file, variation, config)
    filtered = variant_filtration(call_file, ref_file, variation, config)
    phased = phasing.read_backed_phasing(filtered, bam_file, ref_file, config)
    _eval_genotyper(phased, ref_file, variation.dbsnp, config)
    return phased
Пример #8
0
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Annotate (for selected callers), filter, phase and evaluate calls.

    The caller name is read from ``config["algorithm"]["variantcaller"]``
    and defaults to ``"gatk"``.  For freebayes, cortex and samtools the calls
    are first passed through ``freebayes.postcall_annotate``.  The calls are
    then filtered with ``variant_filtration``, phased with
    ``phasing.read_backed_phasing`` and handed to ``_eval_genotyper``.

    Returns:
        The value returned by ``phasing.read_backed_phasing``.
    """
    variation = configured_vrn_files(config, ref_file)
    caller = config["algorithm"].get("variantcaller", "gatk")
    if caller in ("freebayes", "cortex", "samtools"):
        call_file = freebayes.postcall_annotate(
            call_file, ref_file, variation, config)
    filtered = variant_filtration(call_file, ref_file, variation, config)
    phased = phasing.read_backed_phasing(filtered, bam_file, ref_file, config)
    _eval_genotyper(phased, ref_file, variation.dbsnp, config)
    return phased
Пример #9
0
def postprocess_variants(data):
    """Filter the variant calls of one sample, then annotate their effects.

    Work is only done when ``data["work_bam"]`` and ``data.get("vrn_file")``
    are both truthy; ``work_bam`` is read with plain indexing, so the key
    must be present.  ``data["vrn_file"]`` is replaced by the output of
    ``variant_filtration`` and then, if ``effects.snpeff_effects`` returns a
    truthy value, by that value.

    Returns:
        ``[[data]]``.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    raw_vcf = data["vrn_file"]
    ref_file = data["sam_ref"]
    variation = data["genome_resources"]["variation"]
    data["vrn_file"] = variant_filtration(raw_vcf, ref_file, variation,
                                          data["config"])

    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = effects.snpeff_effects(data)
    # A falsy result leaves the filtered file in place.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Пример #10
0
def postprocess_variants(data):
    """Filter the variant calls of one sample, then annotate their effects.

    Work is only done when ``data["work_bam"]`` and ``data.get("vrn_file")``
    are both truthy; ``work_bam`` is read with plain indexing, so the key
    must be present.  The variation files come from ``configured_vrn_files``.
    ``data["vrn_file"]`` is replaced by the output of ``variant_filtration``
    and then, if ``effects.snpeff_effects`` returns a truthy value, by that
    value.

    Returns:
        ``[[data]]``.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    variation = configured_vrn_files(data["config"], data["sam_ref"])
    raw_vcf = data["vrn_file"]
    ref_file = data["sam_ref"]
    data["vrn_file"] = variant_filtration(raw_vcf, ref_file, variation,
                                          data["config"])

    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = effects.snpeff_effects(data)
    # A falsy result leaves the filtered file in place.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Пример #11
0
def finalize_genotyper(call_file, bam_file, ref_file, config):
    """Annotate (for selected callers) and filter a call file.

    The caller name is read from ``config["algorithm"]["variantcaller"]``
    and defaults to ``"gatk"``.  For freebayes, cortex, samtools,
    gatk-haplotype and varscan the calls are first passed through
    ``annotation.annotate_nongatk_vcf``; every caller's output is then run
    through ``variant_filtration``.

    Returns:
        The value returned by ``variant_filtration``.
    """
    variation = configured_vrn_files(config, ref_file)
    caller = config["algorithm"].get("variantcaller", "gatk")
    annotated_callers = ("freebayes", "cortex", "samtools", "gatk-haplotype",
                         "varscan")
    if caller in annotated_callers:
        call_file = annotation.annotate_nongatk_vcf(
            call_file, bam_file, variation.dbsnp, ref_file, config)
    return variant_filtration(call_file, ref_file, variation, config)
Пример #12
0
def postprocess_variants(data):
    """Filter the variant calls of one sample, then annotate their effects.

    Work is only done when ``data["work_bam"]`` and ``data.get("vrn_file")``
    are both truthy; ``work_bam`` is read with plain indexing, so the key
    must be present.  ``data["vrn_file"]`` is replaced by the output of
    ``variant_filtration`` and then, if ``effects.snpeff_effects`` returns a
    truthy value, by that value.

    Returns:
        ``[[data]]``.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    raw_vcf = data["vrn_file"]
    ref_file = data["sam_ref"]
    variation = data["genome_resources"]["variation"]
    data["vrn_file"] = variant_filtration(raw_vcf, ref_file, variation,
                                          data["config"])

    logger.info("Calculating variation effects for %s" % label)
    annotated = effects.snpeff_effects(data)
    # A falsy result leaves the filtered file in place.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Пример #13
0
def postprocess_variants(items):
    """Annotate, filter and prioritize the variant calls of a batch.

    ``items`` is either a dict or a sequence; a sequence is normalised with
    ``utils.to_single_data`` and, when its first element has a
    ``vrn_file_joint`` key, that key is processed instead of ``vrn_file``.
    A representative ``data`` is chosen by ``_get_batch_representative``.

    When ``data`` has a call file under the selected key, these steps run in
    order, each updating ``data``:

    1. ``effects.add_to_vcf`` (statistics are stored as ``vrn_stats``).
    2. ``annotation.finalize_vcf``.
    3. Population annotation when ``cwlutils.is_cwl_run`` is true.
    4. ``variant_filtration``.
    5. ``prioritize.handle_vcf_calls``; ``germline.extract`` follows only if
       prioritization returned a different file.
    6. ``damage.run_filter`` when an alignment BAM is available.

    Finally, if the resulting file is the same file on disk as the input
    (``os.path.samefile``), the original input path is put back.

    Returns:
        ``[[data]]``.
    """
    if isinstance(items, dict):
        vrn_key = "vrn_file"
    else:
        items = [utils.to_single_data(x) for x in items]
        vrn_key = ("vrn_file_joint" if "vrn_file_joint" in items[0]
                   else "vrn_file")
    data, items = _get_batch_representative(items, vrn_key)
    items = cwlutils.unpack_tarballs(items, data)
    data = cwlutils.unpack_tarballs(data, data)
    label = "%s, %s" % (dd.get_sample_name(data),
                        get_variantcaller(data, require_bam=False))
    logger.info("Finalizing variant calls: %s" % label)
    input_vcf = data.get(vrn_key)
    for key_path in ([vrn_key], ["config", "algorithm", "variant_regions"]):
        data = _symlink_to_workdir(data, key_path)

    if data.get(vrn_key):
        logger.info("Calculating variation effects for %s" % label)
        annotated, stats = effects.add_to_vcf(data[vrn_key], data)
        if annotated:
            data[vrn_key] = annotated
        if stats:
            data["vrn_stats"] = stats

        originals = _get_orig_items(items)
        logger.info("Annotate VCF file: %s" % label)
        data[vrn_key] = annotation.finalize_vcf(
            data[vrn_key],
            get_variantcaller(data, require_bam=False),
            originals)

        if cwlutils.is_cwl_run(data):
            logger.info("Annotate with population level variation data")
            population_vcf = population.run_vcfanno(data[vrn_key], data)
            if population_vcf:
                data[vrn_key] = population_vcf

        logger.info("Filtering for %s" % label)
        data[vrn_key] = variant_filtration(
            data[vrn_key],
            dd.get_ref_file(data),
            tz.get_in(("genome_resources", "variation"), data, {}),
            data,
            originals)

        logger.info("Prioritization for %s" % label)
        prioritized = prioritize.handle_vcf_calls(
            data[vrn_key], data, originals)
        # Germline extraction only follows when prioritization returned a
        # file different from the one it was given.
        if prioritized != data[vrn_key]:
            data[vrn_key] = prioritized
            logger.info("Germline extraction for %s" % label)
            data = germline.extract(data, originals)

        if dd.get_align_bam(data):
            data = damage.run_filter(
                data[vrn_key], dd.get_align_bam(data), dd.get_ref_file(data),
                data, originals)

    # Report the caller's own path when the file on disk is still the input.
    if input_vcf:
        if os.path.samefile(data[vrn_key], input_vcf):
            data[vrn_key] = input_vcf
    return [[data]]
Пример #14
0
def postprocess_variants(data):
    """Annotate the effects of one sample's variant calls, then filter them.

    Work is only done when ``data`` has truthy ``align_bam`` and ``vrn_file``
    entries.  ``data["vrn_file"]`` is replaced by the result of
    ``effects.snpeff_effects`` when that result is truthy, and then by the
    output of ``variant_filtration``.

    Returns:
        ``[[data]]``.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    annotated = effects.snpeff_effects(data)
    # A falsy result leaves the current call file in place.
    if annotated:
        data["vrn_file"] = annotated

    logger.info("Filtering for %s" % label)
    current_vcf = data["vrn_file"]
    ref_file = data["sam_ref"]
    variation = data["genome_resources"]["variation"]
    data["vrn_file"] = variant_filtration(current_vcf, ref_file, variation,
                                          data)
    return [[data]]
Пример #15
0
def postprocess_variants(data):
    """Annotate, filter and prioritize the variant calls of one sample.

    Work is only done when ``data`` has truthy ``align_bam`` and ``vrn_file``
    entries.  ``data["vrn_file"]`` is replaced by the result of
    ``effects.add_to_vcf`` when that result is truthy, then by the output of
    ``variant_filtration`` and finally by the output of
    ``prioritize.handle_vcf_calls``.

    Returns:
        ``[[data]]``.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    annotated = effects.add_to_vcf(data["vrn_file"], data)
    # A falsy result leaves the current call file in place.
    if annotated:
        data["vrn_file"] = annotated

    logger.info("Filtering for %s" % label)
    current_vcf = data["vrn_file"]
    ref_file = data["sam_ref"]
    variation = tz.get_in(("genome_resources", "variation"), data, {})
    data["vrn_file"] = variant_filtration(current_vcf, ref_file, variation,
                                          data)

    logger.info("Prioritization for %s" % label)
    data["vrn_file"] = prioritize.handle_vcf_calls(data["vrn_file"], data)
    return [[data]]
Пример #16
0
def postprocess_variants(data):
    """Annotate, filter and prioritize the variant calls of one sample.

    Work is only done when ``data`` has truthy ``align_bam`` and ``vrn_file``
    entries.  ``effects.add_to_vcf`` yields an annotated file and statistics;
    a truthy file replaces ``data["vrn_file"]`` and truthy statistics are
    stored as ``data["vrn_stats"]``.  The call file is then replaced by the
    output of ``variant_filtration`` and of ``prioritize.handle_vcf_calls``.

    Returns:
        ``[[data]]``.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    annotated, stats = effects.add_to_vcf(data["vrn_file"], data)
    if annotated:
        data["vrn_file"] = annotated
    if stats:
        data["vrn_stats"] = stats

    logger.info("Filtering for %s" % label)
    current_vcf = data["vrn_file"]
    ref_file = data["sam_ref"]
    variation = tz.get_in(("genome_resources", "variation"), data, {})
    data["vrn_file"] = variant_filtration(current_vcf, ref_file, variation,
                                          data)

    logger.info("Prioritization for %s" % label)
    data["vrn_file"] = prioritize.handle_vcf_calls(data["vrn_file"], data)
    return [[data]]
Пример #17
0
def postprocess_variants(data):
    """Annotate the effects of one sample's variant calls, then filter them.

    Work is only done when ``data`` has truthy ``align_bam`` and ``vrn_file``
    entries.  The effects tool is read from ``config -> algorithm ->
    effects`` and defaults to ``"snpeff"``; a falsy setting skips annotation.
    ``data["vrn_file"]`` is replaced by a truthy annotation result
    (``effects.snpeff_effects`` or ``effects.run_vep``) and then by the
    output of ``variant_filtration``.

    Returns:
        ``[[data]]``.

    Raises:
        ValueError: if the effects setting is truthy but is neither
            ``"snpeff"`` nor ``"vep"``.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    effect_tool = tz.get_in(("config", "algorithm", "effects"), data, "snpeff")
    if effect_tool:
        if effect_tool not in ("snpeff", "vep"):
            raise ValueError("Unexpected variant effects configuration: %s" % effect_tool)
        if effect_tool == "snpeff":
            annotated = effects.snpeff_effects(data)
        else:
            annotated = effects.run_vep(data)
        # A falsy result leaves the current call file in place.
        if annotated:
            data["vrn_file"] = annotated

    logger.info("Filtering for %s" % label)
    current_vcf = data["vrn_file"]
    ref_file = data["sam_ref"]
    variation = tz.get_in(("genome_resources", "variation"), data, {})
    data["vrn_file"] = variant_filtration(current_vcf, ref_file, variation,
                                          data)
    return [[data]]