Пример #1
0
def main(snpeff_jar, vcf_ref, genome, interval_file=None):
    """Call ``snpeff_effects`` on a single VCF file or on a directory of them.

    When ``vcf_ref`` is a directory, every entry in it matching
    ``*-snp-filter.vcf`` is processed in sorted order; otherwise ``vcf_ref``
    itself is treated as the only input file.
    """
    if not os.path.isdir(vcf_ref):
        targets = [vcf_ref]
    else:
        pattern = os.path.join(vcf_ref, "*-snp-filter.vcf")
        targets = sorted(glob.glob(pattern))
    for target in targets:
        snpeff_effects(snpeff_jar, target, genome, interval_file)
Пример #2
0
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Compute variant effects and annotate the input variants with them.

    Returns a tuple ``(annotated_vcf, effects_txt)``. The first element is
    None when ``snpeff_effects`` does not produce a VCF.
    """
    effects_vcf, effects_txt = snpeff_effects(vrn_file, genome_build, config)
    if effects_vcf:
        annotated = annotate_effects(vrn_file, effects_vcf, genome_file, config)
    else:
        annotated = None
    return annotated, effects_txt
Пример #3
0
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Compute the effects of variations via ``snpeff_effects``.

    ``genome_file`` is accepted for interface compatibility but is not used:
    the GATK annotator is no longer run because it requires an old version
    of snpEff. Whatever ``snpeff_effects`` returns is handed back unchanged.
    """
    effects_result = snpeff_effects(vrn_file, genome_build, config)
    return effects_result
Пример #4
0
def postprocess_variants(data):
    """Post-process variant calls: effects annotation, filtering, prioritization.

    Nothing beyond logging happens unless the sample has both an ``align_bam``
    and a ``vrn_file``. ``data["vrn_file"]`` is updated in place after each
    step and the sample is returned wrapped as ``[[data]]``.

    Raises:
        ValueError: if ``effects.get_type`` returns a truthy value other than
            "snpeff" or "vep".
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    effect_kind = effects.get_type(data)
    if effect_kind:
        if effect_kind not in ("snpeff", "vep"):
            raise ValueError(
                "Unexpected variant effects configuration: %s" % effect_kind)
        if effect_kind == "snpeff":
            annotated = effects.snpeff_effects(data)
        else:
            annotated = effects.run_vep(data)
        # Keep the un-annotated file when the annotator produced nothing.
        if annotated:
            data["vrn_file"] = annotated

    logger.info("Filtering for %s" % label)
    variation_resources = tz.get_in(("genome_resources", "variation"), data, {})
    data["vrn_file"] = variant_filtration(
        data["vrn_file"], data["sam_ref"], variation_resources, data)

    logger.info("Prioritization for %s" % label)
    data["vrn_file"] = prioritize.handle_vcf_calls(data["vrn_file"], data)
    return [[data]]
Пример #5
0
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Compute variant effects, then annotate the original variants with them.

    Returns ``(annotated_vcf, effects_txt)``; ``annotated_vcf`` is None when
    ``snpeff_effects`` gives back no VCF to annotate from.
    """
    effects_vcf, effects_txt = snpeff_effects(vrn_file, genome_build, config)
    annotated = None
    if effects_vcf:
        annotated = annotate_effects(vrn_file, effects_vcf, genome_file, config)
    return annotated, effects_txt
Пример #6
0
def variation_effects(vrn_file, genome_file, genome_build, config):
    """Compute the effects of variations via ``snpeff_effects``.

    The GATK annotator is no longer run since it requires an old version of
    snpEff, so ``genome_file`` is unused and the ``snpeff_effects`` result is
    returned as-is.
    """
    effects_result = snpeff_effects(vrn_file, genome_build, config)
    return effects_result
def main(config_file, env, cores):
    """Prepare input samples, merge their VCFs, annotate and export the result.

    Steps, in order: read the configuration and sample metadata, abort if any
    sample lacks an identifier, run ``run_illumina_prep`` on each retained
    sample in parallel, merge the outputs, annotate the merged file with
    ``effects.snpeff_effects``, build a GEMINI database from it, write a
    ``-noexclude`` VCF through ``vcfutils.exclude_samples`` and pass that file
    to ``prepare_plink_vcftools``.

    Args:
        config_file: configuration file handed to ``read_config``.
        env: environment argument handed to ``read_config``.
        cores: number of parallel jobs; converted with ``int``.

    Raises:
        NotImplementedError: if any input sample has an ``id`` of None.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    idremap = read_remap_file(config["runinfo"]["idmapping"])
    exclude = read_priority_file(config["runinfo"]["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))
    problem = [x for x in samples if x["id"] is None]
    if problem:
        # Single-argument parenthesised prints behave identically on
        # Python 2 and Python 3, unlike the print statement.
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError("Input samples without an identifier")
    check_fam(samples, config["runinfo"]["fam"])

    # Replaces any "algorithm" section read from the configuration file.
    config["algorithm"] = {"num_cores": cores}
    samples = [
        s for s in samples if s["id"] is not None and s["id"] not in exclude
    ]
    print("Processing %s samples" % len(samples))
    out_files = list(joblib.Parallel(cores)(
        joblib.delayed(run_illumina_prep)(s, config) for s in samples))
    merge_file = merge_vcf_files(out_files, cores, config)
    effects_file = effects.snpeff_effects({
        "vrn_file": merge_file,
        "sam_ref": config["ref"]["GRCh37"],
        "reference": {
            "fasta": {
                "base": config["ref"]["GRCh37"]
            }
        },
        "genome_resources": {
            "aliases": {
                "snpeff": "GRCh37.74"
            }
        },
        "genome_build": "GRCh37",
        "config": config
    })
    data = {"config": config, "dirs": {"work": os.getcwd()}, "name": [""]}
    gemini_db = population.prep_gemini_db(
        [os.path.join(os.getcwd(), effects_file)],
        [utils.splitext_plus(config["outputs"]["merge"])[0], "casava", True],
        [{
            "config": config,
            "work_bam": "yes",
            "genome_build": "GRCh37",
            "genome_resources": {
                "aliases": {
                    "human": True
                }
            }
        }], data)[0][1]["db"]
    print(gemini_db)
    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(effects_file, noexclude_file,
                                              exclude, config["ref"]["GRCh37"],
                                              config)
    prepare_plink_vcftools(noexclude_file, config)
Пример #8
0
def variation_effects(vrn_file, genome_build, config):
    """Compute the effects of variations via ``snpeff_effects``.

    The snpEff jar path is built from ``config["program"]["snpEff"]``; the
    optional ``hybrid_target`` and ``java_memory`` settings are read from
    ``config["algorithm"]`` and default to None.
    """
    jar_path = os.path.join(config["program"]["snpEff"], "snpEff.jar")
    algorithm = config["algorithm"]
    memory = algorithm.get("java_memory", None)
    target = algorithm.get("hybrid_target", None)
    return snpeff_effects(jar_path, vrn_file, genome_build, target, memory)
Пример #9
0
def main(dirname, config, cores):
    """Find, prepare and merge VCF files, annotate them and load GEMINI.

    The merged file is annotated through ``effects.snpeff_effects`` using the
    GRCh37 build, then passed to ``load_gemini_db`` together with
    ``config["ped"]``. Nothing is returned.
    """
    found = find_vcf_files(dirname)
    prepared = prep_vcf_files(found, cores, config)
    merged = merge_vcf_files(prepared, cores, config)

    effects_input = {
        "vrn_file": merged,
        "genome_resources": {"aliases": {"snpeff": "GRCh37"}},
        "genome_build": "GRCh37",
        "config": config,
    }
    effects_file = effects.snpeff_effects(effects_input)

    gemini_db = load_gemini_db(effects_file, config["ped"], cores)
Пример #10
0
def postprocess_variants(data):
    """Finalize variant calls and annotate them via ``effects.snpeff_effects``.

    Acts only when the sample has both a ``work_bam`` and a ``vrn_file``.
    ``data["vrn_file"]`` is replaced by the finalized file, and then by the
    annotated file when one is produced. Returns the sample as ``[[data]]``.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    data["vrn_file"] = finalize_genotyper(
        data["vrn_file"], data["work_bam"], data["sam_ref"], data["config"])
    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = effects.snpeff_effects(data)
    # Keep the finalized file when no annotated output comes back.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Пример #11
0
def postprocess_variants(data):
    """Filter variant calls, then annotate them via ``effects.snpeff_effects``.

    Acts only when the sample has both a ``work_bam`` and a ``vrn_file``.
    ``data["vrn_file"]`` is replaced by the filtered file, and then by the
    annotated file when one is produced. Returns the sample as ``[[data]]``.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    variation_resources = data["genome_resources"]["variation"]
    data["vrn_file"] = variant_filtration(
        data["vrn_file"], data["sam_ref"], variation_resources, data["config"])
    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = effects.snpeff_effects(data)
    # Keep the filtered file when no annotated output comes back.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Пример #12
0
def postprocess_variants(data):
    """Finalize variant calls and annotate them via ``effects.snpeff_effects``.

    Does nothing when ``data["config"]["algorithm"]["snpcall"]`` is falsy.
    Otherwise ``data["vrn_file"]`` is replaced by the finalized file, and then
    by the annotated file when one is produced. Returns ``[[data]]``.
    """
    if not data["config"]["algorithm"]["snpcall"]:
        return [[data]]

    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    data["vrn_file"] = finalize_genotyper(
        data["vrn_file"], data["work_bam"], data["sam_ref"], data["config"])
    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = effects.snpeff_effects(
        data["vrn_file"], data["genome_build"], data["config"])
    # Keep the finalized file when no annotated output comes back.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Пример #13
0
def postprocess_variants(data):
    """Finalize variant calls and annotate them via ``effects.snpeff_effects``.

    Acts only when the sample has both a ``work_bam`` and a ``vrn_file``.
    ``data["vrn_file"]`` is updated in place after each step and the sample is
    returned wrapped as ``[[data]]``.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    has_inputs = data["work_bam"] and data.get("vrn_file")
    if has_inputs:
        finalized = finalize_genotyper(
            data["vrn_file"], data["work_bam"], data["sam_ref"], data["config"])
        data["vrn_file"] = finalized
        logger.info("Calculating variation effects for %s" % str(data["name"]))
        annotated = effects.snpeff_effects(data)
        # Only overwrite when an annotated file was actually produced.
        if annotated:
            data["vrn_file"] = annotated
    return [[data]]
Пример #14
0
def postprocess_variants(data):
    """Filter variant calls, then annotate them via ``effects.snpeff_effects``.

    Acts only when the sample has both a ``work_bam`` and a ``vrn_file``.
    The resources passed to filtering come from ``configured_vrn_files``.
    ``data["vrn_file"]`` is updated in place and ``[[data]]`` is returned.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    resources = configured_vrn_files(data["config"], data["sam_ref"])
    data["vrn_file"] = variant_filtration(
        data["vrn_file"], data["sam_ref"], resources, data["config"])
    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = effects.snpeff_effects(data)
    # Keep the filtered file when no annotated output comes back.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Пример #15
0
def postprocess_variants(data):
    """Filter variant calls, then annotate them via ``effects.snpeff_effects``.

    Log messages are labelled with the last entry of ``data["name"]`` and the
    result of ``get_variantcaller``. Acts only when the sample has both a
    ``work_bam`` and a ``vrn_file``; returns the sample as ``[[data]]``.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data["work_bam"] and data.get("vrn_file")):
        return [[data]]

    variation_resources = data["genome_resources"]["variation"]
    data["vrn_file"] = variant_filtration(
        data["vrn_file"], data["sam_ref"], variation_resources, data["config"])
    logger.info("Calculating variation effects for %s" % label)
    annotated = effects.snpeff_effects(data)
    # Keep the filtered file when no annotated output comes back.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
Пример #16
0
def postprocess_variants(data):
    """Annotate variant calls via ``effects.snpeff_effects``, then filter them.

    Acts only when the sample has both an ``align_bam`` and a ``vrn_file``.
    Filtering runs on the annotated file when one is produced, otherwise on
    the original. Returns the sample wrapped as ``[[data]]``.
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    annotated = effects.snpeff_effects(data)
    if annotated:
        data["vrn_file"] = annotated

    logger.info("Filtering for %s" % label)
    variation_resources = data["genome_resources"]["variation"]
    data["vrn_file"] = variant_filtration(
        data["vrn_file"], data["sam_ref"], variation_resources, data)
    return [[data]]
Пример #17
0
def postprocess_variants(data):
    """Finalize and annotate variant calls, then run ``validate.compare_to_rm``.

    Finalization and annotation happen only when ``data["work_bam"]`` is
    truthy; ``validate.compare_to_rm`` is always applied and its result is
    returned wrapped as ``[[data]]``.
    """
    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    if data["work_bam"]:
        finalized = finalize_genotyper(
            data["vrn_file"], data["work_bam"], data["sam_ref"], data["config"])
        data["vrn_file"] = finalized
        logger.info("Calculating variation effects for %s" % str(data["name"]))
        annotated = effects.snpeff_effects(
            data["vrn_file"], data["genome_build"], data["config"])
        # Only overwrite when an annotated file was actually produced.
        if annotated:
            data["vrn_file"] = annotated
    validated = validate.compare_to_rm(data)
    return [[validated]]
Пример #18
0
def main(dirname, config, cores):
    """Find, prepare and merge VCF files, annotate them and load GEMINI.

    The merged file is annotated through ``effects.snpeff_effects`` using the
    GRCh37 build, then passed to ``load_gemini_db`` together with
    ``config["ped"]``. Nothing is returned.
    """
    merged_file = merge_vcf_files(
        prep_vcf_files(find_vcf_files(dirname), cores, config), cores, config)

    aliases = {"snpeff": "GRCh37"}
    effects_input = {"vrn_file": merged_file,
                     "genome_resources": {"aliases": aliases},
                     "genome_build": "GRCh37",
                     "config": config}
    effects_file = effects.snpeff_effects(effects_input)

    gemini_db = load_gemini_db(effects_file, config["ped"], cores)
Пример #19
0
def _run_ensemble_w_caller(batch_id, vrn_files, bam_files, base_dir, edata):
    """Run the ensemble method, using a variant caller to re-call the inputs.

    Invokes ``bcbio-variation-recall ensemble`` with an external variant
    caller unless ``<batch_id>-ensemble.vcf`` already exists in ``base_dir``,
    then annotates the result via ``effects.snpeff_effects`` on a deep copy
    of ``edata``.

    Returns a dict with the keys ``variantcaller``, ``vrn_file`` and
    ``bed_file``.
    """
    ensemble_vcf = os.path.join(base_dir, "{0}-ensemble.vcf".format(batch_id))
    if not utils.file_exists(ensemble_vcf):
        config = edata["config"]
        caller = config["algorithm"]["ensemble"]["caller"]
        program = config_utils.get_program("bcbio-variation-recall", config)
        cores_arg = "--cores=%s" % config["algorithm"].get("num_cores", 1)
        caller_arg = "--caller=%s" % caller
        base_args = [program, "ensemble", cores_arg, caller_arg,
                     ensemble_vcf, edata["sam_ref"]]
        cmd = base_args + vrn_files + bam_files
        do.run(cmd, "Ensemble calling with %s: %s" % (caller, batch_id))

    # Annotate on a copy so the caller's edata is left untouched.
    annotate_input = copy.deepcopy(edata)
    annotate_input["vrn_file"] = ensemble_vcf
    result = {"variantcaller": "ensemble"}
    result["vrn_file"] = effects.snpeff_effects(annotate_input)
    result["bed_file"] = None
    return result
Пример #20
0
def postprocess_variants(data):
    """Post-process variant calls: effects annotation followed by filtering.

    Acts only when the sample has both an ``align_bam`` and a ``vrn_file``.
    The annotator is chosen from ``config -> algorithm -> effects`` and
    defaults to "snpeff"; a falsy setting skips annotation. Returns the
    sample wrapped as ``[[data]]``.

    Raises:
        ValueError: if the effects setting is truthy but neither "snpeff"
            nor "vep".
    """
    label = "%s, %s" % (data["name"][-1], get_variantcaller(data))
    logger.info("Finalizing variant calls: %s" % label)
    if not (data.get("align_bam") and data.get("vrn_file")):
        return [[data]]

    logger.info("Calculating variation effects for %s" % label)
    effect_kind = tz.get_in(("config", "algorithm", "effects"), data, "snpeff")
    if effect_kind:
        if effect_kind not in ("snpeff", "vep"):
            raise ValueError("Unexpected variant effects configuration: %s" % effect_kind)
        if effect_kind == "snpeff":
            annotated = effects.snpeff_effects(data)
        else:
            annotated = effects.run_vep(data)
        # Keep the un-annotated file when the annotator produced nothing.
        if annotated:
            data["vrn_file"] = annotated

    logger.info("Filtering for %s" % label)
    variation_resources = tz.get_in(("genome_resources", "variation"), data, {})
    data["vrn_file"] = variant_filtration(
        data["vrn_file"], data["sam_ref"], variation_resources, data)
    return [[data]]
Пример #21
0
def _run_ensemble_w_caller(batch_id, vrn_files, bam_files, base_dir, edata):
    """Run the ensemble method, using a variant caller to re-call the inputs.

    Invokes ``bcbio-variation-recall ensemble`` with an external variant
    caller unless ``<batch_id>-ensemble.vcf`` already exists in ``base_dir``.
    The output is then annotated via ``effects.snpeff_effects`` on a deep
    copy of ``edata``.

    Returns a dict with the keys ``variantcaller``, ``vrn_file`` and
    ``bed_file``.
    """
    target_vcf = os.path.join(base_dir, "{0}-ensemble.vcf".format(batch_id))
    already_done = utils.file_exists(target_vcf)
    if not already_done:
        caller = edata["config"]["algorithm"]["ensemble"]["caller"]
        executable = config_utils.get_program("bcbio-variation-recall",
                                              edata["config"])
        num_cores = edata["config"]["algorithm"].get("num_cores", 1)
        fixed_args = [executable, "ensemble", "--cores=%s" % num_cores,
                      "--caller=%s" % caller, target_vcf, edata["sam_ref"]]
        description = "Ensemble calling with %s: %s" % (caller, batch_id)
        do.run(fixed_args + vrn_files + bam_files, description)

    # Work on a copy so the caller's edata keeps its own vrn_file.
    copied = copy.deepcopy(edata)
    copied["vrn_file"] = target_vcf
    annotated_vcf = effects.snpeff_effects(copied)
    return {"variantcaller": "ensemble",
            "vrn_file": annotated_vcf,
            "bed_file": None}
Пример #22
0
def main(config_file, env, cores):
    """Prepare input samples, merge their VCFs, annotate and export the result.

    Steps, in order: read the configuration and sample metadata, abort if any
    sample lacks an identifier, run ``run_illumina_prep`` on each retained
    sample in parallel, merge the outputs, annotate the merged file with
    ``effects.snpeff_effects``, build a GEMINI database from it, write a
    ``-noexclude`` VCF through ``vcfutils.exclude_samples`` and pass that file
    to ``prepare_plink_vcftools``.

    Args:
        config_file: configuration file handed to ``read_config``.
        env: environment argument handed to ``read_config``.
        cores: number of parallel jobs; converted with ``int``.

    Raises:
        NotImplementedError: if any input sample has an ``id`` of None.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    idremap = read_remap_file(config["runinfo"]["idmapping"])
    exclude = read_priority_file(config["runinfo"]["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))
    problem = [x for x in samples if x["id"] is None]
    if problem:
        # Single-argument parenthesised prints behave identically on
        # Python 2 and Python 3, unlike the print statement.
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError("Input samples without an identifier")
    check_fam(samples, config["runinfo"]["fam"])

    # Replaces any "algorithm" section read from the configuration file.
    config["algorithm"] = {"num_cores": cores}
    samples = [s for s in samples if s["id"] is not None and s["id"] not in exclude]
    print("Processing %s samples" % len(samples))
    out_files = list(joblib.Parallel(cores)(joblib.delayed(run_illumina_prep)(s, config)
                                            for s in samples))
    merge_file = merge_vcf_files(out_files, cores, config)
    effects_file = effects.snpeff_effects({"vrn_file": merge_file,
                                           "sam_ref": config["ref"]["GRCh37"],
                                           "reference": {"fasta": {"base": config["ref"]["GRCh37"]}},
                                           "genome_resources": {"aliases": {"snpeff": "GRCh37.74"}},
                                           "genome_build": "GRCh37",
                                           "config": config})
    data = {"config": config, "dirs": {"work": os.getcwd()}, "name": [""]}
    gemini_db = population.prep_gemini_db([os.path.join(os.getcwd(), effects_file)],
                                          [utils.splitext_plus(config["outputs"]["merge"])[0], "casava", True],
                                          [{"config": config, "work_bam": "yes", "genome_build": "GRCh37",
                                            "genome_resources": {"aliases": {"human": True}}}],
                                          data)[0][1]["db"]
    print(gemini_db)
    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(effects_file, noexclude_file, exclude,
                                              config["ref"]["GRCh37"], config)
    prepare_plink_vcftools(noexclude_file, config)
Пример #23
0
 def __call__(self, in_file):
     """Run ``effects.snpeff_effects`` on ``in_file`` and return its result.

     The call is bracketed by ``_start_message`` and ``_end_message``; the
     end message is skipped if the effects call raises.
     """
     self._start_message(in_file)
     effects_output = effects.snpeff_effects(in_file, self.genome, self.config)
     self._end_message(in_file)
     return effects_output
Пример #24
0
 def __call__(self, in_file):
     """Run ``effects.snpeff_effects`` on ``in_file`` and return its result.

     ``_start_message`` is emitted before the call and ``_end_message``
     after it completes; an exception from the call skips the end message.
     """
     self._start_message(in_file)
     genome, config = self.genome, self.config
     result = effects.snpeff_effects(in_file, genome, config)
     self._end_message(in_file)
     return result