def prep_gemini_db(fnames, call_info, samples, extras):
    """Prepare a gemini database from VCF inputs prepared with snpEff.

    Args:
        fnames: input VCF files; each is checked for variants and all are
            handed to ``get_multisample_vcf``.
        call_info: ``(name, caller, is_batch)`` tuple.
        samples: sample data dictionaries; the first one supplies the work
            directory and is passed to the helper calls.
        extras: items appended to ``samples`` when creating the PED file.

    Returns:
        A nested single-item list ``[[(name, caller), info]]`` where
        ``info["db"]`` is the database path if that file exists (else
        ``None``) and ``info["vcf"]`` is the multisample VCF when
        ``is_batch`` is truthy (else ``None``).
    """
    first_sample = samples[0]
    build_wanted = do_db_build(samples) and any(
        vcfutils.vcf_has_variants(vcf_in) for vcf_in in fnames)
    name, caller, is_batch = call_info
    work_dir = first_sample["dirs"]["work"]
    out_dir = utils.safe_makedir(os.path.join(work_dir, "gemini"))
    batch_vcf = get_multisample_vcf(fnames, name, caller, first_sample)
    db_path = os.path.join(out_dir, "%s-%s.db" % (name, caller))

    if not utils.file_exists(db_path) and build_wanted:
        # Keep only passing variants unless some sample enables
        # the "gemini_allvariants" tool.
        pass_only = not any("gemini_allvariants" in dd.get_tools_on(sample)
                            for sample in samples)
        single_vcf = multiallelic.to_single(batch_vcf, first_sample,
                                            passonly=pass_only)
        ped_file = create_ped_file(samples + extras, single_vcf)
        # Use original approach for hg19/GRCh37 pending additional testing
        if support_gemini_orig(first_sample) and not any(
                dd.get_vcfanno(sample) for sample in samples):
            build_db = create_gemini_db_orig
        else:
            build_db = create_gemini_db
        db_path = build_db(single_vcf, first_sample, db_path, ped_file)

    info = {"db": db_path if utils.file_exists(db_path) else None,
            "vcf": batch_vcf if is_batch else None}
    return [[(name, caller), info]]
def prep_gemini_db(fnames, call_info, samples, extras):
    """Prepare a gemini database from VCF inputs prepared with snpEff.

    Args:
        fnames: input VCF files; each is checked for variants and all are
            handed to ``get_multisample_vcf``.
        call_info: ``(name, caller, is_batch)`` tuple.
        samples: sample data dictionaries; the first one supplies the work
            directory and is passed to the helper calls.
        extras: items appended to ``samples`` when creating the PED file.

    Returns:
        A nested single-item list ``[[(name, caller), info]]`` where
        ``info["db"]`` is the database path if that file exists (else
        ``None``) and ``info["vcf"]`` is the multisample VCF when
        ``is_batch`` is truthy (else ``None``).
    """
    data = samples[0]
    out_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "gemini"))
    name, caller, is_batch = call_info
    gemini_db = os.path.join(out_dir, "%s-%s.db" % (name, caller))
    multisample_vcf = get_multisample_vcf(fnames, name, caller, data)
    # The single-allele VCF is produced unconditionally, as before, so the
    # set of files generated by this step does not change.
    gemini_vcf = multiallelic.to_single(multisample_vcf, data)
    # Evaluate the build decision once; the original recomputed the very same
    # expression inside the branch, re-checking every input VCF a second time.
    use_gemini = (do_db_build(samples) and
                  any(vcfutils.vcf_has_variants(f) for f in fnames))
    if not utils.file_exists(gemini_db) and use_gemini:
        ped_file = create_ped_file(samples + extras, gemini_vcf)
        gemini_db = create_gemini_db(gemini_vcf, data, gemini_db, ped_file)
    return [[(name, caller),
             {"db": gemini_db if utils.file_exists(gemini_db) else None,
              "vcf": multisample_vcf if is_batch else None}]]
def prep_gemini_db(fnames, call_info, samples, extras):
    """Prepare a gemini database from VCF inputs prepared with snpEff.

    Args:
        fnames: input VCF files; each is checked for variants and all are
            handed to ``get_multisample_vcf``.
        call_info: three-item tuple; only the first two items (name and
            caller) are used here.
        samples: sample data dictionaries; the first one supplies the work
            directory and is passed to the helper calls.
        extras: items appended to ``samples`` when creating the PED file.

    Returns:
        A nested single-item list ``[[(name, caller), info]]`` where
        ``info["db"]`` is the database path if that file exists (else
        ``None``), ``info["vcf"]`` is the VCF returned by ``_run_vcfanno``
        and ``info["decomposed"]`` is the build decision that also gated
        the ``multiallelic.to_single`` step.
    """
    first_sample = samples[0]
    build_wanted = do_db_build(samples) and any(
        vcfutils.vcf_has_variants(vcf_in) for vcf_in in fnames)
    name, caller, _is_batch = call_info
    work_dir = first_sample["dirs"]["work"]
    out_dir = utils.safe_makedir(os.path.join(work_dir, "gemini"))

    input_vcf = get_multisample_vcf(fnames, name, caller, first_sample)
    if build_wanted:
        # Keep only passing variants unless some sample enables
        # the "gemini_allvariants" tool.
        pass_only = not any("gemini_allvariants" in dd.get_tools_on(sample)
                            for sample in samples)
        input_vcf = multiallelic.to_single(input_vcf, first_sample,
                                           passonly=pass_only)
    final_vcf = _run_vcfanno(input_vcf, first_sample, build_wanted)

    db_path = os.path.join(out_dir, "%s-%s.db" % (name, caller))
    if (vcfutils.vcf_has_variants(final_vcf)
            and not utils.file_exists(db_path) and build_wanted):
        ped_file = create_ped_file(samples + extras, final_vcf)
        # Use original approach for hg19/GRCh37 pending additional testing
        if support_gemini_orig(first_sample) and not any(
                dd.get_vcfanno(sample) for sample in samples):
            build_db = create_gemini_db_orig
        else:
            build_db = create_gemini_db
        db_path = build_db(final_vcf, first_sample, db_path, ped_file)

    info = {"db": db_path if utils.file_exists(db_path) else None,
            "vcf": final_vcf,
            "decomposed": build_wanted}
    return [[(name, caller), info]]