def main(config_file, env, cores):
    """Run the sample preparation pipeline end to end.

    Steps, in order:

    1. Read the configuration (``read_config``), the identifier remapping
       (``read_remap_file``) and the priority list (``read_priority_file``).
    2. Collect the input samples; if any sample has ``id`` set to ``None``,
       print those samples and abort.
    3. Call ``check_fam`` with all samples and the configured fam file.
    4. Run ``run_illumina_prep`` in parallel on every sample whose id is not
       in the priority/exclude collection.
    5. Merge the per-sample outputs (``merge_vcf_files``), pass the merged
       file through ``effects.snpeff_effects`` and
       ``population.prep_gemini_db``, and print the resulting database
       entry.
    6. Produce a ``-noexclude`` variant of the effects file with
       ``vcfutils.exclude_samples`` and hand it to
       ``prepare_plink_vcftools``.

    Args:
        config_file: Passed to ``read_config`` together with ``env``.
        env: Passed to ``read_config`` together with ``config_file``.
        cores: Number of parallel workers; anything ``int()`` accepts.

    Raises:
        NotImplementedError: If at least one input sample has no id.
    """
    cores = int(cores)
    config = read_config(config_file, env)
    idremap = read_remap_file(config["runinfo"]["idmapping"])
    exclude = read_priority_file(config["runinfo"]["priority"], idremap)
    samples = list(get_input_samples(config["inputs"], idremap))

    # Samples without an id cannot be processed; report all of them
    # before aborting so they can be fixed in one pass.
    problem = [x for x in samples if x["id"] is None]
    if problem:
        print("Problem identifiers")
        for p in problem:
            print("%s %s" % (p["illuminaid"], os.path.basename(p["dir"])))
        raise NotImplementedError(
            "%d input sample(s) have no id" % len(problem))

    # check_fam sees the full sample list, before exclusions are applied.
    check_fam(samples, config["runinfo"]["fam"])
    config["algorithm"] = {"num_cores": cores}

    # Every remaining sample has a non-None id (guaranteed by the check
    # above), so only the exclude filter is needed here.
    samples = [s for s in samples if s["id"] not in exclude]
    print("Processing %s samples" % len(samples))
    out_files = list(joblib.Parallel(cores)(
        joblib.delayed(run_illumina_prep)(s, config) for s in samples))
    merge_file = merge_vcf_files(out_files, cores, config)

    genome_build = "GRCh37"
    ref_file = config["ref"][genome_build]
    effects_file = effects.snpeff_effects({
        "vrn_file": merge_file,
        "sam_ref": ref_file,
        "reference": {"fasta": {"base": ref_file}},
        "genome_resources": {"aliases": {"snpeff": "GRCh37.74"}},
        "genome_build": genome_build,
        "config": config,
    })

    work_dir = os.getcwd()
    data = {"config": config, "dirs": {"work": work_dir}, "name": [""]}
    gemini_out = population.prep_gemini_db(
        [os.path.join(work_dir, effects_file)],
        [utils.splitext_plus(config["outputs"]["merge"])[0], "casava", True],
        [{
            "config": config,
            "work_bam": "yes",
            "genome_build": genome_build,
            "genome_resources": {"aliases": {"human": True}},
        }],
        data)
    gemini_db = gemini_out[0][1]["db"]
    print(gemini_db)

    # splitext_plus is expected to yield a (base, extension) pair that
    # fills the two %s slots.
    noexclude_file = "%s-noexclude%s" % utils.splitext_plus(effects_file)
    noexclude_file = vcfutils.exclude_samples(
        effects_file, noexclude_file, exclude, ref_file, config)
    prepare_plink_vcftools(noexclude_file, config)
def prep_gemini_db(*args):
    """Forward all positional arguments to ``population.prep_gemini_db``.

    Returns whatever ``population.prep_gemini_db`` returns, unchanged.
    NOTE(review): presumably kept as a module-level alias so the call can
    be referenced by name from this module -- confirm against callers.
    """
    result = population.prep_gemini_db(*args)
    return result