def gen_pheno_txt_file(this_dataset, genofile_name, vals, method):
    """Generates phenotype file for GEMMA.

    With ``method == "gemma"`` the ``<group>.fam`` file under
    ``flat_files('mapping')`` is rewritten in place: the first column
    becomes ``0``, columns 2-5 are kept and the last column is replaced by
    the matching entry of ``vals``.  For any other method a
    ``<genofile_name>_pheno.txt`` file is written under
    ``flat_files('genotype/bimbam')`` with one value per line.

    Args:
        this_dataset: Dataset whose ``group.name`` names the ``.fam`` file.
        genofile_name: Base name of the BIMBAM phenotype file.
        vals: Phenotype values in row order; ``"x"`` marks a missing value
            (written as ``-9`` in the ``.fam`` file, ``NA`` otherwise).
        method: ``"gemma"`` selects the ``.fam`` rewrite.

    Raises:
        IndexError: If the ``.fam`` file has more rows than ``vals`` or a
            row has fewer than five columns.
    """
    if method == "gemma":
        fam_path = "{}/{}.fam".format(flat_files('mapping'),
                                      this_dataset.group.name)
        # Read everything first: the same file is truncated and rewritten.
        with open(fam_path, "r") as fam_file:
            fam_rows = [line.split() for line in fam_file]
        with open(fam_path, "w") as fam_file:
            for i, row in enumerate(fam_rows):
                this_val = -9 if vals[i] == "x" else vals[i]
                fam_file.write(" ".join(
                    ["0", row[1], row[2], row[3], row[4], str(this_val)])
                    + "\n")
    else:
        pheno_path = "{}/{}_pheno.txt".format(
            flat_files('genotype/bimbam'), genofile_name)
        with open(pheno_path, "w") as pheno_file:
            for value in vals:
                # str() so numeric values no longer raise TypeError.
                pheno_file.write(
                    "NA\n" if value == "x" else str(value) + "\n")
def gen_pheno_txt_file(this_dataset, genofile_name, vals, method):
    """Generates phenotype file for GEMMA.

    With ``method == "gemma"`` the ``<group>.fam`` file under
    ``flat_files('mapping')`` is rewritten in place: the first column
    becomes ``0``, columns 2-5 are kept and the last column is replaced by
    the matching entry of ``vals``.  For any other method a
    ``<genofile_name>_pheno.txt`` file is written under
    ``flat_files('genotype/bimbam')`` with one value per line.

    Args:
        this_dataset: Dataset whose ``group.name`` names the ``.fam`` file.
        genofile_name: Base name of the BIMBAM phenotype file.
        vals: Phenotype values in row order; ``"x"`` marks a missing value
            (written as ``-9`` in the ``.fam`` file, ``NA`` otherwise).
        method: ``"gemma"`` selects the ``.fam`` rewrite.

    Raises:
        IndexError: If the ``.fam`` file has more rows than ``vals`` or a
            row has fewer than five columns.
    """
    if method == "gemma":
        fam_path = "{}/{}.fam".format(flat_files('mapping'),
                                      this_dataset.group.name)
        # Read everything first: the same file is truncated and rewritten.
        with open(fam_path, "r") as fam_file:
            fam_rows = [line.split() for line in fam_file]
        with open(fam_path, "w") as fam_file:
            for i, row in enumerate(fam_rows):
                this_val = -9 if vals[i] == "x" else vals[i]
                fam_file.write(" ".join(
                    ["0", row[1], row[2], row[3], row[4], str(this_val)])
                    + "\n")
    else:
        pheno_path = "{}/{}_pheno.txt".format(
            flat_files('genotype/bimbam'), genofile_name)
        with open(pheno_path, "w") as pheno_file:
            for value in vals:
                # str() so numeric values no longer raise TypeError.
                pheno_file.write(
                    "NA\n" if value == "x" else str(value) + "\n")
def gen_covariates_file(this_dataset, covariates):
    """Write ``<group>_covariates.txt`` under ``flat_files('mapping')``.

    ``covariates`` is a comma-separated list of ``trait:dataset`` entries.
    Each entry yields one column holding, for every sample in the group's
    sample list, the trait value or ``"-9"`` when the trait has no data for
    that sample.  Every value in the output is followed by a tab, one row
    per sample.
    """
    columns = []
    for entry in covariates.split(","):
        parts = entry.split(":")
        trait_name = parts[0]
        dataset_ob = create_dataset(parts[1])
        trait_ob = GeneralTrait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)

        # Refresh the group's sample list before reading it.
        this_dataset.group.get_samplelist()
        group_samples = this_dataset.group.samplelist
        logger.debug("SAMPLES:", group_samples)
        sample_data = trait_ob.data
        logger.debug("SAMPLE DATA:", sample_data)

        columns.append([
            sample_data[sample].value if sample in sample_data else "-9"
            for sample in group_samples
        ])

    out_path = "{}/{}_covariates.txt".format(flat_files('mapping'),
                                             this_dataset.group.name)
    with open(out_path, "w") as outfile:
        # Row count comes from the first column, as before.
        for row in range(len(columns[0])):
            outfile.write(
                "".join(str(column[row]) + "\t" for column in columns))
            outfile.write("\n")
def gen_covariates_file(this_dataset, covariates):
    """Write ``<group>_covariates.txt`` under ``flat_files('mapping')``.

    ``covariates`` is a comma-separated list of ``trait:dataset`` entries.
    Each entry yields one column holding, for every sample in the group's
    sample list, the trait value or ``"-9"`` when the trait has no data for
    that sample.  Every value in the output is followed by a tab, one row
    per sample.
    """
    columns = []
    for entry in covariates.split(","):
        parts = entry.split(":")
        trait_name = parts[0]
        dataset_ob = create_dataset(parts[1])
        trait_ob = GeneralTrait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)

        # Refresh the group's sample list before reading it.
        this_dataset.group.get_samplelist()
        group_samples = this_dataset.group.samplelist
        logger.debug("SAMPLES:", group_samples)
        sample_data = trait_ob.data
        logger.debug("SAMPLE DATA:", sample_data)

        columns.append([
            sample_data[sample].value if sample in sample_data else "-9"
            for sample in group_samples
        ])

    out_path = "{}/{}_covariates.txt".format(flat_files('mapping'),
                                             this_dataset.group.name)
    with open(out_path, "w") as outfile:
        # Row count comes from the first column, as before.
        for row in range(len(columns[0])):
            outfile.write(
                "".join(str(column[row]) + "\t" for column in columns))
            outfile.write("\n")
def gen_pheno_txt_file(this_dataset, vals):
    """Rewrite the group's ``.fam`` file with new phenotypes (GEMMA/PLINK).

    Every row keeps columns 2-5; the first column is overwritten with a
    copy of column 2 and the last column with the matching entry of
    ``vals`` (``"x"`` is written as ``-9``).
    """
    fam_path = "{}/{}.fam".format(flat_files('mapping'),
                                  this_dataset.group.name)

    # Load all rows before the same file is reopened for writing.
    with open(fam_path, "r") as fam_in:
        fam_rows = [raw.split() for raw in fam_in]

    with open(fam_path, "w") as fam_out:
        for idx, row in enumerate(fam_rows):
            pheno = -9 if vals[idx] == "x" else vals[idx]
            fields = (row[1], row[1], row[2], row[3], row[4], str(pheno))
            fam_out.write(" ".join(fields) + "\n")
def __init__(self, name):
    """Load marker positions from ``<name>_snps.txt`` (BIMBAM format).

    The file is read from ``flat_files('genotype/bimbam')``.  The field
    delimiter is detected from the first line (``", "``, ``","``, tab, or a
    single space as fallback).  ``self.markers`` becomes a list of dicts
    with keys ``name``, ``Mb`` (second column divided by 1,000,000) and
    ``chr`` (third column, converted to ``int`` unless it is ``X``, ``Y``
    or ``M``).

    Args:
        name: Base name of the genotype files.

    Raises:
        ValueError: If a position is not numeric or a chromosome other
            than X/Y/M is not an integer.
        IndexError: If a non-blank line has fewer than three fields.
    """
    # The JSON genotype file was opened but never read or closed.  Keep the
    # lookup so a missing file still fails as before, but release the handle.
    with open(locate(name + ".json", 'genotype/json')):
        pass

    markers = []
    snps_path = "%s/%s_snps.txt" % (flat_files('genotype/bimbam'), name)
    with open(snps_path, 'r') as bimbam_fh:
        # Probe a single line for the delimiter.  The previous code called
        # readline() once per candidate, testing different lines and
        # dropping every line it consumed from the marker list.
        first_line = bimbam_fh.readline()
        delimiter = " "
        for candidate in (", ", ",", "\t"):
            if len(first_line.split(candidate)) > 2:
                delimiter = candidate
                break
        bimbam_fh.seek(0)  # the probed line is a marker too

        for line in bimbam_fh:
            if not line.strip():
                continue  # tolerate blank lines (e.g. at end of file)
            fields = line.split(delimiter)
            chromosome = fields[2].rstrip()
            if chromosome not in ("X", "Y", "M"):
                chromosome = int(chromosome)
            markers.append({
                'name': fields[0].rstrip(),
                'Mb': float(fields[1].rstrip()) / 1000000,
                'chr': chromosome,
            })

    self.markers = markers
def gen_pheno_txt_file(this_dataset, vals):
    """Rewrite the group's ``.fam`` file with new phenotypes (GEMMA/PLINK).

    Every row keeps columns 2-5; the first column is written as ``0`` and
    the last column is replaced by the matching entry of ``vals`` (``"x"``
    is written as ``-9``).
    """
    fam_path = "{}/{}.fam".format(flat_files('mapping'),
                                  this_dataset.group.name)

    # Load all rows before the same file is reopened for writing.
    with open(fam_path, "r") as fam_in:
        fam_rows = [raw.split() for raw in fam_in]

    with open(fam_path, "w") as fam_out:
        for idx, row in enumerate(fam_rows):
            pheno = -9 if vals[idx] == "x" else vals[idx]
            fields = ("0", row[1], row[2], row[3], row[4], str(pheno))
            fam_out.write(" ".join(fields) + "\n")
def check_plink_gemma():
    """Tell whether PLINK/GEMMA input files exist for the current group.

    Returns True only when the "mapping" flat-file directory exists and
    contains ``<group>.bed`` together with ``<group>.map`` or
    ``<group>.bim``; otherwise False.

    NOTE(review): ``self`` is not a parameter here, so this presumably runs
    as a closure inside a method -- confirm against the enclosing scope.
    """
    if not flat_file_exists("mapping"):
        return False

    prefix = flat_files("mapping") + "/" + self.dataset.group.name
    if not os.path.isfile(prefix + ".bed"):
        return False
    return os.path.isfile(prefix + ".map") or os.path.isfile(prefix + ".bim")
def gen_pheno_txt_file(this_dataset, genofile_name, vals):
    """Generates phenotype file for GEMMA.

    Writes ``<genofile_name>_pheno.txt`` under
    ``flat_files('genotype/bimbam')`` with one phenotype value per line.

    Args:
        this_dataset: Not used by this function.
        genofile_name: Base name of the output file.
        vals: Phenotype values in sample order; ``"x"`` marks a missing
            value and is written as ``NA``.
    """
    pheno_path = "{}/{}_pheno.txt".format(flat_files('genotype/bimbam'),
                                          genofile_name)
    with open(pheno_path, "w") as pheno_file:
        for value in vals:
            # str() so numeric values no longer raise TypeError.
            pheno_file.write("NA\n" if value == "x" else str(value) + "\n")
def run_gemma(this_dataset, samples, vals):
    """Run GEMMA on the group's PLINK files and return the parsed output.

    The phenotype file is regenerated from ``vals`` first; the command is
    then executed through the shell and the result is handed to
    ``parse_gemma_output``.  ``samples`` is not used here.
    """
    print("INSIDE GEMMA_MAPPING")

    gen_pheno_txt_file(this_dataset, vals)

    # use GEMMA_RUN in the next one, create a unique temp file
    group_name = this_dataset.group.name
    gemma_args = ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -outdir %s -o %s_output' % (
        flat_files('mapping'), group_name,
        flat_files('mapping'), group_name,
        webqtlConfig.GENERATED_IMAGE_DIR, group_name)
    gemma_command = GEMMA_COMMAND + gemma_args
    print("gemma_command:" + gemma_command)

    os.system(gemma_command)

    return parse_gemma_output(this_dataset)
def gen_pheno_txt_file(this_dataset, genofile_name, vals):
    """Generates phenotype file for GEMMA.

    Writes ``<genofile_name>_pheno.txt`` under
    ``flat_files('genotype/bimbam')`` with one phenotype value per line.

    Args:
        this_dataset: Not used by this function.
        genofile_name: Base name of the output file.
        vals: Phenotype values in sample order; ``"x"`` marks a missing
            value and is written as ``NA``.
    """
    pheno_path = "{}/{}_pheno.txt".format(flat_files('genotype/bimbam'),
                                          genofile_name)
    with open(pheno_path, "w") as pheno_file:
        for value in vals:
            # str() so numeric values no longer raise TypeError.
            pheno_file.write("NA\n" if value == "x" else str(value) + "\n")
def get_samples_from_ped_file(dataset):
    """Return the sample names listed in the group's ``.ped`` file.

    The file ``<group>.ped`` is read from ``flat_files('mapping')``; the
    first tab-separated field of every line, stripped of surrounding
    whitespace, is taken as the sample name.

    Args:
        dataset: Dataset whose ``group.name`` names the ``.ped`` file.

    Returns:
        list of str: One entry per line of the file, in file order.
    """
    # Join directory and file name with "/", as the other readers of
    # flat_files('mapping') do; the previous "{}{}.ped" glued them together.
    ped_path = "{}/{}.ped".format(flat_files('mapping'), dataset.group.name)

    # str methods replace the Python-2-only string.split/string.strip
    # helpers, and the context manager closes the file.
    with open(ped_path, "r") as ped_file:
        return [line.strip().split('\t')[0].strip() for line in ped_file]
def gen_reaper_results(self):
    """Run qtlreaper for every trait in ``self.trait_list``.

    For each trait the phenotype file is written, reaper is executed
    through the shell, and ``self.trait_results[trait.name]`` is filled
    with one LRS value per parsed result, negated when the result's
    ``additive`` value is positive.
    """
    self.trait_results = {}
    suffix_chars = string.ascii_uppercase + string.digits

    for trait_db in self.trait_list:
        self.dataset.group.get_markers()
        this_trait = trait_db[0]

        # The return value is unused; the call is kept as in the original.
        genotype = self.dataset.group.read_genotype_file(use_reaper=False)
        samples, values, _variances, _sample_aliases = (
            this_trait.export_informative())

        genofile_name = self.dataset.group.name
        if self.dataset.group.genofile != None:
            # Drop the trailing five characters of the file name.
            genofile_name = self.dataset.group.genofile[:-5]

        # Keep only the samples that belong to the group's sample list.
        trimmed_samples = []
        trimmed_values = []
        for idx, sample in enumerate(samples):
            if sample in self.dataset.group.samplelist:
                trimmed_samples.append(str(sample))
                trimmed_values.append(values[idx])

        trait_filename = "_".join(
            [str(this_trait.name), str(self.dataset.name), "pheno"])
        gen_pheno_txt_file(trimmed_samples, trimmed_values, trait_filename)

        random_suffix = ''.join(
            random.choice(suffix_chars) for _ in range(6))
        output_filename = self.dataset.group.name + "_GWA_" + random_suffix

        reaper_args = ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt -n 1000 -o {4}{5}.txt'.format(
            flat_files('genotype'), genofile_name, TEMPDIR, trait_filename,
            webqtlConfig.GENERATED_IMAGE_DIR, output_filename)
        reaper_command = REAPER_COMMAND + reaper_args
        os.system(reaper_command)

        reaper_results = parse_reaper_output(output_filename)
        lrs_values = [float(qtl['lrs_value']) for qtl in reaper_results]

        signed_lrs = []
        self.trait_results[this_trait.name] = signed_lrs
        for qtl, lrs in zip(reaper_results, lrs_values):
            signed_lrs.append(-lrs if qtl['additive'] > 0 else lrs)
def run_plink(this_trait, dataset, species, vals, maf):
    """Run a PLINK association scan and return the group's markers.

    The phenotype file is regenerated from ``vals``, PLINK is executed
    through the shell, and the parsed p-values are added to
    ``dataset.group.markers`` before its ``markers`` are returned.
    """
    name_prefix = "%s_%s_" % (dataset.group.name, this_trait.name)
    plink_output_filename = webqtlUtil.genRandStr(name_prefix)

    gen_pheno_txt_file(dataset, vals)

    plink_args = ' --noweb --bfile %s/%s --no-pheno --no-fid --no-parents --no-sex --maf %s --out %s%s --assoc ' % (
        flat_files('mapping'), dataset.group.name, maf, TMPDIR,
        plink_output_filename)
    plink_command = PLINK_COMMAND + plink_args
    logger.debug("plink_command:", plink_command)

    os.system(plink_command)

    # Only the p-values are used; the count is discarded.
    _count, p_values = parse_plink_output(plink_output_filename, species)
    logger.debug("p_values:", p_values)

    markers = dataset.group.markers
    markers.add_pvalues(p_values)
    return markers.markers
def run_plink(this_trait, dataset, species, vals, maf):
    """Run a PLINK association scan and return the group's markers.

    The phenotype file is regenerated from ``vals``, PLINK is executed
    through the shell, and the parsed p-values are added to
    ``dataset.group.markers`` before its ``markers`` are returned.
    """
    name_prefix = "%s_%s_" % (dataset.group.name, this_trait.name)
    plink_output_filename = webqtlUtil.genRandStr(name_prefix)

    gen_pheno_txt_file(dataset, vals)

    plink_args = ' --noweb --bfile %s/%s --no-pheno --no-fid --no-parents --no-sex --maf %s --out %s%s --assoc ' % (
        flat_files('mapping'), dataset.group.name, maf, TMPDIR,
        plink_output_filename)
    plink_command = PLINK_COMMAND + plink_args
    logger.debug("plink_command:", plink_command)

    os.system(plink_command)

    # Only the p-values are used; the count is discarded.
    _count, p_values = parse_plink_output(plink_output_filename, species)
    logger.debug("p_values:", p_values)

    markers = dataset.group.markers
    markers.add_pvalues(p_values)
    return markers.markers
def __init__(self, name, specified_markers=None):
    """Load markers from the ``<name>.bim`` file in ``flat_files('mapping')``.

    ``self.markers`` becomes a list of dicts with keys ``chr`` (first
    column as ``int``), ``name`` (second column) and ``Mb`` (fourth column
    divided by 1,000,000).

    Args:
        name: Base name of the ``.bim`` file.
        specified_markers: Optional collection of marker names.  When
            non-empty, only markers whose name is in it are kept; when
            omitted or empty, every marker is kept.

    Raises:
        ValueError: If the chromosome or position column is not numeric.
        IndexError: If a line has fewer than four fields.
    """
    # A set gives constant-time membership tests per line; None means
    # "no filtering" (this also replaces the mutable [] default).
    wanted = set(specified_markers) if specified_markers else None

    self.markers = []
    with open(flat_files('mapping') + '/' + name + '.bim') as marker_data_fh:
        for line in marker_data_fh:
            splat = line.strip().split()
            if wanted is not None and splat[1] not in wanted:
                continue
            self.markers.append({
                'chr': int(splat[0]),
                'name': splat[1],
                'Mb': float(splat[3]) / 1000000,
            })
def run_plink(this_trait, dataset, species, vals, maf):
    """Run a PLINK association scan and return the group's markers.

    The phenotype file is regenerated from ``vals``, PLINK is executed
    through the shell with ``-9`` as the missing-phenotype code, and the
    parsed p-values are added to ``dataset.group.markers`` before its
    ``markers`` are returned.
    """
    name_prefix = "%s_%s_" % (dataset.group.name, this_trait.name)
    plink_output_filename = webqtlUtil.genRandStr(name_prefix)

    gen_pheno_txt_file(dataset, vals)

    plink_args = ' --noweb --bfile %s/%s --no-fid --no-parents --no-sex --maf %s --missing-phenotype -9 --out %s/%s --assoc ' % (
        flat_files('mapping'), dataset.group.name, maf, TMPDIR,
        plink_output_filename)
    plink_command = PLINK_COMMAND + plink_args
    logger.debug("plink_command:", plink_command)

    os.system(plink_command)

    # Only the p-values are used; the count is discarded.
    _count, p_values = parse_plink_output(plink_output_filename, species)
    logger.debug("p_values:", p_values)

    markers = dataset.group.markers
    markers.add_pvalues(p_values)
    return markers.markers
def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
    """Generates p-values for each marker using GEMMA.

    Writes the phenotype file (and the covariate file when ``covariates``
    is non-empty), builds the GEMMA or gemma-wrapper command line, runs it
    through the shell and parses the output.

    Args:
        this_dataset: Dataset being mapped; its group supplies file names.
        samples: Not used by this function.
        vals: Phenotype values, passed on to ``gen_pheno_txt_file``.
        covariates: Covariate specification; ``""`` means none.
        method: ``"gemma"`` runs on the PLINK files in
            ``flat_files('mapping')``; any other value runs on the BIMBAM
            files in ``flat_files('genotype/bimbam')``.
        use_loco: The string ``"True"`` runs the BIMBAM analysis through
            gemma-wrapper with ``--loco``.

    Returns:
        The result of ``parse_loco_output`` when a LOCO run was performed,
        otherwise the result of ``parse_gemma_output``.
    """
    group = this_dataset.group
    if group.genofile is not None:
        # Drop the trailing five characters of the genotype file name.
        genofile_name = group.genofile[:-5]
    else:
        genofile_name = group.name

    gen_pheno_txt_file(this_dataset, genofile_name, vals, method)

    out_dir = webqtlConfig.GENERATED_IMAGE_DIR
    assoc_path = "{}{}_output.assoc.txt".format(out_dir, genofile_name)
    if not os.path.isfile(assoc_path):
        # Create an empty placeholder and close it right away; the handle
        # used to be left open.
        with open(assoc_path, "w+"):
            pass

    # The chromosome container is indexed starting at 1.
    this_chromosomes = this_dataset.species.chromosomes.chromosomes
    chr_list_string = ",".join(
        this_chromosomes[i + 1].name for i in range(len(this_chromosomes)))

    if covariates != "":
        gen_covariates_file(this_dataset, covariates)

    suffix_chars = string.ascii_uppercase + string.digits
    # Stays None unless a LOCO run actually produces a GWA output file.
    gwa_output_filename = None

    if method == "gemma":
        mapping_dir = flat_files('mapping')
        gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
            mapping_dir, group.name, mapping_dir, group.name)
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (
                mapping_dir, group.name, out_dir, group.name)
        else:
            gemma_command += ' -outdir %s -o %s_output' % (
                out_dir, group.name)
    elif use_loco == "True":
        bimbam_dir = flat_files('genotype/bimbam')

        # Step 1: compute the per-chromosome kinship matrices.
        k_output_filename = group.name + "_K_" + ''.join(
            random.choice(suffix_chars) for _ in range(6))
        generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (
            bimbam_dir, genofile_name, bimbam_dir, genofile_name,
            bimbam_dir, genofile_name, TEMPDIR, k_output_filename)
        logger.debug("k_command:" + generate_k_command)
        os.system(generate_k_command)

        # Step 2: run the association using the kinship output as input.
        gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (
            TEMPDIR, k_output_filename, bimbam_dir, genofile_name,
            bimbam_dir, genofile_name)
        gwa_output_filename = group.name + "_GWA_" + ''.join(
            random.choice(suffix_chars) for _ in range(6))
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                flat_files('mapping'), group.name, bimbam_dir,
                genofile_name, TEMPDIR, gwa_output_filename)
        else:
            gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                bimbam_dir, genofile_name, TEMPDIR, gwa_output_filename)
    else:
        bimbam_dir = flat_files('genotype/bimbam')
        gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
            bimbam_dir, genofile_name, bimbam_dir, genofile_name,
            bimbam_dir, genofile_name, bimbam_dir, genofile_name)
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (
                flat_files('mapping'), group.name, out_dir, genofile_name)
        else:
            gemma_command += ' -outdir %s -debug -o %s_output' % (
                out_dir, genofile_name)

    logger.debug("gemma_command:" + gemma_command)
    os.system(gemma_command)

    # Choose the parser by what was actually run.  Testing use_loco alone
    # hit an unbound gwa_output_filename when method == "gemma", because
    # that branch never performs a LOCO run.
    if gwa_output_filename is not None:
        marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
    else:
        marker_obs = parse_gemma_output(genofile_name)

    return marker_obs
def check_plink_gemma():
    """Tell whether a PLINK ``.bed`` file exists for the current object.

    Returns True only when the "mapping" flat-file directory exists and
    contains ``<self.name>.bed``; otherwise False.

    NOTE(review): ``self`` is not a parameter here, so this presumably runs
    as a closure inside a method -- confirm against the enclosing scope.
    """
    if not flat_file_exists("mapping"):
        return False

    bed_path = flat_files("mapping") + "/" + self.name + ".bed"
    return True if os.path.isfile(bed_path) else False
# Temporary storage lives below TEMPDIR, which must be writable.
assert_writable_dir(TEMPDIR)
TMPDIR = mk_dir(TEMPDIR + '/gn2/')
assert_writable_dir(TMPDIR)

CACHEDIR = mk_dir(TMPDIR + '/cache/')
# Generated output goes below TMPDIR; the git tree is no longer written to.
GENERATED_IMAGE_DIR = mk_dir(TMPDIR + 'generated/')
GENERATED_TEXT_DIR = mk_dir(TMPDIR + 'generated_text/')

# Verify that each of the directories created above is writable.
for _created_dir in (CACHEDIR, GENERATED_IMAGE_DIR, GENERATED_TEXT_DIR):
    assert_writable_dir(_created_dir)

# Flat file directories
GENODIR = flat_files('genotype') + '/'
assert_dir(GENODIR)

# JSON genotypes are OBSOLETE.  When the current location is not a valid
# path, fall back on the old location (move the dir, FIXME).
JSON_GENODIR = flat_files('genotype/json') + '/'
JSON_GENODIR = (JSON_GENODIR if valid_path(JSON_GENODIR)
                else flat_files('json'))

# Are we using the following...?
PORTADDR = "http://50.16.251.170"
INFOPAGEHREF = '/dbdoc/%s.html'
# XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
CGIDIR = '/webqtl/'
SCRIPTFILE = 'main.py'
def run_reaper(this_trait,
               this_dataset,
               samples,
               vals,
               json_data,
               num_perm,
               boot_check,
               num_bootstrap,
               do_control,
               control_marker,
               manhattan_plot,
               first_run=True,
               output_files=None):
    """Generates p-values for each marker using qtlreaper.

    On a first run the phenotype file is written and reaper is executed
    through the shell; otherwise the file names of an earlier run are taken
    from ``output_files`` and only the parsing step is repeated.

    Args:
        this_trait: Trait being mapped (its ``name`` goes into file names).
        this_dataset: Dataset of the trait; its group supplies file names.
        samples: Sample names, passed to ``gen_pheno_txt_file``.
        vals: Phenotype values, passed to ``gen_pheno_txt_file``.
        json_data: Not used by this function.
        num_perm: Number of permutations; ``> 0`` enables permutation
            output.
        boot_check: Truthy to request bootstrap output.
        num_bootstrap: Number of bootstraps; must be ``> 0`` to take
            effect.
        do_control: The string ``"true"`` enables the control marker.
        control_marker: Name of the control marker, or ``""``.
        manhattan_plot: Unless equal to ``True``, ``--interval 1`` is
            added to the command.
        first_run: When false, reaper is not executed again.
        output_files: ``[output, permutation, bootstrap]`` file names of
            an earlier run; required when ``first_run`` is false.

    Returns:
        tuple: ``(marker_obs, permu_vals, suggestive, significant,
        bootstrap_vals, [output_filename, permu_filename,
        bootstrap_filename])``.
    """
    if first_run:
        group = this_dataset.group
        if group.genofile is not None:
            # Drop the trailing five characters of the genotype file name.
            genofile_name = group.genofile[:-5]
        else:
            genofile_name = group.name

        trait_filename = f"{str(this_trait.name)}_{str(this_dataset.name)}_pheno"
        gen_pheno_txt_file(samples, vals, trait_filename)

        suffix_chars = string.ascii_uppercase + string.digits
        output_filename = (f"{group.name}_GWA_" + ''.join(
            random.choice(suffix_chars) for _ in range(6)))

        bootstrap_filename = None
        permu_filename = None

        opt_list = []
        if boot_check and num_bootstrap > 0:
            bootstrap_filename = (f"{group.name}_BOOTSTRAP_" + ''.join(
                random.choice(suffix_chars) for _ in range(6)))

            opt_list.append("-b")
            opt_list.append(f"--n_bootstrap {str(num_bootstrap)}")
            opt_list.append(
                f"--bootstrap_output {webqtlConfig.GENERATED_IMAGE_DIR}{bootstrap_filename}.txt"
            )
        if num_perm > 0:
            # The missing f-prefix made the literal text
            # "{this_dataset.group.name}" part of the file name.
            permu_filename = (f"{group.name}_PERM_" + ''.join(
                random.choice(suffix_chars) for _ in range(6)))
            opt_list.append("-n " + str(num_perm))
            opt_list.append("--permu_output "
                            + webqtlConfig.GENERATED_IMAGE_DIR
                            + permu_filename + ".txt")
        if control_marker != "" and do_control == "true":
            opt_list.append("-c " + control_marker)
        # Deliberately an equality test: truthy non-True values still get
        # the interval option, exactly as before.
        if manhattan_plot != True:
            opt_list.append("--interval 1")

        reaper_command = (
            REAPER_COMMAND
            + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(
                flat_files('genotype'), genofile_name, TEMPDIR,
                trait_filename, " ".join(opt_list),
                webqtlConfig.GENERATED_IMAGE_DIR, output_filename))

        logger.debug("reaper_command:" + reaper_command)
        os.system(reaper_command)
    else:
        output_filename, permu_filename, bootstrap_filename = output_files

    marker_obs, permu_vals, bootstrap_vals = parse_reaper_output(
        output_filename, permu_filename, bootstrap_filename)

    suggestive = 0
    significant = 0
    if len(permu_vals) > 0:
        # Thresholds are read at the 37% and 95% positions of permu_vals.
        suggestive = permu_vals[int(num_perm * 0.37 - 1)]
        significant = permu_vals[int(num_perm * 0.95 - 1)]

    return (marker_obs, permu_vals, suggestive, significant, bootstrap_vals,
            [output_filename, permu_filename, bootstrap_filename])
# Temporary storage lives below TEMPDIR.
TMPDIR = mk_dir(TEMPDIR + '/gn2/')
assert_writable_dir(TMPDIR)

CACHEDIR = mk_dir(TMPDIR + '/cache/')
# Generated output goes below TMPDIR; the git tree is no longer written to.
GENERATED_IMAGE_DIR = mk_dir(TMPDIR + 'generated/')
GENERATED_TEXT_DIR = mk_dir(TMPDIR + 'generated_text/')

# Verify that each of the directories created above is writable.
for _created_dir in (CACHEDIR, GENERATED_IMAGE_DIR, GENERATED_TEXT_DIR):
    assert_writable_dir(_created_dir)

# Flat file directories
GENODIR = flat_files('genotype') + '/'
assert_dir(GENODIR)
# The bimbam sub-directory is needed for gemma.
assert_dir(GENODIR + 'bimbam')

# JSON genotypes are OBSOLETE.  When the current location is not a valid
# path, fall back on the old location (move the dir, FIXME).
JSON_GENODIR = flat_files('genotype/json') + '/'
JSON_GENODIR = (JSON_GENODIR if valid_path(JSON_GENODIR)
                else flat_files('json'))

# Are we using the following...?
PORTADDR = "http://50.16.251.170"
INFOPAGEHREF = '/dbdoc/%s.html'
# XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
CGIDIR = '/webqtl/'
SCRIPTFILE = 'main.py'
def get_genotypes(group_name, file_format="csv", dataset_name=None):
    """Return a group's genotypes as a downloadable response.

    Args:
        group_name: Name of the group whose genotype files are read.
        file_format: ``"csv"`` or ``"geno"`` serves the ``.geno`` file as
            tab-separated text; ``"rqtl2"`` serves a zip archive with the
            R/qtl2 control file, genotypes, genetic map and phenotypes;
            anything else serves the BIMBAM genotype file as CSV.
        dataset_name: Optional dataset name; for ``"rqtl2"`` it names the
            archive members and selects the phenotypes to fetch.

    The request argument ``limit_to``, when it is a positive integer, caps
    the number of data rows for the two text formats.

    Returns:
        A response carrying the file as an attachment, or the result of
        ``return_error`` with code 204 when the needed file is missing.
    """
    def _no_results():
        return return_error(code=204,
                            source=request.url_rule.rule,
                            title="No Results",
                            details="")

    limit_num = None
    if 'limit_to' in request.args:
        if request.args['limit_to'].isdigit():
            limit_num = int(request.args['limit_to'])

    si = StringIO.StringIO()
    if file_format == "csv" or file_format == "geno":
        filename = group_name + ".geno"
        geno_path = "{0}/{1}.geno".format(flat_files("genotype"), group_name)
        if not os.path.isfile(geno_path):
            return _no_results()

        output_lines = []
        with open(geno_path) as genofile:
            i = 0
            for line in genofile:
                if line.startswith(("#", "@")):
                    # Comment/metadata lines pass through and are not
                    # counted against the row limit.
                    output_lines.append([line.strip()])
                else:
                    if limit_num and i >= limit_num:
                        break
                    output_lines.append(line.split())
                    i += 1

        csv_writer = csv.writer(si,
                                delimiter="\t",
                                escapechar="\\",
                                quoting=csv.QUOTE_NONE)
    elif file_format == "rqtl2":
        filename = dataset_name if dataset_name else group_name

        rqtl2_dir = flat_files("genotype/rqtl2")
        geno_path = "{0}/{1}_geno.csv".format(rqtl2_dir, group_name)
        if not os.path.isfile(geno_path):
            return _no_results()

        # Every file is read inside a context manager; the handles used
        # to be left open.
        with open("{0}/{1}.json".format(rqtl2_dir, group_name)) as config_fh:
            yaml_file = json.load(config_fh)
        yaml_file["geno"] = filename + "_geno.csv"
        yaml_file["gmap"] = filename + "_gmap.csv"
        yaml_file["pheno"] = filename + "_pheno.csv"

        with open(geno_path) as geno_fh:
            geno_text = geno_fh.read()
        with open("{0}/{1}_gmap.csv".format(rqtl2_dir,
                                             group_name)) as gmap_fh:
            gmap_text = gmap_fh.read()

        if dataset_name:
            phenotypes = requests.get(
                "http://gn2.genenetwork.org/api/v_pre1/sample_data/"
                + dataset_name)
        else:
            phenotypes = requests.get(
                "http://gn2.genenetwork.org/api/v_pre1/sample_data/"
                + group_name + "Publish")

        memory_file = io.BytesIO()
        with ZipFile(memory_file, 'w', compression=ZIP_DEFLATED) as zf:
            zf.writestr(filename + ".json", json.dumps(yaml_file))
            zf.writestr(filename + "_geno.csv", geno_text)
            zf.writestr(filename + "_gmap.csv", gmap_text)
            zf.writestr(filename + "_pheno.csv", phenotypes.content)

        memory_file.seek(0)

        return send_file(memory_file,
                         attachment_filename=filename + ".zip",
                         as_attachment=True)
    else:
        filename = group_name + ".bimbam"
        # Check for the file that is actually opened.  The previous test
        # looked for <group>.geno, which says nothing about whether the
        # BIMBAM file exists.
        bimbam_path = "{0}/{1}_geno.txt".format(
            flat_files("genotype/bimbam"), group_name)
        if not os.path.isfile(bimbam_path):
            return _no_results()

        output_lines = []
        with open(bimbam_path) as genofile:
            i = 0
            for line in genofile:
                if limit_num and i >= limit_num:
                    break
                output_lines.append(
                    [field.strip() for field in line.split(",")])
                i += 1

        csv_writer = csv.writer(si, delimiter=",")

    csv_writer.writerows(output_lines)
    output = make_response(si.getvalue())
    output.headers["Content-Disposition"] = "attachment; filename=" + filename
    output.headers["Content-type"] = "text/csv"

    return output
def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
    """Generates p-values for each marker using GEMMA.

    Writes the phenotype file (and the covariate file when ``covariates``
    is non-empty), builds the GEMMA or gemma-wrapper command line, runs it
    through the shell and parses the output.

    Args:
        this_dataset: Dataset being mapped; its group supplies file names.
        samples: Not used by this function.
        vals: Phenotype values, passed on to ``gen_pheno_txt_file``.
        covariates: Covariate specification; ``""`` means none.
        method: ``"gemma"`` runs on the PLINK files in
            ``flat_files('mapping')``; any other value runs on the BIMBAM
            files in ``flat_files('genotype/bimbam')``.
        use_loco: The string ``"True"`` runs the BIMBAM analysis through
            gemma-wrapper with ``--loco``.

    Returns:
        The result of ``parse_loco_output`` when a LOCO run was performed,
        otherwise the result of ``parse_gemma_output``.
    """
    group = this_dataset.group
    if group.genofile is not None:
        # Drop the trailing five characters of the genotype file name.
        genofile_name = group.genofile[:-5]
    else:
        genofile_name = group.name

    gen_pheno_txt_file(this_dataset, genofile_name, vals, method)

    out_dir = webqtlConfig.GENERATED_IMAGE_DIR
    assoc_path = "{}{}_output.assoc.txt".format(out_dir, genofile_name)
    if not os.path.isfile(assoc_path):
        # Create an empty placeholder and close it right away; the handle
        # used to be left open.
        with open(assoc_path, "w+"):
            pass

    # The chromosome container is indexed starting at 1.
    this_chromosomes = this_dataset.species.chromosomes.chromosomes
    chr_list_string = ",".join(
        this_chromosomes[i + 1].name for i in range(len(this_chromosomes)))

    if covariates != "":
        gen_covariates_file(this_dataset, covariates)

    suffix_chars = string.ascii_uppercase + string.digits
    # Stays None unless a LOCO run actually produces a GWA output file.
    gwa_output_filename = None

    if method == "gemma":
        mapping_dir = flat_files('mapping')
        gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
            mapping_dir, group.name, mapping_dir, group.name)
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (
                mapping_dir, group.name, out_dir, group.name)
        else:
            gemma_command += ' -outdir %s -o %s_output' % (
                out_dir, group.name)
    elif use_loco == "True":
        bimbam_dir = flat_files('genotype/bimbam')

        # Step 1: compute the per-chromosome kinship matrices.
        k_output_filename = group.name + "_K_" + ''.join(
            random.choice(suffix_chars) for _ in range(6))
        generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (
            bimbam_dir, genofile_name, bimbam_dir, genofile_name,
            bimbam_dir, genofile_name, TEMPDIR, k_output_filename)
        logger.debug("k_command:" + generate_k_command)
        os.system(generate_k_command)

        # Step 2: run the association using the kinship output as input.
        gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (
            TEMPDIR, k_output_filename, bimbam_dir, genofile_name,
            bimbam_dir, genofile_name)
        gwa_output_filename = group.name + "_GWA_" + ''.join(
            random.choice(suffix_chars) for _ in range(6))
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                flat_files('mapping'), group.name, bimbam_dir,
                genofile_name, TEMPDIR, gwa_output_filename)
        else:
            gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                bimbam_dir, genofile_name, TEMPDIR, gwa_output_filename)
    else:
        bimbam_dir = flat_files('genotype/bimbam')
        gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
            bimbam_dir, genofile_name, bimbam_dir, genofile_name,
            bimbam_dir, genofile_name, bimbam_dir, genofile_name)
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (
                flat_files('mapping'), group.name, out_dir, genofile_name)
        else:
            gemma_command += ' -outdir %s -debug -o %s_output' % (
                out_dir, genofile_name)

    logger.debug("gemma_command:" + gemma_command)
    os.system(gemma_command)

    # Choose the parser by what was actually run.  Testing use_loco alone
    # hit an unbound gwa_output_filename when method == "gemma", because
    # that branch never performs a LOCO run.
    if gwa_output_filename is not None:
        marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
    else:
        marker_obs = parse_gemma_output(genofile_name)

    return marker_obs
# External genome-browser URL templates; the %s slots are filled in by the
# code that uses them.
UCSC_RUDI_TRACK_URL = (
    " http://genome.cse.ucsc.edu/cgi-bin/hgTracks"
    "?org=%s&db=%s"
    "&hgt.customText=http://gbic.biol.rug.nl/~ralberts/tracks/%s/%s"
)
GENOMEBROWSER_URL = (
    "http://ucscbrowser.genenetwork.org/cgi-bin/hgTracks"
    "?clade=mammal&org=Mouse&db=mm9&position=%s"
    "&hgt.suggest=&pix=800&Submit=submit"
)
ENSEMBLETRANSCRIPT_URL = (
    "http://useast.ensembl.org/Mus_musculus/Lucene/Details"
    "?species=Mus_musculus;idx=Transcript;end=1;q=%s"
)

# HTMLPATH = GNROOT + 'genotype_files/'
# PYLMM_PATH
# IMGDIR = GNROOT + '/wqflask/wqflask/static/output/'

# Temporary storage:
TMPDIR = mk_dir(TEMPDIR + '/gn2/')
CACHEDIR = mk_dir(TEMPDIR + '/cache/')
# Generated output goes below TMPDIR; the git tree is no longer written to.
GENERATED_IMAGE_DIR = mk_dir(TMPDIR + '/generate/')
GENERATED_TEXT_DIR = mk_dir(TMPDIR + '/generate_text/')

# Flat file directories
GENODIR = flat_files('genotype') + '/'
JSON_GENODIR = assert_dir(GENODIR + 'json/')

# SITENAME = 'GN'
# PORTADDR = "http://50.16.251.170"
# BASEHREF = '<base href="http://50.16.251.170/">'

INFOPAGEHREF = '/dbdoc/%s.html'
# XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
CGIDIR = '/webqtl/'
SCRIPTFILE = 'main.py'

# GLOSSARYFILE = "/glossary.html"
# REFRESHSTR = '<meta http-equiv="refresh" content="5;url=%s' + SCRIPTFILE +'?sid=%s">'
# REFRESHDIR = '%s' + SCRIPTFILE +'?sid=%s'