def gen_covariates_file(this_dataset, covariates):
    """Write a tab-delimited covariate file for use in mapping.

    *covariates* is a comma-separated list of "trait_name:dataset_name"
    descriptors.  Each covariate becomes one column, with values aligned to
    this_dataset's group sample list; samples with no value get the
    missing-data marker "-9".  Rows are samples, columns are covariates.
    """
    columns = []
    for descriptor in covariates.split(","):
        trait_name = descriptor.split(":")[0]
        dataset_ob = create_dataset(descriptor.split(":")[1])
        trait_ob = GeneralTrait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)
        # Refresh the group sample list before aligning values to it.
        this_dataset.group.get_samplelist()
        trait_samples = this_dataset.group.samplelist
        logger.debug("SAMPLES:", trait_samples)
        trait_sample_data = trait_ob.data
        logger.debug("SAMPLE DATA:", trait_sample_data)
        column = [
            trait_sample_data[sample].value
            if sample in trait_sample_data else "-9"
            for sample in trait_samples
        ]
        columns.append(column)
    out_path = "{}/{}_covariates.txt".format(flat_files('mapping'),
                                             this_dataset.group.name)
    with open(out_path, "w") as outfile:
        # Transpose the per-covariate columns into one row per sample.
        for row_index in range(len(columns[0])):
            for column in columns:
                outfile.write(str(column[row_index]) + "\t")
            outfile.write("\n")
def find_outliers(vals):
    """Return (upper_bound, lower_bound) Tukey fences for a set of values.

    Bounds are the hinges (25th/75th percentiles) pushed out by 1.5 times
    the inter-hinge spread.

    >>> find_outliers([3.504, 5.234, 6.123, 7.234, 3.542, 5.341, 7.852, 4.555, 12.537])
    (11.252500000000001, 0.5364999999999993)

    >>> find_outliers([9,12,15,17,31,50,7,5,6,8])
    (32.0, -8.0)

    If there are no vals, returns None for the upper and lower bounds,
    which code that calls it will have to deal with.

    >>> find_outliers([])
    (None, None)
    """
    logger.debug("xerxes vals is:", pf(vals))
    if not vals:
        upper_bound = None
        lower_bound = None
    else:
        stats = corestats.Stats(vals)
        low_hinge = stats.percentile(25)
        up_hinge = stats.percentile(75)
        hstep = (up_hinge - low_hinge) * 1.5
        upper_bound = up_hinge + hstep
        lower_bound = low_hinge - hstep
    logger.debug(pf(locals()))
    return upper_bound, lower_bound
def search_page():
    """Run a search and render its results, caching them in Redis.

    Fix: the original unconditionally re-ran the search even after a Redis
    cache hit, so the freshly unpickled cached result was immediately
    overwritten and the cache never saved any work.  Now the search only
    runs on a cache miss (or when caching is disabled).
    """
    logger.info("in search_page")
    logger.info(request.url)
    result = None
    key = None
    if USE_REDIS:
        with Bench("Trying Redis cache"):
            # Canonical cache key: the sorted, JSON-encoded query args.
            key = "search_results:v1:" + json.dumps(request.args, sort_keys=True)
            logger.debug("key is:", pf(key))
            cached = Redis.get(key)
            if cached:
                logger.info("Redis cache hit on search results!")
                result = pickle.loads(cached)
    else:
        logger.info("Skipping Redis cache (USE_REDIS=False)")
    if result is None:
        # Cache miss (or caching disabled): run the actual search.
        logger.info("request.args is", request.args)
        the_search = search_results.SearchResultPage(request.args)
        result = the_search.__dict__
        logger.debugf("result", result)
        valid_search = result['search_term_exists']
        if USE_REDIS and valid_search:
            # Only cache searches that matched something; expire in an hour.
            Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
            Redis.expire(key, 60 * 60)
    else:
        valid_search = result['search_term_exists']
    if valid_search:
        return render_template("search_result_page.html", **result)
    return render_template("search_error.html")
def trim_results(self, p_values):
    """Return at most the 500 largest p-values.

    Fixes two defects in the original:
    - it fell off the end (returning None) whenever len(p_values) <= 500;
      now the input list is returned unchanged in that case;
    - it sorted the caller's list in place; now sorted() leaves the
      caller's list untouched.
    """
    logger.debug("len_p_values:", len(p_values))
    if len(p_values) > 500:
        # Largest first, truncated to 500 entries.
        return sorted(p_values, reverse=True)[:500]
    return p_values
def gen_covariates_file(this_dataset, covariates):
    """Write a tab-delimited covariate file for mapping.

    *covariates* is a comma-separated list of "trait_name:dataset_name"
    descriptors; each becomes one column aligned to this_dataset's group
    sample list, with "-9" for samples that have no value.
    """
    covariate_list = covariates.split(",")
    covariate_data_object = []
    for covariate in covariate_list:
        this_covariate_data = []
        trait_name = covariate.split(":")[0]
        dataset_ob = create_dataset(covariate.split(":")[1])
        trait_ob = GeneralTrait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)
        #trait_samples = this_dataset.group.all_samples_ordered()
        # Refresh the group's sample list before aligning values to it.
        this_dataset.group.get_samplelist()
        trait_samples = this_dataset.group.samplelist
        logger.debug("SAMPLES:", trait_samples)
        trait_sample_data = trait_ob.data
        logger.debug("SAMPLE DATA:", trait_sample_data)
        for index, sample in enumerate(trait_samples):
            if sample in trait_sample_data:
                sample_value = trait_sample_data[sample].value
                this_covariate_data.append(sample_value)
            else:
                # Conventional missing-data marker.
                this_covariate_data.append("-9")
        covariate_data_object.append(this_covariate_data)
    with open(
            "{}/{}_covariates.txt".format(flat_files('mapping'),
                                          this_dataset.group.name),
            "w") as outfile:
        # Transpose: one output row per sample, one column per covariate.
        for i in range(len(covariate_data_object[0])):
            for this_covariate in covariate_data_object:
                outfile.write(str(this_covariate[i]) + "\t")
            outfile.write("\n")
def loading_page():
    """Render the interstitial "loading" page for a mapping run.

    Copies the whitelisted mapping parameters (plus any per-sample
    "value:<name>" fields) from the POSTed form into start_vars and renders
    loading.html with them.

    Fix: dict.iteritems() was removed in Python 3 (this file already uses
    f-strings, i.e. Python 3 syntax) — replaced with items().  Also
    startswith(('value:')) passed a parenthesized string, not a tuple; the
    redundant parens are dropped.
    """
    logger.info(request.url)
    initial_start_vars = request.form
    logger.debug("Marker regression called with initial_start_vars:",
                 initial_start_vars.items())
    #temp_uuid = initial_start_vars['temp_uuid']
    # Whitelist of form fields forwarded to the template.
    wanted = (
        'temp_uuid', 'trait_id', 'dataset', 'method', 'trimmed_markers',
        'selected_chr', 'chromosomes', 'mapping_scale', 'score_type',
        'suggestive', 'significant', 'num_perm', 'permCheck', 'perm_output',
        'num_bootstrap', 'bootCheck', 'bootstrap_results', 'LRSCheck',
        'covariates', 'maf', 'use_loco', 'manhattan_plot', 'control_marker',
        'control_marker_db', 'do_control', 'genofile', 'pair_scan',
        'startMb', 'endMb', 'graphWidth', 'lrsMax', 'additiveCheck',
        'showSNP', 'showGenes', 'viewLegend', 'haplotypeAnalystCheck',
        'mapmethod_rqtl_geno', 'mapmodel_rqtl_geno',
    )
    start_vars_container = {}
    start_vars = {}
    for key, value in initial_start_vars.items():
        if key in wanted or key.startswith('value:'):
            start_vars[key] = value
    start_vars_container['start_vars'] = start_vars
    rendered_template = render_template("loading.html", **start_vars_container)
    return rendered_template
def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, score_type):
    """Write mapping results to *results_path* as a small header block
    followed by comma-separated rows, one per marker."""
    with open(results_path, "w+") as output_file:
        # Header block: population / dataset / (for ProbeSet) gene info.
        output_file.write("Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n")
        output_file.write("Data Set: " + dataset.fullname + "\n")
        if dataset.type == "ProbeSet":
            output_file.write("Gene Symbol: " + trait.symbol + "\n")
            output_file.write("Location: " + str(trait.chr) + " @ " + str(trait.mb) + " Mb\n")
        output_file.write("\n")
        # Column header row.
        output_file.write("Name,Chr,")
        if mapping_scale == "physic":
            output_file.write("Mb," + score_type)
        else:
            output_file.write("Cm," + score_type)
        if "additive" in markers[0].keys():
            output_file.write(",Additive")
        if "dominance" in markers[0].keys():
            output_file.write(",Dominance")
        output_file.write("\n")
        for i, marker in enumerate(markers):
            logger.debug("THE MARKER:", marker)
            # NOTE(review): marker['Mb'] is written even when the header says
            # "Cm" (non-physical scale) — confirm this is intended.
            output_file.write(marker['name'] + "," + str(marker['chr']) + "," + str(marker['Mb']) + ",")
            if "lod_score" in marker.keys():
                output_file.write(str(marker['lod_score']))
            else:
                output_file.write(str(marker['lrs_value']))
            if "additive" in marker.keys():
                output_file.write("," + str(marker['additive']))
            if "dominance" in marker.keys():
                output_file.write("," + str(marker['dominance']))
            # No trailing newline after the final row.
            if i < (len(markers) - 1):
                output_file.write("\n")
def run_plink(this_trait, dataset, species, vals, maf):
    """Run a PLINK association scan (.ped/.map mode) for this trait.

    Writes a phenotype file, shells out to PLINK, parses its output, and
    attaches the resulting p-values to the dataset's markers, which are
    returned.
    """
    out_base = webqtlUtil.genRandStr("%s_%s_" % (dataset.group.name,
                                                 this_trait.name))
    gen_pheno_txt_file_plink(this_trait, dataset, vals,
                             pheno_filename=out_base)
    cmd_args = (PLINK_PATH, dataset.group.name, PLINK_PATH,
                dataset.group.name, TMPDIR, out_base, this_trait.name,
                maf, TMPDIR, out_base)
    plink_command = PLINK_COMMAND + (
        ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno'
        ' --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s'
        ' --missing-phenotype -9999 --out %s%s --assoc ' % cmd_args)
    logger.debug("plink_command:", plink_command)
    os.system(plink_command)
    count, p_values = parse_plink_output(out_base, species)
    logger.debug("p_values:", pf(p_values))
    dataset.group.markers.add_pvalues(p_values)
    return dataset.group.markers.markers
def search_page():
    """Run a search and render its results, caching them in Redis."""
    logger.info("in search_page")
    logger.info(request.url)
    result = None
    if USE_REDIS:
        with Bench("Trying Redis cache"):
            # Canonical cache key: sorted, JSON-encoded query args.
            key = "search_results:v1:" + \
                json.dumps(request.args, sort_keys=True)
            logger.debug("key is:", pf(key))
            result = Redis.get(key)
            if result:
                logger.info("Redis cache hit on search results!")
                result = pickle.loads(result)
    else:
        logger.info("Skipping Redis cache (USE_REDIS=False)")
    logger.info("request.args is", request.args)
    # NOTE(review): the search runs unconditionally, so a cache hit above is
    # immediately overwritten — confirm whether the cached result should be
    # returned instead.
    the_search = SearchResultPage(request.args)
    result = the_search.__dict__
    valid_search = result['search_term_exists']
    if USE_REDIS and valid_search:
        # Only cache searches that matched something; expire in one hour.
        Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
        Redis.expire(key, 60 * 60)
    if valid_search:
        return render_template("search_result_page.html", **result)
    else:
        return render_template("search_error.html")
def connect_db():
    """before_request hook: lazily create the shared SQLAlchemy engine on g."""
    logger.info("@app.before_request connect_db")
    if getattr(g, '_database', None) is None:
        logger.debug("Get new database connector")
        g.db = g._database = sqlalchemy.create_engine(SQL_URI,
                                                      encoding="latin1")
        logger.debug(g.db)
def init_db():
    """Create all model tables on the configured engine."""
    # import all modules here that might define models so that
    # they will be registered properly on the metadata.  Otherwise
    # you will have to import them first before calling init_db()
    #import yourapplication.models
    import wqflask.model
    logger.debug("Creating all model metadata")
    Base.metadata.create_all(bind=engine)
    logger.info("Done creating all model metadata")
def loading_page():
    """Render the interstitial "loading" page for a mapping run.

    Copies the whitelisted mapping parameters (plus any per-sample
    "value:<name>" fields) from the POSTed form into start_vars.
    """
    logger.info(request.url)
    initial_start_vars = request.form
    logger.debug("Marker regression called with initial_start_vars:",
                 initial_start_vars.items())
    #temp_uuid = initial_start_vars['temp_uuid']
    # Whitelist of form fields passed through to the template.
    wanted = (
        'temp_uuid', 'trait_id', 'dataset', 'method', 'trimmed_markers',
        'selected_chr', 'chromosomes', 'mapping_scale', 'score_type',
        'suggestive', 'significant', 'num_perm', 'permCheck', 'perm_output',
        'num_bootstrap', 'bootCheck', 'bootstrap_results', 'LRSCheck',
        'covariates', 'maf', 'use_loco', 'manhattan_plot', 'control_marker',
        'control_marker_db', 'do_control', 'genofile', 'pair_scan',
        'startMb', 'endMb', 'graphWidth', 'lrsMax', 'additiveCheck',
        'showSNP', 'showGenes', 'viewLegend', 'haplotypeAnalystCheck',
        'mapmethod_rqtl_geno', 'mapmodel_rqtl_geno'
    )
    start_vars_container = {}
    start_vars = {}
    # dict.iteritems() is the Python 2 API.
    for key, value in initial_start_vars.iteritems():
        # ('value:') is just a parenthesized string; this keeps per-sample
        # fields such as "value:BXD1".
        if key in wanted or key.startswith(('value:')):
            start_vars[key] = value
    start_vars_container['start_vars'] = start_vars
    rendered_template = render_template("loading.html", **start_vars_container)
    return rendered_template
def get_lod_score_cutoff(self):
    """Return 1 when more than 1000 markers have LOD > 1, otherwise 0."""
    logger.debug("INSIDE GET LOD CUTOFF")
    high_qtl_count = sum(
        1 for marker in self.dataset.group.markers.markers
        if marker['lod_score'] > 1)
    return 1 if high_qtl_count > 1000 else 0
def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm,
               boot_check, num_bootstrap, do_control, control_marker,
               manhattan_plot, first_run=True, output_files=None):
    """Generates p-values for each marker using qtlreaper.

    On first_run the phenotype file is written and the external qtlreaper
    binary is invoked (optionally with bootstrap/permutation/control-marker
    options); otherwise the previously produced output files are re-parsed.

    Returns (marker_obs, permu_vals, suggestive, significant,
    bootstrap_vals, [output_filename, permu_filename, bootstrap_filename]).
    """
    if first_run:
        # Genofile names end in ".geno"; strip that suffix when present.
        if this_dataset.group.genofile != None:
            genofile_name = this_dataset.group.genofile[:-5]
        else:
            genofile_name = this_dataset.group.name
        trait_filename = str(this_trait.name) + "_" + str(this_dataset.name) + "_pheno"
        gen_pheno_txt_file(samples, vals, trait_filename)
        # Random suffix keeps concurrent runs from clobbering each other.
        output_filename = this_dataset.group.name + "_GWA_" + ''.join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
        bootstrap_filename = None
        permu_filename = None
        opt_list = []
        if boot_check and num_bootstrap > 0:
            bootstrap_filename = this_dataset.group.name + "_BOOTSTRAP_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
            opt_list.append("-b")
            opt_list.append("--n_bootstrap " + str(num_bootstrap))
            opt_list.append("--bootstrap_output " + webqtlConfig.GENERATED_IMAGE_DIR + bootstrap_filename + ".txt")
        if num_perm > 0:
            permu_filename = this_dataset.group.name + "_PERM_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
            opt_list.append("-n " + str(num_perm))
            opt_list.append("--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt")
        if control_marker != "" and do_control == "true":
            opt_list.append("-c " + control_marker)
        reaper_command = REAPER_COMMAND + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(
            flat_files('genotype'), genofile_name, TEMPDIR, trait_filename,
            " ".join(opt_list), webqtlConfig.GENERATED_IMAGE_DIR, output_filename)
        logger.debug("reaper_command:" + reaper_command)
        os.system(reaper_command)
    else:
        # Re-use the files produced by an earlier run.
        output_filename, permu_filename, bootstrap_filename = output_files
    marker_obs, permu_vals, bootstrap_vals = parse_reaper_output(
        output_filename, permu_filename, bootstrap_filename)
    suggestive = 0
    significant = 0
    if len(permu_vals) > 0:
        # Suggestive/significant thresholds at the 37th and 95th
        # percentiles of the permutation distribution.
        suggestive = permu_vals[int(num_perm*0.37-1)]
        significant = permu_vals[int(num_perm*0.95-1)]
    return marker_obs, permu_vals, suggestive, significant, bootstrap_vals, [output_filename, permu_filename, bootstrap_filename]
def __init__(self, dataset, sample_names, this_trait, sample_group_type, header):
    """Build the list of sample/case objects to be shown in the trait table.

    Every name in *sample_names* yields a sample object — existing trait
    data where available, otherwise a fresh webqtlCaseData — with extra
    info, table ids, and any per-dataset extra attributes attached.
    """
    self.dataset = dataset
    self.this_trait = this_trait
    self.sample_group_type = sample_group_type    # primary or other
    self.header = header
    self.sample_list = []  # The actual list
    self.sample_attribute_values = {}
    self.get_attributes()
    # logger.debug("camera: attributes are:", pf(self.attributes))
    if self.this_trait and self.dataset and self.dataset.type == 'ProbeSet':
        self.get_extra_attribute_values()
    for counter, sample_name in enumerate(sample_names, 1):
        sample_name = sample_name.replace("_2nd_", "")
        # ZS - If there's no value for the sample/strain, create the sample
        # object (so samples with no value are still displayed in the table)
        try:
            sample = self.this_trait.data[sample_name]
        except KeyError:
            logger.debug("No sample %s, let's create it now" % sample_name)
            sample = webqtlCaseData.webqtlCaseData(sample_name)
        #sampleNameAdd = ''
        #if fd.RISet == 'AXBXA' and sampleName in ('AXB18/19/20','AXB13/14','BXA8/17'):
        #    sampleNameAdd = HT.Href(url='/mouseCross.html#AXB/BXA', text=HT.Sup('#'), Class='fs12', target="_blank")
        sample.extra_info = {}
        # Special-case link for a few merged AXBXA strains.
        if self.dataset.group.name == 'AXBXA' and sample_name in (
                'AXB18/19/20', 'AXB13/14', 'BXA8/17'):
            sample.extra_info['url'] = "/mouseCross.html#AXB/BXA"
            sample.extra_info['css_class'] = "fs12"
        # logger.debug(" type of sample:", type(sample))
        # DOM id used by the template's table rows.
        if sample_group_type == 'primary':
            sample.this_id = "Primary_" + str(counter)
        else:
            sample.this_id = "Other_" + str(counter)
        #### For extra attribute columns; currently only used by several datasets - Zach
        if self.sample_attribute_values:
            sample.extra_attributes = self.sample_attribute_values.get(
                sample_name, {})
            logger.debug("sample.extra_attributes is",
                         pf(sample.extra_attributes))
        self.sample_list.append(sample)
    logger.debug("self.attributes is", pf(self.attributes))
    self.do_outliers()
    #do_outliers(the_samples)
    logger.debug("*the_samples are [%i]: %s" % (len(self.sample_list),
                                                pf(self.sample_list)))
def __init__(self, dataset, sample_names, this_trait, sample_group_type, header):
    """Collect sample/case objects for display in the trait's sample table.

    Each requested sample name is resolved against the trait's data;
    missing samples get an empty webqtlCaseData placeholder so they still
    appear in the table.
    """
    self.dataset = dataset
    self.this_trait = this_trait
    self.sample_group_type = sample_group_type    # primary or other
    self.header = header
    self.sample_list = []  # The actual list
    self.sample_attribute_values = {}
    self.get_attributes()
    # logger.debug("camera: attributes are:", pf(self.attributes))
    if self.this_trait and self.dataset and self.dataset.type == 'ProbeSet':
        self.get_extra_attribute_values()
    for counter, sample_name in enumerate(sample_names, 1):
        sample_name = sample_name.replace("_2nd_", "")
        # ZS - If there's no value for the sample/strain, create the sample
        # object (so samples with no value are still displayed in the table)
        try:
            sample = self.this_trait.data[sample_name]
        except KeyError:
            logger.debug("No sample %s, let's create it now" % sample_name)
            sample = webqtlCaseData.webqtlCaseData(sample_name)
        #sampleNameAdd = ''
        #if fd.RISet == 'AXBXA' and sampleName in ('AXB18/19/20','AXB13/14','BXA8/17'):
        #    sampleNameAdd = HT.Href(url='/mouseCross.html#AXB/BXA', text=HT.Sup('#'), Class='fs12', target="_blank")
        sample.extra_info = {}
        # Special-case link for a few merged AXBXA strains.
        if self.dataset.group.name == 'AXBXA' and sample_name in ('AXB18/19/20','AXB13/14','BXA8/17'):
            sample.extra_info['url'] = "/mouseCross.html#AXB/BXA"
            sample.extra_info['css_class'] = "fs12"
        # logger.debug(" type of sample:", type(sample))
        # DOM id used by the template's table rows.
        if sample_group_type == 'primary':
            sample.this_id = "Primary_" + str(counter)
        else:
            sample.this_id = "Other_" + str(counter)
        #### For extra attribute columns; currently only used by several datasets - Zach
        if self.sample_attribute_values:
            sample.extra_attributes = self.sample_attribute_values.get(sample_name, {})
            logger.debug("sample.extra_attributes is", pf(sample.extra_attributes))
        self.sample_list.append(sample)
    logger.debug("self.attributes is", pf(self.attributes))
    self.do_outliers()
    #do_outliers(the_samples)
    logger.debug("*the_samples are [%i]: %s" % (len(self.sample_list),
                                                pf(self.sample_list)))
def run_plink(this_trait, dataset, species, vals, maf):
    """Run a PLINK association scan (--bfile mode) for this trait.

    Writes the phenotype file, shells out to PLINK, parses its output, and
    attaches the resulting p-values to the dataset's markers, which are
    returned.
    """
    output_base = webqtlUtil.genRandStr(
        f"{dataset.group.name}_{this_trait.name}_")
    gen_pheno_txt_file(dataset, vals)
    plink_command = (
        f"{PLINK_COMMAND} --noweb --bfile {flat_files('mapping')}/"
        f"{dataset.group.name} --no-pheno --no-fid --no-parents --no-sex "
        f"--maf {maf} --out {TMPDIR}{output_base} --assoc ")
    logger.debug("plink_command:", plink_command)
    os.system(plink_command)
    count, p_values = parse_plink_output(output_base, species)
    logger.debug("p_values:", p_values)
    dataset.group.markers.add_pvalues(p_values)
    return dataset.group.markers.markers
def run_plink(this_trait, dataset, species, vals, maf):
    """Run PLINK (--bfile mode) and attach p-values to the dataset markers."""
    plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(dataset.group.name, this_trait.name))
    gen_pheno_txt_file(dataset, vals)
    plink_command = PLINK_COMMAND + ' --noweb --bfile %s/%s --no-pheno --no-fid --no-parents --no-sex --maf %s --out %s%s --assoc ' % (
        flat_files('mapping'), dataset.group.name, maf, TMPDIR, plink_output_filename)
    logger.debug("plink_command:", plink_command)
    os.system(plink_command)
    count, p_values = parse_plink_output(plink_output_filename, species)
    logger.debug("p_values:", p_values)
    # Merge the per-marker p-values into the shared markers structure.
    dataset.group.markers.add_pvalues(p_values)
    return dataset.group.markers.markers
def gen_human_results(self, pheno_vector, key, temp_uuid):
    """Run pylmm for a human dataset via a Redis-mediated worker.

    Serializes the phenotype/covariate/kinship inputs to Redis under *key*,
    shells out to the pylmm runner, then blocks on a Redis list for the
    results.  Returns (p_values, t_stats).
    """
    file_base = locate(self.dataset.group.name, "mapping")
    plink_input = input.plink(file_base, type='b')
    input_file_name = os.path.join(webqtlConfig.SNP_PATH,
                                   self.dataset.group.name + ".snps.gz")
    # Column vector of phenotypes; intercept-only covariate matrix.
    pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
    covariate_matrix = np.ones((pheno_vector.shape[0], 1))
    # Kinship matrix stored as whitespace-separated floats on disk.
    kinship_matrix = np.fromfile(open(file_base + '.kin', 'r'), sep=" ")
    kinship_matrix.resize(
        (len(plink_input.indivs), len(plink_input.indivs)))
    logger.debug("Before creating params")
    params = dict(
        pheno_vector=pheno_vector.tolist(),
        covariate_matrix=covariate_matrix.tolist(),
        input_file_name=input_file_name,
        kinship_matrix=kinship_matrix.tolist(),
        refit=False,
        temp_uuid=temp_uuid,
        # meta data
        timestamp=datetime.datetime.now().isoformat(),
    )
    logger.debug("After creating params")
    json_params = json.dumps(params)
    # Hand the job parameters to the pylmm worker; expire after one hour.
    Redis.set(key, json_params)
    Redis.expire(key, 60 * 60)
    logger.debug("Before creating the command")
    command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "human")
    logger.debug("command is:", command)
    os.system(command)
    # Block for up to 45 minutes waiting for the worker's result.
    json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
    results = json.loads(json_results[1])
    t_stats = results['t_stats']
    p_values = results['p_values']
    #p_values, t_stats = lmm.run_human(key)
    #p_values, t_stats = lmm.run_human(
    #        pheno_vector,
    #        covariate_matrix,
    #        input_file_name,
    #        kinship_matrix,
    #        loading_progress=tempdata
    #    )
    return p_values, t_stats
def gen_human_results(self, pheno_vector, key, temp_uuid):
    """Run pylmm for a human dataset through a Redis-mediated worker and
    return (p_values, t_stats)."""
    file_base = locate(self.dataset.group.name,"mapping")
    plink_input = input.plink(file_base, type='b')
    input_file_name = os.path.join(webqtlConfig.SNP_PATH,
                                   self.dataset.group.name + ".snps.gz")
    # Column vector of phenotypes; intercept-only covariate matrix.
    pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
    covariate_matrix = np.ones((pheno_vector.shape[0],1))
    # Kinship matrix stored as whitespace-separated floats on disk.
    kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ")
    kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))
    logger.debug("Before creating params")
    params = dict(pheno_vector = pheno_vector.tolist(),
                  covariate_matrix = covariate_matrix.tolist(),
                  input_file_name = input_file_name,
                  kinship_matrix = kinship_matrix.tolist(),
                  refit = False,
                  temp_uuid = temp_uuid,
                  # meta data
                  timestamp = datetime.datetime.now().isoformat(),
                  )
    logger.debug("After creating params")
    json_params = json.dumps(params)
    # Hand the job parameters to the pylmm worker; expire after one hour.
    Redis.set(key, json_params)
    Redis.expire(key, 60*60)
    logger.debug("Before creating the command")
    command = PYLMM_COMMAND+' --key {} --species {}'.format(key, "human")
    logger.debug("command is:", command)
    os.system(command)
    # Block for up to 45 minutes waiting for the worker's result.
    json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
    results = json.loads(json_results[1])
    t_stats = results['t_stats']
    p_values = results['p_values']
    #p_values, t_stats = lmm.run_human(key)
    #p_values, t_stats = lmm.run_human(
    #        pheno_vector,
    #        covariate_matrix,
    #        input_file_name,
    #        kinship_matrix,
    #        loading_progress=tempdata
    #    )
    return p_values, t_stats
def check_access_permissions():
    """before_request hook: redirect to the no-access page when the current
    user may not view the requested dataset/trait.

    Returns None (continue the request) when no dataset is requested, the
    dataset is "Temp", or view permission is granted.
    """
    logger.debug("@app.before_request check_access_permissions")
    available = True
    if 'dataset' not in request.args:
        return
    permissions = DEFAULT_PRIVILEGES
    if request.args['dataset'] != "Temp":
        dataset = create_dataset(request.args['dataset'])
        if dataset.type == "Temp":
            permissions = DEFAULT_PRIVILEGES
        elif 'trait_id' in request.args:
            permissions = check_resource_availability(
                dataset, request.args['trait_id'])
        elif dataset.type != "Publish":
            permissions = check_resource_availability(dataset)
    if 'view' not in permissions['data']:
        return redirect(url_for("no_access_page"))
def run_plink(this_trait, dataset, species, vals, maf):
    """Run PLINK (--bfile mode) and attach p-values to the dataset markers."""
    plink_output_filename = webqtlUtil.genRandStr(
        "%s_%s_" % (dataset.group.name, this_trait.name))
    gen_pheno_txt_file(dataset, vals)
    plink_command = PLINK_COMMAND + ' --noweb --bfile %s/%s --no-pheno --no-fid --no-parents --no-sex --maf %s --out %s%s --assoc ' % (
        flat_files('mapping'), dataset.group.name, maf, TMPDIR, plink_output_filename)
    logger.debug("plink_command:", plink_command)
    os.system(plink_command)
    count, p_values = parse_plink_output(plink_output_filename, species)
    logger.debug("p_values:", p_values)
    # Merge the per-marker p-values into the shared markers structure.
    dataset.group.markers.add_pvalues(p_values)
    return dataset.group.markers.markers
def get_attributes(self): """Finds which extra attributes apply to this dataset""" # Get attribute names and distinct values for each attribute results = g.db.execute(''' SELECT DISTINCT CaseAttribute.Id, CaseAttribute.Name, CaseAttributeXRef.Value FROM CaseAttribute, CaseAttributeXRef WHERE CaseAttributeXRef.CaseAttributeId = CaseAttribute.Id AND CaseAttributeXRef.ProbeSetFreezeId = %s ORDER BY CaseAttribute.Name''', (str(self.dataset.id),)) self.attributes = {} for attr, values in itertools.groupby(results.fetchall(), lambda row: (row.Id, row.Name)): key, name = attr logger.debug("radish: %s - %s" % (key, name)) self.attributes[key] = Bunch() self.attributes[key].name = name self.attributes[key].distinct_values = [item.Value for item in values] self.attributes[key].distinct_values.sort(key=natural_sort_key)
def create_marker_covariates(control_marker, cross):
    """Build an R covariate matrix from user-specified control marker names.

    *control_marker* is a comma-separated list of marker names; the matching
    genotype columns of *cross* are returned as the covariate matrix.

    Fix: "".split(",") yields [''], so the original's empty-input branch was
    unreachable and an empty control_marker injected c("") into R.  Empty
    entries are now filtered out before building the R vector.
    """
    ro.globalenv["the_cross"] = cross
    ro.r('genotypes <- pull.geno(the_cross)')  # Get the genotype matrix
    # TODO: sanitize user input, Never Ever trust a user
    userinput_sanitized = [
        name for name in control_marker.replace(" ", "").split(",") if name]
    logger.debug(userinput_sanitized)
    if len(userinput_sanitized) > 0:
        covariate_names = ', '.join('"{0}"'.format(w) for w in userinput_sanitized)
        ro.r('covnames <- c(' + covariate_names + ')')
    else:
        ro.r('covnames <- c()')
    # Keep only the requested names that actually exist in the genotypes.
    ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
    ro.r('covnames <- covnames[covInGeno]')
    ro.r("cat('covnames (purged): ', covnames,'\n')")
    # Get the covariate matrix by using the marker name as index to the genotype file
    ro.r('marker_covars <- genotypes[,covnames]')
    # TODO: Create a design matrix from the marker covars for the markers in case of an F2, 4way, etc
    return ro.r["marker_covars"]
def search_page():
    """Render search results (or the data-sharing info page), with Redis
    caching of the search results."""
    logger.info("in search_page")
    if 'info_database' in request.args:
        logger.info("Going to sharing_info_page")
        template_vars = sharing_info_page()
        if template_vars.redirect_url:
            logger.info("Going to redirect")
            return flask.redirect(template_vars.redirect_url)
        else:
            return render_template("data_sharing.html",
                                   **template_vars.__dict__)
    else:
        result = None
        if USE_REDIS:
            with Bench("Trying Redis cache"):
                # Canonical cache key: sorted, JSON-encoded query args.
                key = "search_results:v1:" + json.dumps(request.args,
                                                        sort_keys=True)
                logger.debug("key is:", pf(key))
                result = Redis.get(key)
                if result:
                    logger.info("Redis cache hit on search results!")
                    result = pickle.loads(result)
        else:
            logger.info("Skipping Redis cache (USE_REDIS=False)")
        logger.info("request.args is", request.args)
        # NOTE(review): a cache hit above is overwritten by re-running the
        # search — confirm whether the cached result should be used instead.
        the_search = search_results.SearchResultPage(request.args)
        result = the_search.__dict__
        logger.debugf("result", result)
        if USE_REDIS:
            # Cache for one hour.
            Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
            Redis.expire(key, 60 * 60)
        if result['search_term_exists']:
            return render_template("search_result_page.html", **result)
        else:
            return render_template("search_error.html")
def search_page():
    """Render search results (or the data-sharing info page), with Redis
    caching of the search results."""
    logger.info("in search_page")
    # NOTE(review): logged at error level although nothing failed — possibly
    # meant to be info; confirm.
    logger.error(request.url)
    if 'info_database' in request.args:
        logger.info("Going to sharing_info_page")
        template_vars = sharing_info_page()
        if template_vars.redirect_url:
            logger.info("Going to redirect")
            return flask.redirect(template_vars.redirect_url)
        else:
            return render_template("data_sharing.html",
                                   **template_vars.__dict__)
    else:
        result = None
        if USE_REDIS:
            with Bench("Trying Redis cache"):
                # Canonical cache key: sorted, JSON-encoded query args.
                key = "search_results:v1:" + json.dumps(request.args,
                                                        sort_keys=True)
                logger.debug("key is:", pf(key))
                result = Redis.get(key)
                if result:
                    logger.info("Redis cache hit on search results!")
                    result = pickle.loads(result)
        else:
            logger.info("Skipping Redis cache (USE_REDIS=False)")
        logger.info("request.args is", request.args)
        # NOTE(review): a cache hit above is overwritten by re-running the
        # search — confirm whether the cached result should be used instead.
        the_search = search_results.SearchResultPage(request.args)
        result = the_search.__dict__
        logger.debugf("result", result)
        if USE_REDIS:
            # Cache for one hour.
            Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
            Redis.expire(key, 60*60)
        if result['search_term_exists']:
            return render_template("search_result_page.html", **result)
        else:
            return render_template("search_error.html")
def run_plink(self):
    """Run PLINK (.ped/.map mode) for this trait and attach the resulting
    p-values to the dataset's markers, which are returned."""
    plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))
    self.gen_pheno_txt_file_plink(pheno_filename = plink_output_filename)
    plink_command = PLINK_COMMAND + ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (
        PLINK_PATH, self.dataset.group.name, PLINK_PATH,
        self.dataset.group.name, TMPDIR, plink_output_filename,
        self.this_trait.name, self.maf, TMPDIR, plink_output_filename)
    logger.debug("plink_command:", plink_command)
    os.system(plink_command)
    count, p_values = self.parse_plink_output(plink_output_filename)
    #for marker in self.dataset.group.markers.markers:
    #    if marker['name'] not in included_markers:
    #        logger.debug("marker:", marker)
    #        self.dataset.group.markers.markers.remove(marker)
    #        #del self.dataset.group.markers.markers[marker]
    logger.debug("p_values:", pf(p_values))
    # Merge the per-marker p-values into the shared markers structure.
    self.dataset.group.markers.add_pvalues(p_values)
    return self.dataset.group.markers.markers
def run_plink(this_trait, dataset, species, vals, maf):
    """Run PLINK (--bfile mode) and attach p-values to the dataset markers."""
    plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(dataset.group.name, this_trait.name))
    gen_pheno_txt_file(dataset, vals)
    #gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = plink_output_filename)
    # NOTE(review): this variant writes "--out %s/%s" (extra slash) where the
    # sibling variants use "%s%s" — confirm TMPDIR's trailing-slash convention.
    plink_command = PLINK_COMMAND + ' --noweb --bfile %s/%s --no-fid --no-parents --no-sex --maf %s --missing-phenotype -9 --out %s/%s --assoc ' % (
        flat_files('mapping'), dataset.group.name, maf, TMPDIR, plink_output_filename)
    logger.debug("plink_command:", plink_command)
    os.system(plink_command)
    count, p_values = parse_plink_output(plink_output_filename, species)
    #for marker in self.dataset.group.markers.markers:
    #    if marker['name'] not in included_markers:
    #        logger.debug("marker:", marker)
    #        self.dataset.group.markers.markers.remove(marker)
    #        #del self.dataset.group.markers.markers[marker]
    logger.debug("p_values:", p_values)
    # Merge the per-marker p-values into the shared markers structure.
    dataset.group.markers.add_pvalues(p_values)
    return dataset.group.markers.markers
def get_attributes(self): """Finds which extra attributes apply to this dataset""" # Get attribute names and distinct values for each attribute results = g.db.execute( ''' SELECT DISTINCT CaseAttribute.Id, CaseAttribute.Name, CaseAttributeXRef.Value FROM CaseAttribute, CaseAttributeXRef WHERE CaseAttributeXRef.CaseAttributeId = CaseAttribute.Id AND CaseAttributeXRef.ProbeSetFreezeId = %s ORDER BY CaseAttribute.Name''', (str(self.dataset.id), )) self.attributes = {} for attr, values in itertools.groupby(results.fetchall(), lambda row: (row.Id, row.Name)): key, name = attr logger.debug("radish: %s - %s" % (key, name)) self.attributes[key] = Bunch() self.attributes[key].name = name self.attributes[key].distinct_values = [ item.Value for item in values ] self.attributes[key].distinct_values.sort(key=natural_sort_key)
def marker_regression_page():
    """Run a marker-regression (mapping) computation and render the results.

    Whitelisted form fields are collected into start_vars, the mapping
    computation runs (result caching is currently disabled), and either the
    pair-scan page or the GN1-style mapping page is rendered.
    """
    initial_start_vars = request.form
    logger.debug("Marker regression called with initial_start_vars:", initial_start_vars.items())
    temp_uuid = initial_start_vars['temp_uuid']
    # Whitelist of form fields forwarded to MarkerRegression.
    wanted = (
        'trait_id', 'dataset', 'method', 'trimmed_markers', 'selected_chr',
        'chromosomes', 'mapping_scale', 'score_type', 'suggestive',
        'significant', 'num_perm', 'permCheck', 'perm_output',
        'num_bootstrap', 'bootCheck', 'bootstrap_results', 'LRSCheck',
        'maf', 'manhattan_plot', 'control_marker', 'control_marker_db',
        'do_control', 'genofile', 'pair_scan', 'startMb', 'endMb',
        'graphWidth', 'lrsMax', 'additiveCheck', 'showSNP', 'showGenes',
        'viewLegend', 'haplotypeAnalystCheck', 'mapmethod_rqtl_geno',
        'mapmodel_rqtl_geno'
    )
    start_vars = {}
    # dict.iteritems() is the Python 2 API; "value:<sample>" fields are
    # also kept.
    for key, value in initial_start_vars.iteritems():
        if key in wanted or key.startswith(('value:')):
            start_vars[key] = value
    logger.debug("Marker regression called with start_vars:", start_vars)
    version = "v3"
    key = "marker_regression:{}:".format(version) + json.dumps(start_vars, sort_keys=True)
    logger.info("key is:", pf(key))
    with Bench("Loading cache"):
        result = None  # Just for testing
        #result = Redis.get(key)
        #logger.info("************************ Starting result *****************")
        #logger.info("result is [{}]: {}".format(type(result), result))
        #logger.info("************************ Ending result ********************")
    if result:
        # Unreachable while the Redis read above stays disabled.
        logger.info("Cache hit!!!")
        with Bench("Loading results"):
            result = pickle.loads(result)
    else:
        logger.info("Cache miss!!!")
        with Bench("Total time in MarkerRegression"):
            template_vars = marker_regression.MarkerRegression(start_vars, temp_uuid)
        if template_vars.mapping_method != "gemma" and template_vars.mapping_method != "plink":
            template_vars.js_data = json.dumps(template_vars.js_data,
                                               default=json_default_handler,
                                               indent=" ")
        result = template_vars.__dict__
    if result['pair_scan']:
        with Bench("Rendering template"):
            img_path = result['pair_scan_filename']
            logger.info("img_path:", img_path)
            initial_start_vars = request.form
            logger.info("initial_start_vars:", initial_start_vars)
            imgfile = open(TEMPDIR + img_path, 'rb')
            imgdata = imgfile.read()
            # NOTE(review): bytes.encode("base64") only exists on Python 2;
            # on Python 3 this would need base64.b64encode.
            imgB64 = imgdata.encode("base64")
            bytesarray = array.array('B', imgB64)
            result['pair_scan_array'] = bytesarray
            rendered_template = render_template("pair_scan_results.html", **result)
    else:
        #for item in template_vars.__dict__.keys():
        #    logger.info("  ---**--- {}: {}".format(type(template_vars.__dict__[item]), item))
        gn1_template_vars = marker_regression_gn1.MarkerRegression(result).__dict__
        #pickled_result = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
        #logger.info("pickled result length:", len(pickled_result))
        #Redis.set(key, pickled_result)
        #Redis.expire(key, 1*60)
        with Bench("Rendering template"):
            # gemma/plink results are too large to pass to the template.
            if (gn1_template_vars['mapping_method'] == "gemma") or (gn1_template_vars['mapping_method'] == "plink"):
                gn1_template_vars.pop('qtlresults', None)
            print("TEMPLATE KEYS:", list(gn1_template_vars.keys()))
            rendered_template = render_template("marker_regression_gn1.html", **gn1_template_vars)
    return rendered_template
def connect_db():
    """Ensure the Flask app context carries a SQLAlchemy engine.

    If ``g._database`` is already populated this is a no-op; otherwise a new
    engine is created from ``SQL_URI`` and stored on both ``g.db`` and
    ``g._database`` for reuse during the request.
    """
    existing = getattr(g, '_database', None)
    if existing is not None:
        return
    logger.debug("Get new database connector")
    engine = sqlalchemy.create_engine(SQL_URI)
    g.db = g._database = engine
    logger.debug(g.db)
def marker_regression_page():
    """Run a marker-regression (QTL mapping) job from the submitted form and render its results.

    Duplicate of the earlier marker_regression_page: filters ``request.form``
    to the whitelisted keys, runs ``marker_regression.MarkerRegression``, and
    renders the pair-scan or GN1-style template.

    Returns:
        str: rendered HTML for the results page.
    """
    initial_start_vars = request.form
    logger.debug("Marker regression called with initial_start_vars:", initial_start_vars.items())
    temp_uuid = initial_start_vars['temp_uuid']

    # Whitelist of form fields forwarded to the mapping code.
    wanted = ('trait_id', 'dataset', 'method', 'trimmed_markers', 'selected_chr',
              'chromosomes', 'mapping_scale', 'score_type', 'suggestive',
              'significant', 'num_perm', 'permCheck', 'perm_output',
              'num_bootstrap', 'bootCheck', 'bootstrap_results', 'LRSCheck',
              'maf', 'manhattan_plot', 'control_marker', 'control_marker_db',
              'do_control', 'genofile', 'pair_scan', 'startMb', 'endMb',
              'graphWidth', 'lrsMax', 'additiveCheck', 'showSNP', 'showGenes',
              'viewLegend', 'haplotypeAnalystCheck', 'mapmethod_rqtl_geno',
              'mapmodel_rqtl_geno')
    start_vars = {}
    for key, value in initial_start_vars.iteritems():
        # ('value:') is a plain string; startswith matches per-sample fields.
        if key in wanted or key.startswith(('value:')):
            start_vars[key] = value
    logger.debug("Marker regression called with start_vars:", start_vars)

    version = "v3"
    key = "marker_regression:{}:".format(version) + json.dumps(start_vars, sort_keys=True)
    logger.info("key is:", pf(key))
    with Bench("Loading cache"):
        # NOTE(review): Redis read disabled — result is always None here.
        result = None  # Just for testing
        #result = Redis.get(key)

    if result:
        logger.info("Cache hit!!!")
        with Bench("Loading results"):
            result = pickle.loads(result)
    else:
        logger.info("Cache miss!!!")
        with Bench("Total time in MarkerRegression"):
            template_vars = marker_regression.MarkerRegression(start_vars, temp_uuid)
            # gemma/plink serialize their own js_data elsewhere.
            if template_vars.mapping_method != "gemma" and template_vars.mapping_method != "plink":
                template_vars.js_data = json.dumps(template_vars.js_data,
                                                   default=json_default_handler,
                                                   indent=" ")
        result = template_vars.__dict__

        if result['pair_scan']:
            with Bench("Rendering template"):
                img_path = result['pair_scan_filename']
                logger.info("img_path:", img_path)
                initial_start_vars = request.form
                logger.info("initial_start_vars:", initial_start_vars)
                # Fix: close the image file deterministically instead of
                # leaking the handle until GC.
                with open(TEMPDIR + img_path, 'rb') as imgfile:
                    imgdata = imgfile.read()
                imgB64 = imgdata.encode("base64")
                bytesarray = array.array('B', imgB64)
                result['pair_scan_array'] = bytesarray
                rendered_template = render_template("pair_scan_results.html", **result)
        else:
            gn1_template_vars = marker_regression_gn1.MarkerRegression(result).__dict__
            with Bench("Rendering template"):
                # Presumably qtlresults is too large for the gemma/plink
                # template — TODO confirm.
                if (gn1_template_vars['mapping_method'] == "gemma") or (gn1_template_vars['mapping_method'] == "plink"):
                    gn1_template_vars.pop('qtlresults', None)
                print("TEMPLATE KEYS:", list(gn1_template_vars.keys()))
                rendered_template = render_template("marker_regression_gn1.html", **gn1_template_vars)

    return rendered_template
def shutdown_session(exception=None):
    """Tear down the request-scoped DB session, if one was created.

    Called at app-context teardown; removes the scoped session and clears
    ``g.db`` so a stale engine reference is not reused.
    """
    if getattr(g, '_database', None) is None:
        return
    logger.debug("remove db_session")
    db_session.remove()
    g.db = None
def get_dataset_info(dataset_name, group_name=None, file_format="json"):
    """Look up a dataset by name/id and return its metadata as JSON.

    Searches ProbeSet (mRNA expression) datasets first, then (if group_name
    is given) Phenotype datasets.  Returns a JSON response, or a 204-style
    error payload when nothing matches.

    NOTE(review): file_format is accepted but never read in this body —
    confirm whether callers rely on it.
    NOTE(review): both queries interpolate caller-supplied strings with
    str.format rather than bound parameters — SQL-injection risk if
    dataset_name/group_name can come from untrusted input; verify upstream
    sanitization or switch to parameterized execution.
    """
    #ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets

    datasets_list = []  #ZS: I figure I might as well return a list if there are multiple matches, though I don"t know if this will actually happen in practice

    probeset_query = """
                SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, ProbeFreeze.TissueId, Tissue.Name, ProbeSetFreeze.public, ProbeSetFreeze.confidentiality
                FROM ProbeSetFreeze, ProbeFreeze, Tissue
              """

    # Only public, non-confidential datasets are ever returned.
    where_statement = """
                WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
                      ProbeFreeze.TissueId = Tissue.Id AND
                      ProbeSetFreeze.public > 0 AND
                      ProbeSetFreeze.confidentiality < 1 AND
              """

    # A purely numeric name is treated as a dataset Id; otherwise match any
    # of the dataset's name variants.
    if dataset_name.isdigit():
        where_statement += """
                  ProbeSetFreeze.Id = "{}"
                """.format(dataset_name)
    else:
        where_statement += """
                  (ProbeSetFreeze.Name = "{0}" OR ProbeSetFreeze.Name2 = "{0}" OR
                   ProbeSetFreeze.FullName = "{0}" OR ProbeSetFreeze.ShortName = "{0}")
                """.format(dataset_name)

    probeset_query += where_statement

    probeset_results = g.db.execute(probeset_query)
    dataset = probeset_results.fetchone()

    if dataset:
        dataset_dict = {
            "dataset_type": "mRNA expression",
            "id": dataset[0],
            "name": dataset[1],
            "full_name": dataset[2],
            "short_name": dataset[3],
            "data_scale": dataset[4],
            "tissue_id": dataset[5],
            "tissue": dataset[6],
            "public": dataset[7],
            "confidential": dataset[8]
        }

        datasets_list.append(dataset_dict)

    if group_name:
        # Phenotype lookup: dataset_name here is the PublishXRef.Id within
        # the named group (InbredSet).
        pheno_query = """
                   SELECT PublishXRef.Id, Phenotype.Post_publication_abbreviation, Phenotype.Post_publication_description,
                          Phenotype.Pre_publication_abbreviation, Phenotype.Pre_publication_description,
                          Publication.PubMed_ID, Publication.Title, Publication.Year
                   FROM PublishXRef, Phenotype, Publication, InbredSet, PublishFreeze
                   WHERE PublishXRef.InbredSetId = InbredSet.Id AND
                         PublishXRef.PhenotypeId = Phenotype.Id AND
                         PublishXRef.PublicationId = Publication.Id AND
                         PublishFreeze.InbredSetId = InbredSet.Id AND
                         PublishFreeze.public > 0 AND
                         PublishFreeze.confidentiality < 1 AND
                         InbredSet.Name = "{0}" AND
                         PublishXRef.Id = "{1}"
                """.format(group_name, dataset_name)

        logger.debug("QUERY:", pheno_query)

        pheno_results = g.db.execute(pheno_query)
        dataset = pheno_results.fetchone()

        if dataset:
            # Prefer post-publication metadata (with publication info) when a
            # PubMed id exists, then pre-publication, then the bare id.
            if dataset[5]:
                dataset_dict = {
                    "dataset_type": "phenotype",
                    "id": dataset[0],
                    "name": dataset[1],
                    "description": dataset[2],
                    "pubmed_id": dataset[5],
                    "title": dataset[6],
                    "year": dataset[7]
                }
            elif dataset[4]:
                dataset_dict = {
                    "dataset_type": "phenotype",
                    "id": dataset[0],
                    "name": dataset[3],
                    "description": dataset[4]
                }
            else:
                dataset_dict = {"dataset_type": "phenotype", "id": dataset[0]}

            datasets_list.append(dataset_dict)

    if len(datasets_list) > 1:
        return flask.jsonify(datasets_list)
    elif len(datasets_list) == 1:
        # Exactly one match: dataset_dict is the single appended entry.
        return flask.jsonify(dataset_dict)
    else:
        return return_error(code=204, source=request.url_rule.rule, title="No Results", details="")
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and prepare template/JS data.

    start_vars: whitelisted form fields (method, permutation/bootstrap
    options, display flags, per-sample "value:<name>" entries, ...).
    temp_uuid: passed through to the GN1 mapping code.

    Side effects: populates self.samples/self.vals, runs the chosen mapping
    backend, and builds self.qtl_results / self.json_data / self.js_data for
    the templates.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    self.temp_uuid = temp_uuid  # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    self.samples = []  # Want only ones with values
    self.vals = []

    all_samples_ordered = self.dataset.group.all_samples_ordered()
    primary_sample_names = list(all_samples_ordered)

    # Collect the per-sample values submitted with the form, preferring the
    # names as stored in the trait data when they exist.
    for sample in self.dataset.group.samplelist:
        # sample is actually the name of an individual
        in_trait_data = False
        for item in self.this_trait.data:
            if self.this_trait.data[item].name == sample:
                value = start_vars['value:' + self.this_trait.data[item].name]
                self.samples.append(self.this_trait.data[item].name)
                self.vals.append(value)
                in_trait_data = True
                break
        if not in_trait_data:
            value = start_vars.get('value:' + sample)
            if value:
                self.samples.append(sample)
                self.vals.append(value)

    self.mapping_method = start_vars['method']
    if start_vars['manhattan_plot'] == "True":
        self.manhattan_plot = True
    else:
        self.manhattan_plot = False

    self.maf = start_vars['maf']  # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
    self.score_type = "LRS"  #ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.num_perm = 0
    self.perm_output = []
    self.bootstrap_results = []

    #ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        if int(start_vars['selected_chr']) != -1:  #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
            self.selected_chr = int(start_vars['selected_chr']) + 1
        else:
            self.selected_chr = int(start_vars['selected_chr'])
    if "startMb" in start_vars:
        self.startMb = start_vars['startMb']
    if "endMb" in start_vars:
        self.endMb = start_vars['endMb']
    if "graphWidth" in start_vars:
        self.graphWidth = start_vars['graphWidth']
    if "lrsMax" in start_vars:
        self.lrsMax = start_vars['lrsMax']
    if "haplotypeAnalystCheck" in start_vars:
        self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
    if "startMb" in start_vars:  #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
        if "permCheck" in start_vars:
            self.permCheck = "ON"
        else:
            self.permCheck = False
        self.num_perm = int(start_vars['num_perm'])

        self.LRSCheck = start_vars['LRSCheck']

        if "showSNP" in start_vars:
            self.showSNP = start_vars['showSNP']
        else:
            self.showSNP = False

        if "showGenes" in start_vars:
            self.showGenes = start_vars['showGenes']
        else:
            self.showGenes = False

        if "viewLegend" in start_vars:
            self.viewLegend = start_vars['viewLegend']
        else:
            self.viewLegend = False
    else:
        # First page load: default the display toggles to ON.
        try:
            if int(start_vars['num_perm']) > 0:
                self.num_perm = int(start_vars['num_perm'])
        except:
            self.num_perm = 0

        if self.num_perm > 0:
            self.permCheck = "ON"
        else:
            self.permCheck = False
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    self.dataset.group.get_markers()

    # Dispatch to the selected mapping backend.
    if self.mapping_method == "gemma":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
        results = marker_obs
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.dataset.group.genofile = start_vars['genofile']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        if start_vars['pair_scan'] == "true":
            self.pair_scan = True
        if self.permCheck and self.num_perm > 0:
            self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
                self.vals, self.dataset, self.method, self.model,
                self.permCheck, self.num_perm, self.do_control,
                self.control_marker, self.manhattan_plot, self.pair_scan)
        else:
            results = rqtl_mapping.run_rqtl_geno(
                self.vals, self.dataset, self.method, self.model,
                self.permCheck, self.num_perm, self.do_control,
                self.control_marker, self.manhattan_plot, self.pair_scan)
    elif self.mapping_method == "reaper":
        if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
            if "additiveCheck" in start_vars:
                self.additiveCheck = start_vars['additiveCheck']
            else:
                self.additiveCheck = False

            if "bootCheck" in start_vars:
                self.bootCheck = "ON"
            else:
                self.bootCheck = False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            try:
                if int(start_vars['num_bootstrap']) > 0:
                    self.bootCheck = "ON"
                    self.num_bootstrap = int(start_vars['num_bootstrap'])
                else:
                    self.bootCheck = False
                    self.num_bootstrap = 0
            except:
                self.bootCheck = False
                self.num_bootstrap = 0

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.dataset.group.genofile = start_vars['genofile']
        logger.info("Running qtlreaper")
        results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.gen_reaper_results(
            self.this_trait, self.dataset, self.samples, self.json_data,
            self.num_perm, self.bootCheck, self.num_bootstrap,
            self.do_control, self.control_marker, self.manhattan_plot)
    elif self.mapping_method == "plink":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf)
        #results = self.run_plink()
    elif self.mapping_method == "pylmm":
        logger.debug("RUNNING PYLMM")
        self.dataset.group.genofile = start_vars['genofile']
        if self.num_perm > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        logger.debug("RUNNING NOTHING")

    if self.pair_scan == True:
        self.qtl_results = []
        highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
        for marker in results:
            if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                    highest_chr = marker['chr1']
                if 'lod_score' in marker.keys():
                    self.qtl_results.append(marker)

        self.trimmed_markers = results

        # Fix: these lists were never initialized (json_data only had
        # 'lodnames'), so the appends below raised KeyError.
        self.json_data['chr1'] = []
        self.json_data['chr2'] = []
        self.json_data['Mb'] = []
        self.json_data['markernames'] = []
        # Fix: was "for qtl in enumerate(self.qtl_results)", which yields
        # (index, marker) tuples and made qtl['chr1'] raise TypeError.
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(json_data=self.json_data,
                            this_trait=self.this_trait.name,
                            data_set=self.dataset.name,
                            maf=self.maf,
                            manhattan_plot=self.manhattan_plot,
                            mapping_scale=self.mapping_scale,
                            qtl_results=self.qtl_results)
    else:
        self.cutoff = 2
        self.qtl_results = []
        highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
        for marker in results:
            if marker['chr'] > 0 or marker['chr'] == "X" or marker['chr'] == "X/Y":
                if marker['chr'] > highest_chr or marker['chr'] == "X" or marker['chr'] == "X/Y":
                    highest_chr = marker['chr']
                if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                    self.qtl_results.append(marker)

        self.trimmed_markers = trim_markers_for_table(results)

        if self.mapping_method != "gemma":
            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
            for index, qtl in enumerate(self.qtl_results):
                #if index<40:
                #    logger.debug("lod score is:", qtl['lod_score'])
                if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                    #logger.debug("changing to X")
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                if 'lrs_value' in qtl.keys():
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

            #Get chromosome lengths for drawing the interval map plot
            chromosome_mb_lengths = {}
            self.json_data['chrnames'] = []
            for key in self.species.chromosomes.chromosomes.keys():
                self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name,
                                                   self.species.chromosomes.chromosomes[key].mb_length])
                chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length

            # logger.debug("json_data:", self.json_data)

            self.js_data = dict(
                result_score_type=self.score_type,
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                chromosomes=chromosome_mb_lengths,
                qtl_results=self.qtl_results,
                num_perm=self.num_perm,
                perm_results=self.perm_output,
            )
def get_bnw_input(start_vars):
    """Log the variables submitted for a Bayesian Network Webserver (BNW) request.

    NOTE(review): currently only logs start_vars and returns None — presumably
    a stub or debugging hook; confirm whether callers expect more.
    """
    logger.debug("BNW VARS:", start_vars)
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and prepare browser/template data.

    Newer variant of the mapping driver: sample values arrive as a JSON blob
    in start_vars['sample_vals'], a subset genofile may restrict the sample
    list, and results are additionally exported to CSV and written out for
    the genome browser.

    start_vars: whitelisted form fields; temp_uuid: passed to GN1 code.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    self.temp_uuid = temp_uuid  # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

    #ZS: Needed to zoom in or remap temp traits like PCA traits
    if "temp_trait" in start_vars and start_vars['temp_trait'] != "False":
        self.temp_trait = "True"
        self.group = self.dataset.group.name

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    #ZS: Sometimes a group may have a genofile that only includes a subset of samples
    genofile_samplelist = []
    if 'genofile' in start_vars:
        if start_vars['genofile'] != "":
            self.genofile_string = start_vars['genofile']
            self.dataset.group.genofile = self.genofile_string.split(":")[0]
            genofile_samplelist = get_genofile_samplelist(self.dataset)

    all_samples_ordered = self.dataset.group.all_samples_ordered()

    # Build the sample/value vectors; "x" marks a sample with no value.
    self.vals = []
    self.samples = []
    self.sample_vals = start_vars['sample_vals']
    sample_val_dict = json.loads(self.sample_vals)
    samples = sample_val_dict.keys()
    if (len(genofile_samplelist) != 0):
        for sample in genofile_samplelist:
            self.samples.append(sample)
            if sample in samples:
                self.vals.append(sample_val_dict[sample])
            else:
                self.vals.append("x")
    else:
        for sample in self.dataset.group.samplelist:
            if sample in samples:
                self.vals.append(sample_val_dict[sample])
                self.samples.append(sample)

    if 'n_samples' in start_vars:
        self.n_samples = start_vars['n_samples']
    else:
        self.n_samples = len([val for val in self.vals if val != "x"])

    #ZS: Check if genotypes exist in the DB in order to create links for markers
    self.geno_db_exists = geno_db_exists(self.dataset)

    self.mapping_method = start_vars['method']
    if "results_path" in start_vars:
        self.mapping_results_path = start_vars['results_path']
    else:
        # Random suffix so concurrent runs don't clobber each other's CSVs.
        mapping_results_filename = self.dataset.group.name + "_" + ''.join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
        self.mapping_results_path = "{}{}.csv".format(
            webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

    self.manhattan_plot = False
    if 'manhattan_plot' in start_vars:
        if start_vars['manhattan_plot'].lower() != "false":
            self.color_scheme = "alternating"
            if "color_scheme" in start_vars:
                self.color_scheme = start_vars['color_scheme']
                if self.color_scheme == "single":
                    self.manhattan_single_color = start_vars['manhattan_single_color']
            self.manhattan_plot = True

    self.maf = start_vars['maf']  # Minor allele frequency
    if "use_loco" in start_vars:
        self.use_loco = start_vars['use_loco']
    else:
        self.use_loco = None
    self.suggestive = ""
    self.significant = ""
    self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
    if 'transform' in start_vars:
        self.transform = start_vars['transform']
    else:
        self.transform = ""
    self.score_type = "LRS"  #ZS: LRS or LOD
    self.mapping_scale = "physic"
    if "mapping_scale" in start_vars:
        self.mapping_scale = start_vars['mapping_scale']
    self.num_perm = 0
    self.perm_output = []
    self.bootstrap_results = []
    self.covariates = start_vars['covariates'] if "covariates" in start_vars else ""
    self.categorical_vars = []

    #ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        if int(start_vars['selected_chr']) != -1:  #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
            self.selected_chr = int(start_vars['selected_chr']) + 1
        else:
            self.selected_chr = int(start_vars['selected_chr'])
    if "startMb" in start_vars:
        self.startMb = start_vars['startMb']
    if "endMb" in start_vars:
        self.endMb = start_vars['endMb']
    if "graphWidth" in start_vars:
        self.graphWidth = start_vars['graphWidth']
    if "lrsMax" in start_vars:
        self.lrsMax = start_vars['lrsMax']
    if "haplotypeAnalystCheck" in start_vars:
        self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
    if "startMb" in start_vars:  #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
        if "permCheck" in start_vars:
            self.permCheck = "ON"
        else:
            self.permCheck = False
        self.num_perm = int(start_vars['num_perm'])

        self.LRSCheck = start_vars['LRSCheck']

        if "showSNP" in start_vars:
            self.showSNP = start_vars['showSNP']
        else:
            self.showSNP = False

        if "showGenes" in start_vars:
            self.showGenes = start_vars['showGenes']
        else:
            self.showGenes = False

        if "viewLegend" in start_vars:
            self.viewLegend = start_vars['viewLegend']
        else:
            self.viewLegend = False
    else:
        # First page load: default the display toggles to ON.
        try:
            if int(start_vars['num_perm']) > 0:
                self.num_perm = int(start_vars['num_perm'])
        except:
            self.num_perm = 0

        if self.num_perm > 0:
            self.permCheck = "ON"
        else:
            self.permCheck = False
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    #self.dataset.group.get_markers()

    # Dispatch to the selected mapping backend.
    if self.mapping_method == "gemma":
        self.first_run = True
        self.output_files = None
        if 'output_files' in start_vars:
            self.output_files = start_vars['output_files']
        if 'first_run' in start_vars:  #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc)
            self.first_run = False
        self.score_type = "-logP"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            # Fix: the former use_loco == "True" / else branches called
            # run_gemma with identical arguments, so the branch was dead
            # duplication; run_gemma receives use_loco and handles it itself.
            marker_obs, self.output_files = gemma_mapping.run_gemma(
                self.this_trait, self.dataset, self.samples, self.vals,
                self.covariates, self.use_loco, self.maf, self.first_run,
                self.output_files)
        results = marker_obs
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        perm_strata = []
        if "perm_strata" in start_vars and "categorical_vars" in start_vars:
            self.categorical_vars = start_vars["categorical_vars"].split(",")
            if len(self.categorical_vars) and start_vars["perm_strata"] == "True":
                primary_samples = SampleList(dataset=self.dataset,
                                             sample_names=self.samples,
                                             this_trait=self.this_trait)
                perm_strata = get_perm_strata(self.this_trait, primary_samples,
                                              self.categorical_vars, self.samples)
        self.score_type = "LOD"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        if 'mapmethod_rqtl_geno' in start_vars:
            self.method = start_vars['mapmethod_rqtl_geno']
        else:
            self.method = "em"
        self.model = start_vars['mapmodel_rqtl_geno']
        #if start_vars['pair_scan'] == "true":
        #    self.pair_scan = True
        if self.permCheck and self.num_perm > 0:
            self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
                self.vals, self.samples, self.dataset, self.mapping_scale,
                self.method, self.model, self.permCheck, self.num_perm,
                perm_strata, self.do_control, self.control_marker,
                self.manhattan_plot, self.pair_scan, self.covariates)
        else:
            results = rqtl_mapping.run_rqtl_geno(
                self.vals, self.samples, self.dataset, self.mapping_scale,
                self.method, self.model, self.permCheck, self.num_perm,
                perm_strata, self.do_control, self.control_marker,
                self.manhattan_plot, self.pair_scan, self.covariates)
    elif self.mapping_method == "reaper":
        if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
            if "additiveCheck" in start_vars:
                self.additiveCheck = start_vars['additiveCheck']
            else:
                self.additiveCheck = False

            if "bootCheck" in start_vars:
                self.bootCheck = "ON"
            else:
                self.bootCheck = False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            try:
                if int(start_vars['num_bootstrap']) > 0:
                    self.bootCheck = "ON"
                    self.num_bootstrap = int(start_vars['num_bootstrap'])
                else:
                    self.bootCheck = False
                    self.num_bootstrap = 0
            except:
                self.bootCheck = False
                self.num_bootstrap = 0

        self.reaper_version = start_vars['reaper_version']

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        logger.info("Running qtlreaper")

        if self.reaper_version == "new":
            self.first_run = True
            self.output_files = None
            if 'first_run' in start_vars:  #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc)
                self.first_run = False
                if 'output_files' in start_vars:
                    self.output_files = start_vars['output_files'].split(",")

            results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper(
                self.this_trait, self.dataset, self.samples, self.vals,
                self.json_data, self.num_perm, self.bootCheck,
                self.num_bootstrap, self.do_control, self.control_marker,
                self.manhattan_plot, self.first_run, self.output_files)
        else:
            results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.run_original_reaper(
                self.this_trait, self.dataset, self.samples, self.vals,
                self.json_data, self.num_perm, self.bootCheck,
                self.num_bootstrap, self.do_control, self.control_marker,
                self.manhattan_plot)
    elif self.mapping_method == "plink":
        self.score_type = "-logP"
        self.manhattan_plot = True
        results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf)
        #results = self.run_plink()
    else:
        logger.debug("RUNNING NOTHING")

    self.no_results = False
    if len(results) == 0:
        self.no_results = True
    else:
        if self.pair_scan == True:
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                    if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                        highest_chr = marker['chr1']
                    if 'lod_score' in list(marker.keys()):
                        self.qtl_results.append(marker)

            self.trimmed_markers = results

            # Fix: these lists were never initialized (json_data only had
            # 'lodnames'), so the appends below raised KeyError.
            self.json_data['chr1'] = []
            self.json_data['chr2'] = []
            self.json_data['Mb'] = []
            self.json_data['markernames'] = []
            # Fix: was "for qtl in enumerate(self.qtl_results)", which yields
            # (index, marker) tuples and made qtl['chr1'] raise TypeError.
            for qtl in self.qtl_results:
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(json_data=self.json_data,
                                this_trait=self.this_trait.name,
                                data_set=self.dataset.name,
                                maf=self.maf,
                                manhattan_plot=self.manhattan_plot,
                                mapping_scale=self.mapping_scale,
                                qtl_results=self.qtl_results)
        else:
            self.qtl_results = []
            self.results_for_browser = []
            self.annotations_for_browser = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if 'Mb' in marker:
                    this_ps = marker['Mb'] * 1000000
                else:
                    this_ps = marker['cM'] * 1000000
                browser_marker = dict(chr=str(marker['chr']),
                                      rs=marker['name'],
                                      ps=this_ps,
                                      url="/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno")
                if self.geno_db_exists == "True":
                    annot_marker = dict(name=str(marker['name']),
                                        chr=str(marker['chr']),
                                        rs=marker['name'],
                                        pos=this_ps,
                                        url="/show_trait?trait_id=" + marker['name'] + "&dataset=" + self.dataset.group.name + "Geno")
                else:
                    annot_marker = dict(name=str(marker['name']),
                                        chr=str(marker['chr']),
                                        rs=marker['name'],
                                        pos=this_ps)
                # LRS is converted to an approximate p-value for the browser
                # via the LRS/4.61 ≈ LOD relation used elsewhere in this file.
                if 'lrs_value' in marker and marker['lrs_value'] > 0:
                    browser_marker['p_wald'] = 10**-(marker['lrs_value'] / 4.61)
                elif 'lod_score' in marker and marker['lod_score'] > 0:
                    browser_marker['p_wald'] = 10**-(marker['lod_score'])
                else:
                    browser_marker['p_wald'] = 0
                self.results_for_browser.append(browser_marker)
                self.annotations_for_browser.append(annot_marker)
                if str(marker['chr']) > '0' or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y":
                    if str(marker['chr']) > str(highest_chr) or str(marker['chr']) == "X" or str(marker['chr']) == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                        if 'Mb' in marker.keys():
                            marker['display_pos'] = "Chr" + str(marker['chr']) + ": " + "{:.6f}".format(marker['Mb'])
                        elif 'cM' in marker.keys():
                            marker['display_pos'] = "Chr" + str(marker['chr']) + ": " + "{:.3f}".format(marker['cM'])
                        else:
                            marker['display_pos'] = "N/A"
                        self.qtl_results.append(marker)

            total_markers = len(self.qtl_results)

            with Bench("Exporting Results"):
                export_mapping_results(self.dataset, self.this_trait,
                                       self.qtl_results,
                                       self.mapping_results_path,
                                       self.mapping_scale, self.score_type)

            with Bench("Trimming Markers for Figure"):
                if len(self.qtl_results) > 30000:
                    self.qtl_results = trim_markers_for_figure(self.qtl_results)
                    self.results_for_browser = trim_markers_for_figure(self.results_for_browser)
                    # Keep only annotations whose marker survived trimming.
                    filtered_annotations = []
                    for marker in self.results_for_browser:
                        for annot_marker in self.annotations_for_browser:
                            if annot_marker['rs'] == marker['rs']:
                                filtered_annotations.append(annot_marker)
                                break
                    self.annotations_for_browser = filtered_annotations
                    browser_files = write_input_for_browser(self.dataset,
                                                            self.results_for_browser,
                                                            self.annotations_for_browser)
                else:
                    browser_files = write_input_for_browser(self.dataset,
                                                            self.results_for_browser,
                                                            self.annotations_for_browser)

            with Bench("Trimming Markers for Table"):
                self.trimmed_markers = trim_markers_for_table(results)

            chr_lengths = get_chr_lengths(self.mapping_scale, self.mapping_method,
                                          self.dataset, self.qtl_results)

            #ZS: For zooming into genome browser, need to pass chromosome name instead of number
            if self.dataset.group.species == "mouse":
                if self.selected_chr == 20:
                    this_chr = "X"
                else:
                    this_chr = str(self.selected_chr)
            elif self.dataset.group.species == "rat":
                if self.selected_chr == 21:
                    this_chr = "X"
                else:
                    this_chr = str(self.selected_chr)
            else:
                if self.selected_chr == 22:
                    this_chr = "X"
                elif self.selected_chr == 23:
                    this_chr = "Y"
                else:
                    this_chr = str(self.selected_chr)

            if self.mapping_method != "gemma":
                if self.score_type == "LRS":
                    significant_for_browser = self.significant / 4.61
                else:
                    significant_for_browser = self.significant

                self.js_data = dict(
                    categorical_vars=self.categorical_vars,
                    chr_lengths=chr_lengths,
                    num_perm=self.num_perm,
                    perm_results=self.perm_output,
                    significant=significant_for_browser,
                    browser_files=browser_files,
                    selected_chr=this_chr,
                    total_markers=total_markers,
                )
            else:
                self.js_data = dict(
                    chr_lengths=chr_lengths,
                    browser_files=browser_files,
                    selected_chr=this_chr,
                    total_markers=total_markers,
                )
def run_analysis(self, requestform):
    """Run a CTL scan over the requested list of traits.

    Parses the user request, builds genotype and phenotype matrices for R,
    invokes the CTL scan, renders result plots to PNG files, and stores the
    significant interactions plus a cytoscape-ready node/edge network in
    ``self.results`` / ``self.elements``.

    :param requestform: form-like mapping with 'trait_list', 'strategy',
        'nperm', 'parametric' and 'significance' keys.
    """
    logger.info("Starting CTL analysis on dataset")
    self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
    self.trait_db_list = [x for x in self.trait_db_list if x]  # drop empty entries
    logger.debug("strategy:", requestform.get("strategy"))
    strategy = requestform.get("strategy")
    logger.debug("nperm:", requestform.get("nperm"))
    nperm = int(requestform.get("nperm"))
    logger.debug("parametric:", requestform.get("parametric"))
    parametric = bool(requestform.get("parametric"))
    logger.debug("significance:", requestform.get("significance"))
    significance = float(requestform.get("significance"))

    # Get the name of the .geno file belonging to the first phenotype
    datasetname = self.trait_db_list[0].split(":")[1]
    dataset = data_set.create_dataset(datasetname)

    genofilelocation = locate(dataset.group.name + ".geno", "genotype")
    parser = genofile_parser.ConvertGenoFile(genofilelocation)
    parser.process_csv()
    logger.debug("dataset group: ", dataset.group)

    # Create a genotype matrix
    individuals = parser.individuals
    markers = []
    markernames = []
    for marker in parser.markers:
        markernames.append(marker["name"])
        markers.append(marker["genotypes"])

    genotypes = list(itertools.chain(*markers))
    logger.debug(len(genotypes) / len(individuals), "==", len(parser.markers))

    rGeno = r_t(ro.r.matrix(r_unlist(genotypes),
                            nrow=len(markernames),
                            ncol=len(individuals),
                            dimnames=r_list(markernames, individuals),
                            byrow=True))

    # Create a phenotype matrix; missing sample values are encoded as "-999"
    traits = []
    for trait in self.trait_db_list:
        logger.debug("retrieving data for", trait)
        if trait != "":
            ts = trait.split(':')
            gt = TRAIT.GeneralTrait(name=ts[0], dataset_name=ts[1])
            gt = TRAIT.retrieve_sample_data(gt, dataset, individuals)
            for ind in individuals:
                if ind in gt.data.keys():
                    traits.append(gt.data[ind].value)
                else:
                    traits.append("-999")

    rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)),
                             nrow=len(self.trait_db_list),
                             ncol=len(individuals),
                             dimnames=r_list(self.trait_db_list, individuals),
                             byrow=True))
    logger.debug(rPheno)

    # Use a data frame to store the objects
    rPheno = r_data_frame(rPheno, check_names=False)
    rGeno = r_data_frame(rGeno, check_names=False)

    # Perform the CTL scan
    res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, nperm=nperm,
                         parametric=parametric, ncores=6)

    # Get significant interactions
    significant = self.r_CTLsignificant(res, significance=significance)

    # Create an image for output
    self.results = {}
    self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
    self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']
    self.results['ctlresult'] = significant
    self.results['requestform'] = requestform  # Store the user specified parameters for the output page

    # Create the lineplot
    r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png')
    self.r_lineplot(res, significance=significance)
    r_dev_off()

    n = 2  # We start from 2, since R starts from 1 :)
    for trait in self.trait_db_list:
        # Create the QTL like CTL plots
        self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png"
        self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)]
        r_png(self.results['imgloc' + str(n)], width=1000, height=600, type='cairo-png')
        self.r_plotCTLobject(res, (n - 1), significance=significance,
                             main='Phenotype ' + trait)
        r_dev_off()
        n = n + 1

    # Flush any output from R
    sys.stdout.flush()

    # Create the interactive graph for cytoscape visualization (Nodes and Edges)
    # FIX: use isinstance() instead of `type(x) == T`, matching the other
    # copy of run_analysis in this file.
    if not isinstance(significant, ri.RNULLType):
        for x in range(len(significant[0])):
            logger.debug(significant[0][x], significant[1][x], significant[2][x])  # Debug to console
            tsS = significant[0][x].split(':')  # Source
            tsT = significant[2][x].split(':')  # Target
            gtS = TRAIT.GeneralTrait(name=tsS[0], dataset_name=tsS[1])  # Retrieve Source info from the DB
            gtT = TRAIT.GeneralTrait(name=tsT[0], dataset_name=tsT[1])  # Retrieve Target info from the DB
            self.addNode(gtS)
            self.addNode(gtT)
            self.addEdge(gtS, gtT, significant, x)

            significant[0][x] = gtS.symbol + " (" + gtS.name + ")"  # Update the trait name for the displayed table
            significant[2][x] = gtT.symbol + " (" + gtT.name + ")"  # Update the trait name for the displayed table

    self.elements = json.dumps(self.nodes_list + self.edges_list)
def gen_data(self, temp_uuid):
    """Generates p-values for each marker.

    For human datasets this delegates to gen_human_results(); otherwise it
    serializes the phenotype/genotype matrices to Redis, shells out to the
    external pylmm runner, blocks until results are pushed back, and
    attaches the resulting p-values to the dataset's markers.

    :param temp_uuid: string uuid used to key the Redis request/response.
    :return: self.dataset.group.markers.markers with p-values attached.
    """
    logger.debug("self.vals is:", self.vals)
    # Missing values ("x" or empty string) become NaN, everything else a
    # float. FIX: the original used the fragile `cond and a or b` idiom;
    # a conditional expression is the safe equivalent.
    pheno_vector = np.array([np.nan if (val == "x" or val == "") else float(val)
                             for val in self.vals])

    key = "pylmm:input:" + temp_uuid
    logger.debug("key is:", pf(key))

    if self.dataset.group.species == "human":
        # Human mapping uses a separate code path.
        p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)
    else:
        logger.debug("NOW CWD IS:", os.getcwd())
        genotype_data = [marker['genotypes']
                         for marker in self.dataset.group.markers.markers]

        no_val_samples = self.identify_empty_samples()
        trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)

        # NOTE(review): the matrix below is built from the *untrimmed*
        # genotype_data; trimmed_genotype_data is computed but never used.
        # Confirm whether the trimmed data should be used here.
        genotype_matrix = np.array(genotype_data).T

        params = dict(pheno_vector=pheno_vector.tolist(),
                      genotype_matrix=genotype_matrix.tolist(),
                      restricted_max_likelihood=True,
                      refit=False,
                      temp_uuid=temp_uuid,

                      # meta data
                      timestamp=datetime.datetime.now().isoformat(),
                      )

    json_params = json.dumps(params)
    # Hand the job to the external pylmm process via Redis.
    Redis.set(key, json_params)
    Redis.expire(key, 60 * 60)
    logger.debug("before printing command")

    command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "other")

    logger.debug("command is:", command)
    logger.debug("after printing command")

    shell(command)

    # Block (up to 45 minutes) waiting for the pylmm worker to push results.
    json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
    results = json.loads(json_results[1])
    p_values = [float(result) for result in results['p_values']]
    t_stats = results['t_stats']

    self.dataset.group.markers.add_pvalues(p_values)

    return self.dataset.group.markers.markers
def gen_data(self, temp_uuid):
    """Generates p-values for each marker"""
    # Serializes phenotype/genotype data to Redis, shells out to the external
    # pylmm runner, and blocks for results; human datasets take a different
    # path via gen_human_results().
    logger.debug("self.vals is:", self.vals)
    # Missing values ("x" or "") map to NaN. np.nan is truthy, so this
    # `cond and a or b` idiom behaves like a ternary here.
    pheno_vector = np.array([(val == "x" or val == "") and np.nan or float(val)
                             for val in self.vals])

    #lmm_uuid = str(uuid.uuid4())

    # Redis key under which the pylmm job input is published.
    key = "pylmm:input:" + temp_uuid
    logger.debug("key is:", pf(key))
    #with Bench("Loading cache"):
    #    result = Redis.get(key)

    if self.dataset.group.species == "human":
        # Human mapping uses a separate code path.
        p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)
        #p_values = self.trim_results(p_values)

    else:
        logger.debug("NOW CWD IS:", os.getcwd())
        genotype_data = [
            marker['genotypes'] for marker in self.dataset.group.markers.markers
        ]

        no_val_samples = self.identify_empty_samples()
        trimmed_genotype_data = self.trim_genotypes(
            genotype_data, no_val_samples)
        # NOTE(review): the matrix below is built from the *untrimmed*
        # genotype_data; trimmed_genotype_data is unused. Confirm intent.
        genotype_matrix = np.array(genotype_data).T

        #logger.debug("pheno_vector: ", pf(pheno_vector))
        #logger.debug("genotype_matrix: ", pf(genotype_matrix))
        #logger.debug("genotype_matrix.shape: ", pf(genotype_matrix.shape))

        #params = {"pheno_vector": pheno_vector,
        #            "genotype_matrix": genotype_matrix,
        #            "restricted_max_likelihood": True,
        #            "refit": False,
        #            "temp_data": tempdata}

        # logger.debug("genotype_matrix:", str(genotype_matrix.tolist()))
        # logger.debug("pheno_vector:", str(pheno_vector.tolist()))

        params = dict(
            pheno_vector=pheno_vector.tolist(),
            genotype_matrix=genotype_matrix.tolist(),
            restricted_max_likelihood=True,
            refit=False,
            temp_uuid=temp_uuid,

            # meta data
            timestamp=datetime.datetime.now().isoformat(),
        )

        json_params = json.dumps(params)
        #logger.debug("json_params:", json_params)
        # Publish the job input; the 1-hour expiry garbage-collects stale jobs.
        Redis.set(key, json_params)
        Redis.expire(key, 60 * 60)
        logger.debug("before printing command")

        command = PYLMM_COMMAND + ' --key {} --species {}'.format(
            key, "other")

        logger.debug("command is:", command)
        logger.debug("after printing command")

        shell(command)

        #t_stats, p_values = lmm.run(key)
        #lmm.run(key)

        # Block (up to 45 minutes) for the pylmm worker to push results.
        json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
        results = json.loads(json_results[1])
        p_values = [float(result) for result in results['p_values']]
        #logger.debug("p_values:", p_values[:10])
        #p_values = self.trim_results(p_values)
        t_stats = results['t_stats']

        #t_stats, p_values = lmm.run(
        #    pheno_vector,
        #    genotype_matrix,
        #    restricted_max_likelihood=True,
        #    refit=False,
        #    temp_data=tempdata
        #)
        #logger.debug("p_values:", p_values)

    self.dataset.group.markers.add_pvalues(p_values)

    #self.get_lod_score_cutoff()

    return self.dataset.group.markers.markers
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and package results for the template.

    Reads sample values and mapping options from start_vars, dispatches to
    the selected mapping tool (gemma / rqtl_plink / rqtl_geno / reaper /
    plink / pylmm), then converts the marker results into the JSON-friendly
    self.json_data / self.js_data structures used by the front-end plots.

    :param start_vars: form-like mapping of user-selected mapping options.
    :param temp_uuid: uuid string forwarded to the mapping back-ends.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    self.samples = []  # Want only ones with values
    self.vals = []

    all_samples_ordered = self.dataset.group.all_samples_ordered()
    primary_sample_names = list(all_samples_ordered)

    # Collect per-sample values submitted as 'value:<sample name>' fields.
    for sample in self.dataset.group.samplelist:
        # sample is actually the name of an individual
        in_trait_data = False
        for item in self.this_trait.data:
            if self.this_trait.data[item].name == sample:
                value = start_vars['value:' + self.this_trait.data[item].name]
                self.samples.append(self.this_trait.data[item].name)
                self.vals.append(value)
                in_trait_data = True
                break
        if not in_trait_data:
            value = start_vars.get('value:' + sample)
            if value:
                self.samples.append(sample)
                self.vals.append(value)

    self.mapping_method = start_vars['method']
    if start_vars['manhattan_plot'] == "True":
        self.manhattan_plot = True
    else:
        self.manhattan_plot = False

    self.maf = start_vars['maf']  # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which template to use
    self.pair_scan = False
    self.score_type = "LRS"  #ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.num_perm = 0
    self.perm_output = []
    self.bootstrap_results = []  #ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
        if int(start_vars['selected_chr']) != -1:
            self.selected_chr = int(start_vars['selected_chr']) + 1
        else:
            self.selected_chr = int(start_vars['selected_chr'])
    if "startMb" in start_vars:
        self.startMb = start_vars['startMb']
    if "endMb" in start_vars:
        self.endMb = start_vars['endMb']
    if "graphWidth" in start_vars:
        self.graphWidth = start_vars['graphWidth']
    if "lrsMax" in start_vars:
        self.lrsMax = start_vars['lrsMax']
    if "haplotypeAnalystCheck" in start_vars:
        self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
    if "startMb" in start_vars:
        #ZS: This is to ensure showGenes, Legend, etc are checked the first time you
        # open the mapping page, since startMb will only not be set during the first load
        if "permCheck" in start_vars:
            self.permCheck = "ON"
        else:
            self.permCheck = False
        self.num_perm = int(start_vars['num_perm'])
        self.LRSCheck = start_vars['LRSCheck']
        if "showSNP" in start_vars:
            self.showSNP = start_vars['showSNP']
        else:
            self.showSNP = False
        if "showGenes" in start_vars:
            self.showGenes = start_vars['showGenes']
        else:
            self.showGenes = False
        if "viewLegend" in start_vars:
            self.viewLegend = start_vars['viewLegend']
        else:
            self.viewLegend = False
    else:
        # First page load: default display options to ON.
        try:
            if int(start_vars['num_perm']) > 0:
                self.num_perm = int(start_vars['num_perm'])
        except (KeyError, ValueError, TypeError):
            # FIX: was a bare `except:`; narrowed to what the lookup and
            # int() conversion can actually raise.
            self.num_perm = 0
        if self.num_perm > 0:
            self.permCheck = "ON"
        else:
            self.permCheck = False
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    self.dataset.group.get_markers()

    # Dispatch to the selected mapping back-end.
    if self.mapping_method == "gemma":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
        results = marker_obs
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.dataset.group.genofile = start_vars['genofile']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        if start_vars['pair_scan'] == "true":
            self.pair_scan = True
        if self.permCheck and self.num_perm > 0:
            self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
                self.vals, self.dataset, self.method, self.model,
                self.permCheck, self.num_perm, self.do_control,
                self.control_marker, self.manhattan_plot, self.pair_scan)
        else:
            results = rqtl_mapping.run_rqtl_geno(
                self.vals, self.dataset, self.method, self.model,
                self.permCheck, self.num_perm, self.do_control,
                self.control_marker, self.manhattan_plot, self.pair_scan)
    elif self.mapping_method == "reaper":
        if "startMb" in start_vars:
            #ZS: Check if first time page loaded, so it can default to ON
            if "additiveCheck" in start_vars:
                self.additiveCheck = start_vars['additiveCheck']
            else:
                self.additiveCheck = False
            if "bootCheck" in start_vars:
                self.bootCheck = "ON"
            else:
                self.bootCheck = False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            try:
                if int(start_vars['num_bootstrap']) > 0:
                    self.bootCheck = "ON"
                    self.num_bootstrap = int(start_vars['num_bootstrap'])
                else:
                    self.bootCheck = False
                    self.num_bootstrap = 0
            except (KeyError, ValueError, TypeError):
                # FIX: was a bare `except:`; narrowed as above.
                self.bootCheck = False
                self.num_bootstrap = 0

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.dataset.group.genofile = start_vars['genofile']
        logger.info("Running qtlreaper")
        results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.gen_reaper_results(
            self.this_trait, self.dataset, self.samples, self.json_data,
            self.num_perm, self.bootCheck, self.num_bootstrap,
            self.do_control, self.control_marker, self.manhattan_plot)
    elif self.mapping_method == "plink":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                          self.species, self.vals, self.maf)
        #results = self.run_plink()
    elif self.mapping_method == "pylmm":
        logger.debug("RUNNING PYLMM")
        self.dataset.group.genofile = start_vars['genofile']
        if self.num_perm > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        logger.debug("RUNNING NOTHING")

    if self.pair_scan == True:
        self.qtl_results = []
        highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
        for marker in results:
            if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                    highest_chr = marker['chr1']
                if 'lod_score' in marker.keys():
                    self.qtl_results.append(marker)

        self.trimmed_markers = results

        # FIX: the original iterated `enumerate(self.qtl_results)`, yielding
        # (index, marker) tuples which were then indexed with string keys —
        # a TypeError. Iterate the markers directly, and make sure the
        # target lists exist before appending.
        self.json_data.setdefault('chr1', [])
        self.json_data.setdefault('chr2', [])
        self.json_data.setdefault('Mb', [])
        self.json_data.setdefault('markernames', [])
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            qtl_results=self.qtl_results,
        )
    else:
        self.cutoff = 2
        self.qtl_results = []
        highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
        for marker in results:
            if marker['chr'] > 0 or marker['chr'] == "X" or marker['chr'] == "X/Y":
                if marker['chr'] > highest_chr or marker['chr'] == "X" or marker['chr'] == "X/Y":
                    highest_chr = marker['chr']
                if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                    self.qtl_results.append(marker)

        self.trimmed_markers = trim_markers_for_table(results)

        if self.mapping_method != "gemma":
            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
            for index, qtl in enumerate(self.qtl_results):
                #if index<40:
                #    logger.debug("lod score is:", qtl['lod_score'])
                if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                    #logger.debug("changing to X")
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                if 'lrs_value' in qtl.keys():
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

        #Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        for key in self.species.chromosomes.chromosomes.keys():
            self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name,
                                               self.species.chromosomes.chromosomes[key].mb_length])
            chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length

        # logger.debug("json_data:", self.json_data)

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
            num_perm=self.num_perm,
            perm_results=self.perm_output,
        )
def run_analysis(self, requestform):
    """Run a CTL scan over the requested list of traits.

    Parses the user request, builds genotype and phenotype matrices for R,
    invokes the CTL scan, renders result plots to PNG files, and stores the
    significant interactions plus a cytoscape-ready node/edge network in
    ``self.results`` / ``self.elements``.
    """
    logger.info("Starting CTL analysis on dataset")
    self.trait_db_list = [
        trait.strip() for trait in requestform['trait_list'].split(',')
    ]
    # Drop empty entries left by trailing/duplicate commas.
    self.trait_db_list = [x for x in self.trait_db_list if x]
    logger.debug("strategy:", requestform.get("strategy"))
    strategy = requestform.get("strategy")
    logger.debug("nperm:", requestform.get("nperm"))
    nperm = int(requestform.get("nperm"))
    logger.debug("parametric:", requestform.get("parametric"))
    parametric = bool(requestform.get("parametric"))
    logger.debug("significance:", requestform.get("significance"))
    significance = float(requestform.get("significance"))

    # Get the name of the .geno file belonging to the first phenotype
    datasetname = self.trait_db_list[0].split(":")[1]
    dataset = data_set.create_dataset(datasetname)

    genofilelocation = locate(dataset.group.name + ".geno", "genotype")
    parser = genofile_parser.ConvertGenoFile(genofilelocation)
    parser.process_csv()
    logger.debug("dataset group: ", dataset.group)

    # Create a genotype matrix
    individuals = parser.individuals
    markers = []
    markernames = []
    for marker in parser.markers:
        markernames.append(marker["name"])
        markers.append(marker["genotypes"])

    # Flatten marker genotypes row-by-row for R's matrix() (byrow=True below).
    genotypes = list(itertools.chain(*markers))
    logger.debug(
        len(genotypes) / len(individuals), "==", len(parser.markers))

    rGeno = r_t(
        ro.r.matrix(r_unlist(genotypes),
                    nrow=len(markernames),
                    ncol=len(individuals),
                    dimnames=r_list(markernames, individuals),
                    byrow=True))

    # Create a phenotype matrix; missing sample values are encoded as "-999"
    traits = []
    for trait in self.trait_db_list:
        logger.debug("retrieving data for", trait)
        if trait != "":
            ts = trait.split(':')
            gt = create_trait(name=ts[0], dataset_name=ts[1])
            gt = retrieve_sample_data(gt, dataset, individuals)
            for ind in individuals:
                if ind in list(gt.data.keys()):
                    traits.append(gt.data[ind].value)
                else:
                    traits.append("-999")

    rPheno = r_t(
        ro.r.matrix(r_as_numeric(r_unlist(traits)),
                    nrow=len(self.trait_db_list),
                    ncol=len(individuals),
                    dimnames=r_list(self.trait_db_list, individuals),
                    byrow=True))

    logger.debug(rPheno)

    # Use a data frame to store the objects
    rPheno = r_data_frame(rPheno, check_names=False)
    rGeno = r_data_frame(rGeno, check_names=False)

    # Debug: Print the genotype and phenotype files to disk
    #r_write_table(rGeno, "~/outputGN/geno.csv")
    #r_write_table(rPheno, "~/outputGN/pheno.csv")

    # Perform the CTL scan
    res = self.r_CTLscan(rGeno,
                         rPheno,
                         strategy=strategy,
                         nperm=nperm,
                         parametric=parametric,
                         nthreads=6)

    # Get significant interactions
    significant = self.r_CTLsignificant(res, significance=significance)

    # Create an image for output
    self.results = {}
    self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
    self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

    self.results['ctlresult'] = significant
    self.results[
        'requestform'] = requestform  # Store the user specified parameters for the output page

    # Create the lineplot
    r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png')
    self.r_lineplot(res, significance=significance)
    r_dev_off()

    n = 2  # We start from 2, since R starts from 1 :)
    for trait in self.trait_db_list:
        # Create the QTL like CTL plots
        self.results['imgurl' +
                     str(n)] = webqtlUtil.genRandStr("CTL_") + ".png"
        self.results[
            'imgloc' +
            str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)]
        r_png(self.results['imgloc' + str(n)],
              width=1000,
              height=600,
              type='cairo-png')
        self.r_plotCTLobject(res, (n - 1),
                             significance=significance,
                             main='Phenotype ' + trait)
        r_dev_off()
        n = n + 1

    # Flush any output from R
    sys.stdout.flush()

    # Create the interactive graph for cytoscape visualization (Nodes and Edges)
    if not isinstance(significant, ri.RNULLType):
        for x in range(len(significant[0])):
            logger.debug(significant[0][x], significant[1][x],
                         significant[2][x])  # Debug to console
            tsS = significant[0][x].split(':')  # Source
            tsT = significant[2][x].split(':')  # Target
            gtS = create_trait(
                name=tsS[0],
                dataset_name=tsS[1])  # Retrieve Source info from the DB
            gtT = create_trait(
                name=tsT[0],
                dataset_name=tsT[1])  # Retrieve Target info from the DB
            self.addNode(gtS)
            self.addNode(gtT)
            self.addEdge(gtS, gtT, significant, x)
            significant[0][x] = "{} ({})".format(
                gtS.symbol,
                gtS.name)  # Update the trait name for the displayed table
            significant[2][x] = "{} ({})".format(
                gtT.symbol,
                gtT.name)  # Update the trait name for the displayed table

    self.elements = json.dumps(self.nodes_list + self.edges_list)
def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
    """Generates p-values for each marker using GEMMA.

    Writes the phenotype (and optional covariate) files, assembles the
    appropriate gemma / gemma-wrapper command line (with or without LOCO),
    runs it through the shell, and parses the resulting association output.

    :param this_dataset: dataset object whose group/species drive file names.
    :param samples: sample names (currently unused here; kept for interface).
    :param vals: per-sample phenotype values.
    :param covariates: comma-separated "trait:dataset" covariate spec, or "".
    :param method: "gemma" for plink-bfile mode, anything else for bimbam.
    :param use_loco: the string "True" to enable leave-one-chromosome-out.
    :return: list of marker observations with p-values.
    """
    # FIX: identity comparison with None (`is not None`), not `!= None`.
    if this_dataset.group.genofile is not None:
        genofile_name = this_dataset.group.genofile[:-5]  # strip ".geno"
    else:
        genofile_name = this_dataset.group.name

    gen_pheno_txt_file(this_dataset, genofile_name, vals, method)

    # Ensure the output file exists so later parsing never hits a missing path.
    output_path = "{}{}_output.assoc.txt".format(
        webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)
    if not os.path.isfile(output_path):
        # FIX: the original `open(...)` leaked the file handle; use `with`
        # to touch the file and close it immediately.
        with open(output_path, "w+"):
            pass

    # Comma-separated chromosome names for gemma-wrapper's --loco option.
    # Chromosomes are keyed 1..N in this mapping.
    this_chromosomes = this_dataset.species.chromosomes.chromosomes
    chr_list_string = ",".join(
        this_chromosomes[i + 1].name for i in range(len(this_chromosomes)))

    if covariates != "":
        gen_covariates_file(this_dataset, covariates)

    # SECURITY NOTE(review): every command below is built by string
    # interpolation and executed via os.system (a shell). Dataset/group
    # names must never contain shell metacharacters; prefer
    # subprocess.run([...], shell=False) with an argument list.
    if method == "gemma":
        gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
            flat_files('mapping'), this_dataset.group.name,
            flat_files('mapping'), this_dataset.group.name)
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (
                flat_files('mapping'), this_dataset.group.name,
                webqtlConfig.GENERATED_IMAGE_DIR, this_dataset.group.name)
        else:
            gemma_command += ' -outdir %s -o %s_output' % (
                webqtlConfig.GENERATED_IMAGE_DIR, this_dataset.group.name)
    else:
        if use_loco == "True":
            # Random suffix keeps concurrent runs from clobbering each
            # other's kinship / GWA files.
            k_output_filename = this_dataset.group.name + "_K_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(6))
            generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name, TEMPDIR,
                k_output_filename)
            logger.debug("k_command:" + generate_k_command)

            os.system(generate_k_command)

            gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (
                TEMPDIR, k_output_filename,
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name)

            gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(6))

            if covariates != "":
                gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                    flat_files('mapping'), this_dataset.group.name,
                    flat_files('genotype/bimbam'), genofile_name, TEMPDIR,
                    gwa_output_filename)
            else:
                gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                    flat_files('genotype/bimbam'), genofile_name, TEMPDIR,
                    gwa_output_filename)
        else:
            gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name)

            if covariates != "":
                gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (
                    flat_files('mapping'), this_dataset.group.name,
                    webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)
            else:
                gemma_command += ' -outdir %s -debug -o %s_output' % (
                    webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)

    logger.debug("gemma_command:" + gemma_command)

    os.system(gemma_command)

    if use_loco == "True":
        marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
    else:
        marker_obs = parse_gemma_output(genofile_name)

    return marker_obs
def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
    """Generates p-values for each marker using GEMMA"""
    # Writes phenotype/covariate files, builds a gemma or gemma-wrapper shell
    # command (with or without LOCO), runs it, and parses the output.

    # NOTE(review): `!= None` should be `is not None`.
    if this_dataset.group.genofile != None:
        genofile_name = this_dataset.group.genofile[:-5]  # strip ".geno"
    else:
        genofile_name = this_dataset.group.name

    gen_pheno_txt_file(this_dataset, genofile_name, vals, method)

    # Touch the output file so later parsing never hits a missing path.
    # NOTE(review): this open() leaks the file handle.
    if not os.path.isfile("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR,
                                                         genofile_name)):
        open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR,
                                            genofile_name), "w+")

    # Build the comma-separated chromosome list for gemma-wrapper's --loco
    # option; chromosomes are keyed 1..N in this mapping.
    this_chromosomes = this_dataset.species.chromosomes.chromosomes
    chr_list_string = ""
    for i in range(len(this_chromosomes)):
        if i < (len(this_chromosomes) - 1):
            chr_list_string += this_chromosomes[i+1].name + ","
        else:
            chr_list_string += this_chromosomes[i+1].name

    if covariates != "":
        gen_covariates_file(this_dataset, covariates)

    # SECURITY NOTE(review): all commands below are built via string
    # interpolation and run through os.system (a shell); names must not
    # contain shell metacharacters.
    if method == "gemma":
        gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (flat_files('mapping'),
                                                                                            this_dataset.group.name,
                                                                                            flat_files('mapping'),
                                                                                            this_dataset.group.name)
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (flat_files('mapping'),
                                                                                   this_dataset.group.name,
                                                                                   webqtlConfig.GENERATED_IMAGE_DIR,
                                                                                   this_dataset.group.name)
        else:
            #gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.sXX.txt -lmm 1 -maf 0.1 -o %s_output' % (flat_files('mapping'),
            gemma_command += ' -outdir %s -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR,
                                                           this_dataset.group.name)
    else:
        if use_loco == "True":
            # Random suffix keeps concurrent runs from clobbering each
            # other's kinship / GWA files.
            k_output_filename = this_dataset.group.name + "_K_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
            generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
                                                                                                                                                                                           genofile_name,
                                                                                                                                                                                           flat_files('genotype/bimbam'),
                                                                                                                                                                                           genofile_name,
                                                                                                                                                                                           flat_files('genotype/bimbam'),
                                                                                                                                                                                           genofile_name,
                                                                                                                                                                                           TEMPDIR,
                                                                                                                                                                                           k_output_filename)
            logger.debug("k_command:" + generate_k_command)

            os.system(generate_k_command)

            gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (TEMPDIR,
                                                                                                                                       k_output_filename,
                                                                                                                                       flat_files('genotype/bimbam'),
                                                                                                                                       genofile_name,
                                                                                                                                       flat_files('genotype/bimbam'),
                                                                                                                                       genofile_name)

            gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))

            if covariates != "":
                gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('mapping'),
                                                                                                                         this_dataset.group.name,
                                                                                                                         flat_files('genotype/bimbam'),
                                                                                                                         genofile_name,
                                                                                                                         TEMPDIR,
                                                                                                                         gwa_output_filename)
            else:
                gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (flat_files('genotype/bimbam'),
                                                                                                 genofile_name,
                                                                                                 TEMPDIR,
                                                                                                 gwa_output_filename)
        else:
            gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (flat_files('genotype/bimbam'),
                                                                                                                                          genofile_name,
                                                                                                                                          flat_files('genotype/bimbam'),
                                                                                                                                          genofile_name,
                                                                                                                                          flat_files('genotype/bimbam'),
                                                                                                                                          genofile_name,
                                                                                                                                          flat_files('genotype/bimbam'),
                                                                                                                                          genofile_name)

            if covariates != "":
                gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (flat_files('mapping'),
                                                                                              this_dataset.group.name,
                                                                                              webqtlConfig.GENERATED_IMAGE_DIR,
                                                                                              genofile_name)
            else:
                gemma_command += ' -outdir %s -debug -o %s_output' % (webqtlConfig.GENERATED_IMAGE_DIR,
                                                                      genofile_name)

    logger.debug("gemma_command:" + gemma_command)

    os.system(gemma_command)

    if use_loco == "True":
        marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
    else:
        marker_obs = parse_gemma_output(genofile_name)

    return marker_obs