def heatmap_page():
    """Render the heatmap page for the traits listed in the submitted form.

    ``trait_list`` is read from ``request.form``.  If its first
    comma-separated entry is blank, the "empty collection" template is
    rendered.  Otherwise the template variables are loaded from Redis, or
    built with ``heatmap.Heatmap`` and stored in Redis with a one-hour
    expiry, and ``heatmap.html`` is rendered with them.
    """
    logger.info("In heatmap, request.form is:", pf(request.form))
    logger.info(request.url)

    start_vars = request.form
    temp_uuid = uuid.uuid4()

    # Only the first entry decides whether there is anything to draw.
    first_trait = start_vars['trait_list'].split(',')[0].strip()
    if first_trait == "":
        return render_template("empty_collection.html", **{'tool': 'Heatmap'})

    version = "v5"
    key = "heatmap:{}:".format(version) + json.dumps(start_vars,
                                                     sort_keys=True)
    logger.info("key is:", pf(key))

    with Bench("Loading cache"):
        cached = Redis.get(key)

    if cached:
        logger.info("Cache hit!!!")
        with Bench("Loading results"):
            result = pickle.loads(cached)
    else:
        logger.info("Cache miss!!!")
        template_vars = heatmap.Heatmap(request.form, temp_uuid)
        template_vars.js_data = json.dumps(template_vars.js_data,
                                           default=json_default_handler,
                                           indent=" ")
        result = template_vars.__dict__

        # Log the type of every template variable.
        for attr_name, attr_value in list(result.items()):
            logger.info(" ---**--- {}: {}".format(type(attr_value),
                                                  attr_name))

        pickled_result = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
        logger.info("pickled result length:", len(pickled_result))
        Redis.set(key, pickled_result)
        Redis.expire(key, 60 * 60)

    with Bench("Rendering template"):
        rendered_template = render_template("heatmap.html", **result)
    return rendered_template
def create_datasets_list():
    """Return a dataset object for every Publish, ProbeSet and Geno dataset.

    With ``USE_REDIS`` set, the pickled list is read from the
    ``all_datasets`` Redis key when present; otherwise it is rebuilt from
    the ``PublishFreeze``, ``ProbeSetFreeze`` and ``GenoFreeze`` tables and
    written back with a one-hour expiry.  Without Redis the list is rebuilt
    on every call.
    """
    key = "all_datasets"

    # Bug fix: the cache value used to be bound only when USE_REDIS was true,
    # so the cache-miss check raised NameError when Redis was disabled.
    cached = Redis.get(key) if USE_REDIS else None
    if cached:
        logger.debug("Redis cache hit")
        # NOTE: unpickling is only safe while this key is written solely by
        # this function.
        return pickle.loads(cached)

    type_dict = {'Publish': 'PublishFreeze',
                 'ProbeSet': 'ProbeSetFreeze',
                 'Geno': 'GenoFreeze'}

    datasets = []
    with Bench("Creating DataSets object"):
        for dataset_type, freeze_table in type_dict.items():
            query = "SELECT Name FROM {}".format(freeze_table)
            for row in fetchall(query):
                # create_dataset is given the type explicitly, so it does
                # not have to work it out from the name again.
                datasets.append(create_dataset(row.Name, dataset_type))

    if USE_REDIS:
        Redis.set(key, pickle.dumps(datasets, pickle.HIGHEST_PROTOCOL))
        Redis.expire(key, 60 * 60)

    return datasets
def search_page():
    """Run (or load from cache) a search and render its result page.

    With ``USE_REDIS`` set, results are looked up under a key derived from
    ``request.args``.  A hit is rendered directly; a miss runs
    ``SearchResultPage`` and stores valid results with a one-hour expiry.
    Searches whose ``search_term_exists`` entry is false render
    ``search_error.html`` and are never cached.
    """
    logger.info("in search_page")
    logger.info(request.url)

    result = None
    if USE_REDIS:
        with Bench("Trying Redis cache"):
            key = "search_results:v1:" + \
                json.dumps(request.args, sort_keys=True)
            logger.debug("key is:", pf(key))
            cached = Redis.get(key)
            if cached:
                logger.info("Redis cache hit on search results!")
                # NOTE: unpickling is only safe while this key space is
                # written solely by this view.
                result = pickle.loads(cached)
    else:
        logger.info("Skipping Redis cache (USE_REDIS=False)")

    # Bug fix: the search used to run even after a cache hit, overwriting
    # the cached result.  It now runs only on a miss.
    if result is None:
        logger.info("request.args is", request.args)
        the_search = SearchResultPage(request.args)
        result = the_search.__dict__
        if USE_REDIS and result['search_term_exists']:
            Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
            Redis.expire(key, 60 * 60)

    if result['search_term_exists']:
        return render_template("search_result_page.html", **result)
    return render_template("search_error.html")
def quick_search(self):
    """Query the QuickSearch table and group the hits by trait kind.

    Every whitespace-separated term of ``self.search_terms`` is escaped,
    prefixed with ``+`` and matched in boolean mode.  Rows whose trait kind
    equals ``self.trait_type`` (or all rows when it is ``'all'``) are stored
    in ``self.results`` (trait kind -> list of result dicts).  Their group
    names are recorded once each in ``self.species_groups``
    (species -> trait kind -> list of group names).
    """
    required_terms = ['+{}'.format(escape(term))
                      for term in self.search_terms.split()]
    search_terms = ' '.join(required_terms)
    print("search_terms are:", search_terms)

    query = (
        " SELECT table_name, the_key, result_fields"
        " FROM QuickSearch"
        " WHERE MATCH (terms)"
        " AGAINST ('{}' IN BOOLEAN MODE) "
    ).format(search_terms)

    with Bench("Doing QuickSearch Query: "):
        dbresults = g.db.execute(query, no_parameters=True).fetchall()

    self.results = collections.defaultdict(list)
    self.species_groups = {}

    # Maps the cross-reference table a hit came from to its trait kind.
    type_dict = {'PublishXRef': 'phenotype',
                 'ProbeSetXRef': 'mrna_assay',
                 'GenoXRef': 'genotype'}

    for row in dbresults:
        trait_kind = type_dict[row.table_name]
        if self.trait_type != trait_kind and self.trait_type != 'all':
            continue

        fields = json.loads(row.result_fields)
        this_result = {
            'table_name': row.table_name,
            'key': row.the_key,
            'result_fields': fields,
        }
        this_species = fields['species']
        this_group = fields['group_name']

        # Create the nested containers on first use, then add each group
        # name at most once.
        groups = self.species_groups.setdefault(
            this_species, {}).setdefault(trait_kind, [])
        if this_group not in groups:
            groups.append(this_group)

        self.results[trait_kind].append(this_result)


import redis

Redis = redis.Redis()
def gn_server(path):
    """Return the JSON record served by GN_SERVER for ``path``.

    ``path`` is appended to ``GN_SERVER_URL``.  The response body is parsed
    with ``json.loads``, logged at debug level and returned.
    """
    with Bench("GN_SERVER", LOG_SQL):
        res = urllib2.urlopen(GN_SERVER_URL + path)
        try:
            rest = res.read()
        finally:
            # Bug fix: the response was never closed.
            res.close()
        res2 = json.loads(rest)
        logger.debug(res2)
        return res2
def fetchall(query):
    """Execute ``query`` via ``logger.sql`` and return its result.

    ``logger.sql`` receives the query together with a callback that runs it
    on ``g.db`` and fetches all rows.  The call is made inside a
    ``Bench("SQL", LOG_SQL)`` block.
    """
    def run_and_fetch_all(sql):
        return g.db.execute(sql).fetchall()

    with Bench("SQL", LOG_SQL):
        return logger.sql(query, run_and_fetch_all)
def fetchone(query):
    """Execute ``query`` via ``logger.sql`` and return its result.

    ``logger.sql`` receives the query together with a callback that runs it
    on ``g.db`` and fetches a single row.  The call is made inside a
    ``Bench("SQL", LOG_SQL)`` block.
    """
    def run_and_fetch_one(sql):
        return g.db.execute(sql).fetchone()

    with Bench("SQL", LOG_SQL):
        return logger.sql(query, run_and_fetch_one)
def search_page():
    """Serve a search request, or the data-sharing page.

    Requests carrying ``info_database`` are handed to ``sharing_info_page``
    and either redirected or rendered with ``data_sharing.html``.  All other
    requests load the search result from Redis, or compute it with
    ``search_results.SearchResultPage`` and cache it for one hour.  The
    quick-search, result or error template is then rendered.
    """
    print("in search_page")

    if 'info_database' in request.args:
        print("Going to sharing_info_page")
        template_vars = sharing_info_page()
        if not template_vars.redirect_url:
            return render_template("data_sharing.html",
                                   **template_vars.__dict__)
        print("Going to redirect")
        return flask.redirect(template_vars.redirect_url)

    key = "search_results:v1:" + json.dumps(request.args, sort_keys=True)
    print("key is:", pf(key))
    with Bench("Loading cache"):
        cached = Redis.get(key)

    if cached:
        print("Cache hit!!!")
        with Bench("Loading results"):
            result = pickle.loads(cached)
    else:
        print("calling search_results.SearchResultPage")
        print("request.args is", request.args)
        the_search = search_results.SearchResultPage(request.args)
        result = the_search.__dict__
        print("result: ", pf(result))
        Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
        Redis.expire(key, 60 * 60)

    if result['quick']:
        template_name = "quick_search.html"
    elif result['search_term_exists']:
        template_name = "search_result_page.html"
    else:
        return render_template("search_error.html")
    return render_template(template_name, **result)
def search_page():
    """Serve a search request, or the data-sharing page.

    Requests carrying ``info_database`` are handed to ``sharing_info_page``
    and either redirected or rendered with ``data_sharing.html``.  For other
    requests, with ``USE_REDIS`` set, a cached result stored under a key
    derived from ``request.args`` is used when present.  Otherwise the
    search is run with ``search_results.SearchResultPage`` and cached for
    one hour.
    """
    logger.info("in search_page")

    if 'info_database' in request.args:
        logger.info("Going to sharing_info_page")
        template_vars = sharing_info_page()
        if template_vars.redirect_url:
            logger.info("Going to redirect")
            return flask.redirect(template_vars.redirect_url)
        return render_template("data_sharing.html",
                               **template_vars.__dict__)

    result = None
    if USE_REDIS:
        with Bench("Trying Redis cache"):
            key = "search_results:v1:" + json.dumps(request.args,
                                                    sort_keys=True)
            logger.debug("key is:", pf(key))
            cached = Redis.get(key)
            if cached:
                logger.info("Redis cache hit on search results!")
                # NOTE: unpickling is only safe while this key space is
                # written solely by this view.
                result = pickle.loads(cached)
    else:
        logger.info("Skipping Redis cache (USE_REDIS=False)")

    # Bug fix: the search used to run even after a cache hit, overwriting
    # the cached result.  It now runs only on a miss.
    if result is None:
        logger.info("request.args is", request.args)
        the_search = search_results.SearchResultPage(request.args)
        result = the_search.__dict__
        logger.debugf("result", result)
        if USE_REDIS:
            Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
            Redis.expire(key, 60 * 60)

    if result['search_term_exists']:
        return render_template("search_result_page.html", **result)
    return render_template("search_error.html")
def __init__(self, kw):
    """Search genes or phenotypes and build a trait object for every hit.

    ``kw['type']`` selects the search (``"gene"`` or ``"phenotype"``) and
    ``kw['terms']`` holds the search text.  At most 6000 rows are fetched;
    each becomes a ``GeneralTrait`` appended to ``self.trait_list``.  For
    any other type ``self.trait_list`` is left unset.

    Security fix: the search text is passed to the database as bound
    parameters instead of being interpolated into the SQL string.
    """
    self.type = kw['type']
    self.terms = kw['terms']

    if self.type == "gene":
        sql = """
            SELECT
            Species.`Name` AS species_name,
            InbredSet.`Name` AS inbredset_name,
            Tissue.`Name` AS tissue_name,
            ProbeSetFreeze.Name AS probesetfreeze_name,
            ProbeSet.Name AS probeset_name,
            ProbeSet.Symbol AS probeset_symbol,
            ProbeSet.`description` AS probeset_description,
            ProbeSet.Chr AS chr,
            ProbeSet.Mb AS mb,
            ProbeSetXRef.Mean AS mean,
            ProbeSetXRef.LRS AS lrs,
            ProbeSetXRef.`Locus` AS locus,
            ProbeSetXRef.`pValue` AS pvalue,
            ProbeSetXRef.`additive` AS additive
            FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue
            WHERE InbredSet.`SpeciesId`=Species.`Id`
            AND ProbeFreeze.InbredSetId=InbredSet.`Id`
            AND ProbeFreeze.`TissueId`=Tissue.`Id`
            AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id
            AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST (%s IN BOOLEAN MODE) )
            AND ProbeSet.Id = ProbeSetXRef.ProbeSetId
            AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id
            AND ProbeSetFreeze.confidentiality < 1
            AND ProbeSetFreeze.public > 0
            ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
            LIMIT 6000
            """
        with Bench("Running query"):
            logger.sql(sql)
            rows = g.db.execute(sql, (self.terms,)).fetchall()

        self.trait_list = []
        with Bench("Creating trait objects"):
            for line in rows:
                # Column 3 is the ProbeSetFreeze name, column 4 the ProbeSet
                # name.
                dataset = create_dataset(line[3], "ProbeSet",
                                         get_samplelist=False)
                trait_id = line[4]
                this_trait = GeneralTrait(dataset=dataset,
                                          name=trait_id,
                                          get_qtl_info=True,
                                          get_sample_info=False)
                self.trait_list.append(this_trait)

    elif self.type == "phenotype":
        # Every column below is matched against the terms as a whole word.
        searched_columns = (
            "Phenotype.Post_publication_description",
            "Phenotype.Pre_publication_description",
            "Phenotype.Pre_publication_abbreviation",
            "Phenotype.Post_publication_abbreviation",
            "Phenotype.Lab_code",
            "Publication.PubMed_ID",
            "Publication.Abstract",
            "Publication.Title",
            "Publication.Authors",
            "PublishXRef.Id",
        )
        # CONCAT puts the word-boundary markers around the bound parameter,
        # giving the same pattern the interpolated string used to produce.
        word_match = " OR ".join(
            "{} REGEXP CONCAT('[[:<:]]', %s, '[[:>:]]')".format(column)
            for column in searched_columns)
        sql = """
            SELECT
            Species.`Name`,
            InbredSet.`Name`,
            PublishFreeze.`Name`,
            PublishXRef.`Id`,
            Phenotype.`Post_publication_description`,
            Publication.`Authors`,
            Publication.`Year`,
            PublishXRef.`LRS`,
            PublishXRef.`Locus`,
            PublishXRef.`additive`
            FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication
            WHERE PublishXRef.`InbredSetId`=InbredSet.`Id`
            AND PublishFreeze.`InbredSetId`=InbredSet.`Id`
            AND InbredSet.`SpeciesId`=Species.`Id`
            AND PublishXRef.`PhenotypeId`=Phenotype.`Id`
            AND PublishXRef.`PublicationId`=Publication.`Id`
            AND ({})
            ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id`
            LIMIT 6000
            """.format(word_match)
        params = (self.terms,) * len(searched_columns)

        logger.sql(sql)
        rows = g.db.execute(sql, params).fetchall()

        self.trait_list = []
        with Bench("Creating trait objects"):
            for line in rows:
                # Column 2 is the PublishFreeze name, column 3 the
                # PublishXRef id.
                dataset = create_dataset(line[2], "Publish")
                trait_id = line[3]
                this_trait = GeneralTrait(dataset=dataset,
                                          name=trait_id,
                                          get_qtl_info=True,
                                          get_sample_info=False)
                self.trait_list.append(this_trait)
def __init__(self, start_vars, temp_uuid):
    """Run the mapping method named in ``start_vars`` and collect its output.

    ``start_vars`` must provide ``value:<sample>`` for every sample of the
    dataset's group, plus ``method``, ``manhattan_plot`` and ``maf``.
    Further keys are read depending on the method and on whether
    ``startMb`` is present (it is absent on the first page load).
    ``temp_uuid`` is stored and passed to the pylmm helpers.

    After the run, ``self.qtl_results`` holds the markers that carry a
    score and ``self.js_data`` the dictionary assembled for the page.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # Needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    self.samples = []
    self.vals = []
    for sample in self.dataset.group.samplelist:
        value = start_vars['value:' + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    self.mapping_method = start_vars['method']
    self.manhattan_plot = start_vars['manhattan_plot'] == "true"
    self.maf = start_vars['maf']  # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    # Initialised here because views check it to pick the template.
    self.pair_scan = False
    self.score_type = "LRS"  # ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.bootstrap_results = []

    # ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        selected_chr = int(start_vars['selected_chr'])
        # ZS: Needs to be -1 if showing full map; any other value is
        # shifted by one.
        if selected_chr != -1:
            selected_chr += 1
        self.selected_chr = selected_chr

    # Optional display settings are copied only when submitted.
    for option in ("startMb", "endMb", "graphWidth", "lrsMax",
                   "haplotypeAnalystCheck"):
        if option in start_vars:
            setattr(self, option, start_vars[option])

    if "startMb" in start_vars:
        # ZS: startMb is only missing during the first load, so from here on
        # the submitted checkbox state is taken as-is.
        self.permCheck = "ON" if "permCheck" in start_vars else False
        self.num_perm = int(start_vars['num_perm'])
        self.LRSCheck = start_vars['LRSCheck']
        self.showSNP = start_vars.get('showSNP', False)
        self.showGenes = start_vars.get('showGenes', False)
        self.viewLegend = start_vars.get('viewLegend', False)
    else:
        # First load: everything defaults to checked.  A missing, invalid or
        # non-positive permutation count becomes 0.
        try:
            self.num_perm = max(int(start_vars['num_perm']), 0)
        except (KeyError, TypeError, ValueError):
            self.num_perm = 0
        self.LRSCheck = self.score_type
        self.permCheck = "ON"
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    self.dataset.group.get_markers()

    if self.mapping_method == "gemma":
        self.score_type = "LOD"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            included_markers, p_values = gemma_mapping.run_gemma(
                self.dataset, self.samples, self.vals)
        with Bench("Getting markers from csv"):
            marker_obs = get_markers_from_csv(included_markers, p_values,
                                              self.dataset.group.name)
        results = marker_obs
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        if start_vars['pair_scan'] == "true":
            self.pair_scan = True
        results = self.run_rqtl_geno()
    elif self.mapping_method == "reaper":
        if "startMb" in start_vars:
            # ZS: Not the first load, so take the submitted state.
            self.additiveCheck = start_vars.get('additiveCheck', False)
            self.bootCheck = "ON" if "bootCheck" in start_vars else False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            try:
                num_bootstrap = int(start_vars['num_bootstrap'])
            except (KeyError, TypeError, ValueError):
                num_bootstrap = 0
            if num_bootstrap > 0:
                self.bootCheck = "ON"
                self.num_bootstrap = num_bootstrap
            else:
                self.bootCheck = False
                self.num_bootstrap = 0
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        results = self.gen_reaper_results()
    elif self.mapping_method == "plink":
        results = self.run_plink()
    elif self.mapping_method == "pylmm":
        print("RUNNING PYLMM")
        if self.num_perm > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        # NOTE(review): ``results`` stays unbound for an unknown method, so
        # the loops below raise NameError.
        print("RUNNING NOTHING")

    if self.pair_scan:
        self.qtl_results = []
        highest_chr = 1  # Needed to convert the highest chr to X/Y
        for marker in results:
            chrom = marker['chr1']
            if chrom > 0 or chrom == "X" or chrom == "X/Y":
                if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                    highest_chr = chrom
                if 'lod_score' in marker:
                    self.qtl_results.append(marker)

        # Bug fix: the loop used to iterate enumerate(...) tuples while
        # indexing them like markers, and the target lists were never
        # created here.
        for column in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(column, [])
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            qtl_results=self.qtl_results,
        )
    else:
        self.cutoff = 2
        self.qtl_results = []
        highest_chr = 1  # Needed to convert the highest chr to X/Y
        for marker in results:
            chrom = marker['chr']
            if chrom > 0 or chrom == "X" or chrom == "X/Y":
                if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                    highest_chr = chrom
                if 'lod_score' in marker or 'lrs_value' in marker:
                    self.qtl_results.append(marker)

        self.trimmed_markers = trim_markers_for_table(results)

        self.json_data['chr'] = []
        self.json_data['pos'] = []
        self.json_data['lod.hk'] = []
        self.json_data['markernames'] = []
        self.json_data['suggestive'] = self.suggestive
        self.json_data['significant'] = self.significant

        # Convert the QTL objects that qtl reaper returns into a json
        # serializable dictionary.
        for qtl in self.qtl_results:
            if (qtl['chr'] == highest_chr and highest_chr != "X"
                    and highest_chr != "X/Y"):
                # The highest numbered chromosome is reported as "X".
                self.json_data['chr'].append("X")
            else:
                self.json_data['chr'].append(str(qtl['chr']))
            self.json_data['pos'].append(qtl['Mb'])
            if 'lrs_value' in qtl:
                self.json_data['lod.hk'].append(str(qtl['lrs_value']))
            else:
                self.json_data['lod.hk'].append(str(qtl['lod_score']))
            self.json_data['markernames'].append(qtl['name'])

        # Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        chromosomes = self.species.chromosomes.chromosomes
        for key in chromosomes.keys():
            self.json_data['chrnames'].append(
                [chromosomes[key].name, chromosomes[key].mb_length])
            chromosome_mb_lengths[key] = chromosomes[key].mb_length

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
        )
def mapping_results_page():
    """Run a mapping request and render its results page.

    Only the whitelisted form fields are forwarded to
    ``run_mapping.RunMapping``.  If the run raises, or reports
    ``no_results``, ``mapping_error.html`` is rendered.  Pair-scan results
    embed the generated image as a base64 byte array; all other results go
    through ``display_mapping_results.DisplayMappingResults``.
    """
    # Imported locally so the block does not depend on a file-level import.
    import traceback

    initial_start_vars = request.form
    logger.info(request.url)
    temp_uuid = initial_start_vars['temp_uuid']

    wanted = frozenset((
        'trait_id', 'dataset', 'group', 'species', 'samples', 'vals',
        'sample_vals', 'vals_hash', 'first_run', 'output_files',
        'geno_db_exists', 'method', 'mapping_results_path',
        'trimmed_markers', 'selected_chr', 'chromosomes', 'mapping_scale',
        'plotScale', 'score_type', 'suggestive', 'significant', 'num_perm',
        'permCheck', 'perm_strata', 'categorical_vars', 'perm_output',
        'num_bootstrap', 'bootCheck', 'bootstrap_results', 'LRSCheck',
        'covariates', 'maf', 'use_loco', 'manhattan_plot', 'color_scheme',
        'manhattan_single_color', 'control_marker', 'do_control',
        'genofile', 'genofile_string', 'pair_scan', 'startMb', 'endMb',
        'graphWidth', 'lrsMax', 'additiveCheck', 'showSNP', 'showGenes',
        'viewLegend', 'haplotypeAnalystCheck', 'mapmethod_rqtl_geno',
        'mapmodel_rqtl_geno', 'temp_trait', 'n_samples', 'transform',
    ))
    start_vars = {name: value
                  for name, value in initial_start_vars.items()
                  if name in wanted}

    version = "v3"
    key = "mapping_results:{}:".format(version) + json.dumps(start_vars,
                                                             sort_keys=True)
    with Bench("Loading cache"):
        # The Redis lookup (Redis.get(key)) is switched off, so every
        # request currently takes the cache-miss path.
        cached = None

    if cached:
        logger.info("Cache hit!!!")
        with Bench("Loading results"):
            result = pickle.loads(cached)
    else:
        logger.info("Cache miss!!!")
        with Bench("Total time in RunMapping"):
            try:
                template_vars = run_mapping.RunMapping(start_vars, temp_uuid)
                if template_vars.no_results:
                    return render_template("mapping_error.html")
            except Exception:
                # Still best-effort (error page instead of a 500), but the
                # failure is logged rather than silently swallowed.
                logger.info("RunMapping failed:", traceback.format_exc())
                return render_template("mapping_error.html")

        template_vars.js_data = json.dumps(template_vars.js_data,
                                           default=json_default_handler,
                                           indent=" ")
        result = template_vars.__dict__

    # Rendering sits after the if/else so a cache hit would also bind
    # ``rendered_template``.
    if result['pair_scan']:
        with Bench("Rendering template"):
            img_path = result['pair_scan_filename']
            logger.info("img_path:", img_path)
            initial_start_vars = request.form
            logger.info("initial_start_vars:", initial_start_vars)
            # Bug fix: the image file handle was never closed.
            with open(TEMPDIR + img_path, 'rb') as imgfile:
                imgdata = imgfile.read()
            imgB64 = base64.b64encode(imgdata)
            bytesarray = array.array('B', imgB64)
            result['pair_scan_array'] = bytesarray
            rendered_template = render_template("pair_scan_results.html",
                                                **result)
    else:
        gn1_template_vars = display_mapping_results.DisplayMappingResults(
            result).__dict__
        rendered_template = render_template("mapping_results.html",
                                            **gn1_template_vars)

    return rendered_template
def __init__(self, start_vars, temp_uuid):
    """Run the mapping method named in ``start_vars`` and collect its output.

    Sample values come from ``start_vars['value:<sample>']``.  Samples
    present in ``self.this_trait.data`` must have a value; other samples of
    the group are included only when a non-empty value was submitted.
    ``method``, ``manhattan_plot`` and ``maf`` are required.  Further keys
    are read depending on the method and on whether ``startMb`` is present
    (it is absent on the first page load).

    After the run, ``self.qtl_results`` holds the markers that carry a
    score, ``self.trimmed_markers`` the markers kept for the table and
    ``self.js_data`` the dictionary assembled for the page.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # Needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    self.samples = []
    self.vals = []

    # NOTE(review): the return value is unused; the call is kept in case it
    # has side effects on the group -- confirm and drop if it has none.
    self.dataset.group.all_samples_ordered()

    # Collect the trait's sample names once instead of rescanning the trait
    # data for every sample of the group.
    trait_data = self.this_trait.data
    trait_sample_names = set(trait_data[item].name for item in trait_data)

    for sample in self.dataset.group.samplelist:
        # ``sample`` is actually the name of an individual.
        if sample in trait_sample_names:
            value = start_vars['value:' + sample]
            self.samples.append(sample)
            self.vals.append(value)
        else:
            value = start_vars.get('value:' + sample)
            if value:
                self.samples.append(sample)
                self.vals.append(value)

    # ZS: Says whether genotypes exist in the DB, in order to create links
    # for markers.  The former try/except around the constant default could
    # never reach its except branch.
    self.geno_db_exists = start_vars.get('geno_db_exists', "True")

    self.mapping_method = start_vars['method']

    if "results_path" in start_vars:
        self.mapping_results_path = start_vars['results_path']
    else:
        # Random six-character suffix appended to the group name.
        suffix = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(6))
        mapping_results_filename = self.dataset.group.name + "_" + suffix
        self.mapping_results_path = "{}{}.csv".format(
            webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

    self.manhattan_plot = start_vars['manhattan_plot'] == "True"
    self.maf = start_vars['maf']  # Minor allele frequency
    self.use_loco = start_vars.get('use_loco')

    self.suggestive = ""
    self.significant = ""
    # Initialised here because views check it to pick the template.
    self.pair_scan = False
    self.score_type = "LRS"  # ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.num_perm = 0
    self.perm_output = []
    self.bootstrap_results = []
    self.covariates = start_vars.get('covariates')

    # ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        selected_chr = int(start_vars['selected_chr'])
        # ZS: Needs to be -1 if showing full map; any other value is
        # shifted by one.
        if selected_chr != -1:
            selected_chr += 1
        self.selected_chr = selected_chr

    # Optional display settings are copied only when submitted.
    for option in ("startMb", "endMb", "graphWidth", "lrsMax",
                   "haplotypeAnalystCheck"):
        if option in start_vars:
            setattr(self, option, start_vars[option])

    if "startMb" in start_vars:
        # ZS: startMb is only missing during the first load, so from here on
        # the submitted checkbox state is taken as-is.
        self.permCheck = "ON" if "permCheck" in start_vars else False
        self.num_perm = int(start_vars['num_perm'])
        self.LRSCheck = start_vars['LRSCheck']
        self.showSNP = start_vars.get('showSNP', False)
        self.showGenes = start_vars.get('showGenes', False)
        self.viewLegend = start_vars.get('viewLegend', False)
    else:
        # First load: a missing, invalid or non-positive permutation count
        # leaves num_perm at 0; display options default to checked.
        try:
            num_perm = int(start_vars['num_perm'])
        except (KeyError, TypeError, ValueError):
            num_perm = 0
        if num_perm > 0:
            self.num_perm = num_perm
        self.permCheck = "ON" if self.num_perm > 0 else False
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    if 'genofile' in start_vars and start_vars['genofile'] != "":
        self.genofile_string = start_vars['genofile']
        # The part before the first ":" is used as the group's genofile.
        self.dataset.group.genofile = self.genofile_string.split(":")[0]

    self.dataset.group.get_markers()

    if self.mapping_method == "gemma":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples,
                                                 self.vals, self.covariates,
                                                 self.use_loco)
        results = marker_obs
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        rqtl_output = rqtl_mapping.run_rqtl_geno(
            self.vals, self.dataset, self.method, self.model,
            self.permCheck, self.num_perm, self.do_control,
            self.control_marker, self.manhattan_plot, self.pair_scan)
        if self.permCheck and self.num_perm > 0:
            # With permutations the call returns a 4-tuple.
            (self.perm_output, self.suggestive, self.significant,
             results) = rqtl_output
        else:
            results = rqtl_output
    elif self.mapping_method == "reaper":
        if "startMb" in start_vars:
            # ZS: Not the first load, so take the submitted state.
            self.additiveCheck = start_vars.get('additiveCheck', False)
            self.bootCheck = "ON" if "bootCheck" in start_vars else False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            try:
                num_bootstrap = int(start_vars['num_bootstrap'])
            except (KeyError, TypeError, ValueError):
                num_bootstrap = 0
            if num_bootstrap > 0:
                self.bootCheck = "ON"
                self.num_bootstrap = num_bootstrap
            else:
                self.bootCheck = False
                self.num_bootstrap = 0
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        logger.info("Running qtlreaper")
        (results, self.json_data, self.perm_output, self.suggestive,
         self.significant,
         self.bootstrap_results) = qtlreaper_mapping.gen_reaper_results(
            self.this_trait, self.dataset, self.samples, self.vals,
            self.json_data, self.num_perm, self.bootCheck,
            self.num_bootstrap, self.do_control, self.control_marker,
            self.manhattan_plot)
    elif self.mapping_method == "plink":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                          self.species, self.vals, self.maf)
    elif self.mapping_method == "pylmm":
        logger.debug("RUNNING PYLMM")
        if self.num_perm > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        # NOTE(review): ``results`` stays unbound for an unknown method, so
        # the loops below raise NameError.
        logger.debug("RUNNING NOTHING")

    if self.pair_scan:
        self.qtl_results = []
        highest_chr = 1  # Needed to convert the highest chr to X/Y
        for marker in results:
            chrom = marker['chr1']
            if chrom > 0 or chrom == "X" or chrom == "X/Y":
                if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                    highest_chr = chrom
                if 'lod_score' in marker:
                    self.qtl_results.append(marker)

        self.trimmed_markers = results

        # Bug fix: the loop used to iterate enumerate(...) tuples while
        # indexing them like markers, and the target lists were never
        # created here.
        for column in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(column, [])
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(json_data=self.json_data,
                            this_trait=self.this_trait.name,
                            data_set=self.dataset.name,
                            maf=self.maf,
                            manhattan_plot=self.manhattan_plot,
                            mapping_scale=self.mapping_scale,
                            qtl_results=self.qtl_results)
    else:
        self.qtl_results = []
        highest_chr = 1  # Needed to convert the highest chr to X/Y
        for marker in results:
            chrom = marker['chr']
            if chrom > 0 or chrom == "X" or chrom == "X/Y":
                if chrom > highest_chr or chrom == "X" or chrom == "X/Y":
                    highest_chr = chrom
                if 'lod_score' in marker or 'lrs_value' in marker:
                    self.qtl_results.append(marker)

        with Bench("Exporting Results"):
            export_mapping_results(self.dataset, self.this_trait,
                                   self.qtl_results,
                                   self.mapping_results_path,
                                   self.mapping_scale, self.score_type)

        with Bench("Trimming Markers for Figure"):
            if len(self.qtl_results) > 30000:
                self.qtl_results = trim_markers_for_figure(self.qtl_results)

        with Bench("Trimming Markers for Table"):
            self.trimmed_markers = trim_markers_for_table(results)

        if self.mapping_method != "gemma":
            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []
            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            # Convert the QTL objects that qtl reaper returns into a json
            # serializable dictionary.
            for qtl in self.qtl_results:
                if (qtl['chr'] == highest_chr and highest_chr != "X"
                        and highest_chr != "X/Y"):
                    # The highest numbered chromosome is reported as "X".
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                if 'lrs_value' in qtl:
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

        # Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        chromosomes = self.species.chromosomes.chromosomes
        for key in chromosomes.keys():
            self.json_data['chrnames'].append(
                [chromosomes[key].name, chromosomes[key].mb_length])
            chromosome_mb_lengths[key] = chromosomes[key].mb_length

        # NOTE(review): js_data is assembled for every mapping method,
        # GEMMA included -- confirm this matches what the view expects.
        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
            num_perm=self.num_perm,
            perm_results=self.perm_output,
        )
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and prepare results for display.

    Reads the mapping options from ``start_vars``, dispatches to the
    backend selected by ``start_vars['method']`` ("gemma", "rqtl_plink",
    "rqtl_geno", "reaper" or "plink"), and stores the results together
    with the data consumed by the templates/JavaScript on ``self``.

    Args:
        start_vars: Mapping of request parameters. ``sample_vals`` (a JSON
            object mapping sample name to value), ``method`` and ``maf``
            are always read; many other keys are optional and several are
            required only by a specific mapping method.
        temp_uuid: Stored on ``self.temp_uuid`` for the gn1 mapping code.

    Side effects:
        May overwrite ``self.dataset.group.genofile``; exports the mapping
        results and writes the genome-browser input files through the
        project helpers called below.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # Needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    # ZS: Needed to zoom in or remap temp traits like PCA traits
    if "temp_trait" in start_vars and start_vars['temp_trait'] != "False":
        self.temp_trait = "True"
        self.group = self.dataset.group.name

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    # ZS: Sometimes a group may have a genofile that only includes a subset
    # of samples
    genofile_samplelist = []
    if 'genofile' in start_vars and start_vars['genofile'] != "":
        self.genofile_string = start_vars['genofile']
        self.dataset.group.genofile = self.genofile_string.split(":")[0]
        genofile_samplelist = get_genofile_samplelist(self.dataset)

    # NOTE(review): the return value was never used; the call is kept in
    # case it has side effects on the group object - confirm and drop.
    self.dataset.group.all_samples_ordered()

    self.vals = []
    self.samples = []
    self.sample_vals = start_vars['sample_vals']
    sample_val_dict = json.loads(self.sample_vals)
    if genofile_samplelist:
        # Keep every genofile sample; missing values are marked with "x".
        for sample in genofile_samplelist:
            self.samples.append(sample)
            if sample in sample_val_dict:
                self.vals.append(sample_val_dict[sample])
            else:
                self.vals.append("x")
    else:
        # Without a genofile sample list only samples with a value are kept.
        for sample in self.dataset.group.samplelist:
            if sample in sample_val_dict:
                self.vals.append(sample_val_dict[sample])
                self.samples.append(sample)

    if 'n_samples' in start_vars:
        self.n_samples = start_vars['n_samples']
    else:
        self.n_samples = len([val for val in self.vals if val != "x"])

    # ZS: Check if genotypes exist in the DB in order to create links for
    # markers
    self.geno_db_exists = geno_db_exists(self.dataset)

    self.mapping_method = start_vars['method']
    if "results_path" in start_vars:
        self.mapping_results_path = start_vars['results_path']
    else:
        mapping_results_filename = self.dataset.group.name + "_" + ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(6))
        self.mapping_results_path = "{}{}.csv".format(
            webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

    self.manhattan_plot = False
    if 'manhattan_plot' in start_vars:
        if start_vars['manhattan_plot'].lower() != "false":
            self.color_scheme = "alternating"
            if "color_scheme" in start_vars:
                self.color_scheme = start_vars['color_scheme']
                if self.color_scheme == "single":
                    self.manhattan_single_color = start_vars[
                        'manhattan_single_color']
            self.manhattan_plot = True

    self.maf = start_vars['maf']  # Minor allele frequency
    if "use_loco" in start_vars:
        self.use_loco = start_vars['use_loco']
    else:
        self.use_loco = None
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which
    # template to use
    self.pair_scan = False
    if 'transform' in start_vars:
        self.transform = start_vars['transform']
    else:
        self.transform = ""
    self.score_type = "LRS"  # ZS: LRS or LOD
    self.mapping_scale = "physic"
    if "mapping_scale" in start_vars:
        self.mapping_scale = start_vars['mapping_scale']
    self.num_perm = 0
    self.perm_output = []
    self.bootstrap_results = []
    self.covariates = start_vars[
        'covariates'] if "covariates" in start_vars else ""
    self.categorical_vars = []

    # ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        # ZS: Needs to be -1 if showing full map; there's probably a better
        # way to fix this
        if int(start_vars['selected_chr']) != -1:
            self.selected_chr = int(start_vars['selected_chr']) + 1
        else:
            self.selected_chr = int(start_vars['selected_chr'])
    if "startMb" in start_vars:
        self.startMb = start_vars['startMb']
    if "endMb" in start_vars:
        self.endMb = start_vars['endMb']
    if "graphWidth" in start_vars:
        self.graphWidth = start_vars['graphWidth']
    if "lrsMax" in start_vars:
        self.lrsMax = start_vars['lrsMax']
    if "haplotypeAnalystCheck" in start_vars:
        self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']

    # ZS: This is to ensure showGenes, Legend, etc are checked the first
    # time you open the mapping page, since startMb will only not be set
    # during the first load
    if "startMb" in start_vars:
        if "permCheck" in start_vars:
            self.permCheck = "ON"
        else:
            self.permCheck = False
        self.num_perm = int(start_vars['num_perm'])

        self.LRSCheck = start_vars['LRSCheck']

        if "showSNP" in start_vars:
            self.showSNP = start_vars['showSNP']
        else:
            self.showSNP = False

        if "showGenes" in start_vars:
            self.showGenes = start_vars['showGenes']
        else:
            self.showGenes = False

        if "viewLegend" in start_vars:
            self.viewLegend = start_vars['viewLegend']
        else:
            self.viewLegend = False
    else:
        # A missing or non-numeric permutation count means "no permutations".
        try:
            if int(start_vars['num_perm']) > 0:
                self.num_perm = int(start_vars['num_perm'])
        except (KeyError, ValueError, TypeError):
            self.num_perm = 0

        if self.num_perm > 0:
            self.permCheck = "ON"
        else:
            self.permCheck = False
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    if self.mapping_method == "gemma":
        self.first_run = True
        self.output_files = None
        if 'output_files' in start_vars:
            self.output_files = start_vars['output_files']
        # ZS: check if first run so existing result files can be used if it
        # isn't (for example zooming on a chromosome, etc)
        if 'first_run' in start_vars:
            self.first_run = False
        self.score_type = "-logP"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            # use_loco is forwarded as-is; the LOCO and non-LOCO cases used
            # the exact same call.
            marker_obs, self.output_files = gemma_mapping.run_gemma(
                self.this_trait, self.dataset, self.samples, self.vals,
                self.covariates, self.use_loco, self.maf, self.first_run,
                self.output_files)
        results = marker_obs
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        perm_strata = []
        if "perm_strata" in start_vars and "categorical_vars" in start_vars:
            self.categorical_vars = start_vars["categorical_vars"].split(",")
            if len(self.categorical_vars
                   ) and start_vars["perm_strata"] == "True":
                primary_samples = SampleList(dataset=self.dataset,
                                             sample_names=self.samples,
                                             this_trait=self.this_trait)
                perm_strata = get_perm_strata(self.this_trait,
                                              primary_samples,
                                              self.categorical_vars,
                                              self.samples)
        self.score_type = "LOD"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        if 'mapmethod_rqtl_geno' in start_vars:
            self.method = start_vars['mapmethod_rqtl_geno']
        else:
            self.method = "em"
        self.model = start_vars['mapmodel_rqtl_geno']
        if self.permCheck and self.num_perm > 0:
            (self.perm_output, self.suggestive, self.significant,
             results) = rqtl_mapping.run_rqtl_geno(
                 self.vals, self.samples, self.dataset, self.mapping_scale,
                 self.method, self.model, self.permCheck, self.num_perm,
                 perm_strata, self.do_control, self.control_marker,
                 self.manhattan_plot, self.pair_scan, self.covariates)
        else:
            results = rqtl_mapping.run_rqtl_geno(
                self.vals, self.samples, self.dataset, self.mapping_scale,
                self.method, self.model, self.permCheck, self.num_perm,
                perm_strata, self.do_control, self.control_marker,
                self.manhattan_plot, self.pair_scan, self.covariates)
    elif self.mapping_method == "reaper":
        # ZS: Check if first time page loaded, so it can default to ON
        if "startMb" in start_vars:
            if "additiveCheck" in start_vars:
                self.additiveCheck = start_vars['additiveCheck']
            else:
                self.additiveCheck = False

            if "bootCheck" in start_vars:
                self.bootCheck = "ON"
            else:
                self.bootCheck = False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            try:
                if int(start_vars['num_bootstrap']) > 0:
                    self.bootCheck = "ON"
                    self.num_bootstrap = int(start_vars['num_bootstrap'])
                else:
                    self.bootCheck = False
                    self.num_bootstrap = 0
            except (KeyError, ValueError, TypeError):
                self.bootCheck = False
                self.num_bootstrap = 0

        self.reaper_version = start_vars['reaper_version']

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']

        logger.info("Running qtlreaper")

        if self.reaper_version == "new":
            self.first_run = True
            self.output_files = None
            # ZS: check if first run so existing result files can be used
            # if it isn't (for example zooming on a chromosome, etc)
            if 'first_run' in start_vars:
                self.first_run = False
                if 'output_files' in start_vars:
                    self.output_files = start_vars['output_files'].split(
                        ",")

            (results, self.perm_output, self.suggestive, self.significant,
             self.bootstrap_results,
             self.output_files) = qtlreaper_mapping.run_reaper(
                 self.this_trait, self.dataset, self.samples, self.vals,
                 self.json_data, self.num_perm, self.bootCheck,
                 self.num_bootstrap, self.do_control, self.control_marker,
                 self.manhattan_plot, self.first_run, self.output_files)
        else:
            (results, self.json_data, self.perm_output, self.suggestive,
             self.significant, self.bootstrap_results
             ) = qtlreaper_mapping.run_original_reaper(
                 self.this_trait, self.dataset, self.samples, self.vals,
                 self.json_data, self.num_perm, self.bootCheck,
                 self.num_bootstrap, self.do_control, self.control_marker,
                 self.manhattan_plot)
    elif self.mapping_method == "plink":
        self.score_type = "-logP"
        self.manhattan_plot = True
        results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                          self.species, self.vals, self.maf)
    else:
        logger.debug("RUNNING NOTHING")
        # Unknown method: report "no results" instead of failing with a
        # NameError on the unbound ``results`` below.
        results = []

    self.no_results = False
    if len(results) == 0:
        self.no_results = True
    elif self.pair_scan == True:
        # NOTE(review): pair_scan is always False in this constructor, so
        # this branch is currently unreachable.
        self.qtl_results = []
        for marker in results:
            if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[
                    'chr1'] == "X/Y":
                if 'lod_score' in marker:
                    self.qtl_results.append(marker)

        self.trimmed_markers = results

        # The per-column lists are not created anywhere else in this
        # constructor, so make sure they exist before appending.
        for column in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(column, [])
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(json_data=self.json_data,
                            this_trait=self.this_trait.name,
                            data_set=self.dataset.name,
                            maf=self.maf,
                            manhattan_plot=self.manhattan_plot,
                            mapping_scale=self.mapping_scale,
                            qtl_results=self.qtl_results)
    else:
        self.qtl_results = []
        self.results_for_browser = []
        self.annotations_for_browser = []
        for marker in results:
            if 'Mb' in marker:
                this_ps = marker['Mb'] * 1000000
            else:
                this_ps = marker['cM'] * 1000000

            marker_url = ("/show_trait?trait_id=" + marker['name'] +
                          "&dataset=" + self.dataset.group.name + "Geno")
            browser_marker = dict(chr=str(marker['chr']),
                                  rs=marker['name'],
                                  ps=this_ps,
                                  url=marker_url)

            # Only link annotations to a marker page when genotypes exist
            # in the DB.
            if self.geno_db_exists == "True":
                annot_marker = dict(name=str(marker['name']),
                                    chr=str(marker['chr']),
                                    rs=marker['name'],
                                    pos=this_ps,
                                    url=marker_url)
            else:
                annot_marker = dict(name=str(marker['name']),
                                    chr=str(marker['chr']),
                                    rs=marker['name'],
                                    pos=this_ps)

            # LRS / 4.61 is used as the LOD-equivalent exponent here (the
            # lod_score branch uses the score directly).
            if 'lrs_value' in marker and marker['lrs_value'] > 0:
                browser_marker['p_wald'] = 10**-(marker['lrs_value'] / 4.61)
            elif 'lod_score' in marker and marker['lod_score'] > 0:
                browser_marker['p_wald'] = 10**-(marker['lod_score'])
            else:
                browser_marker['p_wald'] = 0

            self.results_for_browser.append(browser_marker)
            self.annotations_for_browser.append(annot_marker)

            marker_chr = str(marker['chr'])
            if marker_chr > '0' or marker_chr == "X" or marker_chr == "X/Y":
                if ('lod_score' in marker) or ('lrs_value' in marker):
                    if 'Mb' in marker:
                        marker['display_pos'] = "Chr" + str(
                            marker['chr']) + ": " + "{:.6f}".format(
                                marker['Mb'])
                    elif 'cM' in marker:
                        marker['display_pos'] = "Chr" + str(
                            marker['chr']) + ": " + "{:.3f}".format(
                                marker['cM'])
                    else:
                        marker['display_pos'] = "N/A"
                    self.qtl_results.append(marker)

        total_markers = len(self.qtl_results)

        with Bench("Exporting Results"):
            export_mapping_results(self.dataset, self.this_trait,
                                   self.qtl_results,
                                   self.mapping_results_path,
                                   self.mapping_scale, self.score_type)

        with Bench("Trimming Markers for Figure"):
            if len(self.qtl_results) > 30000:
                self.qtl_results = trim_markers_for_figure(self.qtl_results)
                self.results_for_browser = trim_markers_for_figure(
                    self.results_for_browser)
                # Keep, for each remaining marker, the first annotation
                # with the same rs (single pass instead of a nested scan).
                first_annotation_by_rs = {}
                for annot_marker in self.annotations_for_browser:
                    first_annotation_by_rs.setdefault(annot_marker['rs'],
                                                      annot_marker)
                self.annotations_for_browser = [
                    first_annotation_by_rs[marker['rs']]
                    for marker in self.results_for_browser
                    if marker['rs'] in first_annotation_by_rs
                ]
            browser_files = write_input_for_browser(
                self.dataset, self.results_for_browser,
                self.annotations_for_browser)

        with Bench("Trimming Markers for Table"):
            self.trimmed_markers = trim_markers_for_table(results)

        chr_lengths = get_chr_lengths(self.mapping_scale,
                                      self.mapping_method, self.dataset,
                                      self.qtl_results)

        # ZS: For zooming into genome browser, need to pass chromosome name
        # instead of number
        if self.dataset.group.species == "mouse":
            if self.selected_chr == 20:
                this_chr = "X"
            else:
                this_chr = str(self.selected_chr)
        elif self.dataset.group.species == "rat":
            if self.selected_chr == 21:
                this_chr = "X"
            else:
                this_chr = str(self.selected_chr)
        else:
            if self.selected_chr == 22:
                this_chr = "X"
            elif self.selected_chr == 23:
                this_chr = "Y"
            else:
                this_chr = str(self.selected_chr)

        if self.mapping_method != "gemma":
            # ``significant`` stays "" when no threshold was computed;
            # only rescale an actual LRS threshold.
            if self.score_type == "LRS" and self.significant != "":
                significant_for_browser = self.significant / 4.61
            else:
                significant_for_browser = self.significant

            self.js_data = dict(categorical_vars=self.categorical_vars,
                                chr_lengths=chr_lengths,
                                num_perm=self.num_perm,
                                perm_results=self.perm_output,
                                significant=significant_for_browser,
                                browser_files=browser_files,
                                selected_chr=this_chr,
                                total_markers=total_markers)
        else:
            self.js_data = dict(chr_lengths=chr_lengths,
                                browser_files=browser_files,
                                selected_chr=this_chr,
                                total_markers=total_markers)
def __init__(self, start_vars):
    """Compute correlations between the base trait and a target dataset.

    Validates the required request parameters, collects the sample values
    selected by ``corr_samples_group``, computes sample (and optionally
    tissue or literature) correlations against ``corr_dataset``, filters
    the top ``corr_return_results`` traits by the requested ranges, and
    stores them in ``self.correlation_results`` / ``self.json_results``.

    Args:
        start_vars: Mapping of request parameters. ``corr_type``,
            ``dataset``, ``corr_sample_method``, ``corr_samples_group``,
            ``corr_dataset`` and ``corr_return_results`` are required;
            when ``loc_chr`` is present, ``min_loc_mb`` and ``max_loc_mb``
            must be present too.

    Raises:
        AssertionError: If a required parameter is missing.
    """
    # get trait list from db (database name)
    # calculate correlation with Base vector and targets

    # Check parameters
    assert('corr_type' in start_vars)
    assert(is_str(start_vars['corr_type']))
    assert('dataset' in start_vars)
    # assert('group' in start_vars) permitted to be empty?
    assert('corr_sample_method' in start_vars)
    assert('corr_samples_group' in start_vars)
    assert('corr_dataset' in start_vars)
    assert('corr_return_results' in start_vars)
    if 'loc_chr' in start_vars:
        assert('min_loc_mb' in start_vars)
        assert('max_loc_mb' in start_vars)

    with Bench("Doing correlations"):
        if start_vars['dataset'] == "Temp":
            self.dataset = data_set.create_dataset(
                dataset_name="Temp",
                dataset_type="Temp",
                group_name=start_vars['group'])
            self.trait_id = start_vars['trait_id']
            self.this_trait = create_trait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
        else:
            helper_functions.get_species_dataset_trait(self, start_vars)

        corr_samples_group = start_vars['corr_samples_group']

        self.sample_data = {}
        self.corr_type = start_vars['corr_type']
        self.corr_method = start_vars['corr_sample_method']
        self.min_expr = get_float(start_vars, 'min_expr')
        self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0)
        self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0)

        if ('loc_chr' in start_vars and
                'min_loc_mb' in start_vars and
                'max_loc_mb' in start_vars):
            self.location_type = get_string(start_vars, 'location_type')
            self.location_chr = get_string(start_vars, 'loc_chr')
            self.min_location_mb = get_int(start_vars, 'min_loc_mb')
            self.max_location_mb = get_int(start_vars, 'max_loc_mb')
        else:
            self.location_type = self.location_chr = None
            self.min_location_mb = self.max_location_mb = None

        self.get_formatted_corr_type()
        self.return_number = int(start_vars['corr_return_results'])

        # The two if statements below append samples to the sample list
        # based upon whether the user selected Primary Samples Only, Other
        # Samples Only, or All Samples.
        #
        # Work on a copy: extending the group's own samplelist in place
        # would permanently add the parent/F1 samples to the shared group
        # object every time this constructor runs.
        primary_samples = list(self.dataset.group.samplelist)
        if self.dataset.group.parlist is not None:
            primary_samples += self.dataset.group.parlist
        if self.dataset.group.f1list is not None:
            primary_samples += self.dataset.group.f1list

        # If either BXD/whatever Only or All Samples, append all of that
        # group's samplelist
        if corr_samples_group != 'samples_other':
            self.process_samples(start_vars, primary_samples)

        # If either Non-BXD/whatever or All Samples, get all samples from
        # this_trait.data and exclude the primary samples (because they
        # would have been added in the previous if statement if the user
        # selected All Samples)
        if corr_samples_group != 'samples_primary':
            if corr_samples_group == 'samples_other':
                # parlist / f1list may be None (see the checks above), so
                # do not concatenate them blindly.
                parents_and_f1 = set(
                    (self.dataset.group.parlist or []) +
                    (self.dataset.group.f1list or []))
                primary_samples = [x for x in primary_samples
                                   if x not in parents_and_f1]
            self.process_samples(start_vars,
                                 list(self.this_trait.data.keys()),
                                 primary_samples)

        self.target_dataset = data_set.create_dataset(
            start_vars['corr_dataset'])
        self.target_dataset.get_trait_data(list(self.sample_data.keys()))

        self.header_fields = get_header_fields(self.target_dataset.type,
                                               self.corr_method)

        if self.target_dataset.type == "ProbeSet":
            self.filter_cols = [7, 6]
        elif self.target_dataset.type == "Publish":
            self.filter_cols = [6, 0]
        else:
            self.filter_cols = [4, 0]

        self.correlation_results = []
        self.correlation_data = {}

        if self.corr_type == "tissue":
            self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol")

            tissue_corr_data = self.do_tissue_correlation_for_all_traits()
            if tissue_corr_data is not None:
                for trait in list(
                        tissue_corr_data.keys())[:self.return_number]:
                    self.get_sample_r_and_p_values(
                        trait, self.target_dataset.trait_data[trait])
            else:
                for trait, values in list(
                        self.target_dataset.trait_data.items()):
                    self.get_sample_r_and_p_values(trait, values)

        elif self.corr_type == "lit":
            self.trait_geneid_dict = self.dataset.retrieve_genes("GeneId")
            lit_corr_data = self.do_lit_correlation_for_all_traits()

            for trait in list(lit_corr_data.keys())[:self.return_number]:
                self.get_sample_r_and_p_values(
                    trait, self.target_dataset.trait_data[trait])

        elif self.corr_type == "sample":
            for trait, values in list(
                    self.target_dataset.trait_data.items()):
                self.get_sample_r_and_p_values(trait, values)

        # Order by descending absolute value of the first stored element.
        self.correlation_data = collections.OrderedDict(
            sorted(list(self.correlation_data.items()),
                   key=lambda t: -abs(t[1][0])))

        # Chromosome name -> order id, built once instead of rescanning the
        # chromosome table for every trait (later entries win, as before).
        chr_order_by_name = {
            chr_info.name: order_id
            for order_id, chr_info in
            self.dataset.species.chromosomes.chromosomes.items()
        }

        # ZS: Convert min/max chromosome to an int for the location range
        # option
        range_chr_as_int = None
        if 'loc_chr' in start_vars:
            range_chr_as_int = chr_order_by_name.get(self.location_chr)

        top_traits = list(self.correlation_data.keys())[:self.return_number]
        for trait in top_traits:
            trait_object = create_trait(dataset=self.target_dataset,
                                        name=trait,
                                        get_qtl_info=True,
                                        get_sample_info=False)
            if not trait_object:
                continue

            if self.location_type == "highest_lod":
                trait_chr = trait_object.locus_chr
            else:
                trait_chr = trait_object.chr
            chr_as_int = chr_order_by_name.get(trait_chr, 0)

            first_value = float(self.correlation_data[trait][0])
            if not (self.p_range_lower <= first_value <=
                    self.p_range_upper):
                continue

            if (self.target_dataset.type in ("ProbeSet", "Publish") and
                    bool(trait_object.mean)):
                if (self.min_expr is not None and
                        float(trait_object.mean) < self.min_expr):
                    continue

            if (range_chr_as_int is not None and
                    chr_as_int != range_chr_as_int):
                continue

            if self.location_type == "highest_lod":
                trait_mb = trait_object.locus_mb
            else:
                trait_mb = trait_object.mb
            if (self.min_location_mb is not None and
                    float(trait_mb) < float(self.min_location_mb)):
                continue
            if (self.max_location_mb is not None and
                    float(trait_mb) > float(self.max_location_mb)):
                continue

            (trait_object.sample_r,
             trait_object.sample_p,
             trait_object.num_overlap) = self.correlation_data[trait]

            # Set some sane defaults
            trait_object.tissue_corr = 0
            trait_object.tissue_pvalue = 0
            trait_object.lit_corr = 0
            if self.corr_type == "tissue" and tissue_corr_data is not None:
                trait_object.tissue_corr = tissue_corr_data[trait][1]
                trait_object.tissue_pvalue = tissue_corr_data[trait][2]
            elif self.corr_type == "lit":
                trait_object.lit_corr = lit_corr_data[trait][1]

            self.correlation_results.append(trait_object)

        both_probeset = (self.dataset.type == "ProbeSet" and
                         self.target_dataset.type == "ProbeSet")
        if self.corr_type != "lit" and both_probeset:
            self.do_lit_correlation_for_trait_list()

        if self.corr_type != "tissue" and both_probeset:
            self.do_tissue_correlation_for_trait_list()

    self.json_results = generate_corr_json(self.correlation_results,
                                           self.this_trait,
                                           self.dataset,
                                           self.target_dataset)
def __init__(self, kw):
    """Run a global search and build ``self.trait_list``.

    Args:
        kw: Mapping with the keys ``type`` ("gene" or "phenotype") and
            ``terms`` (the search text).

    For either supported type, ``self.trait_list`` is set to a list of
    dicts, one per matching row (at most 6000, per the query's LIMIT).
    Any other ``type`` leaves ``self.trait_list`` unset.

    Raises:
        AssertionError: If ``type`` or ``terms`` is missing.
    """
    assert ('type' in kw)
    assert ('terms' in kw)

    self.type = kw['type']
    self.terms = kw['terms']
    assert (is_str(self.type))

    # NOTE(review): both queries interpolate ``self.terms`` directly into
    # the SQL text; if the terms come from user input this is open to SQL
    # injection. Switching to bound parameters needs the exact API of
    # ``g.db`` - TODO confirm and parameterize.
    if self.type == "gene":
        sql = """
            SELECT
            Species.`Name` AS species_name,
            InbredSet.`Name` AS inbredset_name,
            Tissue.`Name` AS tissue_name,
            ProbeSetFreeze.Name AS probesetfreeze_name,
            ProbeSet.Name AS probeset_name,
            ProbeSet.Symbol AS probeset_symbol,
            ProbeSet.`description` AS probeset_description,
            ProbeSet.Chr AS chr,
            ProbeSet.Mb AS mb,
            ProbeSetXRef.Mean AS mean,
            ProbeSetXRef.LRS AS lrs,
            ProbeSetXRef.`Locus` AS locus,
            ProbeSetXRef.`pValue` AS pvalue,
            ProbeSetXRef.`additive` AS additive
            FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue
            WHERE InbredSet.`SpeciesId`=Species.`Id`
            AND ProbeFreeze.InbredSetId=InbredSet.`Id`
            AND ProbeFreeze.`TissueId`=Tissue.`Id`
            AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id
            AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) )
            AND ProbeSet.Id = ProbeSetXRef.ProbeSetId
            AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id
            AND ProbeSetFreeze.confidentiality < 1
            AND ProbeSetFreeze.public > 0
            ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
            LIMIT 6000
            """ % (self.terms)
        with Bench("Running query"):
            logger.sql(sql)
            rows = g.db.execute(sql).fetchall()
        self.trait_list = []
        with Bench("Creating trait objects"):
            # Column indexes follow the SELECT list above.
            for line in rows:
                this_trait = {}
                this_trait['name'] = line[4]
                this_trait['dataset'] = line[3]
                this_trait['species'] = line[0]
                this_trait['group'] = line[1]
                this_trait['tissue'] = line[2]
                this_trait['symbol'] = line[5]
                this_trait['description'] = line[6]
                this_trait['location_repr'] = 'N/A'
                if (line[7] != "NULL" and line[7] != "") and (line[8] != 0):
                    this_trait['location_repr'] = 'Chr%s: %.6f' % (
                        line[7], float(line[8]))
                this_trait['mean'] = line[9]
                this_trait['LRS_score_repr'] = "N/A"
                if line[10] != "" and line[10] is not None:
                    this_trait['LRS_score_repr'] = '%3.1f' % line[10]
                # Treat a NULL additive value like an empty one, matching
                # the LRS handling just above.
                this_trait['additive'] = "N/A"
                if line[13] != "" and line[13] is not None:
                    this_trait['additive'] = line[13]

                self.trait_list.append(this_trait)

    elif self.type == "phenotype":
        sql = """
            SELECT
            Species.`Name`,
            InbredSet.`Name`,
            PublishFreeze.`Name`,
            PublishXRef.`Id`,
            Phenotype.`Post_publication_description`,
            Publication.`Authors`,
            Publication.`Year`,
            Publication.`PubMed_ID`,
            PublishXRef.`LRS`,
            PublishXRef.`additive`
            FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication
            WHERE PublishXRef.`InbredSetId`=InbredSet.`Id`
            AND PublishFreeze.`InbredSetId`=InbredSet.`Id`
            AND InbredSet.`SpeciesId`=Species.`Id`
            AND PublishXRef.`PhenotypeId`=Phenotype.`Id`
            AND PublishXRef.`PublicationId`=Publication.`Id`
            AND (Phenotype.Post_publication_description REGEXP "[[:<:]]%s[[:>:]]"
                OR Phenotype.Pre_publication_description REGEXP "[[:<:]]%s[[:>:]]"
                OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]"
                OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]"
                OR Phenotype.Lab_code REGEXP "[[:<:]]%s[[:>:]]"
                OR Publication.PubMed_ID REGEXP "[[:<:]]%s[[:>:]]"
                OR Publication.Abstract REGEXP "[[:<:]]%s[[:>:]]"
                OR Publication.Title REGEXP "[[:<:]]%s[[:>:]]"
                OR Publication.Authors REGEXP "[[:<:]]%s[[:>:]]"
                OR PublishXRef.Id REGEXP "[[:<:]]%s[[:>:]]")
            ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id`
            LIMIT 6000
            """ % ((self.terms,) * 10)
        logger.sql(sql)
        rows = g.db.execute(sql).fetchall()
        self.trait_list = []
        with Bench("Creating trait objects"):
            # Column indexes follow the SELECT list above.
            for line in rows:
                this_trait = {}
                this_trait['name'] = line[3]
                this_trait['dataset'] = line[2]
                this_trait['species'] = line[0]
                this_trait['group'] = line[1]
                this_trait['description'] = line[4]
                this_trait['authors'] = line[5]
                this_trait['year'] = line[6]
                if this_trait['year'].isdigit():
                    this_trait['pubmed_text'] = this_trait['year']
                else:
                    this_trait['pubmed_text'] = "N/A"
                if line[7] != "" and line[7] is not None:
                    this_trait['pubmed_link'] = (
                        webqtlConfig.PUBMEDLINK_URL % line[7])
                else:
                    this_trait['pubmed_link'] = "N/A"
                this_trait['LRS_score_repr'] = "N/A"
                if line[8] != "" and line[8] is not None:
                    this_trait['LRS_score_repr'] = '%3.1f' % line[8]
                # Treat a NULL additive value like an empty one, matching
                # the LRS handling just above.
                this_trait['additive'] = "N/A"
                if line[9] != "" and line[9] is not None:
                    this_trait['additive'] = line[9]

                self.trait_list.append(this_trait)
def __init__(self, kw):
    """Run a global search and store the JSON-encoded results.

    Args:
        kw: Mapping with the keys ``type`` ("gene" or "phenotype") and
            ``terms`` (the search text). For phenotype searches a term of
            the form ``XXX_rest`` with a three-character prefix is split
            into an InbredSet code filter and the actual search term.

    For either supported type this sets ``self.trait_count``,
    ``self.trait_list`` (a JSON string) and ``self.header_fields``.
    Any other ``type`` leaves them unset.

    Raises:
        AssertionError: If ``type`` or ``terms`` is missing.
    """
    assert('type' in kw)
    assert('terms' in kw)

    self.type = kw['type']
    self.terms = kw['terms']
    assert(is_str(self.type))

    # NOTE(review): both queries interpolate the search terms directly
    # into the SQL text; if the terms come from user input this is open to
    # SQL injection. Switching to bound parameters needs the exact API of
    # ``g.db`` - TODO confirm and parameterize.
    if self.type == "gene":
        sql = """
            SELECT
            Species.`Name` AS species_name,
            InbredSet.`Name` AS inbredset_name,
            Tissue.`Name` AS tissue_name,
            ProbeSetFreeze.Name AS probesetfreeze_name,
            ProbeSetFreeze.FullName AS probesetfreeze_fullname,
            ProbeSet.Name AS probeset_name,
            ProbeSet.Symbol AS probeset_symbol,
            CAST(ProbeSet.`description` AS BINARY) AS probeset_description,
            ProbeSet.Chr AS chr,
            ProbeSet.Mb AS mb,
            ProbeSetXRef.Mean AS mean,
            ProbeSetXRef.LRS AS lrs,
            ProbeSetXRef.`Locus` AS locus,
            ProbeSetXRef.`pValue` AS pvalue,
            ProbeSetXRef.`additive` AS additive,
            ProbeSetFreeze.Id AS probesetfreeze_id,
            Geno.Chr as geno_chr,
            Geno.Mb as geno_mb
            FROM Species
            INNER JOIN InbredSet ON InbredSet.`SpeciesId`=Species.`Id`
            INNER JOIN ProbeFreeze ON ProbeFreeze.InbredSetId=InbredSet.`Id`
            INNER JOIN Tissue ON ProbeFreeze.`TissueId`=Tissue.`Id`
            INNER JOIN ProbeSetFreeze ON ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id
            INNER JOIN ProbeSetXRef ON ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id
            INNER JOIN ProbeSet ON ProbeSet.Id = ProbeSetXRef.ProbeSetId
            LEFT JOIN Geno ON ProbeSetXRef.Locus = Geno.Name AND Geno.SpeciesId = Species.Id
            WHERE ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,ProbeSet.alias,ProbeSet.GenbankId, ProbeSet.UniGeneId, ProbeSet.Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) )
            AND ProbeSetFreeze.confidentiality < 1
            AND ProbeSetFreeze.public > 0
            ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
            LIMIT 6000
            """ % (self.terms)
        with Bench("Running query"):
            logger.sql(sql)
            rows = g.db.execute(sql).fetchall()

        trait_list = []
        # Permission lookups are cached per dataset id so each dataset is
        # only checked once.
        dataset_to_permissions = {}
        with Bench("Creating trait objects"):
            # Column indexes follow the SELECT list above.
            for i, line in enumerate(rows):
                this_trait = {}
                this_trait['index'] = i + 1
                this_trait['name'] = line[5]
                this_trait['dataset'] = line[3]
                this_trait['dataset_fullname'] = line[4]
                this_trait['hmac'] = hmac.data_hmac(
                    '{}:{}'.format(line[5], line[3]))
                this_trait['species'] = line[0]
                this_trait['group'] = line[1]
                this_trait['tissue'] = line[2]
                this_trait['symbol'] = line[6]
                if line[7]:
                    this_trait['description'] = line[7].decode(
                        'utf-8', 'replace')
                else:
                    this_trait['description'] = "N/A"
                this_trait['location_repr'] = 'N/A'
                if (line[8] != "NULL" and line[8] != "") and (line[9] != 0):
                    this_trait['location_repr'] = 'Chr%s: %.6f' % (
                        line[8], float(line[9]))
                try:
                    this_trait['mean'] = '%.3f' % line[10]
                except (TypeError, ValueError):
                    # e.g. a NULL mean cannot be formatted as a float
                    this_trait['mean'] = "N/A"
                this_trait['LRS_score_repr'] = "N/A"
                if line[11] != "" and line[11] is not None:
                    this_trait['LRS_score_repr'] = '%3.1f' % line[11]
                this_trait['additive'] = "N/A"
                if line[14] != "" and line[14] is not None:
                    this_trait['additive'] = '%.3f' % line[14]
                this_trait['dataset_id'] = line[15]
                this_trait['locus_chr'] = line[16]
                this_trait['locus_mb'] = line[17]

                dataset_ob = SimpleNamespace(id=this_trait["dataset_id"],
                                             type="ProbeSet",
                                             species=this_trait["species"])
                if dataset_ob.id not in dataset_to_permissions:
                    permissions = check_resource_availability(dataset_ob)
                    dataset_to_permissions[dataset_ob.id] = permissions
                else:
                    # Previously assigned to a misspelled name, so a cache
                    # hit silently reused the previous row's permissions.
                    permissions = dataset_to_permissions[dataset_ob.id]
                if "view" not in permissions['data']:
                    continue

                max_lrs_text = "N/A"
                if (this_trait['locus_chr'] is not None and
                        this_trait['locus_mb'] is not None):
                    max_lrs_text = "Chr" + str(
                        this_trait['locus_chr']) + ": " + str(
                            this_trait['locus_mb'])
                this_trait['max_lrs_text'] = max_lrs_text

                trait_list.append(this_trait)

        self.trait_count = len(trait_list)
        self.trait_list = json.dumps(trait_list)

        self.header_fields = ['Index',
                              'Record',
                              'Species',
                              'Group',
                              'Tissue',
                              'Dataset',
                              'Symbol',
                              'Description',
                              'Location',
                              'Mean',
                              'Max LRS',
                              'Max LRS Location',
                              'Additive Effect']

    elif self.type == "phenotype":
        search_term = self.terms
        group_clause = ""
        if "_" in self.terms:
            term_parts = self.terms.split("_")
            # A three-character prefix is treated as an InbredSet code.
            if len(term_parts[0]) == 3:
                search_term = term_parts[1]
                group_clause = "AND InbredSet.`InbredSetCode` = '{}'".format(
                    term_parts[0])
        sql = """
            SELECT
            Species.`Name`,
            InbredSet.`Name`,
            PublishFreeze.`Name`,
            PublishFreeze.`FullName`,
            PublishXRef.`Id`,
            CAST(Phenotype.`Pre_publication_description` AS BINARY),
            CAST(Phenotype.`Post_publication_description` AS BINARY),
            Publication.`Authors`,
            Publication.`Year`,
            Publication.`PubMed_ID`,
            PublishXRef.`LRS`,
            PublishXRef.`additive`,
            InbredSet.`InbredSetCode`,
            PublishXRef.`mean`
            FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication
            WHERE PublishXRef.`InbredSetId`=InbredSet.`Id`
            AND PublishFreeze.`InbredSetId`=InbredSet.`Id`
            AND InbredSet.`SpeciesId`=Species.`Id`
            {0}
            AND PublishXRef.`PhenotypeId`=Phenotype.`Id`
            AND PublishXRef.`PublicationId`=Publication.`Id`
            AND (Phenotype.Post_publication_description REGEXP "[[:<:]]{1}[[:>:]]"
                OR Phenotype.Pre_publication_description REGEXP "[[:<:]]{1}[[:>:]]"
                OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]{1}[[:>:]]"
                OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]{1}[[:>:]]"
                OR Phenotype.Lab_code REGEXP "[[:<:]]{1}[[:>:]]"
                OR Publication.PubMed_ID REGEXP "[[:<:]]{1}[[:>:]]"
                OR Publication.Abstract REGEXP "[[:<:]]{1}[[:>:]]"
                OR Publication.Title REGEXP "[[:<:]]{1}[[:>:]]"
                OR Publication.Authors REGEXP "[[:<:]]{1}[[:>:]]"
                OR PublishXRef.Id REGEXP "[[:<:]]{1}[[:>:]]")
            ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id`
            LIMIT 6000
            """.format(group_clause, search_term)
        logger.sql(sql)
        rows = g.db.execute(sql).fetchall()
        trait_list = []
        with Bench("Creating trait objects"):
            # Column indexes follow the SELECT list above.
            for i, line in enumerate(rows):
                this_trait = {}
                this_trait['index'] = i + 1
                this_trait['name'] = str(line[4])
                if len(str(line[12])) == 3:
                    this_trait['display_name'] = str(
                        line[12]) + "_" + this_trait['name']
                else:
                    this_trait['display_name'] = this_trait['name']
                this_trait['dataset'] = line[2]
                this_trait['dataset_fullname'] = line[3]
                this_trait['hmac'] = hmac.data_hmac(
                    '{}:{}'.format(line[4], line[2]))
                this_trait['species'] = line[0]
                this_trait['group'] = line[1]
                # The post-publication description is only used when the
                # row has a PubMed ID.
                if line[9] is not None and line[6] is not None:
                    this_trait['description'] = line[6].decode(
                        'utf-8', 'replace')
                elif line[5] is not None:
                    this_trait['description'] = line[5].decode(
                        'utf-8', 'replace')
                else:
                    this_trait['description'] = "N/A"
                if line[13] is not None and line[13] != "":
                    this_trait['mean'] = line[13]
                else:
                    this_trait['mean'] = "N/A"
                this_trait['authors'] = line[7]
                this_trait['year'] = line[8]
                if this_trait['year'].isdigit():
                    this_trait['pubmed_text'] = this_trait['year']
                else:
                    this_trait['pubmed_text'] = "N/A"
                if line[9] != "" and line[9] is not None:
                    # Column 9 is the PubMed ID; the link was previously
                    # built from column 8 (the publication year).
                    this_trait['pubmed_link'] = (
                        webqtlConfig.PUBMEDLINK_URL % line[9])
                else:
                    this_trait['pubmed_link'] = "N/A"
                if line[12]:
                    this_trait['display_name'] = line[12] + "_" + str(
                        this_trait['name'])
                this_trait['LRS_score_repr'] = "N/A"
                if line[10] != "" and line[10] is not None:
                    this_trait['LRS_score_repr'] = '%3.1f' % line[10]
                this_trait['additive'] = "N/A"
                if line[11] != "" and line[11] is not None:
                    this_trait['additive'] = '%.3f' % line[11]

                this_trait['max_lrs_text'] = "N/A"
                trait_ob = create_trait(dataset_name=this_trait['dataset'],
                                        name=this_trait['name'],
                                        get_qtl_info=True,
                                        get_sample_info=False)
                if not trait_ob:
                    continue
                if this_trait['dataset'] == this_trait['group'] + "Publish":
                    # Best effort: keep "N/A" if the locus info is missing.
                    try:
                        if (trait_ob.locus_chr != "" and
                                trait_ob.locus_mb != ""):
                            this_trait['max_lrs_text'] = "Chr" + str(
                                trait_ob.locus_chr) + ": " + str(
                                    trait_ob.locus_mb)
                    except Exception:
                        this_trait['max_lrs_text'] = "N/A"

                trait_list.append(this_trait)

        self.trait_count = len(trait_list)
        self.trait_list = json.dumps(trait_list)

        self.header_fields = ['Index',
                              'Species',
                              'Group',
                              'Record',
                              'Description',
                              'Authors',
                              'Year',
                              'Max LRS',
                              'Max LRS Location',
                              'Additive Effect']
def marker_regression_page():
    """Run a mapping job from the submitted form and render its result page.

    Only the form fields listed in ``wanted`` (plus any field whose name
    starts with ``value:``) are forwarded to
    ``marker_regression.MarkerRegression``.  Depending on the ``pair_scan``
    entry of the result, either ``pair_scan_results.html`` (with the
    pair-scan image attached as an array of base64 bytes) or
    ``marker_regression_gn1.html`` is rendered.

    Returns:
        Whatever ``render_template`` returns for the selected template.

    Raises:
        KeyError: (or a subclass) if the form carries no ``temp_uuid`` field.

    Note:
        The Redis lookup is currently switched off (``result`` is forced to
        ``None``), so every request recomputes the mapping.  The cache-hit
        branch is kept so caching can be re-enabled; rendering happens after
        the hit/miss branch so that both paths produce a response.
    """
    # Local import: only needed for the base64 encode below, which must work
    # on both Python 2 str and Python 3 bytes.
    import codecs

    initial_start_vars = request.form
    logger.debug("Marker regression called with initial_start_vars:",
                 initial_start_vars.items())
    temp_uuid = initial_start_vars['temp_uuid']
    wanted = (
        'trait_id',
        'dataset',
        'method',
        'trimmed_markers',
        'selected_chr',
        'chromosomes',
        'mapping_scale',
        'score_type',
        'suggestive',
        'significant',
        'num_perm',
        'permCheck',
        'perm_output',
        'num_bootstrap',
        'bootCheck',
        'bootstrap_results',
        'LRSCheck',
        'maf',
        'manhattan_plot',
        'control_marker',
        'control_marker_db',
        'do_control',
        'genofile',
        'pair_scan',
        'startMb',
        'endMb',
        'graphWidth',
        'lrsMax',
        'additiveCheck',
        'showSNP',
        'showGenes',
        'viewLegend',
        'haplotypeAnalystCheck',
        'mapmethod_rqtl_geno',
        'mapmodel_rqtl_geno',
    )

    # .items() (not .iteritems()) so the filter runs on Python 2 and 3 alike.
    start_vars = {
        name: value
        for name, value in initial_start_vars.items()
        if name in wanted or name.startswith('value:')
    }
    logger.debug("Marker regression called with start_vars:", start_vars)

    version = "v3"
    key = "marker_regression:{}:".format(version) + json.dumps(start_vars, sort_keys=True)
    logger.info("key is:", pf(key))
    with Bench("Loading cache"):
        # NOTE: caching is disabled ("just for testing"); restore the
        # Redis.get call below to turn it back on.
        result = None
        # result = Redis.get(key)

    if result:
        logger.info("Cache hit!!!")
        with Bench("Loading results"):
            # NOTE(review): pickle.loads is only safe while the cache
            # contents are trusted.
            result = pickle.loads(result)
    else:
        logger.info("Cache miss!!!")
        with Bench("Total time in MarkerRegression"):
            template_vars = marker_regression.MarkerRegression(
                start_vars, temp_uuid)

        # js_data is serialised here for every mapping method except
        # gemma and plink.
        if template_vars.mapping_method not in ("gemma", "plink"):
            template_vars.js_data = json.dumps(template_vars.js_data,
                                               default=json_default_handler,
                                               indent=" ")

        result = template_vars.__dict__

    if result['pair_scan']:
        with Bench("Rendering template"):
            img_path = result['pair_scan_filename']
            logger.info("img_path:", img_path)
            logger.info("initial_start_vars:", initial_start_vars)
            # Context manager guarantees the image file is closed even if
            # the read fails.
            with open(TEMPDIR + img_path, 'rb') as imgfile:
                imgdata = imgfile.read()
            # Same output as the Python 2 idiom imgdata.encode("base64"),
            # but also valid for Python 3 bytes.
            img_b64 = codecs.encode(imgdata, "base64")
            result['pair_scan_array'] = array.array('B', img_b64)
            rendered_template = render_template("pair_scan_results.html", **result)
    else:
        gn1_template_vars = marker_regression_gn1.MarkerRegression(
            result).__dict__
        # To re-enable caching, store the result here:
        # Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
        # Redis.expire(key, 1*60)

        with Bench("Rendering template"):
            # gemma/plink runs: 'qtlresults' is dropped before rendering.
            if gn1_template_vars['mapping_method'] in ("gemma", "plink"):
                gn1_template_vars.pop('qtlresults', None)
            logger.debug("TEMPLATE KEYS:", list(gn1_template_vars.keys()))
            rendered_template = render_template(
                "marker_regression_gn1.html", **gn1_template_vars)

    return rendered_template