def check_resource_availability(dataset, trait_id=None):
    """Return the privileges the current user has on a dataset/trait.

    Args:
        dataset: Either a dataset object exposing ``.type`` or a plain
            string. Strings and datasets of type "Temp" are treated as
            temporary traits.
        trait_id: Optional trait identifier, forwarded to
            ``get_resource_id`` / ``add_new_resource``.

    Returns:
        ``webqtlConfig.DEFAULT_PRIVILEGES`` for temporary traits,
        ``webqtlConfig.SUPER_PRIVILEGES`` when the current user is listed
        in the Redis "super_users" set, otherwise the decoded JSON answer
        of the proxy.  If the proxy call or the JSON decoding fails, the
        resource's ``default_mask`` is returned (or the default privileges
        when no resource record is available).
    """
    # At least for now assume temporary entered traits are accessible
    if isinstance(dataset, str) or dataset.type == "Temp":
        return webqtlConfig.DEFAULT_PRIVILEGES

    resource_id = get_resource_id(dataset, trait_id)

    # This should never be false, but it's technically possible if a
    # non-Temp dataset somehow had a type other than Publish/ProbeSet/Geno.
    # Keep resource_info bound in that case so the fallback below cannot
    # fail with a NameError.
    resource_info = None
    if resource_id:
        resource_info = get_resource_info(resource_id)

        # If resource isn't already in redis, add it with default privileges
        if not resource_info:
            resource_info = add_new_resource(dataset, trait_id)

    # Check if super-user - we should probably come up with some way to
    # integrate this into the proxy
    if g.user_session.user_id in Redis.smembers("super_users"):
        return webqtlConfig.SUPER_PRIVILEGES

    the_url = GN_PROXY_URL + "available?resource={}&user={}".format(
        resource_id, g.user_session.user_id)

    try:
        return json.loads(requests.get(the_url).content)
    except Exception:
        # Best effort: the proxy being unreachable or answering with
        # something that is not JSON must not break the page.
        if resource_info:
            return resource_info['default_mask']
        # NOTE(review): no resource record to fall back on; default
        # privileges are assumed to be the right answer here - confirm.
        return webqtlConfig.DEFAULT_PRIVILEGES
def check_owner(dataset=None, trait_id=None, resource_id=None):
    """Tell whether the current user owns a resource.

    Args:
        dataset: Dataset used to look up the resource id when
            ``resource_id`` is not given.
        trait_id: Trait identifier used together with ``dataset``.
        resource_id: Resource id to check directly; when falsy it is
            derived from ``dataset`` and ``trait_id``.

    Returns:
        The resource id when ``g.user_session.user_id`` equals the
        resource's ``owner_id``; ``False`` otherwise (including when the
        resource id or the resource record cannot be found).
    """
    if not resource_id:
        resource_id = get_resource_id(dataset, trait_id)
        if not resource_id:
            return False

    resource_info = get_resource_info(resource_id)
    # The lookup can come back empty (other callers guard for this too),
    # so do not subscript it blindly.
    if resource_info and g.user_session.user_id == resource_info['owner_id']:
        return resource_id

    return False
def check_owner_or_admin(dataset=None, trait_id=None, resource_id=None):
    """Classify the current user's standing on a resource.

    When ``resource_id`` is falsy it is derived from ``dataset`` and
    ``trait_id``; datasets of type "Temp" short-circuit to "not-admin".

    Returns:
        "owner" for members of the Redis "super_users" set and for the
        user whose id matches the resource's ``owner_id``; the result of
        ``check_admin(resource_id)`` for any other user of a known
        resource; "not-admin" when no resource record is found.
    """
    if not resource_id:
        if dataset.type == "Temp":
            return "not-admin"
        resource_id = get_resource_id(dataset, trait_id)

    # Super-users are treated as owners of everything.
    if g.user_session.user_id in Redis.smembers("super_users"):
        return "owner"

    info = get_resource_info(resource_id)
    if not info:
        return "not-admin"

    if g.user_session.user_id == info['owner_id']:
        return "owner"
    return check_admin(resource_id)
def __init__(self, user_id, kw):
    """Collect everything needed to render a trait page.

    Args:
        user_id: Id of the requesting user; forwarded to
            ``get_highest_user_access_role``.
        kw: Request parameters.  Three shapes are handled:
            * ``trait_id`` present and ``dataset`` != "Temp": a stored
              trait, resolved via
              ``helper_functions.get_species_dataset_trait``;
            * ``group`` present: a newly pasted temporary trait
              (``species``, ``group`` and ``trait_paste`` are read);
            * otherwise: an existing temporary trait whose values are
              read back from Redis using ``trait_id``.

    Besides many display attributes, the method sets ``self.hddn`` (hidden
    form values) and ``self.js_data`` (values handed to the page script).
    """
    if 'trait_id' in kw and kw['dataset'] != "Temp":
        self.temp_trait = False
        self.trait_id = kw['trait_id']
        helper_functions.get_species_dataset_trait(self, kw)
    elif 'group' in kw:
        self.temp_trait = True
        self.trait_id = "Temp_" + kw['species'] + "_" + kw['group'] + \
            "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
        self.temp_species = kw['species']
        self.temp_group = kw['group']
        self.dataset = data_set.create_dataset(dataset_name="Temp",
                                               dataset_type="Temp",
                                               group_name=self.temp_group)

        # Put values in Redis so they can be looked up later if
        # added to a collection
        Redis.set(self.trait_id, kw['trait_paste'], ex=ONE_YEAR)
        self.trait_vals = kw['trait_paste'].split()
        self.this_trait = create_trait(dataset=self.dataset,
                                       name=self.trait_id,
                                       cellid=None)
    else:
        self.temp_trait = True
        self.trait_id = kw['trait_id']
        # Temp trait ids look like "Temp_<species>_<group>_<timestamp>"
        # (see the branch above that builds them).
        self.temp_species = self.trait_id.split("_")[1]
        self.temp_group = self.trait_id.split("_")[2]
        self.dataset = data_set.create_dataset(dataset_name="Temp",
                                               dataset_type="Temp",
                                               group_name=self.temp_group)
        self.this_trait = create_trait(dataset=self.dataset,
                                       name=self.trait_id,
                                       cellid=None)
        self.trait_vals = Redis.get(self.trait_id).split()

    self.resource_id = get_resource_id(self.dataset, self.trait_id)
    self.admin_status = get_highest_user_access_role(
        user_id=user_id,
        resource_id=(self.resource_id or ""),
        gn_proxy_url=GN_PROXY_URL)

    # Get verify/rna-seq link URLs.  This whole section is best effort:
    # any failure simply leaves both BLAT links empty.
    try:
        blatsequence = self.this_trait.blatseq

        # XZ, 06/03/2009: ProbeSet name is not unique among platforms.
        # We should use ProbeSet Id instead.
        # NOTE(review): the names are interpolated into the SQL text
        # unescaped; consider a parameterized query.
        probe_query = (
            "SELECT Probe.Sequence, Probe.Name "
            "FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef "
            "WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND "
            "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND "
            "ProbeSetFreeze.Name = '%s' AND "
            "ProbeSet.Name = '%s' AND "
            "Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder"
        ) % (self.this_trait.dataset.name, self.this_trait.name)
        # The same rows serve both the fallback ProbeSet sequence and the
        # per-probe entries below, so the query is run only once.
        seqs = g.db.execute(probe_query).fetchall()

        if not blatsequence:
            if not seqs:
                raise ValueError
            # Only probes whose name ends in an odd digit are used.
            blatsequence = "".join(
                seqt[0].strip() for seqt in seqs
                if int(seqt[1][-1]) % 2 == 1)

        # Hongqiang added this part in order to not only blat ProbeSet,
        # but also blat Probe
        blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A'
        for seqt in seqs:
            if int(seqt[1][-1]) % 2 == 1:
                blatsequence += '%3EProbe_' + \
                    seqt[1].strip() + '%0A' + seqt[0].strip() + '%0A'

        if self.dataset.group.species == "rat":
            self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % (
                'rat', 'rn6', blatsequence)
            self.UTHSC_BLAT_URL = ""
        elif self.dataset.group.species == "mouse":
            self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % (
                'mouse', 'mm10', blatsequence)
            self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % (
                'mouse', 'mm10', blatsequence)
        elif self.dataset.group.species == "human":
            self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % (
                'human', 'hg38', blatsequence)
            self.UTHSC_BLAT_URL = ""
        else:
            self.UCSC_BLAT_URL = ""
            self.UTHSC_BLAT_URL = ""
    except Exception:
        self.UCSC_BLAT_URL = ""
        self.UTHSC_BLAT_URL = ""

    if self.dataset.type == "ProbeSet":
        self.show_probes = "True"

    trait_units = get_trait_units(self.this_trait)
    self.get_external_links()
    self.build_correlation_tools()

    self.ncbi_summary = get_ncbi_summary(self.this_trait)

    # Get nearest marker for composite mapping
    if not self.temp_trait:
        if check_if_attr_exists(
                self.this_trait, 'locus_chr'
        ) and self.dataset.type != "Geno" and self.dataset.type != "Publish":
            self.nearest_marker = get_nearest_marker(
                self.this_trait, self.dataset)
        else:
            self.nearest_marker = ""

    self.make_sample_lists()

    trait_vals_by_group = [
        get_trait_vals(sample_type.sample_list)
        for sample_type in self.sample_groups
    ]

    self.max_digits_by_group = get_max_digits(trait_vals_by_group)

    self.qnorm_vals = quantile_normalize_vals(self.sample_groups,
                                              trait_vals_by_group)
    self.z_scores = get_z_scores(self.sample_groups, trait_vals_by_group)

    self.temp_uuid = uuid.uuid4()

    self.sample_group_types = OrderedDict()
    self.sample_group_types['samples_primary'] = self.dataset.group.name
    if len(self.sample_groups) > 1:
        self.sample_group_types['samples_other'] = "Other"
        self.sample_group_types['samples_all'] = "All"
    sample_lists = [group.sample_list for group in self.sample_groups]

    self.categorical_var_list = []
    self.numerical_var_list = []
    if not self.temp_trait:
        # Only using first samplelist, since I think mapping only uses
        # those samples
        self.categorical_var_list = get_categorical_variables(
            self.this_trait, self.sample_groups[0])
        self.numerical_var_list = get_numerical_variables(
            self.this_trait, self.sample_groups[0])

    # Get list of chromosomes to select for mapping
    self.chr_list = [["All", -1]]
    chromosomes = self.dataset.species.chromosomes.chromosomes
    for i, this_chr in enumerate(chromosomes):
        self.chr_list.append([chromosomes[this_chr].name, i])

    self.genofiles = self.dataset.group.get_genofiles()
    study_samplelist_json = self.dataset.group.get_study_samplelists()
    self.study_samplelists = [
        study["title"] for study in study_samplelist_json
    ]

    # No need to grab scales from .geno file unless it's using a mapping
    # method that reads .geno files.  (The former test
    # `"QTLReaper" or "R/qtl" in ...` was always true.)  If the group
    # carries no mapping_names attribute the scales are still read, which
    # matches the previous always-true behaviour.
    mapping_names = getattr(self.dataset.group, "mapping_names", None)
    if (mapping_names is None or "QTLReaper" in mapping_names
            or "R/qtl" in mapping_names):
        if self.genofiles:
            self.scales_in_geno = get_genotype_scales(self.genofiles)
        else:
            self.scales_in_geno = get_genotype_scales(
                self.dataset.group.name + ".geno")
    else:
        self.scales_in_geno = {}

    self.has_num_cases = has_num_cases(self.this_trait)

    # Needed to know whether to display bar chart + get max sample name
    # length in order to set table column width
    self.num_values = 0
    # So it knows whether to display the Binary R/qtl mapping method,
    # which doesn't work unless all values are 0 or 1
    self.binary = "true"
    # Since we don't want to show log2 transform option for situations
    # where it doesn't make sense
    self.negative_vals_exist = "false"
    max_samplename_width = 1
    for group in self.sample_groups:
        for sample in group.sample_list:
            if len(sample.name) > max_samplename_width:
                max_samplename_width = len(sample.name)
            # "x" marks a sample without a value.
            if sample.display_value != "x":
                self.num_values += 1
                # The former `!= 0 or != 1` test was true for every
                # value, so "binary" could never stay "true".
                # NOTE(review): sample.value is assumed to be the numeric
                # counterpart of display_value (it is compared with 0
                # just below) - confirm.
                if sample.value not in (0, 1):
                    self.binary = "false"
                if sample.value < 0:
                    self.negative_vals_exist = "true"

    # Check whether any attributes have few enough distinct values to
    # show the "Block samples by group" option
    primary_attributes = self.sample_groups[0].attributes
    self.categorical_attr_exists = "false"
    for attribute in primary_attributes:
        if len(primary_attributes[attribute].distinct_values) <= 10:
            self.categorical_attr_exists = "true"
            break

    sample_column_width = max_samplename_width * 8

    self.stats_table_width, self.trait_table_width = get_table_widths(
        self.sample_groups, sample_column_width, self.has_num_cases)

    if self.num_values >= 5000:
        self.maf = 0.01
    else:
        self.maf = 0.05

    trait_symbol = None
    short_description = None
    if not self.temp_trait:
        if self.this_trait.symbol:
            trait_symbol = self.this_trait.symbol
            short_description = trait_symbol
        elif hasattr(self.this_trait, 'post_publication_abbreviation'):
            short_description = self.this_trait.post_publication_abbreviation
        elif hasattr(self.this_trait, 'pre_publication_abbreviation'):
            short_description = self.this_trait.pre_publication_abbreviation

    # Todo: Add back in the ones we actually need from below, as we
    # discover we need them
    hddn = OrderedDict()

    if self.dataset.group.allsamples:
        hddn['allsamples'] = ','.join(self.dataset.group.allsamples)
    hddn['primary_samples'] = ','.join(self.primary_sample_names)
    hddn['trait_id'] = self.trait_id
    hddn['trait_display_name'] = self.this_trait.display_name
    hddn['dataset'] = self.dataset.name
    hddn['temp_trait'] = False
    if self.temp_trait:
        hddn['temp_trait'] = True
        hddn['group'] = self.temp_group
        hddn['species'] = self.temp_species
    else:
        hddn['group'] = self.dataset.group.name
        hddn['species'] = self.dataset.group.species
    hddn['use_outliers'] = False
    hddn['method'] = "gemma"
    hddn['selected_chr'] = -1
    hddn['mapping_display_all'] = True
    hddn['suggestive'] = 0
    hddn['study_samplelists'] = json.dumps(study_samplelist_json)
    hddn['num_perm'] = 0
    hddn['categorical_vars'] = ""
    if self.categorical_var_list:
        hddn['categorical_vars'] = ",".join(self.categorical_var_list)
    hddn['manhattan_plot'] = ""
    hddn['control_marker'] = ""
    if not self.temp_trait:
        if hasattr(
                self.this_trait, 'locus_chr'
        ) and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
            hddn['control_marker'] = self.nearest_marker
    hddn['do_control'] = False
    hddn['maf'] = 0.05
    hddn['mapping_scale'] = "physic"
    hddn['compare_traits'] = []
    hddn['export_data'] = ""
    hddn['export_format'] = "excel"
    # With exactly one genotype file entry, default to its first scale.
    if len(self.scales_in_geno) < 2 and bool(self.scales_in_geno):
        hddn['mapping_scale'] = self.scales_in_geno[list(
            self.scales_in_geno.keys())[0]][0][0]

    # We'll need access to this_trait and hddn in the Jinja2 Template,
    # so we put it inside self
    self.hddn = hddn

    js_data = dict(
        trait_id=self.trait_id,
        trait_symbol=trait_symbol,
        max_digits=self.max_digits_by_group,
        short_description=short_description,
        unit_type=trait_units,
        dataset_type=self.dataset.type,
        species=self.dataset.group.species,
        scales_in_geno=self.scales_in_geno,
        data_scale=self.dataset.data_scale,
        sample_group_types=self.sample_group_types,
        sample_lists=sample_lists,
        se_exists=self.sample_groups[0].se_exists,
        has_num_cases=self.has_num_cases,
        attributes=self.sample_groups[0].attributes,
        categorical_attr_exists=self.categorical_attr_exists,
        categorical_vars=",".join(self.categorical_var_list),
        num_values=self.num_values,
        qnorm_values=self.qnorm_vals,
        zscore_values=self.z_scores,
        sample_column_width=sample_column_width,
        temp_uuid=self.temp_uuid)
    self.js_data = js_data
def _fill_locus_position(trait, dataset):
    """Set ``trait.locus_chr``/``trait.locus_mb`` from ``trait.locus``.

    When the trait has no locus, or the marker is not found for the
    dataset's species, ``locus``, ``locus_chr``, ``locus_mb`` and
    ``additive`` are all reset to "".
    """
    if trait.locus:
        query = (
            " select Geno.Chr, Geno.Mb from Geno, Species "
            "where Species.Name = '{}' and "
            "Geno.Name = '{}' and "
            "Geno.SpeciesId = Species.Id "
        ).format(dataset.group.species, trait.locus)
        logger.sql(query)
        result = g.db.execute(query).fetchone()
        if result:
            trait.locus_chr = result[0]
            trait.locus_mb = result[1]
            return
    trait.locus = trait.locus_chr = trait.locus_mb = trait.additive = ""


def retrieve_trait_info(trait, dataset, get_qtl_info=False):
    """Populate ``trait`` with its descriptive fields and return it.

    The fields are first requested from the access proxy; if that request
    or the JSON decoding fails, they are read from the database with a
    query that depends on ``dataset.type``.

    Args:
        trait: Trait object to fill in; attributes named after
            ``dataset.display_fields`` are set on it.
        dataset: Dataset the trait belongs to (must be truthy).
        get_qtl_info: When true, also fill in the LRS/locus attributes.

    Returns:
        The same ``trait`` object.

    Raises:
        KeyError: if no information could be found for the trait.
    """
    assert dataset, "Dataset doesn't exist"

    resource_id = get_resource_id(dataset, trait.name)
    # NOTE(review): the proxy address is hard-coded here.
    if dataset.type == 'Publish':
        the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view".format(
            resource_id, g.user_session.user_id)
    else:
        the_url = "http://localhost:8080/run-action?resource={}&user={}&branch=data&action=view&trait={}".format(
            resource_id, g.user_session.user_id, trait.name)

    try:
        response = requests.get(the_url).content
        trait_info = json.loads(response)
    except Exception:
        # ZS: I'm assuming the trait is viewable if the try fails for some
        # reason; it should never reach this point unless the user has
        # privileges, since that's dealt with in create_trait
        if dataset.type == 'Publish':
            # NOTE(review): trait.name and dataset.id are interpolated
            # unquoted and unescaped.
            query = (
                " SELECT PublishXRef.Id, InbredSet.InbredSetCode, "
                "Publication.PubMed_ID, "
                "CAST(Phenotype.Pre_publication_description AS BINARY), "
                "CAST(Phenotype.Post_publication_description AS BINARY), "
                "CAST(Phenotype.Original_description AS BINARY), "
                "CAST(Phenotype.Pre_publication_abbreviation AS BINARY), "
                "CAST(Phenotype.Post_publication_abbreviation AS BINARY), "
                "PublishXRef.mean, Phenotype.Lab_code, "
                "Phenotype.Submitter, Phenotype.Owner, "
                "Phenotype.Authorized_Users, "
                "CAST(Publication.Authors AS BINARY), "
                "CAST(Publication.Title AS BINARY), "
                "CAST(Publication.Abstract AS BINARY), "
                "CAST(Publication.Journal AS BINARY), "
                "Publication.Volume, Publication.Pages, "
                "Publication.Month, Publication.Year, "
                "PublishXRef.Sequence, Phenotype.Units, "
                "PublishXRef.comments "
                "FROM PublishXRef, Publication, Phenotype, PublishFreeze, InbredSet "
                "WHERE PublishXRef.Id = %s AND "
                "Phenotype.Id = PublishXRef.PhenotypeId AND "
                "Publication.Id = PublishXRef.PublicationId AND "
                "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND "
                "PublishXRef.InbredSetId = InbredSet.Id AND "
                "PublishFreeze.Id = %s "
            ) % (trait.name, dataset.id)

            logger.sql(query)
            trait_info = g.db.execute(query).fetchone()

        # XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to
        # find the probeset instead of just using ProbeSet.Name, to avoid
        # the problem of same probeset name from different platforms.
        elif dataset.type == 'ProbeSet':
            display_fields_string = ', ProbeSet.'.join(dataset.display_fields)
            display_fields_string = 'ProbeSet.' + display_fields_string
            query = (
                " SELECT %s "
                "FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef "
                "WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND "
                "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND "
                "ProbeSetFreeze.Name = '%s' AND "
                "ProbeSet.Name = '%s' "
            ) % (escape(display_fields_string), escape(dataset.name),
                 escape(str(trait.name)))
            logger.sql(query)
            trait_info = g.db.execute(query).fetchone()

        # XZ, 05/08/2009: We also should use Geno.Id to find marker
        # instead of just using Geno.Name to avoid the problem of same
        # marker name from different species.
        elif dataset.type == 'Geno':
            display_fields_string = ',Geno.'.join(dataset.display_fields)
            display_fields_string = 'Geno.' + display_fields_string
            query = (
                " SELECT %s "
                "FROM Geno, GenoFreeze, GenoXRef "
                "WHERE GenoXRef.GenoFreezeId = GenoFreeze.Id AND "
                "GenoXRef.GenoId = Geno.Id AND "
                "GenoFreeze.Name = '%s' AND "
                "Geno.Name = '%s' "
            ) % (escape(display_fields_string), escape(dataset.name),
                 escape(trait.name))
            logger.sql(query)
            trait_info = g.db.execute(query).fetchone()
        else:  # Temp type
            # NOTE(review): the column list and the table name are passed
            # as bound parameters here; verify that the driver accepts
            # identifiers this way.
            query = """SELECT %s FROM %s WHERE Name = %s"""
            logger.sql(query)
            trait_info = g.db.execute(query,
                                      ','.join(dataset.display_fields),
                                      dataset.type, trait.name).fetchone()

    if not trait_info:
        raise KeyError(
            repr(trait.name) + ' information is not found in the database.')

    trait.haveinfo = True
    # Values are matched to display_fields by position.
    for i, field in enumerate(dataset.display_fields):
        holder = trait_info[i]
        if isinstance(holder, bytes):
            holder = holder.decode("utf-8", errors="ignore")
        setattr(trait, field, holder)

    if dataset.type == 'Publish':
        if trait.group_code:
            trait.display_name = trait.group_code + "_" + str(trait.name)

        trait.confidential = 0
        if trait.pre_publication_description and not trait.pubmed_id:
            trait.confidential = 1

        description = trait.post_publication_description

        # If the dataset is confidential and the user has access to
        # confidential phenotype traits, then display the pre-publication
        # description instead of the post-publication description
        if not trait.pubmed_id:
            trait.abbreviation = trait.pre_publication_abbreviation
            trait.description_display = trait.pre_publication_description
        else:
            trait.abbreviation = trait.post_publication_abbreviation
            if description:
                trait.description_display = description.strip()
            else:
                trait.description_display = ""

        # A missing or non-string year must not crash the page.
        if not str(trait.year or "").isdigit():
            trait.pubmed_text = "N/A"
        else:
            trait.pubmed_text = trait.year

        if trait.pubmed_id:
            trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id

    if dataset.type == 'ProbeSet' and dataset.group:
        description_string = trait.description
        target_string = trait.probe_target_description

        if str(description_string or "") != "" and description_string != 'None':
            description_display = description_string
        else:
            description_display = trait.symbol

        if (str(description_display or "") != ""
                and description_display != 'N/A'
                and str(target_string or "") != ""
                and target_string != 'None'):
            description_display = description_display + '; ' + target_string.strip()

        # Save it for the jinja2 template
        trait.description_display = description_display

        trait.location_repr = 'N/A'
        if trait.chr and trait.mb:
            trait.location_repr = 'Chr%s: %.6f' % (trait.chr,
                                                   float(trait.mb))

    elif dataset.type == "Geno":
        trait.location_repr = 'N/A'
        if trait.chr and trait.mb:
            trait.location_repr = 'Chr%s: %.6f' % (trait.chr,
                                                   float(trait.mb))

    if get_qtl_info:
        # LRS and its location
        trait.LRS_score_repr = "N/A"
        trait.LRS_location_repr = "N/A"
        trait.locus = trait.locus_chr = trait.locus_mb = trait.lrs = trait.pvalue = trait.additive = ""
        if dataset.type == 'ProbeSet' and not trait.cellid:
            trait.mean = ""
            # The trait name is escaped like in the ProbeSet info query
            # above.
            query = (
                " SELECT ProbeSetXRef.Locus, ProbeSetXRef.LRS, "
                "ProbeSetXRef.pValue, ProbeSetXRef.mean, "
                "ProbeSetXRef.additive "
                "FROM ProbeSetXRef, ProbeSet "
                "WHERE ProbeSetXRef.ProbeSetId = ProbeSet.Id AND "
                "ProbeSet.Name = \"{}\" AND "
                "ProbeSetXRef.ProbeSetFreezeId ={} "
            ).format(escape(str(trait.name)), dataset.id)
            logger.sql(query)
            trait_qtl = g.db.execute(query).fetchone()
            if trait_qtl:
                trait.locus, trait.lrs, trait.pvalue, trait.mean, trait.additive = trait_qtl
                _fill_locus_position(trait, dataset)

        if dataset.type == 'Publish':
            # NOTE(review): trait.name is interpolated unquoted here.
            query = (
                " SELECT PublishXRef.Locus, PublishXRef.LRS, "
                "PublishXRef.additive "
                "FROM PublishXRef, PublishFreeze "
                "WHERE PublishXRef.Id = %s AND "
                "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND "
                "PublishFreeze.Id =%s "
            ) % (trait.name, dataset.id)
            logger.sql(query)
            trait_qtl = g.db.execute(query).fetchone()
            if trait_qtl:
                trait.locus, trait.lrs, trait.additive = trait_qtl
                _fill_locus_position(trait, dataset)
            else:
                trait.locus = trait.lrs = trait.additive = ""

        if (dataset.type == 'Publish' or dataset.type == "ProbeSet"
            ) and str(trait.locus_chr or "") != "" and str(
                trait.locus_mb or "") != "":
            trait.LRS_location_repr = 'Chr%s: %.6f' % (
                trait.locus_chr, float(trait.locus_mb))
            if str(trait.lrs or "") != "":
                trait.LRS_score_repr = '%3.1f' % trait.lrs

    return trait