def __init__(self, start_vars, temp_uuid):
    """Gather the submitted sample values, run the mapping and build ``js_data``.

    Args:
        start_vars: Mapping of request variables. It must hold a
            ``'value:<sample>'`` entry for every sample in
            ``self.dataset.group.samplelist``.
        temp_uuid: Identifier handed to ``temp_data.TempData``.
    """
    # Presumably attaches self.dataset and self.species, which are read
    # below -- confirm against helper_functions.
    helper_functions.get_species_dataset_trait(self, start_vars)

    progress_store = temp_data.TempData(temp_uuid)

    group_samples = self.dataset.group.samplelist
    self.vals = [start_vars['value:' + name] for name in group_samples]
    self.samples = [str(name) for name in group_samples]  # Want only ones with values

    # gen_data is expected to leave its output in self.qtl_results (read below).
    self.gen_data(progress_store)

    # Get chromosome lengths for drawing the manhattan plot
    chromosomes = self.species.chromosomes.chromosomes
    lengths_by_chr = {}
    for chr_key in chromosomes.keys():
        lengths_by_chr[chr_key] = chromosomes[chr_key].mb_length

    self.js_data = {
        'chromosomes': lengths_by_chr,
        'qtl_results': self.qtl_results,
    }
def __init__(self, start_vars, temp_uuid):
    """Run interval mapping for one trait and assemble the plot payload.

    Args:
        start_vars: Mapping of request variables. It must hold a
            ``'value:<sample>'`` entry for every sample in
            ``self.dataset.group.samplelist``; it is also passed to
            ``self.set_options``.
        temp_uuid: Identifier handed to ``temp_data.TempData``.

    Side effects:
        Writes ``self.json_data`` as JSON to a randomly named ``intmap_*``
        file under ``webqtlConfig.TMPDIR`` and sets ``self.js_data``.
    """
    # Currently only getting trait data for one trait, but will need
    # to change this to accept multiple traits once the collection page is implemented
    helper_functions.get_species_dataset_trait(self, start_vars)

    tempdata = temp_data.TempData(temp_uuid)

    self.samples = []  # Want only ones with values
    self.vals = []
    for sample in self.dataset.group.samplelist:
        value = start_vars['value:' + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    print("start_vars:", start_vars)

    # set_options / gen_reaper_results are expected to provide
    # self.manhattan_plot, self.additive and self.qtl_results (read below).
    self.set_options(start_vars)

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']
    self.gen_reaper_results(tempdata)

    # Get chromosome lengths for drawing the interval map plot
    chromosomes = self.species.chromosomes.chromosomes
    chromosome_mb_lengths = {}
    self.json_data['chrnames'] = []
    for key in chromosomes.keys():
        this_chr = chromosomes[key]
        self.json_data['chrnames'].append([this_chr.name, this_chr.mb_length])
        chromosome_mb_lengths[key] = this_chr.mb_length

    print("JSON DATA:", self.json_data)

    # The original called json.dumps(data, path): the path was taken as the
    # `skipkeys` flag and the serialized text was thrown away, so nothing
    # was ever written. json.dump with a file object is what was intended.
    json_filename = webqtlUtil.genRandStr(prefix="intmap_")
    with open(webqtlConfig.TMPDIR + json_filename, "w") as json_file:
        json.dump(self.json_data, json_file)

    self.js_data = dict(
        manhattan_plot=self.manhattan_plot,
        additive=self.additive,
        chromosomes=chromosome_mb_lengths,
        qtl_results=self.qtl_results,
        json_data=self.json_data,
    )
def __init__(self, start_vars, temp_uuid):
    """Run interval mapping for one trait and assemble the plot payload.

    Args:
        start_vars: Mapping of request variables. It must hold a
            ``"value:<sample>"`` entry for every sample in
            ``self.dataset.group.samplelist``; it is also passed to
            ``self.set_options``.
        temp_uuid: Identifier handed to ``temp_data.TempData``.

    Side effects:
        Writes ``self.json_data`` as JSON to a randomly named ``intmap_*``
        file under ``webqtlConfig.TMPDIR`` and sets ``self.js_data``.
    """
    # Currently only getting trait data for one trait, but will need
    # to change this to accept multiple traits once the collection page is implemented
    helper_functions.get_species_dataset_trait(self, start_vars)

    tempdata = temp_data.TempData(temp_uuid)

    self.samples = []  # Want only ones with values
    self.vals = []
    for sample in self.dataset.group.samplelist:
        value = start_vars["value:" + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    print("start_vars:", start_vars)

    # set_options / gen_reaper_results are expected to provide
    # self.manhattan_plot and self.qtl_results (read below).
    self.set_options(start_vars)

    self.score_type = "LRS"
    self.cutoff = 3

    self.json_data = {}
    self.json_data["lodnames"] = ["lod.hk"]
    self.gen_reaper_results(tempdata)

    # Get chromosome lengths for drawing the interval map plot
    chromosomes = self.species.chromosomes.chromosomes
    chromosome_mb_lengths = {}
    self.json_data["chrnames"] = []
    for key in chromosomes.keys():
        this_chr = chromosomes[key]
        self.json_data["chrnames"].append([this_chr.name, this_chr.mb_length])
        chromosome_mb_lengths[key] = this_chr.mb_length

    print("JSON DATA:", self.json_data)

    # The original called json.dumps(data, path): the path was taken as the
    # `skipkeys` flag and the serialized text was thrown away, so nothing
    # was ever written. json.dump with a file object is what was intended.
    json_filename = webqtlUtil.genRandStr(prefix="intmap_")
    with open(webqtlConfig.TMPDIR + json_filename, "w") as json_file:
        json.dump(self.json_data, json_file)

    self.js_data = dict(
        result_score_type=self.score_type,
        manhattan_plot=self.manhattan_plot,
        chromosomes=chromosome_mb_lengths,
        qtl_results=self.qtl_results,
        json_data=self.json_data,
    )
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and build the manhattan-plot payload.

    Args:
        start_vars: Mapping of request variables. Reads ``'method'``,
            ``'maf'`` and one ``'value:<sample>'`` entry per sample in
            ``self.dataset.group.samplelist``.
        temp_uuid: Identifier passed (as a string) to ``self.gen_data``
            when the method is ``"pylmm"``.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    self.samples = []  # Want only ones with values
    self.vals = []
    for sample in self.dataset.group.samplelist:
        value = start_vars['value:' + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    self.mapping_method = start_vars['method']
    self.maf = start_vars['maf']  # Minor allele frequency
    print("self.maf:", self.maf)

    self.dataset.group.get_markers()
    if self.mapping_method == "gemma":
        qtl_results = self.run_gemma()
    elif self.mapping_method == "plink":
        qtl_results = self.run_plink()
    elif self.mapping_method == "pylmm":
        print("RUNNING PYLMM")
        qtl_results = self.gen_data(str(temp_uuid))
    else:
        print("RUNNING NOTHING")
        # Previously left unbound, which made the loop below raise
        # NameError for any unrecognised method.
        qtl_results = []

    self.lod_cutoff = 2

    # Keep only markers whose 'chr' compares greater than 0.
    self.filtered_markers = [
        marker for marker in qtl_results if marker['chr'] > 0
    ]

    # Get chromosome lengths for drawing the manhattan plot
    chromosomes = self.species.chromosomes.chromosomes
    chromosome_mb_lengths = {}
    for key in chromosomes.keys():
        chromosome_mb_lengths[key] = chromosomes[key].mb_length

    self.js_data = dict(
        this_trait=self.this_trait.name,
        data_set=self.dataset.name,
        maf=self.maf,
        chromosomes=chromosome_mb_lengths,
        qtl_results=self.filtered_markers,
    )
def __init__(self, start_vars, temp_uuid):
    """Gather the submitted sample values, run the QTL scan and build ``js_data``.

    Args:
        start_vars: Mapping of request variables. It must hold a
            ``'value:<sample>'`` entry for every sample in
            ``self.dataset.group.samplelist``; it is also passed to
            ``self.set_options``.
        temp_uuid: Identifier handed to ``temp_data.TempData``.
    """
    print("TESTING!!!")

    # Currently only getting trait data for one trait, but will need
    # to change this to accept multiple traits once the collection page is implemented
    helper_functions.get_species_dataset_trait(self, start_vars)

    progress_store = temp_data.TempData(temp_uuid)

    group_samples = self.dataset.group.samplelist
    self.vals = [start_vars['value:' + name] for name in group_samples]
    self.samples = [str(name) for name in group_samples]  # Want only ones with values

    print("start_vars:", start_vars)

    self.set_options(start_vars)

    # gen_qtl_results is expected to leave its output in self.qtl_results.
    self.gen_qtl_results(progress_store)

    # Get chromosome lengths for drawing the interval map plot
    chromosomes = self.species.chromosomes.chromosomes
    lengths_by_chr = {}
    for chr_key in chromosomes.keys():
        lengths_by_chr[chr_key] = chromosomes[chr_key].mb_length

    self.js_data = {
        'chromosomes': lengths_by_chr,
        'qtl_results': self.qtl_results,
    }
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and build the JSON/JS plot payloads.

    Args:
        start_vars: Mapping of request variables. Always reads ``"method"``,
            ``"manhattan_plot"``, ``"maf"`` and one ``"value:<sample>"``
            entry per sample in ``self.dataset.group.samplelist``. Depending
            on the method it also reads ``"num_perm"``, ``"control_marker"``,
            ``"do_control"``, ``"mapmethod_rqtl_geno"``,
            ``"mapmodel_rqtl_geno"`` and ``"pair_scan"``.
        temp_uuid: Identifier passed (as a string) to the pylmm helpers.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    self.json_data = {}
    self.json_data["lodnames"] = ["lod.hk"]

    self.samples = []  # Want only ones with values
    self.vals = []
    for sample in self.dataset.group.samplelist:
        value = start_vars["value:" + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    self.mapping_method = start_vars["method"]
    self.manhattan_plot = start_vars["manhattan_plot"] == "true"

    self.maf = start_vars["maf"]  # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which template to use
    self.pair_scan = False
    self.score_type = "LRS"  # ZS: LRS or LOD

    self.dataset.group.get_markers()
    if self.mapping_method == "gemma":
        self.score_type = "LOD"
        included_markers, p_values = gemma_mapping.run_gemma(
            self.dataset, self.samples, self.vals)
        self.dataset.group.get_specified_markers(markers=included_markers)
        self.dataset.group.markers.add_pvalues(p_values)
        results = self.dataset.group.markers.markers
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        if start_vars["num_perm"] == "":
            self.num_perm = 0
        else:
            self.num_perm = start_vars["num_perm"]
        self.control = start_vars["control_marker"]
        self.do_control = start_vars["do_control"]
        print("StartVars:", start_vars)
        self.method = start_vars["mapmethod_rqtl_geno"]
        self.model = start_vars["mapmodel_rqtl_geno"]
        if start_vars["pair_scan"] == "true":
            self.pair_scan = True
        results = self.run_rqtl_geno()
        print("qtl_results:", results)
    elif self.mapping_method == "plink":
        results = self.run_plink()
    elif self.mapping_method == "pylmm":
        print("RUNNING PYLMM")
        self.num_perm = start_vars["num_perm"]
        if self.num_perm != "" and int(self.num_perm) > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        print("RUNNING NOTHING")
        # Previously left unbound, which made the loops below raise
        # NameError for any unrecognised method.
        results = []

    if self.pair_scan:
        self.qtl_results = []
        highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
        for marker in results:
            chrom = marker["chr1"]
            # Test the string labels first so they are never ordered
            # against an integer.
            is_sex_chr = chrom in ("X", "X/Y")
            if is_sex_chr or chrom > 0:
                if is_sex_chr or chrom > highest_chr:
                    highest_chr = chrom
                if "lod_score" in marker:
                    self.qtl_results.append(marker)

        # These lists were never created before being appended to, and the
        # loop used to subscript the (index, item) tuples from enumerate().
        for list_name in ("chr1", "chr2", "Mb", "markernames"):
            self.json_data[list_name] = []
        for qtl in self.qtl_results:
            self.json_data["chr1"].append(str(qtl["chr1"]))
            self.json_data["chr2"].append(str(qtl["chr2"]))
            self.json_data["Mb"].append(qtl["Mb"])
            self.json_data["markernames"].append(qtl["name"])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            qtl_results=self.qtl_results,
        )
    else:
        self.cutoff = 2
        self.qtl_results = []
        highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
        for marker in results:
            chrom = marker["chr"]
            is_sex_chr = chrom in ("X", "X/Y")
            if is_sex_chr or chrom > 0:
                if is_sex_chr or chrom > highest_chr:
                    highest_chr = chrom
                if "lod_score" in marker:
                    self.qtl_results.append(marker)

        self.json_data["chr"] = []
        self.json_data["pos"] = []
        self.json_data["lod.hk"] = []
        self.json_data["markernames"] = []
        self.json_data["suggestive"] = self.suggestive
        self.json_data["significant"] = self.significant

        # Need to convert the QTL objects that qtl reaper returns into a
        # json serializable dictionary
        for index, qtl in enumerate(self.qtl_results):
            if index < 40:
                print("lod score is:", qtl["lod_score"])
            # A numeric highest chromosome is relabelled as "X".
            if qtl["chr"] == highest_chr and highest_chr not in ("X", "X/Y"):
                print("changing to X")
                self.json_data["chr"].append("X")
            else:
                self.json_data["chr"].append(str(qtl["chr"]))
            self.json_data["pos"].append(qtl["Mb"])
            if "lrs_value" in qtl:
                self.json_data["lod.hk"].append(str(qtl["lrs_value"]))
            else:
                self.json_data["lod.hk"].append(str(qtl["lod_score"]))
            self.json_data["markernames"].append(qtl["name"])

        # Get chromosome lengths for drawing the interval map plot
        chromosomes = self.species.chromosomes.chromosomes
        chromosome_mb_lengths = {}
        self.json_data["chrnames"] = []
        for key in chromosomes.keys():
            this_chr = chromosomes[key]
            self.json_data["chrnames"].append([this_chr.name, this_chr.mb_length])
            chromosome_mb_lengths[key] = this_chr.mb_length

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
        )
def __init__(self, kw):
    """Collect everything the trait page needs for a stored or temp trait.

    Three request shapes are handled:

    * ``'trait_id'`` present and ``kw['dataset'] != "Temp"``: a stored trait,
      loaded through ``helper_functions.get_species_dataset_trait``.
    * ``'group'`` present: a newly pasted temp trait; its values
      (``kw['trait_paste']``) are stored in Redis under a generated id.
    * otherwise: an existing temp trait whose id has the form
      ``Temp_<species>_<group>_<timestamp>`` and whose values are read
      back from Redis.

    Args:
        kw: Mapping of request variables (``'trait_id'``, ``'dataset'``,
            ``'group'``, ``'species'``, ``'trait_paste'`` as described above).

    Side effects:
        Sets many attributes on ``self`` (among them ``hddn`` and
        ``js_data``), may write to Redis and queries ``g.db``.
    """
    logger.debug("in ShowTrait, kw are:", kw)

    if 'trait_id' in kw and kw['dataset'] != "Temp":
        self.temp_trait = False
        self.trait_id = kw['trait_id']
        helper_functions.get_species_dataset_trait(self, kw)
    elif 'group' in kw:
        self.temp_trait = True
        self.trait_id = ("Temp_" + kw['species'] + "_" + kw['group'] + "_" +
                         datetime.datetime.now().strftime("%m%d%H%M%S"))
        self.temp_species = kw['species']
        self.temp_group = kw['group']
        self.dataset = data_set.create_dataset(dataset_name="Temp",
                                               dataset_type="Temp",
                                               group_name=self.temp_group)
        # Put values in Redis so they can be looked up later if added to a collection
        Redis.set(self.trait_id, kw['trait_paste'])
        self.trait_vals = kw['trait_paste'].split()
        self.this_trait = GeneralTrait(dataset=self.dataset,
                                       name=self.trait_id,
                                       cellid=None)
    else:
        self.temp_trait = True
        self.trait_id = kw['trait_id']
        id_parts = self.trait_id.split("_")
        self.temp_species = id_parts[1]
        self.temp_group = id_parts[2]
        self.dataset = data_set.create_dataset(dataset_name="Temp",
                                               dataset_type="Temp",
                                               group_name=self.temp_group)
        self.this_trait = GeneralTrait(dataset=self.dataset,
                                       name=self.trait_id,
                                       cellid=None)
        self.trait_vals = Redis.get(self.trait_id).split()

    # ZS: Get verify/rna-seq link URLs
    # Any failure in this section (missing attribute, no probe sequences,
    # DB error) falls back to empty URLs rather than failing the page.
    try:
        blatsequence = self.this_trait.blatseq

        # XZ, 06/03/2009: ProbeSet name is not unique among platforms.
        # We should use ProbeSet Id instead.
        # NOTE(review): the dataset and trait names are interpolated straight
        # into the SQL text. They originate from the request, so this should
        # become a parameterized query once the parameter style accepted by
        # g.db.execute is confirmed.
        probe_query = (
            "SELECT Probe.Sequence, Probe.Name "
            "FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef "
            "WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND "
            "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND "
            "ProbeSetFreeze.Name = '%s' AND "
            "ProbeSet.Name = '%s' AND "
            "Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder"
        ) % (self.this_trait.dataset.name, self.this_trait.name)

        if not blatsequence:
            seqs = g.db.execute(probe_query).fetchall()
            if not seqs:
                raise ValueError
            # Only probes whose name ends in an odd digit are concatenated.
            blatsequence = ''.join(
                seqt[0].strip() for seqt in seqs
                if int(seqt[1][-1]) % 2 == 1)

        # --------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe
        blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A'

        seqs = g.db.execute(probe_query).fetchall()
        for seqt in seqs:
            if int(seqt[1][-1]) % 2 == 1:
                blatsequence += ('%3EProbe_' + seqt[1].strip() + '%0A' +
                                 seqt[0].strip() + '%0A')

        species = self.dataset.group.species
        if species == "rat":
            self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('rat', 'rn6', blatsequence)
            self.UTHSC_BLAT_URL = ""
        elif species == "mouse":
            self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('mouse', 'mm10', blatsequence)
            self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % ('mouse', 'mm10', blatsequence)
        elif species == "human":
            self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('human', 'hg38', blatsequence)
            self.UTHSC_BLAT_URL = ""
        else:
            self.UCSC_BLAT_URL = ""
            self.UTHSC_BLAT_URL = ""
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit.
        self.UCSC_BLAT_URL = ""
        self.UTHSC_BLAT_URL = ""

    if self.dataset.type == "ProbeSet":
        self.show_probes = "True"

    trait_units = get_trait_units(self.this_trait)
    self.get_external_links()
    self.build_correlation_tools()

    self.ncbi_summary = get_ncbi_summary(self.this_trait)

    # Get nearest marker for composite mapping
    if not self.temp_trait:
        if (check_if_attr_exists(self.this_trait, 'locus_chr')
                and self.dataset.type != "Geno"
                and self.dataset.type != "Publish"):
            self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset)
        else:
            self.nearest_marker = ""

    # make_sample_lists is expected to provide self.sample_groups and
    # self.primary_sample_names (both read below).
    self.make_sample_lists()

    self.qnorm_vals = quantile_normalize_vals(self.sample_groups)
    self.z_scores = get_z_scores(self.sample_groups)

    self.temp_uuid = uuid.uuid4()

    self.sample_group_types = OrderedDict()
    self.sample_group_types['samples_primary'] = self.dataset.group.name
    if len(self.sample_groups) > 1:
        self.sample_group_types['samples_other'] = "Other"
        self.sample_group_types['samples_all'] = "All"
    sample_lists = [group.sample_list for group in self.sample_groups]

    categorical_var_list = []
    if not self.temp_trait:
        # ZS: Only using first samplelist, since I think mapping only uses those samples
        categorical_var_list = get_categorical_variables(
            self.this_trait, self.sample_groups[0])

    # ZS: Get list of chromosomes to select for mapping
    self.chr_list = [["All", -1]]
    chromosomes = self.dataset.species.chromosomes.chromosomes
    for i, this_chr in enumerate(chromosomes):
        self.chr_list.append([chromosomes[this_chr].name, i])

    self.genofiles = self.dataset.group.get_genofiles()

    self.has_num_cases = has_num_cases(self.this_trait)

    self.stats_table_width, self.trait_table_width = get_table_widths(
        self.sample_groups, self.has_num_cases)

    # ZS: Needed to know whether to display bar chart + get max sample name
    # length in order to set table column width
    self.num_values = 0
    # ZS: So it knows whether to display the Binary R/qtl mapping method,
    # which doesn't work unless all values are 0 or 1
    self.binary = "true"
    # ZS: Since we don't want to show log2 transform option for situations
    # where it doesn't make sense
    self.negative_vals_exist = "false"
    max_samplename_width = 1
    for group in self.sample_groups:
        for sample in group.sample_list:
            if len(sample.name) > max_samplename_width:
                max_samplename_width = len(sample.name)

            if sample.display_value != "x":
                self.num_values += 1
                # Was `!= 0 or != 1`, which is true for every value and so
                # forced binary to "false" unconditionally.
                # NOTE(review): if display_value is a formatted string this
                # still never matches 0/1 -- confirm its type.
                if sample.display_value not in (0, 1):
                    self.binary = "false"
                if sample.value < 0:
                    self.negative_vals_exist = "true"

    sample_column_width = max_samplename_width * 8

    if self.num_values >= 5000:
        self.maf = 0.01
    else:
        self.maf = 0.05

    trait_symbol = None
    short_description = None
    if not self.temp_trait:
        if self.this_trait.symbol:
            trait_symbol = self.this_trait.symbol
            short_description = trait_symbol
        elif hasattr(self.this_trait, 'post_publication_abbreviation'):
            short_description = self.this_trait.post_publication_abbreviation
        elif hasattr(self.this_trait, 'pre_publication_abbreviation'):
            short_description = self.this_trait.pre_publication_abbreviation

    # Todo: Add back in the ones we actually need from below, as we discover we need them
    hddn = OrderedDict()

    if self.dataset.group.allsamples:
        hddn['allsamples'] = ' '.join(self.dataset.group.allsamples)
    hddn['primary_samples'] = ','.join(self.primary_sample_names)
    hddn['trait_id'] = self.trait_id
    hddn['trait_display_name'] = self.this_trait.display_name
    hddn['dataset'] = self.dataset.name
    hddn['temp_trait'] = False
    if self.temp_trait:
        hddn['temp_trait'] = True
        hddn['group'] = self.temp_group
        hddn['species'] = self.temp_species
    hddn['use_outliers'] = False
    hddn['method'] = "gemma"
    hddn['selected_chr'] = -1
    hddn['mapping_display_all'] = True
    hddn['suggestive'] = 0
    hddn['num_perm'] = 0
    hddn['categorical_vars'] = ""
    hddn['manhattan_plot'] = ""
    hddn['control_marker'] = ""
    if not self.temp_trait:
        if (hasattr(self.this_trait, 'locus_chr')
                and self.this_trait.locus_chr != ""
                and self.dataset.type != "Geno"
                and self.dataset.type != "Publish"):
            hddn['control_marker'] = self.nearest_marker
    hddn['do_control'] = False
    hddn['maf'] = 0.05
    hddn['compare_traits'] = []
    hddn['export_data'] = ""
    hddn['export_format'] = "excel"

    # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
    self.hddn = hddn

    js_data = dict(trait_id=self.trait_id,
                   trait_symbol=trait_symbol,
                   short_description=short_description,
                   unit_type=trait_units,
                   dataset_type=self.dataset.type,
                   data_scale=self.dataset.data_scale,
                   sample_group_types=self.sample_group_types,
                   sample_lists=sample_lists,
                   attribute_names=self.sample_groups[0].attributes,
                   categorical_vars=",".join(categorical_var_list),
                   num_values=self.num_values,
                   qnorm_values=self.qnorm_vals,
                   zscore_values=self.z_scores,
                   sample_column_width=sample_column_width,
                   temp_uuid=self.temp_uuid)
    self.js_data = js_data
def __init__(self, kw):
    """Collect everything the trait page needs for a stored or temp trait.

    A request with ``'trait_id'`` and a dataset other than ``"Temp"`` loads a
    stored trait. A request with ``'group'`` creates a new temp trait from
    ``kw['trait_paste']`` and stores the pasted text in Redis. Anything else
    is treated as an existing temp trait whose id looks like
    ``Temp_<species>_<group>_<timestamp>`` and whose values are read from
    Redis.

    Args:
        kw: Mapping of request variables (``'trait_id'``, ``'dataset'``,
            ``'group'``, ``'species'``, ``'trait_paste'``).
    """
    logger.debug("in ShowTrait, kw are:", kw)

    is_stored_trait = 'trait_id' in kw and kw['dataset'] != "Temp"
    if is_stored_trait:
        self.temp_trait = False
        self.trait_id = kw['trait_id']
        helper_functions.get_species_dataset_trait(self, kw)
    elif 'group' in kw:
        self.temp_trait = True
        self.trait_id = ("Temp_" + kw['species'] + "_" + kw['group'] + "_"
                         + datetime.datetime.now().strftime("%m%d%H%M%S"))
        self.temp_species = kw['species']
        self.temp_group = kw['group']
        self.dataset = data_set.create_dataset(
            dataset_name="Temp",
            dataset_type="Temp",
            group_name=self.temp_group,
        )
        self.this_trait = GeneralTrait(
            dataset=self.dataset, name=self.trait_id, cellid=None)
        pasted_values = kw['trait_paste']
        self.trait_vals = pasted_values.split()

        # Put values in Redis so they can be looked up later if added to a collection
        Redis.set(self.trait_id, kw['trait_paste'])
    else:
        self.temp_trait = True
        self.trait_id = kw['trait_id']
        id_fields = self.trait_id.split("_")
        self.temp_species = id_fields[1]
        self.temp_group = id_fields[2]
        self.dataset = data_set.create_dataset(
            dataset_name="Temp",
            dataset_type="Temp",
            group_name=self.temp_group,
        )
        self.this_trait = GeneralTrait(
            dataset=self.dataset, name=self.trait_id, cellid=None)
        self.trait_vals = Redis.get(self.trait_id).split()

    self.build_correlation_tools()

    # Get nearest marker for composite mapping
    if not self.temp_trait:
        has_locus = (hasattr(self.this_trait, 'locus_chr')
                     and self.this_trait.locus_chr != "")
        if has_locus and self.dataset.type not in ("Geno", "Publish"):
            self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset)
        else:
            self.nearest_marker = ""

    # make_sample_lists is expected to provide self.sample_groups (read below).
    self.make_sample_lists()

    # Todo: Add back in the ones we actually need from below, as we discover we need them
    hidden = OrderedDict()

    all_samples = self.dataset.group.allsamples
    if all_samples:
        hidden['allsamples'] = ' '.join(all_samples)

    hidden['trait_id'] = self.trait_id
    hidden['dataset'] = self.dataset.name
    hidden['temp_trait'] = False
    if self.temp_trait:
        hidden['temp_trait'] = True
        hidden['group'] = self.temp_group
        hidden['species'] = self.temp_species
    hidden['use_outliers'] = False
    hidden['method'] = "pylmm"
    hidden['mapping_display_all'] = True
    hidden['suggestive'] = 0
    hidden['num_perm'] = 0
    hidden['manhattan_plot'] = ""
    hidden['control_marker'] = ""
    if not self.temp_trait:
        has_locus = (hasattr(self.this_trait, 'locus_chr')
                     and self.this_trait.locus_chr != "")
        if has_locus and self.dataset.type not in ("Geno", "Publish"):
            hidden['control_marker'] = self.nearest_marker
    hidden['do_control'] = False
    hidden['maf'] = 0.01
    hidden['compare_traits'] = []
    hidden['export_data'] = ""

    # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
    self.hddn = hidden

    self.temp_uuid = uuid.uuid4()

    group_types = OrderedDict()
    if len(self.sample_groups) > 1:
        group_types['samples_primary'] = self.dataset.group.name + " Only"
        group_types['samples_other'] = "Non-" + self.dataset.group.name
        group_types['samples_all'] = "All Cases"
    else:
        group_types['samples_primary'] = self.dataset.group.name
    self.sample_group_types = group_types

    sample_lists = []
    for sample_group in self.sample_groups:
        sample_lists.append(sample_group.sample_list)

    self.get_mapping_methods()

    self.trait_table_width = get_trait_table_width(self.sample_groups)

    trait_symbol = None
    if not self.temp_trait and self.this_trait.symbol:
        trait_symbol = self.this_trait.symbol

    self.js_data = {
        'trait_id': self.trait_id,
        'trait_symbol': trait_symbol,
        'dataset_type': self.dataset.type,
        'data_scale': self.dataset.data_scale,
        'sample_group_types': self.sample_group_types,
        'sample_lists': sample_lists,
        'attribute_names': self.sample_groups[0].attributes,
        'temp_uuid': self.temp_uuid,
    }
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and build the JSON/JS plot payloads.

    Args:
        start_vars: Mapping of request variables. Always reads ``'method'``,
            ``'manhattan_plot'``, ``'maf'`` and one ``'value:<sample>'``
            entry per sample in ``self.dataset.group.samplelist``. Display
            options (``'selected_chr'``, ``'startMb'``, ``'endMb'``,
            ``'graphWidth'``, ``'lrsMax'``, ``'haplotypeAnalystCheck'``,
            ``'permCheck'``, ``'showSNP'``, ``'showGenes'``,
            ``'viewLegend'``) are optional. The presence of ``'startMb'``
            is used to detect that this is not the first page load, in
            which case ``'num_perm'`` and ``'LRSCheck'`` are required.
            Method-specific keys are read in the matching branch.
        temp_uuid: Stored on ``self`` for the GN1 mapping code and passed
            (as a string) to the pylmm helpers.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    self.samples = []  # Want only ones with values
    self.vals = []
    for sample in self.dataset.group.samplelist:
        value = start_vars['value:' + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    self.mapping_method = start_vars['method']
    self.manhattan_plot = start_vars['manhattan_plot'] == "true"

    self.maf = start_vars['maf']  # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which template to use
    self.pair_scan = False
    self.score_type = "LRS"  # ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.bootstrap_results = []

    # ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        selected_chr = int(start_vars['selected_chr'])
        # ZS: Needs to be -1 if showing full map; there's probably a better
        # way to fix this
        if selected_chr != -1:
            selected_chr += 1
        self.selected_chr = selected_chr

    # Optional display settings are copied over only when supplied.
    for option in ("startMb", "endMb", "graphWidth", "lrsMax",
                   "haplotypeAnalystCheck"):
        if option in start_vars:
            setattr(self, option, start_vars[option])

    # ZS: This is to ensure showGenes, Legend, etc are checked the first time
    # you open the mapping page, since startMb will only not be set during
    # the first load
    if "startMb" in start_vars:
        self.permCheck = "ON" if "permCheck" in start_vars else False
        self.num_perm = int(start_vars['num_perm'])

        self.LRSCheck = start_vars['LRSCheck']

        self.showSNP = start_vars['showSNP'] if "showSNP" in start_vars else False
        self.showGenes = start_vars['showGenes'] if "showGenes" in start_vars else False
        self.viewLegend = start_vars['viewLegend'] if "viewLegend" in start_vars else False
    else:
        # A missing, empty or non-numeric permutation count means "none".
        try:
            self.num_perm = max(int(start_vars['num_perm']), 0)
        except (KeyError, TypeError, ValueError):
            self.num_perm = 0

        self.LRSCheck = self.score_type
        self.permCheck = "ON"
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    self.dataset.group.get_markers()
    if self.mapping_method == "gemma":
        self.score_type = "LOD"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            included_markers, p_values = gemma_mapping.run_gemma(
                self.dataset, self.samples, self.vals)
        with Bench("Getting markers from csv"):
            marker_obs = get_markers_from_csv(included_markers, p_values,
                                              self.dataset.group.name)
        results = marker_obs
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        if start_vars['pair_scan'] == "true":
            self.pair_scan = True
        results = self.run_rqtl_geno()
    elif self.mapping_method == "reaper":
        # ZS: Check if first time page loaded, so it can default to ON
        if "startMb" in start_vars:
            if "additiveCheck" in start_vars:
                self.additiveCheck = start_vars['additiveCheck']
            else:
                self.additiveCheck = False

            self.bootCheck = "ON" if "bootCheck" in start_vars else False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            # A missing, empty or non-numeric bootstrap count means "none".
            try:
                num_bootstrap = int(start_vars['num_bootstrap'])
            except (KeyError, TypeError, ValueError):
                num_bootstrap = 0
            if num_bootstrap > 0:
                self.bootCheck = "ON"
                self.num_bootstrap = num_bootstrap
            else:
                self.bootCheck = False
                self.num_bootstrap = 0

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        results = self.gen_reaper_results()
    elif self.mapping_method == "plink":
        results = self.run_plink()
    elif self.mapping_method == "pylmm":
        print("RUNNING PYLMM")
        if self.num_perm > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        print("RUNNING NOTHING")
        # Previously left unbound, which made the loops below raise
        # NameError for any unrecognised method.
        results = []

    if self.pair_scan:
        self.qtl_results = []
        highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
        for marker in results:
            chrom = marker['chr1']
            # Test the string labels first so they are never ordered
            # against an integer.
            is_sex_chr = chrom in ("X", "X/Y")
            if is_sex_chr or chrom > 0:
                if is_sex_chr or chrom > highest_chr:
                    highest_chr = chrom
                if 'lod_score' in marker:
                    self.qtl_results.append(marker)

        # These lists were never created before being appended to, and the
        # loop used to subscript the (index, item) tuples from enumerate().
        for list_name in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data[list_name] = []
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            qtl_results=self.qtl_results,
        )
    else:
        self.cutoff = 2
        self.qtl_results = []
        highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
        for marker in results:
            chrom = marker['chr']
            is_sex_chr = chrom in ("X", "X/Y")
            if is_sex_chr or chrom > 0:
                if is_sex_chr or chrom > highest_chr:
                    highest_chr = chrom
                if 'lod_score' in marker or 'lrs_value' in marker:
                    self.qtl_results.append(marker)

        self.trimmed_markers = trim_markers_for_table(results)

        self.json_data['chr'] = []
        self.json_data['pos'] = []
        self.json_data['lod.hk'] = []
        self.json_data['markernames'] = []

        self.json_data['suggestive'] = self.suggestive
        self.json_data['significant'] = self.significant

        # Need to convert the QTL objects that qtl reaper returns into a
        # json serializable dictionary
        for qtl in self.qtl_results:
            # A numeric highest chromosome is relabelled as "X".
            if qtl['chr'] == highest_chr and highest_chr not in ("X", "X/Y"):
                self.json_data['chr'].append("X")
            else:
                self.json_data['chr'].append(str(qtl['chr']))
            self.json_data['pos'].append(qtl['Mb'])
            if 'lrs_value' in qtl:
                self.json_data['lod.hk'].append(str(qtl['lrs_value']))
            else:
                self.json_data['lod.hk'].append(str(qtl['lod_score']))
            self.json_data['markernames'].append(qtl['name'])

        # Get chromosome lengths for drawing the interval map plot
        chromosomes = self.species.chromosomes.chromosomes
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        for key in chromosomes.keys():
            this_chr = chromosomes[key]
            self.json_data['chrnames'].append([this_chr.name, this_chr.mb_length])
            chromosome_mb_lengths[key] = this_chr.mb_length

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
        )
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and prepare its results for display.

    Args:
        start_vars: Mapping of request parameters. ``sample_vals`` (a JSON
            object of sample name -> value), ``method`` and ``maf`` are
            always read; the remaining keys are either optional or only
            read for a particular mapping method.
        temp_uuid: Stored unchanged on ``self.temp_uuid``.

    On return ``self.no_results`` tells whether the mapping produced any
    markers. When it did, ``self.qtl_results``, ``self.trimmed_markers`` and
    ``self.js_data`` are populated. When it did not (including when
    ``method`` is not one of the handled names) those attributes are left
    unset.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # Needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    #ZS: Needed to zoom in or remap temp traits like PCA traits
    if "temp_trait" in start_vars and start_vars['temp_trait'] != "False":
        self.temp_trait = "True"
        self.group = self.dataset.group.name

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    #ZS: Sometimes a group may have a genofile that only includes a subset of samples
    genofile_samplelist = []
    if 'genofile' in start_vars and start_vars['genofile'] != "":
        self.genofile_string = start_vars['genofile']
        self.dataset.group.genofile = self.genofile_string.split(":")[0]
        genofile_samplelist = get_genofile_samplelist(self.dataset)

    # NOTE(review): the returned list was never used here; the call is kept
    # only in case it has side effects -- confirm and drop if it has none.
    self.dataset.group.all_samples_ordered()

    self.vals = []
    self.samples = []
    self.sample_vals = start_vars['sample_vals']
    sample_val_dict = json.loads(self.sample_vals)
    if genofile_samplelist:
        # Keep every genofile sample; samples without a value are marked "x".
        for sample in genofile_samplelist:
            self.samples.append(sample)
            self.vals.append(sample_val_dict.get(sample, "x"))
    else:
        # Keep only the group samples that actually have a value.
        for sample in self.dataset.group.samplelist:
            if sample in sample_val_dict:
                self.vals.append(sample_val_dict[sample])
                self.samples.append(sample)

    if 'n_samples' in start_vars:
        self.n_samples = start_vars['n_samples']
    else:
        self.n_samples = sum(1 for val in self.vals if val != "x")

    #ZS: Check if genotypes exist in the DB in order to create links for markers
    self.geno_db_exists = geno_db_exists(self.dataset)

    self.mapping_method = start_vars['method']
    if "results_path" in start_vars:
        self.mapping_results_path = start_vars['results_path']
    else:
        mapping_results_filename = self.dataset.group.name + "_" + ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(6))
        self.mapping_results_path = "{}{}.csv".format(
            webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

    self.manhattan_plot = False
    if 'manhattan_plot' in start_vars:
        if start_vars['manhattan_plot'].lower() != "false":
            self.color_scheme = "alternating"
            if "color_scheme" in start_vars:
                self.color_scheme = start_vars['color_scheme']
                if self.color_scheme == "single":
                    self.manhattan_single_color = start_vars[
                        'manhattan_single_color']
            self.manhattan_plot = True

    self.maf = start_vars['maf']  # Minor allele frequency
    self.use_loco = start_vars['use_loco'] if "use_loco" in start_vars else None
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which template to use
    self.pair_scan = False
    self.transform = start_vars['transform'] if 'transform' in start_vars else ""
    self.score_type = "LRS"  #ZS: LRS or LOD
    self.mapping_scale = "physic"
    if "mapping_scale" in start_vars:
        self.mapping_scale = start_vars['mapping_scale']
    self.num_perm = 0
    self.perm_output = []
    self.bootstrap_results = []
    self.covariates = start_vars['covariates'] if "covariates" in start_vars else ""
    self.categorical_vars = []

    #ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
        if int(start_vars['selected_chr']) != -1:
            self.selected_chr = int(start_vars['selected_chr']) + 1
        else:
            self.selected_chr = int(start_vars['selected_chr'])
    if "startMb" in start_vars:
        self.startMb = start_vars['startMb']
    if "endMb" in start_vars:
        self.endMb = start_vars['endMb']
    if "graphWidth" in start_vars:
        self.graphWidth = start_vars['graphWidth']
    if "lrsMax" in start_vars:
        self.lrsMax = start_vars['lrsMax']
    if "haplotypeAnalystCheck" in start_vars:
        self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']

    #ZS: This is to ensure showGenes, Legend, etc are checked the first time
    # you open the mapping page, since startMb will only not be set during
    # the first load
    if "startMb" in start_vars:
        self.permCheck = "ON" if "permCheck" in start_vars else False
        self.num_perm = int(start_vars['num_perm'])

        self.LRSCheck = start_vars['LRSCheck']

        self.showSNP = start_vars['showSNP'] if "showSNP" in start_vars else False
        self.showGenes = start_vars['showGenes'] if "showGenes" in start_vars else False
        self.viewLegend = start_vars['viewLegend'] if "viewLegend" in start_vars else False
    else:
        try:
            requested_perm = int(start_vars['num_perm'])
        except (KeyError, TypeError, ValueError):
            # Missing or non-numeric permutation count: run none.
            self.num_perm = 0
        else:
            if requested_perm > 0:
                self.num_perm = requested_perm

        self.permCheck = "ON" if self.num_perm > 0 else False
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    # Default for an unrecognised mapping method, so the "no results" path
    # below is taken instead of failing on an unbound name.
    results = []
    if self.mapping_method == "gemma":
        self.first_run = True
        self.output_files = None
        if 'output_files' in start_vars:
            self.output_files = start_vars['output_files']
        #ZS: check if first run so existing result files can be used if it
        # isn't (for example zooming on a chromosome, etc)
        if 'first_run' in start_vars:
            self.first_run = False
        self.score_type = "-logP"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            # The LOCO and non-LOCO cases make the same call; use_loco is
            # simply forwarded.
            marker_obs, self.output_files = gemma_mapping.run_gemma(
                self.this_trait, self.dataset, self.samples, self.vals,
                self.covariates, self.use_loco, self.maf, self.first_run,
                self.output_files)
        results = marker_obs
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        perm_strata = []
        if "perm_strata" in start_vars and "categorical_vars" in start_vars:
            self.categorical_vars = start_vars["categorical_vars"].split(",")
            if len(self.categorical_vars) and start_vars["perm_strata"] == "True":
                primary_samples = SampleList(dataset=self.dataset,
                                             sample_names=self.samples,
                                             this_trait=self.this_trait)
                perm_strata = get_perm_strata(self.this_trait,
                                              primary_samples,
                                              self.categorical_vars,
                                              self.samples)
        self.score_type = "LOD"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        if 'mapmethod_rqtl_geno' in start_vars:
            self.method = start_vars['mapmethod_rqtl_geno']
        else:
            self.method = "em"
        self.model = start_vars['mapmodel_rqtl_geno']
        if self.permCheck and self.num_perm > 0:
            self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
                self.vals, self.samples, self.dataset, self.mapping_scale,
                self.method, self.model, self.permCheck, self.num_perm,
                perm_strata, self.do_control, self.control_marker,
                self.manhattan_plot, self.pair_scan, self.covariates)
        else:
            results = rqtl_mapping.run_rqtl_geno(
                self.vals, self.samples, self.dataset, self.mapping_scale,
                self.method, self.model, self.permCheck, self.num_perm,
                perm_strata, self.do_control, self.control_marker,
                self.manhattan_plot, self.pair_scan, self.covariates)
    elif self.mapping_method == "reaper":
        #ZS: Check if first time page loaded, so it can default to ON
        if "startMb" in start_vars:
            if "additiveCheck" in start_vars:
                self.additiveCheck = start_vars['additiveCheck']
            else:
                self.additiveCheck = False

            self.bootCheck = "ON" if "bootCheck" in start_vars else False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            try:
                requested_bootstrap = int(start_vars['num_bootstrap'])
            except (KeyError, TypeError, ValueError):
                # Missing or non-numeric bootstrap count: run none.
                requested_bootstrap = 0
            if requested_bootstrap > 0:
                self.bootCheck = "ON"
                self.num_bootstrap = requested_bootstrap
            else:
                self.bootCheck = False
                self.num_bootstrap = 0

        self.reaper_version = start_vars['reaper_version']

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        logger.info("Running qtlreaper")

        if self.reaper_version == "new":
            self.first_run = True
            self.output_files = None
            #ZS: check if first run so existing result files can be used if
            # it isn't (for example zooming on a chromosome, etc)
            if 'first_run' in start_vars:
                self.first_run = False
                if 'output_files' in start_vars:
                    self.output_files = start_vars['output_files'].split(",")

            results, self.perm_output, self.suggestive, self.significant, self.bootstrap_results, self.output_files = qtlreaper_mapping.run_reaper(
                self.this_trait, self.dataset, self.samples, self.vals,
                self.json_data, self.num_perm, self.bootCheck,
                self.num_bootstrap, self.do_control, self.control_marker,
                self.manhattan_plot, self.first_run, self.output_files)
        else:
            results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.run_original_reaper(
                self.this_trait, self.dataset, self.samples, self.vals,
                self.json_data, self.num_perm, self.bootCheck,
                self.num_bootstrap, self.do_control, self.control_marker,
                self.manhattan_plot)
    elif self.mapping_method == "plink":
        self.score_type = "-logP"
        self.manhattan_plot = True
        results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                          self.species, self.vals, self.maf)
    else:
        logger.debug("RUNNING NOTHING")

    self.no_results = len(results) == 0
    if self.no_results:
        return

    if self.pair_scan:
        self.qtl_results = []
        for marker in results:
            chr1 = marker['chr1']
            # Test the chromosome names first so a string such as "X" is
            # never ordered against an int.
            if chr1 == "X" or chr1 == "X/Y" or chr1 > 0:
                if 'lod_score' in marker:
                    self.qtl_results.append(marker)

        self.trimmed_markers = results

        # Nothing above is shown to create these lists, so make sure they
        # exist before appending.
        for key in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(key, [])
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(json_data=self.json_data,
                            this_trait=self.this_trait.name,
                            data_set=self.dataset.name,
                            maf=self.maf,
                            manhattan_plot=self.manhattan_plot,
                            mapping_scale=self.mapping_scale,
                            qtl_results=self.qtl_results)
        return

    self.qtl_results = []
    self.results_for_browser = []
    self.annotations_for_browser = []
    for marker in results:
        if 'Mb' in marker:
            this_ps = marker['Mb'] * 1000000
        else:
            this_ps = marker['cM'] * 1000000

        marker_url = ("/show_trait?trait_id=" + marker['name'] +
                      "&dataset=" + self.dataset.group.name + "Geno")
        browser_marker = dict(chr=str(marker['chr']),
                              rs=marker['name'],
                              ps=this_ps,
                              url=marker_url)

        annot_marker = dict(name=str(marker['name']),
                            chr=str(marker['chr']),
                            rs=marker['name'],
                            pos=this_ps)
        # Only link the annotation when genotypes exist in the DB.
        if self.geno_db_exists == "True":
            annot_marker['url'] = marker_url

        if 'lrs_value' in marker and marker['lrs_value'] > 0:
            browser_marker['p_wald'] = 10**-(marker['lrs_value'] / 4.61)
        elif 'lod_score' in marker and marker['lod_score'] > 0:
            browser_marker['p_wald'] = 10**-(marker['lod_score'])
        else:
            browser_marker['p_wald'] = 0

        self.results_for_browser.append(browser_marker)
        self.annotations_for_browser.append(annot_marker)

        chr_name = str(marker['chr'])
        if chr_name > '0' or chr_name == "X" or chr_name == "X/Y":
            if ('lod_score' in marker) or ('lrs_value' in marker):
                if 'Mb' in marker:
                    marker['display_pos'] = "Chr" + chr_name + ": " + "{:.6f}".format(marker['Mb'])
                elif 'cM' in marker:
                    marker['display_pos'] = "Chr" + chr_name + ": " + "{:.3f}".format(marker['cM'])
                else:
                    marker['display_pos'] = "N/A"
                self.qtl_results.append(marker)

    # Counted before trimming so the full marker count is reported.
    total_markers = len(self.qtl_results)

    with Bench("Exporting Results"):
        export_mapping_results(self.dataset, self.this_trait,
                               self.qtl_results, self.mapping_results_path,
                               self.mapping_scale, self.score_type)

    with Bench("Trimming Markers for Figure"):
        if len(self.qtl_results) > 30000:
            self.qtl_results = trim_markers_for_figure(self.qtl_results)
            self.results_for_browser = trim_markers_for_figure(
                self.results_for_browser)
            # Keep, for each remaining browser marker, the first annotation
            # with the same ``rs`` (single pass instead of a nested scan).
            first_annotation_by_rs = {}
            for annot_marker in self.annotations_for_browser:
                first_annotation_by_rs.setdefault(annot_marker['rs'],
                                                  annot_marker)
            self.annotations_for_browser = [
                first_annotation_by_rs[browser_marker['rs']]
                for browser_marker in self.results_for_browser
                if browser_marker['rs'] in first_annotation_by_rs
            ]
        browser_files = write_input_for_browser(
            self.dataset, self.results_for_browser,
            self.annotations_for_browser)

    with Bench("Trimming Markers for Table"):
        self.trimmed_markers = trim_markers_for_table(results)

    chr_lengths = get_chr_lengths(self.mapping_scale, self.mapping_method,
                                  self.dataset, self.qtl_results)

    #ZS: For zooming into genome browser, need to pass chromosome name instead of number
    sex_chr_names_by_species = {"mouse": {20: "X"}, "rat": {21: "X"}}
    sex_chr_names = sex_chr_names_by_species.get(self.dataset.group.species,
                                                 {22: "X", 23: "Y"})
    this_chr = sex_chr_names.get(self.selected_chr, str(self.selected_chr))

    if self.mapping_method != "gemma":
        # ``self.significant`` keeps its "" default when no threshold was
        # computed; only rescale a real LRS value.
        if self.score_type == "LRS" and self.significant != "":
            significant_for_browser = self.significant / 4.61
        else:
            significant_for_browser = self.significant

        self.js_data = dict(categorical_vars=self.categorical_vars,
                            chr_lengths=chr_lengths,
                            num_perm=self.num_perm,
                            perm_results=self.perm_output,
                            significant=significant_for_browser,
                            browser_files=browser_files,
                            selected_chr=this_chr,
                            total_markers=total_markers)
    else:
        self.js_data = dict(chr_lengths=chr_lengths,
                            browser_files=browser_files,
                            selected_chr=this_chr,
                            total_markers=total_markers)
def __init__(self, start_vars, temp_uuid):
    """Run the requested mapping method and build the JSON data for the plot.

    Args:
        start_vars: Mapping of request parameters. ``value:<sample>`` for
            every sample of the dataset group, ``method``,
            ``manhattan_plot`` and ``maf`` are always read; other keys are
            only read for a particular mapping method.
        temp_uuid: Passed (as a string) to the pylmm permutation and data
            generation steps.

    Sets ``self.filtered_markers`` (markers on a positive or X / X/Y
    chromosome that carry a ``lod_score``), ``self.json_data`` and
    ``self.js_data``. An unrecognised ``method`` yields empty results.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    self.json_data = {}
    self.json_data['lodnames'] = ['lod.hk']

    self.samples = []  # Want only ones with values
    self.vals = []
    for sample in self.dataset.group.samplelist:
        value = start_vars['value:' + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    self.mapping_method = start_vars['method']
    self.manhattan_plot = start_vars['manhattan_plot'] == "true"
    self.maf = start_vars['maf']  # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which template to use
    self.pair_scan = False

    self.dataset.group.get_markers()

    # Default for an unrecognised mapping method, so the code below works on
    # an empty result instead of failing on an unbound name.
    qtl_results = []
    if self.mapping_method == "gemma":
        qtl_results = self.run_gemma()
    elif self.mapping_method == "rqtl_plink":
        qtl_results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        if start_vars['num_perm'] == "":
            self.num_perm = 0
        else:
            self.num_perm = start_vars['num_perm']
        self.control = start_vars['control_marker']

        print("StartVars:", start_vars)
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']

        if start_vars['pair_scan'] == "true":
            self.pair_scan = True
        print("pair scan:", self.pair_scan)

        print("DOING RQTL GENO")
        qtl_results = self.run_rqtl_geno()
        print("qtl_results:", qtl_results)
    elif self.mapping_method == "plink":
        qtl_results = self.run_plink()
    elif self.mapping_method == "pylmm":
        print("RUNNING PYLMM")
        self.num_perm = start_vars['num_perm']
        if self.num_perm != "" and int(self.num_perm) > 0:
            self.run_permutations(str(temp_uuid))
        qtl_results = self.gen_data(str(temp_uuid))
    else:
        print("RUNNING NOTHING")

    self.lod_cutoff = 2
    self.filtered_markers = []
    sex_chr_names = ("X", "X/Y")
    highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
    for marker in qtl_results:
        marker_chr = marker['chr']
        # Test the names first so "X" / "X/Y" are never ordered against an int.
        is_sex_chr = marker_chr in sex_chr_names
        if is_sex_chr or marker_chr > 0:
            if is_sex_chr or marker_chr > highest_chr:
                highest_chr = marker_chr
            if 'lod_score' in marker:
                self.filtered_markers.append(marker)

    self.json_data['chr'] = []
    self.json_data['pos'] = []
    self.json_data['lod.hk'] = []
    self.json_data['markernames'] = []

    self.json_data['suggestive'] = self.suggestive
    self.json_data['significant'] = self.significant

    #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
    self.qtl_results = []
    # When no marker was labelled X or X/Y, the highest chromosome found is
    # reported as "X".
    relabel_highest = highest_chr not in sex_chr_names
    for qtl in self.filtered_markers:
        print("lod score is:", qtl['lod_score'])
        if relabel_highest and qtl['chr'] == highest_chr:
            print("changing to X")
            self.json_data['chr'].append("X")
        else:
            self.json_data['chr'].append(str(qtl['chr']))
        self.json_data['pos'].append(qtl['Mb'])
        self.json_data['lod.hk'].append(str(qtl['lod_score']))
        self.json_data['markernames'].append(qtl['name'])

    #Get chromosome lengths for drawing the interval map plot
    chromosomes = self.species.chromosomes.chromosomes
    chromosome_mb_lengths = {}
    self.json_data['chrnames'] = []
    for key in chromosomes.keys():
        chromosome = chromosomes[key]
        self.json_data['chrnames'].append([chromosome.name, chromosome.mb_length])
        chromosome_mb_lengths[key] = chromosome.mb_length

    print("json_data:", self.json_data)

    self.js_data = dict(
        json_data=self.json_data,
        this_trait=self.this_trait.name,
        data_set=self.dataset.name,
        maf=self.maf,
        manhattan_plot=self.manhattan_plot,
        chromosomes=chromosome_mb_lengths,
        qtl_results=self.filtered_markers,
    )
def __init__(self, start_vars):
    """Correlate the base trait against a target dataset and collect results.

    Args:
        start_vars: Mapping of request parameters. ``corr_type``,
            ``dataset``, ``corr_sample_method``, ``corr_samples_group``,
            ``corr_dataset`` and ``corr_return_results`` are required;
            ``loc_chr`` additionally requires ``min_loc_mb`` and
            ``max_loc_mb``.

    Raises:
        AssertionError: If a required key is missing (only when assertions
            are enabled).

    Sets ``self.correlation_data`` (sorted by descending absolute value of
    the first element of each entry), ``self.correlation_results`` (trait
    objects that pass the range/expression/location filters) and
    ``self.json_results``.
    """
    # Check parameters
    assert('corr_type' in start_vars)
    assert(is_str(start_vars['corr_type']))
    assert('dataset' in start_vars)
    # assert('group' in start_vars) permitted to be empty?
    assert('corr_sample_method' in start_vars)
    assert('corr_samples_group' in start_vars)
    assert('corr_dataset' in start_vars)
    assert('corr_return_results' in start_vars)
    if 'loc_chr' in start_vars:
        assert('min_loc_mb' in start_vars)
        assert('max_loc_mb' in start_vars)

    with Bench("Doing correlations"):
        if start_vars['dataset'] == "Temp":
            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
            self.trait_id = start_vars['trait_id']
            self.this_trait = create_trait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
        else:
            helper_functions.get_species_dataset_trait(self, start_vars)

        corr_samples_group = start_vars['corr_samples_group']

        self.sample_data = {}
        self.corr_type = start_vars['corr_type']
        self.corr_method = start_vars['corr_sample_method']
        self.min_expr = get_float(start_vars, 'min_expr')
        self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0)
        self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0)

        if ('loc_chr' in start_vars and
            'min_loc_mb' in start_vars and
            'max_loc_mb' in start_vars):
            self.location_type = get_string(start_vars, 'location_type')
            self.location_chr = get_string(start_vars, 'loc_chr')
            self.min_location_mb = get_int(start_vars, 'min_loc_mb')
            self.max_location_mb = get_int(start_vars, 'max_loc_mb')
        else:
            self.location_type = self.location_chr = self.min_location_mb = self.max_location_mb = None

        self.get_formatted_corr_type()
        self.return_number = int(start_vars['corr_return_results'])

        #The two if statements below append samples to the sample list based upon whether the user
        #selected Primary Samples Only, Other Samples Only, or All Samples

        group = self.dataset.group
        # Work on a copy: ``+=`` on the group's own list would extend
        # ``group.samplelist`` in place with the parent and F1 samples.
        primary_samples = list(group.samplelist)
        if group.parlist is not None:
            primary_samples += group.parlist
        if group.f1list is not None:
            primary_samples += group.f1list

        #If either BXD/whatever Only or All Samples, append all of that group's samplelist
        if corr_samples_group != 'samples_other':
            self.process_samples(start_vars, primary_samples)

        #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
        #exclude the primary samples (because they would have been added in the previous
        #if statement if the user selected All Samples)
        if corr_samples_group != 'samples_primary':
            if corr_samples_group == 'samples_other':
                # Either list may be None (see the checks above); build the
                # exclusion set once rather than once per sample.
                parents_and_f1s = set((group.parlist or []) + (group.f1list or []))
                primary_samples = [x for x in primary_samples
                                   if x not in parents_and_f1s]
            self.process_samples(start_vars, list(self.this_trait.data.keys()), primary_samples)

        self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
        self.target_dataset.get_trait_data(list(self.sample_data.keys()))

        self.header_fields = get_header_fields(self.target_dataset.type, self.corr_method)

        if self.target_dataset.type == "ProbeSet":
            self.filter_cols = [7, 6]
        elif self.target_dataset.type == "Publish":
            self.filter_cols = [6, 0]
        else:
            self.filter_cols = [4, 0]

        self.correlation_results = []
        self.correlation_data = {}

        if self.corr_type == "tissue":
            self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol")

            tissue_corr_data = self.do_tissue_correlation_for_all_traits()
            if tissue_corr_data is not None:
                for trait in list(tissue_corr_data.keys())[:self.return_number]:
                    self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
            else:
                for trait, values in list(self.target_dataset.trait_data.items()):
                    self.get_sample_r_and_p_values(trait, values)

        elif self.corr_type == "lit":
            self.trait_geneid_dict = self.dataset.retrieve_genes("GeneId")
            lit_corr_data = self.do_lit_correlation_for_all_traits()

            for trait in list(lit_corr_data.keys())[:self.return_number]:
                self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])

        elif self.corr_type == "sample":
            for trait, values in list(self.target_dataset.trait_data.items()):
                self.get_sample_r_and_p_values(trait, values)

        self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()),
                                                               key=lambda t: -abs(t[1][0])))

        # Chromosome name -> order id, built once instead of rescanning the
        # chromosomes for every trait. A later chromosome with the same name
        # overrides an earlier one, as in a full scan without ``break``.
        chr_order_by_name = {}
        for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()):
            chr_order_by_name[chr_info.name] = order_id

        #ZS: Convert min/max chromosome to an int for the location range option
        range_chr_as_int = None
        if 'loc_chr' in start_vars:
            range_chr_as_int = chr_order_by_name.get(self.location_chr)

        use_locus = self.location_type == "highest_lod"
        for trait in list(self.correlation_data.keys())[:self.return_number]:
            trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)
            if not trait_object:
                continue

            trait_chr = trait_object.locus_chr if use_locus else trait_object.chr
            chr_as_int = chr_order_by_name.get(trait_chr, 0)

            sample_r = float(self.correlation_data[trait][0])
            if not (self.p_range_lower <= sample_r <= self.p_range_upper):
                continue

            if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean):
                if (self.min_expr is not None) and (float(trait_object.mean) < self.min_expr):
                    continue

            if range_chr_as_int is not None and (chr_as_int != range_chr_as_int):
                continue

            # The position is only read when a bound is actually set.
            if self.min_location_mb is not None or self.max_location_mb is not None:
                trait_mb = float(trait_object.locus_mb if use_locus else trait_object.mb)
                if (self.min_location_mb is not None) and (trait_mb < float(self.min_location_mb)):
                    continue
                if (self.max_location_mb is not None) and (trait_mb > float(self.max_location_mb)):
                    continue

            (trait_object.sample_r,
             trait_object.sample_p,
             trait_object.num_overlap) = self.correlation_data[trait]

            # Set some sane defaults
            trait_object.tissue_corr = 0
            trait_object.tissue_pvalue = 0
            trait_object.lit_corr = 0
            if self.corr_type == "tissue" and tissue_corr_data is not None:
                trait_object.tissue_corr = tissue_corr_data[trait][1]
                trait_object.tissue_pvalue = tissue_corr_data[trait][2]
            elif self.corr_type == "lit":
                trait_object.lit_corr = lit_corr_data[trait][1]

            self.correlation_results.append(trait_object)

        both_probeset = (self.dataset.type == "ProbeSet" and
                         self.target_dataset.type == "ProbeSet")
        if self.corr_type != "lit" and both_probeset:
            self.do_lit_correlation_for_trait_list()

        if self.corr_type != "tissue" and both_probeset:
            self.do_tissue_correlation_for_trait_list()

    self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
def __init__(self, kw):
    """Load (or create) the trait described by ``kw`` and prepare page data.

    Three cases are handled:

    * ``trait_id`` given and ``dataset`` is not "Temp": a stored trait.
    * ``group`` given: a new temporary trait built from ``species``,
      ``group`` and the pasted values in ``trait_paste``; the pasted text
      is stored in Redis under the generated trait id.
    * otherwise: an existing temporary trait whose id has the form
      ``Temp_<species>_<group>_...`` and whose values are read from Redis.

    Sets, among others, ``self.hddn`` (hidden form defaults),
    ``self.temp_uuid``, ``self.sample_group_types`` and ``self.js_data``.
    """
    logger.debug("in ShowTrait, kw are:", kw)

    if 'trait_id' in kw and kw['dataset'] != "Temp":
        self.temp_trait = False
        self.trait_id = kw['trait_id']
        helper_functions.get_species_dataset_trait(self, kw)
    elif 'group' in kw:
        self.temp_trait = True
        self.trait_id = "Temp_" + kw['species'] + "_" + kw['group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
        self.temp_species = kw['species']
        self.temp_group = kw['group']
        self.dataset = data_set.create_dataset(dataset_name="Temp",
                                               dataset_type="Temp",
                                               group_name=self.temp_group)
        self.this_trait = GeneralTrait(dataset=self.dataset,
                                       name=self.trait_id,
                                       cellid=None)
        self.trait_vals = kw['trait_paste'].split()

        # Put values in Redis so they can be looked up later if added to a collection
        Redis.set(self.trait_id, kw['trait_paste'])
    else:
        self.temp_trait = True
        self.trait_id = kw['trait_id']
        trait_id_parts = self.trait_id.split("_")
        self.temp_species = trait_id_parts[1]
        self.temp_group = trait_id_parts[2]
        self.dataset = data_set.create_dataset(dataset_name="Temp",
                                               dataset_type="Temp",
                                               group_name=self.temp_group)
        self.this_trait = GeneralTrait(dataset=self.dataset,
                                       name=self.trait_id,
                                       cellid=None)
        self.trait_vals = Redis.get(self.trait_id).split()

    self.build_correlation_tools()

    #Get nearest marker for composite mapping
    if not self.temp_trait:
        if (hasattr(self.this_trait, 'locus_chr')
                and self.this_trait.locus_chr != ""
                and self.dataset.type != "Geno"
                and self.dataset.type != "Publish"):
            self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset)
        else:
            self.nearest_marker = ""

    self.make_sample_lists()

    # Todo: Add back in the ones we actually need from below, as we discover we need them
    hddn = OrderedDict()

    if self.dataset.group.allsamples:
        # str.join instead of string.join, which no longer exists in Python 3.
        hddn['allsamples'] = ' '.join(self.dataset.group.allsamples)

    hddn['trait_id'] = self.trait_id
    hddn['dataset'] = self.dataset.name
    hddn['temp_trait'] = False
    if self.temp_trait:
        hddn['temp_trait'] = True
        hddn['group'] = self.temp_group
        hddn['species'] = self.temp_species
    hddn['use_outliers'] = False
    hddn['method'] = "pylmm"
    hddn['mapping_display_all'] = True
    hddn['suggestive'] = 0
    hddn['num_perm'] = 0
    hddn['manhattan_plot'] = ""
    hddn['control_marker'] = ""
    if not self.temp_trait:
        # ``self.nearest_marker`` is "" unless the trait has a usable locus,
        # so it doubles as the control-marker default.
        hddn['control_marker'] = self.nearest_marker
    hddn['do_control'] = False
    hddn['maf'] = 0.01
    hddn['compare_traits'] = []
    hddn['export_data'] = ""

    # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
    self.hddn = hddn

    self.temp_uuid = uuid.uuid4()

    self.sample_group_types = OrderedDict()
    if len(self.sample_groups) > 1:
        self.sample_group_types['samples_primary'] = self.dataset.group.name + " Only"
        self.sample_group_types['samples_other'] = "Non-" + self.dataset.group.name
        self.sample_group_types['samples_all'] = "All Cases"
    else:
        self.sample_group_types['samples_primary'] = self.dataset.group.name
    sample_lists = [group.sample_list for group in self.sample_groups]

    self.get_mapping_methods()

    self.trait_table_width = get_trait_table_width(self.sample_groups)

    trait_symbol = None
    if not self.temp_trait and self.this_trait.symbol:
        trait_symbol = self.this_trait.symbol

    self.js_data = dict(trait_id=self.trait_id,
                        trait_symbol=trait_symbol,
                        dataset_type=self.dataset.type,
                        data_scale=self.dataset.data_scale,
                        sample_group_types=self.sample_group_types,
                        sample_lists=sample_lists,
                        attribute_names=self.sample_groups[0].attributes,
                        temp_uuid=self.temp_uuid)
def __init__(self, start_vars, temp_uuid):
    """Run the mapping method requested in ``start_vars`` and prepare results.

    The constructor loads the species/dataset/trait, gathers the sample
    values submitted with the form, copies display options from
    ``start_vars`` onto ``self``, dispatches to the selected mapping
    backend and finally builds ``self.js_data`` for the results page.

    Args:
        start_vars: Mapping of submitted form values. ``method``,
            ``manhattan_plot`` and ``maf`` are always read; the other keys
            depend on the chosen method and on whether ``startMb`` is
            present (per the original author's note it is only missing the
            first time the mapping page is opened).
        temp_uuid: Stored on ``self.temp_uuid`` and, for pylmm, passed as a
            string to ``run_permutations`` / ``gen_data``.

    Raises:
        KeyError: If a key that is read unconditionally is missing.
        ValueError: If ``selected_chr`` (or, on a reload, ``num_perm`` /
            ``num_bootstrap``) cannot be converted to ``int``.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # Needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    self.json_data = {'lodnames': ['lod.hk']}

    self.samples = []  # Want only ones with values
    self.vals = []

    # NOTE(review): the result of this call used to be bound to two locals
    # that were never read. The call is kept in case it has side effects;
    # confirm and drop it if it is pure.
    self.dataset.group.all_samples_ordered()

    trait_data = self.this_trait.data
    for sample in self.dataset.group.samplelist:
        # "sample" is actually the name of an individual
        for item in trait_data:
            name = trait_data[item].name
            if name == sample:
                # Samples that belong to the trait must have a form value.
                value = start_vars['value:' + name]
                self.samples.append(name)
                self.vals.append(value)
                break
        else:
            # Not part of the trait data: only keep it if a non-empty value
            # was submitted.
            value = start_vars.get('value:' + sample)
            if value:
                self.samples.append(sample)
                self.vals.append(value)

    # ZS: Check if genotypes exist in the DB in order to create links for
    # markers. (The old fallback wrapped a plain assignment in try/except,
    # so the default was always "True".)
    self.geno_db_exists = start_vars.get('geno_db_exists', "True")

    self.mapping_method = start_vars['method']
    if "results_path" in start_vars:
        self.mapping_results_path = start_vars['results_path']
    else:
        random_suffix = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(6))
        mapping_results_filename = self.dataset.group.name + "_" + random_suffix
        self.mapping_results_path = "{}{}.csv".format(
            webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

    self.manhattan_plot = start_vars['manhattan_plot'] == "True"

    self.maf = start_vars['maf']  # Minor allele frequency
    self.use_loco = start_vars.get('use_loco')
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which
    # template to use
    self.pair_scan = False
    self.score_type = "LRS"  # ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.num_perm = 0
    self.perm_output = []
    self.bootstrap_results = []
    self.covariates = start_vars.get('covariates')

    # ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        selected_chr = int(start_vars['selected_chr'])
        # ZS: Needs to be -1 if showing full map; there's probably a better
        # way to fix this
        self.selected_chr = selected_chr + 1 if selected_chr != -1 else selected_chr

    # Optional display settings are copied verbatim when they were submitted.
    for option in ("startMb", "endMb", "graphWidth", "lrsMax",
                   "haplotypeAnalystCheck"):
        if option in start_vars:
            setattr(self, option, start_vars[option])

    # ZS: This is to ensure showGenes, Legend, etc are checked the first time
    # you open the mapping page, since startMb will only not be set during
    # the first load
    if "startMb" in start_vars:
        self.permCheck = "ON" if "permCheck" in start_vars else False
        self.num_perm = int(start_vars['num_perm'])
        self.LRSCheck = start_vars['LRSCheck']
        self.showSNP = start_vars.get('showSNP', False)
        self.showGenes = start_vars.get('showGenes', False)
        self.viewLegend = start_vars.get('viewLegend', False)
    else:
        # A missing or non-numeric num_perm simply means "no permutations".
        try:
            requested_perms = int(start_vars['num_perm'])
        except (KeyError, ValueError, TypeError):
            requested_perms = 0
        if requested_perms > 0:
            self.num_perm = requested_perms

        self.permCheck = "ON" if self.num_perm > 0 else False
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    if 'genofile' in start_vars and start_vars['genofile'] != "":
        self.genofile_string = start_vars['genofile']
        # Only the part before the first ":" is used as the genofile name.
        self.dataset.group.genofile = self.genofile_string.split(":")[0]

    self.dataset.group.get_markers()
    if self.mapping_method == "gemma":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            results = gemma_mapping.run_gemma(self.dataset, self.samples,
                                              self.vals, self.covariates,
                                              self.use_loco)
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        rqtl_output = rqtl_mapping.run_rqtl_geno(
            self.vals, self.dataset, self.method, self.model,
            self.permCheck, self.num_perm, self.do_control,
            self.control_marker, self.manhattan_plot, self.pair_scan)
        if self.permCheck and self.num_perm > 0:
            # With permutations the output is unpacked into four parts.
            (self.perm_output, self.suggestive,
             self.significant, results) = rqtl_output
        else:
            results = rqtl_output
    elif self.mapping_method == "reaper":
        # ZS: Check if first time page loaded, so it can default to ON
        if "startMb" in start_vars:
            self.additiveCheck = start_vars.get('additiveCheck', False)
            self.bootCheck = "ON" if "bootCheck" in start_vars else False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            # A missing or non-numeric num_bootstrap disables bootstrapping.
            try:
                requested_bootstraps = int(start_vars['num_bootstrap'])
            except (KeyError, ValueError, TypeError):
                requested_bootstraps = 0
            if requested_bootstraps > 0:
                self.bootCheck = "ON"
                self.num_bootstrap = requested_bootstraps
            else:
                self.bootCheck = False
                self.num_bootstrap = 0

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        logger.info("Running qtlreaper")
        (results, self.json_data, self.perm_output, self.suggestive,
         self.significant,
         self.bootstrap_results) = qtlreaper_mapping.gen_reaper_results(
             self.this_trait, self.dataset, self.samples, self.vals,
             self.json_data, self.num_perm, self.bootCheck,
             self.num_bootstrap, self.do_control, self.control_marker,
             self.manhattan_plot)
    elif self.mapping_method == "plink":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                          self.species, self.vals, self.maf)
    elif self.mapping_method == "pylmm":
        logger.debug("RUNNING PYLMM")
        if self.num_perm > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        logger.debug("RUNNING NOTHING")
        # Previously `results` stayed unbound here and the code below died
        # with a NameError; an unknown method now yields an empty result set.
        results = []

    if self.pair_scan:
        self.qtl_results = []
        for marker in results:
            chrom = marker['chr1']
            # Sex chromosomes are tested first so the numeric comparison is
            # only reached for other values.
            if chrom == "X" or chrom == "X/Y" or chrom > 0:
                if 'lod_score' in marker:
                    self.qtl_results.append(marker)

        self.trimmed_markers = results

        # These lists were never created before being appended to.
        for key in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(key, [])
        # Iterate the markers directly: the old `enumerate` loop bound each
        # (index, marker) tuple to `qtl` and then subscripted it by name.
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            qtl_results=self.qtl_results,
        )
    else:
        self.qtl_results = []
        # This is needed in order to convert the highest chr to X/Y
        highest_chr = 1
        for marker in results:
            chrom = marker['chr']
            is_sex_chr = chrom == "X" or chrom == "X/Y"
            # NOTE(review): the numeric comparisons rely on Python 2 ordering
            # when `chrom`/`highest_chr` mix ints and strings.
            if is_sex_chr or chrom > 0:
                if is_sex_chr or chrom > highest_chr:
                    highest_chr = chrom
                if 'lod_score' in marker or 'lrs_value' in marker:
                    self.qtl_results.append(marker)

        with Bench("Exporting Results"):
            export_mapping_results(self.dataset, self.this_trait,
                                   self.qtl_results,
                                   self.mapping_results_path,
                                   self.mapping_scale, self.score_type)

        with Bench("Trimming Markers for Figure"):
            if len(self.qtl_results) > 30000:
                self.qtl_results = trim_markers_for_figure(self.qtl_results)

        with Bench("Trimming Markers for Table"):
            self.trimmed_markers = trim_markers_for_table(results)

        if self.mapping_method != "gemma":
            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            # Need to convert the QTL objects that qtl reaper returns into a
            # json serializable dictionary
            for qtl in self.qtl_results:
                # A numeric highest chromosome is reported as "X".
                if (qtl['chr'] == highest_chr and highest_chr != "X"
                        and highest_chr != "X/Y"):
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                # Prefer the LRS value when present, else the LOD score.
                if 'lrs_value' in qtl:
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

        # Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        chromosomes = self.species.chromosomes.chromosomes
        for key in chromosomes.keys():
            chromosome = chromosomes[key]
            self.json_data['chrnames'].append(
                [chromosome.name, chromosome.mb_length])
            chromosome_mb_lengths[key] = chromosome.mb_length

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
            num_perm=self.num_perm,
            perm_results=self.perm_output,
        )
def __init__(self, start_vars, temp_uuid):
    """Run the mapping method requested in ``start_vars`` and prepare results.

    Loads the species/dataset/trait, reads one value per sample in the
    group's sample list from ``start_vars``, dispatches to the selected
    mapping backend and builds ``self.js_data`` for the results page.

    Args:
        start_vars: Mapping of submitted form values. ``value:<sample>`` for
            every sample, ``method``, ``manhattan_plot`` and ``maf`` are
            always read; further keys depend on the chosen method.
        temp_uuid: Stored on ``self.temp_uuid`` and, for pylmm, passed as a
            string to ``run_permutations`` / ``gen_data``.

    Raises:
        KeyError: If a key that is read unconditionally is missing.
        ValueError: If ``selected_chr`` or a non-empty ``num_perm`` that is
            converted with ``int`` is not numeric.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # Needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    self.json_data = {'lodnames': ['lod.hk']}

    self.samples = []  # Want only ones with values
    self.vals = []

    for sample in self.dataset.group.samplelist:
        value = start_vars['value:' + sample]
        self.samples.append(str(sample))
        self.vals.append(value)

    self.mapping_method = start_vars['method']
    # Note the lowercase "true" expected by this form.
    self.manhattan_plot = start_vars['manhattan_plot'] == "true"

    self.maf = start_vars['maf']  # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which
    # template to use
    self.pair_scan = False
    self.score_type = "LRS"  # ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.num_perm = 0

    # ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        self.selected_chr = int(start_vars['selected_chr'])

    self.dataset.group.get_markers()
    if self.mapping_method == "gemma":
        self.score_type = "LOD"
        included_markers, p_values = gemma_mapping.run_gemma(
            self.dataset, self.samples, self.vals)
        self.dataset.group.get_specified_markers(markers=included_markers)
        self.dataset.group.markers.add_pvalues(p_values)
        results = self.dataset.group.markers.markers
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        # NOTE(review): unlike the reaper branch, a non-empty num_perm is
        # stored here without int() conversion -- confirm run_rqtl_geno
        # expects the raw form value.
        if start_vars['num_perm'] == "":
            self.num_perm = 0
        else:
            self.num_perm = start_vars['num_perm']
        self.control = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        if start_vars['pair_scan'] == "true":
            self.pair_scan = True
        results = self.run_rqtl_geno()
    elif self.mapping_method == "reaper":
        if start_vars['num_perm'] == "":
            self.num_perm = 0
        else:
            self.num_perm = int(start_vars['num_perm'])
        self.additive = False
        self.control = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        results = self.gen_reaper_results()
    elif self.mapping_method == "plink":
        results = self.run_plink()
    elif self.mapping_method == "pylmm":
        print("RUNNING PYLMM")
        self.num_perm = start_vars['num_perm']
        if self.num_perm != "" and int(self.num_perm) > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        print("RUNNING NOTHING")
        # Previously `results` stayed unbound here and the code below died
        # with a NameError; an unknown method now yields an empty result set.
        results = []

    if self.pair_scan:
        self.qtl_results = []
        for marker in results:
            chrom = marker['chr1']
            # Sex chromosomes are tested first so the numeric comparison is
            # only reached for other values.
            if chrom == "X" or chrom == "X/Y" or chrom > 0:
                if 'lod_score' in marker:
                    self.qtl_results.append(marker)

        # These lists were never created before being appended to.
        for key in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(key, [])
        # Iterate the markers directly: the old `enumerate` loop bound each
        # (index, marker) tuple to `qtl` and then subscripted it by name,
        # which raised TypeError for every pair scan.
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            qtl_results=self.qtl_results,
        )
    else:
        self.cutoff = 2
        self.qtl_results = []
        # This is needed in order to convert the highest chr to X/Y
        highest_chr = 1
        for marker in results:
            chrom = marker['chr']
            is_sex_chr = chrom == "X" or chrom == "X/Y"
            # NOTE(review): the numeric comparisons rely on Python 2 ordering
            # when `chrom`/`highest_chr` mix ints and strings.
            if is_sex_chr or chrom > 0:
                if is_sex_chr or chrom > highest_chr:
                    highest_chr = chrom
                if 'lod_score' in marker or 'lrs_value' in marker:
                    self.qtl_results.append(marker)

        self.json_data['chr'] = []
        self.json_data['pos'] = []
        self.json_data['lod.hk'] = []
        self.json_data['markernames'] = []

        self.json_data['suggestive'] = self.suggestive
        self.json_data['significant'] = self.significant

        # Need to convert the QTL objects that qtl reaper returns into a json
        # serializable dictionary
        for qtl in self.qtl_results:
            # A numeric highest chromosome is reported as "X".
            if (qtl['chr'] == highest_chr and highest_chr != "X"
                    and highest_chr != "X/Y"):
                print("changing to X")
                self.json_data['chr'].append("X")
            else:
                self.json_data['chr'].append(str(qtl['chr']))
            self.json_data['pos'].append(qtl['Mb'])
            # Prefer the LRS value when present, else the LOD score.
            if 'lrs_value' in qtl:
                self.json_data['lod.hk'].append(str(qtl['lrs_value']))
            else:
                self.json_data['lod.hk'].append(str(qtl['lod_score']))
            self.json_data['markernames'].append(qtl['name'])

        # Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        chromosomes = self.species.chromosomes.chromosomes
        for key in chromosomes.keys():
            chromosome = chromosomes[key]
            self.json_data['chrnames'].append(
                [chromosome.name, chromosome.mb_length])
            chromosome_mb_lengths[key] = chromosome.mb_length

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
        )
def __init__(self, start_vars, temp_uuid):
    """Run the mapping method requested in ``start_vars`` and prepare results.

    Loads the species/dataset/trait, gathers the submitted sample values,
    copies display options from ``start_vars`` onto ``self``, dispatches to
    the selected mapping backend and builds ``self.js_data`` for the
    results page.

    Args:
        start_vars: Mapping of submitted form values. ``method``,
            ``manhattan_plot`` and ``maf`` are always read; the other keys
            depend on the chosen method and on whether ``startMb`` is
            present (per the original author's note it is only missing the
            first time the mapping page is opened).
        temp_uuid: Stored on ``self.temp_uuid`` and, for pylmm, passed as a
            string to ``run_permutations`` / ``gen_data``.

    Raises:
        KeyError: If a key that is read unconditionally is missing.
        ValueError: If ``selected_chr`` (or, on a reload, ``num_perm`` /
            ``num_bootstrap``) cannot be converted to ``int``.
    """
    helper_functions.get_species_dataset_trait(self, start_vars)

    # Needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)
    self.temp_uuid = temp_uuid

    self.json_data = {'lodnames': ['lod.hk']}

    self.samples = []  # Want only ones with values
    self.vals = []

    # NOTE(review): the result of this call used to be bound to two locals
    # that were never read. The call is kept in case it has side effects;
    # confirm and drop it if it is pure.
    self.dataset.group.all_samples_ordered()

    trait_data = self.this_trait.data
    for sample in self.dataset.group.samplelist:
        # "sample" is actually the name of an individual
        for item in trait_data:
            name = trait_data[item].name
            if name == sample:
                # Samples that belong to the trait must have a form value.
                value = start_vars['value:' + name]
                self.samples.append(name)
                self.vals.append(value)
                break
        else:
            # Not part of the trait data: only keep it if a non-empty value
            # was submitted.
            value = start_vars.get('value:' + sample)
            if value:
                self.samples.append(sample)
                self.vals.append(value)

    self.mapping_method = start_vars['method']
    self.manhattan_plot = start_vars['manhattan_plot'] == "True"

    self.maf = start_vars['maf']  # Minor allele frequency
    self.suggestive = ""
    self.significant = ""
    # Initializing this since it is checked in views to determine which
    # template to use
    self.pair_scan = False
    self.score_type = "LRS"  # ZS: LRS or LOD
    self.mapping_scale = "physic"
    self.num_perm = 0
    self.perm_output = []
    self.bootstrap_results = []

    # ZS: This is passed to GN1 code for single chr mapping
    self.selected_chr = -1
    if "selected_chr" in start_vars:
        selected_chr = int(start_vars['selected_chr'])
        # ZS: Needs to be -1 if showing full map; there's probably a better
        # way to fix this
        self.selected_chr = selected_chr + 1 if selected_chr != -1 else selected_chr

    # Optional display settings are copied verbatim when they were submitted.
    for option in ("startMb", "endMb", "graphWidth", "lrsMax",
                   "haplotypeAnalystCheck"):
        if option in start_vars:
            setattr(self, option, start_vars[option])

    # ZS: This is to ensure showGenes, Legend, etc are checked the first time
    # you open the mapping page, since startMb will only not be set during
    # the first load
    if "startMb" in start_vars:
        self.permCheck = "ON" if "permCheck" in start_vars else False
        self.num_perm = int(start_vars['num_perm'])
        self.LRSCheck = start_vars['LRSCheck']
        self.showSNP = start_vars.get('showSNP', False)
        self.showGenes = start_vars.get('showGenes', False)
        self.viewLegend = start_vars.get('viewLegend', False)
    else:
        # A missing or non-numeric num_perm simply means "no permutations".
        try:
            requested_perms = int(start_vars['num_perm'])
        except (KeyError, ValueError, TypeError):
            requested_perms = 0
        if requested_perms > 0:
            self.num_perm = requested_perms

        self.permCheck = "ON" if self.num_perm > 0 else False
        self.showSNP = "ON"
        self.showGenes = "ON"
        self.viewLegend = "ON"

    self.dataset.group.get_markers()
    if self.mapping_method == "gemma":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        with Bench("Running GEMMA"):
            results = gemma_mapping.run_gemma(self.dataset, self.samples,
                                              self.vals)
    elif self.mapping_method == "rqtl_plink":
        results = self.run_rqtl_plink()
    elif self.mapping_method == "rqtl_geno":
        self.score_type = "LOD"
        self.mapping_scale = "morgan"
        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.dataset.group.genofile = start_vars['genofile']
        self.method = start_vars['mapmethod_rqtl_geno']
        self.model = start_vars['mapmodel_rqtl_geno']
        if start_vars['pair_scan'] == "true":
            self.pair_scan = True
        rqtl_output = rqtl_mapping.run_rqtl_geno(
            self.vals, self.dataset, self.method, self.model,
            self.permCheck, self.num_perm, self.do_control,
            self.control_marker, self.manhattan_plot, self.pair_scan)
        if self.permCheck and self.num_perm > 0:
            # With permutations the output is unpacked into four parts.
            (self.perm_output, self.suggestive,
             self.significant, results) = rqtl_output
        else:
            results = rqtl_output
    elif self.mapping_method == "reaper":
        # ZS: Check if first time page loaded, so it can default to ON
        if "startMb" in start_vars:
            self.additiveCheck = start_vars.get('additiveCheck', False)
            self.bootCheck = "ON" if "bootCheck" in start_vars else False
            self.num_bootstrap = int(start_vars['num_bootstrap'])
        else:
            self.additiveCheck = "ON"
            # A missing or non-numeric num_bootstrap disables bootstrapping.
            try:
                requested_bootstraps = int(start_vars['num_bootstrap'])
            except (KeyError, ValueError, TypeError):
                requested_bootstraps = 0
            if requested_bootstraps > 0:
                self.bootCheck = "ON"
                self.num_bootstrap = requested_bootstraps
            else:
                self.bootCheck = False
                self.num_bootstrap = 0

        self.control_marker = start_vars['control_marker']
        self.do_control = start_vars['do_control']
        self.dataset.group.genofile = start_vars['genofile']
        logger.info("Running qtlreaper")
        (results, self.json_data, self.perm_output, self.suggestive,
         self.significant,
         self.bootstrap_results) = qtlreaper_mapping.gen_reaper_results(
             self.this_trait, self.dataset, self.samples, self.json_data,
             self.num_perm, self.bootCheck, self.num_bootstrap,
             self.do_control, self.control_marker, self.manhattan_plot)
    elif self.mapping_method == "plink":
        self.score_type = "-log(p)"
        self.manhattan_plot = True
        results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                          self.species, self.vals, self.maf)
    elif self.mapping_method == "pylmm":
        logger.debug("RUNNING PYLMM")
        self.dataset.group.genofile = start_vars['genofile']
        if self.num_perm > 0:
            self.run_permutations(str(temp_uuid))
        results = self.gen_data(str(temp_uuid))
    else:
        logger.debug("RUNNING NOTHING")
        # Previously `results` stayed unbound here and the code below died
        # with a NameError; an unknown method now yields an empty result set.
        results = []

    if self.pair_scan:
        self.qtl_results = []
        for marker in results:
            chrom = marker['chr1']
            # Sex chromosomes are tested first so the numeric comparison is
            # only reached for other values.
            if chrom == "X" or chrom == "X/Y" or chrom > 0:
                if 'lod_score' in marker:
                    self.qtl_results.append(marker)

        self.trimmed_markers = results

        # These lists were never created before being appended to.
        for key in ('chr1', 'chr2', 'Mb', 'markernames'):
            self.json_data.setdefault(key, [])
        # Iterate the markers directly: the old `enumerate` loop bound each
        # (index, marker) tuple to `qtl` and then subscripted it by name,
        # which raised TypeError for every pair scan.
        for qtl in self.qtl_results:
            self.json_data['chr1'].append(str(qtl['chr1']))
            self.json_data['chr2'].append(str(qtl['chr2']))
            self.json_data['Mb'].append(qtl['Mb'])
            self.json_data['markernames'].append(qtl['name'])

        self.js_data = dict(
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            qtl_results=self.qtl_results,
        )
    else:
        self.cutoff = 2
        self.qtl_results = []
        # This is needed in order to convert the highest chr to X/Y
        highest_chr = 1
        for marker in results:
            chrom = marker['chr']
            is_sex_chr = chrom == "X" or chrom == "X/Y"
            # NOTE(review): the numeric comparisons rely on Python 2 ordering
            # when `chrom`/`highest_chr` mix ints and strings.
            if is_sex_chr or chrom > 0:
                if is_sex_chr or chrom > highest_chr:
                    highest_chr = chrom
                if 'lod_score' in marker or 'lrs_value' in marker:
                    self.qtl_results.append(marker)

        self.trimmed_markers = trim_markers_for_table(results)

        if self.mapping_method != "gemma":
            self.json_data['chr'] = []
            self.json_data['pos'] = []
            self.json_data['lod.hk'] = []
            self.json_data['markernames'] = []

            self.json_data['suggestive'] = self.suggestive
            self.json_data['significant'] = self.significant

            # Need to convert the QTL objects that qtl reaper returns into a
            # json serializable dictionary
            for qtl in self.qtl_results:
                # A numeric highest chromosome is reported as "X".
                if (qtl['chr'] == highest_chr and highest_chr != "X"
                        and highest_chr != "X/Y"):
                    self.json_data['chr'].append("X")
                else:
                    self.json_data['chr'].append(str(qtl['chr']))
                self.json_data['pos'].append(qtl['Mb'])
                # Prefer the LRS value when present, else the LOD score.
                if 'lrs_value' in qtl:
                    self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                else:
                    self.json_data['lod.hk'].append(str(qtl['lod_score']))
                self.json_data['markernames'].append(qtl['name'])

        # Get chromosome lengths for drawing the interval map plot
        chromosome_mb_lengths = {}
        self.json_data['chrnames'] = []
        chromosomes = self.species.chromosomes.chromosomes
        for key in chromosomes.keys():
            chromosome = chromosomes[key]
            self.json_data['chrnames'].append(
                [chromosome.name, chromosome.mb_length])
            chromosome_mb_lengths[key] = chromosome.mb_length

        self.js_data = dict(
            result_score_type=self.score_type,
            json_data=self.json_data,
            this_trait=self.this_trait.name,
            data_set=self.dataset.name,
            maf=self.maf,
            manhattan_plot=self.manhattan_plot,
            mapping_scale=self.mapping_scale,
            chromosomes=chromosome_mb_lengths,
            qtl_results=self.qtl_results,
            num_perm=self.num_perm,
            perm_results=self.perm_output,
        )
def __init__(self, kw):
    """Prepare the data the trait page needs for a stored or temporary trait.

    Three cases are distinguished from ``kw``:

    * ``trait_id`` present and ``dataset`` is not ``"Temp"``: the trait is
      loaded through ``helper_functions.get_species_dataset_trait``.
    * ``group`` present: a new temporary trait is created from the pasted
      values in ``trait_paste`` and the raw text is stored in Redis under
      the generated trait id.
    * otherwise: an existing temporary trait is rebuilt from ``trait_id``
      (species and group are the 2nd and 3rd ``_``-separated parts) and its
      values are read back from Redis.

    Afterwards the BLAT link URLs, nearest marker, sample lists, hidden form
    fields (``self.hddn``) and ``self.js_data`` are populated.

    Args:
        kw: Mapping of request values as described above.
    """
    logger.debug("in ShowTrait, kw are:", kw)

    if 'trait_id' in kw and kw['dataset'] != "Temp":
        self.temp_trait = False
        self.trait_id = kw['trait_id']
        helper_functions.get_species_dataset_trait(self, kw)
    elif 'group' in kw:
        self.temp_trait = True
        timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
        self.trait_id = ("Temp_" + kw['species'] + "_" + kw['group'] + "_"
                         + timestamp)
        self.temp_species = kw['species']
        self.temp_group = kw['group']
        self.dataset = data_set.create_dataset(dataset_name="Temp",
                                               dataset_type="Temp",
                                               group_name=self.temp_group)
        self.this_trait = GeneralTrait(dataset=self.dataset,
                                       name=self.trait_id,
                                       cellid=None)
        self.trait_vals = kw['trait_paste'].split()

        # Put values in Redis so they can be looked up later if added to a
        # collection
        Redis.set(self.trait_id, kw['trait_paste'])
    else:
        self.temp_trait = True
        self.trait_id = kw['trait_id']
        id_parts = self.trait_id.split("_")
        self.temp_species = id_parts[1]
        self.temp_group = id_parts[2]
        self.dataset = data_set.create_dataset(dataset_name="Temp",
                                               dataset_type="Temp",
                                               group_name=self.temp_group)
        self.this_trait = GeneralTrait(dataset=self.dataset,
                                       name=self.trait_id,
                                       cellid=None)
        self.trait_vals = Redis.get(self.trait_id).split()

    # ZS: Get verify/rna-seq link URLs. This is best effort: any failure
    # (missing attribute, no probe rows, unexpected probe name, DB error)
    # leaves both URLs empty.
    ucsc_blat_url = ""
    uthsc_blat_url = ""
    try:
        blatsequence = self.this_trait.blatseq

        # XZ, 06/03/2009: ProbeSet name is not unique among platforms. We
        # should use ProbeSet Id instead.
        # NOTE(review): the names are interpolated straight into the SQL
        # text. This should become a parameterized query once the paramstyle
        # accepted by g.db.execute is confirmed.
        probe_query = (
            "SELECT Probe.Sequence, Probe.Name "
            "FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef "
            "WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND "
            "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND "
            "ProbeSetFreeze.Name = '%s' AND "
            "ProbeSet.Name = '%s' AND "
            "Probe.ProbeSetId = ProbeSet.Id "
            "order by Probe.SerialOrder"
        ) % (self.this_trait.dataset.name, self.this_trait.name)

        probe_rows = None
        if not blatsequence:
            probe_rows = g.db.execute(probe_query).fetchall()
            if not probe_rows:
                raise ValueError("no probe sequences found for trait")
            # Only probes whose name ends in an odd digit contribute.
            blatsequence = ''.join(
                row[0].strip() for row in probe_rows
                if int(row[1][-1]) % 2 == 1)

        # --------Hongqiang add this part in order to not only blat ProbeSet,
        # but also blat Probe
        blatsequence = ('%3E' + self.this_trait.name + '%0A'
                        + blatsequence + '%0A')
        # The second query was textually identical to the first, so its rows
        # are reused when it has already been run above.
        if probe_rows is None:
            probe_rows = g.db.execute(probe_query).fetchall()
        for row in probe_rows:
            if int(row[1][-1]) % 2 == 1:
                blatsequence += ('%3EProbe_' + row[1].strip() + '%0A'
                                 + row[0].strip() + '%0A')

        species = self.dataset.group.species
        # NOTE(review): mouse and human build the "UCSC" URL from
        # webqtlConfig.UTHSC_BLAT2 while rat uses webqtlConfig.UCSC_BLAT;
        # kept as found -- confirm this is intended.
        if species == "rat":
            ucsc_blat_url = webqtlConfig.UCSC_BLAT % ('rat', 'rn3',
                                                      blatsequence)
        elif species == "mouse":
            ucsc_blat_url = webqtlConfig.UTHSC_BLAT2 % ('mouse', 'mm10',
                                                        blatsequence)
            uthsc_blat_url = webqtlConfig.UTHSC_BLAT % ('mouse', 'mm10',
                                                        blatsequence)
        elif species == "human":
            ucsc_blat_url = webqtlConfig.UTHSC_BLAT2 % ('human', 'hg19',
                                                        blatsequence)
    except Exception:
        # `Exception` instead of a bare except so SystemExit and
        # KeyboardInterrupt are no longer swallowed.
        ucsc_blat_url = ""
        uthsc_blat_url = ""
    self.UCSC_BLAT_URL = ucsc_blat_url
    self.UTHSC_BLAT_URL = uthsc_blat_url

    self.build_correlation_tools()

    # Get nearest marker for composite mapping. The attribute is now always
    # defined (empty for temporary traits, which previously had none).
    has_locus = (not self.temp_trait
                 and hasattr(self.this_trait, 'locus_chr')
                 and self.this_trait.locus_chr != ""
                 and self.dataset.type != "Geno"
                 and self.dataset.type != "Publish")
    self.nearest_marker = ""
    if has_locus:
        self.nearest_marker = get_nearest_marker(self.this_trait,
                                                 self.dataset)

    self.make_sample_lists()

    # Todo: Add back in the ones we actually need from below, as we discover
    # we need them
    hddn = OrderedDict()

    if self.dataset.group.allsamples:
        hddn['allsamples'] = ' '.join(self.dataset.group.allsamples)

    hddn['trait_id'] = self.trait_id
    hddn['dataset'] = self.dataset.name
    hddn['temp_trait'] = False
    if self.temp_trait:
        hddn['temp_trait'] = True
        hddn['group'] = self.temp_group
        hddn['species'] = self.temp_species
    hddn['use_outliers'] = False
    hddn['method'] = "pylmm"
    hddn['mapping_display_all'] = True
    hddn['suggestive'] = 0
    hddn['num_perm'] = 0
    hddn['manhattan_plot'] = ""
    hddn['control_marker'] = self.nearest_marker if has_locus else ""
    hddn['do_control'] = False
    hddn['maf'] = 0.01
    hddn['compare_traits'] = []
    hddn['export_data'] = ""

    # We'll need access to this_trait and hddn in the Jinja2 Template, so we
    # put it inside self
    self.hddn = hddn

    self.temp_uuid = uuid.uuid4()

    self.sample_group_types = OrderedDict()
    self.sample_group_types['samples_primary'] = self.dataset.group.name
    if len(self.sample_groups) > 1:
        self.sample_group_types['samples_other'] = "Other"
        self.sample_group_types['samples_all'] = "All"
    sample_lists = [group.sample_list for group in self.sample_groups]

    self.get_mapping_methods()

    self.stats_table_width, self.trait_table_width = get_table_widths(
        self.sample_groups)

    trait_symbol = None
    if not self.temp_trait and self.this_trait.symbol:
        trait_symbol = self.this_trait.symbol

    self.js_data = dict(
        trait_id=self.trait_id,
        trait_symbol=trait_symbol,
        dataset_type=self.dataset.type,
        data_scale=self.dataset.data_scale,
        sample_group_types=self.sample_group_types,
        sample_lists=sample_lists,
        attribute_names=self.sample_groups[0].attributes,
        temp_uuid=self.temp_uuid,
    )
def __init__(self, start_vars):
    """Run the requested correlation and collect the resulting traits.

    Loads the base dataset/trait, gathers the samples selected by
    ``corr_samples_group``, correlates against the target dataset and
    stores the filtered trait objects in ``self.correlation_results``
    and their JSON form in ``self.json_results``.

    Args:
        start_vars: Mapping of request parameters. Keys read here:
            ``corr_type`` ("tissue", "lit" or "sample" are the values
            acted on), ``dataset``, ``group`` (only when ``dataset`` is
            "Temp"), ``corr_sample_method``, ``corr_samples_group``,
            ``corr_dataset``, ``min_expr``, ``corr_return_results``,
            and optionally ``p_range_lower``, ``p_range_upper``,
            ``loc_chr``, ``min_loc_mb``, ``max_loc_mb``. The mapping is
            also handed to helper_functions.get_species_dataset_trait
            and self.process_samples, which may read further keys.

    Raises:
        AssertionError: If a required key is missing, or if ``loc_chr``
            is given without ``min_loc_mb`` and ``max_loc_mb``.
    """
    # get trait list from db (database name)
    # calculate correlation with Base vector and targets

    # Check parameters
    assert('corr_type' in start_vars)
    assert(is_str(start_vars['corr_type']))
    assert('dataset' in start_vars)
    # assert('group' in start_vars) permitted to be empty?
    assert('corr_sample_method' in start_vars)
    assert('corr_samples_group' in start_vars)
    assert('corr_dataset' in start_vars)
    assert('min_expr' in start_vars)
    assert('corr_return_results' in start_vars)
    if 'loc_chr' in start_vars:
        assert('min_loc_mb' in start_vars)
        assert('max_loc_mb' in start_vars)

    with Bench("Doing correlations"):
        if start_vars['dataset'] == "Temp":
            self.dataset = data_set.create_dataset(dataset_name="Temp",
                                                   dataset_type="Temp",
                                                   group_name=start_vars['group'])
            self.trait_id = "Temp"
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
        else:
            helper_functions.get_species_dataset_trait(self, start_vars)

        self.dataset.group.read_genotype_file()

        corr_samples_group = start_vars['corr_samples_group']

        self.sample_data = {}
        self.corr_type = start_vars['corr_type']
        self.corr_method = start_vars['corr_sample_method']
        self.min_expr = get_float(start_vars, 'min_expr')
        self.p_range_lower = get_float(start_vars, 'p_range_lower', -1.0)
        self.p_range_upper = get_float(start_vars, 'p_range_upper', 1.0)

        if ('loc_chr' in start_vars and
                'min_loc_mb' in start_vars and
                'max_loc_mb' in start_vars):
            self.location_chr = get_string(start_vars, 'loc_chr')
            self.min_location_mb = get_int(start_vars, 'min_loc_mb')
            self.max_location_mb = get_int(start_vars, 'max_loc_mb')
        else:
            # These attributes are read unconditionally by the location
            # filter below, so they must exist even when the request
            # carries no location range.
            self.location_chr = None
            self.min_location_mb = None
            self.max_location_mb = None

        self.get_formatted_corr_type()
        self.return_number = int(start_vars['corr_return_results'])

        # The two if statements below append samples to the sample list based
        # upon whether the user selected Primary Samples Only, Other Samples
        # Only, or All Samples.
        group = self.dataset.group
        # parlist / f1list may be None; treat that as "no such samples".
        parents_and_f1 = list(group.parlist or []) + list(group.f1list or [])
        # Build a new list: extending group.samplelist with += would modify
        # the group's own sample list in place.
        primary_samples = list(group.samplelist) + parents_and_f1

        # If either BXD/whatever Only or All Samples, append all of that
        # group's samplelist
        if corr_samples_group != 'samples_other':
            self.process_samples(start_vars, primary_samples)

        # If either Non-BXD/whatever or All Samples, get all samples from
        # this_trait.data and exclude the primary samples (because they would
        # have been added in the previous if statement if the user selected
        # All Samples)
        if corr_samples_group != 'samples_primary':
            if corr_samples_group == 'samples_other':
                # Parents and F1s are dropped from the exclusion list here, so
                # they are not excluded from the "other" samples.
                primary_samples = [sample for sample in primary_samples
                                   if sample not in parents_and_f1]
            self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)

        self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
        self.target_dataset.get_trait_data(self.sample_data.keys())

        self.correlation_results = []
        self.correlation_data = {}

        tissue_corr_data = None
        lit_corr_data = None

        if self.corr_type == "tissue":
            self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol")

            tissue_corr_data = self.do_tissue_correlation_for_all_traits()
            if tissue_corr_data is not None:
                for trait in tissue_corr_data.keys()[:self.return_number]:
                    self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
            else:
                # No tissue data: fall back to correlating every target trait.
                for trait, values in self.target_dataset.trait_data.iteritems():
                    self.get_sample_r_and_p_values(trait, values)

        elif self.corr_type == "lit":
            self.trait_geneid_dict = self.dataset.retrieve_genes("GeneId")
            lit_corr_data = self.do_lit_correlation_for_all_traits()

            for trait in lit_corr_data.keys()[:self.return_number]:
                self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])

        elif self.corr_type == "sample":
            for trait, values in self.target_dataset.trait_data.iteritems():
                self.get_sample_r_and_p_values(trait, values)

        # Order traits by descending absolute value of the first element of
        # each entry (unpacked below as sample_r).
        self.correlation_data = collections.OrderedDict(
            sorted(self.correlation_data.items(), key=lambda t: -abs(t[1][0])))

        # The expression/location filters only apply to these target types.
        target_has_location = self.target_dataset.type in ("ProbeSet", "Geno")

        if target_has_location:
            # ZS: Convert chromosome names to ints for the location range
            # option. Built once instead of rescanning the chromosomes for
            # every trait; a later duplicate name overrides an earlier one,
            # as in a scan without break.
            chr_order_by_name = {}
            for order_id, chr_info in self.dataset.species.chromosomes.chromosomes.iteritems():
                chr_order_by_name[chr_info.name] = order_id
            range_chr_as_int = chr_order_by_name.get(self.location_chr)

        for trait in self.correlation_data.keys()[:self.return_number]:
            trait_object = GeneralTrait(dataset=self.target_dataset,
                                        name=trait,
                                        get_qtl_info=True,
                                        get_sample_info=False)

            sample_r = float(self.correlation_data[trait][0])
            if not (sample_r >= self.p_range_lower and sample_r <= self.p_range_upper):
                continue

            if target_has_location:
                # Unknown chromosome names map to 0.
                chr_as_int = chr_order_by_name.get(trait_object.chr, 0)

                if ((self.min_expr is not None and
                        float(trait_object.mean) < self.min_expr) or
                    (range_chr_as_int is not None and
                        chr_as_int != range_chr_as_int) or
                    (self.min_location_mb is not None and
                        float(trait_object.mb) < float(self.min_location_mb)) or
                    (self.max_location_mb is not None and
                        float(trait_object.mb) > float(self.max_location_mb))):
                    continue

            (trait_object.sample_r,
             trait_object.sample_p,
             trait_object.num_overlap) = self.correlation_data[trait]

            # Set some sane defaults
            trait_object.tissue_corr = 0
            trait_object.tissue_pvalue = 0
            trait_object.lit_corr = 0
            # tissue_corr_data is None when the tissue lookup produced
            # nothing; keep the defaults in that case for every target type.
            if self.corr_type == "tissue" and tissue_corr_data is not None:
                trait_object.tissue_corr = tissue_corr_data[trait][1]
                trait_object.tissue_pvalue = tissue_corr_data[trait][2]
            elif self.corr_type == "lit":
                trait_object.lit_corr = lit_corr_data[trait][1]
            self.correlation_results.append(trait_object)

        self.target_dataset.get_trait_info(self.correlation_results, self.target_dataset.group.species)

        if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
            self.do_lit_correlation_for_trait_list()

        if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
            self.do_tissue_correlation_for_trait_list()

    self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset)