def __init__(self, start_vars):
    """Build the trait-by-trait correlation matrix and the PCA inputs.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a
            comma-separated string of trait identifiers.

    ``helper_functions.get_trait_db_obs`` is called first; this method
    then reads ``self.trait_list`` as a sequence of ``(trait, dataset)``
    pairs (presumably filled in by that helper - it is defined outside
    this block).

    Attributes set:
        traits: trait objects, in ``self.trait_list`` order.
        all_sample_list: every sample key found on any trait, in
            first-seen order.
        sample_data: one row per trait aligned with ``all_sample_list``;
            ``''`` marks a sample the trait has no entry for.
        lowest_overlap: smallest pairwise overlap encountered, capped
            at 8 (the threshold at which a warning is displayed).
        corr_results: one row per trait of
            ``[target_trait, r, num_overlap]`` cells. Cells on or below
            the diagonal hold Pearson r, cells above it Spearman r.
        pca_corr_results: one row per trait of Pearson r values
            (``0`` where two traits share no samples).
        loadings_array: whatever ``self.process_loadings()`` returns.
        js_data: dict handed to the front end.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    # Gather the traits and the ordered union of their samples.  The set
    # gives O(1) "already seen?" checks; the list preserves the order.
    self.traits = []
    self.all_sample_list = []
    seen_samples = set()
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    # One row of values per trait, padded with '' for missing samples.
    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    # ZS: lowest number of overlapping samples, used to notify the user,
    # or 8, whichever is lower (8 is when the warning is displayed).
    self.lowest_overlap = 8

    self.corr_results = []
    self.pca_corr_results = []
    for row_idx, trait_db in enumerate(self.trait_list):
        this_sample_data = trait_db[0].data

        corr_result_row = []
        pca_corr_result_row = []
        for col_idx, target in enumerate(self.trait_list):
            target_trait = target[0]
            target_samples = target[1].group.samplelist
            target_sample_data = target_trait.data
            print("target_samples", len(target_samples))

            # Pair up the values of the samples both traits have.
            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            if num_overlap < self.lowest_overlap:
                self.lowest_overlap = num_overlap

            if num_overlap == 0:
                corr_result_row.append([target_trait, 0, num_overlap])
                pca_corr_result_row.append(0)
                continue

            pearson_r, _ = scipy.stats.pearsonr(this_trait_vals, target_vals)

            # Pearson on and below the diagonal, Spearman above it.  The
            # position is taken from the indices rather than from testing
            # "r == 1": exact float equality misses a self-correlation of
            # 0.9999999999999998 and never fires when the diagonal cell
            # has no overlap, leaving the whole row Pearson.
            if col_idx <= row_idx:
                sample_r = pearson_r
            else:
                sample_r, _ = scipy.stats.spearmanr(this_trait_vals, target_vals)

            corr_result_row.append([target_trait, sample_r, num_overlap])
            pca_corr_result_row.append(pearson_r)

        self.corr_results.append(corr_result_row)
        self.pca_corr_results.append(pca_corr_result_row)

    print("corr_results:", pf(self.corr_results))

    # Every sample is placed in group 1.
    groups = [1] * len(self.all_sample_list)

    # The return value was never used; the call is kept because
    # process_loadings() runs right after it (NOTE(review): presumably it
    # relies on state calculate_pca leaves on self - confirm).
    self.calculate_pca(self.pca_corr_results, range(len(self.traits)))
    self.loadings_array = self.process_loadings()

    self.js_data = dict(
        traits=[trait.name for trait in self.traits],
        groups=groups,
        cols=range(len(self.traits)),
        rows=range(len(self.traits)),
        samples=self.all_sample_list,
        sample_data=self.sample_data,
    )
def __init__(self, start_vars):
    """Build the trait-by-trait correlation matrix for the front end.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a
            comma-separated string of trait identifiers.

    ``helper_functions.get_trait_db_obs`` is called first; this method
    then reads ``self.trait_list`` as a sequence of ``(trait, dataset)``
    pairs (presumably filled in by that helper - it is defined outside
    this block).

    Attributes set:
        traits: trait objects, in ``self.trait_list`` order.
        all_sample_list: every sample key found on any trait, in
            first-seen order.
        sample_data: one row per trait aligned with ``all_sample_list``;
            ``''`` marks a sample the trait has no entry for.
        corr_results: one row per trait of
            ``[target_trait, r, num_overlap]`` cells. Cells on or below
            the diagonal hold Pearson r, cells above it Spearman r, and
            ``0`` is stored where two traits share no samples.
        corr_results_for_pca: initialised to an empty list and not
            filled in here.
        js_data: dict handed to the front end.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    # Gather the traits and the ordered union of their samples.  The set
    # gives O(1) "already seen?" checks; the list preserves the order.
    self.traits = []
    self.all_sample_list = []
    seen_samples = set()
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    # One row of values per trait, padded with '' for missing samples.
    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    self.corr_results = []
    self.corr_results_for_pca = []
    for row_idx, trait_db in enumerate(self.trait_list):
        this_sample_data = trait_db[0].data
        print("this_sample_data", len(this_sample_data))

        corr_result_row = []
        for col_idx, target in enumerate(self.trait_list):
            target_trait = target[0]
            target_samples = target[1].group.samplelist
            target_sample_data = target_trait.data
            print("target_samples", len(target_samples))

            # Pair up the values of the samples both traits have.
            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            if num_overlap == 0:
                corr_result_row.append([target_trait, 0, num_overlap])
                continue

            # Pearson on and below the diagonal, Spearman above it.  The
            # position is taken from the indices rather than from testing
            # "r == 1": exact float equality misses a self-correlation of
            # 0.9999999999999998 and never fires when the diagonal cell
            # has no overlap, leaving the whole row Pearson.
            if col_idx <= row_idx:
                sample_r, _ = scipy.stats.pearsonr(this_trait_vals, target_vals)
            else:
                sample_r, _ = scipy.stats.spearmanr(this_trait_vals, target_vals)

            corr_result_row.append([target_trait, sample_r, num_overlap])

        self.corr_results.append(corr_result_row)

    print("corr_results:", pf(self.corr_results))

    # Every sample is placed in group 1.
    groups = [1] * len(self.all_sample_list)

    self.js_data = dict(
        traits=[trait.name for trait in self.traits],
        groups=groups,
        cols=range(len(self.traits)),
        rows=range(len(self.traits)),
        samples=self.all_sample_list,
        sample_data=self.sample_data,
    )
def __init__(self, start_vars):
    """Build the correlation matrix, its eigen decomposition and the PCA.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a
            comma-separated string of trait identifiers.

    ``helper_functions.get_trait_db_obs`` is called first; this method
    then reads ``self.trait_list`` as a sequence of ``(trait, dataset)``
    pairs (presumably filled in by that helper - it is defined outside
    this block).

    Initialisation stops early, leaving only the attributes assigned up
    to that point, in two situations:

    * the traits do not all belong to the same group
      (``insufficient_shared_samples`` is set to ``True``; ``traits``
      and ``all_sample_list`` then only cover the traits before the
      mismatch);
    * there are fewer samples than traits, in which case PCA should not
      be attempted (``sample_data`` is set, nothing after it).

    NOTE(review): consumers must cope with the missing attributes in
    both cases - verify against the caller/template.

    Attributes set on the full path:
        traits, all_sample_list, insufficient_shared_samples,
        sample_data: as in the description above; ``sample_data`` has
            one row per trait aligned with ``all_sample_list`` and uses
            ``''`` for samples a trait has no entry for.
        lowest_overlap: smallest pairwise overlap encountered, capped
            at 8 (the threshold at which a warning is displayed).
        trait_data_array: per trait, the values of the samples it has,
            in the order given by ``group.all_samples_ordered()``.
        corr_results: one row per trait of
            ``[target_trait, r, num_overlap]`` cells. Cells on or below
            the diagonal hold Pearson r, cells above it Spearman r.
        pca_corr_results: one row per trait of Pearson r values
            (``0`` where two traits share no samples).
        loadings_array: whatever ``self.process_loadings()`` returns.
        js_data: dict handed to the front end.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    self.all_sample_list = []
    self.traits = []
    self.insufficient_shared_samples = False

    # ZS: take the first trait's group as the reference, then verify in
    # the loop that every trait belongs to that same group.
    this_group = self.trait_list[0][1].group.name
    seen_samples = set()  # O(1) membership test next to the ordered list
    for trait_db in self.trait_list:
        if trait_db[1].group.name != this_group:
            self.insufficient_shared_samples = True
            break

        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    if self.insufficient_shared_samples:
        return

    # One row of values per trait, padded with '' for missing samples.
    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    # Shouldn't do PCA if there are more traits than observations/samples.
    # Every row built above has len(self.all_sample_list) entries, so that
    # length is the number of observations.  This has to be a bare return:
    # returning a value such as False from __init__ raises TypeError.
    if len(self.all_sample_list) < len(self.trait_list):
        return

    # ZS: lowest number of overlapping samples, used to notify the user,
    # or 8, whichever is lower (8 is when the warning is displayed).
    self.lowest_overlap = 8

    self.corr_results = []
    self.pca_corr_results = []
    self.trait_data_array = []
    for row_idx, trait_db in enumerate(self.trait_list):
        this_sample_data = trait_db[0].data

        # This trait's own values, in the group's sample order.
        self.trait_data_array.append([
            this_sample_data[sample].value
            for sample in trait_db[1].group.all_samples_ordered()
            if sample in this_sample_data
        ])

        corr_result_row = []
        pca_corr_result_row = []
        for col_idx, target in enumerate(self.trait_list):
            target_trait = target[0]
            target_samples = target[1].group.all_samples_ordered()
            target_sample_data = target_trait.data

            # Pair up the values of the samples both traits have.
            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            if num_overlap < self.lowest_overlap:
                self.lowest_overlap = num_overlap

            if num_overlap == 0:
                corr_result_row.append([target_trait, 0, num_overlap])
                pca_corr_result_row.append(0)
                continue

            pearson_r, _ = scipy.stats.pearsonr(this_trait_vals, target_vals)

            # Pearson on and below the diagonal, Spearman above it.  The
            # position is taken from the indices rather than from testing
            # "r == 1": exact float equality misses a self-correlation of
            # 0.9999999999999998 and never fires when the diagonal cell
            # has no overlap, leaving the whole row Pearson.
            if col_idx <= row_idx:
                sample_r = pearson_r
            else:
                sample_r, _ = scipy.stats.spearmanr(this_trait_vals, target_vals)

            corr_result_row.append([target_trait, sample_r, num_overlap])
            pca_corr_result_row.append(pearson_r)

        self.corr_results.append(corr_result_row)
        self.pca_corr_results.append(pca_corr_result_row)

    corr_result_eigen = la.eigenvectors(numarray.array(self.pca_corr_results))
    corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)

    # Every sample is placed in group 1.
    groups = [1] * len(self.all_sample_list)

    # The return value was never used; the call is kept because
    # process_loadings() runs right after it (NOTE(review): presumably it
    # relies on state calculate_pca leaves on self - confirm).
    self.calculate_pca(range(len(self.traits)), corr_eigen_value, corr_eigen_vectors)
    self.loadings_array = self.process_loadings()

    self.js_data = dict(
        traits=[trait.name for trait in self.traits],
        groups=groups,
        cols=range(len(self.traits)),
        rows=range(len(self.traits)),
        samples=self.all_sample_list,
        sample_data=self.sample_data,
    )
def run_analysis(self, requestform):
    """Run a WGCNA analysis in R on the traits named in *requestform*.

    Args:
        requestform: mapping of the submitted form. Keys read here:
            ``'trait_list'`` (comma-separated trait identifiers),
            ``'SoftThresholds'`` (optional, comma-separated integers),
            ``'Power'`` (used when no soft thresholds are given),
            ``'TOMtype'`` and ``'MinModuleSize'``.

    ``helper_functions.get_trait_db_obs`` is called first; this method
    then reads ``self.trait_list`` as a sequence whose items carry the
    trait object at index 0 (presumably filled in by that helper - it is
    defined outside this block).

    Side effects:
        * sets ``self.trait_db_list``, ``self.input`` (trait name ->
          {strain: value}), ``self.results`` and, when soft thresholds
          are requested, ``self.sft``;
        * writes the module dendrogram as a PNG to
          ``self.results['imgloc']``;
        * prints progress messages to stdout.
    """
    print("Starting WGCNA analysis on dataset")
    self.r_enableWGCNAThreads()  # Enable multi threading
    self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
    print("Retrieved phenotype data from database", requestform['trait_list'])
    helper_functions.get_trait_db_obs(self, self.trait_db_list)

    self.input = {}  # The phenotype values we need to send to R
    strains = []     # All the strains we have data for (contains duplicates)
    traits = []      # All the traits we have data for (should not contain duplicates)
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        traits.append(this_trait.name)
        trait_values = {}
        self.input[this_trait.name] = trait_values
        for strain in this_trait.data:
            strains.append(strain)
            trait_values[strain] = this_trait.data[strain].value

    # Transfer the loaded data from python to R
    uStrainsR = r_unique(ro.Vector(strains))  # Unique strains in R vector
    uTraitsR = r_unique(ro.Vector(traits))    # Unique traits in R vector
    n_strains = r_length(uStrainsR)
    n_traits = r_length(uTraitsR)

    r_cat("The number of unique strains:", n_strains, "\n")
    r_cat("The number of unique traits:", n_traits, "\n")

    # rM is the data matrix holding all the data in R
    # (rows = strains, columns = traits), pre-filled with NA.
    rM = ro.r.matrix(ri.NA_Real, nrow=n_strains, ncol=n_traits,
                     dimnames=r_list(uStrainsR, uTraitsR))
    for t in uTraitsR:
        trait = t[0]  # R uses vectors: every single element is a vector
        trait_values = self.input[trait]
        for s in uStrainsR:
            strain = s[0]  # R uses vectors: every single element is a vector
            value = trait_values.get(strain)
            # The cell already holds NA, so a strain without a value for
            # this trait needs no write.
            if value is not None:
                rM.rx[strain, trait] = value  # Update the matrix location
    # Flushed once after the fill instead of once per cell; nothing is
    # printed inside the loop.
    sys.stdout.flush()

    self.results = {}
    self.results['nphe'] = n_traits[0]         # Number of phenotypes/traits
    self.results['nstr'] = n_strains[0]        # Number of strains
    self.results['phenotypes'] = uTraitsR      # Traits used
    self.results['strains'] = uStrainsR        # Strains used in the analysis
    self.results['requestform'] = requestform  # User specified parameters for the output page

    # Calculate soft threshold if the user specified the SoftThresholds variable
    if requestform.get('SoftThresholds') is not None:
        powers = [int(threshold.strip())
                  for threshold in requestform['SoftThresholds'].rstrip().split(",")]
        rpow = r_unlist(r_c(powers))
        # Single-argument print(...) calls behave the same as the old
        # print statements on Python 2 and also parse on Python 3.
        print("SoftThresholds: {} == {}".format(powers, rpow))
        self.sft = self.r_pickSoftThreshold(rM, powerVector=rpow, verbose=5)

        print("PowerEstimate: {}".format(self.sft[0]))
        self.results['PowerEstimate'] = self.sft[0]
        if self.sft[0][0] is ri.NA_Integer:
            print("No power is suitable for the analysis, just use 1")
            self.results['Power'] = 1  # No power could be estimated
        else:
            self.results['Power'] = self.sft[0][0]  # Use the estimated power
    else:
        # The user clicked a button, so no soft threshold selection
        self.results['Power'] = requestform.get('Power')  # Use the power value the user gives

    # Create the block wise modules using WGCNA
    network = self.r_blockwiseModules(rM,
                                      power=self.results['Power'],
                                      TOMType=requestform['TOMtype'],
                                      minModuleSize=requestform['MinModuleSize'],
                                      verbose=3)

    # Save the network for the GUI
    self.results['network'] = network

    # How many modules and how many genes per module?
    module_table = r_table(network[1])
    print("WGCNA found {} modules".format(module_table))
    self.results['nmod'] = r_length(module_table)[0]

    # The iconic WGCNA plot of the modules in the hanging tree
    self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png"
    self.results['imgloc'] = webqtlConfig.TMPDIR + self.results['imgurl']
    r_png(self.results['imgloc'], width=1000, height=600)
    mergedColors = self.r_labels2colors(network[1])
    self.r_plotDendroAndColors(network[5][0], mergedColors, "Module colors",
                               dendroLabels=False, hang=0.03,
                               addGuide=True, guideHang=0.05)
    r_dev_off()
    sys.stdout.flush()
def __init__(self, start_vars):
    """Build the nodes and edges of the trait correlation network graph.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a
            comma-separated string of trait identifiers.

    ``helper_functions.get_trait_db_obs`` is called first; this method
    then reads ``self.trait_list`` as a sequence of ``(trait, dataset)``
    pairs (presumably filled in by that helper - it is defined outside
    this block).

    Attributes set:
        traits: trait objects, in ``self.trait_list`` order.
        all_sample_list: every sample key found on any trait, in
            first-seen order.
        sample_data: one row per trait aligned with ``all_sample_list``;
            ``''`` marks a sample the trait has no entry for.
        lowest_overlap: smallest pairwise overlap encountered, capped
            at 8 (the threshold at which a warning is displayed).
        network_data: initialised to an empty dict and not filled here.
        nodes_list: one ``{'data': {...}}`` dict per trait.
        edges_list: one ``{'data': {...}}`` dict per ordered trait pair
            that shares samples, using the Pearson correlation. A trait
            is never linked to itself, and pairs whose r is exactly 1
            are left out as well.
        elements: JSON string of ``nodes_list + edges_list``.
        js_data: dict handed to the front end.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    # Gather the traits and the ordered union of their samples.  The set
    # gives O(1) "already seen?" checks; the list preserves the order.
    self.traits = []
    self.all_sample_list = []
    seen_samples = set()
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        self.traits.append(this_trait)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    # One row of values per trait, padded with '' for missing samples.
    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    # ZS: lowest number of overlapping samples, used to notify the user,
    # or 8, whichever is lower (8 is when the warning is displayed).
    self.lowest_overlap = 8

    self.network_data = {}
    self.nodes_list = []
    self.edges_list = []
    for this_idx, trait_db in enumerate(self.trait_list):
        this_trait = trait_db[0]
        this_sample_data = this_trait.data
        this_node_id = str(this_trait.name) + ":" + str(this_trait.dataset.name)

        # ZS: used to decide whether the node should be hidden when the
        # correlation coefficient slider is used.
        max_corr = 0

        for target_idx, target in enumerate(self.trait_list):
            target_trait = target[0]
            target_samples = target[1].group.samplelist
            target_sample_data = target_trait.data

            # Pair up the values of the samples both traits have.
            this_trait_vals = []
            target_vals = []
            for sample in target_samples:
                if sample in this_sample_data and sample in target_sample_data:
                    this_trait_vals.append(this_sample_data[sample].value)
                    target_vals.append(target_sample_data[sample].value)

            this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(
                this_trait_vals, target_vals)

            # The self pair still counts towards lowest_overlap.
            if num_overlap < self.lowest_overlap:
                self.lowest_overlap = num_overlap

            # No shared samples: nothing to correlate.  A trait paired
            # with itself never gets an edge; this is decided by position
            # because a float self-correlation is not guaranteed to be
            # exactly 1 (it can come out as 0.9999999999999998, or nan
            # for constant values).
            if num_overlap == 0 or target_idx == this_idx:
                continue

            sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
            if sample_r == 1:
                # Kept for backward compatibility: exactly-1 pairs have
                # never been drawn.
                continue

            if -1 <= sample_r < -0.7:
                color = "#0000ff"
                width = 3
            elif -0.7 <= sample_r < -0.5:
                color = "#00ff00"
                width = 2
            elif -0.5 <= sample_r < 0:
                color = "#000000"
                width = 0.5
            elif 0 <= sample_r < 0.5:
                color = "#ffc0cb"
                width = 0.5
            elif 0.5 <= sample_r < 0.7:
                color = "#ffa500"
                width = 2
            elif 0.7 <= sample_r <= 1:
                color = "#ff0000"
                width = 3
            else:
                # Reached for nan (every comparison above is False).
                color = "#000000"
                width = 0

            if abs(sample_r) > max_corr:
                max_corr = abs(sample_r)

            rounded_r = round(sample_r, 3)
            edge_data = {
                'id': str(this_trait.name) + '_to_' + str(target_trait.name),
                'source': this_node_id,
                'target': str(target_trait.name) + ":" + str(target_trait.dataset.name),
                'correlation': rounded_r,
                'abs_corr': abs(rounded_r),
                'p_value': round(sample_p, 3),
                'overlap': num_overlap,
                'color': color,
                'width': width,
            }
            self.edges_list.append({'data': edge_data})

        # ProbeSet traits are labelled by gene symbol and carry extra gene
        # fields; every other dataset type is labelled by trait name.
        if trait_db[1].type == "ProbeSet":
            node_data = {
                'id': this_node_id,
                'label': this_trait.symbol,
                'symbol': this_trait.symbol,
                'geneid': this_trait.geneid,
                'omim': this_trait.omim,
                'max_corr': max_corr,
            }
        else:
            node_data = {
                'id': this_node_id,
                'label': this_trait.name,
                'max_corr': max_corr,
            }
        self.nodes_list.append({'data': node_data})

    self.elements = json.dumps(self.nodes_list + self.edges_list)

    # Every sample is placed in group 1.
    groups = [1] * len(self.all_sample_list)

    self.js_data = dict(
        traits=[trait.name for trait in self.traits],
        groups=groups,
        cols=range(len(self.traits)),
        rows=range(len(self.traits)),
        samples=self.all_sample_list,
        sample_data=self.sample_data,
        elements=self.elements,
    )
def __init__(self, start_vars, temp_uuid):
    """Collect the per-trait mapping results and marker data for the figure.

    Args:
        start_vars: mapping whose ``'trait_list'`` entry is a
            comma-separated string of trait identifiers.
        temp_uuid: stored unchanged on ``self.temp_uuid``.

    ``helper_functions.get_trait_db_obs`` is called first; this method
    then reads ``self.trait_list`` as a sequence of ``(trait, dataset)``
    pairs (presumably filled in by that helper - it is defined outside
    this block). ``self.gen_reaper_results()`` is called before
    ``self.trait_results`` is read (presumably it fills that attribute -
    confirm).

    Attributes set:
        temp_uuid, num_permutations (5000), dataset (dataset of the
        first trait), species.
        traits: trait *names*, in ``self.trait_list`` order.
        all_sample_list: every sample key found on any trait, in
            first-seen order.
        sample_data: one row per trait aligned with ``all_sample_list``;
            ``''`` marks a sample the trait has no entry for.
        json_data: dict with ``chrnames`` ([name, mb_length] pairs),
            ``lodnames`` (keys of ``trait_results``), ``chr``, ``pos``
            and ``markernames`` (one entry per marker), plus one entry
            per key of ``trait_results``.
        js_data: ``{'json_data': json_data}``.
    """
    trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
    helper_functions.get_trait_db_obs(self, trait_db_list)

    self.temp_uuid = temp_uuid
    self.num_permutations = 5000
    self.dataset = self.trait_list[0][1]

    # The dictionary that is used to create the json object containing all
    # the data needed to create the figure.
    self.json_data = {}

    self.all_sample_list = []
    self.traits = []

    self.species = species.TheSpecies(dataset=self.trait_list[0][1])
    chromosomes = self.species.chromosomes.chromosomes
    chrnames = [[chromosomes[key].name, chromosomes[key].mb_length]
                for key in chromosomes.keys()]

    # Gather the trait names and the ordered union of their samples.  The
    # set gives O(1) "already seen?" checks; the list preserves the order.
    seen_samples = set()
    for trait_db in self.trait_list:
        this_trait = trait_db[0]
        self.traits.append(this_trait.name)
        for sample in this_trait.data:
            if sample not in seen_samples:
                seen_samples.add(sample)
                self.all_sample_list.append(sample)

    # One row of values per trait, padded with '' for missing samples.
    self.sample_data = []
    for trait_db in self.trait_list:
        this_sample_data = trait_db[0].data
        self.sample_data.append([
            this_sample_data[sample].value if sample in this_sample_data else ''
            for sample in self.all_sample_list
        ])

    self.gen_reaper_results()

    lodnames = list(self.trait_results)

    chr_pos = []
    pos = []
    markernames = []
    for marker in self.dataset.group.markers.markers:
        chr_pos.append(marker['chr'])
        pos.append(marker['Mb'])
        markernames.append(marker['name'])

    self.json_data['chrnames'] = chrnames
    self.json_data['lodnames'] = lodnames
    self.json_data['chr'] = chr_pos
    self.json_data['pos'] = pos
    self.json_data['markernames'] = markernames

    # Per-trait results are stored under the trait's own key, so a trait
    # named like one of the fixed keys above would overwrite it.
    for trait in self.trait_results:
        self.json_data[trait] = self.trait_results[trait]

    self.js_data = dict(
        json_data=self.json_data
    )
    print("self.js_data:", self.js_data)