예제 #1
0
    def __init__(self, start_vars):
        """Compute pairwise trait correlations plus the inputs for PCA and the JS view.

        ``start_vars['trait_list']`` is a comma-separated string of trait
        identifiers.  They are handed to ``helper_functions.get_trait_db_obs``;
        ``self.trait_list`` is read immediately afterwards, so that call is
        assumed to set it.  Each entry is indexed as ``[0]`` (the trait) and
        ``[1]`` (its dataset).

        Attributes set here: ``all_sample_list``, ``traits``, ``sample_data``,
        ``lowest_overlap``, ``corr_results``, ``pca_corr_results``,
        ``loadings_array`` and ``js_data``.
        """
        trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        # Union of every trait's sample names, kept in first-seen order.  The
        # set only backs the membership test so de-duplication is not quadratic.
        self.all_sample_list = []
        self.traits = []
        seen_samples = set()
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait)
            for sample in this_trait.data:
                if sample not in seen_samples:
                    seen_samples.add(sample)
                    self.all_sample_list.append(sample)

        # One row per trait, aligned to all_sample_list; '' marks a missing value.
        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append(
                [this_sample_data[sample].value if sample in this_sample_data else ''
                 for sample in self.all_sample_list])

        # Lowest number of overlapping samples seen for any pair, capped at 8
        # (8 is the point at which the user is shown a warning).
        self.lowest_overlap = 8

        self.corr_results = []
        self.pca_corr_results = []
        for row_index, trait_db in enumerate(self.trait_list):
            this_sample_data = trait_db[0].data

            corr_result_row = []
            pca_corr_result_row = []
            for col_index, target in enumerate(self.trait_list):
                target_trait = target[0]
                target_samples = target[1].group.samplelist
                target_sample_data = target_trait.data
                print("target_samples", len(target_samples))

                # Paired values for the samples both traits have data for.
                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if sample in this_sample_data and sample in target_sample_data:
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)

                if num_overlap < self.lowest_overlap:
                    self.lowest_overlap = num_overlap

                if num_overlap == 0:
                    corr_result_row.append([target_trait, 0, num_overlap])
                    pca_corr_result_row.append(0)
                    continue

                pearson_r, _ = scipy.stats.pearsonr(this_trait_vals, target_vals)

                # Cells up to and including the diagonal hold Pearson r; cells
                # to the right of it hold Spearman rho.  The split is decided by
                # position: testing "r == 1" is a float equality that is not
                # guaranteed to hit on the diagonal and would also trigger on
                # any perfectly correlated pair left of it.
                if col_index <= row_index:
                    sample_r = pearson_r
                else:
                    sample_r, _ = scipy.stats.spearmanr(this_trait_vals, target_vals)

                corr_result_row.append([target_trait, sample_r, num_overlap])
                # The PCA matrix is always Pearson, on both sides of the diagonal.
                pca_corr_result_row.append(pearson_r)

            self.corr_results.append(corr_result_row)
            self.pca_corr_results.append(pca_corr_result_row)

        print("corr_results:", pf(self.corr_results))

        groups = [1] * len(self.all_sample_list)

        # The return value is not used here; process_loadings() is called right
        # after, so calculate_pca presumably stores what it needs on self -
        # confirm against its definition.
        self.calculate_pca(self.pca_corr_results, range(len(self.traits)))

        self.loadings_array = self.process_loadings()

        self.js_data = dict(traits = [trait.name for trait in self.traits],
                            groups = groups,
                            cols = range(len(self.traits)),
                            rows = range(len(self.traits)),
                            samples = self.all_sample_list,
                            sample_data = self.sample_data,)
예제 #2
0
    def __init__(self, start_vars):
        """Compute the pairwise trait correlation matrix and the JS payload.

        ``start_vars['trait_list']`` is a comma-separated string of trait
        identifiers.  They are handed to ``helper_functions.get_trait_db_obs``;
        ``self.trait_list`` is read immediately afterwards, so that call is
        assumed to set it.  Each entry is indexed as ``[0]`` (the trait) and
        ``[1]`` (its dataset).

        Attributes set here: ``all_sample_list``, ``traits``, ``sample_data``,
        ``corr_results``, ``corr_results_for_pca`` (left empty) and ``js_data``.
        """
        trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        # Union of every trait's sample names, kept in first-seen order.  The
        # set only backs the membership test so de-duplication is not quadratic.
        self.all_sample_list = []
        self.traits = []
        seen_samples = set()
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait)
            for sample in this_trait.data:
                if sample not in seen_samples:
                    seen_samples.add(sample)
                    self.all_sample_list.append(sample)

        # One row per trait, aligned to all_sample_list; '' marks a missing value.
        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append(
                [this_sample_data[sample].value if sample in this_sample_data else ''
                 for sample in self.all_sample_list])

        self.corr_results = []
        self.corr_results_for_pca = []
        for row_index, trait_db in enumerate(self.trait_list):
            this_sample_data = trait_db[0].data
            print("this_sample_data", len(this_sample_data))

            corr_result_row = []
            for col_index, target in enumerate(self.trait_list):
                target_trait = target[0]
                target_samples = target[1].group.samplelist
                target_sample_data = target_trait.data
                print("target_samples", len(target_samples))

                # Paired values for the samples both traits have data for.
                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if sample in this_sample_data and sample in target_sample_data:
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)

                if num_overlap == 0:
                    corr_result_row.append([target_trait, 0, num_overlap])
                    continue

                # Cells up to and including the diagonal hold Pearson r; cells
                # to the right of it hold Spearman rho.  The split is decided by
                # position: testing "r == 1" is a float equality that is not
                # guaranteed to hit on the diagonal and would also trigger on
                # any perfectly correlated pair left of it.
                if col_index <= row_index:
                    sample_r, _ = scipy.stats.pearsonr(this_trait_vals, target_vals)
                else:
                    sample_r, _ = scipy.stats.spearmanr(this_trait_vals, target_vals)

                corr_result_row.append([target_trait, sample_r, num_overlap])

            self.corr_results.append(corr_result_row)

        print("corr_results:", pf(self.corr_results))

        groups = [1] * len(self.all_sample_list)

        self.js_data = dict(traits = [trait.name for trait in self.traits],
                            groups = groups,
                            cols = range(len(self.traits)),
                            rows = range(len(self.traits)),
                            samples = self.all_sample_list,
                            sample_data = self.sample_data,)
예제 #3
0
    def __init__(self, start_vars):
        """Compute pairwise trait correlations, their eigen-decomposition and the JS payload.

        ``start_vars['trait_list']`` is a comma-separated string of trait
        identifiers.  They are handed to ``helper_functions.get_trait_db_obs``;
        ``self.trait_list`` is read immediately afterwards, so that call is
        assumed to set it.  Each entry is indexed as ``[0]`` (the trait) and
        ``[1]`` (its dataset).

        Initialisation stops early, leaving the later attributes unset, when

        * the traits do not all belong to the same group
          (``self.insufficient_shared_samples`` is set to True), or
        * there are fewer samples than traits (``all_sample_list``, ``traits``
          and ``sample_data`` are set, nothing after them).

        Otherwise it also sets ``lowest_overlap``, ``corr_results``,
        ``pca_corr_results``, ``trait_data_array``, ``loadings_array`` and
        ``js_data``.
        """
        trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        self.all_sample_list = []
        self.traits = []
        self.insufficient_shared_samples = False

        # Every trait has to come from the group of the first one; stop at the
        # first mismatch.  The set only backs the membership test so sample
        # de-duplication is not quadratic.
        this_group = self.trait_list[0][1].group.name
        seen_samples = set()
        for trait_db in self.trait_list:
            if trait_db[1].group.name != this_group:
                self.insufficient_shared_samples = True
                break
            this_trait = trait_db[0]
            self.traits.append(this_trait)
            for sample in this_trait.data:
                if sample not in seen_samples:
                    seen_samples.add(sample)
                    self.all_sample_list.append(sample)

        if self.insufficient_shared_samples:
            return

        # One row per trait, aligned to all_sample_list; '' marks a missing value.
        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append(
                [this_sample_data[sample].value if sample in this_sample_data else ''
                 for sample in self.all_sample_list])

        # PCA should not be run with more traits than observations/samples.
        # __init__ must return None (returning False raises TypeError when the
        # object is constructed), so bail out with a bare return.
        if len(self.all_sample_list) < len(self.trait_list):
            return

        # Lowest number of overlapping samples seen for any pair, capped at 8
        # (8 is the point at which the user is shown a warning).
        self.lowest_overlap = 8

        self.corr_results = []
        self.pca_corr_results = []
        self.trait_data_array = []
        for row_index, trait_db in enumerate(self.trait_list):
            this_sample_data = trait_db[0].data

            # This trait's values in the group's sample order, skipping samples
            # it has no data for.
            this_db_samples = trait_db[1].group.all_samples_ordered()
            self.trait_data_array.append(
                [this_sample_data[sample].value
                 for sample in this_db_samples if sample in this_sample_data])

            corr_result_row = []
            pca_corr_result_row = []
            for col_index, target in enumerate(self.trait_list):
                target_trait = target[0]
                target_samples = target[1].group.all_samples_ordered()
                target_sample_data = target_trait.data

                # Paired values for the samples both traits have data for.
                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if sample in this_sample_data and sample in target_sample_data:
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)

                if num_overlap < self.lowest_overlap:
                    self.lowest_overlap = num_overlap

                if num_overlap == 0:
                    corr_result_row.append([target_trait, 0, num_overlap])
                    pca_corr_result_row.append(0)
                    continue

                pearson_r, _ = scipy.stats.pearsonr(this_trait_vals, target_vals)

                # Cells up to and including the diagonal hold Pearson r; cells
                # to the right of it hold Spearman rho.  The split is decided by
                # position: testing "r == 1" is a float equality that is not
                # guaranteed to hit on the diagonal and would also trigger on
                # any perfectly correlated pair left of it.
                if col_index <= row_index:
                    sample_r = pearson_r
                else:
                    sample_r, _ = scipy.stats.spearmanr(this_trait_vals, target_vals)

                corr_result_row.append([target_trait, sample_r, num_overlap])
                # The PCA matrix is always Pearson, on both sides of the diagonal.
                pca_corr_result_row.append(pearson_r)

            self.corr_results.append(corr_result_row)
            self.pca_corr_results.append(pca_corr_result_row)

        corr_result_eigen = la.eigenvectors(numarray.array(self.pca_corr_results))
        corr_eigen_value, corr_eigen_vectors = sortEigenVectors(corr_result_eigen)

        groups = [1] * len(self.all_sample_list)

        # The return value is not used here; process_loadings() is called right
        # after, so calculate_pca presumably stores what it needs on self -
        # confirm against its definition.
        self.calculate_pca(range(len(self.traits)), corr_eigen_value, corr_eigen_vectors)

        self.loadings_array = self.process_loadings()

        self.js_data = dict(traits = [trait.name for trait in self.traits],
                            groups = groups,
                            cols = range(len(self.traits)),
                            rows = range(len(self.traits)),
                            samples = self.all_sample_list,
                            sample_data = self.sample_data,)
예제 #4
0
    def run_analysis(self, requestform):
        """Run a WGCNA analysis in R on the traits named in ``requestform``.

        Keys read from ``requestform``:

        * ``'trait_list'``     - comma-separated trait identifiers (required)
        * ``'SoftThresholds'`` - optional comma-separated integer powers; when
          present a soft threshold is picked from them
        * ``'Power'``          - power to use when ``'SoftThresholds'`` is absent
        * ``'TOMtype'`` and ``'MinModuleSize'`` - passed to blockwiseModules

        The outcome is stored in ``self.results`` (keys ``nphe``, ``nstr``,
        ``phenotypes``, ``strains``, ``requestform``, ``Power``, ``network``,
        ``nmod``, ``imgurl``, ``imgloc`` and, when thresholds were given,
        ``PowerEstimate``).  The module dendrogram is written as a PNG to
        ``self.results['imgloc']``.  Nothing is returned.

        All progress output uses single-argument ``print(...)`` calls so the
        method parses, and prints the same text, on both Python 2 and Python 3.
        """
        print("Starting WGCNA analysis on dataset")
        self.r_enableWGCNAThreads()                                      # Enable multi threading
        self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
        print("Retrieved phenotype data from database {}".format(requestform['trait_list']))
        helper_functions.get_trait_db_obs(self, self.trait_db_list)

        self.input = {}           # trait name -> {strain: value}; the phenotype values sent to R
        strains = []              # All the strains we have data for (contains duplicates)
        traits = []               # All the traits we have data for (should not contain duplicates)
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            traits.append(this_trait.name)
            trait_values = {}
            for strain in this_trait.data:
                strains.append(strain)
                trait_values[strain] = this_trait.data[strain].value
            self.input[this_trait.name] = trait_values

        # Transfer the loaded data from python to R
        uStrainsR = r_unique(ro.Vector(strains))    # Unique strains in R vector
        uTraitsR = r_unique(ro.Vector(traits))      # Unique traits in R vector

        r_cat("The number of unique strains:", r_length(uStrainsR), "\n")
        r_cat("The number of unique traits:", r_length(uTraitsR), "\n")

        # rM is the data matrix holding all the data in R: rows = strains, columns = traits
        rM = ro.r.matrix(ri.NA_Real, nrow=r_length(uStrainsR), ncol=r_length(uTraitsR), dimnames = r_list(uStrainsR, uTraitsR))
        for t in uTraitsR:
            trait = t[0]                  # R uses vectors, every single element is a vector
            trait_values = self.input[trait]
            for s in uStrainsR:
                strain = s[0]             # R uses vectors, every single element is a vector
                value = trait_values.get(strain)
                # A strain without a value for this trait keeps the NA_Real the
                # matrix was initialised with, instead of pushing None into R.
                if value is not None:
                    rM.rx[strain, trait] = value  # Update the matrix location
        sys.stdout.flush()                # One flush after the fill, not one per cell

        self.results = {}
        self.results['nphe'] = r_length(uTraitsR)[0]          # Number of phenotypes/traits
        self.results['nstr'] = r_length(uStrainsR)[0]         # Number of strains
        self.results['phenotypes'] = uTraitsR                 # Traits used
        self.results['strains'] = uStrainsR                   # Strains used in the analysis
        self.results['requestform'] = requestform             # Store the user specified parameters for the output page

        # Calculate soft threshold if the user specified the SoftThresholds variable
        if requestform.get('SoftThresholds') is not None:
            powers = [int(threshold.strip()) for threshold in requestform['SoftThresholds'].rstrip().split(",")]
            rpow = r_unlist(r_c(powers))
            print("SoftThresholds: {} == {}".format(powers, rpow))
            self.sft = self.r_pickSoftThreshold(rM, powerVector = rpow, verbose = 5)

            print("PowerEstimate: {}".format(self.sft[0]))
            self.results['PowerEstimate'] = self.sft[0]
            if self.sft[0][0] is ri.NA_Integer:
                print("No power is suitable for the analysis, just use 1")
                self.results['Power'] = 1                         # No power could be estimated
            else:
                self.results['Power'] = self.sft[0][0]            # Use the estimated power
        else:
            # The user clicked a button, so no soft threshold selection
            self.results['Power'] = requestform.get('Power')    # Use the power value the user gives

        # Create the block wise modules using WGCNA
        network = self.r_blockwiseModules(rM, power = self.results['Power'], TOMType = requestform['TOMtype'], minModuleSize = requestform['MinModuleSize'], verbose = 3)

        # Save the network for the GUI
        self.results['network'] = network

        # How many modules and how many genes per module?
        print("WGCNA found {} modules".format(r_table(network[1])))
        self.results['nmod'] = r_length(r_table(network[1]))[0]

        # The iconic WGCNA plot of the modules in the hanging tree
        self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png"
        self.results['imgloc'] = webqtlConfig.TMPDIR + self.results['imgurl']
        r_png(self.results['imgloc'], width=1000, height=600)
        mergedColors = self.r_labels2colors(network[1])
        self.r_plotDendroAndColors(network[5][0], mergedColors, "Module colors", dendroLabels = False, hang = 0.03, addGuide = True, guideHang = 0.05)
        r_dev_off()
        sys.stdout.flush()
예제 #5
0
    def __init__(self, start_vars):
        """Build the node/edge elements of a trait correlation network and the JS payload.

        ``start_vars['trait_list']`` is a comma-separated string of trait
        identifiers.  They are handed to ``helper_functions.get_trait_db_obs``;
        ``self.trait_list`` is read immediately afterwards, so that call is
        assumed to set it.  Each entry is indexed as ``[0]`` (the trait) and
        ``[1]`` (its dataset).

        Every trait becomes a node.  Every ordered pair of distinct traits with
        at least one overlapping sample becomes an edge carrying the Pearson
        correlation, unless that correlation is exactly 1.

        Attributes set here: ``all_sample_list``, ``traits``, ``sample_data``,
        ``lowest_overlap``, ``network_data`` (left empty), ``nodes_list``,
        ``edges_list``, ``elements`` (JSON string of nodes + edges) and
        ``js_data``.
        """
        trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        # Union of every trait's sample names, kept in first-seen order.  The
        # set only backs the membership test so de-duplication is not quadratic.
        self.all_sample_list = []
        self.traits = []
        seen_samples = set()
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait)
            for sample in this_trait.data:
                if sample not in seen_samples:
                    seen_samples.add(sample)
                    self.all_sample_list.append(sample)

        # One row per trait, aligned to all_sample_list; '' marks a missing value.
        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append(
                [this_sample_data[sample].value if sample in this_sample_data else ''
                 for sample in self.all_sample_list])

        # Lowest number of overlapping samples seen for any pair, capped at 8
        # (8 is the point at which the user is shown a warning).
        self.lowest_overlap = 8

        self.network_data = {}
        self.nodes_list = []
        self.edges_list = []
        for trait_index, trait_db in enumerate(self.trait_list):
            this_trait = trait_db[0]
            this_sample_data = this_trait.data
            node_id = str(this_trait.name) + ":" + str(this_trait.dataset.name)

            # Largest |r| over this trait's edges; used to decide whether the
            # node should be hidden when the correlation slider is used.
            max_corr = 0

            for target_index, target in enumerate(self.trait_list):
                target_trait = target[0]
                target_samples = target[1].group.samplelist
                target_sample_data = target_trait.data

                # Paired values for the samples both traits have data for.
                this_trait_vals = []
                target_vals = []
                for sample in target_samples:
                    if sample in this_sample_data and sample in target_sample_data:
                        this_trait_vals.append(this_sample_data[sample].value)
                        target_vals.append(target_sample_data[sample].value)

                this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(this_trait_vals, target_vals)

                if num_overlap < self.lowest_overlap:
                    self.lowest_overlap = num_overlap

                # No edge without shared samples, and no edge from a trait to
                # itself.  The self pair is skipped by position because its
                # correlation is not guaranteed to come out as exactly 1.0.
                if num_overlap == 0 or target_index == trait_index:
                    continue

                sample_r, sample_p = scipy.stats.pearsonr(this_trait_vals, target_vals)
                # Kept from the original: perfectly correlated pairs get no edge.
                if sample_r == 1:
                    continue

                # Edge colour and width by correlation band.  A value outside
                # [-1, 1] (e.g. NaN) falls through to the zero-width default.
                if -1 <= sample_r < -0.7:
                    color = "#0000ff"
                    width = 3
                elif -0.7 <= sample_r < -0.5:
                    color = "#00ff00"
                    width = 2
                elif -0.5 <= sample_r < 0:
                    color = "#000000"
                    width = 0.5
                elif 0 <= sample_r < 0.5:
                    color = "#ffc0cb"
                    width = 0.5
                elif 0.5 <= sample_r < 0.7:
                    color = "#ffa500"
                    width = 2
                elif 0.7 <= sample_r <= 1:
                    color = "#ff0000"
                    width = 3
                else:
                    color = "#000000"
                    width = 0

                max_corr = max(max_corr, abs(sample_r))

                edge_data = {'id' : str(this_trait.name) + '_to_' + str(target_trait.name),
                             'source' : node_id,
                             'target' : str(target_trait.name) + ":" + str(target_trait.dataset.name),
                             'correlation' : round(sample_r, 3),
                             'abs_corr' : abs(round(sample_r, 3)),
                             'p_value' : round(sample_p, 3),
                             'overlap' : num_overlap,
                             'color' : color,
                             'width' : width }

                self.edges_list.append({ 'data' : edge_data })

            # ProbeSet traits are labelled by gene symbol and carry gene
            # annotations; every other dataset type is labelled by trait name.
            if trait_db[1].type == "ProbeSet":
                node_data = {'id' : node_id,
                             'label' : this_trait.symbol,
                             'symbol' : this_trait.symbol,
                             'geneid' : this_trait.geneid,
                             'omim' : this_trait.omim,
                             'max_corr' : max_corr }
            else:
                node_data = {'id' : node_id,
                             'label' : this_trait.name,
                             'max_corr' : max_corr }
            self.nodes_list.append({ 'data' : node_data })

        self.elements = json.dumps(self.nodes_list + self.edges_list)

        groups = [1] * len(self.all_sample_list)

        self.js_data = dict(traits = [trait.name for trait in self.traits],
                            groups = groups,
                            cols = range(len(self.traits)),
                            rows = range(len(self.traits)),
                            samples = self.all_sample_list,
                            sample_data = self.sample_data,
                            elements = self.elements,)
예제 #6
0
    def __init__(self, start_vars, temp_uuid):
        """Run the mapping for a list of traits and assemble the data for the figure.

        ``start_vars['trait_list']`` is a comma-separated string of trait
        identifiers.  They are handed to ``helper_functions.get_trait_db_obs``;
        ``self.trait_list`` is read immediately afterwards, so that call is
        assumed to set it.  Each entry is indexed as ``[0]`` (the trait) and
        ``[1]`` (its dataset).  ``temp_uuid`` is stored as ``self.temp_uuid``.

        Attributes set here: ``temp_uuid``, ``num_permutations``, ``dataset``
        (the dataset of the first trait), ``json_data``, ``all_sample_list``,
        ``traits`` (trait names), ``species``, ``sample_data`` and ``js_data``.
        """
        trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]

        helper_functions.get_trait_db_obs(self, trait_db_list)

        self.temp_uuid = temp_uuid
        self.num_permutations = 5000
        self.dataset = self.trait_list[0][1]

        # The dictionary used to create the json object that contains all the
        # data needed to create the figure.
        self.json_data = {}

        self.all_sample_list = []
        self.traits = []

        # [name, length in Mb] for every chromosome of the species.
        self.species = species.TheSpecies(dataset=self.trait_list[0][1])
        chromosomes = self.species.chromosomes.chromosomes
        chrnames = [[chromosomes[key].name, chromosomes[key].mb_length]
                    for key in chromosomes.keys()]

        # Union of every trait's sample names, kept in first-seen order.  The
        # set only backs the membership test so de-duplication is not quadratic.
        seen_samples = set()
        for trait_db in self.trait_list:
            this_trait = trait_db[0]
            self.traits.append(this_trait.name)
            for sample in this_trait.data:
                if sample not in seen_samples:
                    seen_samples.add(sample)
                    self.all_sample_list.append(sample)

        # One row per trait, aligned to all_sample_list; '' marks a missing value.
        self.sample_data = []
        for trait_db in self.trait_list:
            this_sample_data = trait_db[0].data
            self.sample_data.append(
                [this_sample_data[sample].value if sample in this_sample_data else ''
                 for sample in self.all_sample_list])

        # self.trait_results is read below but never assigned in this method,
        # so this call presumably fills it in - confirm against its definition.
        self.gen_reaper_results()

        lodnames = list(self.trait_results.keys())

        # Parallel per-marker lists: chromosome, position (Mb) and marker name.
        chr_pos = []
        pos = []
        markernames = []
        for marker in self.dataset.group.markers.markers:
            chr_pos.append(marker['chr'])
            pos.append(marker['Mb'])
            markernames.append(marker['name'])

        self.json_data['chrnames'] = chrnames
        self.json_data['lodnames'] = lodnames
        self.json_data['chr'] = chr_pos
        self.json_data['pos'] = pos
        self.json_data['markernames'] = markernames

        # Per-trait results are stored under the trait's own key, next to the
        # fixed keys above (a trait named like one of them would replace it).
        self.json_data.update(self.trait_results)

        self.js_data = dict(
            json_data = self.json_data
        )

        print("self.js_data:", self.js_data)