def Run(self):
    """Export the normalized control datasets as a tab-separated IGV file.

    Reads the counts for ``self.ctrldata``, normalizes them with
    ``self.normalization``, then writes to ``self.output``:

    * ``#``-prefixed header lines (normalization method and factors when the
      method is not ``"nonorm"``, the input file names, the column names);
    * one row per entry of the position array, holding the chromosome name,
      start, end (start + 1), a ``TA<position>`` feature label, one
      ``%1.1f``-formatted count per dataset, and a constant ``1``.

    ``self.output`` is always closed, even when an error interrupts the
    export. Returns None.
    """
    self.transit_message("Starting IGV Export")

    try:
        # Get orf data
        self.transit_message("Getting Data")
        (fulldata, position) = tnseq_tools.get_data(self.ctrldata)
        (fulldata, factors) = norm_tools.normalize_data(
            fulldata, self.normalization, self.ctrldata, self.annotation_path)
        position = position.astype(int)

        self.transit_message("Normalizing")
        self.output.write("#Converted to IGV with TRANSIT.\n")
        if self.normalization != "nonorm":
            self.output.write(
                "#Reads normalized using '%s'\n" % self.normalization)
            # isinstance() also accepts numpy.float64 (a float subclass),
            # which an exact type comparison would send to the nested branch.
            if isinstance(factors[0], float):
                factor_str = "\t".join(["%s" % f for f in factors.flatten()])
            else:
                factor_str = " ".join(
                    [",".join(["%s" % bx for bx in b]) for b in factors])
            self.output.write("#Normalization Factors: %s\n" % factor_str)

        self.output.write("#Files:\n")
        for f in self.ctrldata:
            self.output.write("#%s\n" % f)

        dataset_str = "\t".join(
            [transit_tools.fetch_name(F) for F in self.ctrldata])
        self.output.write(
            "#Chromosome\tStart\tEnd\tFeature\t%s\tTAs\n" % dataset_str)
        chrom = transit_tools.fetch_name(self.annotation_path)

        (_, N) = fulldata.shape
        self.progress_range(N)
        for i, pos in enumerate(position):
            counts_str = "\t".join(["%1.1f" % row[i] for row in fulldata])
            self.output.write(
                "%s\t%s\t%s\tTA%s\t%s\t1\n"
                % (chrom, pos, pos + 1, pos, counts_str))

            # Update progress
            text = "Running Export Method... %5.1f%%" % (100.0 * i / N)
            self.progress_update(text, i)
    finally:
        # Release the file handle even if reading or writing failed.
        self.output.close()

    self.transit_message("")  # Printing empty line to flush stdout
    self.finish()
    self.transit_message("Finished Export")
def Run(self):
    """Export the normalized control datasets as a tab-separated IGV file.

    Reads the counts for ``self.ctrldata``, normalizes them with
    ``self.normalization``, then writes to ``self.output``:

    * ``#``-prefixed header lines (normalization method and factors when the
      method is not ``"nonorm"``, the input file names, the column names);
    * one row per entry of the position array, holding the chromosome name,
      start, end (start + 1), a ``TA<position>`` feature label, one
      ``%1.1f``-formatted count per dataset, and a constant ``1``.

    ``self.output`` is always closed, even when an error interrupts the
    export. Returns None.
    """
    self.transit_message("Starting IGV Export")

    try:
        # Get orf data
        self.transit_message("Getting Data")
        (fulldata, position) = tnseq_tools.get_data(self.ctrldata)
        (fulldata, factors) = norm_tools.normalize_data(
            fulldata, self.normalization, self.ctrldata, self.annotation_path)
        position = position.astype(int)

        self.transit_message("Normalizing")
        self.output.write("#Converted to IGV with TRANSIT.\n")
        if self.normalization != "nonorm":
            self.output.write(
                "#Reads normalized using '%s'\n" % self.normalization)
            # isinstance() also accepts numpy.float64 (a float subclass),
            # which an exact type comparison would send to the nested branch.
            if isinstance(factors[0], float):
                factor_str = "\t".join(["%s" % f for f in factors.flatten()])
            else:
                factor_str = " ".join(
                    [",".join(["%s" % bx for bx in b]) for b in factors])
            self.output.write("#Normalization Factors: %s\n" % factor_str)

        self.output.write("#Files:\n")
        for f in self.ctrldata:
            self.output.write("#%s\n" % f)

        dataset_str = "\t".join(
            [transit_tools.fetch_name(F) for F in self.ctrldata])
        self.output.write(
            "#Chromosome\tStart\tEnd\tFeature\t%s\tTAs\n" % dataset_str)
        chrom = transit_tools.fetch_name(self.annotation_path)

        (_, N) = fulldata.shape
        self.progress_range(N)
        for i, pos in enumerate(position):
            counts_str = "\t".join(["%1.1f" % row[i] for row in fulldata])
            self.output.write(
                "%s\t%s\t%s\tTA%s\t%s\t1\n"
                % (chrom, pos, pos + 1, pos, counts_str))

            # Update progress
            text = "Running Export Method... %5.1f%%" % (100.0 * i / N)
            self.progress_update(text, i)
    finally:
        # Release the file handle even if reading or writing failed.
        self.output.close()

    self.transit_message("")  # Printing empty line to flush stdout
    self.finish()
    self.transit_message("Finished Export")
def displayHistogram(self, displayFrame, event):
    """Show the histogram image for the gene in the frame's current row.

    The gene identifier is the value in column 0 of ``displayFrame.row``.
    The image is looked up as ``<gene>.png`` in a folder built from the
    directory of ``displayFrame.path`` and
    ``transit_tools.fetch_name(displayFrame.path)``. When the image is
    missing, an error dialog is shown and a message is printed instead.

    ``event`` is accepted but not used.
    """
    orf_id = displayFrame.grid.GetCellValue(displayFrame.row, 0)
    hist_dir = os.path.join(
        ntpath.dirname(displayFrame.path),
        transit_tools.fetch_name(displayFrame.path),
    )
    image_path = os.path.join(hist_dir, orf_id + ".png")

    # Guard clause: report the missing image and stop.
    if not os.path.exists(image_path):
        transit_tools.ShowError(MSG="Error Displaying File. Histogram image not found. Make sure results were obtained with the histogram option turned on.")
        print("Error Displaying File. Histogram image does not exist.")
        return

    viewer = pytransit.fileDisplay.ImgFrame(None, image_path)
    viewer.Show()
def displayHistogram(self, displayFrame, event):
    """Show the histogram image for the gene in the frame's current row.

    The gene identifier is the value in column 0 of ``displayFrame.row``.
    The image is looked up as ``<gene>.png`` in a folder built from the
    directory of ``displayFrame.path`` and
    ``transit_tools.fetch_name(displayFrame.path)``. When the image is
    missing, an error dialog is shown and a message is printed instead.

    ``event`` is accepted but not used.
    """
    gene = displayFrame.grid.GetCellValue(displayFrame.row, 0)
    filepath = os.path.join(
        ntpath.dirname(displayFrame.path),
        transit_tools.fetch_name(displayFrame.path))
    filename = os.path.join(filepath, gene + ".png")
    if os.path.exists(filename):
        imgWindow = pytransit.fileDisplay.ImgFrame(None, filename)
        imgWindow.Show()
    else:
        transit_tools.ShowError(MSG="Error Displaying File. Histogram image not found. Make sure results were obtained with the histogram option turned on.")
        # Call form instead of the Python 2 print statement: the statement is
        # a SyntaxError on Python 3, while print("...") with a single string
        # prints the same text on both versions.
        print("Error Displaying File. Histogram image does not exist.")
def Run(self):
    """Export per-gene mean counts for the control datasets.

    Reads and normalizes the counts for ``self.ctrldata``, then writes to
    ``self.output``:

    * ``#``-prefixed header lines (normalization method and factors when the
      method is not ``"nonorm"``, the input file names, the column names);
    * one row per gene from ``tnseq_tools.Genes`` with its orf, name, ``n``
      and the ``%1.2f``-formatted mean of ``gene.reads`` along axis 1
      (presumably one value per dataset -- confirm against ``Genes``).
      Genes with ``n == 0`` get ``0.00`` for each of the K datasets.

    ``self.output`` is always closed, even when an error interrupts the
    export. Returns None.
    """
    self.transit_message("Starting Gene Mean Counts Export")

    try:
        # Get orf data
        self.transit_message("Getting Data")
        (fulldata, _) = tnseq_tools.get_data(self.ctrldata)
        (fulldata, factors) = norm_tools.normalize_data(
            fulldata, self.normalization, self.ctrldata, self.annotation_path)

        self.transit_message("Normalizing")
        self.output.write("#Summarized to Mean Gene Counts with TRANSIT.\n")
        if self.normalization != "nonorm":
            self.output.write(
                "#Reads normalized using '%s'\n" % self.normalization)
            # isinstance() also accepts numpy.float64 (a float subclass),
            # which an exact type comparison would send to the nested branch.
            if isinstance(factors[0], float):
                factor_str = "\t".join(["%s" % f for f in factors.flatten()])
            else:
                factor_str = " ".join(
                    [",".join(["%s" % bx for bx in b]) for b in factors])
            self.output.write("#Normalization Factors: %s\n" % factor_str)

        self.output.write("#Files:\n")
        for f in self.ctrldata:
            self.output.write("#%s\n" % f)

        # K is the number of datasets (rows of the data matrix).
        (K, _) = fulldata.shape

        # Get Gene objects
        G = tnseq_tools.Genes(
            self.ctrldata, self.annotation_path, norm=self.normalization)
        N = len(G)
        self.progress_range(N)

        dataset_header = "\t".join(
            [transit_tools.fetch_name(D) for D in self.ctrldata])
        self.output.write(
            "#Orf\tName\tNumber of TA sites\t%s\n" % dataset_header)

        # Row used for genes without any sites: K zeros.
        empty_row = "\t".join(["%1.2f" % 0.0] * K)
        for i, gene in enumerate(G):
            if gene.n > 0:
                data_str = "\t".join(
                    ["%1.2f" % M for M in numpy.mean(gene.reads, 1)])
            else:
                data_str = empty_row
            self.output.write(
                "%s\t%s\t%s\t%s\n" % (gene.orf, gene.name, gene.n, data_str))

            # Update progress
            text = "Running Export Method... %5.1f%%" % (100.0 * i / N)
            self.progress_update(text, i)
    finally:
        # Release the file handle even if reading or writing failed.
        self.output.close()

    self.transit_message("")  # Printing empty line to flush stdout
    self.finish()
    self.transit_message("Finished Export")
def Run(self):
    """Run the resampling comparison of control vs. experimental datasets.

    Steps, in order:

    1. Probe for matplotlib; when it cannot be imported, histograms are
       disabled (``self.doHistogram = False``).
    2. Load the combined control + experimental data, optionally normalize
       it (``self.normalization``) and apply LOESS correction
       (``self.LOESS``).
    3. For every gene call ``stat_tools.resampling`` on the flattened control
       and experimental reads (plus ``self.pseudocount``); genes with
       ``k == 0`` or ``n == 0`` get neutral placeholder values. When
       histograms are enabled, save ``<orf>.png`` for each gene.
    4. Sort the rows, apply ``stat_tools.BH_fdr_correction`` to the two-tailed
       p-values and write the header and one row per gene to ``self.output``.

    Relies on a ``columns`` name defined outside this method for the column
    header line. Returns None.
    """
    # The import doubles as an availability probe; `plt` is only used below
    # when histograms stay enabled.
    try:
        import matplotlib.pyplot as plt
    except Exception:
        # print("...") with a single string works on Python 2 and 3 alike.
        print("Error: cannot do histograms")
        self.doHistogram = False

    self.transit_message("Starting resampling Method")
    start_time = time.time()

    if self.doHistogram:
        histPath = os.path.join(
            os.path.dirname(self.output.name),
            transit_tools.fetch_name(self.output.name) + "_histograms")
        if not os.path.isdir(histPath):
            os.makedirs(histPath)
    else:
        histPath = ""

    # Rows [0, Kctrl) of gene.reads are control, the rest experimental.
    Kctrl = len(self.ctrldata)

    # Get orf data
    self.transit_message("Getting Data")
    (data, position) = transit_tools.get_validated_data(
        self.ctrldata + self.expdata, wxobj=self.wxobj)
    (K, N) = data.shape

    if self.normalization != "nonorm":
        self.transit_message("Normalizing using: %s" % self.normalization)
        (data, _) = norm_tools.normalize_data(
            data, self.normalization, self.ctrldata + self.expdata,
            self.annotation_path)

    if self.LOESS:
        self.transit_message("Performing LOESS Correction")
        for j in range(K):
            data[j] = stat_tools.loess_correction(position, data[j])

    G = tnseq_tools.Genes(
        self.ctrldata + self.expdata, self.annotation_path,
        ignoreCodon=self.ignoreCodon, nterm=self.NTerminus,
        cterm=self.CTerminus, data=data, position=position)

    # Resampling
    results = []
    N = len(G)
    self.progress_range(N)
    for count, gene in enumerate(G, 1):
        if gene.k == 0 or gene.n == 0:
            # Nothing to test: neutral statistics and p-values of 1.
            (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail,
             pval_2tail, testlist, data1, data2) = (
                0, 0, 0, 0, 1.00, 1.00, 1.00, [], [0], [0])
        else:
            if not self.includeZeros:
                # Keep only sites with a non-zero sum across datasets.
                ii = numpy.sum(gene.reads, 0) > 0
            else:
                ii = numpy.ones(gene.n) == 1

            data1 = gene.reads[:Kctrl, ii].flatten() + self.pseudocount
            data2 = gene.reads[Kctrl:, ii].flatten() + self.pseudocount
            (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail,
             pval_2tail, testlist) = stat_tools.resampling(
                data1, data2, S=self.samples,
                testFunc=stat_tools.F_mean_diff_flat,
                adaptive=self.adaptive)

        if self.doHistogram:
            if testlist:
                n, bins, patches = plt.hist(
                    testlist, density=1, facecolor='c', alpha=0.75, bins=100)
            else:
                # No resampled values: draw a placeholder histogram.
                n, bins, patches = plt.hist(
                    [0, 0], density=1, facecolor='c', alpha=0.75, bins=100)
            plt.xlabel('Delta Mean')
            plt.ylabel('Probability')
            plt.title('%s - Histogram of Delta Mean' % gene.orf)
            plt.axvline(test_obs, color='r', linestyle='dashed', linewidth=3)
            plt.grid(True)
            genePath = os.path.join(histPath, gene.orf + ".png")
            if not os.path.exists(histPath):
                os.makedirs(histPath)
            plt.savefig(genePath)
            plt.clf()

        sum1 = numpy.sum(data1)
        sum2 = numpy.sum(data2)
        results.append([
            gene.orf, gene.name, gene.desc, gene.n, mean1, mean2, sum1, sum2,
            test_obs, log2FC, pval_2tail
        ])

        # Update progress
        text = "Running Resampling Method... %5.1f%%" % (100.0 * count / N)
        self.progress_update(text, count)

    self.transit_message("")  # Printing empty line to flush stdout
    self.transit_message("Performing Benjamini-Hochberg Correction")
    results.sort()
    qval = stat_tools.BH_fdr_correction([row[-1] for row in results])

    # NOTE(review): the .encode('utf-8') calls below are kept from the
    # original; on Python 3 they render as b'...' in the header -- confirm
    # the target interpreter before changing them.
    self.output.write("#Resampling\n")
    if self.wxobj:
        self.output.write(
            "#GUI with: norm=%s, samples=%s, pseudocounts=%1.2f, adaptive=%s, histogram=%s, includeZeros=%s, output=%s\n"
            % (self.normalization, self.samples, self.pseudocount,
               self.adaptive, self.doHistogram, self.includeZeros,
               self.output.name.encode('utf-8')))
    else:
        self.output.write("#Console: python %s\n" % " ".join(sys.argv))
    self.output.write(
        "#Control Data: %s\n" % (",".join(self.ctrldata).encode('utf-8')))
    self.output.write(
        "#Experimental Data: %s\n" % (",".join(self.expdata).encode('utf-8')))
    self.output.write(
        "#Annotation path: %s\n" % (self.annotation_path.encode('utf-8')))
    self.output.write("#Time: %s\n" % (time.time() - start_time))
    self.output.write("#%s\n" % "\t".join(columns))

    for i, row in enumerate(results):
        (orf, name, desc, n, mean1, mean2, sum1, sum2, test_obs, log2FC,
         pval_2tail) = row
        self.output.write(
            "%s\t%s\t%s\t%d\t%1.1f\t%1.1f\t%1.2f\t%1.1f\t%1.2f\t%1.1f\t%1.5f\t%1.5f\n"
            % (orf, name, desc, n, mean1, mean2, log2FC, sum1, sum2, test_obs,
               pval_2tail, qval[i]))
    self.output.close()

    self.transit_message("Adding File: %s" % (self.output.name))
    self.add_file(filetype="Resampling")
    self.finish()
    self.transit_message("Finished resampling Method")
def Run(self):
    """Run resampling between the control and experimental conditions.

    Steps, in order:

    1. Probe for matplotlib; when it cannot be imported, histograms are
       disabled (``self.doHistogram = False``). Otherwise the histogram
       folder next to ``self.output`` is created if needed.
    2. Load control and experimental data, either from a combined wig
       (``self.combinedWigParams``) filtered by its two conditions, or from
       ``self.ctrldata`` / ``self.expdata``.
    3. Abort with an error (returning early, without calling
       ``self.finish()``) when the two sets have a different number of sites
       and ``self.diffStrains`` is false.
    4. Preprocess both sets, build the ``Genes`` objects, decide whether
       library-aware resampling applies, then delegate to
       ``self.run_resampling`` and ``self.write_output``.

    Returns None.
    """
    # The import is only an availability probe for matplotlib here.
    try:
        import matplotlib.pyplot as plt
    except Exception:
        print("Error: cannot do histograms")
        self.doHistogram = False

    self.transit_message("Starting resampling Method")
    start_time = time.time()

    histPath = ""
    if self.doHistogram:
        histPath = os.path.join(
            os.path.dirname(self.output.name),
            transit_tools.fetch_name(self.output.name) + "_histograms")
        if not os.path.isdir(histPath):
            os.makedirs(histPath)

    # Get orf data
    self.transit_message("Getting Data")
    if self.diffStrains:
        self.transit_message("Multiple annotation files found")
        self.transit_message(
            "Mapping ctrl data to {0}, exp data to {1}".format(
                self.annotation_path, self.annotation_path_exp))

    if self.combinedWigParams:
        (position, data, filenamesInCombWig) = tnseq_tools.read_combined_wig(
            self.combinedWigParams['combined_wig'])
        conditionsByFile, _, _, _ = tnseq_tools.read_samples_metadata(
            self.combinedWigParams['samples_metadata'])
        conditions = self.wigs_to_conditions(
            conditionsByFile, filenamesInCombWig)
        data, conditions = self.filter_wigs_by_conditions(
            data, conditions, self.combinedWigParams['conditions'])
        # First listed condition is control, second is experimental.
        ctrl_condition = self.combinedWigParams['conditions'][0]
        exp_condition = self.combinedWigParams['conditions'][1]
        data_ctrl = numpy.array(
            [d for i, d in enumerate(data)
             if conditions[i].lower() == ctrl_condition])
        data_exp = numpy.array(
            [d for i, d in enumerate(data)
             if conditions[i].lower() == exp_condition])
        position_ctrl, position_exp = position, position
    else:
        (data_ctrl, position_ctrl) = transit_tools.get_validated_data(
            self.ctrldata, wxobj=self.wxobj)
        (data_exp, position_exp) = transit_tools.get_validated_data(
            self.expdata, wxobj=self.wxobj)

    (_, N_ctrl) = data_ctrl.shape
    (_, N_exp) = data_exp.shape

    if not self.diffStrains and (N_ctrl != N_exp):
        self.transit_error("Error: Ctrl and Exp wig files don't have the same number of sites.")
        self.transit_error("Make sure all .wig files come from the same strain.")
        return

    self.transit_message("Preprocessing Ctrl data...")
    data_ctrl = self.preprocess_data(position_ctrl, data_ctrl)

    self.transit_message("Preprocessing Exp data...")
    data_exp = self.preprocess_data(position_exp, data_exp)

    G_ctrl = tnseq_tools.Genes(
        self.ctrldata, self.annotation_path, ignoreCodon=self.ignoreCodon,
        nterm=self.NTerminus, cterm=self.CTerminus,
        data=data_ctrl, position=position_ctrl)
    G_exp = tnseq_tools.Genes(
        self.expdata, self.annotation_path_exp, ignoreCodon=self.ignoreCodon,
        nterm=self.NTerminus, cterm=self.CTerminus,
        data=data_exp, position=position_exp)

    doLibraryResampling = False
    # If library string not empty
    if self.ctrl_lib_str or self.exp_lib_str:
        letters_ctrl = set(self.ctrl_lib_str)
        letters_exp = set(self.exp_lib_str)

        # Exactly one library, shared by both conditions: ordinary
        # resampling is enough. The original test compared the set itself
        # to 1 (`letters_exp==1`), which is always False, so this branch
        # was unreachable. Requiring the two sets to be equal keeps the
        # mismatch error below for differing single letters.
        if len(letters_ctrl) == 1 and letters_ctrl == letters_exp:
            pass
        # Otherwise both conditions must use the same set of letters.
        else:
            lib_diff = letters_ctrl ^ letters_exp
            if not lib_diff:
                doLibraryResampling = True
            else:
                transit_tools.transit_error("Error: Library Strings (Ctrl = %s, Exp = %s) do not use the same letters. Make sure every letter / library is represented in both Control and Experimental Conditions. Proceeding with resampling assuming all datasets belong to the same library." % (self.ctrl_lib_str, self.exp_lib_str))
                self.ctrl_lib_str = ""
                self.exp_lib_str = ""

    (data, qval) = self.run_resampling(
        G_ctrl, G_exp, doLibraryResampling, histPath)
    self.write_output(data, qval, start_time)

    self.finish()
    self.transit_message("Finished resampling Method")
def Run(self):
    """Run resampling between the control and experimental conditions.

    Steps, in order:

    1. Probe for matplotlib; when it cannot be imported, histograms are
       disabled (``self.doHistogram = False``). Otherwise the histogram
       folder next to ``self.output`` is created if needed.
    2. Load control and experimental data, either from a combined wig
       (``self.combinedWigParams``) filtered by its two conditions, or from
       ``self.ctrldata`` / ``self.expdata``.
    3. Abort with an error (returning early, without calling
       ``self.finish()``) when the two sets have a different number of sites
       and ``self.diffStrains`` is false.
    4. Preprocess both sets, build the ``Genes`` objects, decide whether
       library-aware resampling applies, then delegate to
       ``self.run_resampling`` and ``self.write_output``.

    Returns None.
    """
    # The import is only an availability probe for matplotlib here.
    try:
        import matplotlib.pyplot as plt
    except Exception:
        # Call form instead of the Python 2 print statement, which is a
        # SyntaxError on Python 3; a single string prints identically on both.
        print("Error: cannot do histograms")
        self.doHistogram = False

    self.transit_message("Starting resampling Method")
    start_time = time.time()

    histPath = ""
    if self.doHistogram:
        histPath = os.path.join(
            os.path.dirname(self.output.name),
            transit_tools.fetch_name(self.output.name) + "_histograms")
        if not os.path.isdir(histPath):
            os.makedirs(histPath)

    # Get orf data
    self.transit_message("Getting Data")
    if self.diffStrains:
        self.transit_message("Multiple annotation files found")
        self.transit_message(
            "Mapping ctrl data to {0}, exp data to {1}".format(
                self.annotation_path, self.annotation_path_exp))

    if self.combinedWigParams:
        (position, data, filenamesInCombWig) = tnseq_tools.read_combined_wig(
            self.combinedWigParams['combined_wig'])
        conditionsByFile, _, _, _ = tnseq_tools.read_samples_metadata(
            self.combinedWigParams['samples_metadata'])
        conditions = self.wigs_to_conditions(
            conditionsByFile, filenamesInCombWig)
        data, conditions = self.filter_wigs_by_conditions(
            data, conditions, self.combinedWigParams['conditions'])
        # First listed condition is control, second is experimental.
        ctrl_condition = self.combinedWigParams['conditions'][0]
        exp_condition = self.combinedWigParams['conditions'][1]
        data_ctrl = numpy.array(
            [d for i, d in enumerate(data)
             if conditions[i].lower() == ctrl_condition])
        data_exp = numpy.array(
            [d for i, d in enumerate(data)
             if conditions[i].lower() == exp_condition])
        position_ctrl, position_exp = position, position
    else:
        (data_ctrl, position_ctrl) = transit_tools.get_validated_data(
            self.ctrldata, wxobj=self.wxobj)
        (data_exp, position_exp) = transit_tools.get_validated_data(
            self.expdata, wxobj=self.wxobj)

    (_, N_ctrl) = data_ctrl.shape
    (_, N_exp) = data_exp.shape

    if not self.diffStrains and (N_ctrl != N_exp):
        self.transit_error("Error: Ctrl and Exp wig files don't have the same number of sites.")
        self.transit_error("Make sure all .wig files come from the same strain.")
        return

    self.transit_message("Preprocessing Ctrl data...")
    data_ctrl = self.preprocess_data(position_ctrl, data_ctrl)

    self.transit_message("Preprocessing Exp data...")
    data_exp = self.preprocess_data(position_exp, data_exp)

    G_ctrl = tnseq_tools.Genes(
        self.ctrldata, self.annotation_path, ignoreCodon=self.ignoreCodon,
        nterm=self.NTerminus, cterm=self.CTerminus,
        data=data_ctrl, position=position_ctrl)
    G_exp = tnseq_tools.Genes(
        self.expdata, self.annotation_path_exp, ignoreCodon=self.ignoreCodon,
        nterm=self.NTerminus, cterm=self.CTerminus,
        data=data_exp, position=position_exp)

    doLibraryResampling = False
    # If library string not empty
    if self.ctrl_lib_str or self.exp_lib_str:
        letters_ctrl = set(self.ctrl_lib_str)
        letters_exp = set(self.exp_lib_str)

        # Exactly one library, shared by both conditions: ordinary
        # resampling is enough. The original test compared the set itself
        # to 1 (`letters_exp==1`), which is always False, so this branch
        # was unreachable. Requiring the two sets to be equal keeps the
        # mismatch error below for differing single letters.
        if len(letters_ctrl) == 1 and letters_ctrl == letters_exp:
            pass
        # Otherwise both conditions must use the same set of letters.
        else:
            lib_diff = letters_ctrl ^ letters_exp
            if not lib_diff:
                doLibraryResampling = True
            else:
                transit_tools.transit_error("Error: Library Strings (Ctrl = %s, Exp = %s) do not use the same letters. Make sure every letter / library is represented in both Control and Experimental Conditions. Proceeding with resampling assuming all datasets belong to the same library." % (self.ctrl_lib_str, self.exp_lib_str))
                self.ctrl_lib_str = ""
                self.exp_lib_str = ""

    (data, qval) = self.run_resampling(
        G_ctrl, G_exp, doLibraryResampling, histPath)
    self.write_output(data, qval, start_time)

    self.finish()
    self.transit_message("Finished resampling Method")