def _get_ordered_files(self):
    """
    Collect the permutation files and group them with
    ``_get_ordered_files_from_list``.

    Globs for ``<self.inout.out>*.perm`` and keeps only the paths that match
    ``<out><1 to 8 characters>.P<pheno>.perm``. The filtered list is passed
    to ``_get_ordered_files_from_list`` and its result is returned unchanged
    (per the original description: a dict with the phenotype as key and a
    list of filenames as value).

    Logs a message and terminates the program through ``sys.exit()`` when
    the glob finds no file at all.
    """
    files = glob.glob(self.inout.out + "*.perm")
    if not files:
        # Only the raw glob result is checked; a list that is emptied by the
        # pattern filter below is still handed on.
        log.info("Could not find any permutations named as " + self.inout.out + "*.perm")
        log.info(common.get_terminated_time())
        sys.exit()

    # The output prefix is a literal path, not a regex: escape it so that
    # characters such as ".", "+", "(" or a backslash in the path are not
    # interpreted as pattern syntax.
    pattern = re.compile(re.escape(self.inout.out) + r".{1,8}\.P(?P<pheno>.*)\.perm")
    files_clean = [x for x in files if pattern.search(x) is not None]

    # NOTE(review): called as a plain name, not through self - confirm that
    # _get_ordered_files_from_list is defined at module scope.
    return _get_ordered_files_from_list(files_clean)
def mergeresults(self): """ Merge the permutation results of every phenotype into one file each.

    Reads the sumlog mapping from self._load_sumlog_files(), the per-phenotype
    results from self._get_ordered_results() and the per-phenotype file lists
    from self._get_ordered_files(). The phenotype keys are visited in
    common.alpha_sort order.

    For every key the text returned by format_permout() is saved as
    "merged.P<key>.perm" through self.inout.save_text_to_filename. When the
    sumlog mapping has that key, the sumlog results are read into a
    Clusterresults object, the text returned by format_permutated_results()
    is saved as "merged.P<key>.empp", both saved filenames are stored in
    self.files[key] and, if self.inout.run_rproject is set, a distribution
    plot is drawn through jag.plot_with_r. Going by its message text, the
    warning naming the expected sumlog file is logged for a key without one.

    The statements end by logging common.get_terminated_time() and calling
    sys.exit(). pheno_nr is incremented per key but never read here.

    NOTE(review): the line structure of this block has been lost, so it
    cannot be read off the text whether the final log / sys.exit() pair sits
    inside the else branch or after the loop - confirm against the original
    layout before restructuring.
    NOTE(review): calling .sort() on the result of .keys() and using
    .has_key() look like Python 2 dict idioms - confirm the target
    interpreter before porting.
    """ #load sumlog files to create sumlog files aa_result = self._load_sumlog_files() ordered_results = self._get_ordered_results() perm_files = self._get_ordered_files() keys = ordered_results.keys() keys.sort(key=common.alpha_sort) pheno_nr = 0 #merge each phenotype for key in keys: pheno_nr=pheno_nr+1 log.info("\nMerging " + (str(len(perm_files[key]))) + " permutation files for phenotype " + key + "...") perm_out_string = ordered_results[key].format_permout() # concatenated results perm_filename = "merged.P" + key + ".perm" perm_out_filename = self.inout.save_text_to_filename(perm_filename, perm_out_string) log.info("Saved merged permutations as " + perm_out_filename) #save empirical P file if sumlog files is found if (aa_result.has_key(key)): aa_object = Clusterresults() aa_object.read_formated_results(common.getfile_handle(aa_result[key])) empp_out_as_text = ordered_results[key].format_permutated_results(aa_object) emp_filename = "merged.P" + key + ".empp" empp_filename = self.inout.save_text_to_filename(emp_filename, empp_out_as_text) log.info("Saved empirical pvalues as " + empp_filename) #call R for distribution plot self.files[key] = {"perm":perm_out_filename, "empp":empp_filename} if self.inout.run_rproject: import jag.plot_with_r as plot_with_r plotter = plot_with_r.call_r(self.inout) plotter.draw_dist_plot(self.files, key) else: log.info("\nWarning: Could not find sumlog file " + self.inout.out + ".P" + key + ".sumlog") log.info(common.get_terminated_time()) sys.exit()
def format_permout(self):
    """
    Format the permutation scores as a tab-separated table.

    The header row holds the group names of ``self.permutated_scores``
    (sorted with ``common.alpha_sort``) followed by ``seed``. Every further
    row holds, for one permutation index, the score of each group in header
    order and then the matching entry of ``self.seeds``.

    Returns:
        The table as a single string, each row terminated by a newline.
        When there are no groups at all, only the header row is returned.

    Raises:
        SystemExit: when the groups do not all hold the same number of
            permutation scores (a warning is logged first).
    """
    # Function-scope import so the block does not depend on a module-level one.
    import sys

    scores = self.permutated_scores
    # list(...) keeps this working whether keys() yields a list or a view.
    groupnames = list(scores.keys())
    if not groupnames:
        # No first group exists whose length could define the row count.
        return "\tseed\n"

    groupnames.sort(key=common.alpha_sort)
    lengths = [len(scores[g]) for g in groupnames]
    if len(set(lengths)) > 1:
        log.info("\nWarning: gene-sets are not equal over all permuted files. Analysis will be terminated.")
        log.info(common.get_terminated_time())
        sys.exit()

    # Collect the rows and join once instead of growing a string with +=.
    rows = ['\t'.join(groupnames) + "\tseed\n"]
    for i in range(lengths[0]):
        row = '\t'.join(str(scores[g][i]) for g in groupnames)
        rows.append(row + "\t" + str(self.seeds[i]) + "\n")
    return ''.join(rows)