def test_wr_methods(log=sys.stdout):
    """Demonstrate printing a subset of all available fields using two methods.

    The same GOEA results are written twice: once with the write functions
    inside the GOEnrichmentStudy class and once with the wr_tbl package.
    The two resulting tsv files must have identical contents.

    Args:
        log: Stream handed to both write helpers (defaults to sys.stdout).
    """
    # 1. Gene Ontology Enrichment Analysis
    # 1a. Initialize: Load ontologies, associations, and population gene IDs
    nature_data = get_goea_results()
    goeaobj = nature_data['goeaobj']
    goea_results = nature_data['goea_results']

    # 2. Write results
    #    Write parameters:
    #    The format_string names below are the same names as in the namedtuple field_names.
    prtfmt = "{GO} {NS} {level:>2} {depth:>2} {p_fdr_bh:5.2e} {study_count:>5} {name}\n"
    wr_params = {
        # Format for printing in text format
        'prtfmt': prtfmt,
        # Format for p-values in tsv and xlsx format
        'fld2fmt': {'p_fdr_bh': '{:8.2e}'},
        # Print a subset of the namedtuple fields, not every field in the namedtuple.
        'prt_flds': get_fmtflds(prtfmt),
    }

    # 2a. Use the write functions inside the GOEnrichmentStudy class.
    cwddir = os.getcwd()
    tsv_obj = os.path.join(cwddir, 'nbt3102_subset_obj.tsv')
    tsv_nts = os.path.join(cwddir, 'nbt3102_subset_nt.tsv')
    _wr_3fmt_goeaobj(tsv_obj, goea_results, goeaobj, wr_params, log)

    # 2b. Use the write functions from the wr_tbl package to print a list of namedtuples.
    _wr_3fmt_wrtbl(tsv_nts, goea_results, wr_params, log)

    # shallow=False forces a content comparison. The default (shallow=True)
    # accepts files whose os.stat() signature (type, size, mtime) matches,
    # which can hide differences between two same-sized files written
    # back to back.
    assert filecmp.cmp(tsv_obj, tsv_nts, shallow=False), \
        "CONTENTS DIFFER: {A} {B}".format(A=tsv_obj, B=tsv_nts)
def test_wr_methods(log=sys.stdout):
    """Demonstrate printing a subset of all available fields using two methods.

    GOEA results are written through two independent code paths (the
    GOEnrichmentStudy write functions and the wr_tbl package). The test
    passes only if both paths produce tsv files with the same contents.

    Args:
        log: Stream handed to both write helpers (defaults to sys.stdout).
    """
    # 1. Gene Ontology Enrichment Analysis
    # 1a. Initialize: Load ontologies, associations, and population gene IDs
    nature_data = get_goea_results()
    goeaobj = nature_data['goeaobj']
    goea_results = nature_data['goea_results']

    # 2. Write results
    #    Write parameters:
    #    The format_string names below are the same names as in the namedtuple field_names.
    prtfmt = "{GO} {NS} {level:>2} {depth:>2} {p_fdr_bh:5.2e} {study_count:>5} {name}\n"
    wr_params = {
        # Format for printing in text format
        'prtfmt': prtfmt,
        # Format for p-values in tsv and xlsx format
        'fld2fmt': {
            'p_fdr_bh': '{:8.2e}'
        },
        # Print a subset of the namedtuple fields, not every field in the namedtuple.
        'prt_flds': get_fmtflds(prtfmt)
    }

    # 2a. Use the write functions inside the GOEnrichmentStudy class.
    cwddir = os.getcwd()
    tsv_obj = os.path.join(cwddir, 'nbt3102_subset_obj.tsv')
    tsv_nts = os.path.join(cwddir, 'nbt3102_subset_nt.tsv')
    _wr_3fmt_goeaobj(tsv_obj, goea_results, goeaobj, wr_params, log)

    # 2b. Use the write functions from the wr_tbl package to print a list of namedtuples.
    _wr_3fmt_wrtbl(tsv_nts, goea_results, wr_params, log)

    # Compare file contents, not just os.stat() signatures: with the default
    # shallow=True, two files of equal size and mtime are reported as equal
    # without being read.
    files_match = filecmp.cmp(tsv_obj, tsv_nts, shallow=False)
    assert files_match, "CONTENTS DIFFER: {A} {B}".format(A=tsv_obj, B=tsv_nts)
def test_wr_methods(log=sys.stdout):
    """Demonstrate printing a subset of all available fields using two methods.

    Runs a GOEA on the study gene IDs, keeps only significant results, and
    writes them through two code paths (the GOEnrichmentStudy write functions
    and the wr_tbl package). The two tsv files must have identical contents.

    Args:
        log: Stream handed to both write helpers (defaults to sys.stdout).
    """
    # 1. Gene Ontology Enrichment Analysis
    # 1a. Initialize: Load ontologies, associations, and population gene IDs
    taxid = 10090  # Mouse study
    geneids_pop = GeneID2nt_mus.keys()  # Mouse protein-coding genes
    goeaobj = get_goeaobj("fdr_bh", geneids_pop, taxid)

    # 1b. Run GOEA
    geneids_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")

    def keep_if(nt):
        """Keep a result only if it is significant (p_fdr_bh < 0.05)."""
        return nt.p_fdr_bh < 0.05

    goea_results = goeaobj.run_study(geneids_study, keep_if=keep_if)

    # 2. Write results
    #    Write parameters:
    #    The format_string names below are the same names as in the namedtuple field_names.
    prtfmt = "{GO} {NS} {level:>2} {depth:>2} {p_fdr_bh:5.2e} {study_count:>5} {name}\n"
    wr_params = {
        # Format for printing in text format
        'prtfmt': prtfmt,
        # Format for p-values in tsv and xlsx format
        'fld2fmt': {'p_fdr_bh': '{:8.2e}'},
        # Print a subset of the namedtuple fields, not every field in the namedtuple.
        'prt_flds': get_fmtflds(prtfmt),
    }

    # 2a. Use the write functions inside the GOEnrichmentStudy class.
    _wr_3fmt_goeaobj(goea_results, goeaobj, wr_params, log)

    # 2b. Use the write functions straight from the wr_tbl package to print a list of namedtuples.
    _wr_3fmt_wrtbl(goea_results, wr_params, log)

    # shallow=False forces a content comparison. The default (shallow=True)
    # accepts files whose os.stat() signature (type, size, mtime) matches,
    # without reading them.
    assert filecmp.cmp('nbt3102_subset_obj.tsv', 'nbt3102_subset_nt.tsv', shallow=False), \
        "CONTENTS DIFFER: nbt3102_subset_obj.tsv nbt3102_subset_nt.tsv"
def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
    """Print GOEA results in text format and return the printed records.

    Args:
        prt: Output destination forwarded to RPT.prt_txt
            (presumably a writable stream such as sys.stdout -- confirm).
        goea_results: GOEA results to print.
        prtfmt: Format pattern for one output line; a built-in pattern is
            used when None.
        **kws: Forwarded unchanged to get_goea_nts_prt and RPT.prt_txt.

    Returns:
        The records produced by get_goea_nts_prt for the printed fields.
    """
    # Fall back to the built-in pattern only when the caller gave none.
    fmt = "{GO} {NS} {p_uncorrected:5.2e} {study_count:>5} {name}\n" if prtfmt is None else prtfmt
    fmt = self.adjust_prtfmt(fmt)
    # The fields to extract are exactly those named in the adjusted pattern.
    fields = RPT.get_fmtflds(fmt)
    nts = get_goea_nts_prt(goea_results, fields, **kws)
    RPT.prt_txt(prt, nts, fmt, fields, **kws)
    return nts
def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
    """Print GOEA results in text format and return the printed records.

    Args:
        prt: Output destination forwarded to RPT.prt_txt
            (presumably a writable stream such as sys.stdout -- confirm).
        goea_results: GOEA results to print.
        prtfmt: Format pattern for one output line; a built-in pattern is
            used when None.
        **kws: Forwarded unchanged to get_goea_nts_prt and RPT.prt_txt.

    Returns:
        The records produced by get_goea_nts_prt for the printed fields.
    """
    if prtfmt is not None:
        fmt = prtfmt
    else:
        # Built-in pattern used when the caller does not supply one.
        fmt = (
            "{GO} {NS} {p_uncorrected:5.2e} {ratio_in_study:>6} {ratio_in_pop:>9} "
            "{depth:02} {name:40} {study_items}\n"
        )
    fmt = self.adjust_prtfmt(fmt)
    # The fields to extract are exactly those named in the adjusted pattern.
    fields = RPT.get_fmtflds(fmt)
    nts = get_goea_nts_prt(goea_results, fields, **kws)
    RPT.prt_txt(prt, nts, fmt, fields, **kws)
    return nts
def prt_txt(prt, goea_results, prtfmt=None, **kws):
    """Print GOEA results in text format and return the printed records.

    Args:
        prt: Output destination forwarded to RPT.prt_txt
            (presumably a writable stream such as sys.stdout -- confirm).
        goea_results: GOEA results, wrapped in MgrNtGOEAs to build the records.
        prtfmt: Format pattern for one output line; when None, a pattern is
            built by PrtFmt.get_prtfmt_str from a default list of fields.
        **kws: Forwarded unchanged to get_goea_nts_prt and RPT.prt_txt.

    Returns:
        The records returned by MgrNtGOEAs.get_goea_nts_prt.
    """
    formatter = PrtFmt()
    if prtfmt is not None:
        fmt = prtfmt
    else:
        # Fields used to build the pattern when the caller supplies none.
        default_fields = [
            'GO',
            'NS',
            'p_uncorrected',
            'ratio_in_study',
            'ratio_in_pop',
            'depth',
            'name',
            'study_items',
        ]
        fmt = formatter.get_prtfmt_str(default_fields)
    fmt = formatter.adjust_prtfmt(fmt)
    # The fields to extract are exactly those named in the adjusted pattern.
    fields = RPT.get_fmtflds(fmt)
    nt_mgr = MgrNtGOEAs(goea_results)
    nts = nt_mgr.get_goea_nts_prt(fields, **kws)
    RPT.prt_txt(prt, nts, fmt, fields, **kws)
    return nts
def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
    """Print GOEA results in text format and return the printed records.

    Args:
        prt: Output destination forwarded to RPT.prt_txt
            (presumably a writable stream such as sys.stdout -- confirm).
        goea_results: GOEA results, wrapped in MgrNtGOEAs to build the records.
        prtfmt: Format pattern for one output line; when None, a pattern is
            built by PrtFmt.get_prtfmt_str from a default list of fields.
        **kws: Forwarded unchanged to get_goea_nts_prt and RPT.prt_txt.

    Returns:
        The records returned by MgrNtGOEAs.get_goea_nts_prt.
    """
    fmt_helper = PrtFmt()
    fmt = prtfmt
    if fmt is None:
        # No pattern supplied: build one from the default list of fields.
        fmt = fmt_helper.get_prtfmt_str(
            ['GO', 'NS', 'p_uncorrected',
             'ratio_in_study', 'ratio_in_pop',
             'depth', 'name', 'study_items'])
    fmt = fmt_helper.adjust_prtfmt(fmt)
    # The fields to extract are exactly those named in the adjusted pattern.
    fld_names = RPT.get_fmtflds(fmt)
    records = MgrNtGOEAs(goea_results).get_goea_nts_prt(fld_names, **kws)
    RPT.prt_txt(prt, records, fmt, fld_names, **kws)
    return records
def prt_txt(self, prt, results_nt, prtfmt, **kws):
    """Print GOEA results in text format.

    Args:
        prt: Output destination forwarded to RPT.prt_txt
            (presumably a writable stream such as sys.stdout -- confirm).
        results_nt: GOEA results handed to self._get_nts.
        prtfmt: Format pattern for one output line (required).
        **kws: Forwarded unchanged to self._get_nts and RPT.prt_txt.

    Returns:
        None. The records are printed but not returned.
    """
    fmt = self.adjust_prtfmt(prtfmt)
    # The fields to extract are exactly those named in the adjusted pattern.
    fields = RPT.get_fmtflds(fmt)
    # NOTE(review): the positional True is passed through as-is; its meaning
    # is defined by _get_nts, which is not visible here -- confirm.
    records = self._get_nts(results_nt, fields, True, **kws)
    RPT.prt_txt(prt, records, fmt, fields, **kws)