def _run(goea_results, flds): """Get string representations of P-value data floats .""" obj = MgrNtGOEAs(goea_results) pval_flds = set("s_" + k[2:] for k in flds if k[:2] == "p_") nts = obj.get_nts_strpval() fmts = ["{FLD}={{{FLD}}}".format(FLD=f) for f in pval_flds] fmt = "{N} {NS} {GO} " + " ".join(fmts) + " {name}" for idx, ntd in enumerate(nts): print(fmt.format(N=idx, **ntd._asdict())) print("FORMAT", fmt)
def test_example(): """Test GoeaResults in plotting package.""" # -------------------------------------------------------------------- # -------------------------------------------------------------------- # Gene Ontology Enrichment Analysis (GOEA) # -------------------------------------------------------------------- # -------------------------------------------------------------------- taxid = 10090 # Mouse study # Load ontologies, associations, and population ids geneids_pop = GeneID2nt_mus.keys() geneids2symbol_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx") geneids_study = geneids2symbol_study.keys() goeaobj = get_goeaobj("fdr_bh", geneids_pop, taxid) go2obj = goeaobj.obo_dag # Run GOEA on study goea_results_all = goeaobj.run_study(geneids_study) goea_results_sig = [r for r in goea_results_all if r.p_fdr_bh < 0.05] goea_results_nt = MgrNtGOEAs(goea_results_sig).get_goea_nts_all() assert goea_results_nt ns2gos = get_ns2gos(goea_results_sig) # Test plotting GOEA results gosubdag = GoSubDag(set(r.GO for r in goea_results_sig), go2obj) plot_results("test_plot_goids_a_goea_{NS}.png", goea_results_sig, id2symbol=geneids2symbol_study, parentcnt=True, childcnt=True) for nss, goids in ns2gos.items(): plt_goids(gosubdag, "test_plot_goids_b_{NS}.png".format(NS=nss), goids) plot_gos("test_plot_goids_c_{NS}.png".format(NS=nss), goids, go2obj)
def test_example(prt=sys.stdout): """Test GoeaResults in plotting package.""" # -------------------------------------------------------------------- # -------------------------------------------------------------------- # Gene Ontology Enrichment Analysis (GOEA) # -------------------------------------------------------------------- # -------------------------------------------------------------------- taxid = 10090 # Mouse study # Load ontologies, associations, and population ids geneids_pop = GeneID2nt_mus.keys() geneids2symbol_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx") geneids_study = geneids2symbol_study.keys() goeaobj = get_goeaobj("fdr_bh", geneids_pop, taxid) # Run GOEA on study goea_results_all = goeaobj.run_study(geneids_study) goea_results_sig = [r for r in goea_results_all if r.p_fdr_bh < 0.05] goea_results_nt = MgrNtGOEAs(goea_results_sig).get_goea_nts_all() # Test managing GOEA results objres = GoeaResults(goea_results_sig) run(objres, prt) objnts = GoeaResults(goea_results_nt) run(objnts, prt) # Plot GOEA results fout_img = os.path.join(REPO, "test_plot_objgoearesults_{NS}.png") plot_results(fout_img, goea_results_sig, id2symbol=geneids2symbol_study)
def wr_py_goea_results(self, fout_py, goea_results, **kws): """Save GOEA results into Python package containing list of namedtuples.""" var_name = kws.get("var_name", "goea_results") docstring = kws.get("docstring", "") sortby = kws.get("sortby", None) if goea_results: from goatools.nt_utils import wr_py_nts nts_goea = goea_results # If list has GOEnrichmentRecords or verbose namedtuples, exclude some fields. if hasattr(goea_results[0], "_fldsdefprt") or hasattr( goea_results[0], 'goterm'): # Exclude some attributes from the namedtuple when saving results # to a Python file because the information is redundant or verbose. nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws) docstring = "\n".join( [docstring, "# {VER}\n\n".format(VER=self.obo_dag.version)]) assert hasattr(nts_goea[0], '_fields') if sortby is None: sortby = lambda nt: [ getattr(nt, 'namespace'), getattr(nt, 'enrichment'), getattr(nt, 'p_uncorrected'), getattr(nt, 'depth'), getattr(nt, 'GO') ] nts_goea = sorted(nts_goea, key=sortby) wr_py_nts(fout_py, nts_goea, docstring, var_name)
def get_ns2nts(results, fldnames=None, **kws): """Get namedtuples of GOEA results, split into BP, MF, CC.""" ns2nts = cx.defaultdict(list) nts = MgrNtGOEAs(results).get_goea_nts_all(fldnames, **kws) for ntgoea in nts: ns2nts[ntgoea.NS].append(ntgoea) return ns2nts
def wr_xlsx(self, fout_xlsx, goea_results, **kws): """Write a xlsx file.""" # kws: prt_if indent itemid2name(study_items) objprt = PrtFmt() prt_flds = kws.get('prt_flds', self.objprtres.get_prtflds_default(goea_results)) xlsx_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws) if 'fld2col_widths' not in kws: kws['fld2col_widths'] = {f:objprt.default_fld2col_widths.get(f, 8) for f in prt_flds} RPT.wr_xlsx(fout_xlsx, xlsx_data, **kws)
def get_sortobj(self, goea_results, **kws): """Return a Grouper object, given a list of GOEnrichmentRecord.""" nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws) goids = set(nt.GO for nt in nts_goea) go2nt = {nt.GO:nt for nt in nts_goea} grprobj = Grouper("GOEA", goids, self.hdrobj, self.grprdflt.gosubdag, go2nt=go2nt) grprobj.prt_summary(sys.stdout) # hdrgo_prt", "section_prt", "top_n", "use_sections" sortobj = Sorter(grprobj, section_sortby=lambda nt: getattr(nt, self.pval_fld)) return sortobj
def prt_txt(self, prt, goea_results, prtfmt=None, **kws): """Print GOEA results in text format.""" if prtfmt is None: prtfmt = ( "{GO} {NS} {p_uncorrected:5.2e} {ratio_in_study:>6} {ratio_in_pop:>9} " "{depth:02} {name:40} {study_items}\n") prtfmt = self.adjust_prtfmt(prtfmt) prt_flds = RPT.get_fmtflds(prtfmt) data_nts = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws) RPT.prt_txt(prt, data_nts, prtfmt, prt_flds, **kws) return data_nts
def prt_txt(prt, goea_results, prtfmt=None, **kws): """Print GOEA results in text format.""" objprt = PrtFmt() if prtfmt is None: flds = ['GO', 'NS', 'p_uncorrected', 'ratio_in_study', 'ratio_in_pop', 'depth', 'name', 'study_items'] prtfmt = objprt.get_prtfmt_str(flds) prtfmt = objprt.adjust_prtfmt(prtfmt) prt_flds = RPT.get_fmtflds(prtfmt) data_nts = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws) RPT.prt_txt(prt, data_nts, prtfmt, prt_flds, **kws) return data_nts
def wr_txt(self, fout_txt, goea_results, prtfmt=None, **kws): """Print GOEA results to text file.""" if not goea_results: sys.stdout.write(" 0 GOEA results. NOT WRITING {FOUT}\n".format(FOUT=fout_txt)) return with open(fout_txt, 'w') as prt: if 'title' in kws: prt.write("{TITLE}\n".format(TITLE=kws['title'])) data_nts = self.prt_txt(prt, goea_results, prtfmt, **kws) log = self.log if self.log is not None else sys.stdout log.write(" {N:>5} GOEA results for {CUR:5} study items. WROTE: {F}\n".format( N=len(data_nts), CUR=len(MgrNtGOEAs(goea_results).get_study_items()), F=fout_txt))
def prt_txt(self, prt, goea_results, prtfmt=None, **kws): """Print GOEA results in text format.""" objprt = PrtFmt() if prtfmt is None: flds = ['GO', 'NS', 'p_uncorrected', 'ratio_in_study', 'ratio_in_pop', 'depth', 'name', 'study_items'] prtfmt = objprt.get_prtfmt_str(flds) #### prtfmt = " ".join([objprt.default_fld2fmt[f] for f in flds]) #### prtfmt = ("{GO} {NS} {p_uncorrected:5.2e} {ratio_in_study:>6} {ratio_in_pop:>9} " #### "{depth:02} {name:40} {study_items}\n") prtfmt = objprt.adjust_prtfmt(prtfmt) prt_flds = RPT.get_fmtflds(prtfmt) data_nts = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws) RPT.prt_txt(prt, data_nts, prtfmt, prt_flds, **kws) return data_nts
def get_objaart(goea_results, **kws): """Return a AArtGeneProductSetsOne object.""" nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws)
def wr_tsv(self, fout_tsv, goea_results, **kws): """Write tab-separated table data to file""" prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results)) tsv_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws) RPT.wr_tsv(fout_tsv, tsv_data, **kws)
def run_study_nts(self, study, **kws): """Run GOEA on study ids. Return results as a list of namedtuples.""" goea_results = self.run_study(study, **kws) return MgrNtGOEAs(goea_results).get_goea_nts_all()
def get_go2nt(self, goea_results): """Return go2nt with added formatted string versions of the P-values.""" go2obj = self.objaartall.grprdflt.gosubdag.go2obj # Add string version of P-values goea_nts = MgrNtGOEAs(goea_results).get_nts_strpval() return {go2obj[nt.GO].id: nt for nt in goea_nts if nt.GO in go2obj}
def prt_tsv(self, prt, goea_results, **kws): """Write tab-separated table data""" prt_flds = kws.get('prt_flds', self.objprtres.get_prtflds_default(goea_results)) tsv_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws) RPT.prt_tsv(prt, tsv_data, **kws)