def test_gosubdag_relationships(wr_new_obo_subset=False):
    """Check GoSubDag ancestors against GOTerm.get_all_upper() for one leaf GO ID.

    Two GODags are loaded from the same obo file, one without optional
    attributes and one with optional_attrs=['relationship'].  When
    wr_new_obo_subset is true, _wr_sub_obo is called first with the subset
    obo path under tests/data.
    """
    # Leaf GO: viral triggering of virus induced gene silencing
    leaf_goid = 'GO:0060150'

    obo_path = os.path.join(REPO, "go-basic.obo")
    dag_r0 = get_godag(obo_path, loading_bar=None)
    dag_r1 = get_godag(obo_path, loading_bar=None, optional_attrs=['relationship'])

    if wr_new_obo_subset:
        subset_path = os.path.join(REPO, "tests/data/viral_gene_silence.obo")
        _wr_sub_obo(subset_path, leaf_goid, dag_r1, obo_path)

    # Each GoSubDag gets its own source set
    subdag_r0 = GoSubDag({leaf_goid}, dag_r0)
    subdag_r1 = GoSubDag({leaf_goid}, dag_r1, relationships=True)

    _run_baseline_r0(subdag_r0, subdag_r1)

    # BASELINE r1: GOTerm.get_all_upper() must equal the GoSubDag ancestors
    for goid, term in subdag_r1.go2obj.items():
        assert subdag_r1.rcntobj.go2parents[goid] == term.get_all_upper()
def run(self, go_sources, exp_gos, **kws):
    """Build a GoSubDag from go_sources and assert its GO IDs equal exp_gos.

    Extra keyword arguments are forwarded unchanged to GoSubDag.
    """
    print("\nSRCS: {GOs}".format(GOs=go_sources))
    subdag = GoSubDag(go_sources, self.go2obj_all, **kws)
    go2nt = subdag.go2nt
    subdag.prt_goids(go2nt)
    # The message is only formatted when the comparison fails
    assert set(go2nt) == exp_gos, "ACT({}) != EXP({})\n{} {}".format(
        sorted(go2nt), sorted(exp_gos), go_sources, kws)
class Run(object):
    """Printing GO IDs and Plotting; GODag from obo using GoSubDag."""

    def __init__(self, obo):
        """Load the GODag from REPO/obo and wrap all of it in a GoSubDag."""
        obo_path = os.path.join(REPO, obo)
        self.go2obj_all = get_godag(obo_path)
        self.gosubdag_all = GoSubDag(None, self.go2obj_all)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print all GO IDs, including alternate GO IDs, in GODag."""
        self.gosubdag_all.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Plot the GO IDs in go_sources to REPO/fout_img, marking alternate IDs."""
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        img_path = os.path.join(REPO, fout_img)
        subdag = GoSubDag(go_sources, self.go2obj_all)
        GoSubDagPlot(subdag, mark_alt_id=True).plt_dag(img_path)

    def run(self, go_sources, exp_gos, **kws):
        """Build a GoSubDag from go_sources and assert its GO IDs equal exp_gos."""
        print("\nSRCS: {GOs}".format(GOs=go_sources))
        subdag = GoSubDag(go_sources, self.go2obj_all, **kws)
        go2nt = subdag.go2nt
        subdag.prt_goids(go2nt)
        # The message is only formatted when the comparison fails
        assert set(go2nt) == exp_gos, "ACT({}) != EXP({})\n{} {}".format(
            sorted(go2nt), sorted(exp_gos), go_sources, kws)
def __init__(self, go2obj, annots, relationships=None, **kws):
    """Initialise the per-GO gene counts from the annotations.

    go2obj: full GODag.
    annots: annotations, passed through clean_anno before use.
    relationships: handed to RelationshipCombos(go2obj).get_set().
    kws: only 'prt' is read; when truthy, prt_objdesc(prt) is called last.
    """
    out = kws.get('prt')
    # Backup
    self.go2obj = go2obj  # Full GODag
    self.annots, alt_goids = clean_anno(annots, go2obj, out)[:2]
    # Genes annotated to all associated GO, including inherited up ancestors
    rel_set = RelationshipCombos(go2obj).get_set(relationships)
    self.go2genes = self._init_go2genes(rel_set, go2obj)
    self.gene2gos = get_b2aset(self.go2genes)
    # Annotation main GO IDs (prefer main id to alt_id)
    self.goids = set(self.go2genes.keys())
    self.gocnts = Counter(
        {goid: len(genes) for goid, genes in self.go2genes.items()})
    # Get total count for each branch: BP MF CC
    namespaces = ('biological_process', 'molecular_function', 'cellular_component')
    self.aspect_counts = {
        nspc: self.gocnts.get(NAMESPACE2GO[nspc], 0) for nspc in namespaces}
    self._init_add_goid_alt(alt_goids)
    self.gosubdag = GoSubDag(
        set(self.gocnts),
        go2obj,
        tcntobj=self,
        relationships=rel_set,
        prt=None)
    if out:
        self.prt_objdesc(out)
class Run(object):
    """Objects for running plotting test."""

    def __init__(self, obo, gaf, prt):
        """Load the GODag and annotations, then build term counts and a full GoSubDag."""
        self.prt = prt
        self.cwd = os.getcwd()
        # Gene Ontologies
        obo_path = os.path.join(REPO, "../goatools/", obo)
        self.go2obj_all = get_godag(obo_path)
        # Annotations
        gaf_path = dnld_gaf(gaf)
        print("GAF: {GAF}\n".format(GAF=gaf_path))
        self.gene2gos = read_gaf(gaf_path)
        self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
        # GoSubDag
        self.gosubdag_all = GoSubDag(
            None, self.go2obj_all, tcntobj=self.tcntobj, prt=prt)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print all GO IDs, including alternate GO IDs, in GODag."""
        self.gosubdag_all.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Print the GO IDs that will be plotted, then plot them to cwd/fout_img."""
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        full = self.gosubdag_all
        # Pass the full GoSubDag's rcntobj and go2nt into the sub-DAG
        subdag = GoSubDag(
            go_sources,
            full.go2obj,
            prt=self.prt,
            rcntobj=full.rcntobj,
            go2nt=full.go2nt)
        fmt = subdag.prt_attr['fmta']
        plotted = GoSubDagPlot(subdag).get_goids_plt()
        self.prt.write("\n{N} GO IDs\n".format(N=len(plotted)))
        subdag.prt_goids(plotted, prtfmt=fmt, prt=self.prt)
        plotter = GoSubDagPlot(subdag, mark_alt_id=True)
        plotter.plt_dag(os.path.join(self.cwd, fout_img))
def plt_goids(self, fout_img, go_sources):
    """Print the GO IDs that will be plotted, then plot them to cwd/fout_img."""
    # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
    full = self.gosubdag_all
    # Pass the full GoSubDag's rcntobj and go2nt into the sub-DAG
    subdag = GoSubDag(
        go_sources,
        full.go2obj,
        prt=self.prt,
        rcntobj=full.rcntobj,
        go2nt=full.go2nt)
    fmt = subdag.prt_attr['fmta']
    plotted = GoSubDagPlot(subdag).get_goids_plt()
    self.prt.write("\n{N} GO IDs\n".format(N=len(plotted)))
    subdag.prt_goids(plotted, prtfmt=fmt, prt=self.prt)
    plotter = GoSubDagPlot(subdag, mark_alt_id=True)
    plotter.plt_dag(os.path.join(self.cwd, fout_img))
def get_go2desc(goids, go2obj, go2genes):
    """Return an ordered mapping of GO ID to a gene-count description line.

    Entries are ordered by namespace, then depth, then descending dcnt.
    Each value combines the number of genes in go2genes[goid] with the GO
    description formatted by the GoSubDag 'fmt' pattern.
    """
    subdag = GoSubDag(goids, go2obj)
    nts = subdag.get_go2nt(goids)
    line_fmt = "{G:6,} genes {DESC}"
    go_fmt = subdag.prt_attr['fmt']

    def _order(item):
        """Sort key: namespace, depth, then largest descendant count first."""
        ntgo = item[1]
        return [ntgo.NS, ntgo.depth, -1 * ntgo.dcnt]

    go2desc = cx.OrderedDict()
    for goid, ntgo in sorted(nts.items(), key=_order):
        desc = go_fmt.format(**ntgo._asdict())
        go2desc[goid] = line_fmt.format(G=len(go2genes[goid]), DESC=desc)
    return go2desc
def test_gosubdag_relationships(prt=sys.stdout):
    """Compare the GO IDs in GoSubDags built with different relationship settings."""
    src_goids = {
        "GO:0032501",
        "GO:0044707",  # alt_id: GO:0032501  # BP 1011 L01 D01 B multicellular organismal process
        "GO:0050874",
        "GO:0007608",  # sensory perception of smell
        "GO:0050911",  # detection of chemical stimulus involved in sensory perception of smell
    }

    # Load GO-DAG twice: without and with the optional 'relationship' attribute
    obo_path = os.path.join(REPO, "go-basic.obo")
    download_go_basic_obo(obo_path, prt, loading_bar=None)
    dag_plain = GODag(obo_path)
    dag_relat = GODag(obo_path, optional_attrs=['relationship'])

    print("\nCreate GoSubDag with GO DAG containing no relationships.")
    tic = timeit.default_timer()
    subdag = GoSubDag(src_goids, dag_plain, relationships=False, prt=prt)
    ids_plain = set(subdag.go2obj)
    tic = _rpt_hms(tic, len(subdag.go2obj))

    print("\nCreate GoSubDag while IGNORING relationships")
    subdag = GoSubDag(src_goids, dag_relat, relationships=False, prt=prt)
    ids_ignored = set(subdag.go2obj)
    tic = _rpt_hms(tic, len(subdag.go2obj))
    assert ids_plain == ids_ignored

    print("\nCreate GoSubDag while loading only the 'part_of' relationship")
    subdag = GoSubDag(src_goids, dag_relat, relationships=['part_of'], prt=prt)
    ids_part_of = set(subdag.go2obj)
    tic = _rpt_hms(tic, len(subdag.go2obj))
    # Adding 'part_of' must keep every plain GO ID and add at least one more
    assert ids_plain <= ids_part_of
    assert len(ids_part_of) > len(ids_plain)

    print("\nCreate GoSubDag while loading all relationships")
    subdag = GoSubDag(src_goids, dag_relat, relationships=True, prt=prt)
    ids_all = set(subdag.go2obj)
    tic = _rpt_hms(tic, len(subdag.go2obj))
    # Loading all relationships must keep every 'part_of' GO ID
    assert ids_part_of <= ids_all
    assert len(ids_all) >= len(ids_part_of)
def plt_goids(self, fout_img, go_sources):
    """Print the GO IDs that will be plotted, then plot them to cwd/fout_img."""
    # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
    full = self.gosubdag_all
    # Pass the full GoSubDag's rcntobj and go2nt into the sub-DAG
    subdag = GoSubDag(
        go_sources,
        full.go2obj,
        prt=self.prt,
        rcntobj=full.rcntobj,
        go2nt=full.go2nt)
    fmt = subdag.prt_attr['fmta']
    plotted = GoSubDagPlot(subdag).get_goids_plt()
    self.prt.write("\n{N} GO IDs\n".format(N=len(plotted)))
    subdag.prt_goids(plotted, prtfmt=fmt, prt=self.prt)
    plotter = GoSubDagPlot(subdag, mark_alt_id=True)
    plotter.plt_dag(os.path.join(self.cwd, fout_img))
def build_hierarcy():
    """Write the GO hierarchy under GO:0005575 and store leaf-to-leaf PPI edges in Neo4j.

    Steps:
      1. Fetch the STRING PPI edges and the GO-to-gene mappings.
      2. Load the GODag (with the optional 'relationship' attribute) and
         extract the hierarchy below GO:0005575 into go_hierarcy.txt.
      3. For every PPI edge whose score exceeds 100000 and whose two GO IDs
         are both leaf vertices of that hierarchy, MERGE the edge into Neo4j.

    Returns None; the number of edges written is printed.
    """
    root_goid = 'GO:0005575'

    print("fetching ppi")
    go_edges = fetch_string_ppi_edges()
    go2geneids, geneids2go = fetch_go_hierarcy()

    dag_fin = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    godag = GODag(dag_fin, optional_attrs=['relationship'])
    gosubdag = GoSubDag(godag.keys(), godag)

    dict_result = {}
    hier_path = os.path.join(constants.BASE_PROFILE, "output", "go_hierarcy.txt")
    # Context manager guarantees the hierarchy file is closed
    with open(hier_path, "w+") as out:
        for cur_term in [root_goid]:
            vertices, edges = extract_hier_all(gosubdag, out, cur_term, go2geneids)
            dict_result[cur_term] = {"vertices": vertices, "edges": edges}

    go_vertices = dict_result[root_goid]['vertices']

    def is_leaf(goid):
        """True when goid is a vertex of the hierarchy and is flagged as a leaf."""
        return goid in go_vertices and go_vertices[goid]['isleaf']

    def add_edge(tx, src, dst, score):
        """MERGE both GO nodes and the scored relationship between them."""
        # Values are sent as query parameters, not formatted into the Cypher text
        tx.run("MERGE (n1:GO {term: $term1}) "
               "MERGE (n2:GO {term: $term2}) "
               "MERGE (n1)-[r:SCR {score: $score}]->(n2)",
               term1=src, term2=dst, score=score)

    # NOTE(review): connection URI and credentials are hard-coded; consider
    # moving them to configuration.
    driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "Hh123456"))
    count = 0
    try:
        with driver.session() as session:
            for cur_edges, score in go_edges.items():
                # Edge keys look like "<GO id>=<GO id>"
                ends = cur_edges.split("=")
                if score > 100000 and is_leaf(ends[0]) and is_leaf(ends[1]):
                    count += 1
                    session.write_transaction(add_edge, ends[0], ends[1], score)
    finally:
        driver.close()
    print("total edges: {}".format(count))
def wr_xlsx(self, fout_xlsx, goids, sortby=None, **kws_usr):
    """Write the namedtuples for goids into an xlsx table.

    User keywords are copied before use; 'fld2col_widths' falls back to
    self.fld2col_widths when the caller did not supply it.
    """
    subdag = GoSubDag(goids, self.go2obj)
    rows = subdag.get_nts(goids, sortby)
    kws_tbl = dict(kws_usr)
    if 'fld2col_widths' not in kws_tbl:
        kws_tbl['fld2col_widths'] = self.fld2col_widths
    wr_xlsx_tbl(fout_xlsx, rows, **kws_tbl)
def build_hierarcy(go_folder, roots=None, ev_exclude=None):
    """Extract the GO hierarchy below each root term.

    Parameters
    ----------
    go_folder : passed to fetch_go_hierarcy
    roots : iterable of GO IDs, optional
        Root terms to extract. Defaults to ['GO:0008150']
        (other roots seen in the original comment: 0005575, 0003674).
    ev_exclude : set, optional
        Passed to fetch_go_hierarcy. Defaults to an empty set.

    Returns
    -------
    tuple
        (dict_result, go2geneids, geneids2go, entrez2ensembl) where
        dict_result maps each root to {"vertices": ..., "edges": ...}.
    """
    # None sentinels avoid sharing one mutable default object across calls
    if roots is None:
        roots = ['GO:0008150']
    if ev_exclude is None:
        ev_exclude = set()

    go2geneids, geneids2go = fetch_go_hierarcy(go_folder, ev_exclude)

    dag_fin = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    tic = timeit.default_timer()
    godag = GODag(dag_fin, optional_attrs=['relationship'])
    gosubdag = GoSubDag(godag.keys(), godag)
    toc = timeit.default_timer()

    # Elapsed time covers only the DAG load above, so format it once
    msg = "Elapsed HMS: {}\n\n".format(
        str(datetime.timedelta(seconds=(toc - tic))))

    dict_result = {}
    for cur_term in roots:
        vertices, edges = extract_hier_all(gosubdag, cur_term, go2geneids)
        sys.stdout.write(msg)
        dict_result[cur_term] = {"vertices": vertices, "edges": edges}
    return dict_result, go2geneids, geneids2go, get_entrez2ensembl_dictionary()
def test_go_parents():
    """Run both GO parent checks against one GoSubDag covering the whole GODag."""
    godag = get_godag("go-basic.obo", prt=None)
    subdag_all = GoSubDag(None, godag, rcntobj=True)
    for check in (run_1, run_2):
        check(subdag_all)
def test_wr_sections_all():
    """Test that all sections files generated by wr_sections have the same content."""
    fin_sections = "data/gjoneska_pfenning/sections_in.txt"
    fout_txt = "tmp_test_sections_out.txt"  # Travis-CI path is cwd
    fout_py = "tmp_test_sections.py"

    # Read user GO IDs and the reference sections
    usrgos = [nt.GO for nt in goea_results]
    sec_rd = _read_sections(fin_sections)

    # Do preliminaries
    godag = get_godag(
        "go-basic.obo",
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    gosubdag = GoSubDag(usrgos, godag, relationships=True, tcntobj=None)
    grprdflt = GrouperDflts(gosubdag)

    # The last section of sec_rd is the ungrouped default section; leave it out
    grouped, last = sec_rd[:-1], sec_rd[-1]
    hdrobj = HdrgosSections(gosubdag, grprdflt.hdrgos_dflt, grouped)
    assert last[0] == hdrobj.secdflt, last[0]
    grprobj = Grouper("test", usrgos, hdrobj, gosubdag)

    # Create text and Python sections files
    WrSectionsTxt(grprobj).wr_txt_section_hdrgos(os.path.join(REPO, fout_txt))
    WrSectionsPy(grprobj).wr_py_sections(
        os.path.join(REPO, fout_py), sec_rd, doc=godag.version)

    # Read text and Python sections files
    sec_wr = _read_sections(fout_txt)
    sec_py = _read_sections(fout_py)
def test_write_hier_bp_mf_cc():
    """Test that write hierarchy writes all: BP, MF, CC"""
    anno_path = os.path.join(REPO, 'gene2go')
    dag_path = os.path.join(REPO, "go-basic.obo")
    _dnld_anno(anno_path)

    print('\nTEST STORING ONLY ONE SPECIES')
    godag = get_godag(dag_path)
    gene2gos = read_annotations(namespace='ALL')
    if gene2gos:
        tcntobj = TermCounts(godag, gene2gos)
    else:
        tcntobj = None
    gosubdag = GoSubDag(
        godag.keys(),
        godag,
        relationships=False,
        tcntobj=tcntobj,
        children=True,
        prt=sys.stdout)
    objwr = WrHierGO(gosubdag)

    # 2020 11:
    #     594,748 GO lines under GO:0008150
    #      23,199 GO lines under GO:0003674
    #       6,259 GO lines under GO:0005575
    #     624,206 items WROTE: tmp_test_wr_hier_BP_MF_CC.txt
    lower_bounds = (
        (['BP', 'MF', 'CC'], 600000),
        (['BP'], 500000),
        (['MF'], 20000),
        (['CC'], 5000),
    )
    for namespaces, floor in lower_bounds:
        assert len(_wr_hier(namespaces, gosubdag.go2nt, objwr)) > floor
def _get_tcntobj(goids, go2obj, **kws): """Get a TermCounts object if the user provides an annotation file, otherwise None.""" # kws: gaf (gene2go taxid) if 'gaf' in kws or 'gene2go' in kws: # Get a reduced go2obj set for TermCounts _gosubdag = GoSubDag(goids, go2obj, rcntobj=False, prt=None) return get_tcntobj(_gosubdag.go2obj, **kws) # TermCounts
def get_ic_of_most_informative_ancestor(id, term_counts, go_dag):
    """Get the information content of the ancestor of `id` with the highest information content.

    Parameters
    ----------
    id : str
        GO term
    term_counts : dict
        dictionary:
        key: GO terms, values: number of occurrences of GO term and its children in body of evidence
    go_dag : GODag object
        GODag object from the goatools package

    Returns
    -------
    float
        0 when `id` itself has a positive count in `term_counts`, when `id`
        has no entry in the GoSubDag ancestor table, or when no ancestor has
        an information content above 0; otherwise the largest ancestor
        information content.
    """
    # A term that already has counts is answered with 0 here
    if term_counts.get(id, 0) > 0:
        return 0

    go2ancestors = GoSubDag([id], go_dag, prt=None).rcntobj.go2ancestors
    if id not in go2ancestors:
        return 0

    # The leading 0 reproduces the floor of the original running maximum
    return max([0] + [get_info_content(ancestor, term_counts, go_dag)
                      for ancestor in go2ancestors[id]])
def build_hierarcy():
    """Write the GO hierarchy under GO:0005575 and the filtered leaf-to-leaf PPI edges.

    Steps:
      1. Fetch the STRING PPI edges and the GO-to-gene mappings.
      2. Load the GODag (with the optional 'relationship' attribute) and
         extract the hierarchy below GO:0005575 into go_hierarcy.txt.
      3. Keep every PPI edge whose score exceeds 1000 and whose two GO IDs
         are both leaf vertices of that hierarchy, and write the kept edges
         as "<edge>\\t<score>" lines to GO_edges_ppi_filtered.txt.

    Returns None.
    """
    root_goid = 'GO:0005575'

    print("fetching ppi")
    go_edges = fetch_string_ppi_edges()
    go2geneids, geneids2go = fetch_go_hierarcy()

    dag_fin = os.path.join(constants.GO_DIR, constants.GO_FILE_NAME)
    godag = GODag(dag_fin, optional_attrs=['relationship'])
    gosubdag = GoSubDag(godag.keys(), godag)

    dict_result = {}
    hier_path = os.path.join(constants.BASE_PROFILE, "output", "go_hierarcy.txt")
    # Context manager guarantees the hierarchy file is closed
    with open(hier_path, "w+") as out:
        for cur_term in [root_goid]:
            vertices, edges = extract_hier_all(gosubdag, out, cur_term, go2geneids)
            dict_result[cur_term] = {"vertices": vertices, "edges": edges}

    go_vertices = dict_result[root_goid]['vertices']

    def is_leaf(goid):
        """True when goid is a vertex of the hierarchy and is flagged as a leaf."""
        return goid in go_vertices and go_vertices[goid]['isleaf']

    lines = []
    for cur_edges, score in go_edges.items():
        # Edge keys look like "<GO id>=<GO id>"
        ends = cur_edges.split("=")
        if score > 1000 and is_leaf(ends[0]) and is_leaf(ends[1]):
            lines.append("{}\t{}\n".format(cur_edges, score))

    print("about to write filtered ppi go edges to file ({} lines)".format(
        len(lines)))
    edges_path = os.path.join(constants.OUTPUT_GLOBAL_DIR, "GO_edges_ppi_filtered.txt")
    with open(edges_path, "w+") as f:
        f.writelines(lines)
def cli(self, prt=sys.stdout):
    """Command-line interface for go_draw script."""
    kws = self.objdoc.get_docargs(prt=None)
    godag = get_godag(
        kws['obo'],
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    usrgos = GetGOs(godag, max_gos=200).get_usrgos(kws.get('GO_FILE'), prt)
    # TermCounts or None
    tcntobj = self._get_tcntobj(usrgos, godag, **kws)
    self.gosubdag = GoSubDag(
        usrgos, godag, relationships=True, tcntobj=tcntobj, prt=None)
    grprdflt = GrouperDflts(self.gosubdag, kws['slims'])
    versions = [godag.version, grprdflt.ver_goslims]
    prt.write("{VER}\n".format(VER="\n".join(versions)))

    fin_sections = kws['ifile']
    sections = self._read_sections(fin_sections)
    hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
    grprobj = Grouper("init", usrgos, hdrobj, self.gosubdag)

    # Write sections; the input file is only created when it does not exist yet
    wr_txt = WrSectionsTxt(grprobj, versions)
    if not os.path.exists(fin_sections):
        wr_txt.wr_txt_section_hdrgos(fin_sections)
    wr_txt.wr_txt_section_hdrgos(kws['ofile'])
    wr_py = WrSectionsPy(grprobj, versions)
    if 'py' in kws:
        wr_py.wr_py_sections(kws['py'], sections, doc=godag.version)

    # Write user GO IDs in sections
    sorter = Sorter(grprobj)
    wr_gos = WrXlsxSortedGos("init", sorter, versions)
    wr_gos.wr_txt_gos(kws['txt'], sortby=wr_py.fncsortnt)
    self._prt_cnt_usrgos(usrgos, sys.stdout)
def show_go_dag_for_terms(terms, add_relationships=True):
    """Plot the GO DAG for the given terms and return it as an Image.

    terms: list of GO IDs, or a pandas Series of them (converted to a list).
    add_relationships: when true, the 'relationship' attribute is loaded
        and passed on to GoSubDag as relationships=True.

    Returns None when terms is empty; otherwise an Image of
    'geneinfo_cache/plot.png'.
    """
    # isinstance also accepts Series subclasses, unlike an exact type check
    if isinstance(terms, pd.Series):
        terms = terms.tolist()
    if not terms:
        return None

    # Silence everything the download / load / plot helpers print
    with open(os.devnull, 'w') as null, redirect_stdout(null):
        # Called for their download side effect; return values are not used
        download_and_move_go_basic_obo(prt=null)
        download_ncbi_associations(prt=null)

        optional_attrs = ['relationship', 'def'] if add_relationships else ['def']
        obodag = GODag("geneinfo_cache/go-basic.obo",
                       optional_attrs=optional_attrs, prt=null)
        gosubdag = GoSubDag(terms, obodag, relationships=add_relationships)
        GoSubDagPlot(gosubdag).plt_dag('geneinfo_cache/plot.png')
        return Image('geneinfo_cache/plot.png')
def plt_goea_results(fout_img, goea_results, **kws):
    """Plot the GO terms of the GOEA results on a single page.

    Each record supplies its GO ID (rec.GO) and its term object
    (rec.goterm); kws are forwarded to GoSubDagPlot.
    """
    sources = []
    go2term = {}
    for rec in goea_results:
        sources.append(rec.GO)
        go2term[rec.GO] = rec.goterm
    subdag = GoSubDag(sources, go2term, rcntobj=True)
    GoSubDagPlot(subdag, goea_results=goea_results, **kws).plt_dag(fout_img)
def test_example():
    """Test GoeaResults in plotting package."""
    # ----------------------------------------------------------------
    # Gene Ontology Enrichment Analysis (GOEA)
    # ----------------------------------------------------------------
    taxid = 10090  # Mouse study

    # Load ontologies, associations, and population ids
    pop_ids = GeneID2nt_mus.keys()
    study_id2symbol = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
    study_ids = study_id2symbol.keys()
    goeaobj = get_goeaobj("fdr_bh", pop_ids, taxid)
    go2obj = goeaobj.obo_dag

    # Run GOEA on study and keep the results with p_fdr_bh below 0.05
    results_all = goeaobj.run_study(study_ids)
    results_sig = [res for res in results_all if res.p_fdr_bh < 0.05]
    assert MgrNtGOEAs(results_sig).get_goea_nts_all()
    ns2gos = get_ns2gos(results_sig)

    # Test plotting GOEA results
    gosubdag = GoSubDag({res.GO for res in results_sig}, go2obj)
    plot_results(
        "test_plot_goids_a_goea_{NS}.png",
        results_sig,
        id2symbol=study_id2symbol,
        parentcnt=True,
        childcnt=True)
    for nss, goids in ns2gos.items():
        png_b = "test_plot_goids_b_{NS}.png".format(NS=nss)
        plt_goids(gosubdag, png_b, goids)
        png_c = "test_plot_goids_c_{NS}.png".format(NS=nss)
        plot_gos(png_c, goids, go2obj)
def _get_grprobj():
    """Build the Grouper for USER_GOS using the SECTIONS header layout."""
    obo_path = os.path.join(REPO, "go-basic.obo")
    godag = get_godag(
        obo_path,
        prt=None,
        loading_bar=False,
        optional_attrs=['relationship'])
    subdag = GoSubDag(USER_GOS, godag, relationships=True, tcntobj=None)
    dflts = GrouperDflts(subdag)
    hdrs = HdrgosSections(subdag, dflts.hdrgos_dflt, SECTIONS)
    return Grouper("wrusrgos", USER_GOS, hdrs, subdag)
def test_i177():
    """Run code from issue #177, which is reporting a recursion error"""
    goid = 'GO:0050807'
    dag = get_godag('go.obo', optional_attrs='relationship')
    subdag = GoSubDag([goid], dag, prt=None)
    ancestors = subdag.rcntobj.go2ancestors[goid]
    print('{GO} ancestors: {P}'.format(GO=goid, P=ancestors))
def test_full(out=sys.stdout, opt_fields=None):
    """Use OBOReader in default operation."""
    dag = _load_dag("./go-basic.obo", opt_fields, out)
    # Leaf terms are those without children
    leafs = {term.id for term in dag.values() if not term.children}
    subdag = GoSubDag(leafs, dag)
    test_write_hier_all("FULL", "GO:0000009", subdag, out)
    test_write_hier_norep("FULL", "GO:0000010", subdag, out)
def _get_enriched_goids(top, godag):
    """Get a set of GO IDs related to specified top term.

    A GO ID is selected when it is `top` itself or when `top` appears in
    its go2descendants entry.
    """
    go2descendants = GoSubDag(None, godag, relationships=True).rcntobj.go2descendants
    selected = set()
    for goid, descendants in go2descendants.items():
        if top in descendants or top == goid:
            selected.add(goid)
    return selected
def _get_gosubdag():
    """Load go-basic.obo with relationships and return a GoSubDag over all of it."""
    obo_path = os.path.join(REPO, 'go-basic.obo')
    dag = get_godag(
        obo_path,
        prt=sys.stdout,
        loading_bar=False,
        optional_attrs=['relationship'])
    return GoSubDag(None, dag)
def __init__(self, obo, gaf, prt):
    """Load the GODag and annotations, then build term counts and a full GoSubDag."""
    self.prt = prt
    self.cwd = os.getcwd()
    # Gene Ontologies
    obo_path = os.path.join(REPO, "../goatools/", obo)
    self.go2obj_all = get_godag(obo_path)
    # Annotations
    gaf_path = dnld_gaf(gaf)
    print("GAF: {GAF}\n".format(GAF=gaf_path))
    self.gene2gos = read_gaf(gaf_path)
    self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
    # GoSubDag
    self.gosubdag_all = GoSubDag(
        None, self.go2obj_all, tcntobj=self.tcntobj, prt=prt)
    self.prtfmt = self.gosubdag_all.prt_attr['fmta']
def get_gosubdag_all(self, prt=sys.stdout):
    """Get GO DAG subset including descendants which are not in the annotations.

    The new GoSubDag is built from the union of every go2descendants entry
    of self.gosubdag, with the same relationships and this object as tcntobj.
    """
    descendants = set().union(*self.gosubdag.rcntobj.go2descendants.values())
    return GoSubDag(
        descendants,
        self.go2obj,
        self.gosubdag.relationships,
        tcntobj=self,
        prt=prt)
def test_nb():
    """Test notebook code"""
    godag = get_godag("go-basic.obo", optional_attrs={'relationship'})
    leafs = {term.item_id for term in godag.values() if not term.children}
    virion = 'GO:0019012'

    # No relationships
    subdag_r0 = GoSubDag(leafs, godag)
    nt_virion = subdag_r0.go2nt[virion]
    print(nt_virion)
    print('r0 THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_virion.dcnt))

    # All relationships
    subdag_r1 = GoSubDag(leafs, godag, relationships=True)
    nt_virion = subdag_r1.go2nt[virion]
    print(nt_virion)
    print('r1 THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_virion.dcnt))

    # Only the 'part_of' relationship
    subdag_partof = GoSubDag(leafs, godag, relationships={'part_of'})
    nt_virion = subdag_partof.go2nt[virion]
    print(nt_virion)
    print('THE VALUE OF dcnt IS: {dcnt}'.format(dcnt=nt_virion.dcnt))
    n_desc = len(subdag_partof.rcntobj.go2descendants[virion])
    print('{N} descendants of virion were found'.format(N=n_desc))

    # Limit plot size by choosing just two virion descendants
    capsid_fiber = {'GO:0098033', 'GO:0098032'}
    subdag_partof.prt_goids(
        capsid_fiber,
        '{NS} {GO} dcnt({dcnt}) D-{depth:02} {GO_name}')

    # Plot a subset holding only those two descendants and their ancestors
    subdag_plt = GoSubDag(capsid_fiber, godag, relationships={'part_of'})
    GoSubDagPlot(subdag_plt).plt_dag('virion_capsid_fiber.png')
def plt_goids(gosubdag, fout_img, goids, **kws_plt):
    """Plot GO IDs in a DAG (Directed Acyclic Graph) and return the plot object.

    kws_plt is forwarded to both the GoSubDag and the GoSubDagPlot constructor.
    """
    subdag = GoSubDag(
        goids,
        gosubdag.go2obj,
        rcntobj=gosubdag.rcntobj,
        **kws_plt)
    plotter = GoSubDagPlot(subdag, **kws_plt)
    plotter.plt_dag(fout_img)
    return plotter
def _plot_grouped_gos(self, fout_img, pltgosusr, kws_plt, kws_dag):
    """Plot the user GO IDs in pltgosusr to fout_img.

    The plotted GoSubDag takes relationships, rcntobj and go2nt from the
    grouper's GoSubDag; kws_dag goes to GoSubDag, kws_plt to GoSubDagPlot.
    """
    parent = self.grprobj.gosubdag
    subdag = GoSubDag(
        pltgosusr,
        parent.get_go2obj(pltgosusr),
        parent.relationships,
        rcntobj=parent.rcntobj,
        go2nt=parent.go2nt,
        **kws_dag)
    GoSubDagPlot(subdag, **kws_plt).plt_dag(fout_img)
class Run(object):
    """Printing GO IDs and Plotting; GODag from obo using GoSubDag."""

    def __init__(self, obo):
        """Remember the cwd, load the GODag, and wrap all of it in a GoSubDag."""
        self.cwd = os.getcwd()
        obo_path = os.path.join(REPO, "../goatools/", obo)
        self.go2obj_all = get_godag(obo_path)
        self.gosubdag_all = GoSubDag(None, self.go2obj_all)
        self.prtfmt = self.gosubdag_all.prt_attr['fmta']

    def prt_goids_all(self, prt):
        """Print all GO IDs, including alternate GO IDs, in GODag."""
        self.gosubdag_all.prt_goids(prtfmt=self.prtfmt, prt=prt)

    def plt_goids(self, fout_img, go_sources):
        """Plot the GO IDs in go_sources to cwd/fout_img, marking alternate IDs."""
        # % src/bin/go_plot.py GOs --obo=../goatools/data/i86.obo --outfile=t00.jpg --mark_alt_id
        img_path = os.path.join(self.cwd, fout_img)
        subdag = GoSubDag(go_sources, self.go2obj_all)
        GoSubDagPlot(subdag, mark_alt_id=True).plt_dag(img_path)
def test_semantic_i88():
    """Computing basic semantic similarities between GO terms."""
    godag = obo_parser.GODag("go-basic.obo")
    # Use every key in the GODag.  An earlier filter that kept only keys
    # equal to the term's own id was dead code, overwritten before use.
    goids = set(godag.keys())

    # Get all the annotations from arabidopsis.
    fin_gaf = os.path.join(REPO, "tair.gaf")
    # dnld_assc includes read_gaf
    associations = dnld_assc(fin_gaf, godag, prt=None)

    # First get the counts and information content for each GO term.
    termcounts = TermCounts(godag, associations)
    gosubdag = GoSubDag(goids, godag, tcntobj=termcounts)

    # Now we can calculate the semantic distance and semantic similarity, as so:
    #   "The semantic similarity between terms GO:0048364 and GO:0044707 is 0.25.
    go_id3 = 'GO:0048364'  # BP level-03 depth-04 root development
    go_id4 = 'GO:0044707'  # BP level-02 depth-02 single-multicellular organism process
    go_root = deepest_common_ancestor([go_id3, go_id4], godag)
    sim = semantic_similarity(go_id3, go_id4, godag)
    print('\nThe semantic similarity between terms {GO1} and {GO2} is {VAL}.'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim))
    gosubdag.prt_goids([go_root, go_id3, go_id4])

    # Calculate the information content
    go_id = "GO:0048364"
    infocontent = get_info_content(go_id, termcounts)
    print('\nInformation content ({GO}) = {INFO}\n'.format(GO=go_id, INFO=infocontent))

    # Resnik's similarity measure is defined as the information content of the most
    # informative common ancestor. That is, the most specific common parent-term in
    # the GO. Then we can calculate this as follows:
    #   "Resnik similarity score (GO:0048364, GO:0044707) = 4.0540784252
    sim_r = resnik_sim(go_id3, go_id4, godag, termcounts)
    print('Resnik similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim_r))

    # Lin similarity score (GO:0048364, GO:0044707) = -0.607721957763
    sim_l = lin_sim(go_id3, go_id4, godag, termcounts)
    print('Lin similarity score ({GO1}, {GO2}) = {VAL}'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim_l))
def __init__(self, obo, gaf, prt):
    """Load the GODag and annotations, then build term counts and a full GoSubDag."""
    self.prt = prt
    self.cwd = os.getcwd()
    # Gene Ontologies
    obo_path = os.path.join(REPO, "../goatools/", obo)
    self.go2obj_all = get_godag(obo_path)
    # Annotations
    gaf_path = dnld_gaf(gaf)
    print("GAF: {GAF}\n".format(GAF=gaf_path))
    self.gene2gos = read_gaf(gaf_path)
    self.tcntobj = TermCounts(self.go2obj_all, self.gene2gos)
    # GoSubDag
    self.gosubdag_all = GoSubDag(
        None, self.go2obj_all, tcntobj=self.tcntobj, prt=prt)
    self.prtfmt = self.gosubdag_all.prt_attr['fmta']
def get_nts_sections(self, sections, sortby=None):
    """Given a list of sections containing GO IDs, get a list of sections w/GO nts."""
    all_goids = self.get_goids_sections(sections)
    subdag = GoSubDag(all_goids, self.go2obj)
    sections_nts = []
    for name, section_gos in sections:
        sections_nts.append((name, subdag.get_nts(section_gos, sortby)))
    return sections_nts
def plot_all(self, goids, name, prt=sys.stdout):
    """Create plots with various numbers of relationships.

    Three images are written under REPO: without relationships (r0), with
    only 'part_of' (partof), and with all relationships (r1).  In the r1
    plot the GO IDs absent from the r0 GoSubDag get the colour '#d5ffff'.
    """
    def _png(tag):
        """Full path of the output image for one relationship setting."""
        fname = "a_relationship_{NAME}_" + tag + ".png"
        return os.path.join(REPO, fname.format(NAME=name))

    prt.write("\nCreate GoSubDag not loading any relationship")
    subdag_r0 = GoSubDag(goids, self.go2obj, relationships=False, prt=prt)
    subdag_r0.prt_goids(subdag_r0.go2obj, prt=prt)
    prt.write("{N} GO IDS".format(N=len(subdag_r0.go2obj)))
    plotter = GoSubDagPlot(subdag_r0, mark_alt_id=True)
    plotter.plt_dag(_png("r0"))

    prt.write("\nCreate GoSubDag while loading only the 'part_of' relationship")
    subdag = GoSubDag(goids, self.go2obj, relationships=['part_of'], prt=prt)
    subdag.prt_goids(subdag.go2obj, prt=prt)
    prt.write("{N} GO IDS".format(N=len(subdag.go2obj)))
    plotter = GoSubDagPlot(subdag, mark_alt_id=True)
    prt.write("GO SOURCES:")
    subdag.prt_goids(subdag.go_sources, prt=prt)
    plotter.plt_dag(_png("partof"))

    prt.write("\nCreate GoSubDag while loading all relationships")
    subdag = GoSubDag(goids, self.go2obj, relationships=True, prt=prt)
    prt.write("ALL {N} GO IDS:".format(N=len(subdag.go2obj)))
    subdag.prt_goids(subdag.go2obj, prt=prt)
    prt.write("2 GO SOURCES:")
    subdag.prt_goids(subdag.go_sources, prt=prt)
    # GO IDs that only appear once all relationships are loaded
    added = set(subdag.go2obj) - set(subdag_r0.go2obj)
    go2color = dict.fromkeys(added, '#d5ffff')
    prt.write("{N} NEW GO IDS:".format(N=len(added)))
    subdag.prt_goids(added, prt=prt)
    prt.write("{N} GO IDS".format(N=len(subdag.go2obj)))
    plotter = GoSubDagPlot(subdag, mark_alt_id=True, go2color=go2color)
    plotter.plt_dag(_png("r1"))
def __init__(self, obo):
    """Load the GODag from REPO/obo and wrap all of it in a GoSubDag."""
    obo_path = os.path.join(REPO, obo)
    self.go2obj_all = get_godag(obo_path)
    self.gosubdag_all = GoSubDag(None, self.go2obj_all)
    self.prtfmt = self.gosubdag_all.prt_attr['fmta']
def __init__(self, obo):
    """Remember the cwd, load the GODag, and wrap all of it in a GoSubDag."""
    self.cwd = os.getcwd()
    obo_path = os.path.join(REPO, "../goatools/", obo)
    self.go2obj_all = get_godag(obo_path)
    self.gosubdag_all = GoSubDag(None, self.go2obj_all)
    self.prtfmt = self.gosubdag_all.prt_attr['fmta']