def __init__(self, context):
    """Initialise the GO service and eagerly load the Gene Ontology.

    The ontology is first requested through the ``'go'`` handle; if that
    attempt raises, the ``'obo:go'`` handle is tried instead.

    Args:
        context: passed through unchanged to the parent initialiser.
    """
    super(GO, self).__init__("go", context)
    ofactory = OntologyFactory()
    try:
        # sometimes the ontology world is down :(
        self.ont = ofactory.create('go')
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
        self.ont = ofactory.create('obo:go')
        # self.ont = ofactory.create('./mondo.owl')
    # This seems to be required to make the ontology actually load:
    _ = self.ont.get_level(0)
def load_associations(self, taxon) -> None:
    """Populate ``self.ontology`` and ``self.associations`` for *taxon*.

    When ``self.ont`` is ``'go'`` the annotations come from a GAF file
    downloaded from geneontology.org and are restricted to terms below
    GO:0008150 and GO:0003674.  For any other ontology the association
    factory is asked for gene/phenotype associations of the taxon.

    Args:
        taxon: ``'human'`` or ``'mouse'``.
    """
    ncbi_taxa = {
        'human': 'NCBITaxon:9606',
        'mouse': 'NCBITaxon:10090',
    }
    self.ontology = OntologyFactory().create(self.ont)
    gaf_parser = GafParser()

    if self.ont != 'go':
        self.associations = self.afactory.create(
            ontology=self.ontology,
            subject_category='gene',
            object_category='phenotype',
            taxon=ncbi_taxa[taxon],
        )
        return

    # GO:0008150 is biological_process, GO:0003674 is molecular_function.
    # These are 2 of the 3 top-level GO terms; cellular_component (where a
    # gene carries out a molecular function) is deliberately left out.
    process_terms = self.ontology.descendants('GO:0008150')
    function_terms = self.ontology.descendants('GO:0003674')
    go_roots = set(process_terms + function_terms)
    sub_ont = self.ontology.subontology(go_roots)

    # An unrecognised taxon leaves the URL empty, as before.
    if taxon == 'human':
        url = "http://current.geneontology.org/annotations/goa_human.gaf.gz"
    elif taxon == 'mouse':
        url = "http://current.geneontology.org/annotations/mgi.gaf.gz"
    else:
        url = ''

    parsed = gaf_parser.parse(url)
    self.assocs = parsed
    # Drop header records, then keep only annotations to the selected terms.
    kept = [
        rec for rec in parsed
        if 'header' not in rec.keys() and rec['object']['id'] in go_roots
    ]
    self.associations = self.afactory.create_from_assocs(kept, ontology=sub_ont)
def run_phenolog(ont, aset, args):
    """
    Like run_enrichment_test, but uses classes from a 2nd ontology/assocset
    to build the gene set.

    For every node of ``aset.ontology`` annotated to more than two subjects,
    run an enrichment test of those subjects against the second association
    set and print one line per result.

    Args:
        ont: ontology used to look up labels for the nodes of ``aset``.
        aset: primary association set.
        args: parsed CLI arguments; ``resource2`` is the handle of the second
            ontology and ``file2`` the association file loaded against it.

    Returns:
        None. Returns early (after logging an error) when the two
        association sets share fewer than two subjects.
    """
    ofactory = OntologyFactory()
    ont2 = ofactory.create(args.resource2)

    afactory = AssociationSetFactory()
    aset2 = afactory.create(ontology=ont2, file=args.file2)

    # only test for genes (or other subjects of statements) in common
    # NOTE(review): ``common`` is only used for the count and the sanity
    # check below; the enrichment itself is not restricted to it.
    common = set(aset.subjects).intersection(aset2.subjects)
    num_common = len(common)
    # The original format string contained a stray backslash ("/\{}"), an
    # invalid escape sequence that also leaked into the log output.
    logging.info("Genes in common between two KBs: {}/{} = {}".format(
        len(aset.subjects), len(aset2.subjects), num_common))
    if num_common < 2:
        logging.error("TOO FEW")
        return None

    for n in aset.ontology.nodes():
        nl = ont.label(n, id_if_null=True)
        genes = aset.query([n])
        num_genes = len(genes)
        if num_genes > 2:
            logging.info("BASE: {} {} num={}".format(n, nl, num_genes))
            enr = aset2.enrichment_test(subjects=genes,
                                        background=aset2.subjects,
                                        labels=True)
            for r in enr:
                print("{:8.3g} {} {:20s} <-> {} {:20s}".format(
                    r['p'], n, nl, r['c'], str(r['n'])))
def retrieve_associations(self, ont, group):
    """Load the ontology *ont* and the associations for *group*.

    Sets ``self.ontology`` and ``self.associations`` (and, for GO,
    ``self.assocs`` with the raw parser output).

    Args:
        ont: ontology handle; ``'go'`` selects the GAF-based code path.
        group: ``'human'`` or ``'mouse'``.
    """
    group_to_taxon = {
        'human': 'NCBITaxon:9606',
        'mouse': 'NCBITaxon:10090',
    }
    ontology_factory = OntologyFactory()
    self.ontology = ontology_factory.create(ont)
    parser = GafParser()

    if ont == 'go':
        wanted_terms = set(
            self.ontology.descendants('GO:0008150')
            + self.ontology.descendants('GO:0003674'))
        restricted = self.ontology.subontology(wanted_terms)

        # NOTE(review): the remote URL is selected but not used; a local
        # 'goa_human.gaf.gz' is parsed regardless of ``group``.
        url = ''
        if group == 'mouse':
            url = "http://current.geneontology.org/annotations/mgi.gaf.gz"
        if group == 'human':
            url = "http://current.geneontology.org/annotations/goa_human.gaf.gz"
        records = parser.parse('goa_human.gaf.gz')
        # records = parser.parse(url)
        self.assocs = records

        without_headers = [r for r in records if 'header' not in r.keys()]
        in_scope = [r for r in without_headers
                    if r['object']['id'] in wanted_terms]
        self.associations = self.afactory.create_from_assocs(
            in_scope, ontology=restricted)
    else:
        self.associations = self.afactory.create(
            ontology=self.ontology,
            subject_category='gene',
            object_category='phenotype',
            taxon=group_to_taxon[group])
def test_remote_sparql():
    """
    Reconstitution test: fetch PATO, then check graph neighbourhood and
    name resolution (local and remote) around a few known terms.
    """
    # default method is sparql
    ont = OntologyFactory().create('pato')
    graph = ont.get_graph()

    print(str(graph.node[PLOIDY]))
    all_nodes = graph.nodes()
    print(len(all_nodes))
    assert len(all_nodes) > 100

    successors = graph.successors(PLOIDY)
    print("SUCC:" + str(successors))
    predecessors = graph.predecessors(PLOIDY)
    print("PRED:" + str(predecessors))
    assert predecessors == ['PATO:0001396']

    ploidy_ancestors = ancestors(graph, PLOIDY)
    print("ANCS:" + str(ploidy_ancestors))
    assert 'PATO:0000001' in ploidy_ancestors
    print(graph)

    query = ['.*shape.*']
    renderer = GraphRenderer.create('tree')

    # Same regex resolved locally first, then against the remote service.
    local_hits = ont.resolve_names(query, is_regex=True, is_remote=False)
    print("SHAPE Q:" + str(local_hits))
    show_nodes(renderer, ont, local_hits)
    assert Y_SHAPED in local_hits

    remote_hits = ont.resolve_names(query, is_regex=True, is_remote=True)
    print("SHAPE Q:" + str(remote_hits))
    show_nodes(renderer, ont, remote_hits)
    assert Y_SHAPED in remote_hits
def get(self, ontology, node):
    """
    Extract a subgraph from an ontology

    The subgraph holds *node*, any extra ``cnode`` ids from the request
    arguments, and all of their ancestors and descendants in the graph
    filtered by the requested relations. It is returned as OBO-JSON.
    """
    args = parser.parse_args()
    seed_ids = [node]
    if args.cnode is not None:
        seed_ids += args.cnode

    ont = OntologyFactory().create(ontology)
    filtered = ont.get_filtered_graph(relations=args.relation)

    direction = 'du'
    selected = set()
    for seed in seed_ids:
        selected.add(seed)
        # NOTE: we use direct networkx methods as we have already extracted
        # the subgraph we want
        if "u" in direction:
            selected.update(nx.ancestors(filtered, seed))
        if "d" in direction:
            selected.update(nx.descendants(filtered, seed))

    subgraph = filtered.subgraph(selected)
    return OboJsonGraphRenderer().to_json(subgraph)
def test_local_json_parse():
    """
    Load ontology from JSON and exercise label lookup and the three
    search flavours (exact, implicit wildcard, explicit regex).
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/pato.json')

    print("PLOIDY: {}".format(ont.node(PLOIDY)))
    assert ont.label(PLOIDY) == 'ploidy'

    # exact match
    exact_hits = ont.search('shape')
    print("SEARCH (exact): {}".format(exact_hits))
    assert [SHAPE] == exact_hits

    # implicit regexp
    wildcard_hits = ont.search('%shape%')
    print("SEARCH (re, implicit): {}".format(wildcard_hits))
    assert SHAPE in wildcard_hits
    assert len(wildcard_hits) > 10

    # explicit regexp
    regex_hits = ont.search('.*shape.*', is_regex=True)
    print("SEARCH (re, explicit): {}".format(regex_hits))
    assert SHAPE in regex_hits
    assert len(regex_hits) > 10

    # Count by iterating, exactly as a consumer of nodes() would.
    node_total = sum(1 for _ in ont.nodes())
    assert node_total > 100
def __init__(self, context):
    """Initialise the HPO service and eagerly load the HP ontology.

    Loading is attempted with the ``'hp'`` handle first and then with
    ``'obo:hp'``. If both attempts raise, the process exits with status 1.

    Args:
        context: passed through unchanged to the parent initialiser.
    """
    super(HPO, self).__init__("hpo", context)
    ofactory = OntologyFactory()
    # ``except Exception`` (not a bare ``except``) keeps KeyboardInterrupt
    # and SystemExit from being mistaken for a load failure.
    try:
        # sometimes the ontology world is down :(
        self.ont = ofactory.create('hp')
    except Exception:
        # Logger.warn is deprecated (removed in Python 3.13); use warning().
        logger.warning('Problem reaching sparql endpoint, falling back to obo')
        try:
            self.ont = ofactory.create('obo:hp')
        except Exception:
            logger.error('Problem reaching obo, add local owl file')
            import sys
            sys.exit(1)
            # self.ont = ofactory.create('onto_cache/hpo.owl')
    # This seems to be required to make the ontology actually load:
    _ = self.ont.get_level(0)
def get(self):
    """
    Build an ontology with the factory defaults and report its node count
    alongside the value returned by ``get_db()``.
    """
    ont = OntologyFactory().create()
    db = get_db()
    node_count = len(ont.nodes())
    return {'z': db, 'test': node_count}
def __init__(self, context):
    """Initialise the Mondo service and eagerly load the Mondo ontology.

    Sources are tried in order: the ``'mondo'`` handle, then
    ``'obo:mondo'``, then the local file ``onto_cache/mondo.owl``. A failure
    of the last attempt propagates to the caller.

    Args:
        context: passed through unchanged to the parent initialiser.
    """
    super(Mondo, self).__init__("mondo", context)
    ofactory = OntologyFactory()
    # ``except Exception`` (not a bare ``except``) keeps KeyboardInterrupt
    # and SystemExit from silently triggering the fallbacks.
    try:
        # sometimes the ontology world is down :(
        self.ont = ofactory.create('mondo')
    except Exception:
        # Logger.warn is deprecated (removed in Python 3.13); use warning().
        logger.warning(
            'Problem reaching sparql endpoint, falling back to obo')
        try:
            self.ont = ofactory.create('obo:mondo')
        except Exception:
            logger.warning(
                'Problem reaching obo, falling back to local owl file')
            self.ont = ofactory.create('onto_cache/mondo.owl')
            # self.ont = ofactory.create('./mondo.owl')
    # This seems to be required to make the ontology actually load:
    _ = self.ont.get_level(0)
def test_write():
    """
    Write OBO from JSON: load the nucleus fixture and render it with the
    'obo' renderer.
    """
    factory = OntologyFactory()
    print("Creating ont")
    nucleus_ont = factory.create('tests/resources/nucleus.json')
    obo_writer = GraphRenderer.create('obo')
    obo_writer.write(nucleus_ont)
def make_apo_map():
    """Return a ``{label: apo_id}`` mapping for every node of the APO ontology.

    When two nodes share a label, the one iterated last wins.
    """
    # load apo for term mapping
    apo = OntologyFactory().create("apo")
    node_ids = apo.nodes()
    # dict schema { 'term': 'apo_id' }
    return {apo.label(node_id): node_id for node_id in node_ids}
def make_apo_map():
    """Load the APO ontology and index its node ids by label.

    Returns:
        dict: ``{'term': 'apo_id'}``; a later node overwrites an earlier
        one that carries the same label.
    """
    ontology = OntologyFactory().create("apo")  # apo, for term mapping
    term_ids = ontology.nodes()
    return dict((ontology.label(term_id), term_id) for term_id in term_ids)
def test_align():
    """
    Text lexical mapping: index two ontologies ('ma' and 'zfa') in one
    LexicalMapEngine and dump the resulting cross-reference graph.
    """
    factory = OntologyFactory()
    print("Creating ont")
    first = factory.create('ma')
    second = factory.create('zfa')

    engine = LexicalMapEngine()
    engine.index_ontology(first)
    engine.index_ontology(second)

    print(engine.lmap)
    print(first.all_synonyms())
    print(second.all_synonyms())

    xref_graph = engine.get_xref_graph()
    for left, right, attrs in xref_graph.edges_iter(data=True):
        print("{}<->{} :: {}".format(left, right, attrs))
    for node_id in xref_graph.nodes():
        print("{} --> {}".format(node_id, engine.grouped_mappings(node_id)))
def test_gaf():
    """
    Test loading from gaf

    Builds an association set from the POMBASE GAF file against GO and
    checks that G1 is among the genes returned for INTRACELLULAR.
    """
    ofactory = OntologyFactory()
    afactory = AssociationSetFactory()
    ont = ofactory.create('go')
    # The original opened the GAF file and never closed it. The handle is
    # kept open for the whole test body in case the set reads it lazily.
    with open(POMBASE, "r") as gaf_file:
        aset = afactory.create_from_gaf(gaf_file, ontology=ont)
        print(str(aset))
        genes = aset.query([INTRACELLULAR])
        for g in genes:
            print("G={} '{}'".format(g, aset.label(g)))
        assert G1 in genes
def test_alt_id():
    """
    Test alt_ids and replaced-by: for each obsolete node print either its
    replacement or a note that none exists.
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/alt_id_test.json')
    for node_id in ont.nodes():
        if not ont.is_obsolete(node_id):
            continue
        if not ont.replaced_by(node_id):
            print('OBS: {} no replacement'.format(node_id))
            continue
        print('{} --> {}'.format(node_id, ont.replaced_by(node_id)))
def test_remote_disease():
    """
    Factory test: create disease-to-phenotype associations for HUMAN over
    DOID and print those returned for PD.
    """
    ontology_factory = OntologyFactory()
    assoc_factory = AssociationSetFactory()
    doid = ontology_factory.create('doid')
    disease_assocs = assoc_factory.create(
        ontology=doid,
        subject_category='disease',
        object_category='phenotype',
        taxon=HUMAN,
    )
    results = disease_assocs.query_associations([PD])
    print("Gene Assocs to PD: {} {}".format(results, len(results)))
def get_ontology(id):
    """Return the ontology object for *id*, creating and caching it on demand.

    *id* is first translated to a handle via ``cfg['ontologies']`` (the last
    matching entry wins; without a match the id itself is the handle).
    Ontology objects are cached in ``omap`` keyed by handle.
    """
    handle = id
    for entry in cfg['ontologies']:
        if entry['id'] != id:
            continue
        logging.info("getting handle for id: {} from cfg".format(id))
        handle = entry['handle']

    if handle in omap:
        logging.info("Using cached for {}".format(handle))
        return omap[handle]

    logging.info("Creating a new ontology object for {}".format(handle))
    omap[handle] = OntologyFactory().create(handle)
    return omap[handle]
def test_subontology():
    """
    Extract a subClassOf-only subontology from the truncated GO fixture and
    check that GO:1990578 still has ancestors in it.
    """
    factory = OntologyFactory()
    print("Creating ont")
    full_ont = factory.create('tests/resources/go-truncated-pombase.json')
    print("ONT NODES: {}".format(full_ont.nodes()))

    is_a_only = full_ont.subontology(relations=['subClassOf'])
    PERM = 'GO:1990578'
    print("NODES: {}".format(is_a_only.nodes()))

    perm_ancestors = is_a_only.ancestors(PERM)
    print(str(perm_ancestors))
    assert len(perm_ancestors) > 0
def test_local_json_parse():
    """
    Load ontology from JSON

    Loads a truncated GO as the tbox ontology and a GO-CAM example (ttl) on
    top of it, renders the model, checks the types recorded for one
    individual, and renders the tbox subontology of the model's nodes as OBO.
    """
    factory = OntologyFactory()
    print("Creating ont")
    # tbox_ontology = factory.create('go')
    # TODO: create test module for this example
    tbox_ontology = factory.create('tests/resources/go-truncated-pombase.json')
    ont = factory.create('tests/resources/gocam-example.ttl',
                         tbox_ontology=tbox_ontology)
    g = ont.get_graph()
    nodes = ont.search('%')
    print("NODES: {}".format(nodes))
    w = GraphRenderer.create(None)
    w.write_subgraph(ont, nodes)
    i = 'http://model.geneontology.org/0000000300000001/0000000300000007'
    ni = g[i]
    print(str(ni))
    # The original evaluated this comparison and discarded the result, so
    # the expectation was never checked; it is now an assertion.
    assert ['GO:0060070'] == ni['types']
    nbrs = ont.neighbors(i)
    print("NEIGHBORS: {}".format(nbrs))
    subont = tbox_ontology.subontology(nodes, minimal=False)
    w = GraphRenderer.create('obo')
    print(w.render(subont))
def load_associations(self,
                      ontology_name: str = None,
                      subject_category: str = None,
                      object_category: str = None,
                      evidence=None,
                      taxon: str = None,
                      relation=None,
                      file: Union[str, TextIO] = None,
                      fmt: str = None,
                      skim: bool = False) -> None:
    """Create an association set and store it on ``self.associations``.

    The ontology is created from ``ontology_name`` (with
    ``subject_category`` passed as the second positional argument to the
    ontology factory); every other argument is forwarded unchanged to
    ``AssociationSetFactory.create``.
    """
    ontology = OntologyFactory().create(ontology_name, subject_category)
    factory_kwargs = dict(
        ontology=ontology,
        subject_category=subject_category,
        object_category=object_category,
        evidence=evidence,
        taxon=taxon,
        relation=relation,
        file=file,
        fmt=fmt,
        skim=skim,
    )
    self.associations = AssociationSetFactory().create(**factory_kwargs)
def initOntologies():
    """Load the fixed set of ontologies into the module-level ``ontologies`` map."""
    # 'chebi' is intentionally left out (it was disabled in the original list).
    handles = ('go', 'bfo', 'ro', 'cl', 'zfa', 'uberon', 'emapa')
    ofactory = OntologyFactory()
    print("Loading Ontologies...")
    for handle in handles:
        ontologies[handle] = ofactory.create(handle)
    print("Done.")
def test_obsolete():
    """
    Test obsoletion metadata: count obsolete nodes in the fixture and check
    the replaced-by values of GO:2 and GO:3.
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/obsolete.json')
    print("ONT NODES: {}".format(ont.nodes()))

    obsolete_count = 0
    for node_id in ont.nodes():
        if not ont.is_obsolete(node_id):
            continue
        print("OBS: {} {}".format(node_id, ont.label(node_id)))
        obsolete_count += 1
        replacement = ont.replaced_by(node_id)
        if replacement is not None:
            print("REPLACED BY: {} {}".format(replacement,
                                              ont.label(replacement)))

    assert ont.replaced_by('GO:2') == ['GO:1']
    assert ont.replaced_by('GO:3') == ['GO:1']
    assert obsolete_count == 3
def test_remote_go():
    """
    Factory test: mouse gene-to-function associations over GO (subClassOf
    and part-of only), checked with positive/negative queries and an
    enrichment test.
    """
    ontology_factory = OntologyFactory()
    assoc_factory = AssociationSetFactory()
    go = ontology_factory.create('go').subontology(
        relations=['subClassOf', PART_OF])
    aset = assoc_factory.create(ontology=go,
                                subject_category='gene',
                                object_category='function',
                                taxon=MOUSE)

    tf_genes = aset.query([TRANSCRIPTION_FACTOR], [])
    print("Mouse genes annotated to TF: {} {}".format(tf_genes, len(tf_genes)))
    for gene in tf_genes:
        print(" Gene: {} {}".format(gene, aset.label(gene)))

    nucleus_genes = aset.query([NUCLEUS], [])
    print("Mouse genes annotated to nucleus: {} {}".format(
        nucleus_genes, len(nucleus_genes)))
    assert (len(nucleus_genes) > 100)

    nucleus_tf = aset.query([TRANSCRIPTION_FACTOR, NUCLEUS], [])
    print("Mouse TF genes annotated to nucleus: {} {}".format(
        nucleus_tf, len(nucleus_tf)))
    assert (len(nucleus_tf) > 100)
    assert (len(nucleus_tf) < len(nucleus_genes))

    nucleus_non_tf = aset.query([NUCLEUS], [TRANSCRIPTION_FACTOR])
    print("Mouse non-TF genes annotated to nucleus: {} {}".format(
        nucleus_non_tf, len(nucleus_non_tf)))
    assert (len(nucleus_non_tf) > 100)
    assert (len(nucleus_non_tf) < len(nucleus_genes))

    # TF and non-TF nucleus genes must partition the nucleus set.
    assert (len(nucleus_tf) + len(nucleus_non_tf) == len(nucleus_genes))

    enrichment = aset.enrichment_test(subjects=tf_genes, labels=True)
    print("ENRICHMENT (tf): {}".format(enrichment))
    # Exactly one result row is expected for NUCLEUS.
    (nucleus_row,) = [row for row in enrichment if row['c'] == NUCLEUS]
    print("ENRICHMENT (tf) for NUCLEUS: {}".format(nucleus_row))
    assert nucleus_row['p'] < 0.00001
def todo_subontology():
    """
    Subontology extraction against the SciGraph-backed ontology (not yet a
    real test, hence the ``todo_`` prefix).
    """
    factory = OntologyFactory()
    print("Creating ont")
    scigraph_ont = factory.create('scigraph:ontology')
    print("ONT NODES: {}".format(scigraph_ont.nodes()))

    # TODO: implement this for SciGraph:
    is_a_only = scigraph_ont.subontology(relations=['subClassOf'])
    PERM = 'GO:1990578'
    print("NODES: {}".format(is_a_only.nodes()))

    perm_ancestors = is_a_only.ancestors(PERM, reflexive=True)
    print(str(perm_ancestors))
    for anc in perm_ancestors:
        print(" ANC: {} '{}'".format(anc, is_a_only.label(anc)))
    assert len(perm_ancestors) > 0

    from ontobio.io.ontol_renderers import GraphRenderer
    tree_writer = GraphRenderer.create('tree')
    tree_writer.write_subgraph(scigraph_ont, perm_ancestors)
def test_enrichment():
    """
    Enrichment: build two random gene sets with different term biases over
    PATO and print the enrichment results for each.
    """
    ont = OntologyFactory().create('pato')

    # (term, probability for set 'a', probability for set 'b');
    # gene set 'a' is biased to ploidy
    term_probabilities = [(QUALITY, 0.8, 0.8),
                          (PLOIDY, 0.8, 0.2),
                          (EUPLOID, 0.7, 0.01),
                          (SHAPE, 0.2, 0.75),
                          (Y_SHAPED, 0.01, 0.5)]
    association_map = {}
    genesets = {'a': [], 'b': []}
    for index in range(1, 100):
        for set_name in ['a', 'b']:
            # One random draw per term, in term order.
            drawn_terms = [
                term for (term, prob_a, prob_b) in term_probabilities
                if random.random() < (prob_a if set_name == 'a' else prob_b)
            ]
            gene = set_name + str(index)
            genesets[set_name].append(gene)
            association_map[gene] = drawn_terms

    logging.info(str(association_map))
    aset = AssociationSet(ontology=ont, association_map=association_map)
    logging.info(str(aset))

    print(str(genesets['a']))
    print(str(aset.enrichment_test(genesets['a'], labels=True)))
    print("EXPECTED: {} {}".format(PLOIDY, EUPLOID))

    print(str(aset.enrichment_test(genesets['b'], labels=True)))
    print("EXPECTED: {} {}".format(SHAPE, Y_SHAPED))
def test_factory():
    """
    Test the ontology factory with Wikidata as the source, using the
    'wdq:Q544006' handle (PTSD).
    """
    wd_ont = OntologyFactory().create('wdq:Q544006')
    for node_id in wd_ont.nodes():
        print('{} "{}"'.format(node_id, wd_ont.label(node_id)))

    anxiety_ids = wd_ont.search('anxiety%')
    assert len(anxiety_ids) > 0
    print(anxiety_ids)

    reachable = wd_ont.traverse_nodes(anxiety_ids, up=True, down=True)
    print(reachable)
    assert len(reachable) > 0

    reachable_labels = [wd_ont.label(node_id) for node_id in reachable]
    print(reachable_labels)
    # Note: it's possible wd may change rendering this false
    assert 'Fear of frogs' in reachable_labels

    from ontobio.io.ontol_renderers import GraphRenderer
    tree_writer = GraphRenderer.create('tree')
    tree_writer.write(wd_ont, query_ids=anxiety_ids)
def main():
    """
    Wrapper for Assoc Parsing

    Parses the command line, loads the ontology named by ``--resource``,
    builds the association parser selected by ``--format`` (default: gaf),
    runs the chosen subcommand, and finally writes the parser's markdown
    report to ``--messagefile`` or to stdout.

    Exits with a usage error when no subcommand is given or when
    ``--format`` names an unsupported format.
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for obographs assocmodel library'
        """

        By default, ontologies and assocs are cached locally and synced from a remote sparql endpoint
        """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-r', '--resource', type=str, required=False,
                        help='Name of ontology')
    parser.add_argument(
        '-f', '--file', type=str, required=False,
        help='Name of input file for associations - currently GAF is assumed')
    parser.add_argument('-F', '--format', type=str, required=False,
                        help='Format of assoc file. One of GAF, GPAD or HPOA')
    parser.add_argument('-o', '--outfile', type=str, required=False,
                        help='Path to output file')
    parser.add_argument('-m', '--messagefile', type=str, required=False,
                        help='Path to messages (report) markdown file')
    parser.add_argument('-t', '--to', type=str, required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument(
        "--filter-out", nargs="+", required=False, default=[],
        metavar="EVIDENCE",
        help="List of any evidence codes to filter out of the GAF. E.G. --filter-out IEA IMP")
    parser.add_argument("--filtered-file", required=False, default=None,
                        metavar="FILTERED_FILE",
                        help="File to write the filtered out evidence GAF to")
    parser.add_argument(
        '-T', '--taxon', nargs='*', required=False,
        help='valid taxon (NCBITaxon ID) - validate against this')
    parser.add_argument('--subject_prefix', nargs='*', required=False,
                        help='E.g PomBase - validate against this')
    parser.add_argument('--object_prefix', nargs='*', required=False,
                        help='E.g GO - validate against this')
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')

    subparsers = parser.add_subparsers(dest='subcommand',
                                       help='sub-command help')

    parser_n = subparsers.add_parser('validate', help='Validate associations')
    parser_n.set_defaults(function=validate_assocs)

    parser_n = subparsers.add_parser('filter', help='Filter associations')
    parser_n.set_defaults(function=filter_assocs)

    parser_n = subparsers.add_parser('convert', help='Convert associations')
    parser_n.set_defaults(function=convert_assocs)
    parser_n.add_argument('-t', '--to', type=str, required=True,
                          help='Format to convert to')

    parser_n = subparsers.add_parser('map2slim', help='Map to a subset/slim')
    parser_n.set_defaults(function=map2slim)
    parser_n.add_argument('-p', '--properties', nargs='*', type=str,
                          required=False, help='Properties')
    parser_n.add_argument('-s', '--subset', type=str, required=True,
                          help='subset (e.g. map2slim)')

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    # Without a subcommand argparse never sets ``function``; report that as
    # a usage error instead of failing later with an AttributeError.
    func = getattr(args, 'function', None)
    if func is None:
        parser.error("a sub-command is required")

    # TODO: use a factory
    parser_classes = {
        'gaf': GafParser,
        'gpad': GpadParser,
        'hpoa': HpoaParser,
        'gpi': entityparser.GpiParser,
    }
    fmt = 'gaf' if args.format is None else args.format.lower()
    # Reject unknown formats before the (potentially slow) ontology load;
    # previously this left the parser as None and crashed on ``p.config``.
    if fmt not in parser_classes:
        parser.error("unsupported format: {}".format(args.format))

    handle = args.resource

    # Ontology Factory
    ofactory = OntologyFactory()
    logging.info("Creating ont object from: {} {}".format(handle, ofactory))
    ont = ofactory.create(handle)
    logging.info("ont: {}".format(ont))

    # Upper case all evidence codes
    args.filter_out = [code.upper() for code in args.filter_out]

    filtered_evidence_file = None
    outfh = None
    try:
        # set configuration
        if args.filtered_file:
            filtered_evidence_file = open(args.filtered_file, "w")
        config = assocparser.AssocParserConfig(
            valid_taxa=args.taxon,
            ontology=ont,
            class_idspaces=args.object_prefix,
            entity_idspaces=args.subject_prefix,
            filter_out_evidence=args.filter_out,
            filtered_evidence_file=filtered_evidence_file)

        p = parser_classes[fmt]()
        if fmt == "gpi":
            # GPI files describe entities, not associations.
            func = validate_entity
        p.config = config

        if args.outfile is not None:
            two_mb = 2097152
            outfh = open(args.outfile, "w", buffering=two_mb)
        func(ont, args.file, outfh, p, args)
    finally:
        # Close the output handles even when the subcommand raises.
        if filtered_evidence_file:
            filtered_evidence_file.close()
        if outfh is not None:
            outfh.close()

    if args.messagefile is not None:
        with open(args.messagefile, "w") as mfh:
            mfh.write(p.report.to_markdown())
    else:
        print(p.report.to_markdown())
def _str2bool(value):
    """Convert a command-line string to a bool.

    ``argparse`` with ``type=bool`` treats every non-empty string, including
    "False", as True. Here the usual false spellings map to False; any other
    value keeps its previous meaning (True).
    """
    if isinstance(value, bool):
        return value
    return value.strip().lower() not in ('', '0', 'false', 'f', 'no', 'n')


def main():
    """
    Wrapper for OGR Assocs

    Parses the command line, loads the ontology named by ``--resource``,
    builds an association set (from ``--assocfile`` if given, otherwise via
    the association factory using ``--category`` and ``--taxon``) and
    dispatches to the function registered for the chosen subcommand.

    Raises:
        ValueError: if neither an association file nor both taxon and
            category are supplied.
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for obographs assocmodel library'
        """

        By default, ontologies and assocs are cached locally and synced from a remote sparql endpoint
        """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-r', '--resource', type=str, required=False,
                        help='Name of ontology')
    parser.add_argument('-f', '--assocfile', type=str, required=False,
                        help='Name of input file for associations')
    parser.add_argument(
        '--assocformat', type=str, default='gaf', required=False,
        help='Format of association file, if passed (default: gaf)')
    parser.add_argument('-o', '--outfile', type=str, required=False,
                        help='Path to output file')
    parser.add_argument('-t', '--to', type=str, required=False,
                        help='Output to (tree, dot, ...)')
    parser.add_argument('-d', '--direction', type=str, default='u',
                        required=False,
                        help='u = up, d = down, ud = up and down')
    parser.add_argument('-e', '--evidence', type=str, required=False,
                        help='ECO class')
    parser.add_argument('-p', '--properties', nargs='*', type=str,
                        required=False, help='Properties')
    parser.add_argument('-P', '--plot', type=_str2bool, default=False,
                        help='if set, plot output (requires plotly)')
    parser.add_argument('-y', '--yamlconfig', type=str, required=False,
                        help='Path to setup/configuration yaml file')
    parser.add_argument('-S', '--slim', type=str, default='', required=False,
                        help='Slim type. m=minimal')
    parser.add_argument('-c', '--container_properties', nargs='*', type=str,
                        required=False, help='Properties to nest in graph')
    parser.add_argument('-C', '--category', nargs=2, type=str, required=False,
                        help='category tuple (SUBJECT OBJECT)')
    parser.add_argument('-T', '--taxon', type=str, required=False,
                        help='Taxon of associations')
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')

    subparsers = parser.add_subparsers(dest='subcommand',
                                       help='sub-command help')

    # EXTRACT ONTOLOGY
    parser_n = subparsers.add_parser(
        'subontology',
        help='Extract sub-ontology, include only annotated nodes or their descendants')
    parser_n.add_argument('-M', '--minimal', dest='minimal',
                          action='store_true', default=False,
                          help='If set, remove non-MRCA nodes')
    parser_n.set_defaults(function=extract_ontology)

    # ENRICHMENT
    parser_n = subparsers.add_parser(
        'enrichment',
        help='Perform an enrichment test over a sample set of annotated entities')
    parser_n.add_argument(
        '-q', '--query', type=str,
        help='query all genes for this class an use as subject')
    parser_n.add_argument('-H', '--hypotheses', nargs='*',
                          help='list of classes to test against')
    parser_n.add_argument(
        '-s', '--sample_file', type=str,
        help='file containing list of gene IDs in sample set')
    parser_n.add_argument(
        '-b', '--background_file', type=str,
        help='file containing list of gene IDs in background set')
    parser_n.add_argument('-t', '--threshold', type=float,
                          help='p-value threshold')
    parser_n.add_argument('sample_ids', nargs='*',
                          help='list of gene IDs in sample set')
    parser_n.set_defaults(function=run_enrichment_test)

    # PHENOLOG
    parser_n = subparsers.add_parser(
        'phenolog',
        help='Perform multiple enrichment tests, using a second ontology and assoc set to build gene sets')
    # The two help texts below were swapped in the original: resource2 is
    # used as the ontology handle and file2 as the association file.
    parser_n.add_argument('-R', '--resource2', type=str, required=True,
                          help='handle for second ontology')
    parser_n.add_argument('-F', '--file2', type=str, required=True,
                          help='path to second GAF')
    parser_n.set_defaults(function=run_phenolog)

    # QUERY
    parser_n = subparsers.add_parser(
        'query',
        help='Query for entities (e.g. genes) based on positive and negative terms')
    parser_n.add_argument('-q', '--query', nargs='*', help='positive classes')
    parser_n.add_argument('-N', '--negative', type=str,
                          help='negative classes')
    parser_n.set_defaults(function=run_query)

    # QUERY ASSOCIATIONS
    parser_n = subparsers.add_parser(
        'associations',
        help='Query for associations for a set of entities (e.g. genes)')
    parser_n.add_argument('subjects', nargs='*', help='subject ids')
    parser_n.add_argument('-D', '--dendrogram', type=_str2bool, default=False)
    parser_n.set_defaults(function=run_query_associations)

    # INTERSECTIONS
    parser_n = subparsers.add_parser('intersections',
                                     help='Query intersections')
    parser_n.add_argument('-X', '--xterms', nargs='*', help='x classes')
    parser_n.add_argument('-Y', '--yterms', nargs='*', help='y classes')
    parser_n.add_argument('--useids', type=_str2bool, default=False,
                          help='if true, use IDs not labels on axes')
    parser_n.add_argument('terms', nargs='*', help='all terms (x and y)')
    parser_n.set_defaults(function=plot_intersections)

    # INTERSECTION DENDROGRAM (TODO: merge into previous?)
    parser_n = subparsers.add_parser('intersection-dendrogram',
                                     help='Plot dendrogram from intersections')
    parser_n.add_argument('-X', '--xterms', nargs='*', help='x classes')
    parser_n.add_argument('-Y', '--yterms', nargs='*', help='y classes')
    parser_n.add_argument('--useids', type=_str2bool, default=False,
                          help='if true, use IDs not labels on axes')
    parser_n.add_argument('terms', nargs='*', help='all terms (x and y)')
    parser_n.set_defaults(function=plot_term_intersection_dendrogram)

    # SIMILARITY MATRIX (may move to another module)
    parser_n = subparsers.add_parser(
        'simmatrix',
        help='Plot dendrogram for similarities between subjects')
    parser_n.add_argument('-X', '--xsubjects', nargs='*', help='x subjects')
    parser_n.add_argument('-Y', '--ysubjects', nargs='*', help='y subjects')
    parser_n.add_argument('--useids', type=_str2bool, default=False,
                          help='if true, use IDs not labels on axes')
    parser_n.add_argument('subjects', nargs='*', help='all terms (x and y)')
    parser_n.set_defaults(function=plot_simmatrix)

    args = parser.parse_args()

    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    # Without a subcommand argparse never sets ``function``; report that as
    # a usage error up front instead of an AttributeError after all loading.
    func = getattr(args, 'function', None)
    if func is None:
        parser.error("a sub-command is required")

    if not args.assocfile:
        if not args.taxon or not args.category:
            raise ValueError(
                "Must specify EITHER assocfile OR both taxon and category")

    logging.info("Welcome!")

    if args.yamlconfig is not None:
        logging.info("Setting config from: {}".format(args.yamlconfig))
        # note this sets a global:
        # we would not do this outside the context of a standalone script
        from ontobio.config import set_config
        set_config(args.yamlconfig)

    handle = args.resource

    # Ontology Factory
    ofactory = OntologyFactory()
    logging.info("Creating ont object from: {} {}".format(handle, ofactory))
    ont = ofactory.create(handle)
    logging.info("ont: {}".format(ont))

    # NOTE(review): ``evidence`` is normalised here but not passed on below.
    evidence = args.evidence
    if evidence is not None and evidence.lower() == 'noiea':
        evidence = "-ECO:0000501"

    # Association Factory
    afactory = AssociationSetFactory()
    aset = None
    if args.assocfile is not None:
        aset = afactory.create_from_file(file=args.assocfile,
                                         fmt=args.assocformat,
                                         ontology=ont)
    else:
        [subject_category, object_category] = args.category
        # create using GO/Monarch services
        aset = afactory.create(ontology=ont,
                               subject_category=subject_category,
                               object_category=object_category,
                               taxon=args.taxon)

    func(ont, aset, args)
def load_ontology(ont):
    """Announce the (possibly slow) load, then create and return the ontology for handle *ont*."""
    print('loading ontology -- this can take a while')
    return OntologyFactory().create(ont)
def test_graph():
    """
    Load the nucleus fixture from JSON and exercise graph traversal
    (ancestors, parents, descendants, children), xrefs, definitions,
    synonyms, subsets, search and subontology extraction.
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/nucleus.json')

    print("ICP: {}".format(ont.node(INTRACELLULAR_PART)))
    assert ont.label(INTRACELLULAR_PART) == 'intracellular part'

    nx_graph = ont.get_graph()
    print("GRAPH: {}".format(nx_graph.nodes))

    # --- ancestors ---
    anc_ids = ont.ancestors(NUCLEUS)
    print("ANCS nucleus (all): {}".format(anc_ids))
    assert CELL in anc_ids
    assert CELLULAR_COMPONENT in anc_ids
    assert INTRACELLULAR in anc_ids
    assert NUCLEUS not in anc_ids

    anc_ids = ont.ancestors(INTRACELLULAR_PART)
    print("ANCS intracellular part(all): {}".format(anc_ids))
    assert CELL in anc_ids
    assert CELLULAR_COMPONENT in anc_ids
    assert NUCLEUS not in anc_ids

    anc_ids = ont.ancestors(INTRACELLULAR_PART, relations=['subClassOf'])
    print("ANCS intracellular part(subclass): {}".format(anc_ids))
    assert CELLULAR_COMPONENT in anc_ids
    assert CELL not in anc_ids
    assert NUCLEUS not in anc_ids

    # note: queries over *only* part_of are a non-use case, as we
    # typically always include subClassOf, due to how these chain
    # together according to OWL semantics
    anc_ids = ont.ancestors(INTRACELLULAR_PART, relations=[PART_OF])
    print("ANCS intracellular part(part_of): {}".format(anc_ids))
    assert INTRACELLULAR in anc_ids
    assert CELL not in anc_ids
    assert NUCLEUS not in anc_ids

    # --- parents ---
    parent_ids = ont.parents(INTRACELLULAR_PART)
    print("PARENTS intracellular (all): {}".format(parent_ids))
    assert INTRACELLULAR in parent_ids
    assert CELL_PART in parent_ids
    assert CELLULAR_COMPONENT not in parent_ids
    assert NUCLEUS not in parent_ids

    parent_ids = ont.parents(INTRACELLULAR_PART, relations=[PART_OF])
    print("PARENTS intracellular (part_of): {}".format(parent_ids))
    assert INTRACELLULAR in parent_ids
    assert CELL_PART not in parent_ids
    assert CELLULAR_COMPONENT not in parent_ids
    assert NUCLEUS not in parent_ids

    # --- descendants ---
    desc_ids = ont.descendants(INTRACELLULAR_PART)
    print("DECS: {}".format(desc_ids))
    assert NUCLEUS in desc_ids
    assert CELL not in desc_ids

    desc_ids = ont.descendants(INTRACELLULAR, relations=[PART_OF])
    print("DECS: {}".format(desc_ids))
    assert INTRACELLULAR_PART in desc_ids
    assert NUCLEUS not in desc_ids
    assert CELL not in desc_ids

    # --- children ---
    child_ids = ont.children(INTRACELLULAR)
    print("CHILDREN (all): {}".format(child_ids))
    assert [INTRACELLULAR_PART] == child_ids

    child_ids = ont.children(CELL_PART)
    print("CHILDREN (all): {}".format(child_ids))
    assert INTRACELLULAR_PART in child_ids
    assert INTRACELLULAR in child_ids

    child_ids = ont.children(INTRACELLULAR, relations=[PART_OF])
    print("CHILDREN (po): {}".format(child_ids))
    assert INTRACELLULAR_PART in child_ids
    assert NUCLEUS not in child_ids
    assert CELL not in child_ids

    # --- xrefs ---
    cell_xrefs = ont.xrefs(CELL)
    print("XREFS (from GO): {}".format(cell_xrefs))
    assert WIKIPEDIA_CELL in cell_xrefs
    assert NIF_CELL in cell_xrefs
    assert len(cell_xrefs) == 2

    # xrefs are bidirectional
    wp_xrefs = ont.xrefs(WIKIPEDIA_CELL, bidirectional=True)
    print("XREFS (from WP, bidi): {}".format(wp_xrefs))
    assert CELL in wp_xrefs
    assert len(wp_xrefs) == 1

    # xrefs queries unidirectional by default
    wp_xrefs = ont.xrefs(WIKIPEDIA_CELL)
    print("XREFS (from WP): {}".format(wp_xrefs))
    assert len(wp_xrefs) == 0

    # --- definitions ---
    text_def = ont.text_definition(NUCLEUS)
    print("TDEF: {}".format(text_def))
    assert text_def.xrefs == ["GOC:go_curators"]
    assert text_def.val.startswith(
        "A membrane-bounded organelle of eukaryotic cells in which")

    # Exactly one logical definition is expected.
    (logical_def,) = ont.logical_definitions(INTRACELLULAR_PART)
    print("LDEF: {}".format(logical_def))
    assert logical_def.genus_ids == [CELLULAR_COMPONENT]
    assert logical_def.restrictions == [(PART_OF, INTRACELLULAR)]

    # --- synonyms ---
    synonyms = ont.synonyms(CELL_PART, include_label=True)
    print("SYNS: {}".format(synonyms))
    (protoplast,) = [syn for syn in synonyms if syn.val == 'protoplast']
    assert protoplast.pred == 'hasRelatedSynonym'
    assert protoplast.xrefs == ['GOC:mah']

    # --- subsets ---
    GOSLIM = 'goslim_generic'
    nucleus_subsets = ont.subsets(NUCLEUS)
    print("SUBSETS: {}".format(nucleus_subsets))
    assert GOSLIM in nucleus_subsets
    assert len(nucleus_subsets) > 0

    slim_members = ont.extract_subset(GOSLIM)
    print("IN SLIM: {}".format(slim_members))
    assert len(slim_members) > 0
    assert NUCLEUS in slim_members

    # --- search / membership ---
    # logging.basicConfig(level=logging.DEBUG)
    assert [] == ont.search('protoplast', synonyms=False)
    assert {CELL_PART, INTRACELLULAR} == set(
        ont.search('protoplast', synonyms=True))

    assert ont.has_node(CELL_PART)
    assert not ont.has_node('FOO:123')

    # ensure subontology retains properties
    cell_and_below = ont.descendants(CELL, reflexive=True)
    subont = ont.subontology(nodes=cell_and_below)

    synonyms = subont.synonyms(CELL_PART, include_label=True)
    print("SYNS: {}".format(synonyms))
    (protoplast,) = [syn for syn in synonyms if syn.val == 'protoplast']
    assert protoplast.pred == 'hasRelatedSynonym'
    assert protoplast.xrefs == ['GOC:mah']

    assert subont.parents(NUCLEUS) == [IMBO]

    from ontobio import GraphRenderer
    obo_writer = GraphRenderer.create('obo')
    obo_writer.write(subont, query_ids=[CELL, CELL_PART, NUCLEUS])
import requests import ontobio.sparql.wikidata as wd from ontobio.assoc_factory import AssociationSetFactory from dipper.graph.RDFGraph import RDFGraph from scigraph.api.SciGraph import SciGraph import re import logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # Make ontology from wikidata onto_factory = OntologyFactory() wd_ontology = onto_factory.create('wdq:Q185034') # Sickle cell anemia qids = wd_ontology.search('Sickle%') # Traverse up and down from query node in our sub-ontology nodes = wd_ontology.traverse_nodes(qids, up=True, down=True) renderer = GraphRenderer.create('obo') renderer.outfile = './output/wd-ontology.obo' # renderer.write(wd_ontology) # >> AttributeError: 'EagerWikidataOntology' object has no attribute 'all_logical_definitions' renderer.write_subgraph(wd_ontology, nodes, query_ids=qids) # Get GO terms outfile = open('./output/go-terms.tsv', 'w')