def learn(resource, input, outdir, target_assocfile, target_ontology, target_root_class):
    """
    Learn association rules
    """
    logging.basicConfig(level=logging.INFO)
    afa = AssociationSetFactory()
    ofa = OntologyFactory()
    ont = ofa.create(resource)
    aset = afa.create_from_file(file=input, ontology=ont, fmt=None)
    learner = ol.OntologyLearner(assocs=aset)
    isa_ont = ont.subontology(relations=['subClassOf'])
    if target_root_class:
        learner.split_assocs(target_root_class, ontology=isa_ont)
    if target_ontology:
        learner.target_ontology = ofa.create(target_ontology)
    if target_assocfile:
        tont = ont
        if learner.target_ontology is not None:
            tont = learner.target_ontology
        learner.target_assocs = afa.create_from_file(target_assocfile, ontology=tont, fmt=None)
    with open(outdir + '/index.md', 'w') as file:
        learner.fit_all(dir=outdir, reportfile=file)

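# Usage sketch (hedged: the file names below are hypothetical, not part of
# this repo). Learns rules for a GAF association file against GO, writing
# the report index to outdir/index.md as above:
#
#   learn(resource='go',
#         input='my-associations.gaf',
#         outdir='target/learned-rules',
#         target_assocfile=None,
#         target_ontology=None,
#         target_root_class=None)
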
def load_from_files(self, files: List[str]) -> None:
    """
    Loads an ontology from one or more obojson files

    :param files: list of files in obojson format
    :return:
    """
    factory = OntologyFactory()
    ont = None
    for file in files:
        if ont is None:
            ont = factory.create(file)
        else:
            # merge() takes a list of ontologies (see test_merge below)
            ont.merge([factory.create(file)])
    self.ontology = ont

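# Usage sketch (hypothetical file names): merge two obojson files into a
# single ontology, assuming `obj` is an instance of the class that defines
# load_from_files:
#
#   obj.load_from_files(['anatomy-a.json', 'anatomy-b.json'])
#   print(obj.ontology.nodes())
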
def test_merge():
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    ont2 = Ontology()
    ont2.merge([ont])
    assert ont2.xref_graph is not None

def test_lexmap_multi():
    """
    Test lexical mapping across multiple ontologies
    """
    factory = OntologyFactory()
    print("Creating ont")
    files = ['x', 'm', 'h', 'bto']
    onts = [factory.create('tests/resources/autopod-{}.json'.format(f)) for f in files]
    lexmap = LexicalMapEngine()
    lexmap.index_ontologies(onts)
    #print(lexmap.lmap)
    #print(ont.all_synonyms())
    g = lexmap.get_xref_graph()
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
    for x, y, d in g.edges_iter(data=True):
        cl = nx.ancestors(g, x)
        print("{} '{}' <-> {} '{}' :: {} CLOSURE={}".format(
            x, lexmap.label(x), y, lexmap.label(y), d, len(cl)))
        cpr = d[lexmap.CONDITIONAL_PR]
        assert cpr > 0 and cpr <= 1.0
    unmapped = lexmap.unmapped_nodes(g)
    print('U: {}'.format(len(unmapped)))
    unmapped = lexmap.unmapped_nodes(g, rs_threshold=4)
    print('U4: {}'.format(len(unmapped)))
    cliques = lexmap.cliques(g)
    maxc = max(cliques, key=len)
    print('CLIQUES: {}'.format(cliques))
    print('MAX CLIQUES: {}'.format(maxc))
    df = lexmap.as_dataframe(g)
    print(df.to_csv(sep="\t"))

def test_subontology():
    """
    Test subontology extraction
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('go')
    assert ont.is_obsolete('GO:0000267')
    print("ONT NODES: {}".format(ont.nodes()))
    subont = ont.subontology(relations=['subClassOf'])
    PERM = 'GO:1990578'
    print("NODES: {}".format(subont.nodes()))
    ancs = subont.ancestors(PERM, reflexive=True)
    print(str(ancs))
    for a in ancs:
        print(" ANC: {} '{}'".format(a, subont.label(a)))
    assert len(ancs) > 0
    assert subont.is_obsolete('GO:0000267')
    w = GraphRenderer.create('tree')
    w.write_subgraph(ont, ancs)
    # TODO: sub-ontology does not create full metadata
    w = GraphRenderer.create('obo')
    w.write_subgraph(ont, ancs)

def test_dynamic_query():
    """
    Dynamic query
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('pato')
    ids = ont.sparql(body="{?x rdfs:subClassOf+ " + SHAPE + "}",
                     inject_prefixes=ont.prefixes(),
                     single_column=True)
    assert Y_SHAPED in ids
    assert ABSENT not in ids

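# For reference (hedged: assuming SHAPE holds the CURIE of PATO's 'shape'
# class), the sparql() call above is roughly equivalent to a query like:
#
#   SELECT ?x WHERE { ?x rdfs:subClassOf+ <shape> }
#
# i.e. all transitive subclasses of shape.
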
def test_expand():
    """
    Test expansion of a TSV column of term IDs into labels
    """
    factory = OntologyFactory()
    ontobj = factory.create("tests/resources/goslim_pombe.json")
    expand_tsv(INPUT, ontology=ontobj, outfile=open(OUTPUT, "w"), cols=["term"])
    reader = csv.DictReader(open(OUTPUT, "r"), delimiter='\t')
    n = 0
    for row in reader:
        if row['term'] == 'GO:0002181':
            assert row['term_label'] == 'cytoplasmic translation'
            n += 1
        if row['term'] == 'FAKE:123':
            # unresolvable IDs get an empty label
            assert row['term_label'] == ''
            n += 1
    assert n == 2

def test_lexmap_basic():
    """
    Test basic lexical mapping
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    lexmap = LexicalMapEngine()
    lexmap.index_ontology(ont)
    print(lexmap.lmap)
    print(ont.all_synonyms())
    g = lexmap.get_xref_graph()
    for x, y, d in g.edges_iter(data=True):
        print("{}<->{} :: {}".format(x, y, d))
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
    assert g.has_edge('Z:2', 'ZZ:2')  # roman numerals
    assert g.has_edge('Z:2', 'Y:2')   # case insensitivity
    assert g.has_edge('A:1', 'B:1')   # synonyms
    assert g.has_edge('B:1', 'A:1')   # bidirectional
    for x, y, d in g.edges_iter(data=True):
        print("{}<->{} :: {}".format(x, y, d))
        cpr = d[lexmap.CONDITIONAL_PR]
        assert cpr > 0 and cpr <= 1.0
    df = lexmap.as_dataframe(g)
    print(df.to_csv(sep="\t"))

    lexmap = LexicalMapEngine(
        config=dict(synsets=[dict(word="", synonym="ignoreme", weight=-2.0)],
                    normalized_form_confidence=0.25,
                    abbreviation_confidence=0.5,
                    meaningful_ids=True,
                    ontology_configurations=[dict(prefix='AA',
                                                  normalized_form_confidence=-1000)]))

    assert len(lexmap._get_config_val('NULL', 'synsets')) == 1
    assert lexmap._normalize_label('ignoreme foo', {'ignoreme': ''}) == 'foo'
    assert lexmap._normalize_label('replaceme foo', {'replaceme': 'zz'}) == 'foo zz'

    ont.add_node('TEST:1', 'foo bar')
    ont.add_node('TEST:2', 'bar foo')
    ont.add_node('TEST:3', 'foo bar')
    ont.add_node('TEST:4', 'wiz')
    syn = Synonym('TEST:4', val='bar foo', pred='hasRelatedSynonym')
    ont.add_synonym(syn)
    ont.add_node('http://x.org/wiz#FooBar')
    ont.add_node('TEST:6', '123')
    ont.add_node('TEST:7', '123')
    ont.add_node('TEST:8', 'bar ignoreme foo')
    ont.add_node('AA:1', 'foo bar')
    ont.add_node('AA:2', 'bar foo')
    ont.add_node('ABBREV:1', 'ABCD')
    ont.add_node('ABBREV:2', 'ABCD')
    for s in ont.synonyms('TEST:4'):
        print('S={}'.format(s))
    lexmap.index_ontology(ont)

    g = lexmap.get_xref_graph()
    for x, d in g['TEST:1'].items():
        print('XREF: {} = {}'.format(x, d))
    assert g.has_edge('TEST:1', 'TEST:2')  # normalized
    logging.info('E 1-2 = {}'.format(g['TEST:1']['TEST:2']))
    assert int(g['TEST:1']['TEST:2']['score']) == 25
    assert int(g['TEST:1']['TEST:3']['score']) == 100
    assert int(g['TEST:1']['TEST:4']['score']) < 25
    assert g.has_edge('TEST:3', 'http://x.org/wiz#FooBar')  # IDs and CamelCase
    assert not g.has_edge('TEST:6', 'TEST:7')  # should omit syns with no alphanumeric

    # test exclude normalized form
    assert not g.has_edge('AA:1', 'AA:2')

    # test custom synsets are used
    assert g.has_edge('TEST:8', 'TEST:2')
    assert g.has_edge('TEST:8', 'AA:2')
    assert not g.has_edge('TEST:8', 'AA:1')  # do not normalize AAs

    assert lexmap.smap['ABBREV:1'][0].is_abbreviation()
    assert lexmap.smap['ABBREV:2'][0].is_abbreviation()
    assert g.has_edge('ABBREV:1', 'ABBREV:2')
    assert int(g['ABBREV:1']['ABBREV:2']['score']) == 25

    df = lexmap.unmapped_dataframe(g)
    print(df.to_csv())

def test_remote_sparql_pato():
    """
    Load ontology from remote SPARQL endpoint
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('pato')

    ploidy = ont.node(PLOIDY)
    print("PLOIDY: {}".format(ploidy))
    assert ont.label(PLOIDY) == 'ploidy'

    # exact match
    search_results = ont.search('shape')
    print("SEARCH (exact): {}".format(search_results))
    assert [SHAPE] == search_results

    # implicit regexp
    search_results = ont.search('%shape%')
    print("SEARCH (re, implicit): {}".format(search_results))
    assert SHAPE in search_results
    assert len(search_results) > 10

    # explicit regexp
    search_results = ont.search('.*shape.*', is_regex=True)
    print("SEARCH (re, explicit): {}".format(search_results))
    assert SHAPE in search_results
    assert len(search_results) > 10

    # syns
    syn = 'cone-shaped'
    search_results = ont.search(syn, synonyms=False)
    print("SEARCH (no syns): {}".format(search_results))
    assert [] == search_results

    #search_results = ont.search(syn, synonyms=True)
    #print("SEARCH (with syns): {}".format(search_results))
    #assert [CONICAL] == search_results

    num_nodes = 0
    for n in ont.nodes():
        num_nodes = num_nodes + 1
    assert num_nodes > 100

    ancs = ont.ancestors(PLOIDY)
    print("ANCS ploidy (all): {}".format(ancs))
    assert QUALITY in ancs
    assert PENTAPLOID not in ancs

    ancs = ont.ancestors(PLOIDY, relations=['subClassOf'])
    print("ANCS ploidy (subClassOf): {}".format(ancs))
    assert QUALITY in ancs
    assert PENTAPLOID not in ancs

    # this is a non-use case
    ancs = ont.ancestors(SWOLLEN, relations=[HAS_PART])
    print("ANCS swollen (has_part): {}".format(ancs))
    assert INCREASED_SIZE in ancs
    assert PROTRUDING in ancs
    assert len(ancs) == 2

    ancs = ont.ancestors(SWOLLEN, relations=['subClassOf'])
    print("ANCS swollen (subClassOf): {}".format(ancs))
    assert MORPHOLOGY in ancs
    assert QUALITY in ancs
    assert PROTRUDING not in ancs

    decs = ont.descendants(PLOIDY)
    print("DECS ploidy (all): {}".format(decs))
    assert QUALITY not in decs
    assert EUPLOID in decs
    assert PENTAPLOID in decs

    # this is a non-use case
    decs = ont.descendants(INCREASED_SIZE, relations=[HAS_PART])
    print("DECS increased size (has_part): {}".format(decs))
    assert SWOLLEN in decs
    assert len(decs) == 1

    subsets = ont.subsets()
    print("SUBSETS: {}".format(subsets))

    slim = ont.extract_subset('absent_slim')
    print("SLIM: {}".format(slim))
    assert ABSENT in slim
    assert QUALITY not in slim

    syns = ont.synonyms(INCREASED_SIZE)
    print("SYNS: {}".format(syns))
    syn_vals = [syn.val for syn in syns]
    assert 'big' in syn_vals
    [bigsyn] = [syn for syn in syns if syn.val == 'big']
    # TODO xrefs
    assert not bigsyn.exact_or_label()
    assert bigsyn.scope() == 'RELATED'

    w = GraphRenderer.create('obo')
    w.write_subgraph(ont, [INCREASED_SIZE])

def expand(tsvfile, cols, ontology, output):
    """
    Expand a TSV file, adding label columns for the given ontology term ID columns
    """
    factory = OntologyFactory()
    ontobj = factory.create(ontology)
    expand_tsv(tsvfile, ontology=ontobj, outfile=output, cols=cols)

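# Usage sketch (hypothetical file names): add a term_label column for each
# ID in the 'term' column of a TSV, mirroring test_expand above:
#
#   expand('terms.tsv', cols=['term'],
#          ontology='tests/resources/goslim_pombe.json',
#          output=open('terms-expanded.tsv', 'w'))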