Example #1
def learn(resource, input, outdir, target_assocfile, target_ontology,
          target_root_class):
    """
    Learn association rules
    """
    logging.basicConfig(level=logging.INFO)

    afa = AssociationSetFactory()
    ofa = OntologyFactory()

    ont = ofa.create(resource)
    aset = afa.create_from_file(file=input, ontology=ont, fmt=None)

    learner = ol.OntologyLearner(assocs=aset)
    isa_ont = ont.subontology(relations=['subClassOf'])

    if target_root_class:
        learner.split_assocs(target_root_class, ontology=isa_ont)

    if target_ontology:
        learner.target_ontology = ofa.create(target_ontology)
    if target_assocfile:
        tont = ont
        if learner.target_ontology is not None:
            tont = learner.target_ontology
        learner.target_assocs = afa.create_from_file(target_assocfile,
                                                     ontology=tont,
                                                     fmt=None)

    with open(outdir + '/index.md', 'w') as file:
        learner.fit_all(dir=outdir, reportfile=file)
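The excerpt omits its imports; in ontobio, OntologyFactory lives in ontobio.ontol_factory and AssociationSetFactory in ontobio.assoc_factory, while the ol alias for the learner module is not shown. A hypothetical invocation, assuming a GAF association file and an existing output directory (the function opens outdir + '/index.md' for writing):

# Hypothetical call; the file name and output directory are assumptions.
learn(resource='go',
      input='gene_associations.gaf',
      outdir='target/learned-rules',
      target_assocfile=None,
      target_ontology=None,
      target_root_class=None)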
Example #2
def load_from_files(self, files: List[str]) -> None:
    """
    Load an ontology by merging a list of obojson files
    :param files: list of files in obojson format
    :return:
    """
    factory = OntologyFactory()
    ont = None
    for file in files:
        if ont is None:
            ont = factory.create(file)
        else:
            ont.merge(factory.create(file))
    self.ontology = ont
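Hypothetical usage; OntologyHolder is a stand-in name for whatever class defines this method, and the file names are assumptions:

# OntologyHolder and the file names are hypothetical.
holder = OntologyHolder()
holder.load_from_files(['go-basic.json', 'cl-basic.json'])
ont = holder.ontology  # the merged ontology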
Example #3
def test_merge():
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    ont2 = Ontology()
    ont2.merge([ont])
    assert ont2.xref_graph is not None
Example #4
def test_lexmap_multi():
    """
    Text lexical mapping
    """
    factory = OntologyFactory()
    print("Creating ont")
    files = ['x', 'm', 'h', 'bto']
    onts = [
        factory.create('tests/resources/autopod-{}.json'.format(f))
        for f in files
    ]
    lexmap = LexicalMapEngine()
    lexmap.index_ontologies(onts)
    #print(lexmap.lmap)
    #print(ont.all_synonyms())
    g = lexmap.get_xref_graph()
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
    for x, y, d in g.edges(data=True):  # edges_iter() under NetworkX 1.x
        cl = nx.ancestors(g, x)
        print("{} '{}' <-> {} '{}' :: {} CLOSURE={}".format(
            x, lexmap.label(x), y, lexmap.label(y), d, len(cl)))
        cpr = d[lexmap.CONDITIONAL_PR]
        assert cpr > 0 and cpr <= 1.0
    unmapped = lexmap.unmapped_nodes(g)
    print('U: {}'.format(len(unmapped)))
    unmapped = lexmap.unmapped_nodes(g, rs_threshold=4)
    print('U4: {}'.format(len(unmapped)))

    cliques = lexmap.cliques(g)
    maxc = max(cliques, key=len)
    print('CLIQUES: {}'.format(cliques))
    print('MAX CLIQUES: {}'.format(maxc))
    df = lexmap.as_dataframe(g)
    print(df.to_csv(sep="\t"))
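To keep the mappings rather than print them, the frame can go straight to disk, and the members of the largest clique can be labeled using only calls already shown above (the output path is an assumption):

# Persist the mappings as TSV (path hypothetical) and label the largest clique.
df.to_csv('target/autopod-mappings.tsv', sep='\t', index=False)
for n in maxc:
    print('{} {}'.format(n, lexmap.label(n)))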
Example #5
def test_subontology():
    """
    subontology
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('go')
    assert ont.is_obsolete('GO:0000267')
    print("ONT NODES: {}".format(ont.nodes()))
    subont = ont.subontology(relations=['subClassOf'])
    PERM = 'GO:1990578'
    print("NODES: {}".format(subont.nodes()))
    ancs = subont.ancestors(PERM, reflexive=True)
    print(str(ancs))
    for a in ancs:
        print(" ANC: {} '{}'".format(a, subont.label(a)))
    assert len(ancs) > 0

    assert subont.is_obsolete('GO:0000267')

    w = GraphRenderer.create('tree')
    w.write_subgraph(ont, ancs)

    # TODO: sub-ontology does not create
    # full metadata
    w = GraphRenderer.create('obo')
    w.write_subgraph(ont, ancs)
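The same pattern can be distilled into a small helper; the function name is hypothetical, but the body uses only calls exercised in the test above:

# Hypothetical helper: render the reflexive subClassOf closure of a term.
def render_isa_closure(ont, term_id, fmt='tree'):
    isa = ont.subontology(relations=['subClassOf'])
    ancs = isa.ancestors(term_id, reflexive=True)
    w = GraphRenderer.create(fmt)
    w.write_subgraph(ont, ancs)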
Example #6
def test_dynamic_query():
    """
    Dynamic query
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('pato')

    ids = ont.sparql(body="{?x rdfs:subClassOf+ " + SHAPE + "}",
                     inject_prefixes=ont.prefixes(),
                     single_column=True)
    assert Y_SHAPED in ids
    assert ABSENT not in ids
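SHAPE, Y_SHAPED, and ABSENT are module-level constants not shown in the excerpt. Example #9 below shows that an exact-label search for 'shape' returns exactly [SHAPE], which suggests one way to recover them at runtime; the other two labels are assumptions based on the variable names:

[SHAPE] = ont.search('shape')        # exact match; see Example #9
[Y_SHAPED] = ont.search('Y-shaped')  # label is an assumption
[ABSENT] = ont.search('absent')      # label is an assumption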
Example #7
def test_expand():
    factory = OntologyFactory()
    ontobj = factory.create("tests/resources/goslim_pombe.json")
    expand_tsv(INPUT,
               ontology=ontobj,
               outfile=open(OUTPUT, "w"),
               cols=["term"])
    reader = csv.DictReader(open(OUTPUT, "r"), delimiter='\t')
    n = 0
    for row in reader:
        if row['term'] == 'GO:0002181':
            assert row['term_label'] == 'cytoplasmic translation'
            n += 1
        if row['term'] == 'FAKE:123':
            assert row['term_label'] == ''
            n += 1
    assert n == 2
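INPUT and OUTPUT are fixture paths defined elsewhere in the test module. The assertions imply what they must contain; a minimal stand-in (names and paths are assumptions) would be:

# Hypothetical fixtures matching the assertions above.
INPUT = 'tests/resources/expand-input.tsv'  # has a 'term' column containing
                                            # GO:0002181 and FAKE:123 rows
OUTPUT = 'target/expand-output.tsv'         # expand_tsv adds a 'term_label' column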
Example #8
def test_lexmap_basic():
    """
    Text lexical mapping
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    lexmap = LexicalMapEngine()
    lexmap.index_ontology(ont)

    print(lexmap.lmap)
    print(ont.all_synonyms())
    g = lexmap.get_xref_graph()
    for x, y, d in g.edges(data=True):  # edges_iter() under NetworkX 1.x
        print("{}<->{} :: {}".format(x, y, d))
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
    assert g.has_edge('Z:2', 'ZZ:2')  # roman numerals
    assert g.has_edge('Z:2', 'Y:2')  # case insensitivity
    assert g.has_edge('A:1', 'B:1')  # synonyms
    assert g.has_edge('B:1', 'A:1')  # bidirectional
    for x, y, d in g.edges(data=True):  # edges_iter() under NetworkX 1.x
        print("{}<->{} :: {}".format(x, y, d))
        cpr = d[lexmap.CONDITIONAL_PR]
        assert cpr > 0 and cpr <= 1.0

    df = lexmap.as_dataframe(g)
    print(df.to_csv(sep="\t"))

    lexmap = LexicalMapEngine(
        config=dict(synsets=[dict(word="", synonym="ignoreme", weight=-2.0)],
                    normalized_form_confidence=0.25,
                    abbreviation_confidence=0.5,
                    meaningful_ids=True,
                    ontology_configurations=[
                        dict(prefix='AA', normalized_form_confidence=-1000)
                    ]))

    assert len(lexmap._get_config_val('NULL', 'synsets')) == 1
    assert lexmap._normalize_label('ignoreme foo', {'ignoreme': ''}) == 'foo'
    assert lexmap._normalize_label('replaceme foo',
                                   {'replaceme': 'zz'}) == 'foo zz'

    ont.add_node('TEST:1', 'foo bar')
    ont.add_node('TEST:2', 'bar foo')
    ont.add_node('TEST:3', 'foo bar')
    ont.add_node('TEST:4', 'wiz')
    syn = Synonym('TEST:4', val='bar foo', pred='hasRelatedSynonym')
    ont.add_synonym(syn)
    ont.add_node('http://x.org/wiz#FooBar')
    ont.add_node('TEST:6', '123')
    ont.add_node('TEST:7', '123')
    ont.add_node('TEST:8', 'bar ignoreme foo')
    ont.add_node('AA:1', 'foo bar')
    ont.add_node('AA:2', 'bar foo')
    ont.add_node('ABBREV:1', 'ABCD')
    ont.add_node('ABBREV:2', 'ABCD')
    for s in ont.synonyms('TEST:4'):
        print('S={}'.format(s))
    lexmap.index_ontology(ont)
    g = lexmap.get_xref_graph()
    for x, d in g['TEST:1'].items():
        print('XREF: {} = {}'.format(x, d))
    assert g.has_edge('TEST:1', 'TEST:2')  # normalized
    logging.info('E 1-2 = {}'.format(g['TEST:1']['TEST:2']))
    assert int(g['TEST:1']['TEST:2']['score']) == 25
    assert int(g['TEST:1']['TEST:3']['score']) == 100
    assert int(g['TEST:1']['TEST:4']['score']) < 25
    assert g.has_edge('TEST:3', 'http://x.org/wiz#FooBar')  # IDs and CamelCase
    assert not g.has_edge('TEST:6',
                          'TEST:7')  # should omit syns with no alphanumeric

    # test exclude normalized form
    assert not g.has_edge('AA:1', 'AA:2')

    # test custom synsets are used
    assert g.has_edge('TEST:8', 'TEST:2')
    assert g.has_edge('TEST:8', 'AA:2')
    assert not g.has_edge('TEST:8', 'AA:1')  # do not normalize AAs

    assert lexmap.smap['ABBREV:1'][0].is_abbreviation()
    assert lexmap.smap['ABBREV:2'][0].is_abbreviation()
    assert g.has_edge('ABBREV:1', 'ABBREV:2')
    assert int(g['ABBREV:1']['ABBREV:2']['score']) == 25

    df = lexmap.unmapped_dataframe(g)
    print(df.to_csv())
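A minimal follow-on sketch, using only plain loops over the graph already built and the 'score' attribute asserted above, that picks the best-scoring candidate for each node:

# Best-scoring xref candidate per node; no extra ontobio API assumed.
best = {}
for x, y, d in g.edges(data=True):
    score = float(d['score'])
    for a, b in ((x, y), (y, x)):
        if score > best.get(a, ('', -1.0))[1]:
            best[a] = (b, score)
for node, (partner, score) in sorted(best.items()):
    print('{}\t{}\t{}'.format(node, partner, score))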
Example #9
def test_remote_sparql_pato():
    """
    Load ontology from remote SPARQL endpoint
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('pato')

    ploidy = ont.node(PLOIDY)
    print("PLOIDY: {}".format(ploidy))
    assert ont.label(PLOIDY) == 'ploidy'

    # exact match
    search_results = ont.search('shape')
    print("SEARCH (exact): {}".format(search_results))
    assert [SHAPE] == search_results

    # implicit regexp
    search_results = ont.search('%shape%')
    print("SEARCH (re, implicit): {}".format(search_results))
    assert SHAPE in search_results
    assert len(search_results) > 10

    # explicit regexp
    search_results = ont.search('.*shape.*', is_regex=True)
    print("SEARCH (re, explicit): {}".format(search_results))
    assert SHAPE in search_results
    assert len(search_results) > 10
    
    # syns
    syn = 'cone-shaped'
    search_results = ont.search(syn, synonyms=False)
    print("SEARCH (no syns): {}".format(search_results))
    assert [] == search_results
    #search_results = ont.search(syn, synonyms=True)
    #print("SEARCH (with syns): {}".format(search_results))
    #assert [CONICAL] == search_results
    
    num_nodes = len(ont.nodes())
    assert num_nodes > 100

    ancs = ont.ancestors(PLOIDY)
    print("ANCS ploidy (all): {}".format(ancs))
    assert QUALITY in ancs
    assert PENTAPLOID not in ancs

    ancs = ont.ancestors(PLOIDY, relations=['subClassOf'])
    print("ANCS ploidy (subClassOf): {}".format(ancs))
    assert QUALITY in ancs
    assert PENTAPLOID not in ancs

    # not a typical use case
    ancs = ont.ancestors(SWOLLEN, relations=[HAS_PART])
    print("ANCS swollen (has_part): {}".format(ancs))
    assert INCREASED_SIZE in ancs
    assert PROTRUDING in ancs
    assert len(ancs) == 2

    ancs = ont.ancestors(SWOLLEN, relations=['subClassOf'])
    print("ANCS swollen (has_part): {}".format(ancs))
    assert MORPHOLOGY in ancs
    assert QUALITY in ancs
    assert PROTRUDING not in ancs
    
    decs = ont.descendants(PLOIDY)
    print("DECS ploidy (all): {}".format(decs))
    assert QUALITY not in decs
    assert EUPLOID in decs
    assert PENTAPLOID in decs

    # not a typical use case
    decs = ont.descendants(INCREASED_SIZE, relations=[HAS_PART])
    print("DECS increased size (has_part): {}".format(decs))
    assert SWOLLEN in decs
    assert len(decs) == 1

    subsets = ont.subsets()
    print("SUBSETS: {}".format(subsets))

    slim = ont.extract_subset('absent_slim')
    print("SLIM: {}".format(slim))
    assert ABSENT in slim
    assert QUALITY not in slim

    syns = ont.synonyms(INCREASED_SIZE)
    print("SYNS: {}".format(syns))
    syn_vals = [syn.val for syn in syns]
    assert 'big' in syn_vals
    [bigsyn] = [syn for syn in syns if syn.val == 'big']
    # TODO xrefs
    assert not bigsyn.exact_or_label()
    assert bigsyn.scope() == 'RELATED'

    w = GraphRenderer.create('obo')
    w.write_subgraph(ont, [INCREASED_SIZE])
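The PATO constants used throughout this test (PLOIDY, QUALITY, SWOLLEN, INCREASED_SIZE, and so on) are defined elsewhere in the module. The assertion ont.label(PLOIDY) == 'ploidy' suggests they can be recovered by exact-label search; labels other than 'ploidy' are assumptions derived from the variable names:

[PLOIDY] = ont.search('ploidy')                  # label asserted in the test
[QUALITY] = ont.search('quality')                # assumed label
[SWOLLEN] = ont.search('swollen')                # assumed label
[INCREASED_SIZE] = ont.search('increased size')  # assumed label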
Example #10
def expand(tsvfile, cols, ontology, output):
    """Add label columns to a TSV file for the given ID columns."""
    factory = OntologyFactory()
    ontobj = factory.create(ontology)
    expand_tsv(tsvfile, ontology=ontobj, outfile=output, cols=cols)
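A hypothetical invocation, reusing the fixture from Example #7 (argument values are assumptions):

# Hypothetical call; ontology may be a local obojson path or a handle
# such as 'go', since OntologyFactory.create() resolves both (see above).
expand('terms.tsv',
       cols=['term'],
       ontology='tests/resources/goslim_pombe.json',
       output=open('terms-expanded.tsv', 'w'))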