def test_lexmap_basic():
    """
    Text lexical mapping

    Runs in two phases:

    1. Index the ``tests/resources/lexmap_test.json`` fixture with a default
       LexicalMapEngine and check the expected cross-reference edges and
       that every edge carries a conditional probability in (0, 1].
    2. Build a second engine with a custom configuration, add synthetic
       nodes/synonyms to the same ontology, re-index, and check edge
       presence/absence and integer-truncated scores.
    """
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    lexmap = LexicalMapEngine()
    lexmap.index_ontology(ont)

    print(lexmap.lmap)
    print(ont.all_synonyms())
    g = lexmap.get_xref_graph()
    # `edges(data=True)` instead of `edges_iter(data=True)`: `edges_iter` was
    # removed in NetworkX 2.0, while `edges` yields the same (u, v, data)
    # triples on both 1.x and 2.x.
    # NOTE(review): assumes get_xref_graph() returns a networkx graph -- confirm.
    for x, y, d in g.edges(data=True):
        print("{}<->{} :: {}".format(x, y, d))
    for x in g.nodes():
        print("{} --> {}".format(x, lexmap.grouped_mappings(x)))
    assert g.has_edge('Z:2', 'ZZ:2')  # roman numerals
    assert g.has_edge('Z:2', 'Y:2')   # case insensitivity
    assert g.has_edge('A:1', 'B:1')   # synonyms
    assert g.has_edge('B:1', 'A:1')   # bidirectional
    for x, y, d in g.edges(data=True):
        print("{}<->{} :: {}".format(x, y, d))
        cpr = d[lexmap.CONDITIONAL_PR]
        assert 0 < cpr <= 1.0

    df = lexmap.as_dataframe(g)
    print(df.to_csv(sep="\t"))

    # Second phase: a fresh engine with explicit configuration.
    lexmap = LexicalMapEngine(
        config=dict(
            synsets=[dict(word="",
                          synonym="ignoreme",
                          weight=-2.0)],
            normalized_form_confidence=0.25,
            abbreviation_confidence=0.5,
            meaningful_ids=True,
            ontology_configurations=[
                dict(prefix='AA',
                     normalized_form_confidence=-1000)
            ]))

    assert len(lexmap._get_config_val('NULL', 'synsets')) == 1
    assert lexmap._normalize_label('ignoreme foo', {'ignoreme': ''}) == 'foo'
    assert lexmap._normalize_label('replaceme foo', {'replaceme': 'zz'}) == 'foo zz'

    # Synthetic nodes used by the assertions below.
    ont.add_node('TEST:1', 'foo bar')
    ont.add_node('TEST:2', 'bar foo')
    ont.add_node('TEST:3', 'foo bar')
    ont.add_node('TEST:4', 'wiz')
    syn = Synonym('TEST:4', val='bar foo', pred='hasRelatedSynonym')
    ont.add_synonym(syn)
    ont.add_node('http://x.org/wiz#FooBar')
    ont.add_node('TEST:6', '123')
    ont.add_node('TEST:7', '123')
    ont.add_node('TEST:8', 'bar ignoreme foo')
    ont.add_node('AA:1', 'foo bar')
    ont.add_node('AA:2', 'bar foo')
    ont.add_node('ABBREV:1', 'ABCD')
    ont.add_node('ABBREV:2', 'ABCD')
    for s in ont.synonyms('TEST:4'):
        print('S={}'.format(s))

    lexmap.index_ontology(ont)
    g = lexmap.get_xref_graph()
    for x, d in g['TEST:1'].items():
        print('XREF: {} = {}'.format(x, d))
    assert g.has_edge('TEST:1', 'TEST:2')  # normalized
    logging.info('E 1-2 = {}'.format(g['TEST:1']['TEST:2']))
    assert int(g['TEST:1']['TEST:2']['score']) == 25
    assert int(g['TEST:1']['TEST:3']['score']) == 100
    assert int(g['TEST:1']['TEST:4']['score']) < 25
    assert g.has_edge('TEST:3', 'http://x.org/wiz#FooBar')  # IDs and CamelCase
    assert not g.has_edge('TEST:6', 'TEST:7')  # should omit syns with no alphanumeric

    # test exclude normalized form
    assert not g.has_edge('AA:1', 'AA:2')

    # test custom synsets are used
    assert g.has_edge('TEST:8', 'TEST:2')
    assert g.has_edge('TEST:8', 'AA:2')
    assert not g.has_edge('TEST:8', 'AA:1')  # do not normalize AAs

    assert lexmap.smap['ABBREV:1'][0].is_abbreviation()
    assert lexmap.smap['ABBREV:2'][0].is_abbreviation()
    assert g.has_edge('ABBREV:1', 'ABBREV:2')
    assert int(g['ABBREV:1']['ABBREV:2']['score']) == 25

    df = lexmap.unmapped_dataframe(g)
    print(df.to_csv())
def main():
    """
    Command-line wrapper for ontobio lexical mapping.

    Steps, in order:

    1. Parse command-line arguments and configure logging from ``-v``.
    2. Create one ontology per positional handle via ``OntologyFactory``
       (each passed through ``filter_by_prefix``).
    3. Optionally read a YAML configuration (``--config``) and append
       per-xref weights read from a CSV (``--xref_weights``) to
       ``config['xref_weights']``.
    4. Index every ontology in a ``LexicalMapEngine`` and set the ontology
       pairs to compare: the single ontology against itself, every other
       ontology against the first, or (``--all-by-all``) every pair whose
       ids are in ascending order.
    5. Merge the ontologies, build the xref graph and write it with
       ``write_obo`` (``--to obo``) or ``write_tsv`` (anything else).
    6. Optionally export unmapped nodes as TSV (``--unmapped``).

    Raises:
        ValueError: if no ontology handles were supplied.
    """
    parser = argparse.ArgumentParser(
        description='Wrapper for ontobio lexical mapping'
                    """

                    Lexically maps one or more ontologies. Ontologies can be local or remote,
                    any input handle can be specified, see docs for more details on handles.

                    If multiple ontologies are specified, then each ontology in the list is compared against the first one.

                    If a single ontology is specified, then all pairs in that ontology will be compared

                    Output format to be documented - see lexmap.py for the various scoring attributes for now.

                    """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-o', '--outfile', type=str, nargs='*', default=[], required=False,
                        help='Path to output file')
    parser.add_argument('-t', '--to', type=str, required=False, default='tsv',
                        help='Output to (tree, dot, ...)')
    parser.add_argument('-l', '--labels', type=str,
                        help='If set, then include node labels in results. DEPRECATED')
    parser.add_argument('-s', '--scoring', default='sim', type=str,
                        help='Score weighting scheme. Default=sim')
    parser.add_argument('-P', '--prefix', type=str, required=False,
                        help='Prefix to constrain traversal on, e.g. PATO, ENVO')
    parser.add_argument('-c', '--config', type=str, required=False,
                        help='lexmap configuration file (yaml). See schema for details')
    parser.add_argument('-X', '--xref_weights', type=str, required=False,
                        help='csv of curated per-xref weights')
    parser.add_argument('-u', '--unmapped', type=str, required=False,
                        help='File to export unmapped nodes to')
    parser.add_argument('-A', '--all-by-all', dest='all_by_all', action='store_true',
                        help='compare all ontologies against all.')
    parser.add_argument('-v', '--verbosity', default=0, action='count',
                        help='Increase output verbosity')
    parser.add_argument('ontologies', nargs='*',
                        help='one or more ontologies to be aligned. Any input handle can be specified')

    args = parser.parse_args()

    # -vv (or more) => DEBUG, -v => INFO, default => WARNING
    if args.verbosity >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbosity == 1:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    logging.info("Welcome!")

    factory = OntologyFactory()
    # NOTE(review): --prefix is parsed above but not passed here;
    # filter_by_prefix is defined elsewhere -- confirm how it gets the prefix.
    onts = [filter_by_prefix(factory.create(h)) for h in args.ontologies]

    config = {}
    if args.config is not None:
        # `with` guarantees the handle is closed even if parsing fails.
        with open(args.config, 'r') as f:
            # safe_load instead of yaml.load(f): load() without an explicit
            # Loader can construct arbitrary Python objects on older PyYAML
            # and raises TypeError on PyYAML >= 6. An empty file parses to
            # None, so fall back to an empty config.
            config = yaml.safe_load(f) or {}

    if args.xref_weights is not None:
        xws = config.setdefault('xref_weights', [])
        df = pd.read_csv(args.xref_weights).fillna(0.0)
        for _, row in df.iterrows():
            w = float(row['weight'])
            # Slots 0 and 1 stay 0.0; slot 2 holds w and slot 3 holds -w.
            # (An earlier sign-dependent branch produced these same values
            # for both signs, since abs(w) == -w when w < 0.)
            # NOTE(review): the meaning of the four slots is defined by
            # LexicalMapEngine, not visible here -- confirm.
            xws.append({'left': row['left'],
                        'right': row['right'],
                        'weights': np.array((0.0, 0.0, w, -w))})

    logging.info("ALL: {}".format(args.all_by_all))

    lexmap = LexicalMapEngine(config=config)
    if not onts:
        # ValueError is the builtin; the previously used `ValueException`
        # is not defined and would have surfaced as a NameError.
        raise ValueError("must pass one or more ontologies")

    logging.info("Indexing ontologies: {}".format(onts))
    for ont in onts:
        lexmap.index_ontology(ont)

    # Default: a single ontology is compared against itself.
    oid0 = onts[0].id
    pairs = [(oid0, oid0)]
    if len(onts) > 1:
        if args.all_by_all:
            logging.info("All vs ALL: {}".format(onts))
            # `i.id < j.id` keeps one ordering of each pair and drops
            # self-pairs.
            pairs = [(i.id, j.id) for i in onts for j in onts if i.id < j.id]
        else:
            logging.info("All vs first in list: {}".format(oid0))
            pairs = [(oid0, ont.id) for ont in onts[1:]]
    logging.info("Comparing the following pairs of ontologies: {}".format(pairs))
    lexmap.ontology_pairs = pairs

    # Merged ontology handed to the writers alongside the xref graph.
    mo = Ontology()
    mo.merge(onts)

    g = lexmap.get_xref_graph()

    if args.to == 'obo':
        write_obo(g, mo, args)
    else:
        write_tsv(lexmap, g, mo, args)

    if args.unmapped:
        udf = lexmap.unmapped_dataframe(g)
        udf.to_csv(args.unmapped, sep="\t", index=False)