def _qrun(queries, mmjar): out = {} # run one query at a time for more efficient caching for k, v in queries.iteritems(): q = {k: v} tmp = tempfile.NamedTemporaryFile() qfn = tmp.name f = codecs.open(qfn, "w", encoding="utf-8") for qid, qtxt in q.iteritems(): print >> f, "%s:%s" % (qid, qtxt) f.close() d = _run(qfn, mmjar, cache_file=cache_file(CACHEDIR, [hash_file(qfn), hash_file(mmjar)], "metamap")) d = longest_concepts(d) for kk, vv in d.iteritems(): if kk in out: print >> sys.stderr, "error: duplicate key:", kk print >> sys.stderr, out sys.exit(1) else: out[kk] = vv tmp.close() return out
def _qrun(queries, mmjar): out = {} # run one query at a time for more efficient caching for k, v in queries.iteritems(): q = {k: v} tmp = tempfile.NamedTemporaryFile() qfn = tmp.name f = codecs.open(qfn, "w", encoding="utf-8") for qid, qtxt in q.iteritems(): print >> f, "%s:%s" % (qid, qtxt) f.close() d = _run(qfn, mmjar, cache_file=cache_file( CACHEDIR, [hash_file(qfn), hash_file(mmjar)], "metamap")) d = longest_concepts(d) for kk, vv in d.iteritems(): if kk in out: print >> sys.stderr, "error: duplicate key:", kk print >> sys.stderr, out sys.exit(1) else: out[kk] = vv tmp.close() return out
def _txtrun(text, mmjar, no_cache, long_concepts): if not text: print '[metamap info] empty query' return {'txt': {'concepts': []}} # tmp = tempfile.NamedTemporaryFile() # tfn = tmp.name # # with codecs.open(tfn, 'wb', encoding='utf-8') as f: # print >> f, u"txt:{0}".format(text) cf = cache_file(CACHEDIR, [hash_obj(text), hash_file(mmjar)], "metamap") # for some inesplicable reason, the Java MetaMap API client # throws a java.lang.StringIndexOutOfBoundsException exception # instead of gracefully returning nothing when a text has no concepts. try: d = _run(text, mmjar, no_cache=no_cache, cache_file=cf) except ValueError: print '[metamap info] no concepts found' d = {'txt': {'concepts': []}} return d