def format_value(self, entry): return entry.pop() def get_similar(self, word, data): if not data: try: data = self.whisperer.findSimilarByWord(word, max=self.max_similar+10) data = data[0]['value'] except: # error, the word was not found data = self.get_suggestions(word) data.append('<2nd lookup, %s not found>' % word) return data semes = data.split(' ') data = self.whisperer.getSimilarBySem(semes, 10, True) if len(data) < 2: return [] records = [] for info in data: if info['key'] != word: records.append(self.prepare_entry([info])) return records[0:self.max_similar] #test(INDEX, 'scalar', [('particle', None)], cls=TaggingWhisper) #test(INDEX, 'scalar', [('boson', '0012v _rel194 _rel195')], cls=TaggingWhisper) get_worker, application = app(INDEX, MAX_AUTOCOMPLETE, MAX_SIMILAR, TaggingWhisper)
#!python -u
"""Build ``get_worker`` and ``application`` via ``whisper.app``.

Uses :class:`KeywordWhisper` over the ``indices/hepnet`` index located
relative to the installed ``dumean`` package.
"""
import sys
import os

import dumean

# Directory three levels above the dumean package; added to sys.path once.
_d = os.path.abspath(os.path.join(os.path.dirname(dumean.__file__), '../../..'))
if _d not in sys.path:
    sys.path.append(_d)

MAX_AUTOCOMPLETE = 10
MAX_SIMILAR = 10
INDEX = os.path.join(_d, 'indices/hepnet')

# Kept below the sys.path tweak on purpose: presumably ``whisper`` is only
# importable once ``_d`` is on the path -- confirm before moving this import.
from whisper import app, test, Whisperer


class KeywordWhisper(Whisperer):
    """Whisperer whose values are rendered as ``keyword:`` query strings."""

    def format_value(self, entry):
        """Render the terms in ``entry`` as a ``keyword:`` query string.

        More than one term gives ``keyword:(a OR b ...)``; otherwise a single
        term is popped from ``entry`` and rendered as ``keyword:term``.  An
        empty ``entry`` therefore raises from ``pop()``, as before.
        """
        if len(entry) > 1:
            # sorted() keeps the query text independent of the iteration
            # order of ``entry``; OR is commutative, so the meaning is the same.
            return 'keyword:(%s)' % ' OR '.join(sorted(entry))
        return 'keyword:%s' % entry.pop()


#test(INDEX, 'scalarz', [('particle', None)])

get_worker, application = app(INDEX, MAX_AUTOCOMPLETE, MAX_SIMILAR, KeywordWhisper)
seen[r['key']] = 1 out.append({'label': r['key'], 'value': r['key'], 'data': r['value']}) if len(out) < self.max_autocomplete: others = Whisperer.get_suggestions(self, word) i = 0 while len(out) < self.max_autocomplete and i < len(others): x = others[i] if x['label'] not in seen: out.append(x) i += 1 return out def format_value(self, entry): return entry.pop() #test(INDEX, 'xA', [('xA9Pv YRLVs', None)], cls=RandomWhisper) get_worker, application = app(INDEX, MAX_AUTOCOMPLETE, MAX_SIMILAR, RandomWhisper) def run_wsgi(): port = 8080 from wsgiref.util import setup_testing_defaults from wsgiref.simple_server import make_server httpd = make_server('', port, application) print "Serving on port %s..." % port httpd.serve_forever() if __name__ == "__main__": run_wsgi()
def get_similar(self, word, data): # for authors, we don't have any information so we'll search # for similar names if not data or (len(data) > 1 and not data[0]): return self.get_suggestions(word) recs = [] for n in data[0].split("|"): recs.append({"label": n, "value": n}) return recs def prepare_entry(self, info): """Info Entry is actually array of possible records becaue every word may have many meanings""" score, entry, data = 0.0, set(), [] key = "%s (%d)" % (info[0]["key"], len(info)) for x in info: if " " in x["key"]: entry.add('"%s"' % x["key"]) else: entry.add(x["key"]) data.append(x["value"]) score += x["score"] return {"label": key, "value": self.format_value(entry), "data": self.format_data(data)} # test(INDEX, 'Ellis', [('Ellis', ['Ellis J', 'Ellis J.R'])], # cls=AuthorWhisper) get_worker, application = app(INDEX, MAX_AUTOCOMPLETE, MAX_SIMILAR, AuthorWhisper)