class QueryHandler:
    """Entry point that wires the Spell, DirectIndex, Searcher and
    SnippetBuilder objects together and serves search queries.
    """

    def __init__(self):
        """Create the collaborators from the values in ``settings``."""
        self.sp = Spell(settings.SPELL_WORDS_NUM)
        self.index = DirectIndex(settings.DIRECT_INDEX_PATH)
        self.searcher = Searcher(
            os.path.join(settings.INVERSE_INDEX_DIR, "index.txt"),
            os.path.join(settings.INVERSE_INDEX_DIR, "dict.txt"),
            os.path.join(settings.INVERSE_INDEX_DIR, "urls.txt"))
        self.snippet_builder = SnippetBuilder()

    def get_search_results(self, query, limit=10):
        """Run ``query`` through the searcher and describe the top hits.

        :param query: text query; it is UTF-8 encoded before being handed
            to the searcher and to the snippet builder.
        :param limit: maximum number of hits to describe (default 10).
        :return: list of dicts with the keys ``url``, ``snippet``,
            ``image`` and ``title``, one per hit, in searcher order.
        """
        # Encode once instead of once per call site / per hit.
        encoded_query = query.encode("utf-8")
        hits = self.searcher.search(encoded_query, return_urls_only=True)

        query_result = []
        # Slice rather than index: ``hits[10]`` picked the single 11th hit
        # (IndexError on short result lists) instead of the first ten.
        for url_id in hits[:limit]:
            # NOTE(review): the record is fetched by a random id in
            # [0, 300), so title/image/snippet are unrelated to the hit.
            # Presumably this should look the record up by the hit's own
            # id -- confirm the shape of a hit before changing it.
            record = self.index.record_by_id(randrange(300))
            try:
                snippet = self.snippet_builder.build_snippet(
                    record, encoded_query)
            except Exception as e:
                # Best effort: a failing snippet must not break the whole
                # result list, so the error text is shown in its place.
                detail = getattr(e, "message", None)
                if detail is None:
                    # Exceptions without a ``message`` attribute.
                    detail = str(e)
                if isinstance(detail, bytes):
                    # A non-ASCII byte string would raise
                    # UnicodeDecodeError when joined with unicode text.
                    detail = detail.decode("utf-8", "replace")
                snippet = u" SnipetBuilder упал" + u"%s" % (detail,)
            query_result.append({"url": url_id[1],
                                 "snippet": snippet,
                                 "image": record.img_url,
                                 "title": record.title})
        return query_result

    def spell(self, query):
        """Return whatever ``self.sp.spell`` produces for ``query``."""
        return self.sp.spell(query)
def __init__(self):
    """Create the Spell, DirectIndex, Searcher and SnippetBuilder
    collaborators from the values in ``settings``.
    """
    self.sp = Spell(settings.SPELL_WORDS_NUM)
    self.index = DirectIndex(settings.DIRECT_INDEX_PATH)

    # The searcher takes three files that live side by side in the
    # inverse-index directory: index, dictionary and URL list.
    inverse_dir = settings.INVERSE_INDEX_DIR
    index_file, dict_file, urls_file = [
        os.path.join(inverse_dir, file_name)
        for file_name in ("index.txt", "dict.txt", "urls.txt")
    ]
    self.searcher = Searcher(index_file, dict_file, urls_file)

    self.snippet_builder = SnippetBuilder()
query = unicode(sys.stdin.readline(), 'cp866') else: reload(sys) sys.setdefaultencoding('utf-8') query = unicode( sys.stdin.readline() ) sys.stdout = codecs.getwriter('utf-8')(sys.stdout) queries = [] splt = query.split('\t') if splt > 1: queries += [( splt[0], int(splt[1]) )] else: print "Incorrect input!" # ------------------------------------------ if queries: index = StrictIndex(u'Lenta.ru20-StrictIndex.txt') # u'povarenok.ru30-StrictIndex.txt') # SB = SnippetBuilder(index) for query, doc_id in queries: if sys.platform.startswith('win'): print (u"query= '%s'\n" % query).encode('cp866', 'ignore') print SB.snippet(query, doc_id=doc_id).encode('cp866', 'ignore'), '\n\n' else: print (u"query= '%s'\n" % query) print SB.snippet(query, doc_id=doc_id), '\n\n' # ------------------------------------------ else: print "Incorrect argument!"