Exemplo n.º 1
0
class QueryHandler:
    """Answer search queries using a searcher, a direct index and a snippet builder.

    The handler owns four collaborators created in ``__init__``: a spell
    checker (``sp``), a direct index (``index``), a searcher (``searcher``)
    and a snippet builder (``snippet_builder``).
    """

    def __init__(self):
        """Create all collaborators from the values configured in ``settings``."""
        self.sp = Spell(settings.SPELL_WORDS_NUM)
        self.index = DirectIndex(settings.DIRECT_INDEX_PATH)
        self.searcher = Searcher(os.path.join(settings.INVERSE_INDEX_DIR, "index.txt"),
                                 os.path.join(settings.INVERSE_INDEX_DIR, "dict.txt"),
                                 os.path.join(settings.INVERSE_INDEX_DIR, "urls.txt"))
        self.snippet_builder = SnippetBuilder()

    def get_search_results(self, query, max_results=10):
        """Run ``query`` through the searcher and describe the top hits.

        Args:
            query: Text query; it is UTF-8 encoded before being handed to the
                searcher and the snippet builder.
            max_results: Maximum number of hits to describe (default 10).

        Returns:
            A list with one dict per hit, holding the keys ``"url"``,
            ``"snippet"``, ``"image"`` and ``"title"``. Fewer than
            ``max_results`` hits simply yield a shorter list.
        """
        # Encode once; the same bytes are used for searching and for snippets.
        encoded_query = query.encode("utf-8")
        hits = self.searcher.search(encoded_query, return_urls_only=True)
        query_result = []

        # Slice (not index) so that only the leading hits are visited and a
        # short result list does not raise IndexError.
        for url_id in hits[:max_results]:
            # NOTE(review): the record is fetched by a random id rather than by
            # anything taken from the hit, so snippet/image/title are unrelated
            # to the URL. Presumably a placeholder - confirm the intended id.
            record = self.index.record_by_id(randrange(300))
            try:
                snippet = self.snippet_builder.build_snippet(record, encoded_query)
            except Exception as e:
                # Best effort: a failing snippet must not drop the whole hit.
                snippet = u" SnipetBuilder упал" + e.message

            query_result.append({"url": url_id[1],
                                 "snippet": snippet,
                                 "image": record.img_url,
                                 "title": record.title})
        return query_result

    def spell(self, query):
        """Return the result of the spell checker's ``spell`` for ``query``."""
        return self.sp.spell(query)
Exemplo n.º 2
0
 def __init__(self):
     """Build the spell checker, direct index, searcher and snippet builder.

     Sizes and file locations are read from ``settings``; the searcher gets
     the ``index.txt``, ``dict.txt`` and ``urls.txt`` files located in
     ``settings.INVERSE_INDEX_DIR``.
     """
     self.sp = Spell(settings.SPELL_WORDS_NUM)
     self.index = DirectIndex(settings.DIRECT_INDEX_PATH)

     # Resolve the three inverse-index files up front for readability.
     postings_path = os.path.join(settings.INVERSE_INDEX_DIR, "index.txt")
     dictionary_path = os.path.join(settings.INVERSE_INDEX_DIR, "dict.txt")
     urls_path = os.path.join(settings.INVERSE_INDEX_DIR, "urls.txt")
     self.searcher = Searcher(postings_path, dictionary_path, urls_path)

     self.snippet_builder = SnippetBuilder()
Exemplo n.º 3
0
                query = unicode(sys.stdin.readline(), 'cp866')
            else:
                reload(sys)
                sys.setdefaultencoding('utf-8')
                query = unicode( sys.stdin.readline() )
                sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

            queries = []
            splt = query.split('\t')
            if splt > 1:
                queries += [( splt[0], int(splt[1]) )]
            else:
                print "Incorrect input!"

        # ------------------------------------------
        if queries:
            index = StrictIndex(u'Lenta.ru20-StrictIndex.txt') # u'povarenok.ru30-StrictIndex.txt') # 
            SB = SnippetBuilder(index)

            for query, doc_id in queries:

                if sys.platform.startswith('win'):
                    print (u"query= '%s'\n" % query).encode('cp866', 'ignore')
                    print SB.snippet(query, doc_id=doc_id).encode('cp866', 'ignore'), '\n\n'
                else:
                    print (u"query= '%s'\n" % query)
                    print SB.snippet(query, doc_id=doc_id), '\n\n'
        # ------------------------------------------
    else: print "Incorrect argument!"