Пример #1
0
    def add(self, text, doc_id):
        """Index a document's text under the given document id.

        The text is passed through ``clean_text`` and reduced to a mapping of
        words to term frequencies.  The per-word rows and a ``Document``
        record holding the total term count are then inserted into
        ``self.db``.

        Args:
            text: Raw text of the document.
            doc_id: Identifier stored with the word rows and the document
                record.
        """
        # Map each word of the cleaned text to its term frequency
        wf = word_freq(clean_text(text))

        # Document length is the total of all term frequencies.  sum() yields
        # 0 for a document without words, whereas reduce() without an initial
        # value raises TypeError on an empty sequence (and is not a builtin
        # on Python 3).
        doc_length = sum(wf.values())

        self.db.insert(create_document_words(wf, doc_id))
        self.db.insert([Document(id=doc_id, length=doc_length)])
Пример #2
0
    def search(self, query):
        """Look up the query in the index and rank the matching documents.

        Returns a list of dictionaries, each with the keys "document_id" and
        "score", ordered from the highest score to the lowest.
        """
        query_terms = word_tokenize(clean_text(query))
        per_word_scores = self.index_reader.search(query_terms)

        # Fold every word's per-document score into one total per document
        totals = defaultdict(float)
        for _word, scores_by_doc in per_word_scores.items():
            for doc_id, score in scores_by_doc.items():
                totals[doc_id] += score

        # Rank (doc_id, total) pairs by total, best first; the sort is stable,
        # so documents with equal totals keep their first-seen order
        ranked = sorted(totals.items(), key=lambda entry: entry[1],
                        reverse=True)

        hits = []
        for doc_id, total in ranked:
            hits.append({"document_id": doc_id, "score": total})
        return hits