def _filter_terms(self):
    """Remove tokens that appear either too often or too rarely.

    Builds a temporary index over ``self.db_documents``, keeps only the
    terms whose frequency falls inside the accepted band, and rewrites
    each document's ``tokens`` and ``word_frequencies`` lists to contain
    just those terms. Documents left with no surviving tokens are
    dropped from ``self.document_dict`` entirely.

    Returns:
        list: one filtered token list per surviving document (the
        corpus), in ``self.document_dict`` iteration order.
    """
    index = Index("kmeans_index")
    index.add_documents(self.db_documents)
    index.finalize()
    # NOTE(review): thresholds presumably mean "keep terms whose
    # frequency is between 10% and 20%" -- confirm against
    # Index.get_filtered_terms. A set gives O(1) membership tests in
    # the loops below (the original used an O(n) list scan per token).
    filtered_terms = set(index.get_filtered_terms(lowestf=0.1, highestf=0.2))

    corpus = []
    # Iterate over a snapshot of the items: the original code popped
    # entries out of self.document_dict while iterating it with
    # iteritems(), which raises "dictionary changed size during
    # iteration" as soon as any document is discarded.
    # (list(...items()) works on both Python 2 and 3.)
    for doc_id, document in list(self.document_dict.items()):
        filtered_tokens = [t for t in document.tokens if t in filtered_terms]
        if not filtered_tokens:
            # Every token was filtered out: the document carries no
            # usable signal, so drop it from the collection.
            self.document_dict.pop(doc_id)
        else:
            # Keep only the surviving tokens and the word-frequency
            # entries that refer to them. ``kept`` mirrors the
            # original's "item.word in filtered_tokens" check (word
            # must survive for THIS document), just as a set.
            kept = set(filtered_tokens)
            document.tokens = filtered_tokens
            document.word_frequencies = [
                item for item in document.word_frequencies
                if item.word in kept
            ]
            corpus.append(filtered_tokens)
    return corpus
My playground! '''
# NOTE(review): the line above is the tail of the module docstring; its
# opening quotes are outside the visible portion of this file.

import unittest, os

from analysis.index import Index
from database.warehouse import WarehouseServer
from database.model.tweets import TwoGroupsTweet

# Root folder holding the project's test data.
BASE_PATH = os.path.expanduser("~/virtualenvfyp/pythia/data/")
index_path = BASE_PATH + "test_index"

# Module-level fixture: fetch 100 sample tweets from the warehouse and
# build a searchable index once, shared by every test below.
ws = WarehouseServer()
sample_docs = ws.get_n_documents(100, type=TwoGroupsTweet)
index = Index(index_path)
for doc in sample_docs:
    index.add_document(doc)
index.finalize()


class TestPlayground(unittest.TestCase):
    # Exploratory tests exercising the prebuilt module-level index.

    def test_searching(self):
        # Searching for "sales" should return exactly the two known
        # documents, identified by their stored ids, in this order.
        results = index.search_by_term("sales")
        calculated = []
        for doc in results:
            calculated.append(doc.get('id'))
        expected = ['4f2d602780286c38a7000013', '4f2d603280286c38a700001e']
        self.assertEqual(expected, calculated)

    def test_top_terms_index(self):
        # NOTE(review): this method appears truncated in the visible
        # source -- it fetches the top 10 terms but asserts nothing;
        # confirm the full file for the missing assertions.
        results = index.get_top_terms(10)