Python Preprocessor示例

编程语言: Python

命名空间/包名称: search_engine.preprocessing.preprocessor

类/类型: Preprocessor

hotexamples.com的示例: 5

Python Preprocessor - 已找到 5 个示例。这些是从开源项目中提取的、最受好评的 search_engine.preprocessing.preprocessor.Preprocessor 真实 Python 用法示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

Preprocessor(4)

preproces_tagged_docs_with_urls(1)

preprocess_doc(1)

preprocess_tagged_wiki(1)

process_docs_with_urls(1)

process_wiki(1)

示例#1

显示文件

文件： d2v_engine.py 项目： mwoss/mors

class D2VEngine(SearchEngine):
    """Search engine backed by a Doc2Vec model.

    Queries are tokenised with ``Preprocessor``, turned into a vector with
    the model's ``infer_vector`` and matched against the stored document
    vectors via ``docvecs.most_similar``.

    NOTE(review): ``Doc2Vec`` is presumably gensim's class; the import is
    not visible in this snippet, so confirm before relying on API details.
    """

    def __init__(self, max_workers=4):
        """Create an engine; no model is loaded until ``load_model`` runs.

        Args:
            max_workers: Forwarded unchanged to ``Preprocessor``.
        """
        super().__init__()
        self.preprocessor = Preprocessor(max_workers=max_workers)

    @classmethod
    def from_configfile(cls):
        """Build an engine and load the model named in the configuration.

        The profile name comes from the ``d2v_profile`` environment
        variable (``'local'`` when unset). The model path is the
        ``'dbow_model_path'`` entry of that profile's ``d2v`` section.

        Returns:
            A new instance of ``cls`` with its model loaded.
        """
        profile = environ.get('d2v_profile', 'local')
        config = Config(profile).d2v

        search_engine = cls()
        search_engine.load_model(config['dbow_model_path'])
        return search_engine

    def load_model(self, model_path, dict_path=None):
        """Load the Doc2Vec model stored at ``model_path`` into ``self.model``.

        Args:
            model_path: Location passed to ``Doc2Vec.load``.
            dict_path: Not used by this engine. Presumably kept so the
                signature matches other engines -- TODO confirm.
        """
        self.model = Doc2Vec.load(model_path)

    def search(self, query, limit=50):
        """Return the ``limit`` entries most similar to ``query``.

        Args:
            query: Raw query text.
            limit: Passed as ``topn`` to ``most_similar``.

        Returns:
            Whatever ``docvecs.most_similar`` returns; ``dict_search``
            consumes it as an iterable of ``(tag, similarity)`` pairs.
        """
        inferred_vector = self._infer(query)
        return self.model.docvecs.most_similar([inferred_vector], topn=limit)

    def dict_search(self, query, limit=100):
        """Search and return a mapping of result tag to adjusted similarity.

        Args:
            query: Raw query text.
            limit: Maximum number of results requested from ``search``.

        Returns:
            dict mapping each result's first element (named ``url`` here)
            to its similarity after ``_adjust`` has been applied.
        """
        matches = self.search(query, limit=limit)

        # split() without an argument collapses runs of whitespace and drops
        # leading/trailing blanks. split(" ") would count the empty strings
        # produced by stray spaces as words and weaken the short-query
        # damping in _adjust.
        query_len = len(query.split())
        return {
            url: self._adjust(query_len, similarity)
            for url, similarity in matches
        }

    def _infer(self, document):
        """Preprocess ``document`` into tokens and infer its vector."""
        tokens = self.preprocessor.preprocess_doc(document)
        return self.model.infer_vector(tokens, alpha=0.001, steps=40)

    def _adjust(self, query_length, similarity):
        """Dampen the similarity score for queries shorter than four words.

        For ``query_length < 4`` the score is divided by
        ``(5 - query_length) ** 2`` (16, 9 and 4 for one, two and three
        words). Longer queries are returned unchanged.
        """
        if query_length < 4:
            return similarity / ((5 - query_length) ** 2)
        return similarity

示例#2

显示文件

 def __init__(self, topics, max_workers=4):
     """Store ``topics`` and create the document preprocessor.

     Args:
         topics: Kept as-is on ``self.topics``.
         max_workers: Forwarded unchanged to ``Preprocessor``.
     """
     super().__init__()
     self.topics = topics
     doc_preprocessor = Preprocessor(max_workers=max_workers)
     self.preprocessor = doc_preprocessor

示例#3

显示文件

def preprocess_tagged_wiki(wiki):
    """Run ``Preprocessor.preprocess_tagged_wiki`` on ``wiki``.

    A fresh ``Preprocessor`` is built with ``None`` as its only argument
    and its result is returned unchanged.
    """
    return Preprocessor(None).preprocess_tagged_wiki(wiki)

示例#4

显示文件

文件： aggregate_training.py 项目： mwoss/mors

def preprocess_tagged_doc(articles, max_workers):
    """Preprocess ``articles`` with a new ``Preprocessor``.

    Args:
        articles: Passed straight to ``preproces_tagged_docs_with_urls``.
        max_workers: Sole argument used to construct the ``Preprocessor``.

    Returns:
        The value returned by ``preproces_tagged_docs_with_urls``.
    """
    # The method name is spelled "preproces_" in the Preprocessor API itself.
    return Preprocessor(max_workers).preproces_tagged_docs_with_urls(articles)

示例#5

显示文件

def preprocess_wiki(wiki, max_workers):
    """Log the wiki being handled, then process it with a new ``Preprocessor``.

    Args:
        wiki: Passed straight to ``Preprocessor.process_wiki``; also
            interpolated into the log message.
        max_workers: Sole argument used to construct the ``Preprocessor``.

    Returns:
        The value returned by ``process_wiki``.
    """
    message = "preprocessing {0}".format(wiki)
    logger.info(message)
    return Preprocessor(max_workers).process_wiki(wiki)