# Imports assume lunr.py's module layout.
from lunr.builder import Builder
from lunr.index import Index
from lunr.token_set import TokenSet
from lunr.vector import Vector


class TestBuilderBuild:
    def setup_method(self, method):
        self.builder = Builder()
        doc = {"id": "id", "title": "test", "body": "missing"}

        self.builder.ref("id")
        self.builder.field("title")
        self.builder.add(doc)
        self.index = self.builder.build()

    def test_adds_tokens_to_inverted_index(self):
        _assert_deep_keys(self.builder.inverted_index, "test.title.id")

    def test_builds_vector_space_of_the_document_fields(self):
        assert "title/id" in self.builder.field_vectors
        assert isinstance(self.builder.field_vectors["title/id"], Vector)

    def test_skips_fields_not_defined_for_indexing(self):
        assert "missing" not in self.builder.inverted_index

    def test_builds_a_token_set_for_the_corpus(self):
        needle = TokenSet.from_string("test")
        assert "test" in self.builder.token_set.intersect(needle).to_list()

    def test_calculates_average_field_length(self):
        assert self.builder.average_field_length["title"] == 1

    def test_index_is_returned(self):
        assert isinstance(self.index, Index)
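# The tests above call a `_assert_deep_keys` helper defined elsewhere in the
# test module. A minimal sketch, assuming all it does is walk a dot-separated
# key path through nested dicts (the real helper's behaviour may differ):
def _assert_deep_keys(mapping, key_path):
    """Assert that every key along a dotted path exists in a nested mapping."""
    node = mapping
    for key in key_path.split("."):
        assert key in node, "{!r} not found in {!r}".format(key, node)
        node = node[key]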
# Imports assume lunr.py's module layout.
from lunr import languages as lang
from lunr.builder import Builder
from lunr.stemmer import stemmer
from lunr.stop_word_filter import stop_word_filter
from lunr.trimmer import trimmer


def lunr(ref, fields, documents, languages=None):
    """A convenience function to configure and construct a lunr.Index.

    Args:
        ref (str): The key in the documents to be used as the reference.
        fields (list): A list of strings defining fields in the documents to
            index. Optionally, a list of dictionaries with three keys:
            `field_name` defining the document's field, `boost` an integer
            defining a boost to be applied to the field, and `extractor`
            a callable taking the document as a single argument and returning
            the field's value, however it is located in the document.
        documents (list): The list of dictionaries representing the documents
            to index. Optionally, a list of 2-tuples of dicts, the first one
            being the document and the second its associated attributes.
        languages (str or list, optional): The languages to use if using
            NLTK language support, ignored if NLTK is not available.

    Returns:
        Index: The populated Index, ready to search against.
    """
    if languages is not None and lang.LANGUAGE_SUPPORT:
        if isinstance(languages, str):
            languages = [languages]

        unsupported_languages = set(languages) - set(lang.SUPPORTED_LANGUAGES)
        if unsupported_languages:
            raise RuntimeError(
                "The specified languages {} are not supported, "
                "please choose one of {}".format(
                    ", ".join(unsupported_languages),
                    ", ".join(lang.SUPPORTED_LANGUAGES.keys()),
                )
            )
        builder = lang.get_nltk_builder(languages)
    else:
        builder = Builder()
        builder.pipeline.add(trimmer, stop_word_filter, stemmer)
        builder.search_pipeline.add(stemmer)

    builder.ref(ref)
    for field in fields:
        if isinstance(field, dict):
            builder.field(**field)
        else:
            builder.field(field)

    for document in documents:
        if isinstance(document, (tuple, list)):
            builder.add(document[0], attributes=document[1])
        else:
            builder.add(document)

    return builder.build()
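# A quick usage sketch for the convenience function above; the documents and
# the query are made up for illustration:
documents = [
    {"id": "a", "title": "Mr. Green kills Colonel Mustard", "body": "..."},
    {"id": "b", "title": "Professor Plumb waters the plant", "body": "..."},
]
# Fields may be plain names or dicts with `field_name`/`boost`/`extractor`:
idx = lunr(
    ref="id",
    fields=[{"field_name": "title", "boost": 10}, "body"],
    documents=documents,
)
results = idx.search("plant")  # [{"ref": "b", "score": ..., "match_data": ...}]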
# Requires module-level imports of os and json, plus Builder, Pipeline,
# trimmer, stop_word_filter, and stemmer from lunr, and the plugin's own
# special_chars_remover pipeline function.
def generate_output(self, writer):
    # Build one node per article and assemble the documents to index.
    pages = [self.create_node(x) for x in self.context['articles']]
    path = os.path.join(self.output_path, 'search_index.json')

    pages_to_index = [{
        'id': x['id'],
        'title': x['title'],
        'text': x['text'],
    } for x in pages]

    # Extra per-document data, keyed by id and stored alongside the index,
    # so search results can be rendered without re-reading the articles.
    additional_data = {
        x['id']: {
            'url': x['url'],
            'title': x['title'],
            'summary': x['summary'],
        }
        for x in pages
    }

    # Register the custom pipeline function under a stable label so the
    # serialized index can be deserialized later.
    Pipeline.register_function(special_chars_remover, 'specialCharsRemover')

    bldr = Builder()
    bldr.pipeline.add(trimmer, stop_word_filter, stemmer, special_chars_remover)
    bldr.search_pipeline.add(stemmer)

    bldr.ref('id')
    bldr.field('title', boost=10)  # matches in titles rank higher
    bldr.field('text')

    for page in pages_to_index:
        bldr.add(page)

    idx = bldr.build().serialize()

    with open(path, 'w') as idxfile:
        json.dump({
            'index': idx,
            'data': additional_data,
        }, idxfile)
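# The serialized index written above can be loaded back with lunr.py (or by
# lunr.js in the browser). A sketch, assuming `special_chars_remover` is
# importable at load time and that the file path matches the plugin's output
# location; the function must be re-registered under the same label before
# deserializing, because the serialized pipeline references functions by label:
import json

from lunr.index import Index
from lunr.pipeline import Pipeline

Pipeline.register_function(special_chars_remover, 'specialCharsRemover')
with open('search_index.json') as fp:
    payload = json.load(fp)
idx = Index.load(payload['index'])
for result in idx.search('plant'):
    print(payload['data'][result['ref']]['url'])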