Example #1
    def test_tokenization(self):
        """
        Tests whether the Elasticsearch german text analyzer yields the expected tokens.

        Check the comments in mainapp.documents.index for more details
        """
        tokenizations = {
            "die": [],
            "hunde": ["hunde", "hund"],
            "wi-fi": ["wi", "fi"],
            "Feuerwehr": ["feuerwehr"],  # Would ideally split the words
            "oktopoden": ["oktopoden", "oktopod"],
            "Äpfel": ["äpfel", "apfel"],
            "ging": ["ging"],
            "schwierigste": ["schwierigste", "schwierig"],
            "1234/89": ["1234", "89"],  # Would be better if it included "1234/89"
        }

        text_analyzer = get_text_analyzer("german")
        elastic_index = Index("mst-test-tokenization")
        if not elastic_index.exists():
            elastic_index.create()
        elastic_index.close()
        elastic_index.analyzer(text_analyzer)
        elastic_index.save()
        elastic_index.open()
        elastic_index.flush()

        for word, expected_tokens in tokenizations.items():
            analysis = elastic_index.analyze(
                body={"analyzer": "text_analyzer", "text": word}
            )
            actual_tokens = [i["token"] for i in analysis["tokens"]]
            self.assertEqual(expected_tokens, actual_tokens, "Word was {}".format(word))

    def analyze(self, text: str) -> Dict[str, List[Dict]]:
        """Shows what elasticsearch does with the tokens"""

        elastic_index_file = Index(settings.ELASTICSEARCH_PREFIX + "-file")
        elastic_index_file.analyzer(autocomplete_analyzer)
        elastic_index_file.analyzer(text_analyzer)
        return elastic_index_file.analyze(
            body={"analyzer": "text_analyzer", "text": text}
        )
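For reference, a minimal usage sketch of the analyze() helper above; the instance name indexer and the sample word are assumptions used only to illustrate the shape of the response.

# Hypothetical call to the analyze() helper above; `indexer` stands in for
# whatever object defines the method and is not part of the original snippet.
analysis = indexer.analyze("Feuerwehr")
tokens = [entry["token"] for entry in analysis["tokens"]]
# With the german text_analyzer from Example #1 this should be ["feuerwehr"]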
Example #3
# Create the analyzers
ru_analyzer = analyzer(
    'ru_analyzer',
    type='custom',
    tokenizer='standard',
    filter=['lowercase', russian_stop_filter, russian_stemmer_filter],
)
en_analyzer = analyzer('en_analyzer',
                       type='custom',
                       tokenizer='standard',
                       filter=[
                           english_possessive_stemmer_filter, 'lowercase',
                           english_stop_filter, english_stemmer_filter
                       ])
# Add the analyzers to the index
movie_index.analyzer(ru_analyzer)
movie_index.analyzer(en_analyzer)


@movie_index.doc_type
class MovieDocument(Document):
    title = fields.TextField(
        analyzer=ru_analyzer,  # Analyzer used for indexing
        search_analyzer=ru_analyzer  # Analyzer used for the search query
    )
    description = fields.TextField(
        analyzer=ru_analyzer,  # Analyzer used for indexing
        search_analyzer=ru_analyzer  # Analyzer used for the search query
    )
    subtitles = fields.TextField(
        attr='get_subtitles',
Example #4
# Name of the Elasticsearch index
from django.conf import settings
from django_elasticsearch_dsl import Index, DEDField, Integer
from elasticsearch_dsl import analyzer, token_filter


class RelatedToValueList(DEDField, Integer):
    """Indexes a to-many relation as a list of the related objects' integer ids."""

    def get_value_from_instance(self, data):
        return [obj.id for obj in super().get_value_from_instance(data)]


mainIndex = Index(settings.ELASTICSEARCH_INDEX)
# See Elasticsearch Indices API reference for available settings
mainIndex.settings(number_of_shards=1, number_of_replicas=0)

autocomplete_filter = token_filter(
    "autocomplete_filter",
    "edge_ngram",
    min_gram=1,
    max_gram=20,
)

# Using this analyzer on an empty field fails, so we use methods that add a space instead (see the sketch after this example)
autocomplete_analyzer = analyzer(
    'autocomplete',
    tokenizer="standard",
    filter=["lowercase", autocomplete_filter],
)
mainIndex.analyzer(autocomplete_analyzer)
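As the comment above notes, the edge_ngram autocomplete analyzer fails on an empty value, so the indexed string is produced by a method that appends a space. Below is a minimal sketch of such a method, following the registration pattern seen in Example #5; the document class, model and field names are assumptions for illustration only.

from django_elasticsearch_dsl import Document, fields


@mainIndex.document
class PersonDocument(Document):  # hypothetical document, not part of the original snippet
    # Field indexed with the autocomplete analyzer defined above
    name_autocomplete = fields.TextField(analyzer=autocomplete_analyzer)

    def prepare_name_autocomplete(self, instance):
        # Appending a space guarantees the analyzer never receives an empty string
        return (instance.name or "") + " "

    class Django:
        model = Person  # assumed Django model with a `name` field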
Example #5
from django.conf import settings
from django.db.models import QuerySet
from django_elasticsearch_dsl import Index

from metarecord.models import Classification
from search_indices import get_finnish_analyzer
from search_indices.documents.base import BaseDocument

# Name of the Elasticsearch index
INDEX = Index(settings.ELASTICSEARCH_INDEX_NAMES[__name__])

finnish_analyzer = get_finnish_analyzer()

INDEX.analyzer(finnish_analyzer)

INDEX.settings(max_result_window=500000)


@INDEX.document
class ClassificationDocument(BaseDocument):
    class Django:
        model = Classification

    def get_queryset(self) -> QuerySet:
        return Classification.objects.latest_version()


def make_index(suffix: str) -> Index:
    elastic_index = Index(settings.ELASTICSEARCH_PREFIX + "-" + suffix)
    elastic_index.analyzer(autocomplete_analyzer)
    elastic_index.analyzer(text_analyzer)
    return elastic_index
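A short hypothetical usage of the make_index() factory; only the "file" suffix appears elsewhere on this page, the rest is assumed.

# Hypothetical usage of make_index(); "file" matches the "-file" index used by
# the analyze() helper earlier, "person" is purely illustrative.
elastic_index_file = make_index("file")
elastic_index_person = make_index("person")
if not elastic_index_file.exists():
    elastic_index_file.create()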
Example #7
# Name of the Elasticsearch index
job_listing = Index('joblistings')
# See Elasticsearch Indices API reference for available settings
job_listing.settings(
    number_of_shards=1,
    number_of_replicas=0
)

# Splits on whitespace, dashes, newlines, slashes, commas, and periods followed by whitespace
custom_tokenizer = tokenizer(
    "pattern",
    "pattern",
    pattern=r"\s|-|\n|/|,|\.\s"
)

keyword_analyzer = analyzer(
    "default", type="custom", tokenizer=custom_tokenizer, filter=["lowercase"]
)
job_listing.analyzer(keyword_analyzer)


filter_shingle = token_filter(
    name_or_instance="filter_shingle",
    type="shingle",
    max_shingle_size=2,
    min_shingle_size=2,
    output_unigrams="false",
)
shingle_analyzer = analyzer(
    "shingle",
    tokenizer=custom_tokenizer,
    type="custom",
    filter=["lowercase", filter_shingle],
)
job_listing.analyzer(shingle_analyzer)

# triple_filter_shingle = token_filter(name_or_instance="triple_filter_shingle", type="shingle", max_shingle_size=3, min_shingle_size=3, output_unigrams="false")
# triple_shingle_analyzer = analyzer("triple_shingle", tokenizer=custom_tokenizer, type="custom", filter=["lowercase", triple_filter_shingle])
# job_listing.analyzer(triple_shingle_analyzer)


@job_listing.doc_type
class JobListingDocument(DocType):
    keywords = fields.TextField(attr="description", fielddata=True)
#    shingles = fields.TextField(attr="description", analyzer="shingle", fielddata=True)
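To make the shingle setup above concrete, a minimal, hedged check of what the analyzer emits; the sample text is an assumption, and the call only succeeds once the index has actually been created with these analyzers.

# Hypothetical smoke test of the shingle analyzer defined above: the pattern
# tokenizer splits on whitespace, lowercase is applied, and with shingle size
# fixed at 2 and output_unigrams="false" only two-word shingles are returned.
analysis = job_listing.analyze(
    body={"analyzer": "shingle", "text": "senior python developer"}
)
print([t["token"] for t in analysis["tokens"]])
# expected: ["senior python", "python developer"]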