def test_analyzer_has_definition():
    """A CustomAnalyzer serializes itself as a 'custom'-type definition."""
    analyzer = analysis.CustomAnalyzer(
        'my_analyzer', tokenizer='keyword', filter=['lowercase'])
    expected = {
        'type': 'custom',
        'tokenizer': 'keyword',
        'filter': ['lowercase'],
    }
    assert analyzer.get_definition() == expected
def test_analyzer_has_definition():
    """A CustomAnalyzer serializes itself as a 'custom'-type definition."""
    analyzer = analysis.CustomAnalyzer(
        "my_analyzer",
        tokenizer="keyword",
        filter=["lowercase"],
    )
    definition = analyzer.get_definition()
    assert definition == {
        "type": "custom",
        "tokenizer": "keyword",
        "filter": ["lowercase"],
    }
# NOTE(review): this ')' closes a call that starts before this chunk — the
# opening expression is outside the visible source.
)

# Trigram tokenizer enables us to support partial matching
trigram = analysis.tokenizer(
    'trigram',
    'nGram',
    min_gram=3,  # fixed-width 3-character grams (min == max)
    max_gram=3,
    token_chars=('letter', 'digit'),
)

# Filters out "-" so that t-shirt and tshirt can be matched
special_chars = analysis.char_filter('special_chars', 'mapping', mappings=('-=>',))

# Analyzer combining the pieces above: strip '-', split into lowercase trigrams.
trigram_analyzer = analysis.CustomAnalyzer(
    'trigram_analyzer',
    tokenizer=trigram,
    char_filter=special_chars,
    filter=('lowercase',),
)

# Token filter that deletes all space characters from each token.
space_remover = analysis.token_filter(
    'space_remover',
    type='pattern_replace',
    pattern=' ',
    replacement='',
)

# Component patterns for a postcode (shapes match UK postcodes:
# area letters, district, sector digit, unit letters — TODO confirm).
AREA_REGEX = r'[a-z]{1,2}'
DISTRICT_REGEX = r'(?:[0-9][a-z]|[0-9]{1,2})'
SECTOR_REGEX = r'[0-9]'
UNIT_REGEX = r'[a-z]{2}'
# NOTE(review): monkeypatches the analysis module's tokenizer registry,
# restricting the recognized built-ins to the four names used below.
analysis.Tokenizer._builtins = analysis.TOKENIZERS = frozenset(
    ('keyword', 'standard', 'path_hierarchy', 'whitespace'))


class PathHierarchyTokenizer(analysis.Tokenizer):
    # Registers the (presumably Elasticsearch built-in) 'path_hierarchy'
    # tokenizer so it can be referenced by name.
    name = 'path_hierarchy'


class WhitespaceTokenizer(analysis.Tokenizer):
    # Registers the built-in 'whitespace' tokenizer by name.
    name = 'whitespace'


# Analyzer for path-like fields: hierarchical path tokens, lowercased.
path_analyzer = analysis.CustomAnalyzer('path', tokenizer='path_hierarchy',
                                        filter=['lowercase'])

# General text analyzer: whitespace tokens, lowercased, stopwords removed,
# HTML markup stripped before tokenization.
lower_whitespace_analyzer = analysis.analyzer('lower_whitespace',
                                              tokenizer='whitespace',
                                              filter=['lowercase', 'stop'],
                                              char_filter=['html_strip'])


class DocumentDocType(ImprovedDocType):
    """
    The main documentation doc type to be used for searching.

    It stores a bit of meta data so we don't have to hit the db when
    rendering search results. The search view will be using the 'lang'
    and 'version' fields
# Trigram tokenizer enables us to support partial matching trigram = analysis.tokenizer( 'trigram', 'nGram', min_gram=3, max_gram=3, token_chars=('letter', 'digit'), ) # Filters out "-" so that t-shirt and tshirt can be matched special_chars = analysis.char_filter('special_chars', 'mapping', mappings=('-=>', )) trigram_analyzer = analysis.CustomAnalyzer( 'trigram_analyzer', tokenizer=trigram, char_filter=special_chars, filter=('lowercase', ), ) english_possessive_stemmer = analysis.token_filter( 'english_possessive_stemmer', type='stemmer', language='possessive_english', ) english_stemmer = analysis.token_filter( 'english_stemmer', type='stemmer', language='english', )