def test_double_metaphone():
    """Check ``double_metaphone`` codes and the metaphone analysis filter.

    First verifies the (primary, secondary) code pair produced for a table
    of sample words, then verifies the tokens emitted by an analyzer chain
    ending in ``DoubleMetaphoneFilter``, both with and without
    ``combine=True``, and finally the query-mode text produced by a TEXT
    field that uses the combining chain.
    """
    from whoosh.lang.dmetaphone import double_metaphone

    # Each word maps to the expected (primary, secondary) code pair; a
    # secondary of None means no alternate encoding is expected.
    expected_codes = {
        'maurice': ('MRS', None),
        'aubrey': ('APR', None),
        'cambrillo': ('KMPRL', 'KMPR'),
        'heidi': ('HT', None),
        'katherine': ('K0RN', 'KTRN'),
        'Thumbail': ('0MPL', 'TMPL'),
        'catherine': ('K0RN', 'KTRN'),
        'richard': ('RXRT', 'RKRT'),
        'bob': ('PP', None),
        'eric': ('ARK', None),
        'geoff': ('JF', 'KF'),
        'Through': ('0R', 'TR'),
        'Schwein': ('XN', 'XFN'),
        'dave': ('TF', None),
        'ray': ('R', None),
        'steven': ('STFN', None),
        'bryce': ('PRS', None),
        'randy': ('RNT', None),
        'bryan': ('PRN', None),
        'Rapelje': ('RPL', None),
        'brian': ('PRN', None),
        'otto': ('AT', None),
        'auto': ('AT', None),
        'Dallas': ('TLS', None),
        'maisey': ('MS', None),
        'zhang': ('JNK', None),
        'Chile': ('XL', None),
        'Jose': ('HS', None),
        'Arnow': ('ARN', 'ARNF'),
        'solilijs': ('SLLS', None),
        'Parachute': ('PRKT', None),
        'Nowhere': ('NR', None),
        'Tux': ('TKS', None),
    }
    for word, codes in expected_codes.items():
        assert double_metaphone(word) == codes

    # Codes only: each word is replaced by its metaphone encoding(s).
    codes_only = (
        analysis.RegexTokenizer()
        | analysis.LowercaseFilter()
        | analysis.DoubleMetaphoneFilter()
    )
    emitted = []
    for token in codes_only(u("Spruce View")):
        emitted.append((token.text, token.boost))
    assert emitted == [('SPRS', 1.0), ('F', 1.0), ('FF', 0.5)]

    # combine=True: the original word is emitted ahead of its encoding(s).
    words_and_codes = (
        analysis.RegexTokenizer()
        | analysis.LowercaseFilter()
        | analysis.DoubleMetaphoneFilter(combine=True)
    )
    emitted = []
    for token in words_and_codes(u("Spruce View")):
        emitted.append((token.text, token.boost))
    assert emitted == [('spruce', 1.0), ('SPRS', 1.0), ('view', 1.0),
                       ('F', 1.0), ('FF', 0.5)]

    # A TEXT field built on the combining chain yields the same sequence of
    # texts when processing in query mode.
    name_field = fields.TEXT(analyzer=words_and_codes)
    query_texts = list(name_field.process_text(u("Spruce View"), mode="query"))
    assert query_texts == [u('spruce'), 'SPRS', u('view'), 'F', 'FF']
def __call__(self, tokens):
    """Yield double-metaphone encodings for each token in ``tokens``.

    For every incoming token this yields, in order:

    * the unmodified token itself, if ``self.combine`` is true;
    * the token with its text replaced by the primary code and its boost
      multiplied by ``self.primary_boost``, if a primary code exists;
    * the token with its text replaced by the secondary code and its
      boost multiplied by ``self.secondary_boost``, if a secondary code
      exists.

    The same token object is mutated and re-yielded rather than copied, so
    callers must consume each yielded token before advancing the generator.

    :param tokens: an iterable of token objects exposing mutable ``text``
        and ``boost`` attributes.
    """
    primary_boost = self.primary_boost
    secondary_boost = self.secondary_boost
    combine = self.combine

    for t in tokens:
        # Snapshot the text and boost BEFORE handing the token to the
        # consumer. The token object is shared, so a downstream filter may
        # rewrite it in place while this generator is suspended at the
        # ``yield`` below; reading the attributes afterwards would encode
        # and re-boost whatever the consumer left behind.
        text = t.text
        b = t.boost

        if combine:
            yield t

        primary, secondary = double_metaphone(text)

        # Overwrite the token's text and boost and yield it. Both boosts
        # are scaled from the original boost ``b``, not from each other.
        if primary:
            t.text = primary
            t.boost = b * primary_boost
            yield t
        if secondary:
            t.text = secondary
            t.boost = b * secondary_boost
            yield t