def secondary_should_rules(self, search_query, analyzer):
    """Build the "secondary" should rules for a search query.

    These rules phrase-match the query against the fields holding longer
    text: the summary and the description.

    Applied rules, in the order they are returned:

    * Phrase match on ``summary`` (boost=2.0).
    * Phrase match on ``description`` (boost=2.0).
    * Only when ``analyzer`` is truthy: phrase match on
      ``summary_l10n_<analyzer>`` using that analyzer (boost=3.0).
    * Only when ``analyzer`` is truthy: phrase match on
      ``description_l10n_<analyzer>`` using that analyzer (boost=3.0).

    :param search_query: the text to look for.
    :param analyzer: name of the language-specific analyzer, or a falsy
        value when there is none.
    :return: list of ``query.MatchPhrase`` objects.
    """
    phrase_rules = [
        query.MatchPhrase(summary={'query': search_query, 'boost': 2.0}),
        query.MatchPhrase(description={'query': search_query, 'boost': 2.0}),
    ]

    # Without a language-specific analyzer there is no translated field to
    # search in, so the generic rules are all we have.
    if not analyzer:
        return phrase_rules

    # Translated fields are named after the analyzer that indexes them.
    summary_field = 'summary_l10n_%s' % analyzer
    description_field = 'description_l10n_%s' % analyzer
    for localized_field in (summary_field, description_field):
        phrase_rules.append(
            query.MatchPhrase(**{
                localized_field: {
                    'query': search_query,
                    'boost': 3.0,
                    'analyzer': analyzer,
                },
            })
        )
    return phrase_rules
def secondary_should_rules(self, search_query, analyzer):
    """Return "secondary" should rules for the query.

    These rules are applied to the fields containing more text:
    description, summary and tags.

    Applied rules:

    * Look for phrase matches inside the summary (boost=0.8).
    * Look for phrase matches inside the description (boost=0.3).
    * Look for matches inside tags, one ``match`` query per
      whitespace-separated word of the search query (boost=0.1 each).
    * If ``analyzer`` is truthy, look for phrase matches inside the
      summary using the language specific analyzer (boost=0.6).
    * If ``analyzer`` is truthy, look for phrase matches inside the
      description using the language specific analyzer (boost=0.6).

    :param search_query: the text to look for; must support ``.split()``.
    :param analyzer: name of the language-specific analyzer, or a falsy
        value when there is none.
    :return: list of query objects, in the order listed above.
    """
    should = [
        query.MatchPhrase(summary={'query': search_query, 'boost': 0.8}),
        query.MatchPhrase(description={'query': search_query, 'boost': 0.3}),
    ]

    # Add a separate 'match' query for every word to boost tag matches.
    should.extend(
        query.Match(tags={'query': tag, 'boost': 0.1})
        for tag in search_query.split()
    )

    # For description and summary, also search in the translated field with
    # the right language and analyzer.
    if analyzer:
        should.extend([
            query.MatchPhrase(**{
                'summary_l10n_%s' % analyzer: {
                    'query': search_query,
                    'boost': 0.6,
                    'analyzer': analyzer,
                },
            }),
            # NOTE(review): this boost (0.6) is higher than the one used for
            # the plain description above (0.3) - confirm this is intended.
            query.MatchPhrase(**{
                'description_l10n_%s' % analyzer: {
                    'query': search_query,
                    'boost': 0.6,
                    'analyzer': analyzer,
                },
            }),
        ])

    return should
def secondary_should_rules(self, search_query, analyzer):
    """Build the "secondary" should rules for a search query.

    These rules phrase-match the query against the fields holding longer
    text: the summary (boost=3.0) and the description (boost=2.0).

    When ``analyzer`` is falsy, each rule is a plain ``MatchPhrase`` on the
    ``summary`` / ``description`` field.

    When ``analyzer`` is truthy, each rule becomes a ``MultiMatch`` of type
    ``phrase`` spanning both the plain field and its
    ``<field>_l10n_<analyzer>`` counterpart. No analyzer is passed explicitly
    in the query itself.

    Every rule carries a ``_name`` describing which query produced it.

    :param search_query: the text to look for.
    :param analyzer: name of the language-specific analyzer, or a falsy
        value when there is none.
    :return: two-element list: the summary rule, then the description rule.
    """
    # No supported language: only the plain fields can be searched.
    if not analyzer:
        return [
            query.MatchPhrase(summary={
                '_name': 'MatchPhrase(summary)',
                'query': search_query,
                'boost': 3.0,
            }),
            query.MatchPhrase(description={
                '_name': 'MatchPhrase(description)',
                'query': search_query,
                'boost': 2.0,
            }),
        ]

    # Supported language: search the plain field and the translated field
    # together, through a single multi_match per kind of text.
    summary_fields = ['summary', 'summary_l10n_%s' % analyzer]
    description_fields = ['description', 'description_l10n_%s' % analyzer]

    summary_rule = query.MultiMatch(
        _name=(
            'MultiMatch(MatchPhrase(summary),'
            'MatchPhrase(summary_l10n_%s))' % analyzer
        ),
        query=search_query,
        type='phrase',
        fields=summary_fields,
        boost=3.0,
    )
    description_rule = query.MultiMatch(
        _name=(
            'MultiMatch(MatchPhrase(description),'
            'MatchPhrase(description_l10n_%s))' % analyzer
        ),
        query=search_query,
        type='phrase',
        fields=description_fields,
        boost=2.0,
    )
    return [summary_rule, description_rule]
def primary_should_rules(self, search_query, lang):
    """Build the "primary" should rules for a search query.

    All of these rules target the add-on name.

    Applied rules, in the order they are returned:

    * The exact name match produced by
      ``generate_exact_name_match_query()`` (its boost is set by that
      helper).
    * If a locale analyzer exists for ``lang``: a multi_match requiring all
      terms, across the ``name_l10n_*`` fields of every language mapped to
      that analyzer (boost=5.0).
    * Phrase match on ``name`` with ``slop=1`` (boost=8.0).
    * Match on ``name`` requiring all terms, using the ``standard``
      analyzer (boost=6.0).
    * The query as a prefix of ``name`` (boost=3.0).
    * Only for queries shorter than
      ``MAX_QUERY_LENGTH_FOR_FUZZY_SEARCH``: a DisMax combining a fuzzy
      match on ``name`` and a match on ``name.trigrams`` (boost=4.0).

    :param search_query: the text to look for.
    :param lang: language code used to pick the translated name fields.
    :return: list of query objects.
    """
    rules = [self.generate_exact_name_match_query(search_query, lang)]

    # When the language is one we support, also match against the translated
    # name. Otherwise only the default locale name (below) is searched.
    analyzer = self.get_locale_analyzer(lang)
    if analyzer:
        # Cover every language served by this analyzer, not just `lang`.
        localized_fields = [
            'name_l10n_%s' % locale
            for locale in amo.SEARCH_ANALYZER_MAP[analyzer]
        ]
        rules.append(
            query.MultiMatch(
                _name='MultiMatch(%s)' % ','.join(localized_fields),
                fields=localized_fields,
                query=search_query,
                boost=5.0,
                analyzer=analyzer,
                operator='and',
            )
        )

    # Everything below targets 'name', which only holds the default locale
    # translation. No language-specific analysis is applied to it here (its
    # mapping is expected to provide word-delimiter and lowercase handling;
    # that mapping is defined outside this method).
    phrase_rule = query.MatchPhrase(
        name={
            '_name': 'MatchPhrase(name)',
            'query': search_query,
            'boost': 8.0,
            'slop': 1,
        }
    )
    all_terms_rule = query.Match(
        name={
            '_name': 'Match(name)',
            'analyzer': 'standard',
            'query': search_query,
            'boost': 6.0,
            'operator': 'and',
        }
    )
    prefix_rule = query.Prefix(
        name={
            '_name': 'Prefix(name)',
            'value': search_query,
            'boost': 3.0,
        }
    )
    rules += [phrase_rule, all_terms_rule, prefix_rule]

    # Partial & fuzzy matching is restricted to short query strings: long
    # ones, depending on their characters and word count, can be too costly.
    if len(search_query) >= self.MAX_QUERY_LENGTH_FOR_FUZZY_SEARCH:
        return rules

    # Fuzzy query: only slight misspellings should be corrected, but some of
    # the words may be absent as well:
    #   1 or 2 terms: all must be present
    #   3 terms: 2 must be present
    #   4 terms or more: 25% can be absent
    fuzzy_clause = {
        'match': {
            'name': {
                'query': search_query,
                'prefix_length': 2,
                'fuzziness': 'AUTO',
                'minimum_should_match': '2<2 3<-25%',
            }
        }
    }
    # Trigrams query: at least 66% of the trigrams must be present.
    trigrams_clause = {
        'match': {
            'name.trigrams': {
                'query': search_query,
                'minimum_should_match': '66%',
            }
        }
    }
    # Both clauses live in a single DisMax so that a name matching both is
    # not overboosted. We only care whether one of them matches, hence
    # tie_breaker is left at its default.
    rules.append(
        query.DisMax(
            _name='DisMax(FuzzyMatch(name), Match(name.trigrams))',
            boost=4.0,
            queries=[fuzzy_clause, trigrams_clause],
        )
    )
    return rules