def test_prefix_to_dict():
    """Check that a Prefix query serializes to the nested "prefix" dict."""
    expected = {"prefix": {"f": "value"}}
    serialized = query.Prefix(f="value").to_dict()
    assert expected == serialized
示例#2
0
    def primary_should_rules(self, search_query, lang):
        """Build the "primary" should clauses for a search query.

        These clauses carry the strongest boosts and only ever target the
        add-on name.

        Clauses produced, in order:

        * Exact match on the name, delegated to
          generate_exact_name_match_query() (its boost is set by that helper)
        * Match on the translated name fields using a language-specific
          analyzer, only when one exists for ``lang`` (boost=5.0)
        * Phrase match on 'name' with a slop of 1 (boost=8.0)
        * Match on 'name' with the standard analyzer, all terms required
          (boost=6.0)
        * The query as a prefix of 'name' (boost=3.0)
        * Only for queries shorter than MAX_QUERY_LENGTH_FOR_FUZZY_SEARCH: a
          DisMax combining a fuzzy match on 'name' and a match on
          'name.trigrams' (boost=4.0)

        Returns the list of query clauses.
        """
        clauses = [self.generate_exact_name_match_query(search_query, lang)]

        # When the requested language has a dedicated analyzer, also match
        # against the translated name fields. Otherwise the default-locale
        # 'name' rules further down are the only text matches applied.
        locale_analyzer = self.get_locale_analyzer(lang)
        if locale_analyzer:
            # As in generate_exact_name_match_query(), cover every language
            # that shares this analyzer, not just the requested one.
            l10n_fields = [
                'name_l10n_%s' % locale
                for locale in amo.SEARCH_ANALYZER_MAP[locale_analyzer]
            ]
            translated_match = query.MultiMatch(
                _name='MultiMatch(%s)' % ','.join(l10n_fields),
                fields=l10n_fields,
                query=search_query,
                boost=5.0,
                analyzer=locale_analyzer,
                operator='and',
            )
            clauses.append(translated_match)

        # Everything below targets 'name', which only holds the default
        # locale translation. That field trades language-specific analysis
        # for word delimiter rules, a lowercase filter, etc.
        phrase_match = query.MatchPhrase(name={
            '_name': 'MatchPhrase(name)',
            'query': search_query,
            'boost': 8.0,
            'slop': 1,
        })
        standard_match = query.Match(name={
            '_name': 'Match(name)',
            'analyzer': 'standard',
            'query': search_query,
            'boost': 6.0,
            'operator': 'and',
        })
        prefix_match = query.Prefix(name={
            '_name': 'Prefix(name)',
            'value': search_query,
            'boost': 3.0,
        })
        clauses.extend((phrase_match, standard_match, prefix_match))

        # Partial & fuzzy matching is reserved for short query strings: long
        # ones, depending on their characters and word count, can be too
        # costly.
        if len(search_query) >= self.MAX_QUERY_LENGTH_FOR_FUZZY_SEARCH:
            return clauses

        # Fuzzy query: only slight misspellings should be corrected, but some
        # of the words may be absent as well:
        #   1 or 2 terms: should all be present
        #   3 terms: 2 should be present
        #   4 terms or more: 25% can be absent
        fuzzy_match = {
            'match': {
                'name': {
                    'query': search_query,
                    'prefix_length': 2,
                    'fuzziness': 'AUTO',
                    'minimum_should_match': '2<2 3<-25%',
                },
            },
        }
        # Trigrams query: at least 66% of the trigrams must be present.
        trigrams_match = {
            'match': {
                'name.trigrams': {
                    'query': search_query,
                    'minimum_should_match': '66%',
                },
            },
        }
        # Both queries live inside one DisMax rule so that a name matching
        # both is not overboosted. Only one of them needs to match, hence
        # tie_breaker is left at its default value of 0.0. Like the rules
        # above, this applies to 'name' in the default locale only.
        clauses.append(
            query.DisMax(
                _name='DisMax(FuzzyMatch(name), Match(name.trigrams))',
                boost=4.0,
                queries=[fuzzy_match, trigrams_match],
            )
        )
        return clauses