Exemplo n.º 1
0
def autocomplete_search(q, doc_type=None, fuzzy_mode=False, **kwargs):
    """Run an autocomplete ``match`` search and return the formatted hits.

    Args:
        q: Raw search input. It is handed to ``autocomplete_query`` together
            with ``fuzzy_mode``; the result is matched against the
            ``autocomplete`` field.
        doc_type: Optional document type. When truthy, the search is
            restricted to it via ``Search.doc_type``.
        fuzzy_mode: Forwarded unchanged to ``autocomplete_query``.
        **kwargs: Optional settings read by this function:
            ``limit`` -- page size, default 20, capped at 100.
            ``offset`` -- index of the first hit to return, default 0.
            ``filters`` -- mapping of field name to an indexable value;
            only element ``[0]`` of each value is used (presumably a list
            parsed from the query string -- confirm against the caller).

    Returns:
        Whatever ``format_search_results`` returns for the executed search.
    """
    query = autocomplete_query(q, fuzzy_mode)
    limit = kwargs.get('limit', 20)
    offset = kwargs.get('offset', 0)
    filters = kwargs.get('filters', {})

    # Cap the page size at 100 hits per request.
    if limit and limit > 100:
        limit = 100

    s = Search().index('_all')

    if doc_type:
        s = s.doc_type(doc_type)

    s = s.query('match', autocomplete=query)

    # TODO: implement in a generic way
    # add filters like: `&filter_status=Ready&filter_type=Broadcasts`
    # NOTE: `.items()` instead of the Python 2-only `.iteritems()`, which
    # raises AttributeError on Python 3; `.items()` works on both.
    for key, value in filters.items():
        s = s.query('term', **{key: value[0]})

    # Pagination window: `limit` hits starting at `offset`.
    s = s[offset:limit + offset]

    return format_search_results(s.execute())
def autocomplete(query: str) -> Response:
    """
    Execute a fuzzy search-as-you-type lookup across every index listed in DOCUMENT_INDICES.

    Background reading:
    https://www.elastic.co/guide/en/elasticsearch/guide/current/_index_time_search_as_you_type.html

    Indexing relies on the ngram-based autocomplete analyzer, whereas the search side deliberately uses the
    standard analyzer. That forces the complete entered word to match (apart from some fuzziness) instead of
    letting the algorithm degrade to matching just the first character in extreme cases. Without this, typing
    "Mahatma Ghandi" could absurdly suggest "Garret Walker" and "Hector Mendoza".

    Returns the response of the executed search.
    """
    base_search = Search(index=list(DOCUMENT_INDICES.values()))

    # Options for the match query against the `autocomplete` field.
    match_options = {
        "query": escape_elasticsearch_query(query),
        "analyzer": "standard",
        "fuzziness": "AUTO",
        "prefix_length": 1,
    }
    scored_search = base_search.query("match", autocomplete=match_options).extra(min_score=1)

    # Per-index boosts: person and organization get 4, paper gets 2.
    index_boosts = [
        {DOCUMENT_INDICES[doc_kind]: weight}
        for doc_kind, weight in (("person", 4), ("organization", 4), ("paper", 2))
    ]
    boosted_search = scored_search.update_from_dict({"indices_boost": index_boosts})

    return boosted_search.execute()
Exemplo n.º 3
0
    def filter(self, qs, value):
        """Restrict ``qs`` to the objects Elasticsearch finds for ``value``.

        The lower-cased ``value`` is searched in the ``category`` index
        (field ``category_name``), skipping documents with
        ``is_published=False``. Hits are sorted by ``_score`` and then by
        ``-views``, and at most ``self.max_result`` of them are requested.

        Returns ``qs`` filtered to the hit ids and ordered by their position
        in the Elasticsearch response, or an empty queryset when the response
        is falsy. The result is sliced to ``self.max_result`` in both cases.
        """
        es_client = Elasticsearch([settings.ELASTICSEARCH_HOST])
        needle = value.lower()

        # Excludes is_published=False documents from the results.
        exclude_unpublished = {"term": {"is_published": False}}
        name_match = {
            "simple_query_string": {
                "fields": ["category_name"],
                "quote_field_suffix": ".exact",
                "query": needle,
            }
        }
        bool_query = {
            "bool": {
                "must_not": [exclude_unpublished],
                "should": [name_match],
            }
        }

        search = Search(using=es_client, index='category')
        search = search.query(bool_query)
        search = search.sort("_score", "-views")
        search = search.extra(size=self.max_result, from_=0)

        response = search.execute()
        if not response:
            # TODO: fallback?
            # bits = value.split(' ')
            # search_clauses = reduce(operator.and_,
            #                         [Q(title__icontains=v) for v in bits])
            # unpublished = Category.objects.get_queryset_descendants(
            #     Category.objects.filter(is_published=False), include_self=True)
            # qs = (qs
            #       .exclude(pk__in=unpublished)
            #       .filter(search_clauses)
            #       .order_by('-views'))
            return qs.none()[:self.max_result]

        hit_ids = [hit.meta.id for hit in response]
        # Map every pk to its position in the response so the database
        # queryset comes back in the same order as the search hits.
        ranking = Case(
            *[When(pk=hit_id, then=rank) for rank, hit_id in enumerate(hit_ids)])
        ranked_qs = qs.filter(id__in=hit_ids).order_by(ranking)
        return ranked_qs[:self.max_result]
Exemplo n.º 4
0
    def filter(self, qs, value):
        """Restrict ``qs`` to the objects Elasticsearch finds for ``value``.

        The lower-cased ``value`` is searched in the ``offer`` index with two
        ``should`` clauses: a ``simple_query_string`` over ``fullname`` and
        ``category_name``, and a constant-score fuzzy match on
        ``fullname_translit``. Documents with ``is_published=False`` are
        excluded. Hits are sorted by ``_score`` and then by ``-views``, and at
        most ``self.max_result`` of them are requested.

        Side effects: ``self.hits_list`` is reset and filled with the hit ids;
        ``self.hits_order`` is assigned only when the response is truthy.

        Returns ``qs`` filtered to the hit ids and ordered by their position
        in the Elasticsearch response, or ``qs.none()`` when the response is
        falsy.
        """
        # Set up the connection.
        es_client = Elasticsearch([settings.ELASTICSEARCH_HOST])
        needle = value.lower()

        # Build the query.
        # Excludes is_published=False documents from the results.
        exclude_unpublished = {"term": {"is_published": False}}
        # Look for something sensible in the regular text fields.
        text_match = {
            "simple_query_string": {
                "fields": ["fullname", "category_name"],
                "quote_field_suffix": ".exact",
                "query": needle,
            }
        }
        # Partial match against the transliterated strings (English -> Russian).
        # constant_score disables boosting by term frequency.
        translit_match = {
            "constant_score": {
                "filter": {
                    "match": {
                        "fullname_translit": {
                            "query": needle,
                            "fuzziness": 1,
                            "operator": "and",
                        }
                    }
                }
            }
        }
        bool_query = {
            "bool": {
                "must_not": [exclude_unpublished],
                "should": [text_match, translit_match],
            }
        }

        # Prepare the search request.
        search = Search(using=es_client, index='offer')
        search = search.query(bool_query)
        search = search.sort("_score", "-views")
        search = search.extra(size=self.max_result, from_=0)

        self.hits_list = []
        response = search.execute()

        # TODO: old implementation. Maybe keep it as a fallback?
        # else:
        #     qs = qs.annotate(full_name=Concat(
        #         'product__article', Value(' '),
        #         'product__title', Value(' '),
        #         'product__search_title'))
        #     bits = value.split(' ')
        #     if len(bits) is 1 and bits[0].isdecimal():
        #         full_name_clauses = Q(full_name__icontains=bits[0])
        #     else:
        #         full_name_clauses = reduce(
        #             operator.and_,
        #             [Q(full_name__iregex=r'(^|\s)%s' % escape(v))
        #              for v in bits])
        #
        #     unpublished = Category.objects.get_queryset_descendants(
        #         Category.objects.filter(is_published=False),
        #         include_self=True)
        #
        #     qs = (qs.filter(full_name_clauses)
        #             .exclude(product__category__in=unpublished))
        #
        #     if self.uniq_category:
        #         products = (qs.order_by('product__category__title')
        #                       .distinct('product__category__title'))
        #         qs = (qs.filter(id__in=products)
        #                 .order_by('-product__category__views'))
        if not response:
            return qs.none()

        self.hits_list.extend(hit.meta.id for hit in response)
        # Needed so that the rows selected from Postgres keep the order that
        # Elasticsearch returned.
        self.hits_order = Case(*[
            When(pk=hit_id, then=rank)
            for rank, hit_id in enumerate(self.hits_list)
        ])
        return qs.filter(id__in=self.hits_list).order_by(self.hits_order)