def test_generate_bounding_box(self):
     downtown_bottom_left = Point(-95.23947, 38.9637903)
     downtown_top_right = Point(-95.23362278938293, 38.973081081164715)
     ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(downtown_bottom_left, downtown_top_right)
     self.assertEqual(min_lat, 38.9637903)
     self.assertEqual(min_lng, -95.23947)
     self.assertEqual(max_lat, 38.973081081164715)
     self.assertEqual(max_lng, -95.23362278938293)
 def test_generate_bounding_box_crossing_line_date(self):
     downtown_bottom_left = Point(95.23947, 38.9637903)
     downtown_top_right = Point(-95.23362278938293, 38.973081081164715)
     ((south, west), (north, east)) = generate_bounding_box(downtown_bottom_left, downtown_top_right)
     self.assertEqual(south, 38.9637903)
     self.assertEqual(west, 95.23947)
     self.assertEqual(north, 38.973081081164715)
     self.assertEqual(east, -95.23362278938293)
Exemplo n.º 3
0
    def _build_search_query_within(self, within):
        from haystack.utils.geo import generate_bounding_box

        ((south, west), (north, east)) = generate_bounding_box(
            within["point_1"], within["point_2"]
        )
        return {
            "geo_bounding_box": {
                within["field"]: {
                    "top_left": {"lat": north, "lon": west},
                    "bottom_right": {"lat": south, "lon": east},
                }
            }
        }
    def _build_search_query_within(self, within):
        from haystack.utils.geo import generate_bounding_box

        ((south, west), (north, east)) = generate_bounding_box(
            within["point_1"], within["point_2"]
        )
        return {
            "geo_bounding_box": {
                within["field"]: {
                    "top_left": {"lat": north, "lon": west},
                    "bottom_right": {"lat": south, "lon": east},
                }
            }
        }
Exemplo n.º 5
0
 def _build_search_filters_within(self, within):
     from haystack.utils.geo import generate_bounding_box
     ((south, west), (north, east)) = generate_bounding_box(
         within['point_1'], within['point_2'])
     within_filter = {
         "geo_bounding_box": {
             within['field']: {
                 "top_left": {
                     "lat": north,
                     "lon": west
                 },
                 "bottom_right": {
                     "lat": south,
                     "lon": east
                 }
             }
         },
     }
     return within_filter
    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None):
        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        if query_string == '*:*':
            kwargs = {
                'query': {
                    "match_all": {}
                },
            }
        else:
            kwargs = {
                'query': {
                    'query_string': {
                        'default_field': content_field,
                        'default_operator': DEFAULT_OPERATOR,
                        'query': query_string,
                        'analyze_wildcard': True,
                        'auto_generate_phrase_queries': True,
                    },
                },
            }

        # so far, no filters
        filters = []

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['fields'] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == 'distance' and distance_point:
                    # Do the geo-enabled sort.
                    lng, lat = distance_point['point'].get_coords()
                    sort_kwargs = {
                        "_geo_distance": {
                            distance_point['field']: [lng, lat],
                            "order": direction,
                            "unit": "km"
                        }
                    }
                else:
                    if field == 'distance':
                        warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                    # Regular sorting.
                    sort_kwargs = {field: {'order': direction}}

                order_list.append(sort_kwargs)

            kwargs['sort'] = order_list

        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
        # if start_offset is not None:
        #     kwargs['from'] = start_offset

        # if end_offset is not None:
        #     kwargs['size'] = end_offset - start_offset

        if highlight is True:
            kwargs['highlight'] = {
                'fields': {
                    content_field: {'store': 'yes'},
                }
            }

        if self.include_spelling:
            kwargs['suggest'] = {
                'suggest': {
                    'text': spelling_query or query_string,
                    'term': {
                        # Using content_field here will result in suggestions of stemmed words.
                        'field': '_all',
                    },
                },
            }

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, extra_options in facets.items():
                facet_options = {
                    'terms': {
                        'field': facet_fieldname,
                        'size': 100,
                    },
                }
                # Special cases for options applied at the facet level (not the terms level).
                if extra_options.pop('global_scope', False):
                    # Renamed "global_scope" since "global" is a python keyword.
                    facet_options['global'] = True
                if 'facet_filter' in extra_options:
                    facet_options['facet_filter'] = extra_options.pop('facet_filter')
                facet_options['terms'].update(extra_options)
                kwargs['facets'][facet_fieldname] = facet_options

        if date_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get('gap_by').lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value['gap_amount'], interval[:1])

                kwargs['facets'][facet_fieldname] = {
                    'date_histogram': {
                        'field': facet_fieldname,
                        'interval': interval,
                    },
                    'facet_filter': {
                        "range": {
                            facet_fieldname: {
                                'from': self._from_python(value.get('start_date')),
                                'to': self._from_python(value.get('end_date')),
                            }
                        }
                    }
                }

        if query_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in query_facets:
                kwargs['facets'][facet_fieldname] = {
                    'query': {
                        'query_string': {
                            'query': value,
                        }
                    },
                }

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if models and len(models):
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            filters.append({"terms": {DJANGO_CT: model_choices}})

        for q in narrow_queries:
            filters.append({
                'fquery': {
                    'query': {
                        'query_string': {
                            'query': q
                        },
                    },
                    '_cache': True,
                }
            })

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
            within_filter = {
                "geo_bounding_box": {
                    within['field']: {
                        "top_left": {
                            "lat": north,
                            "lon": west
                        },
                        "bottom_right": {
                            "lat": south,
                            "lon": east
                        }
                    }
                },
            }
            filters.append(within_filter)

        if dwithin is not None:
            lng, lat = dwithin['point'].get_coords()

            # NB: the 1.0.0 release of elasticsearch introduce an
            #     incompatible change on the distance filter formating
            if elasticsearch.VERSION >= (1, 0, 0):
                distance = "%(dist).6f%(unit)s" % {
                        'dist': dwithin['distance'].km,
                        'unit': "km"
                    }
            else:
                distance = dwithin['distance'].km

            dwithin_filter = {
                "geo_distance": {
                    "distance": distance,
                    dwithin['field']: {
                        "lat": lat,
                        "lon": lng
                    }
                }
            }
            filters.append(dwithin_filter)

        # if we want to filter, change the query type to filteres
        if filters:
            kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
            if len(filters) == 1:
                kwargs['query']['filtered']["filter"] = filters[0]
            else:
                kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}

        return kwargs
    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None):
        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        if query_string == '*:*':
            kwargs = {
                'query': {
                    "match_all": {}
                },
            }
        else:
            kwargs = {
                'query': {
                    'query_string': {
                        'default_field': content_field,
                        'default_operator': DEFAULT_OPERATOR,
                        'query': query_string,
                        'analyze_wildcard': True,
                        'auto_generate_phrase_queries': True,
                    },
                },
            }

        # so far, no filters
        filters = []

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['fields'] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == 'distance' and distance_point:
                    # Do the geo-enabled sort.
                    lng, lat = distance_point['point'].get_coords()
                    sort_kwargs = {
                        "_geo_distance": {
                            distance_point['field']: [lng, lat],
                            "order": direction,
                            "unit": "km"
                        }
                    }
                else:
                    if field == 'distance':
                        warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                    # Regular sorting.
                    sort_kwargs = {field: {'order': direction}}

                order_list.append(sort_kwargs)

            kwargs['sort'] = order_list

        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
        # if start_offset is not None:
        #     kwargs['from'] = start_offset

        # if end_offset is not None:
        #     kwargs['size'] = end_offset - start_offset

        if highlight is True:
            kwargs['highlight'] = {
                'fields': {
                    content_field: {'store': 'yes'},
                }
            }

        if self.include_spelling:
            kwargs['suggest'] = {
                'suggest': {
                    'text': spelling_query or query_string,
                    'term': {
                        # Using content_field here will result in suggestions of stemmed words.
                        'field': '_all',
                    },
                },
            }

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, extra_options in facets.items():
                facet_options = {
                    'terms': {
                        'field': facet_fieldname,
                        'size': 100,
                    },
                }
                # Special cases for options applied at the facet level (not the terms level).
                if extra_options.pop('global_scope', False):
                    # Renamed "global_scope" since "global" is a python keyword.
                    facet_options['global'] = True
                if 'facet_filter' in extra_options:
                    facet_options['facet_filter'] = extra_options.pop('facet_filter')
                facet_options['terms'].update(extra_options)
                kwargs['facets'][facet_fieldname] = facet_options

        if date_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get('gap_by').lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get('gap_amount', 1) != 1 and not interval in ('month', 'year'):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value['gap_amount'], interval[:1])

                kwargs['facets'][facet_fieldname] = {
                    'date_histogram': {
                        'field': facet_fieldname,
                        'interval': interval,
                    },
                    'facet_filter': {
                        "range": {
                            facet_fieldname: {
                                'from': self._from_python(value.get('start_date')),
                                'to': self._from_python(value.get('end_date')),
                            }
                        }
                    }
                }

        if query_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in query_facets:
                kwargs['facets'][facet_fieldname] = {
                    'query': {
                        'query_string': {
                            'query': value,
                        }
                    },
                }

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if models and len(models):
            model_choices = sorted(['%s.%s' % (model._meta.app_label, model._meta.module_name) for model in models])
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            if narrow_queries is None:
                narrow_queries = set()

            filters.append({"terms": {DJANGO_CT: model_choices}})

        if narrow_queries:
            filters.append({
                'fquery': {
                    'query': {
                        'query_string': {
                            'query': u' AND '.join(list(narrow_queries)),
                        },
                    },
                    '_cache': True,
                }
            })

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
            within_filter = {
                "geo_bounding_box": {
                    within['field']: {
                        "top_left": {
                            "lat": max_lat,
                            "lon": min_lng
                        },
                        "bottom_right": {
                            "lat": min_lat,
                            "lon": max_lng
                        }
                    }
                },
            }
            filters.append(within_filter)

        if dwithin is not None:
            lng, lat = dwithin['point'].get_coords()
            dwithin_filter = {
                "geo_distance": {
                    "distance": dwithin['distance'].km,
                    dwithin['field']: {
                        "lat": lat,
                        "lon": lng
                    }
                }
            }
            filters.append(dwithin_filter)

        # if we want to filter, change the query type to filteres
        if filters:
            kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
            if len(filters) == 1:
                kwargs['query']['filtered']["filter"] = filters[0]
            else:
                kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}

        return kwargs
Exemplo n.º 8
0
    def build_search_kwargs(self,
                            query_string,
                            sort_by=None,
                            start_offset=0,
                            end_offset=None,
                            fields="",
                            highlight=False,
                            facets=None,
                            date_facets=None,
                            query_facets=None,
                            narrow_queries=None,
                            spelling_query=None,
                            within=None,
                            dwithin=None,
                            distance_point=None,
                            models=None,
                            limit_to_registered_models=None,
                            result_class=None,
                            stats=None,
                            collate=None,
                            **extra_kwargs):

        index = haystack.connections[self.connection_alias].get_unified_index()

        kwargs = {"fl": "* score", "df": index.document_field}

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs["fl"] = fields

        if sort_by is not None:
            if sort_by in ["distance asc", "distance desc"] and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point["point"].coords
                kwargs["sfield"] = distance_point["field"]
                kwargs["pt"] = "%s,%s" % (lat, lng)

                if sort_by == "distance asc":
                    kwargs["sort"] = "geodist() asc"
                else:
                    kwargs["sort"] = "geodist() desc"
            else:
                if sort_by.startswith("distance "):
                    warnings.warn(
                        "In order to sort by distance, you must call the '.distance(...)' method."
                    )

                # Regular sorting.
                kwargs["sort"] = sort_by

        if start_offset is not None:
            kwargs["start"] = start_offset

        if end_offset is not None:
            kwargs["rows"] = end_offset - start_offset

        if highlight:
            # `highlight` can either be True or a dictionary containing custom parameters
            # which will be passed to the backend and may override our default settings:

            kwargs["hl"] = "true"
            kwargs["hl.fragsize"] = "200"

            if isinstance(highlight, dict):
                # autoprefix highlighter options with 'hl.', all of them start with it anyway
                # this makes option dicts shorter: {'maxAnalyzedChars': 42}
                # and lets some of options be used as keyword arguments: `.highlight(preserveMulti=False)`
                kwargs.update({
                    key if key.startswith("hl.") else ("hl." + key):
                    highlight[key]
                    for key in highlight.keys()
                })

        if collate is None:
            collate = self.collate
        if self.include_spelling is True:
            kwargs["spellcheck"] = "true"
            kwargs["spellcheck.collate"] = str(collate).lower()
            kwargs["spellcheck.count"] = 1

            if spelling_query:
                kwargs["spellcheck.q"] = spelling_query

        if facets is not None:
            kwargs["facet"] = "on"
            kwargs["facet.field"] = facets.keys()

            for facet_field, options in facets.items():
                for key, value in options.items():
                    kwargs["f.%s.facet.%s" %
                           (facet_field, key)] = self.conn._from_python(value)

        if date_facets is not None:
            kwargs["facet"] = "on"
            kwargs["facet.date"] = date_facets.keys()
            kwargs["facet.date.other"] = "none"

            for key, value in date_facets.items():
                kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(
                    value.get("start_date"))
                kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(
                    value.get("end_date"))
                gap_by_string = value.get("gap_by").upper()
                gap_string = "%d%s" % (value.get("gap_amount"), gap_by_string)

                if value.get("gap_amount") != 1:
                    gap_string += "S"

                kwargs["f.%s.facet.date.gap" % key] = "+%s/%s" % (
                    gap_string,
                    gap_by_string,
                )

        if query_facets is not None:
            kwargs["facet"] = "on"
            kwargs["facet.query"] = [
                "%s:%s" % (field, value) for field, value in query_facets
            ]

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(
                settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

        if models and len(models):
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            if narrow_queries is None:
                narrow_queries = set()

            narrow_queries.add("%s:(%s)" %
                               (DJANGO_CT, " OR ".join(model_choices)))

        if narrow_queries is not None:
            kwargs["fq"] = list(narrow_queries)

        if stats:
            kwargs["stats"] = "true"

            for k in stats.keys():
                kwargs["stats.field"] = k

                for facet in stats[k]:
                    kwargs["f.%s.stats.facet" % k] = facet

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            kwargs.setdefault("fq", [])
            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(
                within["point_1"], within["point_2"])
            # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
            # very clear on this.
            bbox = "%s:[%s,%s TO %s,%s]" % (
                within["field"],
                min_lat,
                min_lng,
                max_lat,
                max_lng,
            )
            kwargs["fq"].append(bbox)

        if dwithin is not None:
            kwargs.setdefault("fq", [])
            lng, lat = dwithin["point"].coords
            geofilt = "{!geofilt pt=%s,%s sfield=%s d=%s}" % (
                lat,
                lng,
                dwithin["field"],
                dwithin["distance"].km,
            )
            kwargs["fq"].append(geofilt)

        # Check to see if the backend should try to include distances
        # (Solr 4.X+) in the results.
        if self.distance_available and distance_point:
            # In early testing, you can't just hand Solr 4.X a proper bounding box
            # & request distances. To enable native distance would take calculating
            # a center point & a radius off the user-provided box, which kinda
            # sucks. We'll avoid it for now, since Solr 4.x's release will be some
            # time yet.
            # kwargs['fl'] += ' _dist_:geodist()'
            pass

        if extra_kwargs:
            kwargs.update(extra_kwargs)

        return kwargs
Exemplo n.º 9
0
    def build_search_kwargs(
        self,
        query_string,
        sort_by=None,
        start_offset=0,
        end_offset=None,
        fields="",
        highlight=False,
        facets=None,
        date_facets=None,
        query_facets=None,
        narrow_queries=None,
        spelling_query=None,
        within=None,
        dwithin=None,
        distance_point=None,
        models=None,
        limit_to_registered_models=None,
        result_class=None,
    ):
        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        if query_string == "*:*":
            kwargs = {"query": {"match_all": {}}}
        elif query_string.startswith("(") and query_string.endswith(")"):
            kwargs = {
                "query": {
                    "query_string": {
                        "default_field": content_field,
                        "default_operator": DEFAULT_OPERATOR,
                        "query": query_string,
                        "analyze_wildcard": True,
                        "auto_generate_phrase_queries": True,
                    }
                }
            }
        else:
            kwargs = {
                "query": {
                    "match": {
                        str(content_field): {
                            "query": query_string,
                            "analyzer": self.DEFAULT_ANALYZER,  # setting courtesy of ConfigurableElasticBackend
                            "minimum_should_match": self.DEFAULT_MINIMUM_MATCH,
                        }
                    }
                }
            }

        # so far, no filters
        filters = []

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs["fields"] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == "distance" and distance_point:
                    # Do the geo-enabled sort.
                    lng, lat = distance_point["point"].get_coords()
                    sort_kwargs = {
                        "_geo_distance": {distance_point["field"]: [lng, lat], "order": direction, "unit": "km"}
                    }
                else:
                    if field == "distance":
                        warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                    # Regular sorting.
                    sort_kwargs = {field: {"order": direction}}

                order_list.append(sort_kwargs)

            kwargs["sort"] = order_list

        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
        # if start_offset is not None:
        #     kwargs['from'] = start_offset

        # if end_offset is not None:
        #     kwargs['size'] = end_offset - start_offset

        if highlight is True:
            kwargs["highlight"] = {"fields": {content_field: {"store": "yes"}}}

        if self.include_spelling:
            kwargs["suggest"] = {
                "suggest": {
                    "text": spelling_query or query_string,
                    "term": {
                        # Using content_field here will result in suggestions of stemmed words.
                        "field": "_all"
                    },
                }
            }

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname, extra_options in facets.items():
                facet_options = {"terms": {"field": facet_fieldname, "size": 100}}
                # Special cases for options applied at the facet level (not the terms level).
                if extra_options.pop("global_scope", False):
                    # Renamed "global_scope" since "global" is a python keyword.
                    facet_options["global"] = True
                if "facet_filter" in extra_options:
                    facet_options["facet_filter"] = extra_options.pop("facet_filter")
                facet_options["terms"].update(extra_options)
                kwargs["facets"][facet_fieldname] = facet_options

        if date_facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get("gap_by").lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get("gap_amount", 1) != 1 and interval not in ("month", "year"):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value["gap_amount"], interval[:1])

                kwargs["facets"][facet_fieldname] = {
                    "date_histogram": {"field": facet_fieldname, "interval": interval},
                    "facet_filter": {
                        "range": {
                            facet_fieldname: {
                                "from": self._from_python(value.get("start_date")),
                                "to": self._from_python(value.get("end_date")),
                            }
                        }
                    },
                }

        if query_facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname, value in query_facets:
                kwargs["facets"][facet_fieldname] = {"query": {"match": {"query": value}}}

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

        if models and len(models):
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            filters.append({"terms": {DJANGO_CT: model_choices}})

        for q in narrow_queries:
            filters.append({"fquery": {"query": {"query_string": {"query": q}}, "_cache": True}})

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((south, west), (north, east)) = generate_bounding_box(within["point_1"], within["point_2"])
            within_filter = {
                "geo_bounding_box": {
                    within["field"]: {
                        "top_left": {"lat": north, "lon": west},
                        "bottom_right": {"lat": south, "lon": east},
                    }
                }
            }
            filters.append(within_filter)

        if dwithin is not None:
            lng, lat = dwithin["point"].get_coords()

            # NB: the 1.0.0 release of elasticsearch introduce an
            #     incompatible change on the distance filter formating
            if elasticsearch.VERSION >= (1, 0, 0):
                distance = "%(dist).6f%(unit)s" % {"dist": dwithin["distance"].km, "unit": "km"}
            else:
                distance = dwithin["distance"].km

            dwithin_filter = {"geo_distance": {"distance": distance, dwithin["field"]: {"lat": lat, "lon": lng}}}
            filters.append(dwithin_filter)

        # if we want to filter, change the query type to filteres
        if filters:
            kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
            if len(filters) == 1:
                kwargs["query"]["filtered"]["filter"] = filters[0]
            else:
                kwargs["query"]["filtered"]["filter"] = {"bool": {"must": filters}}

        return kwargs
Exemplo n.º 10
0
    def build_search_kwargs(self,
                            query_string,
                            sort_by=None,
                            start_offset=0,
                            end_offset=None,
                            fields='',
                            highlight=False,
                            boost_fields=None,
                            boost_negative=None,
                            filter_context=None,
                            narrow_queries=None,
                            spelling_query=None,
                            facets=None,
                            date_facets=None,
                            query_facets=None,
                            within=None,
                            dwithin=None,
                            distance_point=None,
                            models=None,
                            limit_to_registered_models=None,
                            result_class=None,
                            **extra_kwargs):

        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        filters = []
        filters_with_score = []
        filter_query_strings = {
            'content': u'%s',
            'contains': u'*%s*',
            'endswith': u'*%s',
            'startswith': u'%s*',
            'exact': u'%s',
            'gt': u'{%s TO *}',
            'gte': u'[%s TO *]',
            'lt': u'{* TO %s}',
            'lte': u'[* TO %s]',
            'fuzzy': u'%s~',
        }

        # if filter_context:
        #     for f in filter_context:
        #         if f.get('content'):
        #             content = str(f.pop('content'))
        #             if query_string == '*:*':
        #                 query_string = content
        #             else:
        #                 query_string = '%s %s' % (query_string, content)
        #         for k, v in f.items():
        #             _filter = None
        #             _filter_with_score = None
        #             try:
        #                 _value = v.prepare()
        #             except AttributeError:
        #                 _value = str(v)
        #             _field, _lookup = self.get_filter_lookup(k)
        #             _is_nested = NESTED_FILTER_SEPARATOR in _field
        #             _nested_path = None
        #             if _is_nested:
        #                 _nested_path = _field.split(NESTED_FILTER_SEPARATOR)[0]
        #                 _field = ('.').join(_field.split(NESTED_FILTER_SEPARATOR))
        #             if _lookup == 'exact':
        #                 if _is_nested:
        #                     _filter = {'term': {_field: _value}}
        #                 else:
        #                     _filter = {'term': {_field + '.raw': _value}}
        #             elif _lookup == 'content':
        #                 _filter_with_score = {'match': {_field: _value}}
        #             elif _lookup == 'in':
        #                 if not isinstance(_value, list):
        #                     _value = ast.literal_eval(str(_value))
        #                 _filter = {
        #                     'query_string': {
        #                         'fields': [_field],
        #                         'query': ' OR '.join(['"%s"' % i for i in _value])
        #                     }}
        #             elif _lookup == 'range':
        #                 if isinstance(_value, dict):
        #                     _filter = {'range': {_field: _value}}
        #                 elif _value:
        #                     if not isinstance(_value, list):
        #                         _value = _value.split(',')
        #                     if len(_value) >= 2:
        #                         _range = {}
        #                         _range['gte'] = _value[0]
        #                         _range['lte'] = _value[1]
        #                         _filter = {'range': {_field: _range}}
        #                     else:
        #                         raise ValueError(
        #                             _('Range lookup requires minimum and maximum values,'
        #                               'only one value was provided'))
        #             else:
        #                 _filter = {
        #                     'query_string': {
        #                         'fields': [_field],
        #                         'query': filter_query_strings[_lookup] % _value,
        #                     }}
        #
        #             # nested filter
        #             if _is_nested:
        #                 if _filter:
        #                     _filter = {
        #                         'nested': {
        #                             'path': _nested_path,
        #                             'query': _filter
        #                         }
        #                     }
        #                 if _filter_with_score:
        #                     _filter_with_score = {
        #                         'nested': {
        #                             'path': _nested_path,
        #                             'query': _filter_with_score
        #                         }
        #                     }
        #
        #             if _filter:
        #                 filters.append(_filter)
        #             if _filter_with_score:
        #                 filters.append(_filter_with_score)

        if query_string == '*:*':
            kwargs = {
                'query': {
                    "match_all": {}
                },
            }
        else:
            kwargs = {
                'query': {
                    'query_string': {
                        'fields': [content_field],
                        'default_operator': DEFAULT_OPERATOR,
                        'query': query_string,
                        'analyze_wildcard': True,
                        'auto_generate_phrase_queries': True,
                        'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS,
                    },
                },
            }
            if boost_fields:
                kwargs['query']['query_string']['fields'] = []
                for boost_field, boost_value in boost_fields.items():
                    kwargs['query']['query_string']['fields'].append(
                        '%s^%s' % (boost_field, boost_value))
            if boost_negative:
                boosting = {
                    'positive': kwargs['query'],
                    'negative': boost_negative[0],
                    'negative_boost': boost_negative[1]
                }
                kwargs['query'] = {'boosting': boosting}

        if filters_with_score:
            kwargs['query'] = {"bool": {"must": [kwargs.pop("query")]}}
            kwargs['query']['bool']['must'] += filters_with_score

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['stored_fields'] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == 'distance' and distance_point:
                    lng, lat = distance_point['point'].get_coords()
                    sort_kwargs = {
                        "_geo_distance": {
                            distance_point['field']: [lng, lat],
                            "order": direction,
                            "unit": "km"
                        }
                    }
                else:
                    if field == 'distance':
                        warnings.warn(
                            "In order to sort by distance, "
                            "you must call the '.distance(...)' method.")

                    sort_kwargs = {field: {'order': direction}}

                order_list.append(sort_kwargs)

            kwargs['sort'] = order_list

        if highlight:
            kwargs['highlight'] = {
                'fields': {
                    content_field: {},
                }
            }

            if isinstance(highlight, dict):
                kwargs['highlight'].update(highlight)

        if self.include_spelling:
            kwargs['suggest'] = {
                'suggest': {
                    'text': spelling_query or query_string,
                    'term': {
                        'field': '_all',
                    },
                },
            }

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault('aggregations', {})

            for facet_fieldname, extra_options in facets.items():
                facet_options = {
                    'terms': {
                        'field': facet_fieldname + '.raw',
                        'size': 100,
                    },
                }
                # Special cases for options applied at the facet level (not the terms level).
                if extra_options.pop('global_scope', False):
                    # Renamed "global_scope" since "global" is a python keyword.
                    facet_options['global'] = True
                if 'facet_filter' in extra_options:
                    facet_options['facet_filter'] = extra_options.pop(
                        'facet_filter')
                facet_options['terms'].update(extra_options)
                kwargs['aggregations'][facet_fieldname] = facet_options

        if date_facets is not None:
            kwargs.setdefault('aggregations', {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get('gap_by').lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get('gap_amount',
                             1) != 1 and interval not in ('month', 'year'):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value['gap_amount'], interval[:1])

                date_histogram_aggregation_name = "{0}{1}".format(
                    facet_fieldname, DATE_HISTOGRAM_FIELD_NAME_SUFFIX)
                date_range_aggregation_name = "{0}{1}".format(
                    facet_fieldname, DATE_RANGE_FIELD_NAME_SUFFIX)

                kwargs['aggregations'][date_histogram_aggregation_name] = {
                    'meta': {
                        '_type': 'haystack_date_histogram',
                    },
                    'date_histogram': {
                        'field': facet_fieldname,
                        'interval': interval,
                    },
                }

                kwargs['aggregations'][date_range_aggregation_name] = {
                    'meta': {
                        '_type': 'haystack_date_range',
                    },
                    'date_range': {  # agg type
                        'field':
                        facet_fieldname,
                        'ranges': [{
                            'from':
                            self._from_python(value.get('start_date')),
                            'to':
                            self._from_python(value.get('end_date')),
                        }]
                    }
                }

        if query_facets is not None:
            kwargs.setdefault('aggregations', {})

            for facet_fieldname, value in query_facets:
                kwargs['aggregations'][facet_fieldname] = {
                    'filter': {
                        'query_string': {
                            'query': value,
                        }
                    }
                }

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(
                settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if models and len(models):
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            filters.append({"terms": {DJANGO_CT: model_choices}})

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((south, west),
             (north, east)) = generate_bounding_box(within['point_1'],
                                                    within['point_2'])
            within_filter = {
                "geo_bounding_box": {
                    within['field']: {
                        "top_left": {
                            "lat": north,
                            "lon": west
                        },
                        "bottom_right": {
                            "lat": south,
                            "lon": east
                        }
                    }
                },
            }
            filters.append(within_filter)

        if dwithin is not None:
            lng, lat = dwithin['point'].get_coords()

            # NB: the 1.0.0 release of elasticsearch introduce an
            #     incompatible change on the distance filter formating
            if elasticsearch.VERSION >= (1, 0, 0):
                distance = "%(dist).6f%(unit)s" % {
                    'dist': dwithin['distance'].km,
                    'unit': "km"
                }
            else:
                distance = dwithin['distance'].km

            dwithin_filter = {
                "geo_distance": {
                    "distance": distance,
                    dwithin['field']: {
                        "lat": lat,
                        "lon": lng
                    }
                }
            }
            filters.append(dwithin_filter)

        # for q in narrow_queries:
        #     key, value = q.split(':')[0], ':'.join(q.split(':')[1:])
        #     filters.append({
        #         'match': {
        #             key: value
        #         },
        #     })

        for q in narrow_queries:
            filters.append({'query_string': {'query': q}})

        # if we want to filter, change the query type to filtered
        if filters:
            kwargs["query"] = {"bool": {"must": kwargs.pop("query")}}

            if len(filters) == 1:
                kwargs['query']['bool']["filter"] = filters[0]
            else:
                kwargs['query']['bool']["filter"] = {"bool": {"must": filters}}

        if extra_kwargs:
            kwargs.update(extra_kwargs)
        return kwargs
    def build_search_kwargs(
        self,
        query_string,
        sort_by=None,
        start_offset=0,
        end_offset=None,
        fields="",
        highlight=False,
        facets=None,
        date_facets=None,
        query_facets=None,
        narrow_queries=None,
        spelling_query=None,
        within=None,
        dwithin=None,
        distance_point=None,
        models=None,
        limit_to_registered_models=None,
        result_class=None,
        custom_score=None,
    ):
        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        if query_string == "*:*":
            kwargs = {"query": {"filtered": {"query": {"match_all": {}}}}}
        else:
            query_string_content = self._get_query_string_content(content_field, query_string)
            kwargs = {"query": {"filtered": {"query": {"query_string": query_string_content}}}}

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs["fields"] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == "distance" and distance_point:
                    # Do the geo-enabled sort.
                    lng, lat = distance_point["point"].get_coords()
                    sort_kwargs = {
                        "_geo_distance": {distance_point["field"]: [lng, lat], "order": direction, "unit": "km"}
                    }
                else:
                    if field == "distance":
                        warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                    # Regular sorting.
                    sort_kwargs = {field: {"order": direction}}

                order_list.append(sort_kwargs)

            kwargs["sort"] = order_list

        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
        # if start_offset is not None:
        #     kwargs['from'] = start_offset

        # if end_offset is not None:
        #     kwargs['size'] = end_offset - start_offset

        if highlight is True:
            kwargs["highlight"] = {"fields": {content_field: {"store": "yes"}}}

        if self.include_spelling is True:
            warnings.warn("Elasticsearch does not handle spelling suggestions.", Warning, stacklevel=2)

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname in facets:
                kwargs["facets"][facet_fieldname] = {"terms": {"field": facet_fieldname, "size": 300}}

        if date_facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get("gap_by").lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get("gap_amount", 1) != 1 and not interval in ("month", "year"):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value["gap_amount"], interval[:1])

                kwargs["facets"][facet_fieldname] = {
                    "date_histogram": {"field": facet_fieldname, "interval": interval},
                    "facet_filter": {
                        "range": {
                            facet_fieldname: {
                                "from": self._from_python(value.get("start_date")),
                                "to": self._from_python(value.get("end_date")),
                            }
                        }
                    },
                }

        if query_facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname, value in query_facets:
                kwargs["facets"][facet_fieldname] = {"query": {"query_string": {"query": value}}}

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

        if models and len(models):
            model_choices = sorted(["%s.%s" % (model._meta.app_label, model._meta.module_name) for model in models])
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            if narrow_queries is None:
                narrow_queries = set()

            narrow_queries.add("%s:(%s)" % (DJANGO_CT, " OR ".join(model_choices)))

        if narrow_queries:
            kwargs["query"].setdefault("filtered", {})
            kwargs["query"]["filtered"].setdefault("filter", {})
            kwargs["query"]["filtered"]["filter"] = {
                "fquery": {"query": {"query_string": {"query": u" AND ".join(list(narrow_queries))}}, "_cache": True}
            }

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((south, west), (north, east)) = generate_bounding_box(within["point_1"], within["point_2"])
            within_filter = {
                "geo_bounding_box": {
                    within["field"]: {
                        "top_left": {"lat": north, "lon": west},
                        "bottom_right": {"lat": south, "lon": east},
                    }
                }
            }
            kwargs["query"].setdefault("filtered", {})
            kwargs["query"]["filtered"].setdefault("filter", {})
            if kwargs["query"]["filtered"]["filter"]:
                compound_filter = {"and": [kwargs["query"]["filtered"]["filter"], within_filter]}
                kwargs["query"]["filtered"]["filter"] = compound_filter
            else:
                kwargs["query"]["filtered"]["filter"] = within_filter

        if dwithin is not None:
            lng, lat = dwithin["point"].get_coords()
            dwithin_filter = {
                "geo_distance": {"distance": dwithin["distance"].km, dwithin["field"]: {"lat": lat, "lon": lng}}
            }
            kwargs["query"].setdefault("filtered", {})
            kwargs["query"]["filtered"].setdefault("filter", {})
            if kwargs["query"]["filtered"]["filter"]:
                compound_filter = {"and": [kwargs["query"]["filtered"]["filter"], dwithin_filter]}
                kwargs["query"]["filtered"]["filter"] = compound_filter
            else:
                kwargs["query"]["filtered"]["filter"] = dwithin_filter

        # Remove the "filtered" key if we're not filtering. Otherwise,
        # Elasticsearch will blow up.
        if not kwargs["query"]["filtered"].get("filter"):
            kwargs["query"] = kwargs["query"]["filtered"]["query"]

        if custom_score:
            new_kwargs = kwargs.copy()
            new_kwargs["query"] = {}
            new_kwargs["query"]["custom_score"] = custom_score.copy()
            new_kwargs["query"]["custom_score"]["query"] = kwargs["query"].copy()
            return new_kwargs
        return kwargs
Exemplo n.º 12
0
    def build_search_kwargs(self,
                            query_string,
                            sort_by=None,
                            start_offset=0,
                            end_offset=None,
                            fields='',
                            highlight=False,
                            facets=None,
                            date_facets=None,
                            query_facets=None,
                            narrow_queries=None,
                            spelling_query=None,
                            within=None,
                            dwithin=None,
                            distance_point=None,
                            models=None,
                            limit_to_registered_models=None,
                            result_class=None,
                            **extra_kwargs):

        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        if query_string == '*:*':
            kwargs = {
                'query': {
                    "match_all": {}
                },
            }
        else:
            kwargs = {
                'query': {
                    'query_string': {
                        'default_field': content_field,
                        'default_operator': DEFAULT_OPERATOR,
                        'query': query_string,
                        'analyze_wildcard': True,
                        'auto_generate_phrase_queries': True,
                        # elasticsearch.exceptions.RequestError: TransportError(400, 'parsing_exception', '[query_string] query does not support [fuzzy_min_sim]')
                        # 'fuzzy_min_sim': FUZZY_MIN_SIM,
                        'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS,
                    },
                },
            }

        # so far, no filters
        filters = []

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['stored_fields'] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == 'distance' and distance_point:
                    # Do the geo-enabled sort.
                    lng, lat = distance_point['point'].get_coords()
                    sort_kwargs = {
                        "_geo_distance": {
                            distance_point['field']: [lng, lat],
                            "order": direction,
                            "unit": "km"
                        }
                    }
                else:
                    if field == 'distance':
                        warnings.warn(
                            "In order to sort by distance, you must call the '.distance(...)' method."
                        )

                    # Regular sorting.
                    sort_kwargs = {field: {'order': direction}}

                order_list.append(sort_kwargs)

            kwargs['sort'] = order_list

        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
        # if start_offset is not None:
        #     kwargs['from'] = start_offset

        # if end_offset is not None:
        #     kwargs['size'] = end_offset - start_offset

        if highlight:
            # `highlight` can either be True or a dictionary containing custom parameters
            # which will be passed to the backend and may override our default settings:

            kwargs['highlight'] = {
                'fields': {
                    # content_field: {'store': 'yes'},
                    content_field: {},
                }
            }

            if isinstance(highlight, dict):
                kwargs['highlight'].update(highlight)

        if self.include_spelling:
            kwargs['suggest'] = {
                'suggest': {
                    'text': spelling_query or query_string,
                    'term': {
                        # Using content_field here will result in suggestions of stemmed words.
                        'field': '_all',
                    },
                },
            }

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault('aggregations', {})

            for facet_fieldname, extra_options in facets.items():
                facet_options = {
                    'terms': {
                        'field': facet_fieldname,
                        'size': 100,
                    },
                }
                # Special cases for options applied at the facet level (not the terms level).
                if extra_options.pop('global_scope', False):
                    # Renamed "global_scope" since "global" is a python keyword.
                    facet_options['global'] = True
                if 'facet_filter' in extra_options:
                    facet_options['facet_filter'] = extra_options.pop(
                        'facet_filter')
                facet_options['terms'].update(extra_options)
                kwargs['aggregations'][facet_fieldname] = facet_options

        if date_facets is not None:
            kwargs.setdefault('aggregations', {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get('gap_by').lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get('gap_amount',
                             1) != 1 and interval not in ('month', 'year'):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value['gap_amount'], interval[:1])

                date_histogram_aggregation_name = "{0}{1}".format(
                    facet_fieldname, DATE_HISTOGRAM_FIELD_NAME_SUFFIX)
                date_range_aggregation_name = "{0}{1}".format(
                    facet_fieldname, DATE_RANGE_FIELD_NAME_SUFFIX)

                kwargs['aggregations'][date_histogram_aggregation_name] = {
                    'meta': {
                        '_type': 'haystack_date_histogram',
                    },
                    'date_histogram': {
                        'field': facet_fieldname,
                        'interval': interval,
                    },
                }

                kwargs['aggregations'][date_range_aggregation_name] = {
                    'meta': {
                        '_type': 'haystack_date_range',
                    },
                    'date_range': {  # agg type
                        'field':
                        facet_fieldname,
                        'ranges': [{
                            'from':
                            self._from_python(value.get('start_date')),
                            'to':
                            self._from_python(value.get('end_date')),
                        }]
                    }
                }

        if query_facets is not None:
            kwargs.setdefault('aggregations', {})

            for facet_fieldname, value in query_facets:
                kwargs['aggregations'][facet_fieldname] = {
                    'filter': {
                        'query_string': {
                            'query': value,
                        }
                    }
                }

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(
                settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if models and len(models):
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            filters.append({"terms": {DJANGO_CT: model_choices}})

        for q in narrow_queries:
            filters.append({'query_string': {'query': q}})

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((south, west),
             (north, east)) = generate_bounding_box(within['point_1'],
                                                    within['point_2'])
            within_filter = {
                "geo_bounding_box": {
                    within['field']: {
                        "top_left": {
                            "lat": north,
                            "lon": west
                        },
                        "bottom_right": {
                            "lat": south,
                            "lon": east
                        }
                    }
                },
            }
            filters.append(within_filter)

        if dwithin is not None:
            lng, lat = dwithin['point'].get_coords()

            # NB: the 1.0.0 release of elasticsearch introduce an
            #     incompatible change on the distance filter formating
            if elasticsearch.VERSION >= (1, 0, 0):
                distance = "%(dist).6f%(unit)s" % {
                    'dist': dwithin['distance'].km,
                    'unit': "km"
                }
            else:
                distance = dwithin['distance'].km

            dwithin_filter = {
                "geo_distance": {
                    "distance": distance,
                    dwithin['field']: {
                        "lat": lat,
                        "lon": lng
                    }
                }
            }
            filters.append(dwithin_filter)

        # if we want to filter, change the query type to filteres
        if filters:
            kwargs["query"] = {"bool": {"must": kwargs.pop("query")}}

            if len(filters) == 1:
                kwargs['query']['bool']["filter"] = filters[0]
            else:
                kwargs['query']['bool']["filter"] = {"bool": {"must": filters}}

        if extra_kwargs:
            kwargs.update(extra_kwargs)

        return kwargs
Exemplo n.º 13
0
    def build_search_kwargs(
        self,
        query_string,
        sort_by=None,
        start_offset=0,
        end_offset=None,
        fields="",
        highlight=False,
        facets=None,
        date_facets=None,
        query_facets=None,
        narrow_queries=None,
        spelling_query=None,
        within=None,
        dwithin=None,
        distance_point=None,
        models=None,
        limit_to_registered_models=None,
        result_class=None,
        pivot_facets=None,
        rows=None,
        group=None,
    ):
        kwargs = {"fl": "* score"}

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs["fl"] = fields

        if sort_by is not None:
            if distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point["point"].get_coords()
                kwargs["sfield"] = distance_point["field"]
                kwargs["pt"] = "%s,%s" % (lat, lng)
                if sort_by in ["distance asc", "distance desc"]:
                    if sort_by == "distance asc":
                        kwargs["sort"] = "geodist() asc"
                    else:
                        kwargs["sort"] = "geodist() desc"
            else:
                if sort_by.startswith("distance "):
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

            # Regular sorting.
            kwargs["sort"] = sort_by

        if start_offset is not None:
            kwargs["start"] = start_offset

        if end_offset is not None:
            kwargs["rows"] = end_offset - start_offset

        if highlight is True:
            kwargs["hl"] = "true"
            kwargs["hl.fragsize"] = "200"

        if self.include_spelling is True:
            kwargs["spellcheck"] = "true"
            kwargs["spellcheck.collate"] = "true"
            kwargs["spellcheck.count"] = 1

            if spelling_query:
                kwargs["spellcheck.q"] = spelling_query

        if facets is not None:
            kwargs["facet"] = "on"
            kwargs["facet.field"] = facets
            kwargs["facet.limit"] = 350

        if pivot_facets is not None:
            kwargs["facet"] = "on"
            kwargs["facet.pivot"] = pivot_facets
            kwargs["facet.limit"] = 400

        if group is not None:
            kwargs["group"] = "true"
            kwargs["group.field"] = group["field"]
            kwargs["group.facet"] = "true" if group["facet"] else "false"
            kwargs["group.ngroup"] = group["ngroup"]

        if rows is not None:
            kwargs["rows"] = rows

        if date_facets is not None:
            kwargs["facet"] = "on"
            kwargs["facet.date"] = date_facets.keys()
            kwargs["facet.date.other"] = "none"

            for key, value in date_facets.items():
                kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(value.get("start_date"))
                kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(value.get("end_date"))
                gap_by_string = value.get("gap_by").upper()
                gap_string = "%d%s" % (value.get("gap_amount"), gap_by_string)

                if value.get("gap_amount") != 1:
                    gap_string += "S"

                kwargs["f.%s.facet.date.gap" % key] = "+%s/%s" % (gap_string, gap_by_string)

        if query_facets is not None:
            kwargs["facet"] = "on"
            kwargs["facet.query"] = ["%s:%s" % (field, value) for field, value in query_facets]

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

        if models and len(models):
            model_choices = sorted(["%s.%s" % (model._meta.app_label, model._meta.module_name) for model in models])
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            if narrow_queries is None:
                narrow_queries = set()

            narrow_queries.add("%s:(%s)" % (DJANGO_CT, " OR ".join(model_choices)))

        if narrow_queries is not None:
            kwargs["fq"] = list(narrow_queries)

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            kwargs.setdefault("fq", [])
            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within["point_1"], within["point_2"])
            # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
            # very clear on this.
            bbox = "%s:[%s,%s TO %s,%s]" % (within["field"], min_lat, min_lng, max_lat, max_lng)
            kwargs["fq"].append(bbox)

        if dwithin is not None:
            kwargs.setdefault("fq", [])
            lng, lat = dwithin["point"].get_coords()
            geofilt = "{!geofilt pt=%s,%s sfield=%s d=%s}" % (lat, lng, dwithin["field"], dwithin["distance"].km)
            kwargs["fq"].append(geofilt)

        # Check to see if the backend should try to include distances
        # (Solr 4.X+) in the results.
        if self.distance_available and distance_point:
            # In early testing, you can't just hand Solr 4.X a proper bounding box
            # & request distances. To enable native distance would take calculating
            # a center point & a radius off the user-provided box, which kinda
            # sucks. We'll avoid it for now, since Solr 4.x's release will be some
            # time yet.
            # kwargs['fl'] += ' _dist_:geodist()'
            pass

        return kwargs
Exemplo n.º 14
0
    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None):
        """Build all kwargs necessaries to perform the query.

        :param query_string: Query string.
        :type query_string: str
        :param sort_by:
        :param start_offset: If query is partially done, this parameters will represents where the slice begins.
        :type start_offset: int
        :param end_offset: If query is partially done, this parameters will represents where the slice ends.
        :type end_offset: int
        :param fields: Fields that will be searched for.
        :type fields: str
        :param highlight:
        :param facets:
        :param date_facets:
        :param query_facets:
        :param narrow_queries:
        :param spelling_query:
        :param within:
        :param dwithin:
        :param distance_point:
        :param models: List of models over the query will be performed.
        :type models: list
        :param limit_to_registered_models:
        :param result_class: Class used for search results.
        :type result_class: object
        :return: Search kwargs.
        :rtype: dict
        """
        if query_string == '*:*':
            kwargs = {
                'query': {
                    "match_all": {}
                },
            }
        else:
            kwargs = {
                'query': {
                    'query_string': {
                        'default_operator': DEFAULT_OPERATOR,
                        'query': query_string,
                        'analyze_wildcard': True,
                        'auto_generate_phrase_queries': True,
                    },
                },
            }

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if models and len(models):
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        kwargs['models'] = model_choices

        filters = []

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['fields'] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == 'distance' and distance_point:
                    # Do the geo-enabled sort.
                    lng, lat = distance_point['point'].get_coords()
                    sort_kwargs = {
                        "_geo_distance": {
                            distance_point['field']: [lng, lat],
                            "order": direction,
                            "unit": "km"
                        }
                    }
                else:
                    if field == 'distance':
                        warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                    # Regular sorting.
                    sort_kwargs = {field: {'order': direction}}

                order_list.append(sort_kwargs)

            kwargs['sort'] = order_list

        if highlight is True:
            kwargs['highlight'] = {
                'fields': {
                    '_all': {'store': 'yes'},
                }
            }

        if self.include_spelling:
            kwargs['suggest'] = {
                'suggest': {
                    'text': spelling_query or query_string,
                    'term': {
                        # Using content_field here will result in suggestions of stemmed words.
                        'field': '_all',
                    },
                },
            }

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, extra_options in facets.items():
                facet_options = {
                    'terms': {
                        'field': facet_fieldname,
                        'size': 100,
                    },
                }
                # Special cases for options applied at the facet level (not the terms level).
                if extra_options.pop('global_scope', False):
                    # Renamed "global_scope" since "global" is a python keyword.
                    facet_options['global'] = True
                if 'facet_filter' in extra_options:
                    facet_options['facet_filter'] = extra_options.pop('facet_filter')
                facet_options['terms'].update(extra_options)
                kwargs['facets'][facet_fieldname] = facet_options

        if date_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get('gap_by').lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value['gap_amount'], interval[:1])

                kwargs['facets'][facet_fieldname] = {
                    'date_histogram': {
                        'field': facet_fieldname,
                        'interval': interval,
                    },
                    'facet_filter': {
                        "range": {
                            facet_fieldname: {
                                'from': self._from_python(value.get('start_date')),
                                'to': self._from_python(value.get('end_date')),
                            }
                        }
                    }
                }

        if query_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in query_facets:
                kwargs['facets'][facet_fieldname] = {
                    'query': {
                        'query_string': {
                            'query': value,
                        }
                    },
                }

        for q in narrow_queries:
            filters.append({
                'fquery': {
                    'query': {
                        'query_string': {
                            'query': q
                        },
                    },
                    '_cache': True,
                }
            })

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
            within_filter = {
                "geo_bounding_box": {
                    within['field']: {
                        "top_left": {
                            "lat": north,
                            "lon": west
                        },
                        "bottom_right": {
                            "lat": south,
                            "lon": east
                        }
                    }
                },
            }
            filters.append(within_filter)

        if dwithin is not None:
            lng, lat = dwithin['point'].get_coords()
            dwithin_filter = {
                "geo_distance": {
                    "distance": dwithin['distance'].km,
                    dwithin['field']: {
                        "lat": lat,
                        "lon": lng
                    }
                }
            }
            filters.append(dwithin_filter)

        # if we want to filter, change the query type to filteres
        if filters:
            kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
            if len(filters) == 1:
                kwargs['query']['filtered']["filter"] = filters[0]
            else:
                kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}

        return kwargs
Exemplo n.º 15
0
    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None, stats=None):
        kwargs = {'fl': '* score'}

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['fl'] = fields

        if sort_by is not None:
            if sort_by in ['distance asc', 'distance desc'] and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                kwargs['sfield'] = distance_point['field']
                kwargs['pt'] = '%s,%s' % (lat, lng)

                if sort_by == 'distance asc':
                    kwargs['sort'] = 'geodist() asc'
                else:
                    kwargs['sort'] = 'geodist() desc'
            else:
                if sort_by.startswith('distance '):
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                # Regular sorting.
                kwargs['sort'] = sort_by

        if start_offset is not None:
            kwargs['start'] = start_offset

        if end_offset is not None:
            kwargs['rows'] = end_offset - start_offset

        if highlight is True:
            kwargs['hl'] = 'true'
            kwargs['hl.fragsize'] = '200'

        if self.include_spelling is True:
            kwargs['spellcheck'] = 'true'
            kwargs['spellcheck.collate'] = 'true'
            kwargs['spellcheck.count'] = 1

            if spelling_query:
                kwargs['spellcheck.q'] = spelling_query

        if facets is not None:
            kwargs['facet'] = 'on'
            kwargs['facet.field'] = facets.keys()

            for facet_field, options in facets.items():
                for key, value in options.items():
                    kwargs['f.%s.facet.%s' % (facet_field, key)] = self.conn._from_python(value)

        if date_facets is not None:
            kwargs['facet'] = 'on'
            kwargs['facet.date'] = date_facets.keys()
            kwargs['facet.date.other'] = 'none'

            for key, value in date_facets.items():
                kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(value.get('start_date'))
                kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(value.get('end_date'))
                gap_by_string = value.get('gap_by').upper()
                gap_string = "%d%s" % (value.get('gap_amount'), gap_by_string)

                if value.get('gap_amount') != 1:
                    gap_string += "S"

                kwargs["f.%s.facet.date.gap" % key] = '+%s/%s' % (gap_string, gap_by_string)

        if query_facets is not None:
            kwargs['facet'] = 'on'
            kwargs['facet.query'] = ["%s:%s" % (field, value) for field, value in query_facets]

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if models and len(models):
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            if narrow_queries is None:
                narrow_queries = set()

            narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices)))

        if narrow_queries is not None:
            kwargs['fq'] = list(narrow_queries)

        if stats:
            kwargs['stats'] = "true"

            for k in stats.keys():
                kwargs['stats.field'] = k

                for facet in stats[k]:
                    kwargs['f.%s.stats.facet' % k] = facet

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            kwargs.setdefault('fq', [])
            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
            # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
            # very clear on this.
            bbox = '%s:[%s,%s TO %s,%s]' % (within['field'], min_lat, min_lng, max_lat, max_lng)
            kwargs['fq'].append(bbox)

        if dwithin is not None:
            kwargs.setdefault('fq', [])
            lng, lat = dwithin['point'].get_coords()
            geofilt = '{!geofilt pt=%s,%s sfield=%s d=%s}' % (lat, lng, dwithin['field'], dwithin['distance'].km)
            kwargs['fq'].append(geofilt)

        # Check to see if the backend should try to include distances
        # (Solr 4.X+) in the results.
        if self.distance_available and distance_point:
            # In early testing, you can't just hand Solr 4.X a proper bounding box
            # & request distances. To enable native distance would take calculating
            # a center point & a radius off the user-provided box, which kinda
            # sucks. We'll avoid it for now, since Solr 4.x's release will be some
            # time yet.
            # kwargs['fl'] += ' _dist_:geodist()'
            pass

        return kwargs
Exemplo n.º 16
0
    def build_search_kwargs(
        self,
        query_string,
        sort_by=None,
        start_offset=0,
        end_offset=None,
        fields="",
        highlight=False,
        facets=None,
        date_facets=None,
        query_facets=None,
        narrow_queries=None,
        spelling_query=None,
        within=None,
        dwithin=None,
        distance_point=None,
        models=None,
        limit_to_registered_models=None,
        result_class=None,
        stats=None,
        collate=None,
        **extra_kwargs
    ):

        index = haystack.connections[self.connection_alias].get_unified_index()

        kwargs = {"fl": "* score", "df": index.document_field}

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs["fl"] = fields

        if sort_by is not None:
            if sort_by in ["distance asc", "distance desc"] and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point["point"].coords
                kwargs["sfield"] = distance_point["field"]
                kwargs["pt"] = "%s,%s" % (lat, lng)

                if sort_by == "distance asc":
                    kwargs["sort"] = "geodist() asc"
                else:
                    kwargs["sort"] = "geodist() desc"
            else:
                if sort_by.startswith("distance "):
                    warnings.warn(
                        "In order to sort by distance, you must call the '.distance(...)' method."
                    )

                # Regular sorting.
                kwargs["sort"] = sort_by

        if start_offset is not None:
            kwargs["start"] = start_offset

        if end_offset is not None:
            kwargs["rows"] = end_offset - start_offset

        if highlight:
            # `highlight` can either be True or a dictionary containing custom parameters
            # which will be passed to the backend and may override our default settings:

            kwargs["hl"] = "true"
            kwargs["hl.fragsize"] = "200"

            if isinstance(highlight, dict):
                # autoprefix highlighter options with 'hl.', all of them start with it anyway
                # this makes option dicts shorter: {'maxAnalyzedChars': 42}
                # and lets some of options be used as keyword arguments: `.highlight(preserveMulti=False)`
                kwargs.update(
                    {
                        key if key.startswith("hl.") else ("hl." + key): highlight[key]
                        for key in highlight.keys()
                    }
                )

        if collate is None:
            collate = self.collate
        if self.include_spelling is True:
            kwargs["spellcheck"] = "true"
            kwargs["spellcheck.collate"] = str(collate).lower()
            kwargs["spellcheck.count"] = 1

            if spelling_query:
                kwargs["spellcheck.q"] = spelling_query

        if facets is not None:
            kwargs["facet"] = "on"
            kwargs["facet.field"] = facets.keys()

            for facet_field, options in facets.items():
                for key, value in options.items():
                    kwargs[
                        "f.%s.facet.%s" % (facet_field, key)
                    ] = self.conn._from_python(value)

        if date_facets is not None:
            kwargs["facet"] = "on"
            kwargs["facet.date"] = date_facets.keys()
            kwargs["facet.date.other"] = "none"

            for key, value in date_facets.items():
                kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(
                    value.get("start_date")
                )
                kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(
                    value.get("end_date")
                )
                gap_by_string = value.get("gap_by").upper()
                gap_string = "%d%s" % (value.get("gap_amount"), gap_by_string)

                if value.get("gap_amount") != 1:
                    gap_string += "S"

                kwargs["f.%s.facet.date.gap" % key] = "+%s/%s" % (
                    gap_string,
                    gap_by_string,
                )

        if query_facets is not None:
            kwargs["facet"] = "on"
            kwargs["facet.query"] = [
                "%s:%s" % (field, value) for field, value in query_facets
            ]

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(
                settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True
            )

        if models and len(models):
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            if narrow_queries is None:
                narrow_queries = set()

            narrow_queries.add("%s:(%s)" % (DJANGO_CT, " OR ".join(model_choices)))

        if narrow_queries is not None:
            kwargs["fq"] = list(narrow_queries)

        if stats:
            kwargs["stats"] = "true"

            for k in stats.keys():
                kwargs["stats.field"] = k

                for facet in stats[k]:
                    kwargs["f.%s.stats.facet" % k] = facet

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            kwargs.setdefault("fq", [])
            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(
                within["point_1"], within["point_2"]
            )
            # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
            # very clear on this.
            bbox = "%s:[%s,%s TO %s,%s]" % (
                within["field"],
                min_lat,
                min_lng,
                max_lat,
                max_lng,
            )
            kwargs["fq"].append(bbox)

        if dwithin is not None:
            kwargs.setdefault("fq", [])
            lng, lat = dwithin["point"].coords
            geofilt = "{!geofilt pt=%s,%s sfield=%s d=%s}" % (
                lat,
                lng,
                dwithin["field"],
                dwithin["distance"].km,
            )
            kwargs["fq"].append(geofilt)

        # Check to see if the backend should try to include distances
        # (Solr 4.X+) in the results.
        if self.distance_available and distance_point:
            # In early testing, you can't just hand Solr 4.X a proper bounding box
            # & request distances. To enable native distance would take calculating
            # a center point & a radius off the user-provided box, which kinda
            # sucks. We'll avoid it for now, since Solr 4.x's release will be some
            # time yet.
            # kwargs['fl'] += ' _dist_:geodist()'
            pass

        if extra_kwargs:
            kwargs.update(extra_kwargs)

        return kwargs
    def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
               fields='', highlight=False, facets=None, date_facets=None, query_facets=None,
               narrow_queries=None, spelling_query=None, within=None,
               dwithin=None, distance_point=None,
               limit_to_registered_models=None, result_class=None, **kwargs):
        if len(query_string) == 0:
            return {
                'results': [],
                'hits': 0,
            }

        if not self.setup_complete:
            self.setup()

        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        if query_string == '*:*':
            kwargs = {
                'query': {
                    'filtered': {
                        'query': {
                            'query_string': {
                                'query': '*:*',
                            },
                        },
                    },
                },
            }
        else:
            kwargs = {
                'query': {
                    'filtered': {
                        'query': {
                            'query_string': {
                                'default_field': content_field,
                                'default_operator': DEFAULT_OPERATOR,
                                'query': query_string,
                                'analyze_wildcard': True,
                                'auto_generate_phrase_queries': True,
                            },
                        },
                    },
                },
            }

        geo_sort = False

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['fields'] = fields

        if sort_by is not None:
            if sort_by in ['distance asc', 'distance desc'] and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                sort_kwargs = {
                    "_geo_distance": {
                        distance_point['field']: [lng, lat],
                        "order" : "asc",
                        "unit" : "km"
                    }
                }
                geo_sort = True

                if sort_by == 'distance asc':
                    sort_kwargs['sort']['_geo_distance']['order'] = 'asc'
                else:
                    sort_kwargs['sort']['_geo_distance']['order'] = 'desc'

                kwargs['sort'] = [sort_kwargs]
            else:
                if sort_by.startswith('distance '):
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                # Regular sorting.
                order_by_list = []

                for order_by in self.order_by:
                    if order_by.startswith('-'):
                        order_by_list.append({order_by[1:]: 'desc'})
                    else:
                        order_by_list.append({order_by: 'asc'})

                kwargs['sort'] = order_by_list

        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
        # if start_offset is not None:
        #     kwargs['from'] = start_offset

        # if end_offset is not None:
        #     kwargs['size'] = end_offset - start_offset

        if highlight is True:
            kwargs['highlight'] = {
                'fields': {
                    content_field: {'store': 'yes'},
                }
            }

        if self.include_spelling is True:
            warnings.warn("Elasticsearch does not handle spelling suggestions.", Warning, stacklevel=2)

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname in facets:
                kwargs['facets'][facet_fieldname] = {
                    'terms': {
                        'field': facet_fieldname,
                    },
                }

        if date_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get('gap_by').lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get('gap_amount', 1) != 1 and not interval in ('month', 'year'):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value['gap_amount'], interval[:1])

                kwargs['facets'][facet_fieldname] = {
                    'date_histogram': {
                        'field': facet_fieldname,
                        'interval': interval,
                    },
                    'facet_filter': {
                        "range": {
                            facet_fieldname: {
                                'from': self.conn.from_python(value.get('start_date')),
                                'to': self.conn.from_python(value.get('end_date')),
                            }
                        }
                    }
                }

        if query_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in query_facets:
                kwargs['facets'][facet_fieldname] = {
                    'query': {
                        'query_string': {
                            'query': value,
                        }
                    },
                }

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            registered_models = self.build_models_list()

            if len(registered_models) > 0:
                narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(registered_models)))

        if narrow_queries:
            kwargs['query'].setdefault('filtered', {})
            kwargs['query']['filtered'].setdefault('filter', {})
            kwargs['query']['filtered']['filter'] = {
                'fquery': {
                    'query': {
                        'query_string': {
                            'query': u' AND '.join(list(narrow_queries)),
                        },
                    },
                    '_cache': True,
                }
            }

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
            kwargs['query'].setdefault('filtered', {})
            kwargs['query']['filtered'].setdefault('filter', {})
            kwargs['query']['filtered']['filter'] = {
                "geo_bounding_box": {
                    within['field']: {
                        "top_left": {
                            "lat": max_lat,
                            "lon": max_lng
                        },
                        "bottom_right": {
                            "lat": min_lat,
                            "lon": min_lng
                        }
                    }
                },
            }

        if dwithin is not None:
            lng, lat = dwithin['point'].get_coords()
            kwargs['query'].setdefault('filtered', {})
            kwargs['query']['filtered'].setdefault('filter', {})
            kwargs['query']['filtered']['filter'] = {
                "geo_distance": {
                    "distance": dwithin['distance'].km,
                    dwithin['field']: {
                        "lat": lat,
                        "lon": lng
                    }
                }
            }

        # Remove the "filtered" key if we're not filtering. Otherwise,
        # Elasticsearch will blow up.
        if not kwargs['query']['filtered'].get('filter'):
            kwargs['query'] = kwargs['query']['filtered']['query']

        # Because Elasticsearch.
        query_params = {
            'from': start_offset,
        }

        if end_offset is not None and end_offset > start_offset:
            query_params['size'] = end_offset - start_offset

        try:
            raw_results = self.conn.search(None, kwargs, indexes=[self.index_name], doc_types=['modelresult'], **query_params)
        except (requests.RequestException, pyelasticsearch.ElasticSearchError), e:
            if not self.silently_fail:
                raise

            self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e)
            raw_results = {}
    def search(
        self,
        query_string,
        sort_by=None,
        start_offset=0,
        end_offset=None,
        fields="",
        highlight=False,
        facets=None,
        date_facets=None,
        query_facets=None,
        narrow_queries=None,
        spelling_query=None,
        within=None,
        dwithin=None,
        distance_point=None,
        models=None,
        limit_to_registered_models=None,
        result_class=None,
        **kwargs
    ):
        if len(query_string) == 0:
            return {"results": [], "hits": 0}

        if not self.setup_complete:
            self.setup()

        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        if query_string == "*:*":
            kwargs = {"query": {"filtered": {"query": {"query_string": {"query": "*:*"}}}}}
        else:
            kwargs = {
                "query": {
                    "filtered": {
                        "query": {
                            "query_string": {
                                "default_field": content_field,
                                "default_operator": DEFAULT_OPERATOR,
                                "query": query_string,
                                "analyze_wildcard": True,
                                "auto_generate_phrase_queries": True,
                            }
                        }
                    }
                }
            }

        geo_sort = False

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs["fields"] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == "distance" and distance_point:
                    # Do the geo-enabled sort.
                    lng, lat = distance_point["point"].get_coords()
                    sort_kwargs = {
                        "_geo_distance": {distance_point["field"]: [lng, lat], "order": direction, "unit": "km"}
                    }
                else:
                    if field == "distance":
                        warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                    # Regular sorting.
                    sort_kwargs = {field: {"order": direction}}

                order_list.append(sort_kwargs)

            kwargs["sort"] = order_list

        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
        # if start_offset is not None:
        #     kwargs['from'] = start_offset

        # if end_offset is not None:
        #     kwargs['size'] = end_offset - start_offset

        if highlight is True:
            kwargs["highlight"] = {"fields": {content_field: {"store": "yes"}}}

        if self.include_spelling is True:
            warnings.warn("Elasticsearch does not handle spelling suggestions.", Warning, stacklevel=2)

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname in facets:
                kwargs["facets"][facet_fieldname] = {"terms": {"field": facet_fieldname}}

        if date_facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get("gap_by").lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get("gap_amount", 1) != 1 and not interval in ("month", "year"):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value["gap_amount"], interval[:1])

                kwargs["facets"][facet_fieldname] = {
                    "date_histogram": {"field": facet_fieldname, "interval": interval},
                    "facet_filter": {
                        "range": {
                            facet_fieldname: {
                                "from": self.conn.from_python(value.get("start_date")),
                                "to": self.conn.from_python(value.get("end_date")),
                            }
                        }
                    },
                }

        if query_facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname, value in query_facets:
                kwargs["facets"][facet_fieldname] = {"query": {"query_string": {"query": value}}}

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

        if models and len(models):
            model_choices = sorted(["%s.%s" % (model._meta.app_label, model._meta.module_name) for model in models])
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            if narrow_queries is None:
                narrow_queries = set()

            narrow_queries.add("%s:(%s)" % (DJANGO_CT, " OR ".join(model_choices)))

        if narrow_queries:
            kwargs["query"].setdefault("filtered", {})
            kwargs["query"]["filtered"].setdefault("filter", {})
            kwargs["query"]["filtered"]["filter"] = {
                "fquery": {"query": {"query_string": {"query": u" AND ".join(list(narrow_queries))}}, "_cache": True}
            }

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within["point_1"], within["point_2"])
            kwargs["query"].setdefault("filtered", {})
            kwargs["query"]["filtered"].setdefault("filter", {})
            kwargs["query"]["filtered"]["filter"] = {
                "geo_bounding_box": {
                    within["field"]: {
                        "top_left": {"lat": max_lat, "lon": max_lng},
                        "bottom_right": {"lat": min_lat, "lon": min_lng},
                    }
                }
            }

        if dwithin is not None:
            lng, lat = dwithin["point"].get_coords()
            kwargs["query"].setdefault("filtered", {})
            kwargs["query"]["filtered"].setdefault("filter", {})
            kwargs["query"]["filtered"]["filter"] = {
                "geo_distance": {"distance": dwithin["distance"].km, dwithin["field"]: {"lat": lat, "lon": lng}}
            }

        # Remove the "filtered" key if we're not filtering. Otherwise,
        # Elasticsearch will blow up.
        if not kwargs["query"]["filtered"].get("filter"):
            kwargs["query"] = kwargs["query"]["filtered"]["query"]

        # Because Elasticsearch.
        query_params = {"from": start_offset}

        if end_offset is not None and end_offset > start_offset:
            query_params["size"] = end_offset - start_offset

        try:
            raw_results = self.conn.search(
                None, kwargs, indexes=[self.index_name], doc_types=["modelresult"], **query_params
            )
        except (requests.RequestException, pyelasticsearch.ElasticSearchError), e:
            if not self.silently_fail:
                raise

            self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e)
            raw_results = {}
Exemplo n.º 19
0
    def search(self,
               query_string,
               sort_by=None,
               start_offset=0,
               end_offset=None,
               fields='',
               highlight=False,
               facets=None,
               date_facets=None,
               query_facets=None,
               narrow_queries=None,
               spelling_query=None,
               within=None,
               dwithin=None,
               distance_point=None,
               limit_to_registered_models=None,
               result_class=None,
               **kwargs):
        if len(query_string) == 0:
            return {
                'results': [],
                'hits': 0,
            }

        kwargs = {
            'fl': '* score',
        }
        geo_sort = False

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['fl'] = fields

        if sort_by is not None:
            if sort_by in ['distance asc', 'distance desc'] and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                kwargs['sfield'] = distance_point['field']
                kwargs['pt'] = '%s,%s' % (lat, lng)
                geo_sort = True

                if sort_by == 'distance asc':
                    kwargs['sort'] = 'geodist() asc'
                else:
                    kwargs['sort'] = 'geodist() desc'
            else:
                if sort_by.startswith('distance '):
                    warnings.warn(
                        "In order to sort by distance, you must call the '.distance(...)' method."
                    )

                # Regular sorting.
                kwargs['sort'] = sort_by

        if start_offset is not None:
            kwargs['start'] = start_offset

        if end_offset is not None:
            kwargs['rows'] = end_offset - start_offset

        if highlight is True:
            kwargs['hl'] = 'true'
            kwargs['hl.fragsize'] = '200'

        if self.include_spelling is True:
            kwargs['spellcheck'] = 'true'
            kwargs['spellcheck.collate'] = 'true'
            kwargs['spellcheck.count'] = 1

            if spelling_query:
                kwargs['spellcheck.q'] = spelling_query

        if facets is not None:
            kwargs['facet'] = 'on'
            kwargs['facet.field'] = facets

        if date_facets is not None:
            kwargs['facet'] = 'on'
            kwargs['facet.date'] = date_facets.keys()
            kwargs['facet.date.other'] = 'none'

            for key, value in date_facets.items():
                kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(
                    value.get('start_date'))
                kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(
                    value.get('end_date'))
                gap_by_string = value.get('gap_by').upper()
                gap_string = "%d%s" % (value.get('gap_amount'), gap_by_string)

                if value.get('gap_amount') != 1:
                    gap_string += "S"

                kwargs["f.%s.facet.date.gap" %
                       key] = '+%s/%s' % (gap_string, gap_by_string)

        if query_facets is not None:
            kwargs['facet'] = 'on'
            kwargs['facet.query'] = [
                "%s:%s" % (field, value) for field, value in query_facets
            ]

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(
                settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            if narrow_queries is None:
                narrow_queries = set()

            registered_models = self.build_models_list()

            if len(registered_models) > 0:
                narrow_queries.add('%s:(%s)' %
                                   (DJANGO_CT, ' OR '.join(registered_models)))

        if narrow_queries is not None:
            kwargs['fq'] = list(narrow_queries)

        if within is not None:
            kwargs.setdefault('fq', [])
            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(
                within['point_1'], within['point_2'])
            # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
            # very clear on this.
            bbox = '%s:[%s,%s TO %s,%s]' % (within['field'], min_lat, min_lng,
                                            max_lat, max_lng)
            kwargs['fq'].append(bbox)

        if dwithin is not None:
            kwargs.setdefault('fq', [])
            lng, lat = dwithin['point'].get_coords()
            geofilt = '{!geofilt pt=%s,%s sfield=%s d=%s}' % (
                lat, lng, dwithin['field'], dwithin['distance'].km)
            kwargs['fq'].append(geofilt)

        # Check to see if the backend should try to include distances
        # (Solr 4.X+) in the results.
        if self.distance_available and distance_point:
            # In early testing, you can't just hand Solr 4.X a proper bounding box
            # & request distances. To enable native distance would take calculating
            # a center point & a radius off the user-provided box, which kinda
            # sucks. We'll avoid it for now, since Solr 4.x's release will be some
            # time yet.
            # kwargs['fl'] += ' _dist_:geodist()'
            pass

        try:
            raw_results = self.conn.search(query_string, **kwargs)
        except (IOError, SolrError), e:
            if not self.silently_fail:
                raise

            self.log.error("Failed to query Solr using '%s': %s", query_string,
                           e)
            raw_results = EmptyResults()
Exemplo n.º 20
0
    def build_search_kwargs(self,
                            query_string,
                            sort_by=None,
                            start_offset=0,
                            end_offset=None,
                            fields='',
                            highlight=False,
                            facets=None,
                            date_facets=None,
                            query_facets=None,
                            narrow_queries=None,
                            spelling_query=None,
                            within=None,
                            dwithin=None,
                            distance_point=None,
                            models=None,
                            limit_to_registered_models=None,
                            result_class=None,
                            stats=None,
                            **extra_kwargs):
        kwargs = {'fl': '* score'}

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['fl'] = fields

        if sort_by is not None:
            if sort_by in ['distance asc', 'distance desc'] and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                kwargs['sfield'] = distance_point['field']
                kwargs['pt'] = '%s,%s' % (lat, lng)

                if sort_by == 'distance asc':
                    kwargs['sort'] = 'geodist() asc'
                else:
                    kwargs['sort'] = 'geodist() desc'
            else:
                if sort_by.startswith('distance '):
                    warnings.warn(
                        "In order to sort by distance, you must call the '.distance(...)' method."
                    )

                # Regular sorting.
                kwargs['sort'] = sort_by

        if start_offset is not None:
            kwargs['start'] = start_offset

        if end_offset is not None:
            kwargs['rows'] = end_offset - start_offset

        if highlight:
            # `highlight` can either be True or a dictionary containing custom parameters
            # which will be passed to the backend and may override our default settings:

            kwargs['hl'] = 'true'
            kwargs['hl.fragsize'] = '200'

            if isinstance(highlight, dict):
                kwargs.update(highlight)

        if self.include_spelling is True:
            kwargs['spellcheck'] = 'true'
            kwargs['spellcheck.collate'] = 'true'
            kwargs['spellcheck.count'] = 1

            if spelling_query:
                kwargs['spellcheck.q'] = spelling_query

        if facets is not None:
            kwargs['facet'] = 'on'
            kwargs['facet.field'] = facets.keys()

            for facet_field, options in facets.items():
                for key, value in options.items():
                    kwargs['f.%s.facet.%s' %
                           (facet_field, key)] = self.conn._from_python(value)

        if date_facets is not None:
            kwargs['facet'] = 'on'
            kwargs['facet.date'] = date_facets.keys()
            kwargs['facet.date.other'] = 'none'

            for key, value in date_facets.items():
                kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(
                    value.get('start_date'))
                kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(
                    value.get('end_date'))
                gap_by_string = value.get('gap_by').upper()
                gap_string = "%d%s" % (value.get('gap_amount'), gap_by_string)

                if value.get('gap_amount') != 1:
                    gap_string += "S"

                kwargs["f.%s.facet.date.gap" %
                       key] = '+%s/%s' % (gap_string, gap_by_string)

        if query_facets is not None:
            kwargs['facet'] = 'on'
            kwargs['facet.query'] = [
                "%s:%s" % (field, value) for field, value in query_facets
            ]

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(
                settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if models and len(models):
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            if narrow_queries is None:
                narrow_queries = set()

            narrow_queries.add('%s:(%s)' %
                               (DJANGO_CT, ' OR '.join(model_choices)))

        if narrow_queries is not None:
            kwargs['fq'] = list(narrow_queries)

        if stats:
            kwargs['stats'] = "true"

            for k in stats.keys():
                kwargs['stats.field'] = k

                for facet in stats[k]:
                    kwargs['f.%s.stats.facet' % k] = facet

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            kwargs.setdefault('fq', [])
            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(
                within['point_1'], within['point_2'])
            # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
            # very clear on this.
            bbox = '%s:[%s,%s TO %s,%s]' % (within['field'], min_lat, min_lng,
                                            max_lat, max_lng)
            kwargs['fq'].append(bbox)

        if dwithin is not None:
            kwargs.setdefault('fq', [])
            lng, lat = dwithin['point'].get_coords()
            geofilt = '{!geofilt pt=%s,%s sfield=%s d=%s}' % (
                lat, lng, dwithin['field'], dwithin['distance'].km)
            kwargs['fq'].append(geofilt)

        # Check to see if the backend should try to include distances
        # (Solr 4.X+) in the results.
        if self.distance_available and distance_point:
            # In early testing, you can't just hand Solr 4.X a proper bounding box
            # & request distances. To enable native distance would take calculating
            # a center point & a radius off the user-provided box, which kinda
            # sucks. We'll avoid it for now, since Solr 4.x's release will be some
            # time yet.
            # kwargs['fl'] += ' _dist_:geodist()'
            pass

        if extra_kwargs:
            kwargs.update(extra_kwargs)

        return kwargs
    def build_search_kwargs(self,
                            query_string,
                            sort_by=None,
                            start_offset=0,
                            end_offset=None,
                            fields="",
                            highlight=False,
                            facets=None,
                            date_facets=None,
                            query_facets=None,
                            narrow_queries=None,
                            spelling_query=None,
                            within=None,
                            dwithin=None,
                            distance_point=None,
                            models=None,
                            limit_to_registered_models=None,
                            result_class=None,
                            **extra_kwargs):
        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        if query_string == "*:*":
            kwargs = {"query": {"match_all": {}}}
        else:
            kwargs = {
                "query": {
                    "query_string": {
                        "default_field": content_field,
                        "default_operator": DEFAULT_OPERATOR,
                        "query": query_string,
                        "analyze_wildcard": True,
                        "auto_generate_phrase_queries": True,
                        "fuzzy_min_sim": FUZZY_MIN_SIM,
                        "fuzzy_max_expansions": FUZZY_MAX_EXPANSIONS,
                    }
                }
            }

        # so far, no filters
        filters = []

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs["fields"] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == "distance" and distance_point:
                    # Do the geo-enabled sort.
                    lng, lat = distance_point["point"].coords
                    sort_kwargs = {
                        "_geo_distance": {
                            distance_point["field"]: [lng, lat],
                            "order": direction,
                            "unit": "km",
                        }
                    }
                else:
                    if field == "distance":
                        warnings.warn(
                            "In order to sort by distance, you must call the '.distance(...)' method."
                        )

                    # Regular sorting.
                    sort_kwargs = {field: {"order": direction}}

                order_list.append(sort_kwargs)

            kwargs["sort"] = order_list

        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
        # if start_offset is not None:
        #     kwargs['from'] = start_offset

        # if end_offset is not None:
        #     kwargs['size'] = end_offset - start_offset

        if highlight:
            # `highlight` can either be True or a dictionary containing custom parameters
            # which will be passed to the backend and may override our default settings:

            kwargs["highlight"] = {"fields": {content_field: {"store": "yes"}}}

            if isinstance(highlight, dict):
                kwargs["highlight"].update(highlight)

        if self.include_spelling:
            kwargs["suggest"] = {
                "suggest": {
                    "text": spelling_query or query_string,
                    "term": {
                        # Using content_field here will result in suggestions of stemmed words.
                        "field": "_all"
                    },
                }
            }

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname, extra_options in facets.items():
                facet_options = {
                    "terms": {
                        "field": facet_fieldname,
                        "size": 100
                    }
                }
                # Special cases for options applied at the facet level (not the terms level).
                if extra_options.pop("global_scope", False):
                    # Renamed "global_scope" since "global" is a python keyword.
                    facet_options["global"] = True
                if "facet_filter" in extra_options:
                    facet_options["facet_filter"] = extra_options.pop(
                        "facet_filter")
                facet_options["terms"].update(extra_options)
                kwargs["facets"][facet_fieldname] = facet_options

        if date_facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get("gap_by").lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get("gap_amount", 1) != 1 and interval not in (
                        "month",
                        "year",
                ):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value["gap_amount"], interval[:1])

                kwargs["facets"][facet_fieldname] = {
                    "date_histogram": {
                        "field": facet_fieldname,
                        "interval": interval
                    },
                    "facet_filter": {
                        "range": {
                            facet_fieldname: {
                                "from":
                                self._from_python(value.get("start_date")),
                                "to": self._from_python(value.get("end_date")),
                            }
                        }
                    },
                }

        if query_facets is not None:
            kwargs.setdefault("facets", {})

            for facet_fieldname, value in query_facets:
                kwargs["facets"][facet_fieldname] = {
                    "query": {
                        "query_string": {
                            "query": value
                        }
                    }
                }

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(
                settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

        if models and len(models):
            model_choices = sorted(get_model_ct(model) for model in models)
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            filters.append({"terms": {DJANGO_CT: model_choices}})

        for q in narrow_queries:
            filters.append({
                "fquery": {
                    "query": {
                        "query_string": {
                            "query": q
                        }
                    },
                    "_cache": True
                }
            })

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((south, west),
             (north, east)) = generate_bounding_box(within["point_1"],
                                                    within["point_2"])
            within_filter = {
                "geo_bounding_box": {
                    within["field"]: {
                        "top_left": {
                            "lat": north,
                            "lon": west
                        },
                        "bottom_right": {
                            "lat": south,
                            "lon": east
                        },
                    }
                }
            }
            filters.append(within_filter)

        if dwithin is not None:
            lng, lat = dwithin["point"].coords

            # NB: the 1.0.0 release of elasticsearch introduce an
            #     incompatible change on the distance filter formating
            if elasticsearch.VERSION >= (1, 0, 0):
                distance = "%(dist).6f%(unit)s" % {
                    "dist": dwithin["distance"].km,
                    "unit": "km",
                }
            else:
                distance = dwithin["distance"].km

            dwithin_filter = {
                "geo_distance": {
                    "distance": distance,
                    dwithin["field"]: {
                        "lat": lat,
                        "lon": lng
                    },
                }
            }
            filters.append(dwithin_filter)

        # if we want to filter, change the query type to filteres
        if filters:
            kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
            if len(filters) == 1:
                kwargs["query"]["filtered"]["filter"] = filters[0]
            else:
                kwargs["query"]["filtered"]["filter"] = {
                    "bool": {
                        "must": filters
                    }
                }

        if extra_kwargs:
            kwargs.update(extra_kwargs)

        return kwargs
Exemplo n.º 22
0
    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None, polygon=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None):
        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        if query_string == '*:*':
            kwargs = {
                'query': {
                    'filtered': {
                        'query': {
                            "match_all": {}
                        },
                    },
                },
            }
        else:
            kwargs = {
                'query': {
                    'filtered': {
                        'query': {
                            'query_string': {
                                'default_field': content_field,
                                'default_operator': DEFAULT_OPERATOR,
                                'query': query_string,
                                'analyze_wildcard': True,
                                'auto_generate_phrase_queries': True,
                            },
                        },
                    },
                },
            }

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['fields'] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == 'distance' and distance_point:
                    # Do the geo-enabled sort.
                    lng, lat = distance_point['point'].get_coords()
                    sort_kwargs = {
                        "_geo_distance": {
                            distance_point['field']: [lng, lat],
                            "order": direction,
                            "unit": "km"
                        }
                    }
                else:
                    if field == 'distance':
                        warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                    # Regular sorting.
                    sort_kwargs = {field: {'order': direction}}

                order_list.append(sort_kwargs)

            kwargs['sort'] = order_list

        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
        # if start_offset is not None:
        #     kwargs['from'] = start_offset

        # if end_offset is not None:
        #     kwargs['size'] = end_offset - start_offset

        if highlight is True:
            kwargs['highlight'] = {
                'fields': {
                    content_field: {'store': 'yes'},
                }
            }

        if self.include_spelling:
            kwargs['suggest'] = {
                'suggest': {
                    'text': spelling_query or query_string,
                    'term': {
                        # Using content_field here will result in suggestions of stemmed words.
                        'field': '_all',
                    },
                },
            }

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, extra_options in facets.items():
                facet_options = {
                    'terms': {
                        'field': facet_fieldname,
                        'size': 100,
                    },
                }
                # Special cases for options applied at the facet level (not the terms level).
                if extra_options.pop('global_scope', False):
                    # Renamed "global_scope" since "global" is a python keyword.
                    facet_options['global'] = True
                if 'facet_filter' in extra_options:
                    facet_options['facet_filter'] = extra_options.pop('facet_filter')
                facet_options['terms'].update(extra_options)
                kwargs['facets'][facet_fieldname] = facet_options

        if date_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get('gap_by').lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get('gap_amount', 1) != 1 and not interval in ('month', 'year'):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value['gap_amount'], interval[:1])

                kwargs['facets'][facet_fieldname] = {
                    'date_histogram': {
                        'field': facet_fieldname,
                        'interval': interval,
                    },
                    'facet_filter': {
                        "range": {
                            facet_fieldname: {
                                'from': self._from_python(value.get('start_date')),
                                'to': self._from_python(value.get('end_date')),
                            }
                        }
                    }
                }

        if query_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in query_facets:
                kwargs['facets'][facet_fieldname] = {
                    'query': {
                        'query_string': {
                            'query': value,
                        }
                    },
                }

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if models and len(models):
            model_choices = sorted(['%s.%s' % (model._meta.app_label, model._meta.module_name) for model in models])
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            if narrow_queries is None:
                narrow_queries = set()

            narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices)))

        if narrow_queries:
            kwargs['query'].setdefault('filtered', {})
            kwargs['query']['filtered'].setdefault('filter', {})
            kwargs['query']['filtered']['filter'] = {
                'fquery': {
                    'query': {
                        'query_string': {
                            'query': u' AND '.join(list(narrow_queries)),
                        },
                    },
                    '_cache': True,
                }
            }

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
            within_filter = {
                "geo_bounding_box": {
                    within['field']: {
                        "top_left": {
                            "lat": max_lat,
                            "lon": min_lng
                        },
                        "bottom_right": {
                            "lat": min_lat,
                            "lon": max_lng
                        }
                    }
                },
            }
            kwargs['query'].setdefault('filtered', {})
            kwargs['query']['filtered'].setdefault('filter', {})
            if kwargs['query']['filtered']['filter']:
                compound_filter = {
                    "and": [
                        kwargs['query']['filtered']['filter'],
                        within_filter,
                    ]
                }
                kwargs['query']['filtered']['filter'] = compound_filter
            else:
                kwargs['query']['filtered']['filter'] = within_filter

        if dwithin is not None:
            lng, lat = dwithin['point'].get_coords()
            dwithin_filter = {
                "geo_distance": {
                    "distance": dwithin['distance'].km,
                    dwithin['field']: {
                        "lat": lat,
                        "lon": lng
                    }
                }
            }
            kwargs['query'].setdefault('filtered', {})
            kwargs['query']['filtered'].setdefault('filter', {})
            if kwargs['query']['filtered']['filter']:
                compound_filter = {
                    "and": [
                        kwargs['query']['filtered']['filter'],
                        dwithin_filter
                    ]
                }
                kwargs['query']['filtered']['filter'] = compound_filter
            else:
                kwargs['query']['filtered']['filter'] = dwithin_filter

        if polygon is not None:
            points = map(lambda x: {"lat": x[1], "lon": x[0]},
                         polygon['polygon'].coords[0])
            polygon_filter = {
                "geo_polygon": {
                    polygon['field']: {
                        "points": points
                    }
                },
            }
            kwargs['query'].setdefault('filtered', {})
            kwargs['query']['filtered'].setdefault('filter', {})
            if kwargs['query']['filtered']['filter']:
                compound_filter = {
                    "and": [
                        kwargs['query']['filtered']['filter'],
                        polygon_filter,
                    ]
                }
                kwargs['query']['filtered']['filter'] = compound_filter
            else:
                kwargs['query']['filtered']['filter'] = polygon_filter

        # Remove the "filtered" key if we're not filtering. Otherwise,
        # Elasticsearch will blow up.
        if not kwargs['query']['filtered'].get('filter'):
            kwargs['query'] = kwargs['query']['filtered']['query']

        return kwargs
    def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
               fields='', highlight=False, facets=None, date_facets=None, query_facets=None,
               narrow_queries=None, spelling_query=None, within=None,
               dwithin=None, distance_point=None, models=None,
               limit_to_registered_models=None, result_class=None, **kwargs):
        if len(query_string) == 0:
            return {
                'results': [],
                'hits': 0,
            }

        if not self.setup_complete:
            self.setup()

        index = haystack.connections[self.connection_alias].get_unified_index()
        content_field = index.document_field

        if query_string == '*:*':
            kwargs = {
                'query': {
                    'filtered': {
                        'query': {
                            'query_string': {
                                'query': '*:*',
                            },
                        },
                    },
                },
            }
        else:
            kwargs = {
                'query': {
                    'filtered': {
                        'query': {
                            'query_string': {
                                'default_field': content_field,
                                'default_operator': DEFAULT_OPERATOR,
                                'query': query_string,
                                'analyze_wildcard': True,
                                'auto_generate_phrase_queries': True,
                            },
                        },
                    },
                },
            }

        geo_sort = False

        if fields:
            if isinstance(fields, (list, set)):
                fields = " ".join(fields)

            kwargs['fields'] = fields

        if sort_by is not None:
            order_list = []
            for field, direction in sort_by:
                if field == 'distance' and distance_point:
                    # Do the geo-enabled sort.
                    lng, lat = distance_point['point'].get_coords()
                    sort_kwargs = {
                        "_geo_distance": {
                            distance_point['field']: [lng, lat],
                            "order" : direction,
                            "unit" : "km"
                        }
                    }
                else:
                    if field == 'distance':
                        warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                    # Regular sorting.
                    sort_kwargs = {field: {'order': direction}}

                order_list.append(sort_kwargs)

            kwargs['sort'] = order_list

        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
        # if start_offset is not None:
        #     kwargs['from'] = start_offset

        # if end_offset is not None:
        #     kwargs['size'] = end_offset - start_offset

        if highlight is True:
            kwargs['highlight'] = {
                'fields': {
                    content_field: {'store': 'yes'},
                }
            }

        if self.include_spelling is True:
            warnings.warn("Elasticsearch does not handle spelling suggestions.", Warning, stacklevel=2)

        if narrow_queries is None:
            narrow_queries = set()

        if facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname in facets:
                kwargs['facets'][facet_fieldname] = {
                    'terms': {
                        'field': facet_fieldname,
                    },
                }

        if date_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in date_facets.items():
                # Need to detect on gap_by & only add amount if it's more than one.
                interval = value.get('gap_by').lower()

                # Need to detect on amount (can't be applied on months or years).
                if value.get('gap_amount', 1) != 1 and not interval in ('month', 'year'):
                    # Just the first character is valid for use.
                    interval = "%s%s" % (value['gap_amount'], interval[:1])

                kwargs['facets'][facet_fieldname] = {
                    'date_histogram': {
                        'field': facet_fieldname,
                        'interval': interval,
                    },
                    'facet_filter': {
                        "range": {
                            facet_fieldname: {
                                'from': self.conn.from_python(value.get('start_date')),
                                'to': self.conn.from_python(value.get('end_date')),
                            }
                        }
                    }
                }

        if query_facets is not None:
            kwargs.setdefault('facets', {})

            for facet_fieldname, value in query_facets:
                kwargs['facets'][facet_fieldname] = {
                    'query': {
                        'query_string': {
                            'query': value,
                        }
                    },
                }

        if limit_to_registered_models is None:
            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

        if models and len(models):
            model_choices = sorted(['%s.%s' % (model._meta.app_label, model._meta.module_name) for model in models])
        elif limit_to_registered_models:
            # Using narrow queries, limit the results to only models handled
            # with the current routers.
            model_choices = self.build_models_list()
        else:
            model_choices = []

        if len(model_choices) > 0:
            if narrow_queries is None:
                narrow_queries = set()

            narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices)))

        if narrow_queries:
            kwargs['query'].setdefault('filtered', {})
            kwargs['query']['filtered'].setdefault('filter', {})
            kwargs['query']['filtered']['filter'] = {
                'fquery': {
                    'query': {
                        'query_string': {
                            'query': u' AND '.join(list(narrow_queries)),
                        },
                    },
                    '_cache': True,
                }
            }

        if within is not None:
            from haystack.utils.geo import generate_bounding_box

            ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
            kwargs['query'].setdefault('filtered', {})
            kwargs['query']['filtered'].setdefault('filter', {})
            kwargs['query']['filtered']['filter'] = {
                "geo_bounding_box": {
                    within['field']: {
                        "top_left": {
                            "lat": max_lat,
                            "lon": max_lng
                        },
                        "bottom_right": {
                            "lat": min_lat,
                            "lon": min_lng
                        }
                    }
                },
            }

        if dwithin is not None:
            lng, lat = dwithin['point'].get_coords()
            kwargs['query'].setdefault('filtered', {})
            kwargs['query']['filtered'].setdefault('filter', {})
            kwargs['query']['filtered']['filter'] = {
                "geo_distance": {
                    "distance": dwithin['distance'].km,
                    dwithin['field']: {
                        "lat": lat,
                        "lon": lng
                    }
                }
            }

        # Remove the "filtered" key if we're not filtering. Otherwise,
        # Elasticsearch will blow up.
        if not kwargs['query']['filtered'].get('filter'):
            kwargs['query'] = kwargs['query']['filtered']['query']

        # Because Elasticsearch.
        query_params = {
            'from': start_offset,
        }

        if end_offset is not None and end_offset > start_offset:
            query_params['size'] = end_offset - start_offset

        try:
            raw_results = self.conn.search(None, kwargs, indexes=[self.index_name], doc_types=['modelresult'], **query_params)
        except (requests.RequestException, pyelasticsearch.ElasticSearchError), e:
            if not self.silently_fail:
                raise

            self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e)
            raw_results = {}