def test_generate_bounding_box(self):
    """Check the corner pairs ``generate_bounding_box`` returns for two points.

    The points are built as ``Point(x, y)``; the assertions below show that
    the helper hands each corner back in the opposite order, ``(y, x)``.
    """
    bottom_left = Point(-95.23947, 38.9637903)
    top_right = Point(-95.23362278938293, 38.973081081164715)

    lower_corner, upper_corner = generate_bounding_box(bottom_left, top_right)
    south, west = lower_corner
    north, east = upper_corner

    # First returned corner.
    self.assertEqual(south, 38.9637903)
    self.assertEqual(west, -95.23947)
    # Second returned corner.
    self.assertEqual(north, 38.973081081164715)
    self.assertEqual(east, -95.23362278938293)
def test_generate_bounding_box_crossing_line_date(self):
    """Check that corners are returned as given when west > east.

    The first point has a positive x (95.23947) and the second a negative
    one; the assertions require that the helper does not swap or re-sort
    them.
    """
    bottom_left = Point(95.23947, 38.9637903)
    top_right = Point(-95.23362278938293, 38.973081081164715)

    lower_corner, upper_corner = generate_bounding_box(bottom_left, top_right)
    south, west = lower_corner
    north, east = upper_corner

    self.assertEqual(south, 38.9637903)
    # The western edge keeps its positive value.
    self.assertEqual(west, 95.23947)
    self.assertEqual(north, 38.973081081164715)
    self.assertEqual(east, -95.23362278938293)
def _build_search_query_within(self, within):
    """Return a ``geo_bounding_box`` clause built from ``within``.

    ``within`` is read as a mapping with the keys ``"point_1"``,
    ``"point_2"`` (handed to ``generate_bounding_box``) and ``"field"``
    (used as the key the box is stored under).
    """
    from haystack.utils.geo import generate_bounding_box

    lower_corner, upper_corner = generate_bounding_box(
        within["point_1"], within["point_2"]
    )
    south, west = lower_corner
    north, east = upper_corner

    # The clause is expressed as top-left / bottom-right corners, so the
    # (south, west) / (north, east) pairs have to be recombined.
    box = {
        "top_left": {"lat": north, "lon": west},
        "bottom_right": {"lat": south, "lon": east},
    }
    return {"geo_bounding_box": {within["field"]: box}}
def _build_search_filters_within(self, within):
    """Build the ``geo_bounding_box`` filter for a ``within`` restriction.

    Reads ``within['point_1']`` and ``within['point_2']`` to compute the
    box via ``generate_bounding_box`` and ``within['field']`` to name the
    field the filter applies to.
    """
    from haystack.utils.geo import generate_bounding_box

    bounds = generate_bounding_box(within['point_1'], within['point_2'])
    (south, west), (north, east) = bounds

    top_left = {"lat": north, "lon": west}
    bottom_right = {"lat": south, "lon": east}

    field_box = {
        within['field']: {
            "top_left": top_left,
            "bottom_right": bottom_right,
        },
    }
    return {"geo_bounding_box": field_box}
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None):
    """Translate haystack search parameters into a search-body dict.

    Args:
        query_string: The query; ``'*:*'`` becomes a ``match_all`` query,
            anything else a ``query_string`` query on the document field.
        sort_by: Iterable of ``(field, direction)`` pairs, or ``None``.
        start_offset, end_offset: Accepted but not used by this method
            (the from/size handling is disabled, see the comment below).
        fields: Field name(s) to return; a list/set is space-joined.
        highlight: Highlighting is only enabled when this is exactly ``True``.
        facets: Mapping of field name -> dict of extra terms-facet options.
            The dicts are copied before use and are not modified.
        date_facets: Mapping of field name -> dict read for ``gap_by``,
            ``gap_amount``, ``start_date`` and ``end_date``.
        query_facets: Iterable of ``(field, query)`` pairs.
        narrow_queries: Iterable of query strings; each becomes one
            cached ``fquery`` filter.
        spelling_query: Text for the suggester; falls back to ``query_string``.
        within: Mapping with ``point_1``, ``point_2`` and ``field``.
        dwithin: Mapping with ``point``, ``distance`` and ``field``.
        distance_point: Mapping with ``point`` and ``field`` used when
            sorting by ``'distance'``.
        models: Models to restrict the search to.
        limit_to_registered_models: Overrides the
            ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when not ``None``.
        result_class: Accepted but not used by this method.

    Returns:
        dict: The assembled keyword arguments (``query`` plus optional
        ``fields``, ``sort``, ``highlight``, ``suggest`` and ``facets``).
    """
    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    if query_string == '*:*':
        kwargs = {
            'query': {
                "match_all": {}
            },
        }
    else:
        kwargs = {
            'query': {
                'query_string': {
                    'default_field': content_field,
                    'default_operator': DEFAULT_OPERATOR,
                    'query': query_string,
                    'analyze_wildcard': True,
                    'auto_generate_phrase_queries': True,
                },
            },
        }

    # so far, no filters
    filters = []

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs['fields'] = fields

    if sort_by is not None:
        order_list = []
        for field, direction in sort_by:
            if field == 'distance' and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                sort_kwargs = {
                    "_geo_distance": {
                        distance_point['field']: [lng, lat],
                        "order": direction,
                        "unit": "km"
                    }
                }
            else:
                if field == 'distance':
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                # Regular sorting.
                sort_kwargs = {field: {'order': direction}}

            order_list.append(sort_kwargs)

        kwargs['sort'] = order_list

    # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
    # start_offset / end_offset are therefore deliberately not added here.

    if highlight is True:
        kwargs['highlight'] = {
            'fields': {
                content_field: {'store': 'yes'},
            }
        }

    if self.include_spelling:
        kwargs['suggest'] = {
            'suggest': {
                'text': spelling_query or query_string,
                'term': {
                    # Using content_field here will result in suggestions of stemmed words.
                    'field': '_all',
                },
            },
        }

    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, extra_options in facets.items():
            # Work on a copy: the pops below must not strip options from
            # the caller's dict, or they are lost if it is used again.
            terms_options = dict(extra_options)
            facet_options = {
                'terms': {
                    'field': facet_fieldname,
                    'size': 100,
                },
            }
            # Special cases for options applied at the facet level (not the terms level).
            if terms_options.pop('global_scope', False):
                # Renamed "global_scope" since "global" is a python keyword.
                facet_options['global'] = True
            if 'facet_filter' in terms_options:
                facet_options['facet_filter'] = terms_options.pop('facet_filter')
            facet_options['terms'].update(terms_options)
            kwargs['facets'][facet_fieldname] = facet_options

    if date_facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get('gap_by').lower()

            # Need to detect on amount (can't be applied on months or years).
            if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                # Just the first character is valid for use.
                interval = "%s%s" % (value['gap_amount'], interval[:1])

            kwargs['facets'][facet_fieldname] = {
                'date_histogram': {
                    'field': facet_fieldname,
                    'interval': interval,
                },
                'facet_filter': {
                    "range": {
                        facet_fieldname: {
                            'from': self._from_python(value.get('start_date')),
                            'to': self._from_python(value.get('end_date')),
                        }
                    }
                }
            }

    if query_facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, value in query_facets:
            kwargs['facets'][facet_fieldname] = {
                'query': {
                    'query_string': {
                        'query': value,
                    }
                },
            }

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models:
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if model_choices:
        filters.append({"terms": {DJANGO_CT: model_choices}})

    for q in narrow_queries:
        filters.append({
            'fquery': {
                'query': {
                    'query_string': {
                        'query': q
                    },
                },
                '_cache': True,
            }
        })

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
        within_filter = {
            "geo_bounding_box": {
                within['field']: {
                    "top_left": {
                        "lat": north,
                        "lon": west
                    },
                    "bottom_right": {
                        "lat": south,
                        "lon": east
                    }
                }
            },
        }
        filters.append(within_filter)

    if dwithin is not None:
        lng, lat = dwithin['point'].get_coords()

        # NB: the 1.0.0 release of elasticsearch introduced an
        # incompatible change in the distance filter formatting.
        if elasticsearch.VERSION >= (1, 0, 0):
            distance = "%(dist).6f%(unit)s" % {
                'dist': dwithin['distance'].km,
                'unit': "km"
            }
        else:
            distance = dwithin['distance'].km

        dwithin_filter = {
            "geo_distance": {
                "distance": distance,
                dwithin['field']: {
                    "lat": lat,
                    "lon": lng
                }
            }
        }
        filters.append(dwithin_filter)

    # If we want to filter, change the query type to "filtered".
    if filters:
        kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
        if len(filters) == 1:
            kwargs['query']['filtered']["filter"] = filters[0]
        else:
            kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}

    return kwargs
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None):
    """Translate haystack search parameters into a search-body dict.

    Args:
        query_string: The query; ``'*:*'`` becomes a ``match_all`` query,
            anything else a ``query_string`` query on the document field.
        sort_by: Iterable of ``(field, direction)`` pairs, or ``None``.
        start_offset, end_offset: Accepted but not used by this method
            (the from/size handling is disabled, see the comment below).
        fields: Field name(s) to return; a list/set is space-joined.
        highlight: Highlighting is only enabled when this is exactly ``True``.
        facets: Mapping of field name -> dict of extra terms-facet options.
            The dicts are copied before use and are not modified.
        date_facets: Mapping of field name -> dict read for ``gap_by``,
            ``gap_amount``, ``start_date`` and ``end_date``.
        query_facets: Iterable of ``(field, query)`` pairs.
        narrow_queries: Iterable of query strings; they are joined with
            ``' AND '`` into a single cached ``fquery`` filter.
        spelling_query: Text for the suggester; falls back to ``query_string``.
        within: Mapping with ``point_1``, ``point_2`` and ``field``.
        dwithin: Mapping with ``point``, ``distance`` and ``field``.
        distance_point: Mapping with ``point`` and ``field`` used when
            sorting by ``'distance'``.
        models: Models to restrict the search to.
        limit_to_registered_models: Overrides the
            ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when not ``None``.
        result_class: Accepted but not used by this method.

    Returns:
        dict: The assembled keyword arguments (``query`` plus optional
        ``fields``, ``sort``, ``highlight``, ``suggest`` and ``facets``).
    """
    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    if query_string == '*:*':
        kwargs = {
            'query': {
                "match_all": {}
            },
        }
    else:
        kwargs = {
            'query': {
                'query_string': {
                    'default_field': content_field,
                    'default_operator': DEFAULT_OPERATOR,
                    'query': query_string,
                    'analyze_wildcard': True,
                    'auto_generate_phrase_queries': True,
                },
            },
        }

    # so far, no filters
    filters = []

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs['fields'] = fields

    if sort_by is not None:
        order_list = []
        for field, direction in sort_by:
            if field == 'distance' and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                sort_kwargs = {
                    "_geo_distance": {
                        distance_point['field']: [lng, lat],
                        "order": direction,
                        "unit": "km"
                    }
                }
            else:
                if field == 'distance':
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                # Regular sorting.
                sort_kwargs = {field: {'order': direction}}

            order_list.append(sort_kwargs)

        kwargs['sort'] = order_list

    # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
    # start_offset / end_offset are therefore deliberately not added here.

    if highlight is True:
        kwargs['highlight'] = {
            'fields': {
                content_field: {'store': 'yes'},
            }
        }

    if self.include_spelling:
        kwargs['suggest'] = {
            'suggest': {
                'text': spelling_query or query_string,
                'term': {
                    # Using content_field here will result in suggestions of stemmed words.
                    'field': '_all',
                },
            },
        }

    # Normalised once here, so no later None check is needed.
    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, extra_options in facets.items():
            # Work on a copy: the pops below must not strip options from
            # the caller's dict, or they are lost if it is used again.
            terms_options = dict(extra_options)
            facet_options = {
                'terms': {
                    'field': facet_fieldname,
                    'size': 100,
                },
            }
            # Special cases for options applied at the facet level (not the terms level).
            if terms_options.pop('global_scope', False):
                # Renamed "global_scope" since "global" is a python keyword.
                facet_options['global'] = True
            if 'facet_filter' in terms_options:
                facet_options['facet_filter'] = terms_options.pop('facet_filter')
            facet_options['terms'].update(terms_options)
            kwargs['facets'][facet_fieldname] = facet_options

    if date_facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get('gap_by').lower()

            # Need to detect on amount (can't be applied on months or years).
            if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                # Just the first character is valid for use.
                interval = "%s%s" % (value['gap_amount'], interval[:1])

            kwargs['facets'][facet_fieldname] = {
                'date_histogram': {
                    'field': facet_fieldname,
                    'interval': interval,
                },
                'facet_filter': {
                    "range": {
                        facet_fieldname: {
                            'from': self._from_python(value.get('start_date')),
                            'to': self._from_python(value.get('end_date')),
                        }
                    }
                }
            }

    if query_facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, value in query_facets:
            kwargs['facets'][facet_fieldname] = {
                'query': {
                    'query_string': {
                        'query': value,
                    }
                },
            }

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models:
        # NOTE(review): ``_meta.module_name`` was replaced by
        # ``_meta.model_name`` in later Django releases -- confirm which
        # Django versions this backend has to support.
        model_choices = sorted(
            '%s.%s' % (model._meta.app_label, model._meta.module_name)
            for model in models
        )
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if model_choices:
        filters.append({"terms": {DJANGO_CT: model_choices}})

    if narrow_queries:
        filters.append({
            'fquery': {
                'query': {
                    'query_string': {
                        'query': u' AND '.join(narrow_queries),
                    },
                },
                '_cache': True,
            }
        })

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
        within_filter = {
            "geo_bounding_box": {
                within['field']: {
                    "top_left": {
                        "lat": max_lat,
                        "lon": min_lng
                    },
                    "bottom_right": {
                        "lat": min_lat,
                        "lon": max_lng
                    }
                }
            },
        }
        filters.append(within_filter)

    if dwithin is not None:
        lng, lat = dwithin['point'].get_coords()
        dwithin_filter = {
            "geo_distance": {
                "distance": dwithin['distance'].km,
                dwithin['field']: {
                    "lat": lat,
                    "lon": lng
                }
            }
        }
        filters.append(dwithin_filter)

    # If we want to filter, change the query type to "filtered".
    if filters:
        kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
        if len(filters) == 1:
            kwargs['query']['filtered']["filter"] = filters[0]
        else:
            kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}

    return kwargs
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields="", highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None, stats=None, collate=None,
                        **extra_kwargs):
    """Translate haystack search parameters into Solr request parameters.

    Args:
        query_string: Accepted but not used by this method.
        sort_by: Sort string; ``"distance asc"`` / ``"distance desc"``
            trigger a ``geodist()`` sort when ``distance_point`` is given.
        start_offset: Stored as ``start`` when not ``None``.
        end_offset: When not ``None``, ``rows`` is set to the distance
            from ``start_offset`` (a ``None`` start counts as 0).
        fields: Field name(s) for ``fl``; a list/set is space-joined.
        highlight: Truthy enables highlighting; a dict additionally
            supplies ``hl.*`` options (the ``hl.`` prefix is optional).
        facets: Mapping of field name -> dict of per-field facet options.
        date_facets: Mapping of field name -> dict read for
            ``start_date``, ``end_date``, ``gap_by`` and ``gap_amount``.
        query_facets: Iterable of ``(field, value)`` pairs.
        narrow_queries: Collection of filter queries for ``fq``. It is
            not modified; a copy is made before the model restriction
            is added.
        spelling_query: Used as ``spellcheck.q`` when spelling is enabled.
        within: Mapping with ``point_1``, ``point_2`` and ``field``.
        dwithin: Mapping with ``point``, ``distance`` and ``field``.
        distance_point: Mapping with ``point`` and ``field``.
        models: Models to restrict the search to.
        limit_to_registered_models: Overrides the
            ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when not ``None``.
        result_class: Accepted but not used by this method.
        stats: Mapping of stats field -> iterable of facet fields.
        collate: Spellcheck collation flag; defaults to ``self.collate``.
        **extra_kwargs: Merged into the result last, so they override
            anything computed here.

    Returns:
        dict: The assembled request parameters.
    """
    index = haystack.connections[self.connection_alias].get_unified_index()

    kwargs = {"fl": "* score", "df": index.document_field}

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs["fl"] = fields

    if sort_by is not None:
        if sort_by in ["distance asc", "distance desc"] and distance_point:
            # Do the geo-enabled sort.
            lng, lat = distance_point["point"].coords
            kwargs["sfield"] = distance_point["field"]
            kwargs["pt"] = "%s,%s" % (lat, lng)

            if sort_by == "distance asc":
                kwargs["sort"] = "geodist() asc"
            else:
                kwargs["sort"] = "geodist() desc"
        else:
            if sort_by.startswith("distance "):
                warnings.warn(
                    "In order to sort by distance, you must call the '.distance(...)' method."
                )

            # Regular sorting.
            kwargs["sort"] = sort_by

    if start_offset is not None:
        kwargs["start"] = start_offset

    if end_offset is not None:
        # A missing start means "from the beginning"; avoid int - None.
        kwargs["rows"] = end_offset - (start_offset or 0)

    if highlight:
        # `highlight` can either be True or a dictionary containing custom parameters
        # which will be passed to the backend and may override our default settings:
        kwargs["hl"] = "true"
        kwargs["hl.fragsize"] = "200"

        if isinstance(highlight, dict):
            # Autoprefix highlighter options with 'hl.', all of them start with it anyway.
            # This makes option dicts shorter: {'maxAnalyzedChars': 42}
            # and lets some options be used as keyword arguments:
            # `.highlight(preserveMulti=False)`
            kwargs.update({
                (key if key.startswith("hl.") else "hl." + key): option
                for key, option in highlight.items()
            })

    if collate is None:
        collate = self.collate

    if self.include_spelling is True:
        kwargs["spellcheck"] = "true"
        kwargs["spellcheck.collate"] = str(collate).lower()
        kwargs["spellcheck.count"] = 1

        if spelling_query:
            kwargs["spellcheck.q"] = spelling_query

    if facets is not None:
        kwargs["facet"] = "on"
        kwargs["facet.field"] = facets.keys()

        for facet_field, options in facets.items():
            for key, value in options.items():
                kwargs["f.%s.facet.%s" % (facet_field, key)] = self.conn._from_python(value)

    if date_facets is not None:
        kwargs["facet"] = "on"
        kwargs["facet.date"] = date_facets.keys()
        kwargs["facet.date.other"] = "none"

        for key, value in date_facets.items():
            kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(
                value.get("start_date"))
            kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(
                value.get("end_date"))
            gap_by_string = value.get("gap_by").upper()
            gap_string = "%d%s" % (value.get("gap_amount"), gap_by_string)

            # Pluralise the unit for any amount other than one.
            if value.get("gap_amount") != 1:
                gap_string += "S"

            kwargs["f.%s.facet.date.gap" % key] = "+%s/%s" % (
                gap_string,
                gap_by_string,
            )

    if query_facets is not None:
        kwargs["facet"] = "on"
        kwargs["facet.query"] = [
            "%s:%s" % (field, value) for field, value in query_facets
        ]

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(
            settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

    if models:
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if model_choices:
        # Copy before adding so the caller's collection is left untouched.
        narrow_queries = set() if narrow_queries is None else set(narrow_queries)
        narrow_queries.add("%s:(%s)" % (DJANGO_CT, " OR ".join(model_choices)))

    if narrow_queries is not None:
        kwargs["fq"] = list(narrow_queries)

    if stats:
        kwargs["stats"] = "true"

        # Assigning inside a loop would keep only the last field/facet.
        # Several values are sent as a list (one repeated parameter each);
        # a single value stays a plain scalar as before.
        stats_fields = list(stats)
        kwargs["stats.field"] = (
            stats_fields[0] if len(stats_fields) == 1 else stats_fields
        )

        for stats_field in stats_fields:
            stats_facets = list(stats[stats_field])
            if stats_facets:
                kwargs["f.%s.stats.facet" % stats_field] = (
                    stats_facets[0] if len(stats_facets) == 1 else stats_facets
                )

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        kwargs.setdefault("fq", [])
        ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(
            within["point_1"], within["point_2"])
        # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
        # very clear on this.
        bbox = "%s:[%s,%s TO %s,%s]" % (
            within["field"],
            min_lat,
            min_lng,
            max_lat,
            max_lng,
        )
        kwargs["fq"].append(bbox)

    if dwithin is not None:
        kwargs.setdefault("fq", [])
        lng, lat = dwithin["point"].coords
        geofilt = "{!geofilt pt=%s,%s sfield=%s d=%s}" % (
            lat,
            lng,
            dwithin["field"],
            dwithin["distance"].km,
        )
        kwargs["fq"].append(geofilt)

    # Check to see if the backend should try to include distances
    # (Solr 4.X+) in the results.
    if self.distance_available and distance_point:
        # In early testing, you can't just hand Solr 4.X a proper bounding box
        # & request distances. To enable native distance would take calculating
        # a center point & a radius off the user-provided box, which kinda
        # sucks. We'll avoid it for now, since Solr 4.x's release will be some
        # time yet.
        # kwargs['fl'] += ' _dist_:geodist()'
        pass

    if extra_kwargs:
        kwargs.update(extra_kwargs)

    return kwargs
def build_search_kwargs(
    self,
    query_string,
    sort_by=None,
    start_offset=0,
    end_offset=None,
    fields="",
    highlight=False,
    facets=None,
    date_facets=None,
    query_facets=None,
    narrow_queries=None,
    spelling_query=None,
    within=None,
    dwithin=None,
    distance_point=None,
    models=None,
    limit_to_registered_models=None,
    result_class=None,
):
    """Translate haystack search parameters into a search-body dict.

    The main query takes one of three shapes: ``match_all`` for ``"*:*"``,
    a ``query_string`` query when the text is wrapped in parentheses, and
    otherwise a ``match`` query using ``self.DEFAULT_ANALYZER`` and
    ``self.DEFAULT_MINIMUM_MATCH``.

    Args:
        query_string: The query text.
        sort_by: Iterable of ``(field, direction)`` pairs, or ``None``.
        start_offset, end_offset: Accepted but not used by this method
            (the from/size handling is disabled, see the comment below).
        fields: Field name(s) to return; a list/set is space-joined.
        highlight: Highlighting is only enabled when this is exactly ``True``.
        facets: Mapping of field name -> dict of extra terms-facet options.
            The dicts are copied before use and are not modified.
        date_facets: Mapping of field name -> dict read for ``gap_by``,
            ``gap_amount``, ``start_date`` and ``end_date``.
        query_facets: Iterable of ``(field, query)`` pairs.
        narrow_queries: Iterable of query strings; each becomes one
            cached ``fquery`` filter.
        spelling_query: Text for the suggester; falls back to ``query_string``.
        within: Mapping with ``point_1``, ``point_2`` and ``field``.
        dwithin: Mapping with ``point``, ``distance`` and ``field``.
        distance_point: Mapping with ``point`` and ``field`` used when
            sorting by ``"distance"``.
        models: Models to restrict the search to.
        limit_to_registered_models: Overrides the
            ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when not ``None``.
        result_class: Accepted but not used by this method.

    Returns:
        dict: The assembled keyword arguments (``query`` plus optional
        ``fields``, ``sort``, ``highlight``, ``suggest`` and ``facets``).
    """
    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    if query_string == "*:*":
        kwargs = {"query": {"match_all": {}}}
    elif query_string.startswith("(") and query_string.endswith(")"):
        kwargs = {
            "query": {
                "query_string": {
                    "default_field": content_field,
                    "default_operator": DEFAULT_OPERATOR,
                    "query": query_string,
                    "analyze_wildcard": True,
                    "auto_generate_phrase_queries": True,
                }
            }
        }
    else:
        kwargs = {
            "query": {
                "match": {
                    str(content_field): {
                        "query": query_string,
                        "analyzer": self.DEFAULT_ANALYZER,
                        # setting courtesy of ConfigurableElasticBackend
                        "minimum_should_match": self.DEFAULT_MINIMUM_MATCH,
                    }
                }
            }
        }

    # so far, no filters
    filters = []

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs["fields"] = fields

    if sort_by is not None:
        order_list = []
        for field, direction in sort_by:
            if field == "distance" and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point["point"].get_coords()
                sort_kwargs = {
                    "_geo_distance": {distance_point["field"]: [lng, lat], "order": direction, "unit": "km"}
                }
            else:
                if field == "distance":
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                # Regular sorting.
                sort_kwargs = {field: {"order": direction}}

            order_list.append(sort_kwargs)

        kwargs["sort"] = order_list

    # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
    # start_offset / end_offset are therefore deliberately not added here.

    if highlight is True:
        kwargs["highlight"] = {"fields": {content_field: {"store": "yes"}}}

    if self.include_spelling:
        kwargs["suggest"] = {
            "suggest": {
                "text": spelling_query or query_string,
                "term": {
                    # Using content_field here will result in suggestions of stemmed words.
                    "field": "_all"
                },
            }
        }

    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault("facets", {})

        for facet_fieldname, extra_options in facets.items():
            # Work on a copy: the pops below must not strip options from
            # the caller's dict, or they are lost if it is used again.
            terms_options = dict(extra_options)
            facet_options = {"terms": {"field": facet_fieldname, "size": 100}}
            # Special cases for options applied at the facet level (not the terms level).
            if terms_options.pop("global_scope", False):
                # Renamed "global_scope" since "global" is a python keyword.
                facet_options["global"] = True
            if "facet_filter" in terms_options:
                facet_options["facet_filter"] = terms_options.pop("facet_filter")
            facet_options["terms"].update(terms_options)
            kwargs["facets"][facet_fieldname] = facet_options

    if date_facets is not None:
        kwargs.setdefault("facets", {})

        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get("gap_by").lower()

            # Need to detect on amount (can't be applied on months or years).
            if value.get("gap_amount", 1) != 1 and interval not in ("month", "year"):
                # Just the first character is valid for use.
                interval = "%s%s" % (value["gap_amount"], interval[:1])

            kwargs["facets"][facet_fieldname] = {
                "date_histogram": {"field": facet_fieldname, "interval": interval},
                "facet_filter": {
                    "range": {
                        facet_fieldname: {
                            "from": self._from_python(value.get("start_date")),
                            "to": self._from_python(value.get("end_date")),
                        }
                    }
                },
            }

    if query_facets is not None:
        kwargs.setdefault("facets", {})

        for facet_fieldname, value in query_facets:
            # NOTE(review): this nests the value under the literal key
            # "query" inside "match" -- confirm that is the intended
            # target rather than a field name.
            kwargs["facets"][facet_fieldname] = {"query": {"match": {"query": value}}}

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

    if models:
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if model_choices:
        filters.append({"terms": {DJANGO_CT: model_choices}})

    for q in narrow_queries:
        filters.append({"fquery": {"query": {"query_string": {"query": q}}, "_cache": True}})

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((south, west), (north, east)) = generate_bounding_box(within["point_1"], within["point_2"])
        within_filter = {
            "geo_bounding_box": {
                within["field"]: {
                    "top_left": {"lat": north, "lon": west},
                    "bottom_right": {"lat": south, "lon": east},
                }
            }
        }
        filters.append(within_filter)

    if dwithin is not None:
        lng, lat = dwithin["point"].get_coords()

        # NB: the 1.0.0 release of elasticsearch introduced an
        # incompatible change in the distance filter formatting.
        if elasticsearch.VERSION >= (1, 0, 0):
            distance = "%(dist).6f%(unit)s" % {"dist": dwithin["distance"].km, "unit": "km"}
        else:
            distance = dwithin["distance"].km

        dwithin_filter = {"geo_distance": {"distance": distance, dwithin["field"]: {"lat": lat, "lon": lng}}}
        filters.append(dwithin_filter)

    # If we want to filter, change the query type to "filtered".
    if filters:
        kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
        if len(filters) == 1:
            kwargs["query"]["filtered"]["filter"] = filters[0]
        else:
            kwargs["query"]["filtered"]["filter"] = {"bool": {"must": filters}}

    return kwargs
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, boost_fields=None,
                        boost_negative=None, filter_context=None,
                        narrow_queries=None, spelling_query=None,
                        facets=None, date_facets=None, query_facets=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None, **extra_kwargs):
    """Translate haystack search parameters into a search-body dict.

    Facets are emitted under ``aggregations`` and filters are attached
    through a ``bool`` query.

    Args:
        query_string: The query; ``'*:*'`` becomes a ``match_all`` query,
            anything else a ``query_string`` query on the document field.
        sort_by: Iterable of ``(field, direction)`` pairs, or ``None``.
        start_offset, end_offset: Accepted but not used by this method.
        fields: Field name(s) for ``stored_fields``; a list/set is
            space-joined.
        highlight: Truthy enables highlighting; a dict is additionally
            merged into the highlight section.
        boost_fields: Mapping of field name -> boost; replaces the
            queried fields with ``field^boost`` entries. Ignored for the
            ``'*:*'`` query, which has no ``query_string`` section.
        boost_negative: Pair ``(negative_query, negative_boost)`` that
            wraps the query in a ``boosting`` query.
        filter_context: Accepted for interface compatibility but not
            used by this method.
        narrow_queries: Iterable of query strings; each becomes one
            ``query_string`` filter.
        spelling_query: Text for the suggester; falls back to ``query_string``.
        facets: Mapping of field name -> dict of extra terms options.
            The dicts are copied before use and are not modified.
        date_facets: Mapping of field name -> dict read for ``gap_by``,
            ``gap_amount``, ``start_date`` and ``end_date``.
        query_facets: Iterable of ``(field, query)`` pairs.
        within: Mapping with ``point_1``, ``point_2`` and ``field``.
        dwithin: Mapping with ``point``, ``distance`` and ``field``.
        distance_point: Mapping with ``point`` and ``field`` used when
            sorting by ``'distance'``.
        models: Models to restrict the search to.
        limit_to_registered_models: Overrides the
            ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when not ``None``.
        result_class: Accepted but not used by this method.
        **extra_kwargs: Merged into the result last, so they override
            anything computed here.

    Returns:
        dict: The assembled keyword arguments.
    """
    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    filters = []

    if query_string == '*:*':
        kwargs = {
            'query': {
                "match_all": {}
            },
        }
    else:
        kwargs = {
            'query': {
                'query_string': {
                    'fields': [content_field],
                    'default_operator': DEFAULT_OPERATOR,
                    'query': query_string,
                    'analyze_wildcard': True,
                    'auto_generate_phrase_queries': True,
                    'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS,
                },
            },
        }
        # Field boosts only make sense for the query_string query; the
        # match_all body has no 'query_string' key to attach them to.
        if boost_fields:
            kwargs['query']['query_string']['fields'] = [
                '%s^%s' % (boost_field, boost_value)
                for boost_field, boost_value in boost_fields.items()
            ]

    if boost_negative:
        boosting = {
            'positive': kwargs['query'],
            'negative': boost_negative[0],
            'negative_boost': boost_negative[1]
        }
        kwargs['query'] = {'boosting': boosting}

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs['stored_fields'] = fields

    if sort_by is not None:
        order_list = []
        for field, direction in sort_by:
            if field == 'distance' and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                sort_kwargs = {
                    "_geo_distance": {
                        distance_point['field']: [lng, lat],
                        "order": direction,
                        "unit": "km"
                    }
                }
            else:
                if field == 'distance':
                    warnings.warn(
                        "In order to sort by distance, "
                        "you must call the '.distance(...)' method.")

                # Regular sorting.
                sort_kwargs = {field: {'order': direction}}

            order_list.append(sort_kwargs)

        kwargs['sort'] = order_list

    if highlight:
        kwargs['highlight'] = {
            'fields': {
                content_field: {},
            }
        }

        # A dict supplies extra highlight settings on top of the default.
        if isinstance(highlight, dict):
            kwargs['highlight'].update(highlight)

    if self.include_spelling:
        kwargs['suggest'] = {
            'suggest': {
                'text': spelling_query or query_string,
                'term': {
                    'field': '_all',
                },
            },
        }

    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault('aggregations', {})

        for facet_fieldname, extra_options in facets.items():
            # Work on a copy: the pops below must not strip options from
            # the caller's dict, or they are lost if it is used again.
            terms_options = dict(extra_options)
            facet_options = {
                'terms': {
                    'field': facet_fieldname + '.raw',
                    'size': 100,
                },
            }
            # Special cases for options applied at the facet level (not the terms level).
            if terms_options.pop('global_scope', False):
                # Renamed "global_scope" since "global" is a python keyword.
                facet_options['global'] = True
            if 'facet_filter' in terms_options:
                facet_options['facet_filter'] = terms_options.pop('facet_filter')
            facet_options['terms'].update(terms_options)
            kwargs['aggregations'][facet_fieldname] = facet_options

    if date_facets is not None:
        kwargs.setdefault('aggregations', {})

        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get('gap_by').lower()

            # Need to detect on amount (can't be applied on months or years).
            if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                # Just the first character is valid for use.
                interval = "%s%s" % (value['gap_amount'], interval[:1])

            # Each date facet yields two aggregations: the histogram
            # itself and a date_range limited to the requested window.
            date_histogram_aggregation_name = "{0}{1}".format(
                facet_fieldname, DATE_HISTOGRAM_FIELD_NAME_SUFFIX)
            date_range_aggregation_name = "{0}{1}".format(
                facet_fieldname, DATE_RANGE_FIELD_NAME_SUFFIX)

            kwargs['aggregations'][date_histogram_aggregation_name] = {
                'meta': {
                    '_type': 'haystack_date_histogram',
                },
                'date_histogram': {
                    'field': facet_fieldname,
                    'interval': interval,
                },
            }

            kwargs['aggregations'][date_range_aggregation_name] = {
                'meta': {
                    '_type': 'haystack_date_range',
                },
                'date_range': {  # agg type
                    'field': facet_fieldname,
                    'ranges': [{
                        'from': self._from_python(value.get('start_date')),
                        'to': self._from_python(value.get('end_date')),
                    }]
                }
            }

    if query_facets is not None:
        kwargs.setdefault('aggregations', {})

        for facet_fieldname, value in query_facets:
            kwargs['aggregations'][facet_fieldname] = {
                'filter': {
                    'query_string': {
                        'query': value,
                    }
                }
            }

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(
            settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models:
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if model_choices:
        filters.append({"terms": {DJANGO_CT: model_choices}})

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
        within_filter = {
            "geo_bounding_box": {
                within['field']: {
                    "top_left": {
                        "lat": north,
                        "lon": west
                    },
                    "bottom_right": {
                        "lat": south,
                        "lon": east
                    }
                }
            },
        }
        filters.append(within_filter)

    if dwithin is not None:
        lng, lat = dwithin['point'].get_coords()

        # NB: the 1.0.0 release of elasticsearch introduced an
        # incompatible change in the distance filter formatting.
        if elasticsearch.VERSION >= (1, 0, 0):
            distance = "%(dist).6f%(unit)s" % {
                'dist': dwithin['distance'].km,
                'unit': "km"
            }
        else:
            distance = dwithin['distance'].km

        dwithin_filter = {
            "geo_distance": {
                "distance": distance,
                dwithin['field']: {
                    "lat": lat,
                    "lon": lng
                }
            }
        }
        filters.append(dwithin_filter)

    for q in narrow_queries:
        filters.append({'query_string': {'query': q}})

    # If we want to filter, wrap the query in a bool query with a filter.
    if filters:
        kwargs["query"] = {"bool": {"must": kwargs.pop("query")}}
        if len(filters) == 1:
            kwargs['query']['bool']["filter"] = filters[0]
        else:
            kwargs['query']['bool']["filter"] = {"bool": {"must": filters}}

    if extra_kwargs:
        kwargs.update(extra_kwargs)

    return kwargs
def build_search_kwargs(
    self,
    query_string,
    sort_by=None,
    start_offset=0,
    end_offset=None,
    fields="",
    highlight=False,
    facets=None,
    date_facets=None,
    query_facets=None,
    narrow_queries=None,
    spelling_query=None,
    within=None,
    dwithin=None,
    distance_point=None,
    models=None,
    limit_to_registered_models=None,
    result_class=None,
    custom_score=None,
):
    """Build the Elasticsearch request body (``filtered`` query + ``facets``).

    :param query_string: Query text; ``"*:*"`` is turned into ``match_all``.
    :param sort_by: Iterable of ``(field, direction)`` pairs.
    :param fields: Field name(s) to return; a list/set is space-joined.
    :param highlight: Only the literal ``True`` enables highlighting.
    :param facets: Iterable of field names for ``terms`` facets.
    :param date_facets: Mapping of field name to a dict holding ``gap_by``,
        optional ``gap_amount``, ``start_date`` and ``end_date``.
    :param query_facets: Iterable of ``(field, query)`` pairs.
    :param narrow_queries: Iterable of query strings AND-ed into one filter.
        It is copied, never modified.
    :param within: Dict with ``field``, ``point_1`` and ``point_2``.
    :param dwithin: Dict with ``field``, ``point`` and ``distance``.
    :param distance_point: Dict with ``field`` and ``point``; needed to sort
        by ``distance``.
    :param models: Model classes the search is restricted to.
    :param limit_to_registered_models: Falls back to the
        ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when ``None``.
    :param custom_score: Optional dict wrapped around the final query as a
        ``custom_score`` query.
    :return: Request body as a dict.

    ``start_offset``, ``end_offset``, ``spelling_query`` and ``result_class``
    are accepted for signature compatibility but are not used here.
    """
    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    if query_string == "*:*":
        kwargs = {"query": {"filtered": {"query": {"match_all": {}}}}}
    else:
        query_string_content = self._get_query_string_content(content_field, query_string)
        kwargs = {"query": {"filtered": {"query": {"query_string": query_string_content}}}}

    filtered = kwargs["query"]["filtered"]

    def _and_filter(extra_filter):
        # Combine with whatever filter is already present, or install it as
        # the only filter when there is none yet.
        existing = filtered.get("filter")
        if existing:
            filtered["filter"] = {"and": [existing, extra_filter]}
        else:
            filtered["filter"] = extra_filter

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)
        kwargs["fields"] = fields

    if sort_by is not None:
        order_list = []
        for field, direction in sort_by:
            if field == "distance" and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point["point"].get_coords()
                sort_kwargs = {
                    "_geo_distance": {distance_point["field"]: [lng, lat], "order": direction, "unit": "km"}
                }
            else:
                if field == "distance":
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")
                # Regular sorting.
                sort_kwargs = {field: {"order": direction}}
            order_list.append(sort_kwargs)
        kwargs["sort"] = order_list

    # NOTE: from/size offsets are deliberately not put into the body; the
    # original author found they did not work right in Elasticsearch's DSL.

    if highlight is True:
        kwargs["highlight"] = {"fields": {content_field: {"store": "yes"}}}

    if self.include_spelling is True:
        warnings.warn("Elasticsearch does not handle spelling suggestions.", Warning, stacklevel=2)

    # Work on a private copy: the model restriction below is added to this
    # set and must not leak back into the caller's object.
    narrow_queries = set() if narrow_queries is None else set(narrow_queries)

    if facets is not None:
        kwargs.setdefault("facets", {})
        for facet_fieldname in facets:
            kwargs["facets"][facet_fieldname] = {"terms": {"field": facet_fieldname, "size": 300}}

    if date_facets is not None:
        kwargs.setdefault("facets", {})
        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get("gap_by").lower()
            # Need to detect on amount (can't be applied on months or years).
            if value.get("gap_amount", 1) != 1 and interval not in ("month", "year"):
                # Just the first character is valid for use.
                interval = "%s%s" % (value["gap_amount"], interval[:1])
            kwargs["facets"][facet_fieldname] = {
                "date_histogram": {"field": facet_fieldname, "interval": interval},
                "facet_filter": {
                    "range": {
                        facet_fieldname: {
                            "from": self._from_python(value.get("start_date")),
                            "to": self._from_python(value.get("end_date")),
                        }
                    }
                },
            }

    if query_facets is not None:
        kwargs.setdefault("facets", {})
        for facet_fieldname, value in query_facets:
            kwargs["facets"][facet_fieldname] = {"query": {"query_string": {"query": value}}}

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

    if models:
        model_choices = sorted(["%s.%s" % (model._meta.app_label, model._meta.module_name) for model in models])
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if len(model_choices) > 0:
        narrow_queries.add("%s:(%s)" % (DJANGO_CT, " OR ".join(model_choices)))

    if narrow_queries:
        filtered["filter"] = {
            "fquery": {"query": {"query_string": {"query": u" AND ".join(list(narrow_queries))}}, "_cache": True}
        }

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((south, west), (north, east)) = generate_bounding_box(within["point_1"], within["point_2"])
        _and_filter(
            {
                "geo_bounding_box": {
                    within["field"]: {
                        "top_left": {"lat": north, "lon": west},
                        "bottom_right": {"lat": south, "lon": east},
                    }
                }
            }
        )

    if dwithin is not None:
        lng, lat = dwithin["point"].get_coords()
        _and_filter(
            {"geo_distance": {"distance": dwithin["distance"].km, dwithin["field"]: {"lat": lat, "lon": lng}}}
        )

    # Remove the "filtered" key if we're not filtering. Otherwise,
    # Elasticsearch will blow up.
    if not filtered.get("filter"):
        kwargs["query"] = filtered["query"]

    if custom_score:
        new_kwargs = kwargs.copy()
        new_kwargs["query"] = {}
        new_kwargs["query"]["custom_score"] = custom_score.copy()
        new_kwargs["query"]["custom_score"]["query"] = kwargs["query"].copy()
        return new_kwargs

    return kwargs
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None, **extra_kwargs):
    """Build the Elasticsearch request body (``bool`` query + ``aggregations``).

    :param query_string: Query text; ``'*:*'`` is turned into ``match_all``.
    :param sort_by: Iterable of ``(field, direction)`` pairs.
    :param fields: Stored field name(s) to return; a list/set is space-joined.
    :param highlight: ``True`` or a dict of highlight options that is merged
        over the default highlight settings.
    :param facets: Mapping of field name to a dict of extra terms options.
        ``global_scope`` and ``facet_filter`` are lifted to the facet level.
        The option dicts are copied, never modified.
    :param date_facets: Mapping of field name to a dict holding ``gap_by``,
        optional ``gap_amount``, ``start_date`` and ``end_date``.
    :param query_facets: Iterable of ``(field, query)`` pairs.
    :param narrow_queries: Iterable of query strings, each added as a filter.
    :param spelling_query: Text for the suggester; defaults to
        ``query_string``.
    :param within: Dict with ``field``, ``point_1`` and ``point_2``.
    :param dwithin: Dict with ``field``, ``point`` and ``distance``.
    :param distance_point: Dict with ``field`` and ``point``; needed to sort
        by ``distance``.
    :param models: Model classes the search is restricted to.
    :param limit_to_registered_models: Falls back to the
        ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when ``None``.
    :param extra_kwargs: Merged last into the result, overriding any key
        built here.
    :return: Request body as a dict.

    ``start_offset``, ``end_offset`` and ``result_class`` are accepted for
    signature compatibility but are not used here.
    """
    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    if query_string == '*:*':
        kwargs = {
            'query': {
                "match_all": {}
            },
        }
    else:
        kwargs = {
            'query': {
                'query_string': {
                    'default_field': content_field,
                    'default_operator': DEFAULT_OPERATOR,
                    'query': query_string,
                    'analyze_wildcard': True,
                    'auto_generate_phrase_queries': True,
                    # 'fuzzy_min_sim' is intentionally left out: the server
                    # rejected it with "[query_string] query does not
                    # support [fuzzy_min_sim]".
                    'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS,
                },
            },
        }

    # so far, no filters
    filters = []

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)
        kwargs['stored_fields'] = fields

    if sort_by is not None:
        order_list = []
        for field, direction in sort_by:
            if field == 'distance' and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                sort_kwargs = {
                    "_geo_distance": {
                        distance_point['field']: [lng, lat],
                        "order": direction,
                        "unit": "km"
                    }
                }
            else:
                if field == 'distance':
                    warnings.warn(
                        "In order to sort by distance, you must call the '.distance(...)' method."
                    )
                # Regular sorting.
                sort_kwargs = {field: {'order': direction}}
            order_list.append(sort_kwargs)
        kwargs['sort'] = order_list

    # NOTE: from/size offsets are deliberately not put into the body; the
    # original author found they did not work right in Elasticsearch's DSL.

    if highlight:
        # `highlight` can either be True or a dictionary containing custom
        # parameters which will be passed to the backend and may override
        # our default settings:
        kwargs['highlight'] = {
            'fields': {
                content_field: {},
            }
        }
        if isinstance(highlight, dict):
            kwargs['highlight'].update(highlight)

    if self.include_spelling:
        kwargs['suggest'] = {
            'suggest': {
                'text': spelling_query or query_string,
                'term': {
                    # Using content_field here will result in suggestions of stemmed words.
                    'field': '_all',
                },
            },
        }

    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault('aggregations', {})
        for facet_fieldname, extra_options in facets.items():
            # Copy first: the pops below must not strip options from the
            # caller's dict, or a repeated call would lose them.
            extra_options = dict(extra_options or {})
            facet_options = {
                'terms': {
                    'field': facet_fieldname,
                    'size': 100,
                },
            }
            # Special cases for options applied at the facet level (not the terms level).
            if extra_options.pop('global_scope', False):
                # Renamed "global_scope" since "global" is a python keyword.
                facet_options['global'] = True
            if 'facet_filter' in extra_options:
                facet_options['facet_filter'] = extra_options.pop('facet_filter')
            facet_options['terms'].update(extra_options)
            kwargs['aggregations'][facet_fieldname] = facet_options

    if date_facets is not None:
        kwargs.setdefault('aggregations', {})
        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get('gap_by').lower()
            # Need to detect on amount (can't be applied on months or years).
            if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                # Just the first character is valid for use.
                interval = "%s%s" % (value['gap_amount'], interval[:1])

            date_histogram_aggregation_name = "{0}{1}".format(
                facet_fieldname, DATE_HISTOGRAM_FIELD_NAME_SUFFIX)
            date_range_aggregation_name = "{0}{1}".format(
                facet_fieldname, DATE_RANGE_FIELD_NAME_SUFFIX)

            kwargs['aggregations'][date_histogram_aggregation_name] = {
                'meta': {
                    '_type': 'haystack_date_histogram',
                },
                'date_histogram': {
                    'field': facet_fieldname,
                    'interval': interval,
                },
            }
            kwargs['aggregations'][date_range_aggregation_name] = {
                'meta': {
                    '_type': 'haystack_date_range',
                },
                'date_range': {  # agg type
                    'field': facet_fieldname,
                    'ranges': [{
                        'from': self._from_python(value.get('start_date')),
                        'to': self._from_python(value.get('end_date')),
                    }]
                }
            }

    if query_facets is not None:
        kwargs.setdefault('aggregations', {})
        for facet_fieldname, value in query_facets:
            kwargs['aggregations'][facet_fieldname] = {
                'filter': {
                    'query_string': {
                        'query': value,
                    }
                }
            }

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(
            settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models:
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if len(model_choices) > 0:
        filters.append({"terms": {DJANGO_CT: model_choices}})

    for q in narrow_queries:
        filters.append({'query_string': {'query': q}})

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
        within_filter = {
            "geo_bounding_box": {
                within['field']: {
                    "top_left": {
                        "lat": north,
                        "lon": west
                    },
                    "bottom_right": {
                        "lat": south,
                        "lon": east
                    }
                }
            },
        }
        filters.append(within_filter)

    if dwithin is not None:
        lng, lat = dwithin['point'].get_coords()

        # NB: the 1.0.0 release of elasticsearch introduced an
        # incompatible change in the distance filter formatting.
        if elasticsearch.VERSION >= (1, 0, 0):
            distance = "%(dist).6f%(unit)s" % {
                'dist': dwithin['distance'].km,
                'unit': "km"
            }
        else:
            distance = dwithin['distance'].km

        dwithin_filter = {
            "geo_distance": {
                "distance": distance,
                dwithin['field']: {
                    "lat": lat,
                    "lon": lng
                }
            }
        }
        filters.append(dwithin_filter)

    # If there is anything to filter on, wrap the query in a bool query that
    # carries the filter(s).
    if filters:
        kwargs["query"] = {"bool": {"must": kwargs.pop("query")}}
        if len(filters) == 1:
            kwargs['query']['bool']["filter"] = filters[0]
        else:
            kwargs['query']['bool']["filter"] = {"bool": {"must": filters}}

    if extra_kwargs:
        kwargs.update(extra_kwargs)

    return kwargs
def build_search_kwargs(
    self,
    query_string,
    sort_by=None,
    start_offset=0,
    end_offset=None,
    fields="",
    highlight=False,
    facets=None,
    date_facets=None,
    query_facets=None,
    narrow_queries=None,
    spelling_query=None,
    within=None,
    dwithin=None,
    distance_point=None,
    models=None,
    limit_to_registered_models=None,
    result_class=None,
    pivot_facets=None,
    rows=None,
    group=None,
):
    """Build the Solr request parameters for a search.

    :param sort_by: Sort string such as ``"title asc"``. ``"distance asc"``
        and ``"distance desc"`` become ``geodist()`` sorts, but only when
        ``distance_point`` is given; otherwise a warning is issued and the
        string is passed through unchanged.
    :param start_offset: Value for ``start``; ``None`` omits it.
    :param end_offset: When given, ``rows`` is ``end_offset - start_offset``,
        with a ``None`` start treated as 0.
    :param fields: Value for ``fl``; a list/set is space-joined.
    :param highlight: Only the literal ``True`` enables highlighting.
    :param facets: Passed through as ``facet.field``.
    :param date_facets: Mapping of field name to a dict holding ``gap_by``,
        optional ``gap_amount`` (default 1), ``start_date``, ``end_date``.
    :param query_facets: Iterable of ``(field, value)`` pairs.
    :param narrow_queries: Iterable of filter queries for ``fq``. It is
        copied, never modified.
    :param spelling_query: Value for ``spellcheck.q`` when spelling is on.
    :param within: Dict with ``field``, ``point_1`` and ``point_2``.
    :param dwithin: Dict with ``field``, ``point`` and ``distance``.
    :param distance_point: Dict with ``field`` and ``point``.
    :param models: Model classes the search is restricted to.
    :param limit_to_registered_models: Falls back to the
        ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when ``None``.
    :param pivot_facets: Passed through as ``facet.pivot``.
    :param rows: Explicit ``rows`` value; overrides the offset-derived one.
    :param group: Dict with ``field``, ``facet`` and ``ngroup`` keys.
    :return: Dict of request parameters.

    ``query_string`` and ``result_class`` are accepted for signature
    compatibility but are not used here.
    """
    kwargs = {"fl": "* score"}

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)
        kwargs["fl"] = fields

    if sort_by is not None:
        if distance_point:
            # Reference point/field for the geo-enabled sort.
            lng, lat = distance_point["point"].get_coords()
            kwargs["sfield"] = distance_point["field"]
            kwargs["pt"] = "%s,%s" % (lat, lng)

        if distance_point and sort_by in ("distance asc", "distance desc"):
            kwargs["sort"] = "geodist() asc" if sort_by == "distance asc" else "geodist() desc"
        else:
            if sort_by.startswith("distance "):
                warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")
            # Regular sorting.
            kwargs["sort"] = sort_by

    if start_offset is not None:
        kwargs["start"] = start_offset

    if end_offset is not None:
        # start_offset may legitimately be None (see the check above).
        kwargs["rows"] = end_offset - (start_offset or 0)

    if highlight is True:
        kwargs["hl"] = "true"
        kwargs["hl.fragsize"] = "200"

    if self.include_spelling is True:
        kwargs["spellcheck"] = "true"
        kwargs["spellcheck.collate"] = "true"
        kwargs["spellcheck.count"] = 1
        if spelling_query:
            kwargs["spellcheck.q"] = spelling_query

    if facets is not None:
        kwargs["facet"] = "on"
        kwargs["facet.field"] = facets
        kwargs["facet.limit"] = 350

    if pivot_facets is not None:
        kwargs["facet"] = "on"
        kwargs["facet.pivot"] = pivot_facets
        # Replaces the limit set for plain field facets above, if any.
        kwargs["facet.limit"] = 400

    if group is not None:
        kwargs["group"] = "true"
        kwargs["group.field"] = group["field"]
        kwargs["group.facet"] = "true" if group["facet"] else "false"
        # Solr's parameter name is plural ("group.ngroups").
        kwargs["group.ngroups"] = group["ngroup"]

    if rows is not None:
        kwargs["rows"] = rows

    if date_facets is not None:
        kwargs["facet"] = "on"
        kwargs["facet.date"] = list(date_facets)
        kwargs["facet.date.other"] = "none"
        for key, value in date_facets.items():
            kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(value.get("start_date"))
            kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(value.get("end_date"))
            gap_by_string = value.get("gap_by").upper()
            gap_amount = value.get("gap_amount", 1)
            gap_string = "%d%s" % (gap_amount, gap_by_string)
            if gap_amount != 1:
                gap_string += "S"
            kwargs["f.%s.facet.date.gap" % key] = "+%s/%s" % (gap_string, gap_by_string)

    if query_facets is not None:
        kwargs["facet"] = "on"
        kwargs["facet.query"] = ["%s:%s" % (field, value) for field, value in query_facets]

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

    if models:
        model_choices = sorted(["%s.%s" % (model._meta.app_label, model._meta.module_name) for model in models])
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if narrow_queries is not None:
        # Private copy so the model restriction never leaks to the caller.
        narrow_queries = set(narrow_queries)

    if len(model_choices) > 0:
        if narrow_queries is None:
            narrow_queries = set()
        narrow_queries.add("%s:(%s)" % (DJANGO_CT, " OR ".join(model_choices)))

    if narrow_queries is not None:
        kwargs["fq"] = list(narrow_queries)

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        kwargs.setdefault("fq", [])
        ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within["point_1"], within["point_2"])
        # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
        # very clear on this.
        bbox = "%s:[%s,%s TO %s,%s]" % (within["field"], min_lat, min_lng, max_lat, max_lng)
        kwargs["fq"].append(bbox)

    if dwithin is not None:
        kwargs.setdefault("fq", [])
        lng, lat = dwithin["point"].get_coords()
        geofilt = "{!geofilt pt=%s,%s sfield=%s d=%s}" % (lat, lng, dwithin["field"], dwithin["distance"].km)
        kwargs["fq"].append(geofilt)

    # NOTE: distances are deliberately not requested in the results (no
    # ``_dist_:geodist()`` in ``fl``). The original author found that Solr 4.X
    # could not be handed a bounding box together with a distance request.

    return kwargs
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None):
    """Build all the kwargs necessary to perform the query.

    :param query_string: Query string; ``'*:*'`` is turned into ``match_all``.
    :type query_string: str
    :param sort_by: Iterable of ``(field, direction)`` pairs.
    :param start_offset: Where the slice begins for a partial query (not
        used by this method).
    :type start_offset: int
    :param end_offset: Where the slice ends for a partial query (not used by
        this method).
    :type end_offset: int
    :param fields: Field name(s) to return; a list/set is space-joined.
    :type fields: str
    :param highlight: Only the literal ``True`` enables highlighting.
    :param facets: Mapping of field name to a dict of extra terms options.
        ``global_scope`` and ``facet_filter`` are lifted to the facet level.
        The option dicts are copied, never modified.
    :param date_facets: Mapping of field name to a dict holding ``gap_by``,
        optional ``gap_amount``, ``start_date`` and ``end_date``.
    :param query_facets: Iterable of ``(field, query)`` pairs.
    :param narrow_queries: Iterable of query strings, each added as a filter.
    :param spelling_query: Text for the suggester; defaults to
        ``query_string``.
    :param within: Dict with ``field``, ``point_1`` and ``point_2``.
    :param dwithin: Dict with ``field``, ``point`` and ``distance``.
    :param distance_point: Dict with ``field`` and ``point``; needed to sort
        by ``distance``.
    :param models: List of models the query will be performed over.
    :type models: list
    :param limit_to_registered_models: Falls back to the
        ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when ``None``.
    :param result_class: Class used for search results (not used by this
        method).
    :type result_class: object
    :return: Search kwargs. The selected model identifiers are returned under
        the ``'models'`` key.
    :rtype: dict
    """
    if query_string == '*:*':
        kwargs = {
            'query': {
                "match_all": {}
            },
        }
    else:
        kwargs = {
            'query': {
                'query_string': {
                    'default_operator': DEFAULT_OPERATOR,
                    'query': query_string,
                    'analyze_wildcard': True,
                    'auto_generate_phrase_queries': True,
                },
            },
        }

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models:
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    kwargs['models'] = model_choices

    filters = []

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)
        kwargs['fields'] = fields

    if sort_by is not None:
        order_list = []
        for field, direction in sort_by:
            if field == 'distance' and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                sort_kwargs = {
                    "_geo_distance": {
                        distance_point['field']: [lng, lat],
                        "order": direction,
                        "unit": "km"
                    }
                }
            else:
                if field == 'distance':
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")
                # Regular sorting.
                sort_kwargs = {field: {'order': direction}}
            order_list.append(sort_kwargs)
        kwargs['sort'] = order_list

    if highlight is True:
        kwargs['highlight'] = {
            'fields': {
                '_all': {'store': 'yes'},
            }
        }

    if self.include_spelling:
        kwargs['suggest'] = {
            'suggest': {
                'text': spelling_query or query_string,
                'term': {
                    # Using content_field here will result in suggestions of stemmed words.
                    'field': '_all',
                },
            },
        }

    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault('facets', {})
        for facet_fieldname, extra_options in facets.items():
            # Copy first: the pops below must not strip options from the
            # caller's dict, or a repeated call would lose them.
            extra_options = dict(extra_options or {})
            facet_options = {
                'terms': {
                    'field': facet_fieldname,
                    'size': 100,
                },
            }
            # Special cases for options applied at the facet level (not the terms level).
            if extra_options.pop('global_scope', False):
                # Renamed "global_scope" since "global" is a python keyword.
                facet_options['global'] = True
            if 'facet_filter' in extra_options:
                facet_options['facet_filter'] = extra_options.pop('facet_filter')
            facet_options['terms'].update(extra_options)
            kwargs['facets'][facet_fieldname] = facet_options

    if date_facets is not None:
        kwargs.setdefault('facets', {})
        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get('gap_by').lower()
            # Need to detect on amount (can't be applied on months or years).
            if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                # Just the first character is valid for use.
                interval = "%s%s" % (value['gap_amount'], interval[:1])
            kwargs['facets'][facet_fieldname] = {
                'date_histogram': {
                    'field': facet_fieldname,
                    'interval': interval,
                },
                'facet_filter': {
                    "range": {
                        facet_fieldname: {
                            'from': self._from_python(value.get('start_date')),
                            'to': self._from_python(value.get('end_date')),
                        }
                    }
                }
            }

    if query_facets is not None:
        kwargs.setdefault('facets', {})
        for facet_fieldname, value in query_facets:
            kwargs['facets'][facet_fieldname] = {
                'query': {
                    'query_string': {
                        'query': value,
                    }
                },
            }

    for q in narrow_queries:
        filters.append({
            'fquery': {
                'query': {
                    'query_string': {
                        'query': q
                    },
                },
                '_cache': True,
            }
        })

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
        within_filter = {
            "geo_bounding_box": {
                within['field']: {
                    "top_left": {
                        "lat": north,
                        "lon": west
                    },
                    "bottom_right": {
                        "lat": south,
                        "lon": east
                    }
                }
            },
        }
        filters.append(within_filter)

    if dwithin is not None:
        lng, lat = dwithin['point'].get_coords()
        dwithin_filter = {
            "geo_distance": {
                "distance": dwithin['distance'].km,
                dwithin['field']: {
                    "lat": lat,
                    "lon": lng
                }
            }
        }
        filters.append(dwithin_filter)

    # If there is anything to filter on, change the query type to "filtered".
    if filters:
        kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
        if len(filters) == 1:
            kwargs['query']['filtered']["filter"] = filters[0]
        else:
            kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}

    return kwargs
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None, stats=None):
    """Build the Solr request parameters for a search.

    :param sort_by: Sort string such as ``'title asc'``. ``'distance asc'``
        and ``'distance desc'`` become ``geodist()`` sorts, but only when
        ``distance_point`` is given; otherwise a warning is issued and the
        string is passed through unchanged.
    :param start_offset: Value for ``start``; ``None`` omits it.
    :param end_offset: When given, ``rows`` is ``end_offset - start_offset``,
        with a ``None`` start treated as 0.
    :param fields: Value for ``fl``; a list/set is space-joined.
    :param highlight: Only the literal ``True`` enables highlighting.
    :param facets: Mapping of field name to a dict of per-field facet options.
    :param date_facets: Mapping of field name to a dict holding ``gap_by``,
        optional ``gap_amount`` (default 1), ``start_date``, ``end_date``.
    :param query_facets: Iterable of ``(field, value)`` pairs.
    :param narrow_queries: Iterable of filter queries for ``fq``. It is
        copied, never modified.
    :param spelling_query: Value for ``spellcheck.q`` when spelling is on.
    :param within: Dict with ``field``, ``point_1`` and ``point_2``.
    :param dwithin: Dict with ``field``, ``point`` and ``distance``.
    :param distance_point: Dict with ``field`` and ``point``.
    :param models: Model classes the search is restricted to.
    :param limit_to_registered_models: Falls back to the
        ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when ``None``.
    :param stats: Mapping of stats field name to an iterable of facet field
        names. Every field and facet is emitted.
    :return: Dict of request parameters.

    ``query_string`` and ``result_class`` are accepted for signature
    compatibility but are not used here.
    """
    kwargs = {'fl': '* score'}

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)
        kwargs['fl'] = fields

    if sort_by is not None:
        if sort_by in ('distance asc', 'distance desc') and distance_point:
            # Do the geo-enabled sort.
            lng, lat = distance_point['point'].get_coords()
            kwargs['sfield'] = distance_point['field']
            kwargs['pt'] = '%s,%s' % (lat, lng)
            kwargs['sort'] = 'geodist() asc' if sort_by == 'distance asc' else 'geodist() desc'
        else:
            if sort_by.startswith('distance '):
                warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")
            # Regular sorting.
            kwargs['sort'] = sort_by

    if start_offset is not None:
        kwargs['start'] = start_offset

    if end_offset is not None:
        # start_offset may legitimately be None (see the check above).
        kwargs['rows'] = end_offset - (start_offset or 0)

    if highlight is True:
        kwargs['hl'] = 'true'
        kwargs['hl.fragsize'] = '200'

    if self.include_spelling is True:
        kwargs['spellcheck'] = 'true'
        kwargs['spellcheck.collate'] = 'true'
        kwargs['spellcheck.count'] = 1
        if spelling_query:
            kwargs['spellcheck.q'] = spelling_query

    if facets is not None:
        kwargs['facet'] = 'on'
        kwargs['facet.field'] = list(facets)
        for facet_field, options in facets.items():
            for key, value in options.items():
                kwargs['f.%s.facet.%s' % (facet_field, key)] = self.conn._from_python(value)

    if date_facets is not None:
        kwargs['facet'] = 'on'
        kwargs['facet.date'] = list(date_facets)
        kwargs['facet.date.other'] = 'none'
        for key, value in date_facets.items():
            kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(value.get('start_date'))
            kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(value.get('end_date'))
            gap_by_string = value.get('gap_by').upper()
            gap_amount = value.get('gap_amount', 1)
            gap_string = "%d%s" % (gap_amount, gap_by_string)
            if gap_amount != 1:
                gap_string += "S"
            kwargs["f.%s.facet.date.gap" % key] = '+%s/%s' % (gap_string, gap_by_string)

    if query_facets is not None:
        kwargs['facet'] = 'on'
        kwargs['facet.query'] = ["%s:%s" % (field, value) for field, value in query_facets]

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models:
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if narrow_queries is not None:
        # Private copy so the model restriction never leaks to the caller.
        narrow_queries = set(narrow_queries)

    if len(model_choices) > 0:
        if narrow_queries is None:
            narrow_queries = set()
        narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices)))

    if narrow_queries is not None:
        kwargs['fq'] = list(narrow_queries)

    if stats:
        kwargs['stats'] = "true"
        # Emit every stats field and every facet as a multi-valued parameter;
        # assigning inside the loop kept only the last one.
        kwargs['stats.field'] = list(stats)
        for stats_field, stats_facets in stats.items():
            stats_facets = list(stats_facets)
            if stats_facets:
                kwargs['f.%s.stats.facet' % stats_field] = stats_facets

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        kwargs.setdefault('fq', [])
        ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
        # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
        # very clear on this.
        bbox = '%s:[%s,%s TO %s,%s]' % (within['field'], min_lat, min_lng, max_lat, max_lng)
        kwargs['fq'].append(bbox)

    if dwithin is not None:
        kwargs.setdefault('fq', [])
        lng, lat = dwithin['point'].get_coords()
        geofilt = '{!geofilt pt=%s,%s sfield=%s d=%s}' % (lat, lng, dwithin['field'], dwithin['distance'].km)
        kwargs['fq'].append(geofilt)

    # NOTE: distances are deliberately not requested in the results (no
    # ``_dist_:geodist()`` in ``fl``). The original author found that Solr 4.X
    # could not be handed a bounding box together with a distance request.

    return kwargs
def build_search_kwargs(
    self,
    query_string,
    sort_by=None,
    start_offset=0,
    end_offset=None,
    fields="",
    highlight=False,
    facets=None,
    date_facets=None,
    query_facets=None,
    narrow_queries=None,
    spelling_query=None,
    within=None,
    dwithin=None,
    distance_point=None,
    models=None,
    limit_to_registered_models=None,
    result_class=None,
    stats=None,
    collate=None,
    **extra_kwargs
):
    """Build the Solr request parameters for a search.

    :param sort_by: Sort string such as ``"title asc"``. ``"distance asc"``
        and ``"distance desc"`` become ``geodist()`` sorts, but only when
        ``distance_point`` is given; otherwise a warning is issued and the
        string is passed through unchanged.
    :param start_offset: Value for ``start``; ``None`` omits it.
    :param end_offset: When given, ``rows`` is ``end_offset - start_offset``,
        with a ``None`` start treated as 0.
    :param fields: Value for ``fl``; a list/set is space-joined.
    :param highlight: ``True`` or a dict of highlighter options. Keys not
        already starting with ``hl.`` get that prefix.
    :param facets: Mapping of field name to a dict of per-field facet options.
    :param date_facets: Mapping of field name to a dict holding ``gap_by``,
        optional ``gap_amount`` (default 1), ``start_date``, ``end_date``.
    :param query_facets: Iterable of ``(field, value)`` pairs.
    :param narrow_queries: Iterable of filter queries for ``fq``. It is
        copied, never modified.
    :param spelling_query: Value for ``spellcheck.q`` when spelling is on.
    :param within: Dict with ``field``, ``point_1`` and ``point_2``.
    :param dwithin: Dict with ``field``, ``point`` and ``distance``.
    :param distance_point: Dict with ``field`` and ``point``.
    :param models: Model classes the search is restricted to.
    :param limit_to_registered_models: Falls back to the
        ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` setting when ``None``.
    :param stats: Mapping of stats field name to an iterable of facet field
        names. Every field and facet is emitted.
    :param collate: Spellcheck collation flag; defaults to ``self.collate``.
    :param extra_kwargs: Merged last into the result, overriding any key
        built here.
    :return: Dict of request parameters.

    ``query_string`` and ``result_class`` are accepted for signature
    compatibility but are not used here.
    """
    index = haystack.connections[self.connection_alias].get_unified_index()

    kwargs = {"fl": "* score", "df": index.document_field}

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)
        kwargs["fl"] = fields

    if sort_by is not None:
        if sort_by in ("distance asc", "distance desc") and distance_point:
            # Do the geo-enabled sort.
            lng, lat = distance_point["point"].coords
            kwargs["sfield"] = distance_point["field"]
            kwargs["pt"] = "%s,%s" % (lat, lng)
            kwargs["sort"] = "geodist() asc" if sort_by == "distance asc" else "geodist() desc"
        else:
            if sort_by.startswith("distance "):
                warnings.warn(
                    "In order to sort by distance, you must call the '.distance(...)' method."
                )
            # Regular sorting.
            kwargs["sort"] = sort_by

    if start_offset is not None:
        kwargs["start"] = start_offset

    if end_offset is not None:
        # start_offset may legitimately be None (see the check above).
        kwargs["rows"] = end_offset - (start_offset or 0)

    if highlight:
        # `highlight` can either be True or a dictionary containing custom
        # parameters which will be passed to the backend and may override
        # our default settings:
        kwargs["hl"] = "true"
        kwargs["hl.fragsize"] = "200"

        if isinstance(highlight, dict):
            # Autoprefix highlighter options with 'hl.'; all of them start
            # with it anyway. This keeps option dicts short
            # ({'maxAnalyzedChars': 42}) and lets options be given as keyword
            # arguments: `.highlight(preserveMulti=False)`.
            kwargs.update(
                {
                    (key if key.startswith("hl.") else "hl." + key): value
                    for key, value in highlight.items()
                }
            )

    if collate is None:
        collate = self.collate

    if self.include_spelling is True:
        kwargs["spellcheck"] = "true"
        kwargs["spellcheck.collate"] = str(collate).lower()
        kwargs["spellcheck.count"] = 1
        if spelling_query:
            kwargs["spellcheck.q"] = spelling_query

    if facets is not None:
        kwargs["facet"] = "on"
        kwargs["facet.field"] = list(facets)
        for facet_field, options in facets.items():
            for key, value in options.items():
                kwargs["f.%s.facet.%s" % (facet_field, key)] = self.conn._from_python(value)

    if date_facets is not None:
        kwargs["facet"] = "on"
        kwargs["facet.date"] = list(date_facets)
        kwargs["facet.date.other"] = "none"
        for key, value in date_facets.items():
            kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(
                value.get("start_date")
            )
            kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(
                value.get("end_date")
            )
            gap_by_string = value.get("gap_by").upper()
            gap_amount = value.get("gap_amount", 1)
            gap_string = "%d%s" % (gap_amount, gap_by_string)
            if gap_amount != 1:
                gap_string += "S"
            kwargs["f.%s.facet.date.gap" % key] = "+%s/%s" % (
                gap_string,
                gap_by_string,
            )

    if query_facets is not None:
        kwargs["facet"] = "on"
        kwargs["facet.query"] = [
            "%s:%s" % (field, value) for field, value in query_facets
        ]

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(
            settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True
        )

    if models:
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if narrow_queries is not None:
        # Private copy so the model restriction never leaks to the caller.
        narrow_queries = set(narrow_queries)

    if len(model_choices) > 0:
        if narrow_queries is None:
            narrow_queries = set()
        narrow_queries.add("%s:(%s)" % (DJANGO_CT, " OR ".join(model_choices)))

    if narrow_queries is not None:
        kwargs["fq"] = list(narrow_queries)

    if stats:
        kwargs["stats"] = "true"
        # Emit every stats field and every facet as a multi-valued parameter;
        # assigning inside the loop kept only the last one.
        kwargs["stats.field"] = list(stats)
        for stats_field, stats_facets in stats.items():
            stats_facets = list(stats_facets)
            if stats_facets:
                kwargs["f.%s.stats.facet" % stats_field] = stats_facets

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        kwargs.setdefault("fq", [])
        ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(
            within["point_1"], within["point_2"]
        )
        # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
        # very clear on this.
        bbox = "%s:[%s,%s TO %s,%s]" % (
            within["field"],
            min_lat,
            min_lng,
            max_lat,
            max_lng,
        )
        kwargs["fq"].append(bbox)

    if dwithin is not None:
        kwargs.setdefault("fq", [])
        lng, lat = dwithin["point"].coords
        geofilt = "{!geofilt pt=%s,%s sfield=%s d=%s}" % (
            lat,
            lng,
            dwithin["field"],
            dwithin["distance"].km,
        )
        kwargs["fq"].append(geofilt)

    # NOTE: distances are deliberately not requested in the results (no
    # ``_dist_:geodist()`` in ``fl``). The original author found that Solr 4.X
    # could not be handed a bounding box together with a distance request.

    if extra_kwargs:
        kwargs.update(extra_kwargs)

    return kwargs
def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
           fields='', highlight=False, facets=None, date_facets=None, query_facets=None,
           narrow_queries=None, spelling_query=None,
           within=None, dwithin=None, distance_point=None,
           limit_to_registered_models=None, result_class=None, **kwargs):
    """
    Build an Elasticsearch request body from the search options and run it.

    An empty ``query_string`` short-circuits to ``{'results': [], 'hits': 0}``
    without touching the backend. Otherwise the query is sent through
    ``self.conn.search`` and the raw response is left in ``raw_results``
    (this block does not post-process or return it).

    ``sort_by`` is expected to be a string here; ``'distance asc'`` and
    ``'distance desc'`` trigger a geo-distance sort when ``distance_point``
    (a dict with ``'point'`` and ``'field'``) is supplied.
    ``within`` is a dict with ``'point_1'``, ``'point_2'`` and ``'field'``;
    ``dwithin`` is a dict with ``'point'``, ``'distance'`` and ``'field'``.
    Narrow queries and both geo filters are ANDed together when more than
    one is present.

    ``spelling_query`` and ``result_class`` are accepted but not used in
    this block. The incoming ``**kwargs`` are discarded: the name is reused
    for the request body.

    Backend errors are re-raised unless ``self.silently_fail`` is set, in
    which case they are logged and ``raw_results`` becomes ``{}``.
    """
    if len(query_string) == 0:
        return {
            'results': [],
            'hits': 0,
        }

    if not self.setup_complete:
        self.setup()

    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    if query_string == '*:*':
        kwargs = {
            'query': {
                'filtered': {
                    'query': {
                        'query_string': {
                            'query': '*:*',
                        },
                    },
                },
            },
        }
    else:
        kwargs = {
            'query': {
                'filtered': {
                    'query': {
                        'query_string': {
                            'default_field': content_field,
                            'default_operator': DEFAULT_OPERATOR,
                            'query': query_string,
                            'analyze_wildcard': True,
                            'auto_generate_phrase_queries': True,
                        },
                    },
                },
            },
        }

    # Records whether a geo-distance sort was applied (not read again here).
    geo_sort = False

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs['fields'] = fields

    if sort_by is not None:
        if sort_by in ['distance asc', 'distance desc'] and distance_point:
            # Do the geo-enabled sort.
            lng, lat = distance_point['point'].get_coords()

            if sort_by == 'distance asc':
                order = 'asc'
            else:
                order = 'desc'

            # The order lives directly under "_geo_distance"; there is no
            # intermediate "sort" key inside this dict.
            sort_kwargs = {
                "_geo_distance": {
                    distance_point['field']: [lng, lat],
                    "order": order,
                    "unit": "km"
                }
            }
            geo_sort = True
            kwargs['sort'] = [sort_kwargs]
        else:
            if sort_by.startswith('distance '):
                warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

            # Regular sorting.
            # NOTE(review): this iterates ``self.order_by`` rather than the
            # ``sort_by`` argument -- confirm the backend defines it.
            order_by_list = []

            for order_by in self.order_by:
                if order_by.startswith('-'):
                    order_by_list.append({order_by[1:]: 'desc'})
                else:
                    order_by_list.append({order_by: 'asc'})

            kwargs['sort'] = order_by_list

    # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
    # They are passed as query parameters further down instead.

    if highlight is True:
        kwargs['highlight'] = {
            'fields': {
                content_field: {'store': 'yes'},
            }
        }

    if self.include_spelling is True:
        warnings.warn("Elasticsearch does not handle spelling suggestions.", Warning, stacklevel=2)

    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname in facets:
            kwargs['facets'][facet_fieldname] = {
                'terms': {
                    'field': facet_fieldname,
                },
            }

    if date_facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get('gap_by').lower()

            # Need to detect on amount (can't be applied on months or years).
            if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                # Just the first character is valid for use.
                interval = "%s%s" % (value['gap_amount'], interval[:1])

            kwargs['facets'][facet_fieldname] = {
                'date_histogram': {
                    'field': facet_fieldname,
                    'interval': interval,
                },
                'facet_filter': {
                    "range": {
                        facet_fieldname: {
                            'from': self.conn.from_python(value.get('start_date')),
                            'to': self.conn.from_python(value.get('end_date')),
                        }
                    }
                }
            }

    if query_facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, value in query_facets:
            kwargs['facets'][facet_fieldname] = {
                'query': {
                    'query_string': {
                        'query': value,
                    }
                },
            }

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        registered_models = self.build_models_list()

        if len(registered_models) > 0:
            narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(registered_models)))

    # Collect every filter first so that none of them overwrites another.
    filters = []

    if narrow_queries:
        filters.append({
            'fquery': {
                'query': {
                    'query_string': {
                        'query': u' AND '.join(list(narrow_queries)),
                    },
                },
                '_cache': True,
            }
        })

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
        # top_left is the north-west corner (max lat, min lng) and
        # bottom_right the south-east corner (min lat, max lng).
        filters.append({
            "geo_bounding_box": {
                within['field']: {
                    "top_left": {
                        "lat": max_lat,
                        "lon": min_lng
                    },
                    "bottom_right": {
                        "lat": min_lat,
                        "lon": max_lng
                    }
                }
            },
        })

    if dwithin is not None:
        lng, lat = dwithin['point'].get_coords()
        filters.append({
            "geo_distance": {
                "distance": dwithin['distance'].km,
                dwithin['field']: {
                    "lat": lat,
                    "lon": lng
                }
            }
        })

    if len(filters) == 1:
        kwargs['query']['filtered']['filter'] = filters[0]
    elif filters:
        kwargs['query']['filtered']['filter'] = {"and": filters}
    else:
        # Remove the "filtered" key if we're not filtering. Otherwise,
        # Elasticsearch will blow up.
        kwargs['query'] = kwargs['query']['filtered']['query']

    # Because Elasticsearch.
    query_params = {
        'from': start_offset,
    }

    if end_offset is not None and end_offset > start_offset:
        query_params['size'] = end_offset - start_offset

    try:
        raw_results = self.conn.search(None, kwargs, indexes=[self.index_name], doc_types=['modelresult'], **query_params)
    except (requests.RequestException, pyelasticsearch.ElasticSearchError) as e:
        if not self.silently_fail:
            raise

        self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e)
        raw_results = {}
def search(
    self,
    query_string,
    sort_by=None,
    start_offset=0,
    end_offset=None,
    fields="",
    highlight=False,
    facets=None,
    date_facets=None,
    query_facets=None,
    narrow_queries=None,
    spelling_query=None,
    within=None,
    dwithin=None,
    distance_point=None,
    models=None,
    limit_to_registered_models=None,
    result_class=None,
    **kwargs
):
    """
    Build an Elasticsearch request body from the search options and run it.

    An empty ``query_string`` short-circuits to ``{"results": [], "hits": 0}``
    without touching the backend. Otherwise the query is sent through
    ``self.conn.search`` and the raw response is left in ``raw_results``
    (this block does not post-process or return it).

    ``sort_by`` is an iterable of ``(field, direction)`` pairs; the field
    ``"distance"`` becomes a geo-distance sort when ``distance_point`` (a
    dict with ``"point"`` and ``"field"``) is supplied.
    ``models`` restricts results to the given model classes; without it,
    ``limit_to_registered_models`` decides whether the registered models
    are used as the restriction.
    ``within`` is a dict with ``"point_1"``, ``"point_2"`` and ``"field"``;
    ``dwithin`` is a dict with ``"point"``, ``"distance"`` and ``"field"``.
    Narrow queries and both geo filters are ANDed together when more than
    one is present.

    ``spelling_query`` and ``result_class`` are accepted but not used in
    this block. The incoming ``**kwargs`` are discarded: the name is reused
    for the request body.

    Backend errors are re-raised unless ``self.silently_fail`` is set, in
    which case they are logged and ``raw_results`` becomes ``{}``.
    """
    if len(query_string) == 0:
        return {"results": [], "hits": 0}

    if not self.setup_complete:
        self.setup()

    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    if query_string == "*:*":
        kwargs = {"query": {"filtered": {"query": {"query_string": {"query": "*:*"}}}}}
    else:
        kwargs = {
            "query": {
                "filtered": {
                    "query": {
                        "query_string": {
                            "default_field": content_field,
                            "default_operator": DEFAULT_OPERATOR,
                            "query": query_string,
                            "analyze_wildcard": True,
                            "auto_generate_phrase_queries": True,
                        }
                    }
                }
            }
        }

    # Assigned but not read again within this block.
    geo_sort = False

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs["fields"] = fields

    if sort_by is not None:
        order_list = []

        for field, direction in sort_by:
            if field == "distance" and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point["point"].get_coords()
                sort_kwargs = {
                    "_geo_distance": {distance_point["field"]: [lng, lat], "order": direction, "unit": "km"}
                }
            else:
                if field == "distance":
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                # Regular sorting.
                sort_kwargs = {field: {"order": direction}}

            order_list.append(sort_kwargs)

        kwargs["sort"] = order_list

    # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
    # They are passed as query parameters further down instead.

    if highlight is True:
        kwargs["highlight"] = {"fields": {content_field: {"store": "yes"}}}

    if self.include_spelling is True:
        warnings.warn("Elasticsearch does not handle spelling suggestions.", Warning, stacklevel=2)

    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault("facets", {})

        for facet_fieldname in facets:
            kwargs["facets"][facet_fieldname] = {"terms": {"field": facet_fieldname}}

    if date_facets is not None:
        kwargs.setdefault("facets", {})

        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get("gap_by").lower()

            # Need to detect on amount (can't be applied on months or years).
            if value.get("gap_amount", 1) != 1 and interval not in ("month", "year"):
                # Just the first character is valid for use.
                interval = "%s%s" % (value["gap_amount"], interval[:1])

            kwargs["facets"][facet_fieldname] = {
                "date_histogram": {"field": facet_fieldname, "interval": interval},
                "facet_filter": {
                    "range": {
                        facet_fieldname: {
                            "from": self.conn.from_python(value.get("start_date")),
                            "to": self.conn.from_python(value.get("end_date")),
                        }
                    }
                },
            }

    if query_facets is not None:
        kwargs.setdefault("facets", {})

        for facet_fieldname, value in query_facets:
            kwargs["facets"][facet_fieldname] = {"query": {"query_string": {"query": value}}}

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

    if models and len(models):
        model_choices = sorted(["%s.%s" % (model._meta.app_label, model._meta.module_name) for model in models])
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if len(model_choices) > 0:
        # narrow_queries is already guaranteed to be a set at this point.
        narrow_queries.add("%s:(%s)" % (DJANGO_CT, " OR ".join(model_choices)))

    # Collect every filter first so that none of them overwrites another.
    filters = []

    if narrow_queries:
        filters.append(
            {"fquery": {"query": {"query_string": {"query": u" AND ".join(list(narrow_queries))}}, "_cache": True}}
        )

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within["point_1"], within["point_2"])
        # top_left is the north-west corner (max lat, min lng) and
        # bottom_right the south-east corner (min lat, max lng).
        filters.append(
            {
                "geo_bounding_box": {
                    within["field"]: {
                        "top_left": {"lat": max_lat, "lon": min_lng},
                        "bottom_right": {"lat": min_lat, "lon": max_lng},
                    }
                }
            }
        )

    if dwithin is not None:
        lng, lat = dwithin["point"].get_coords()
        filters.append(
            {"geo_distance": {"distance": dwithin["distance"].km, dwithin["field"]: {"lat": lat, "lon": lng}}}
        )

    if len(filters) == 1:
        kwargs["query"]["filtered"]["filter"] = filters[0]
    elif filters:
        kwargs["query"]["filtered"]["filter"] = {"and": filters}
    else:
        # Remove the "filtered" key if we're not filtering. Otherwise,
        # Elasticsearch will blow up.
        kwargs["query"] = kwargs["query"]["filtered"]["query"]

    # Because Elasticsearch.
    query_params = {"from": start_offset}

    if end_offset is not None and end_offset > start_offset:
        query_params["size"] = end_offset - start_offset

    try:
        raw_results = self.conn.search(
            None, kwargs, indexes=[self.index_name], doc_types=["modelresult"], **query_params
        )
    except (requests.RequestException, pyelasticsearch.ElasticSearchError) as e:
        if not self.silently_fail:
            raise

        self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e)
        raw_results = {}
def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
           fields='', highlight=False, facets=None, date_facets=None, query_facets=None,
           narrow_queries=None, spelling_query=None,
           within=None, dwithin=None, distance_point=None,
           limit_to_registered_models=None, result_class=None, **kwargs):
    """
    Translate the search options into Solr parameters and run the query.

    An empty ``query_string`` short-circuits to ``{'results': [], 'hits': 0}``
    without touching the backend. Otherwise the parameters are passed to
    ``self.conn.search`` and the raw response is left in ``raw_results``
    (this block does not post-process or return it).

    ``sort_by`` is expected to be a string here; ``'distance asc'`` and
    ``'distance desc'`` become a ``geodist()`` sort when ``distance_point``
    (a dict with ``'point'`` and ``'field'``) is supplied.
    ``within`` is a dict with ``'point_1'``, ``'point_2'`` and ``'field'``;
    ``dwithin`` is a dict with ``'point'``, ``'distance'`` and ``'field'``.
    Both are appended to the ``fq`` filter list.

    ``result_class`` is accepted but not used in this block. The incoming
    ``**kwargs`` are discarded: the name is reused for the Solr parameters.

    Backend errors are re-raised unless ``self.silently_fail`` is set, in
    which case they are logged and ``raw_results`` becomes
    ``EmptyResults()``.
    """
    if len(query_string) == 0:
        return {
            'results': [],
            'hits': 0,
        }

    kwargs = {
        'fl': '* score',
    }
    # Records whether a geo-distance sort was applied (not read again here).
    geo_sort = False

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs['fl'] = fields

    if sort_by is not None:
        if sort_by in ['distance asc', 'distance desc'] and distance_point:
            # Do the geo-enabled sort.
            lng, lat = distance_point['point'].get_coords()
            kwargs['sfield'] = distance_point['field']
            kwargs['pt'] = '%s,%s' % (lat, lng)
            geo_sort = True

            if sort_by == 'distance asc':
                kwargs['sort'] = 'geodist() asc'
            else:
                kwargs['sort'] = 'geodist() desc'
        else:
            if sort_by.startswith('distance '):
                warnings.warn(
                    "In order to sort by distance, you must call the '.distance(...)' method."
                )

            # Regular sorting.
            kwargs['sort'] = sort_by

    if start_offset is not None:
        kwargs['start'] = start_offset

    if end_offset is not None:
        # Treat a missing start offset as 0 instead of failing on
        # ``int - None``.
        kwargs['rows'] = end_offset - (start_offset or 0)

    if highlight is True:
        kwargs['hl'] = 'true'
        kwargs['hl.fragsize'] = '200'

    if self.include_spelling is True:
        kwargs['spellcheck'] = 'true'
        kwargs['spellcheck.collate'] = 'true'
        kwargs['spellcheck.count'] = 1

        if spelling_query:
            kwargs['spellcheck.q'] = spelling_query

    if facets is not None:
        kwargs['facet'] = 'on'
        kwargs['facet.field'] = facets

    if date_facets is not None:
        kwargs['facet'] = 'on'
        # A real list, not a dict view, so it survives on Python 3 as well.
        kwargs['facet.date'] = list(date_facets.keys())
        kwargs['facet.date.other'] = 'none'

        for key, value in date_facets.items():
            kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(
                value.get('start_date'))
            kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(
                value.get('end_date'))
            gap_by_string = value.get('gap_by').upper()
            gap_string = "%d%s" % (value.get('gap_amount'), gap_by_string)

            # Anything other than a gap of exactly one gets the plural unit.
            if value.get('gap_amount') != 1:
                gap_string += "S"

            kwargs["f.%s.facet.date.gap" % key] = '+%s/%s' % (gap_string, gap_by_string)

    if query_facets is not None:
        kwargs['facet'] = 'on'
        kwargs['facet.query'] = [
            "%s:%s" % (field, value) for field, value in query_facets
        ]

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(
            settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        if narrow_queries is None:
            narrow_queries = set()

        registered_models = self.build_models_list()

        if len(registered_models) > 0:
            narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(registered_models)))

    if narrow_queries is not None:
        kwargs['fq'] = list(narrow_queries)

    if within is not None:
        # Imported locally so this branch does not rely on a module-level
        # import being present.
        from haystack.utils.geo import generate_bounding_box

        kwargs.setdefault('fq', [])
        ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(
            within['point_1'], within['point_2'])
        # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
        # very clear on this.
        bbox = '%s:[%s,%s TO %s,%s]' % (within['field'], min_lat, min_lng, max_lat, max_lng)
        kwargs['fq'].append(bbox)

    if dwithin is not None:
        kwargs.setdefault('fq', [])
        lng, lat = dwithin['point'].get_coords()
        geofilt = '{!geofilt pt=%s,%s sfield=%s d=%s}' % (
            lat, lng, dwithin['field'], dwithin['distance'].km)
        kwargs['fq'].append(geofilt)

    # Check to see if the backend should try to include distances
    # (Solr 4.X+) in the results.
    if self.distance_available and distance_point:
        # In early testing, you can't just hand Solr 4.X a proper bounding box
        # & request distances. To enable native distance would take calculating
        # a center point & a radius off the user-provided box, which kinda
        # sucks. We'll avoid it for now, since Solr 4.x's release will be some
        # time yet.
        # kwargs['fl'] += ' _dist_:geodist()'
        pass

    try:
        raw_results = self.conn.search(query_string, **kwargs)
    except (IOError, SolrError) as e:
        if not self.silently_fail:
            raise

        self.log.error("Failed to query Solr using '%s': %s", query_string, e)
        raw_results = EmptyResults()
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None, stats=None, **extra_kwargs):
    """
    Translate the search options into a dict of Solr request parameters.

    ``sort_by`` is expected to be a string; ``'distance asc'`` and
    ``'distance desc'`` become a ``geodist()`` sort when ``distance_point``
    (a dict with ``'point'`` and ``'field'``) is supplied.
    ``highlight`` may be ``True`` or a dict of extra highlighting
    parameters that override the defaults.
    ``facets`` maps a field name to a dict of per-field facet options.
    ``stats`` maps a stats field name to an iterable of facet fields for
    it; every field and every facet is sent, as list-valued parameters.
    ``models`` restricts results to the given model classes; without it,
    ``limit_to_registered_models`` decides whether the registered models
    are used as the restriction.
    ``within`` is a dict with ``'point_1'``, ``'point_2'`` and ``'field'``;
    ``dwithin`` is a dict with ``'point'``, ``'distance'`` and ``'field'``.
    Both are appended to the ``fq`` filter list.
    ``extra_kwargs`` are merged in last and therefore override any
    parameter computed here.

    ``query_string`` and ``result_class`` are accepted but not used in
    this block.

    Returns the parameter dict.
    """
    kwargs = {'fl': '* score'}

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs['fl'] = fields

    if sort_by is not None:
        if sort_by in ['distance asc', 'distance desc'] and distance_point:
            # Do the geo-enabled sort.
            lng, lat = distance_point['point'].get_coords()
            kwargs['sfield'] = distance_point['field']
            kwargs['pt'] = '%s,%s' % (lat, lng)

            if sort_by == 'distance asc':
                kwargs['sort'] = 'geodist() asc'
            else:
                kwargs['sort'] = 'geodist() desc'
        else:
            if sort_by.startswith('distance '):
                warnings.warn(
                    "In order to sort by distance, you must call the '.distance(...)' method."
                )

            # Regular sorting.
            kwargs['sort'] = sort_by

    if start_offset is not None:
        kwargs['start'] = start_offset

    if end_offset is not None:
        # Treat a missing start offset as 0 instead of failing on
        # ``int - None``.
        kwargs['rows'] = end_offset - (start_offset or 0)

    if highlight:
        # `highlight` can either be True or a dictionary containing custom parameters
        # which will be passed to the backend and may override our default settings:
        kwargs['hl'] = 'true'
        kwargs['hl.fragsize'] = '200'

        if isinstance(highlight, dict):
            kwargs.update(highlight)

    if self.include_spelling is True:
        kwargs['spellcheck'] = 'true'
        kwargs['spellcheck.collate'] = 'true'
        kwargs['spellcheck.count'] = 1

        if spelling_query:
            kwargs['spellcheck.q'] = spelling_query

    if facets is not None:
        kwargs['facet'] = 'on'
        # Real lists, not dict views, so they survive on Python 3 as well.
        kwargs['facet.field'] = list(facets.keys())

        for facet_field, options in facets.items():
            for key, value in options.items():
                kwargs['f.%s.facet.%s' % (facet_field, key)] = self.conn._from_python(value)

    if date_facets is not None:
        kwargs['facet'] = 'on'
        kwargs['facet.date'] = list(date_facets.keys())
        kwargs['facet.date.other'] = 'none'

        for key, value in date_facets.items():
            kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(
                value.get('start_date'))
            kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(
                value.get('end_date'))
            gap_by_string = value.get('gap_by').upper()
            gap_string = "%d%s" % (value.get('gap_amount'), gap_by_string)

            # Anything other than a gap of exactly one gets the plural unit.
            if value.get('gap_amount') != 1:
                gap_string += "S"

            kwargs["f.%s.facet.date.gap" % key] = '+%s/%s' % (gap_string, gap_by_string)

    if query_facets is not None:
        kwargs['facet'] = 'on'
        kwargs['facet.query'] = [
            "%s:%s" % (field, value) for field, value in query_facets
        ]

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(
            settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models and len(models):
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if len(model_choices) > 0:
        if narrow_queries is None:
            narrow_queries = set()

        narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices)))

    if narrow_queries is not None:
        kwargs['fq'] = list(narrow_queries)

    if stats:
        kwargs['stats'] = "true"
        # Send every stats field and every facet as list-valued parameters
        # (like ``facet.field`` above); assigning inside a loop kept only
        # the last one.
        kwargs['stats.field'] = list(stats.keys())

        for stats_field, stats_facets in stats.items():
            stats_facets = list(stats_facets)

            if stats_facets:
                kwargs['f.%s.stats.facet' % stats_field] = stats_facets

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        kwargs.setdefault('fq', [])
        ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(
            within['point_1'], within['point_2'])
        # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
        # very clear on this.
        bbox = '%s:[%s,%s TO %s,%s]' % (within['field'], min_lat, min_lng, max_lat, max_lng)
        kwargs['fq'].append(bbox)

    if dwithin is not None:
        kwargs.setdefault('fq', [])
        lng, lat = dwithin['point'].get_coords()
        geofilt = '{!geofilt pt=%s,%s sfield=%s d=%s}' % (
            lat, lng, dwithin['field'], dwithin['distance'].km)
        kwargs['fq'].append(geofilt)

    # Check to see if the backend should try to include distances
    # (Solr 4.X+) in the results.
    if self.distance_available and distance_point:
        # In early testing, you can't just hand Solr 4.X a proper bounding box
        # & request distances. To enable native distance would take calculating
        # a center point & a radius off the user-provided box, which kinda
        # sucks. We'll avoid it for now, since Solr 4.x's release will be some
        # time yet.
        # kwargs['fl'] += ' _dist_:geodist()'
        pass

    if extra_kwargs:
        kwargs.update(extra_kwargs)

    return kwargs
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields="", highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None, **extra_kwargs):
    """
    Translate the search options into an Elasticsearch request body.

    ``sort_by`` is an iterable of ``(field, direction)`` pairs; the field
    ``"distance"`` becomes a geo-distance sort when ``distance_point`` (a
    dict with ``"point"`` and ``"field"``) is supplied.
    ``highlight`` may be ``True`` or a dict merged over the default
    highlight settings.
    ``facets`` maps a field name to a dict of extra facet options. The
    special keys ``global_scope`` and ``facet_filter`` are applied at the
    facet level, everything else at the terms level. The caller's dicts
    are not modified.
    ``models`` restricts results to the given model classes; without it,
    ``limit_to_registered_models`` decides whether the registered models
    are used as the restriction.
    ``within`` is a dict with ``"point_1"``, ``"point_2"`` and ``"field"``;
    ``dwithin`` is a dict with ``"point"``, ``"distance"`` and ``"field"``.
    All filters are combined with a ``bool``/``must`` filter when there is
    more than one.
    ``extra_kwargs`` are merged in last and therefore override any
    top-level key computed here.

    ``start_offset``, ``end_offset`` and ``result_class`` are accepted but
    not used in this block.

    Returns the request body dict.
    """
    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    if query_string == "*:*":
        kwargs = {"query": {"match_all": {}}}
    else:
        kwargs = {
            "query": {
                "query_string": {
                    "default_field": content_field,
                    "default_operator": DEFAULT_OPERATOR,
                    "query": query_string,
                    "analyze_wildcard": True,
                    "auto_generate_phrase_queries": True,
                    "fuzzy_min_sim": FUZZY_MIN_SIM,
                    "fuzzy_max_expansions": FUZZY_MAX_EXPANSIONS,
                }
            }
        }

    # so far, no filters
    filters = []

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs["fields"] = fields

    if sort_by is not None:
        order_list = []

        for field, direction in sort_by:
            if field == "distance" and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point["point"].coords
                sort_kwargs = {
                    "_geo_distance": {
                        distance_point["field"]: [lng, lat],
                        "order": direction,
                        "unit": "km",
                    }
                }
            else:
                if field == "distance":
                    warnings.warn(
                        "In order to sort by distance, you must call the '.distance(...)' method."
                    )

                # Regular sorting.
                sort_kwargs = {field: {"order": direction}}

            order_list.append(sort_kwargs)

        kwargs["sort"] = order_list

    # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
    # if start_offset is not None:
    #     kwargs['from'] = start_offset

    # if end_offset is not None:
    #     kwargs['size'] = end_offset - start_offset

    if highlight:
        # `highlight` can either be True or a dictionary containing custom parameters
        # which will be passed to the backend and may override our default settings:
        kwargs["highlight"] = {"fields": {content_field: {"store": "yes"}}}

        if isinstance(highlight, dict):
            kwargs["highlight"].update(highlight)

    if self.include_spelling:
        kwargs["suggest"] = {
            "suggest": {
                "text": spelling_query or query_string,
                "term": {
                    # Using content_field here will result in suggestions of stemmed words.
                    "field": "_all"
                },
            }
        }

    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault("facets", {})

        for facet_fieldname, extra_options in facets.items():
            # Work on a copy: the pops below must not strip options from
            # the caller's dict, or a second call would lose them.
            extra_options = dict(extra_options)
            facet_options = {
                "terms": {
                    "field": facet_fieldname,
                    "size": 100
                }
            }

            # Special cases for options applied at the facet level (not the terms level).
            if extra_options.pop("global_scope", False):
                # Renamed "global_scope" since "global" is a python keyword.
                facet_options["global"] = True

            if "facet_filter" in extra_options:
                facet_options["facet_filter"] = extra_options.pop("facet_filter")

            facet_options["terms"].update(extra_options)
            kwargs["facets"][facet_fieldname] = facet_options

    if date_facets is not None:
        kwargs.setdefault("facets", {})

        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get("gap_by").lower()

            # Need to detect on amount (can't be applied on months or years).
            if value.get("gap_amount", 1) != 1 and interval not in ("month", "year"):
                # Just the first character is valid for use.
                interval = "%s%s" % (value["gap_amount"], interval[:1])

            kwargs["facets"][facet_fieldname] = {
                "date_histogram": {
                    "field": facet_fieldname,
                    "interval": interval
                },
                "facet_filter": {
                    "range": {
                        facet_fieldname: {
                            "from": self._from_python(value.get("start_date")),
                            "to": self._from_python(value.get("end_date")),
                        }
                    }
                },
            }

    if query_facets is not None:
        kwargs.setdefault("facets", {})

        for facet_fieldname, value in query_facets:
            kwargs["facets"][facet_fieldname] = {
                "query": {
                    "query_string": {
                        "query": value
                    }
                }
            }

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(
            settings, "HAYSTACK_LIMIT_TO_REGISTERED_MODELS", True)

    if models and len(models):
        model_choices = sorted(get_model_ct(model) for model in models)
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if len(model_choices) > 0:
        filters.append({"terms": {DJANGO_CT: model_choices}})

    for q in narrow_queries:
        filters.append({
            "fquery": {
                "query": {
                    "query_string": {
                        "query": q
                    }
                },
                "_cache": True
            }
        })

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((south, west), (north, east)) = generate_bounding_box(within["point_1"], within["point_2"])
        within_filter = {
            "geo_bounding_box": {
                within["field"]: {
                    "top_left": {
                        "lat": north,
                        "lon": west
                    },
                    "bottom_right": {
                        "lat": south,
                        "lon": east
                    },
                }
            }
        }
        filters.append(within_filter)

    if dwithin is not None:
        lng, lat = dwithin["point"].coords

        # NB: the 1.0.0 release of elasticsearch introduced an
        # incompatible change to the distance filter formatting.
        if elasticsearch.VERSION >= (1, 0, 0):
            distance = "%(dist).6f%(unit)s" % {
                "dist": dwithin["distance"].km,
                "unit": "km",
            }
        else:
            distance = dwithin["distance"].km

        dwithin_filter = {
            "geo_distance": {
                "distance": distance,
                dwithin["field"]: {
                    "lat": lat,
                    "lon": lng
                },
            }
        }
        filters.append(dwithin_filter)

    # If we want to filter, change the query type to "filtered".
    if filters:
        kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}

        if len(filters) == 1:
            kwargs["query"]["filtered"]["filter"] = filters[0]
        else:
            kwargs["query"]["filtered"]["filter"] = {
                "bool": {
                    "must": filters
                }
            }

    if extra_kwargs:
        kwargs.update(extra_kwargs)

    return kwargs
def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None, polygon=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None):
    """
    Translate the search options into an Elasticsearch request body.

    ``sort_by`` is an iterable of ``(field, direction)`` pairs; the field
    ``'distance'`` becomes a geo-distance sort when ``distance_point`` (a
    dict with ``'point'`` and ``'field'``) is supplied.
    ``facets`` maps a field name to a dict of extra facet options. The
    special keys ``global_scope`` and ``facet_filter`` are applied at the
    facet level, everything else at the terms level. The caller's dicts
    are not modified.
    ``models`` restricts results to the given model classes; without it,
    ``limit_to_registered_models`` decides whether the registered models
    are used as the restriction.
    ``within`` is a dict with ``'point_1'``, ``'point_2'`` and ``'field'``;
    ``dwithin`` is a dict with ``'point'``, ``'distance'`` and ``'field'``;
    ``polygon`` is a dict with ``'polygon'`` and ``'field'``, and the
    first element of the polygon's ``coords`` supplies the ``(lng, lat)``
    points. Each filter is ANDed onto the ones set before it.

    ``start_offset``, ``end_offset`` and ``result_class`` are accepted but
    not used in this block.

    Returns the request body dict.
    """
    index = haystack.connections[self.connection_alias].get_unified_index()
    content_field = index.document_field

    if query_string == '*:*':
        kwargs = {
            'query': {
                'filtered': {
                    'query': {
                        "match_all": {}
                    },
                },
            },
        }
    else:
        kwargs = {
            'query': {
                'filtered': {
                    'query': {
                        'query_string': {
                            'default_field': content_field,
                            'default_operator': DEFAULT_OPERATOR,
                            'query': query_string,
                            'analyze_wildcard': True,
                            'auto_generate_phrase_queries': True,
                        },
                    },
                },
            },
        }

    def add_filter(new_filter):
        # AND ``new_filter`` onto whatever filter is already set, or
        # install it as the only filter when none exists yet.
        filtered = kwargs['query']['filtered']
        existing = filtered.get('filter')

        if existing:
            filtered['filter'] = {"and": [existing, new_filter]}
        else:
            filtered['filter'] = new_filter

    if fields:
        if isinstance(fields, (list, set)):
            fields = " ".join(fields)

        kwargs['fields'] = fields

    if sort_by is not None:
        order_list = []

        for field, direction in sort_by:
            if field == 'distance' and distance_point:
                # Do the geo-enabled sort.
                lng, lat = distance_point['point'].get_coords()
                sort_kwargs = {
                    "_geo_distance": {
                        distance_point['field']: [lng, lat],
                        "order": direction,
                        "unit": "km"
                    }
                }
            else:
                if field == 'distance':
                    warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")

                # Regular sorting.
                sort_kwargs = {field: {'order': direction}}

            order_list.append(sort_kwargs)

        kwargs['sort'] = order_list

    # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
    # if start_offset is not None:
    #     kwargs['from'] = start_offset

    # if end_offset is not None:
    #     kwargs['size'] = end_offset - start_offset

    if highlight is True:
        kwargs['highlight'] = {
            'fields': {
                content_field: {'store': 'yes'},
            }
        }

    if self.include_spelling:
        kwargs['suggest'] = {
            'suggest': {
                'text': spelling_query or query_string,
                'term': {
                    # Using content_field here will result in suggestions of stemmed words.
                    'field': '_all',
                },
            },
        }

    if narrow_queries is None:
        narrow_queries = set()

    if facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, extra_options in facets.items():
            # Work on a copy: the pops below must not strip options from
            # the caller's dict, or a second call would lose them.
            extra_options = dict(extra_options)
            facet_options = {
                'terms': {
                    'field': facet_fieldname,
                    'size': 100,
                },
            }

            # Special cases for options applied at the facet level (not the terms level).
            if extra_options.pop('global_scope', False):
                # Renamed "global_scope" since "global" is a python keyword.
                facet_options['global'] = True

            if 'facet_filter' in extra_options:
                facet_options['facet_filter'] = extra_options.pop('facet_filter')

            facet_options['terms'].update(extra_options)
            kwargs['facets'][facet_fieldname] = facet_options

    if date_facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, value in date_facets.items():
            # Need to detect on gap_by & only add amount if it's more than one.
            interval = value.get('gap_by').lower()

            # Need to detect on amount (can't be applied on months or years).
            if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
                # Just the first character is valid for use.
                interval = "%s%s" % (value['gap_amount'], interval[:1])

            kwargs['facets'][facet_fieldname] = {
                'date_histogram': {
                    'field': facet_fieldname,
                    'interval': interval,
                },
                'facet_filter': {
                    "range": {
                        facet_fieldname: {
                            'from': self._from_python(value.get('start_date')),
                            'to': self._from_python(value.get('end_date')),
                        }
                    }
                }
            }

    if query_facets is not None:
        kwargs.setdefault('facets', {})

        for facet_fieldname, value in query_facets:
            kwargs['facets'][facet_fieldname] = {
                'query': {
                    'query_string': {
                        'query': value,
                    }
                },
            }

    if limit_to_registered_models is None:
        limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

    if models and len(models):
        model_choices = sorted(['%s.%s' % (model._meta.app_label, model._meta.module_name) for model in models])
    elif limit_to_registered_models:
        # Using narrow queries, limit the results to only models handled
        # with the current routers.
        model_choices = self.build_models_list()
    else:
        model_choices = []

    if len(model_choices) > 0:
        # narrow_queries is already guaranteed to be a set at this point.
        narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices)))

    if narrow_queries:
        add_filter({
            'fquery': {
                'query': {
                    'query_string': {
                        'query': u' AND '.join(list(narrow_queries)),
                    },
                },
                '_cache': True,
            }
        })

    if within is not None:
        from haystack.utils.geo import generate_bounding_box

        ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
        add_filter({
            "geo_bounding_box": {
                within['field']: {
                    "top_left": {
                        "lat": max_lat,
                        "lon": min_lng
                    },
                    "bottom_right": {
                        "lat": min_lat,
                        "lon": max_lng
                    }
                }
            },
        })

    if dwithin is not None:
        lng, lat = dwithin['point'].get_coords()
        add_filter({
            "geo_distance": {
                "distance": dwithin['distance'].km,
                dwithin['field']: {
                    "lat": lat,
                    "lon": lng
                }
            }
        })

    if polygon is not None:
        # Build a real list: a lazy ``map`` object cannot be serialized
        # into the request body on Python 3.
        points = [{"lat": coord[1], "lon": coord[0]} for coord in polygon['polygon'].coords[0]]
        add_filter({
            "geo_polygon": {
                polygon['field']: {
                    "points": points
                }
            },
        })

    # Remove the "filtered" key if we're not filtering. Otherwise,
    # Elasticsearch will blow up.
    if not kwargs['query']['filtered'].get('filter'):
        kwargs['query'] = kwargs['query']['filtered']['query']

    return kwargs
def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, fields='', highlight=False, facets=None, date_facets=None, query_facets=None, narrow_queries=None, spelling_query=None, within=None, dwithin=None, distance_point=None, models=None, limit_to_registered_models=None, result_class=None, **kwargs): if len(query_string) == 0: return { 'results': [], 'hits': 0, } if not self.setup_complete: self.setup() index = haystack.connections[self.connection_alias].get_unified_index() content_field = index.document_field if query_string == '*:*': kwargs = { 'query': { 'filtered': { 'query': { 'query_string': { 'query': '*:*', }, }, }, }, } else: kwargs = { 'query': { 'filtered': { 'query': { 'query_string': { 'default_field': content_field, 'default_operator': DEFAULT_OPERATOR, 'query': query_string, 'analyze_wildcard': True, 'auto_generate_phrase_queries': True, }, }, }, }, } geo_sort = False if fields: if isinstance(fields, (list, set)): fields = " ".join(fields) kwargs['fields'] = fields if sort_by is not None: order_list = [] for field, direction in sort_by: if field == 'distance' and distance_point: # Do the geo-enabled sort. lng, lat = distance_point['point'].get_coords() sort_kwargs = { "_geo_distance": { distance_point['field']: [lng, lat], "order" : direction, "unit" : "km" } } else: if field == 'distance': warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.") # Regular sorting. sort_kwargs = {field: {'order': direction}} order_list.append(sort_kwargs) kwargs['sort'] = order_list # From/size offsets don't seem to work right in Elasticsearch's DSL. 
:/ # if start_offset is not None: # kwargs['from'] = start_offset # if end_offset is not None: # kwargs['size'] = end_offset - start_offset if highlight is True: kwargs['highlight'] = { 'fields': { content_field: {'store': 'yes'}, } } if self.include_spelling is True: warnings.warn("Elasticsearch does not handle spelling suggestions.", Warning, stacklevel=2) if narrow_queries is None: narrow_queries = set() if facets is not None: kwargs.setdefault('facets', {}) for facet_fieldname in facets: kwargs['facets'][facet_fieldname] = { 'terms': { 'field': facet_fieldname, }, } if date_facets is not None: kwargs.setdefault('facets', {}) for facet_fieldname, value in date_facets.items(): # Need to detect on gap_by & only add amount if it's more than one. interval = value.get('gap_by').lower() # Need to detect on amount (can't be applied on months or years). if value.get('gap_amount', 1) != 1 and not interval in ('month', 'year'): # Just the first character is valid for use. interval = "%s%s" % (value['gap_amount'], interval[:1]) kwargs['facets'][facet_fieldname] = { 'date_histogram': { 'field': facet_fieldname, 'interval': interval, }, 'facet_filter': { "range": { facet_fieldname: { 'from': self.conn.from_python(value.get('start_date')), 'to': self.conn.from_python(value.get('end_date')), } } } } if query_facets is not None: kwargs.setdefault('facets', {}) for facet_fieldname, value in query_facets: kwargs['facets'][facet_fieldname] = { 'query': { 'query_string': { 'query': value, } }, } if limit_to_registered_models is None: limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) if models and len(models): model_choices = sorted(['%s.%s' % (model._meta.app_label, model._meta.module_name) for model in models]) elif limit_to_registered_models: # Using narrow queries, limit the results to only models handled # with the current routers. 
model_choices = self.build_models_list() else: model_choices = [] if len(model_choices) > 0: if narrow_queries is None: narrow_queries = set() narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices))) if narrow_queries: kwargs['query'].setdefault('filtered', {}) kwargs['query']['filtered'].setdefault('filter', {}) kwargs['query']['filtered']['filter'] = { 'fquery': { 'query': { 'query_string': { 'query': u' AND '.join(list(narrow_queries)), }, }, '_cache': True, } } if within is not None: from haystack.utils.geo import generate_bounding_box ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2']) kwargs['query'].setdefault('filtered', {}) kwargs['query']['filtered'].setdefault('filter', {}) kwargs['query']['filtered']['filter'] = { "geo_bounding_box": { within['field']: { "top_left": { "lat": max_lat, "lon": max_lng }, "bottom_right": { "lat": min_lat, "lon": min_lng } } }, } if dwithin is not None: lng, lat = dwithin['point'].get_coords() kwargs['query'].setdefault('filtered', {}) kwargs['query']['filtered'].setdefault('filter', {}) kwargs['query']['filtered']['filter'] = { "geo_distance": { "distance": dwithin['distance'].km, dwithin['field']: { "lat": lat, "lon": lng } } } # Remove the "filtered" key if we're not filtering. Otherwise, # Elasticsearch will blow up. if not kwargs['query']['filtered'].get('filter'): kwargs['query'] = kwargs['query']['filtered']['query'] # Because Elasticsearch. query_params = { 'from': start_offset, } if end_offset is not None and end_offset > start_offset: query_params['size'] = end_offset - start_offset try: raw_results = self.conn.search(None, kwargs, indexes=[self.index_name], doc_types=['modelresult'], **query_params) except (requests.RequestException, pyelasticsearch.ElasticSearchError), e: if not self.silently_fail: raise self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e) raw_results = {}