def test_mixed_quoted_query(self):
    """Check the query built for a quoted phrase followed by a bare term.

    The quoted part must become a ``phrase`` multi_match and the bare term
    a ``best_fields`` multi_match, alongside a prefix clause on the raw
    query string, a name suggestion, and no sort clause.
    """
    querystring = '"foo bar" baz'
    query_dict = queries.get_es_query(Search(), querystring, "", []).to_dict()

    should_clauses = query_dict["query"]["bool"]["should"]
    assert len(should_clauses) == 2

    expected_prefix = {"prefix": {"normalized_name": '"foo bar" baz'}}
    assert should_clauses[1] == expected_prefix

    must_clauses = should_clauses[0]["bool"]["must"]
    assert len(must_clauses) == 2

    phrase_match, term_match = must_clauses[0], must_clauses[1]
    assert phrase_match["multi_match"] == {
        "fields": EXPECTED_SEARCH_FIELDS,
        "type": "phrase",
        "query": "foo bar",
    }
    assert term_match["multi_match"] == {
        "fields": EXPECTED_SEARCH_FIELDS,
        "type": "best_fields",
        "query": "baz",
    }

    expected_suggest = {
        "name_suggestion": {"text": querystring, "term": {"field": "name"}}
    }
    assert query_dict["suggest"] == expected_suggest

    # default "relevance" mode does no sorting
    assert "sort" not in query_dict
def test_quoted_query(self, querystring, expected_prefix, expected_type):
    """Check the query built for a single quoted (or quote-like) query string.

    ``expected_prefix`` is the value expected in the prefix clause and
    ``expected_type`` the expected multi_match type for the given
    ``querystring``.
    """
    query_dict = queries.get_es_query(Search(), querystring, "", []).to_dict()

    should_clauses = query_dict["query"]["bool"]["should"]
    assert len(should_clauses) == 2
    assert should_clauses[1] == {
        "prefix": {"normalized_name": expected_prefix}
    }

    must_clauses = should_clauses[0]["bool"]["must"]
    assert len(must_clauses) == 1

    # The quoted single-character case is the only input whose inner text
    # is not "foo bar".
    if querystring != '"a"':
        expected_query = "foo bar"
    else:
        expected_query = "a"

    assert must_clauses[0]["multi_match"] == {
        "fields": EXPECTED_SEARCH_FIELDS,
        "type": expected_type,
        "query": expected_query,
    }

    expected_suggest = {
        "name_suggestion": {"text": querystring, "term": {"field": "name"}}
    }
    assert query_dict["suggest"] == expected_suggest

    # default "relevance" mode does no sorting
    assert "sort" not in query_dict
def test_sort_order(self, order, field):
    """Check that a non-default ``order`` adds the matching sort clause.

    A leading ``-`` on ``order`` is expected to produce a descending sort
    on ``field``; anything else an ascending one.
    """
    querystring = "foo bar"
    query_dict = queries.get_es_query(
        Search(), querystring, order, []
    ).to_dict()

    should_clauses = query_dict["query"]["bool"]["should"]
    assert len(should_clauses) == 2
    assert should_clauses[1] == {"prefix": {"normalized_name": "foo bar"}}

    must_clauses = should_clauses[0]["bool"]["must"]
    assert len(must_clauses) == 1
    assert must_clauses[0]["multi_match"] == {
        "fields": EXPECTED_SEARCH_FIELDS,
        "type": "best_fields",
        "query": "foo bar",
    }

    expected_suggest = {
        "name_suggestion": {"text": querystring, "term": {"field": "name"}}
    }
    assert query_dict["suggest"] == expected_suggest

    if order.startswith("-"):
        direction = "desc"
    else:
        direction = "asc"
    expected_sort = [
        {field: {"order": direction, "unmapped_type": "long"}}
    ]
    assert query_dict["sort"] == expected_sort
def test_with_classifiers_with_querystring(self):
    """Check the query built when both a query string and classifiers are given.

    On top of the usual text clauses, each classifier must appear as a
    prefix clause under ``must`` and ``minimum_should_match`` must be 1.
    """
    querystring = "foo bar"
    classifiers = [("c", "foo :: bar"), ("c", "fiz :: buz")]
    query_dict = queries.get_es_query(
        Search(), querystring, "", classifiers
    ).to_dict()

    bool_query = query_dict["query"]["bool"]

    should_clauses = bool_query["should"]
    assert len(should_clauses) == 2
    assert should_clauses[1] == {"prefix": {"normalized_name": "foo bar"}}

    must_clauses = should_clauses[0]["bool"]["must"]
    assert len(must_clauses) == 1
    assert must_clauses[0]["multi_match"] == {
        "fields": EXPECTED_SEARCH_FIELDS,
        "type": "best_fields",
        "query": "foo bar",
    }

    expected_suggest = {
        "name_suggestion": {"text": querystring, "term": {"field": "name"}}
    }
    assert query_dict["suggest"] == expected_suggest
    assert "sort" not in query_dict

    expected_classifier_clauses = []
    for classifier in classifiers:
        expected_classifier_clauses.append(
            {"prefix": {"classifiers": classifier}}
        )
    assert bool_query["must"] == expected_classifier_clauses
    assert bool_query["minimum_should_match"] == 1
def test_with_classifiers_with_no_querystring(self):
    """Check that classifiers alone produce one prefix clause each under ``must``."""
    classifiers = [("c", "foo :: bar"), ("c", "fiz :: buz")]
    query_dict = queries.get_es_query(Search(), "", "", classifiers).to_dict()

    expected_must = []
    for classifier in classifiers:
        expected_must.append({"prefix": {"classifiers": classifier}})

    assert query_dict["query"]["bool"]["must"] == expected_must
def test_single_not_quoted_character(self):
    """Check the query built for a one-character, unquoted query string.

    The multi_match clause is expected directly under the top-level
    ``bool.must``, with a name suggestion and no sort clause.
    """
    querystring = "a"
    query_dict = queries.get_es_query(Search(), querystring, "", []).to_dict()

    must_clauses = query_dict["query"]["bool"]["must"]
    assert len(must_clauses) == 1

    expected_match = {
        "fields": EXPECTED_SEARCH_FIELDS,
        "type": "best_fields",
        "query": "a",
    }
    assert must_clauses[0]["multi_match"] == expected_match

    expected_suggest = {
        "name_suggestion": {"text": querystring, "term": {"field": "name"}}
    }
    assert query_dict["suggest"] == expected_suggest

    # default "relevance" mode does no sorting
    assert "sort" not in query_dict
def test_with_classifier_with_no_terms_and_order(self):
    """Check a single classifier with an explicit order and no search terms.

    The query must be a bare prefix clause on the classifier, and the
    ``-created`` order must yield a descending sort on ``created``.
    """
    classifiers = [("c", "foo :: bar")]
    query_dict = queries.get_es_query(
        Search(), "", "-created", classifiers
    ).to_dict()

    expected_query = {"prefix": {"classifiers": ("c", "foo :: bar")}}
    expected_sort = [{"created": {"order": "desc", "unmapped_type": "long"}}]

    assert query_dict["query"] == expected_query
    assert query_dict["sort"] == expected_sort
def test_no_querystring(self):
    """Check that empty terms, order and classifiers yield the plain query."""
    base_search = Search()
    built = queries.get_es_query(base_search, "", "", [])
    expected = base_search.query()
    assert built == expected
def search(request):
    """Run a project search and build the context for the results page.

    Reads the following from ``request.params``:

    * ``q`` -- the query string (single quotes are replaced by double quotes)
    * ``o`` -- the sort order
    * ``c`` -- zero or more classifier filters
    * ``page`` -- the page number (defaults to 1)

    Returns a dict with the result ``page``, the search ``term``, the
    ``order``, the tree of ``available_filters`` and the ``applied_filters``.

    Raises:
        HTTPBadRequest: if ``page`` cannot be converted to an integer.
        HTTPServiceUnavailable: if building the result page raises
            ``elasticsearch.TransportError``.
        HTTPNotFound: if the requested page is past the last page.
    """
    # Looked up once and reused for both the error counter and the
    # results histogram below.
    metrics = request.find_service(IMetricsService, context=None)

    querystring = request.params.get("q", "").replace("'", '"')
    order = request.params.get("o", "")
    classifiers = request.params.getall("c")
    query = get_es_query(request.es, querystring, order, classifiers)

    try:
        page_num = int(request.params.get("page", 1))
    except ValueError:
        # The ValueError adds nothing to the client-facing 400, so the
        # implicit exception context is suppressed.
        raise HTTPBadRequest("'page' must be an integer.") from None

    try:
        page = ElasticsearchPage(
            query, page=page_num, url_maker=paginate_url_factory(request)
        )
    except elasticsearch.TransportError:
        metrics.increment("warehouse.views.search.error")
        raise HTTPServiceUnavailable

    if page.page_count and page_num > page.page_count:
        raise HTTPNotFound

    # Group every in-use, non-deprecated classifier under its top-level
    # component (the text before the first " :: ").
    available_filters = collections.defaultdict(list)

    classifiers_q = (
        request.db.query(Classifier)
        .with_entities(Classifier.classifier)
        .filter(
            exists([release_classifiers.c.trove_id]).where(
                release_classifiers.c.trove_id == Classifier.id
            ),
            Classifier.classifier.notin_(deprecated_classifiers.keys()),
        )
        .order_by(Classifier.classifier)
    )

    for cls in classifiers_q:
        first, *_ = cls.classifier.split(" :: ")
        available_filters[first].append(cls.classifier)

    def filter_key(item):
        """
        Sort key for ``(name, classifiers)`` pairs: names listed in
        SEARCH_FILTER_ORDER come first, in that order; all other names
        follow, ordered by name.
        """
        try:
            return 0, SEARCH_FILTER_ORDER.index(item[0]), item[0]
        except ValueError:
            return 1, 0, item[0]

    def form_filters_tree(split_list):
        """
        Takes a list of lists, each of them containing a filter and
        one of its children.
        Returns a dictionary, each key being a filter and each value being
        the filter's children.
        """
        tree = {}
        for parts in split_list:
            current_level = tree
            for part in parts:
                # Create the child level on first sight, then descend.
                current_level = current_level.setdefault(part, {})
        return tree

    def process_available_filters():
        """
        Processes available filters and returns a list of dictionaries.
        The value of a key in the dictionary represents its children
        """
        sorted_filters = sorted(available_filters.items(), key=filter_key)
        return [
            form_filters_tree(
                [classifier.split(" :: ") for classifier in classifier_list]
            )
            for _, classifier_list in sorted_filters
        ]

    metrics.histogram("warehouse.views.search.results", page.item_count)

    return {
        "page": page,
        "term": querystring,
        "order": order,
        "available_filters": process_available_filters(),
        # Re-read from the request rather than reusing ``classifiers``, so
        # the value is independent of whatever get_es_query did with its
        # argument.
        "applied_filters": request.params.getall("c"),
    }