def test_should_not_include_highlights_if_not_in_es_results(current_app, services_mapping):
    """A hit without a ``highlight`` entry must not gain one in the converted document."""
    # Imported locally so this block does not rely on a module-level import.
    from copy import deepcopy

    # Work on a private deep copy: SEARCH_RESULTS_JSON is shared with the other
    # tests, so deleting from it directly would leak into them and would raise
    # KeyError if this test ever ran twice in the same process.
    es_results = deepcopy(SEARCH_RESULTS_JSON)
    del es_results["hits"]["hits"][0]["highlight"]

    res = convert_es_results(services_mapping, es_results, {"category": "some catergory"})

    assert "highlight" not in res["documents"][0]
def keyword_search(index_name, doc_type, query_args):
    """Run a keyword search against Elasticsearch and build the API response.

    :param index_name: name of the index to search
    :param doc_type: document type to search; also used when building links
    :param query_args: request arguments handed to ``construct_query``
    :return: a ``(body, status_code)`` tuple. On success ``body`` is a dict
        with ``meta``, ``services`` and ``links`` keys and the status is 200.
        A ``TransportError`` is reported with that error's own status code and
        a ``ValueError`` is reported as a 400 carrying the error text.
    """
    def url_for_search(**kwargs):
        # Builds an absolute URL for the search endpoint of this index/doc type.
        return url_for('.search',
                       index_name=index_name,
                       doc_type=doc_type,
                       _external=True,
                       **kwargs)

    try:
        page_size = int(current_app.config['DM_SEARCH_PAGE_SIZE'])
        res = es.search(index=index_name,
                        doc_type=doc_type,
                        body=construct_query(query_args, page_size))

        results = convert_es_results(res, query_args)

        response = {
            "meta": results['meta'],
            "services": results['services'],
            "links": generate_pagination_links(query_args,
                                               results['meta']['total'],
                                               page_size,
                                               url_for_search)
        }

        return response, 200
    except TransportError as e:
        return _get_an_error_message(e), e.status_code
    except ValueError as e:
        return str(e), 400
def keyword_search(index_name, doc_type, query_args):
    """Search an index by keyword and return ``(body, status_code)``.

    On success the body is a dict holding the converted ``meta`` and
    ``services`` plus pagination ``links``, with status 200. A
    ``TransportError`` is turned into its error message and its own status
    code; a ``ValueError`` is turned into its text and a 400.

    :param index_name: name of the index to search
    :param doc_type: document type to search; also used when building links
    :param query_args: request arguments handed to ``construct_query``
    """
    def url_for_search(**kwargs):
        # Named function rather than an assigned lambda (PEP 8); builds an
        # absolute URL for the search endpoint of this index/doc type.
        return url_for(
            '.search',
            index_name=index_name,
            doc_type=doc_type,
            _external=True,
            **kwargs
        )

    try:
        page_size = int(current_app.config['DM_SEARCH_PAGE_SIZE'])
        res = es.search(
            index=index_name,
            doc_type=doc_type,
            body=construct_query(query_args, page_size)
        )

        results = convert_es_results(res, query_args)

        response = {
            "meta": results['meta'],
            "services": results['services'],
            "links": generate_pagination_links(
                query_args,
                results['meta']['total'],
                page_size,
                url_for_search
            )
        }

        return response, 200
    except TransportError as e:
        return _get_an_error_message(e), e.status_code
    except ValueError as e:
        return str(e), 400
def test_should_build_query_block_in_response(current_app, services_mapping):
    """The query arguments are echoed back under ``meta.query``."""
    query_args = {"q": "keywords", "category": "some catergory"}

    converted = convert_es_results(services_mapping, SEARCH_RESULTS_JSON, query_args)

    echoed_query = converted["meta"]["query"]
    assert echoed_query["q"] == "keywords"
    assert echoed_query["category"] == "some catergory"
def test_should_build_query_block_in_response():
    """Both query arguments must appear unchanged in ``meta.query``."""
    requested = {"q": "keywords", "category": "some catergory"}

    converted = convert_es_results(SEARCH_RESULTS_JSON, requested)

    # Check the fields one by one, in the same order as they were requested.
    assert_equal(converted["meta"]["query"]["q"], "keywords")
    assert_equal(converted["meta"]["query"]["category"], "some catergory")
def test_should_build_highlights_es_response():
    """The first converted service carries the expected highlight fragments."""
    converted = convert_es_results(SEARCH_RESULTS_JSON, {"q": "keywords"})

    expected_features = [
        "Verify email addresses at the point of entry",
        "Validate email address format",
        "Live email account",
        "Safe to email",
    ]

    assert_equal(converted["services"][0]["highlight"]["serviceName"], ["Email Verification"])
    assert_equal(converted["services"][0]["highlight"]["serviceFeatures"], expected_features)
    assert_equal(converted["services"][0]["highlight"]["serviceBenefits"], ["Increase email deliverability"])
def test_should_build_highlights_es_response(current_app, services_mapping):
    """The first converted document carries the expected highlight fragments."""
    converted = convert_es_results(services_mapping, SEARCH_RESULTS_JSON, {"q": "keywords"})

    highlight = converted["documents"][0]["highlight"]

    assert highlight["serviceName"] == ["Plant-based cloud hosting"]
    assert highlight["serviceFeatures"] == ["Independent advice and expertise"]
    assert highlight["serviceBenefits"] == [
        "Fully scalable and flexible solutions to suit changing needs",
    ]
def test_should_build_search_response_from_es_response():
    """Meta information and the first service are converted as expected."""
    converted = convert_es_results(SEARCH_RESULTS_JSON, {"q": "keywords"})

    # Response-level metadata.
    meta = converted["meta"]
    assert_equal(meta["query"]["q"], "keywords")
    assert_equal(meta["total"], 628)
    assert_equal(meta["took"], 69)

    # The page of services, then the fields of the first one.
    services = converted["services"]
    assert_equal(len(services), 10)

    first_service = services[0]
    assert_equal(first_service["id"], "5390159512076288")
    assert_equal(first_service["lot"], "SaaS")
    assert_equal(first_service["frameworkName"], "G-Cloud 6")
    assert_equal(first_service["supplierName"], "Supplier Name")
    assert_equal(first_service["serviceName"], "Email Verification")
    assert_equal(first_service["serviceTypes"], ["Data management"])
def test_should_build_highlights_es_response():
    """Highlight fragments of the first service survive the conversion."""
    res = convert_es_results(SEARCH_RESULTS_JSON, {"q": "keywords"})

    # (highlighted field, expected fragments), checked in this order.
    expectations = (
        ("serviceName", ["Email Verification"]),
        ("serviceFeatures", [
            "Verify email addresses at the point of entry",
            "Validate email address format",
            "Live email account",
            "Safe to email",
        ]),
        ("serviceBenefits", ["Increase email deliverability"]),
    )

    for field, fragments in expectations:
        assert_equal(res["services"][0]["highlight"][field], fragments)
def test_should_build_search_response_from_es_response():
    """The converted response exposes query, totals and the first service's fields."""
    res = convert_es_results(SEARCH_RESULTS_JSON, {"q": "keywords"})

    assert_equal(res["meta"]["query"]["q"], "keywords")
    assert_equal(res["meta"]["total"], 628)
    assert_equal(res["meta"]["took"], 69)
    assert_equal(len(res["services"]), 10)

    # (field of the first service, expected value), checked in this order.
    expected_first_service = (
        ("id", "5390159512076288"),
        ("lot", "SaaS"),
        ("frameworkName", "G-Cloud 6"),
        ("supplierName", "Supplier Name"),
        ("serviceName", "Email Verification"),
        ("serviceTypes", ["Data management"]),
    )
    for field, value in expected_first_service:
        assert_equal(res["services"][0][field], value)
def test_should_build_search_response_from_es_response(current_app, services_mapping):
    """Meta information (including page size) and the first document are converted as expected."""
    # Replace the app config so the page size reported in the result is known.
    current_app.config = {'DM_SEARCH_PAGE_SIZE': 30}

    converted = convert_es_results(services_mapping, SEARCH_RESULTS_JSON, {"q": "keywords"})

    # Response-level metadata.
    meta = converted["meta"]
    assert meta["query"]["q"] == "keywords"
    assert meta["total"] == 10
    assert meta["took"] == 15
    assert meta["results_per_page"] == 30

    # The page of documents, then the fields of the first one.
    documents = converted["documents"]
    assert len(documents) == 10

    first_document = documents[0]
    assert first_document["id"] == "144159043984122"
    assert first_document["lot"] == "cloud-support"
    assert first_document["frameworkName"] == "G-Cloud 12"
    assert first_document["supplierName"] == "Supplier Name"
    assert first_document["serviceName"] == "Plant-based cloud hosting"
    assert first_document["serviceCategories"] == ["Ongoing support"]
def core_search_and_aggregate(index_name, doc_type, query_args, search=False, aggregations=None):
    """Query Elasticsearch for documents and/or aggregations and build the API response.

    :param index_name: name of the index to query
    :param doc_type: used to look up the mapping and to build pagination links
    :param query_args: request arguments; ``idOnly`` and ``page`` are read here
        and the whole mapping is handed to ``construct_query``
    :param search: when true, the search uses ``dfs_query_then_fetch`` and an
        empty result page at a non-zero offset becomes a 404
    :param aggregations: aggregations handed to ``construct_query``; when
        non-empty the response gains an ``aggregations`` key. Defaults to none.
    :return: a ``(body, status_code)`` tuple. On success ``body`` is a dict
        with ``meta``, ``documents`` and ``links`` (plus ``aggregations`` when
        requested) and the status is 200. A ``TransportError`` is reported with
        that error's own status code, except that an over-large result window
        for a page genuinely beyond the results becomes a 404. A ``ValueError``
        is reported as a 400 carrying the error text.
    """
    # Avoid a shared mutable default: a fresh list is created for each call.
    if aggregations is None:
        aggregations = []

    try:
        mapping = app.mapping.get_mapping(index_name, doc_type)
        page_size = int(current_app.config['DM_SEARCH_PAGE_SIZE'])
        if 'idOnly' in query_args:
            page_size *= int(current_app.config['DM_ID_ONLY_SEARCH_PAGE_SIZE_MULTIPLIER'])

        es_search_kwargs = {'search_type': 'dfs_query_then_fetch'} if search else {}
        constructed_query = construct_query(mapping, query_args, aggregations, page_size)
        with logged_duration_for_external_request('es'):
            res = es.search(
                index=index_name,
                body=constructed_query,
                track_total_hits=True,
                **es_search_kwargs
            )

        results = convert_es_results(mapping, res, query_args)

        def url_for_search(**kwargs):
            return url_for('.search', index_name=index_name, doc_type=doc_type, _external=True, **kwargs)

        response = {
            "meta": results['meta'],
            "documents": results['documents'],
            "links": generate_pagination_links(
                query_args, results['meta']['total'], page_size, url_for_search
            ),
        }

        if aggregations:
            # Return aggregations in a slightly cleaner format.
            response['aggregations'] = {
                k: {d['key']: d['doc_count'] for d in v['buckets']}
                for k, v in res.get('aggregations', {}).items()
            }

        # determine whether we're actually off the end of the results. ES handles this as a result-less-yet-happy
        # response, but we probably want to turn it into a 404 not least so we can match our behaviour when fetching
        # beyond the `max_result_window` below
        if search and constructed_query.get("from") and not response["documents"]:
            return _page_404_response(query_args.get("page", None))

        return response, 200

    except TransportError as e:
        try:
            root_causes = getattr(e, "info", {}).get("error", {}).get("root_cause", {})
        except AttributeError:
            # Catch if the contents of 'info' has no ability to get attributes
            return _get_an_error_message(e), e.status_code

        # A root cause without a "reason" is treated as an empty reason rather
        # than raising AttributeError from inside this error handler.
        first_reason = (root_causes[0].get("reason") or "") if root_causes else ""

        if first_reason.startswith("Result window is too large"):
            # in this case we have to fire off another request to determine how we should handle this error...
            # (note minor race condition possible if index is modified between the original call and this one)
            try:
                body = construct_query(mapping, query_args, page_size=None)
                with logged_duration_for_external_request('es'):
                    result_count = es.count(
                        index=index_name,
                        body=body
                    )["count"]
            except TransportError as count_error:
                # Distinct name so the outer ``e`` stays bound for the fall-through return below.
                return _get_an_error_message(count_error), count_error.status_code
            else:
                # NOTE(review): a count exactly equal to "from" falls through to the error below, while
                # the success path above 404s on any empty page - confirm whether `<=` is intended here.
                if result_count < constructed_query.get("from", 0):
                    # there genuinely aren't enough results for this number of pages, so this should be a 404
                    return _page_404_response(query_args.get("page", None))
            # else fall through and allow this to 500 - we probably don't have max_result_window set high enough
            # for the number of results it's possible to access using this index.
        return _get_an_error_message(e), e.status_code

    except ValueError as e:
        return str(e), 400
def test_should_not_include_highlights_if_not_in_es_results():
    """A hit without a ``highlight`` entry must not gain one in the converted service."""
    # Imported locally so this block does not rely on a module-level import.
    from copy import deepcopy

    # Work on a private deep copy: SEARCH_RESULTS_JSON is shared with the other
    # tests, so deleting from it directly would leak into them and would raise
    # KeyError if this test ever ran twice in the same process.
    es_results = deepcopy(SEARCH_RESULTS_JSON)
    del es_results["hits"]["hits"][0]["highlight"]

    res = convert_es_results(es_results, {"category": "some catergory"})

    assert_equal("highlight" in res["services"][0], False)
def test_should_build_query_block_in_response():
    """``meta.query`` reports the keyword and category that were searched for."""
    res = convert_es_results(
        SEARCH_RESULTS_JSON,
        {"q": "keywords", "category": "some catergory"},
    )

    query_block = res["meta"]["query"]
    assert_equal(query_block["q"], "keywords")
    assert_equal(query_block["category"], "some catergory")