def test_unknown_state(endpoint):
    """An unrecognized "state" value must be rejected by the schema."""
    payload = get_example_response(f"{endpoint}.json")
    payload["result"]["state"] = "lockdown"
    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(payload, endpoint)
def test_missing_ids(endpoint):
    """Removing the mandatory "id" key must make validation fail."""
    payload = get_example_response(f"{endpoint}.json")
    payload["result"].pop("id")
    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(payload, endpoint)
def test_search_datasets_stable_package_by_title_general_term(
        subtests, base_url_3, rsession, stable_pkg, variables):
    """Searching /search/dataset by exact name finds the stable package.

    Result format differs by CKAN version: 2.9 returns dicts, earlier
    versions return raw name strings.
    """
    # fix: dropped a dead `extract_search_terms(stable_pkg["name"], 3)` call
    # whose result was never used, and the unused offset param
    limit_param, _ = _get_limit_offset_params(base_url_3, variables=variables)
    response = rsession.get(
        f"{base_url_3}/search/dataset?q=name:{stable_pkg['name']}&fl=name&{limit_param}=100"
    )
    assert response.status_code == 200
    rj = get_dataset_search_json_response(response, base_url_3,
                                          variables=variables)
    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        if variables.get("ckan_version") == "2.9":
            # in CKAN 2.9, v1 dataset search has been dropped so results come back as v3
            assert isinstance(rj["results"][0], dict)
        else:
            # check it's using the raw-string result format
            assert isinstance(rj["results"][0], str)
        assert len(rj["results"]) <= 100
    with subtests.test("desired result present"):
        if variables.get("ckan_version") == "2.9":
            assert stable_pkg["name"] in [n['name'] for n in rj["results"]]
        else:
            assert stable_pkg["name"] in rj["results"]
def test_search_dataset_pii_extra():
    """A PII-bearing "author" extra must fail dataset-search validation."""
    payload = get_example_response("search_dataset.all_fields.json")
    payload["results"][1]["extras"]["author"] = "Rieux"
    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(payload, "search_dataset")
def test_organization_list_all_fields_inc_optional(subtests, base_url_3,
                                                   rsession):
    """organization_list with all optional includes returns dict results
    consistent with organization_show."""
    resp = rsession.get(
        f"{base_url_3}/action/organization_list?all_fields=1&include_extras=1"
        "&include_tags=1&include_groups=1&limit=5")
    assert resp.status_code == 200
    body = resp.json()
    with subtests.test("response validity"):
        validate_against_schema(body, "organization_list")
        assert body["success"] is True
        first = body["result"][0]
        # assert this is the correct variant of the response schema
        assert isinstance(first, dict)
        # extras are not available when creating the organisation
        # via the ckan static mock harvest source
        # assert "extras" in first
        assert "tags" in first
        assert "groups" in first
    with subtests.test("consistency with organization_show"):
        os_response = rsession.get(
            f"{base_url_3}/action/organization_show?id={body['result'][0]['id']}"
        )
        assert os_response.status_code == 200
        assert os_response.json()["result"] == AnySupersetOf(body['result'][0])
def test_format_autocomplete_result_missing_format():
    """A result lacking its "Format" key must fail validation."""
    payload = get_example_response("format_autocomplete.json")
    payload["ResultSet"]["Result"][1].pop("Format")
    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(payload, "format_autocomplete")
def test_package_search_pii_field():
    """A PII-bearing "author" field must fail package_search validation."""
    payload = get_example_response("package_search.json")
    payload["result"]["results"][1]["author"] = "Rieux"
    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(payload, "package_search")
def test_package_search_by_full_slug_general_term(
    subtests,
    inc_sync_sensitive,
    base_url_3,
    rsession,
    stable_pkg_slug,
):
    """package_search by full dataset slug should surface that dataset and
    stay roughly consistent with package_show."""
    response = rsession.get(
        f"{base_url_3}/action/package_search?q={stable_pkg_slug}&rows=100")
    assert response.status_code == 200
    rj = response.json()
    with subtests.test("response validity"):
        validate_against_schema(rj, "package_search")
        assert rj["success"] is True
        assert len(rj["result"]["results"]) <= 100
    if inc_sync_sensitive:
        # fix: reuse the already-parsed body instead of calling
        # response.json() a second time
        desired_result = tuple(pkg for pkg in rj["result"]["results"]
                               if pkg["name"] == stable_pkg_slug)
        assert desired_result
        if len(desired_result) > 1:
            warn(
                f"Multiple results ({len(desired_result)}) with name = {stable_pkg_slug!r})"
            )
        with subtests.test("approx consistency with package_show"):
            ps_response = rsession.get(
                f"{base_url_3}/action/package_show?id={stable_pkg_slug}")
            assert ps_response.status_code == 200
            assert any(ps_response.json()["result"]["id"] == result["id"]
                       for result in desired_result)
def test_package_search_stable_package(subtests, base_url_3, rsession,
                                       stable_pkg_search):
    """An exact-name package_search returns exactly the stable package."""
    pkg = stable_pkg_search
    resp = rsession.get(
        f"{base_url_3}/action/package_search?q=name:{pkg['name']}&rows=30"
    )
    assert resp.status_code == 200
    body = resp.json()
    with subtests.test("response validity"):
        validate_against_schema(body, "package_search")
        assert body["success"] is True
        assert len(body["result"]["results"]) <= 30
    matches = tuple(candidate for candidate in body["result"]["results"]
                    if candidate["name"] == pkg["name"])
    assert len(matches) == 1
    clean_unstable_elements(matches[0])
    clean_unstable_elements(pkg)
    with subtests.test("desired result equality"):
        assert matches[0] == AnySupersetOf(pkg, recursive=True,
                                           seq_norm_order=True)
def test_search_datasets_by_full_slug_general_term(subtests,
                                                   inc_sync_sensitive,
                                                   base_url_3, rsession,
                                                   random_pkg_slug, variables):
    """Searching /search/dataset by a full slug finds the random package."""
    # fix: only the limit param is used; discard the unused offset param
    limit_param, _ = _get_limit_offset_params(base_url_3, variables=variables)
    response = rsession.get(
        f"{base_url_3}/search/dataset?q={random_pkg_slug}&{limit_param}=100")
    assert response.status_code == 200
    rj = get_dataset_search_json_response(response, base_url_3, variables)
    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        # check it's using the raw-string result format
        if variables.get('ckan_version') == "2.9":
            assert isinstance(rj["results"][0], dict)
        else:
            assert isinstance(rj["results"][0], str)
        assert len(rj["results"]) <= 100
    if inc_sync_sensitive:
        with subtests.test("desired result present"):
            results = [
                r["name"] for r in rj["results"]
            ] if variables.get("ckan_version") == "2.9" else rj["results"]
            desired_result = tuple(name for name in results
                                   if name == random_pkg_slug)
            assert desired_result
            if len(desired_result) > 1:
                warn(
                    f"Multiple results ({len(desired_result)}) with name = {random_pkg_slug!r})"
                )
def test_i18n(base_url, rsession, subtests):
    """The en_GB i18n bundle should be served and match its schema."""
    resp = rsession.get(f"{base_url}/i18n/en_GB")
    assert resp.status_code == 200
    body = resp.json()
    with subtests.test("response validity"):
        validate_against_schema(body, "i18n")
def test_search_datasets_by_full_slug_specific_field_all_fields_response(
        subtests, inc_sync_sensitive, base_url_3, rsession, random_pkg,
        allfields_term, variables):
    """all_fields search by exact name returns full dataset dicts matching
    the random package's core fields."""
    if allfields_term.startswith("all_fields") and base_url_3.endswith("/3"):
        pytest.skip("all_fields parameter not supported in v3 endpoint")
    # fix: only the limit param is used; discard the unused offset param
    limit_param, _ = _get_limit_offset_params(base_url_3, variables=variables)
    response = rsession.get(
        f"{base_url_3}/search/dataset?q=name:{random_pkg['name']}&{allfields_term}&{limit_param}=10"
    )
    assert response.status_code == 200
    rj = response.json()
    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        assert isinstance(rj["results"][0], dict)
        assert len(rj["results"]) <= 10
        _validate_embedded_keys(rj)
    if inc_sync_sensitive:
        with subtests.test("desired result present"):
            desired_result = tuple(dst for dst in rj["results"]
                                   if random_pkg["id"] == dst["id"])
            assert len(desired_result) == 1
            assert desired_result[0]["title"] == random_pkg["title"]
            assert desired_result[0]["state"] == random_pkg["state"]
            assert desired_result[0]["organization"] == random_pkg[
                "organization"]["name"]
def test_package_show(subtests, base_url_3, rsession, random_pkg_slug):
    """package_show by slug; then check uuid lookup and org consistency."""
    resp = rsession.get(
        f"{base_url_3}/action/package_show?id={random_pkg_slug}")
    assert resp.status_code == 200
    body = resp.json()
    with subtests.test("response validity"):
        validate_against_schema(body, "package_show")
        assert body["success"] is True
        assert body["result"]["name"] == random_pkg_slug
        for resource in body["result"]["resources"]:
            assert resource["package_id"] == body['result']['id']
    with subtests.test("uuid lookup consistency"):
        # we should be able to look up this same package by its uuid and get an identical response
        uuid_response = rsession.get(
            f"{base_url_3}/action/package_show?id={body['result']['id']}")
        assert uuid_response.status_code == 200
        assert uuid_response.json() == body
    with subtests.test("organization consistency"):
        org_response = rsession.get(
            f"{base_url_3}/action/organization_show?id={body['result']['organization']['id']}"
        )
        assert org_response.status_code == 200
        assert org_response.json()["result"] == AnySupersetOf(
            body['result']['organization'], recursive=True)
def test_search_datasets_by_full_slug_general_term_id_response(
        subtests, inc_sync_sensitive, base_url_3, rsession, random_pkg,
        variables):
    """Searching by slug with fl=id returns the package's id."""
    # fix: only the limit param is used; discard the unused offset param
    limit_param, _ = _get_limit_offset_params(base_url_3, variables=variables)
    response = rsession.get(
        f"{base_url_3}/search/dataset?q={random_pkg['name']}&fl=id&{limit_param}=100"
    )
    assert response.status_code == 200
    rj = get_dataset_search_json_response(response, base_url_3, variables)
    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        if variables.get("ckan_version") == "2.9":
            # in CKAN 2.9, v1 dataset search has been dropped so results come back as v3
            assert isinstance(rj["results"][0], dict)
        else:
            # when "id" is chosen for the response, it is presented as raw strings
            assert isinstance(rj["results"][0], str)
        assert len(rj["results"]) <= 100
    if inc_sync_sensitive:
        with subtests.test("desired result present"):
            if variables.get("ckan_version") == "2.9":
                assert random_pkg["id"] in [r['id'] for r in rj["results"]]
            else:
                assert random_pkg["id"] in rj["results"]
def test_package_search_facets(subtests, inc_sync_sensitive, base_url_3,
                               rsession, random_pkg):
    """Faceted package_search includes the random package's facet values."""
    notes_terms = extract_search_terms(random_pkg["notes"], 2)
    resp = rsession.get(
        f"{base_url_3}/action/package_search?q={notes_terms}&rows=10"
        '&facet.field=["license_id","organization"]&facet.limit=-1')
    assert resp.status_code == 200
    body = resp.json()
    with subtests.test("response validity"):
        validate_against_schema(body, "package_search")
        assert body["success"] is True
        assert len(body["result"]["results"]) <= 10
    if inc_sync_sensitive:
        with subtests.test("facets include random_pkg's value"):
            org_name = random_pkg["organization"]["name"]
            assert org_name in body["result"]["facets"]["organization"]
            org_items = body["result"]["search_facets"]["organization"]["items"]
            assert any(org_name == item["name"] for item in org_items)
            # not all packages have a license_id
            license_id = random_pkg.get("license_id")
            if license_id:
                assert license_id in body["result"]["facets"]["license_id"]
                license_items = (
                    body["result"]["search_facets"]["license_id"]["items"])
                assert any(license_id == item["name"]
                           for item in license_items)
def test_organization_show_inc_datasets_stable_pkg(
    subtests,
    base_url_3,
    rsession,
    stable_org_with_datasets,
):
    """organization_show with include_datasets lists the org's packages."""
    org_name = stable_org_with_datasets['name']
    resp = rsession.get(
        f"{base_url_3}/action/organization_show?id={org_name}"
        "&include_datasets=1")
    assert resp.status_code == 200
    body = resp.json()
    with subtests.test("response validity"):
        validate_against_schema(body, "organization_show")
    matching_pkgs = [
        clean_unstable_elements(pkg)
        for pkg in body["result"]["packages"]
        if pkg["organization"]["name"] == org_name
    ]
    if body["result"]["package_count"] > 1000 and not matching_pkgs:
        # this view only shows the first 1000 packages - it may have missed the cut
        warn(
            f"Expected package name {org_name!r} not found in first 1000 listed packages"
        )
    else:
        # example publisher 1 should have 2 datasets attached
        assert len(matching_pkgs) == 2
        with subtests.test("response equality"):
            clean_unstable_elements(stable_org_with_datasets["packages"][0])
            assert stable_org_with_datasets["packages"][0] in matching_pkgs
def test_package_search_by_revision_id_specific_field(
    subtests,
    inc_sync_sensitive,
    base_url_3,
    rsession,
    stable_pkg,
):
    """fq filtering on revision_id returns only matching packages."""
    resp = rsession.get(
        f"{base_url_3}/action/package_search?fq=revision_id:{stable_pkg['revision_id']}"
        "&rows=1000")
    assert resp.status_code == 200
    body = resp.json()
    with subtests.test("response validity"):
        validate_against_schema(body, "package_search")
        assert body["success"] is True
        assert len(body["result"]["results"]) <= 1000
    with subtests.test("all results match criteria"):
        for pkg in body["result"]["results"]:
            assert pkg["revision_id"] == stable_pkg["revision_id"]
    if inc_sync_sensitive:
        matches = tuple(pkg for pkg in body["result"]["results"]
                        if pkg["id"] == stable_pkg["id"])
        assert len(matches) == 1
        with subtests.test("approx consistency with package_show"):
            assert matches[0]["name"] == stable_pkg["name"]
            assert matches[0]["organization"] == stable_pkg["organization"]
def test_package_search_empty_tag():
    """An empty tag object must fail package_search validation."""
    payload = get_example_response("package_search.json")
    payload["result"]["results"][2]["tags"].append({})
    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(payload, "package_search")
def test_search_datasets_by_full_slug_specific_field_all_fields_response(
        subtests, base_url_3, rsession, stable_dataset, allfields_term,
        variables):
    """all_fields search for the stable dataset matches the fixture data.

    NOTE(review): a test with this exact name appears earlier in SOURCE; if
    both live in one module, pytest only collects this later definition —
    confirm and rename one of them.
    """
    if allfields_term.startswith("all_fields") and (
            base_url_3.endswith("/3")
            or variables.get('ckan_version') == '2.9'):
        pytest.skip("all_fields parameter not supported in v3 endpoint")
    # fix: only the limit param is used; discard the unused offset param
    limit_param, _ = _get_limit_offset_params(base_url_3, variables=variables)
    response = rsession.get(
        f"{base_url_3}/search/dataset?q=name:{stable_dataset['name']}"
        f"&{allfields_term}&{limit_param}=10")
    assert response.status_code == 200
    # CKAN 2.9's v3 endpoint wraps the payload in a "result" envelope
    if variables.get('ckan_version') == '2.9' and base_url_3.endswith("/3"):
        rj = response.json().get('result')
    else:
        rj = response.json()
    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        assert isinstance(rj["results"][0], dict)
        assert len(rj["results"]) <= 10
        _validate_embedded_keys(rj)
    desired_result = tuple(dst for dst in rj["results"]
                           if stable_dataset["name"] == dst["name"])
    assert len(desired_result) == 1
    with subtests.test("desired result equality"):
        clean_unstable_elements(stable_dataset, is_key_value=False)
        clean_unstable_elements(desired_result[0], is_key_value=False)
        assert desired_result[0] == AnySupersetOf(stable_dataset,
                                                  recursive=True,
                                                  seq_norm_order=True)
def test_organization_show_is_organization():
    """is_organization=False must fail organization_show validation."""
    payload = get_example_response("organization_show.json")
    payload["result"]["is_organization"] = False
    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(payload, "organization_show")
def test_package_search_noninteger_facet():
    """A string-valued facet count must fail package_search validation."""
    payload = get_example_response("package_search.json")
    org_facets = payload["result"]["facets"]["organization"]
    org_facets["natural-england"] = "2"
    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(payload, "package_search")
def test_package_show_org_must_follow_org_schema():
    """An invalid embedded-organization timestamp must fail validation."""
    payload = get_example_response("package_show.json")
    # note non-existent leap-day
    payload["result"]["organization"]["created"] = "2019-02-29T16:00:00.123Z"
    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(payload, "package_show")
def test_organization_list(base_url_3, rsession):
    """organization_list without params returns a list of name strings."""
    resp = rsession.get(f"{base_url_3}/action/organization_list")
    assert resp.status_code == 200
    body = resp.json()
    validate_against_schema(body, "organization_list")
    assert body["success"] is True
    # assert this is the correct variant of the response
    assert isinstance(body["result"][0], str)
def test_package_search_pii_extra():
    """A PII-bearing "author" extra must fail package_search validation."""
    payload = get_example_response("package_search.json")
    pii_extra = {"key": "author", "value": "Rieux"}
    payload["result"]["results"][1]["extras"].append(pii_extra)
    with pytest.raises(jsonschema.ValidationError):
        validate_against_schema(payload, "package_search")
def test_no_results(base_url, rsession, subtests):
    """An autocomplete query matching nothing yields an empty Result list."""
    resp = rsession.get(
        f"{base_url}/2/util/resource/format_autocomplete?incomplete=telegrams")
    assert resp.status_code == 200
    body = resp.json()
    with subtests.test("response validity"):
        validate_against_schema(body, "format_autocomplete")
    with subtests.test("no results"):
        assert body["ResultSet"]["Result"] == []
def test_csv(base_url, rsession, subtests):
    """Autocompleting "cs" should surface the csv format."""
    resp = rsession.get(
        f"{base_url}/2/util/resource/format_autocomplete?incomplete=cs")
    assert resp.status_code == 200
    body = resp.json()
    with subtests.test("response validity"):
        validate_against_schema(body, "format_autocomplete")
    with subtests.test("expected result present"):
        normalized = (entry["Format"].lower().strip()
                      for entry in body["ResultSet"]["Result"])
        assert "csv" in normalized
def test_package_show_default_schema(base_url_3, rsession, stable_pkg):
    """package_show with use_default_schema=1 still matches the schema."""
    # cannot use random slugs as they sometimes contain harvest packages which cannot be handled properly
    resp = rsession.get(
        f"{base_url_3}/action/package_show?id={stable_pkg['name']}&use_default_schema=1"
    )
    assert resp.status_code == 200
    body = resp.json()
    validate_against_schema(body, "package_show")
    assert body["success"] is True
def _validate_embedded_keys(response_json):
    """Validate any embedded JSON package documents within search results."""
    embedded_keys = ("data_dict", "validated_data_dict")
    for result in response_json["results"]:
        for key in embedded_keys:
            if key in result:
                # note this embedded json uses the "package" schema, despite being
                # in a "dataset".
                validate_against_schema(json.loads(result[key]), "package_base")
def test_organization_show_stable_org(subtests, base_url_3, rsession,
                                      stable_org):
    """organization_show for the stable org matches the fixture's data."""
    resp = rsession.get(
        f"{base_url_3}/action/organization_show?id={stable_org['name']}")
    assert resp.status_code == 200
    body = resp.json()
    with subtests.test("response validity"):
        validate_against_schema(body, "organization_show")
    with subtests.test("response equality"):
        expected = AnySupersetOf(stable_org, recursive=True,
                                 seq_norm_order=True)
        assert body["result"] == expected
def test_search_datasets_by_org_slug_specific_field_and_title_general_term(
        subtests, inc_sync_sensitive, base_url_3, rsession, stable_pkg,
        org_as_q, variables):
    """Combined name + organization dataset search finds the stable package."""
    if base_url_3.endswith("/3") and not org_as_q:
        pytest.skip(
            "field filtering as separate params not supported in v3 endpoint")
    # fix: only the limit param is used; discard the unused offset param
    limit_param, _ = _get_limit_offset_params(base_url_3, variables=variables)
    name_terms = "name:" + stable_pkg["name"]
    # it's possible to query specific fields in two different ways;
    # flattened the original nested conditional expression for readability
    if org_as_q or variables.get("ckan_version") == "2.9":
        # ckan 2.9 is stricter with search params
        org_frag = f"+organization:{stable_pkg['organization']['name']}"
    else:
        org_frag = f"&organization={stable_pkg['organization']['name']}"
    query_frag = f"q={name_terms}" + org_frag
    response = rsession.get(f"{base_url_3}/search/dataset?{query_frag}"
                            f"&fl=id,organization,title&{limit_param}=1000")
    assert response.status_code == 200
    rj = get_dataset_search_json_response(response, base_url_3,
                                          variables=variables)
    with subtests.test("response validity"):
        validate_against_schema(rj, "search_dataset")
        assert isinstance(rj["results"][0], dict)
        assert len(rj["results"]) <= 1000
    with subtests.test("all results match criteria"):
        assert all(stable_pkg["organization"]["name"] == dst["organization"]
                   for dst in rj["results"])
        # we can't reliably test for the search terms because they may have been stemmed
        # and not correspond to exact matches
    if inc_sync_sensitive:
        with subtests.test("desired result present"):
            desired_result = tuple(dst for dst in rj["results"]
                                   if stable_pkg["id"] == dst["id"])
            if rj["count"] > 1000 and not desired_result:
                # we don't have all results - it may well be on a latter page
                warn(
                    f"Expected dataset id {stable_pkg['id']!r} not found on first page of results"
                )
            else:
                assert len(desired_result) == 1
                assert desired_result[0]["title"] == stable_pkg["title"]