def test_cleanup_enrich_then_lookup1():
    """Should produce both name and iso639_3 language fields.

    Each input spelling is POSTed through cleanup_language, enrich_language
    and two lookup calls, every stage receiving the previous stage's
    response body.  Every stage must answer 200 and the final body must
    match EXPECTED.
    """
    INPUT = ["en", "English", ["eng"], ["English"], ["en", "English"]]
    EXPECTED = {
        "sourceResource": {
            "language": [{"name": "English", "iso639_3": "eng"}]
        }
    }
    lookup = server() + "lookup?prop=sourceResource%2Flanguage%2Fname"
    # The stage URLs do not depend on the input, so build them once.
    stages = [
        server() + "cleanup_language",
        server() + "enrich_language",
        lookup + ("&target=sourceResource%2Flanguage%2Fname"
                  "&substitution=iso639_3"),
        lookup + ("&target=sourceResource%2Flanguage%2Fiso639_3"
                  "&substitution=iso639_3&inverse=True"),
    ]
    # Iterate the values directly and avoid a local named `input`, which
    # would hide the builtin of the same name.
    for language in INPUT:
        content = json.dumps({"sourceResource": {"language": language}})
        for url in stages:
            resp, content = H.request(url, "POST", content)
            assert resp.status == 200
        assert_same_jsons(content, EXPECTED)
def test_physical_format_from_format_and_type():
    """The type values are appended to the format list.

    The record goes through enrich-type and then enrich-format; after each
    call the response must equal the expected document, which carries only
    a "format" key.
    """
    dimensions = [
        "76.8 x 104 cm",
        "Oil on canvas",
        "7 1/4 x 6 inches (18.4 x 15.2 cm)",
        "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)",
    ]
    kinds = ["Paintings", "Painting"]
    record = {"format": dimensions, "type": kinds}
    wanted = {"format": dimensions + kinds}

    body = json.dumps(record)
    paths = (
        "enrich-type?prop=type&format_field=format",
        "enrich-format?prop=format&type_field=type",
    )
    for path in paths:
        # The second call is fed the first call's raw response body.
        response, body = H.request(server() + path, "POST", body=body)
        assert str(response.status).startswith("2")
        fetched = json.loads(body)
        assert fetched == wanted, DictDiffer(wanted, fetched).diff()
def test_type_set_format():
    """Format gets set correctly given invalid type value.

    With send_rejects_to_format=true, type strings that don't exactly match
    a valid type should end up in format.  Three records are checked: one
    mapped to "image" with the original string moved to format, one valid
    type left alone, and one unknown string that only survives in format.
    """
    endpoint = server() + "enrich-type?send_rejects_to_format=true"
    cases = [
        (
            {"sourceResource": {"type": "digital photograph"}},
            {
                "sourceResource": {
                    "type": "image",
                    "format": ["digital photograph"]
                }
            },
        ),
        (
            {"sourceResource": {"type": "text"}},
            {"sourceResource": {"type": "text"}},
        ),
        (
            {"sourceResource": {"type": "weird thing"}},
            {"sourceResource": {"format": ["weird thing"]}},
        ),
    ]
    for record, wanted in cases:
        response, body = H.request(endpoint, "POST", body=json.dumps(record))
        assert response.status == 200
        assert_same_jsons(wanted, json.loads(body))
def test_enrich_date_dup_start_date_list():
    """Recreate bug found with a date list holding the same year twice.

    Two date lists are POSTed to enrich_earliest_date: a genuine range
    (1930, 1938) and a duplicated year (1930, 1930).  Each must come back
    as a single date entry.
    """
    url = server() + "enrich_earliest_date?prop=date"
    cases = [
        # make sure we don't break it
        (
            ['1930', '1938'],
            {u'begin': u'1930', u'end': u'1938', u'displayDate': u'1930-1938'},
        ),
        # this is the fix
        (
            ['1930', '1930'],
            {u'begin': u'1930', u'end': u'1930', u'displayDate': u'1930'},
        ),
    ]
    for dates, expected_date in cases:
        resp, content = H.request(url, "POST",
                                  body=json.dumps({'date': dates}))
        # Check the status before the body so a failed request is reported
        # as such rather than as a JSON mismatch.
        assert str(resp.status).startswith("2"), resp.status
        assert_same_jsons({'date': [expected_date]}, content)
def test_physical_format_from_format_and_type():
    """Test physical format appending from format and type fields.

    enrich-type is called first and enrich-format second, on the first
    call's output; both responses must equal the expected document.
    """
    physical = [
        "76.8 x 104 cm",
        "Oil on canvas",
        "7 1/4 x 6 inches (18.4 x 15.2 cm)",
        "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)",
    ]
    types = ["Paintings", "Painting"]
    source = {"format": physical, "type": types}
    target = {"format": physical + types}

    def post_and_check(path, data):
        """POST data to path, verify the reply, and return its raw body."""
        reply, text = H.request(server() + path, "POST", body=data)
        assert str(reply.status).startswith("2")
        parsed = json.loads(text)
        assert parsed == target, DictDiffer(target, parsed).diff()
        return text

    first = post_and_check("enrich-type?prop=type&format_field=format",
                           json.dumps(source))
    post_and_check("enrich-format?prop=format&type_field=type", first)
def test_enrich_date_parse_century_date():
    """Correctly transform a date of format '19th c.'

    Both "19th c." and "19th century" are expected to come back with null
    begin/end and the input kept as displayDate.
    """
    url = server() + "enrich_earliest_date?prop=date"
    cases = [
        # period stripped assumed OK
        ("19th c.", "19th c"),
        ("19th century", "19th century"),
    ]
    for raw, display in cases:
        expected = {"begin": None, "end": None, "displayDate": display}
        resp, content = H.request(url, "POST", body=json.dumps({"date": raw}))
        # Check the status first so a failed request is reported as such
        # instead of surfacing later inside json.loads.
        assert str(resp.status).startswith("2"), resp.status
        result = json.loads(content)
        assert result["date"] == expected, \
            "%s != %s" % (result["date"], expected)
def test_enrich_date_parse_century_date():
    """Correctly transform a date of format '19th c.'

    Century-style dates are POSTed to enrich_earliest_date; the returned
    date must have begin and end set to None and the expected displayDate.
    """
    url = server() + "enrich_earliest_date?prop=date"

    def check(raw, display):
        """POST one raw date and compare the enriched "date" value."""
        wanted = {"begin": None, "end": None, "displayDate": display}
        resp, content = H.request(url, "POST", body=json.dumps({"date": raw}))
        # A failed request should fail here, not as a decoding error below.
        assert str(resp.status).startswith("2"), resp.status
        result = json.loads(content)
        assert result["date"] == wanted, \
            "%s != %s" % (result["date"], wanted)

    check("19th c.", "19th c")  # period stripped assumed OK
    check("19th century", "19th century")
def test_cleanup_enrich_then_lookup1():
    """Should produce both name and iso639_3 language fields.

    For every input value the record is sent through cleanup_language,
    enrich_language, a lookup on language/name and an inverse lookup into
    language/iso639_3.  Each response must be 200 and the final body must
    match EXPECTED.
    """
    INPUT = ["en", "English", ["eng"], ["English"], ["en", "English"]]
    EXPECTED = {
        "sourceResource": {
            "language": [{
                "name": "English",
                "iso639_3": "eng"
            }]
        }
    }
    name_lookup = "lookup?prop=sourceResource%2Flanguage%2Fname"

    # Loop over the values themselves; `record` replaces a local that was
    # called `input` and hid the builtin.
    for language in INPUT:
        record = {"sourceResource": {"language": language}}

        url = server() + "cleanup_language"
        resp, content = H.request(url, "POST", json.dumps(record))
        assert resp.status == 200

        url = server() + "enrich_language"
        resp, content = H.request(url, "POST", content)
        assert resp.status == 200

        url = server() + name_lookup + \
            "&target=sourceResource%2Flanguage%2Fname&substitution=iso639_3"
        resp, content = H.request(url, "POST", content)
        assert resp.status == 200

        url = server() + name_lookup + \
            "&target=sourceResource%2Flanguage%2Fiso639_3" + \
            "&substitution=iso639_3&inverse=True"
        resp, content = H.request(url, "POST", content)
        assert resp.status == 200

        assert_same_jsons(content, EXPECTED)
def test_enrich_temporal_date():
    """Correctly enrich temporal dates.

    Date-like strings in sourceResource/spatial are expected to be moved
    into temporal by move_date_values and turned into begin/end/displayDate
    entries by enrich_date, leaving only "North Carolina" in spatial.
    """
    def span(begin, end, display):
        """Build one expected temporal entry."""
        return {"begin": begin, "end": end, "displayDate": display}

    record = {
        "sourceResource": {
            "spatial": [
                "1901-1999",
                " 1901 - 1999 ",
                " 1901 / 01 / 01",
                "1905-04-12",
                "01/01/1901",
                "1901",
                "North Carolina"
            ]
        }
    }
    wanted = {
        "sourceResource": {
            "temporal": [
                span("1901", "1999", "1901-1999"),
                span("1901", "1999", "1901 - 1999"),
                span("1901", "1901", "1901"),
                span("1901-01-01", "1901-01-01", "1901 / 01 / 01"),
                span("1901-01-01", "1901-01-01", "01/01/1901"),
                span("1905-04-12", "1905-04-12", "1905-04-12"),
            ],
            "spatial": ["North Carolina"]
        }
    }

    body = json.dumps(record)
    for path in ("move_date_values?prop=sourceResource/spatial",
                 "enrich_date"):
        response, body = H.request(server() + path, "POST", body=body)
        print_error_log()
        assert response.status == 200
    assert_same_jsons(wanted, body)
def test_geocode_unicode():
    """Handles unicode values that can be cast as UTF-8.

    A non-ASCII place name is POSTed to geocode; the response must keep the
    name and add the country.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"name": u"États-Unis"})
    wanted = wrap([{"country": "United States", "name": u"États-Unis"}])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_geocode_geonames_name_search_context():
    """Should find a place name, only if matching other data.

    "Portland" is sent together with state "Maine"; the expected result is
    the Cumberland County entry.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"name": "Portland", "state": "Maine"})
    wanted = wrap([{
        "county": "Cumberland County",
        "country": "United States",
        "state": "Maine",
        "name": "Portland",
        "coordinates": "43.66147, -70.25533"
    }])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_geocode_unicode():
    """Should handle unicode values.

    The non-ASCII name is expected back unchanged, wrapped in a list, with
    no other fields added.
    """
    place_name = u"États-Unis"
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"name": place_name}}
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [{"name": place_name}]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_geocode_set_name_by_feature():
    """Should set the name property to the smallest available feature value.

    Only country and city are supplied; the expected name is the city.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"country": "Canada", "city": "Bananas"})
    wanted = wrap([{
        'coordinates': '62.8329086304, -95.9133224487',
        'country': 'Canada',
        'name': 'Bananas',
        'state': 'Nunavut',
        "city": "Bananas"
    }])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_geocode_exclude_coordinates_from_countries():
    """Should not include coordinates or smaller administrative units in
    country enhancements.

    For the name "Greece" only country and name are expected.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"name": "Greece"})
    wanted = wrap([{"country": "Greece", "name": "Greece"}])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_convert_spatial_string_to_dictionary():
    """Format UIUC spatial dictionaries.

    Of the two spatial entries sent to uiuc_enrich_location, only
    "Honolulu, HI" is expected back; "1972 to Present" must be gone.
    """
    place = {"name": "Honolulu, HI"}
    record = {
        "id": "12345",
        "sourceResource": {
            "spatial": [place, {"name": "1972 to Present"}]
        },
        "creator": "David"
    }
    wanted = {
        "id": "12345",
        "sourceResource": {"spatial": [{"name": "Honolulu, HI"}]},
        "creator": "David"
    }

    response, body = H.request(server() + "uiuc_enrich_location", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert json.loads(body) == wanted
def test_geocode_set_name_region():
    """Should set the name property to the region value.

    Only a region is supplied; name and country are expected to be added.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"region": "Ecuador"})
    wanted = wrap([{
        "region": "Ecuador",
        "name": "Ecuador",
        "country": "Ecuador"
    }])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_geocode_set_name_state():
    """Should set the name property to the state value.

    Only a state is supplied; name, country and coordinates are expected
    to be added.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"state": "California"})
    wanted = wrap([{
        'coordinates': '37.25022, -119.75126',
        "country": "United States",
        "state": "California",
        "name": "California"
    }])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_geocode_set_name_by_feature():
    """Should set the name property to the smallest available feature value.

    Given country "Canada" and city "Bananas", the geocoded entry is
    expected to be named after the city.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"country": "Canada", "city": "Bananas"}
        }
    }
    geocoded_place = {
        'coordinates': '62.8329086304, -95.9133224487',
        'country': 'Canada',
        'name': 'Bananas',
        'state': 'Nunavut',
        "city": "Bananas"
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_geocode_set_name_county():
    """Should set the name property to the county value.

    The supplied country is the nonsense value "Bananas"; the county is
    still expected to be geocoded and used as the name.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {
                "county": "Los Angeles County",
                "country": "Bananas"
            }
        }
    }
    geocoded_place = {
        "county": "Los Angeles County",
        "country": "Bananas",
        "name": "Los Angeles County",
        "state": "California",
        # uses bing because geonames wants to match country values
        "coordinates": "33.9934997559, -118.29750824"
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_geocode_set_name_region():
    """Should set the name property to the region value.

    The region "Ecuador" is expected to be copied into name and country.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"region": "Ecuador"}}
    }
    geocoded_place = {
        "region": "Ecuador",
        "name": "Ecuador",
        "country": "Ecuador"
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_geocode_set_name_city():
    """Should set the name property to the city value.

    City and state are supplied; county, country, coordinates and a name
    equal to the city are expected in the result.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"city": "Los Angeles", "state": "California"}
        }
    }
    geocoded_place = {
        "coordinates": '34.05223, -118.24368',
        "city": "Los Angeles",
        'county': 'Los Angeles County',
        "state": "California",
        "country": "United States",
        "name": "Los Angeles"
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_geocode_set_name_coordinates():
    """Should set the name property to the lowest hierarchy value.

    Coordinates and a city are supplied; the expected name is the city,
    with state, county and country filled in.
    """
    point = "37.7771186829, -122.419639587"
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"coordinates": point, "city": "Bananas"}
        }
    }
    geocoded_place = {
        "coordinates": point,
        "city": "Bananas",
        "state": "California",
        "name": "Bananas",
        "county": "San Francisco County",
        "country": "United States"
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_geocode_works_with_dotted_abbreviations():
    """Resolves something like "Greenville (S.C.)" as well as "SC".

    The dotted spelling must be kept in the returned name while city,
    county, state, country and coordinates are filled in.
    """
    # Note when retrofitting Twofishes later:  Twofishes handles "(S.C.)" just
    # fine, so most of this test's assertion should be kept, but the code that
    # works around this syntax should be altered.  When we use Twofishes,
    # we're going to be able to preserve the "S.C." spelling in the "name"
    # property, and when we do this for Ingestion 3 with MAPv4 we'll be able
    # to preserve that spelling in the providedLabel property.
    place_name = "Greenville (S.C.)"
    payload = {
        "_id": "foo",
        "sourceResource": {"spatial": {"name": place_name}}
    }
    geocoded_place = {
        "city": "Greenville",
        "county": "Greenville County",
        "country": "United States",
        "state": "South Carolina",
        "name": place_name,
        "coordinates": "34.85262, -82.39401"
    }
    expected = {
        "_id": "foo",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def _get_server_response(body):
    """POST body to dpla_mapper with mapper_type=lapl_oai.

    Returns whatever H.request returns for that call.
    """
    endpoint = "%s%s" % (server(), "dpla_mapper?mapper_type=lapl_oai")
    result = H.request(endpoint, "POST", body=body)
    return result
def test_geocode_do_not_skip_united_states():
    """Should geocode when name value is 'United States' is followed by
    a '-'.

    "United States--California" is expected to resolve to California.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"name": "United States--California"})
    wanted = wrap([{
        "coordinates": "37.25022, -119.75126",
        "country": "United States",
        "name": "United States--California",
        "state": "California"
    }])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_strip_non_spatial_entries():
    """Strip out strings that are not locations.

    "Pictorial works" must disappear from spatial and "Somerville, MA"
    must come back as a {"name": ...} dictionary.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "sourceResource": {"spatial": spatial},
            "creator": "David"
        }

    record = wrap(["Pictorial works", "Somerville, MA"])
    wanted = wrap([{"name": "Somerville, MA"}])

    endpoint = server() + "digital_commonwealth_enrich_location"
    response, body = H.request(endpoint, "POST", body=json.dumps(record))
    assert response.status == 200
    assert json.loads(body) == wanted
def test_geocode_exclude_coordinates_from_countries():
    """Should not include coordinates or smaller administrative units in
    country enhancements.

    Geocoding the name "Greece" is expected to add only the country.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"name": "Greece"}}
    }
    geocoded_place = {"country": "Greece", "name": "Greece"}
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_convert_spatial_string_to_dictionary():
    """Convert a spatial string into a dictionary with a key of 'name'.

    The expected names also differ from the inputs: the coordinate string
    loses the space before N/W and "Mass." becomes "MA".
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "sourceResource": {"spatial": spatial},
            "creator": "David"
        }

    record = wrap([u'42.24 N 71.49 W', u"Bear Park (Reading Mass.)"])
    wanted = wrap([
        {"name": u"42.24N 71.49W"},
        {"name": u"Bear Park (Reading MA)"}
    ])

    endpoint = server() + "digital_commonwealth_enrich_location"
    response, body = H.request(endpoint, "POST", body=json.dumps(record))
    assert response.status == 200
    assert json.loads(body) == wanted
def test_geocode_geonames_name_search():
    """Should find a place name.

    "Portland, OR" is expected to resolve to the Multnomah County, Oregon
    entry with coordinates.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"name": "Portland, OR"})
    wanted = wrap([{
        "county": "Multnomah County",
        "country": "United States",
        "state": "Oregon",
        "name": "Portland, OR",
        "coordinates": "45.52345, -122.67621"
    }])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_geocode_set_name_state():
    """Should set the name property to the state value.

    The state "California" is expected to be copied into name, with
    country and coordinates added.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"state": "California"}}
    }
    geocoded_place = {
        'coordinates': '37.25022, -119.75126',
        "country": "United States",
        "state": "California",
        "name": "California"
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_geocode_coordinate_provided():
    """Geocode a spatial entry whose name is a coordinate pair.

    The name is expected to be kept, copied into coordinates, and
    complemented with state and country.
    """
    point = "42.358631134, -71.0567016602"

    def wrap(spatial):
        """Build the record around a spatial list."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial},
            "creator": "David"
        }

    record = wrap([{"name": point}])
    wanted = wrap([{
        "state": "Massachusetts",
        "country": "United States",
        "name": point,
        "coordinates": point
    }])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(json.loads(body), wanted)
def test_geocode_do_not_skip_united_states():
    """Should geocode when name value is 'United States' is followed by
    a '-'.

    The name "United States--California" must be kept and California's
    state, country and coordinates added.
    """
    place_name = "United States--California"
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"name": place_name}}
    }
    geocoded_place = {
        "coordinates": "37.25022, -119.75126",
        "country": "United States",
        "name": place_name,
        "state": "California"
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_filtering_with_given_keys():
    """Filtering with given keys.

    With keys=filter_me, empty strings and None are expected to be removed
    from inside "filter_me" (including list items), while the top-level
    "empty_key" stays untouched.
    """
    untouched = {"id": "999", "prop1": "value1", "empty_key": ""}

    record = dict(untouched)
    record["filter_me"] = {
        'notempty': ['a', 'b', 'c'],
        'empty': '',
        'none': None,
        'crumb': ['x', None, 'y', '']
    }
    wanted = dict(untouched)
    wanted["filter_me"] = {
        'notempty': ['a', 'b', 'c'],
        'crumb': ['x', 'y']
    }

    endpoint = server() + "filter_fields?keys=filter_me"
    response, body = H.request(endpoint, "POST", body=json.dumps(record))
    assert str(response.status).startswith("2")
    assert json.loads(body) == wanted
def test_geocode_geonames_name_search():
    """Should find a place name.

    The name "Portland, OR" must be kept and county, state, country and
    coordinates added.
    """
    place_name = "Portland, OR"
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"name": place_name}}
    }
    geocoded_place = {
        "county": "Multnomah County",
        "country": "United States",
        "state": "Oregon",
        "name": place_name,
        "coordinates": "45.52345, -122.67621"
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_contentdm_identify_object_with_download():
    """Should add a thumbnail URL made of the source URL.

    Uses contentdm_url("True"); besides "object", the expected record
    gains admin.object_status == 1.
    """
    handle = ["aaa", "http://repository.clemson.edu/u?/scp,104"]
    record = {
        u"something": "x",
        u"somethink": "y",
        u"originalRecord": {"handle": handle},
        u"left": "right now!"
    }
    thumbnail = ("http://repository.clemson.edu/cgi-bin/" +
                 "thumbnail.exe?CISOROOT=/scp&CISOPTR=104")
    wanted = {
        u"something": "x",
        u"somethink": "y",
        u"originalRecord": {"handle": handle},
        u"object": thumbnail,
        u"admin": {u"object_status": 1},
        u"left": "right now!"
    }

    response, body = H.request(contentdm_url("True"), "POST",
                               body=json.dumps(record))
    assert str(response.status).startswith("2")
    assert_same_jsons(wanted, json.loads(body))
def test_geocode_geonames_name_search_failure():
    """Shouldn't fall down when nothing is returned.

    The name "1234567" is expected back unchanged, wrapped in a list, with
    a 200 status.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"name": "1234567"})
    wanted = wrap([{"name": "1234567"}])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_range_with_brackets():
    """Should transform date range with brackets.

    Each sample pairs the submitted string with the displayDate expected
    back; begin and end are the same for every sample.
    """
    samples = [
        ("1960-05-01 - 1960-05-15", "1960-05-01 - 1960-05-15"),
        ("[ 1960-05-01 - 1960-05-15 ]", "1960-05-01 - 1960-05-15"),
        ("[1960-05-01 - 1960-05-15]", "1960-05-01 - 1960-05-15"),
        ("[1960-05-01 / 1960-05-15]", "1960-05-01 / 1960-05-15"),
        ("[1960-05-01/1960-05-15]", "1960-05-01/1960-05-15"),
    ]
    for submitted, display in samples:
        record = {"date": submitted}
        wanted = {
            u'date': {
                u'begin': u'1960-05-01',
                u'end': u'1960-05-15',
                "displayDate": display
            }
        }
        endpoint = server() + "enrich_earliest_date?prop=date"
        response, body = H.request(endpoint, "POST", body=json.dumps(record))
        assert str(response.status).startswith("2")
        print_error_log()
        assert_same_jsons(wanted, body)
def test_geocode_geonames_name_search_context():
    """Should find a place name, only if matching other data.

    With state "Maine" supplied, "Portland" is expected to resolve to the
    Cumberland County entry.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"name": "Portland", "state": "Maine"}
        }
    }
    geocoded_place = {
        "county": "Cumberland County",
        "country": "United States",
        "state": "Maine",
        "name": "Portland",
        "coordinates": "43.66147, -70.25533"
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]}
    }

    endpoint = server() + "geocode"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert_same_jsons(expected, json.loads(text))
def test_range_with_brackets():
    """Should transform date range with brackets.

    Bracketed and unbracketed spellings of the same range are sent to
    enrich_earliest_date; the brackets must be absent from displayDate.
    """
    def check(raw, display):
        """POST one raw date string and compare the enriched document."""
        payload = {"date": raw}
        expected = {
            u'date': {
                u'begin': u'1960-05-01',
                u'end': u'1960-05-15',
                "displayDate": display
            }
        }
        url = server() + "enrich_earliest_date?prop=date"
        reply, text = H.request(url, "POST", body=json.dumps(payload))
        assert str(reply.status).startswith("2")
        print_error_log()
        assert_same_jsons(expected, text)

    check("1960-05-01 - 1960-05-15", "1960-05-01 - 1960-05-15")
    check("[ 1960-05-01 - 1960-05-15 ]", "1960-05-01 - 1960-05-15")
    check("[1960-05-01 - 1960-05-15]", "1960-05-01 - 1960-05-15")
    check("[1960-05-01 / 1960-05-15]", "1960-05-01 / 1960-05-15")
    check("[1960-05-01/1960-05-15]", "1960-05-01/1960-05-15")
def _get_server_response(body, prop=None, to_prop=None):
    """POST body to the move_date_values endpoint.

    prop and to_prop, when truthy, are appended as query parameters of the
    same name; their values are inserted as given, without URL-encoding.
    Returns whatever H.request returns for the call.
    """
    url = server() + "move_date_values"
    params = []
    if prop:
        params.append("prop=%s" % prop)
    if to_prop:
        params.append("to_prop=%s" % to_prop)
    if params:
        # "?" starts the query string and "&" separates parameters, so
        # passing to_prop without prop still yields a well-formed URL.
        url = "%s?%s" % (url, "&".join(params))
    return H.request(url, "POST", body=body)
def test_contentdm_identify_object_usc():
    """Should add a thumbnail URL made of the source URL.

    Uses contentdm_url("False"); the expected record gains the
    getthumbnail URL as "object" and admin.object_status == 0.
    """
    handle = ["aaa", "http://some.url/cdm/ref/12345"]
    record = {
        u"something": "x",
        u"somethink": "y",
        u"originalRecord": {"handle": handle},
        u"left": "right now!"
    }
    wanted = {
        u"something": "x",
        u"somethink": "y",
        u"originalRecord": {"handle": handle},
        u"object": "http://some.url/utils/getthumbnail/12345",
        u"admin": {u"object_status": 0},
        u"left": "right now!"
    }

    response, body = H.request(contentdm_url("False"), "POST",
                               body=json.dumps(record))
    print_error_log()
    assert str(response.status).startswith("2")
    assert_same_jsons(wanted, json.loads(body))
def test_strip_html():
    """'strip_html' strips HTML tags and entities recursively.

    Strings nested in dictionaries and lists are expected back without
    tags and surrounding whitespace; the empty dict, the integer and the
    "b&w" string must come back unchanged.
    """
    payload = {
        'a': {
            'b': [' <i>string</i> <b>one</b> \n \t', 'string < two ']
        },
        'c': ' \n <p>string three</p>',
        'd': {},
        'e': 1,
        'f': '1 film negative: b&w ;'
    }
    wanted = {
        'a': {
            'b': [u'string one', u'string < two']
        },
        'c': u'string three',
        'd': {},  # unaltered
        'e': 1,  # unaltered
        'f': '1 film negative: b&w ;'  # unaltered
    }

    endpoint = server() + 'strip_html'
    meta, raw = H.request(endpoint, 'POST', body=json.dumps(payload))
    assert meta.status == 200
    assert_same_jsons(wanted, raw)
def test_enrich_location_no_provider_specific_enrich_location1():
    """No previous provider-specific location enrichment and does not
    contain states or state abbreviations.

    Each plain spatial string is expected back as a {"name": ...} entry,
    in the same order.
    """
    places = ["Asheville", "Buncombe", "United States"]
    record = {
        "id": "12345",
        "sourceResource": {"spatial": places},
        "creator": "Miguel"
    }
    wanted = {
        "id": "12345",
        "sourceResource": {
            "spatial": [{"name": place} for place in places]
        },
        "creator": "Miguel"
    }

    response, body = H.request(server() + "enrich_location", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert json.loads(body) == wanted
def test_enrich_location_after_provider_specific_enrich_location4():
    """Previous specific-provider location did not set state.

    The semicolon-separated city and county values are expected to be
    split into two entries; only the first entry keeps the country.
    """
    def wrap(spatial):
        """Build the record around a spatial list."""
        return {
            "id": "12345",
            "sourceResource": {"spatial": spatial},
            "creator": "Miguel",
        }

    record = wrap([{
        "city": "Asheville; La Jolla",
        "county": "Buncombe;San Diego",
        "country": "United States",
    }])
    wanted = wrap([
        {"city": "Asheville", "county": "Buncombe",
         "country": "United States"},
        {"city": "La Jolla", "county": "San Diego"},
    ])

    response, body = H.request(server() + "enrich_location", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert json.loads(body) == wanted
def test_enrich_list_of_dictionaries_and_strings():
    """Should handle list of dictionaries and strings.

    The dictionary entry is expected back unchanged and each string entry
    as a {"name": ...} dictionary.
    """
    structured = {
        "country": "United States",
        "county": "Buncombe",
        "state": "North Carolina",
    }
    labels = ["Rushmore, Mount", "Mount Rushmore National Memorial"]

    record = {
        "id": "12345",
        "sourceResource": {"spatial": [structured] + labels},
    }
    wanted = {
        "id": "12345",
        "sourceResource": {
            "spatial": [structured] + [{"name": label} for label in labels]
        },
    }

    response, body = H.request(server() + "enrich_location", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert json.loads(body) == wanted
def test_enrich_location_after_provider_specific_enrich_location4():
    """Previous specific-provider location did not set state.

    One spatial dictionary with "; "-joined city and ";"-joined county
    values goes in; two dictionaries are expected out, the second one
    without a country.
    """
    combined = {
        "city": "Asheville; La Jolla",
        "county": "Buncombe;San Diego",
        "country": "United States"
    }
    first = {
        "city": "Asheville",
        "county": "Buncombe",
        "country": "United States",
    }
    second = {
        "city": "La Jolla",
        "county": "San Diego",
    }
    payload = {
        "id": "12345",
        "sourceResource": {"spatial": [combined]},
        "creator": "Miguel"
    }
    expected = {
        "id": "12345",
        "sourceResource": {"spatial": [first, second]},
        "creator": "Miguel"
    }

    endpoint = server() + "enrich_location"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert json.loads(text) == expected
def test_year_month():
    """Should recognize YYYY-MM and not YYYY-YY.

    Every spelling in INPUT is expected to yield begin == end == "1940-02"
    with the submitted string kept as displayDate.
    """
    INPUT = [
        "1940/2",
        "1940/02",
        "1940 / 2",
        "1940 / 02",
        "1940-2",
        "1940-02",
        "1940 - 2",
        "1940 - 02",
        "2/1940",
        "02/1940",
        "2 / 1940",
        "02 / 1940",
        "2-1940",
        "02-1940",
        "2 - 1940",
        "02 - 1940",
    ]
    url = server() + "enrich_earliest_date?prop=date"
    # The expected year-month is the same for every input, so set it once.
    year_month = "1940-02"
    for date in INPUT:
        # Named `record` rather than `input` so the builtin is not hidden.
        record = {"date": date}
        expected = {
            "date": {
                "begin": year_month,
                "end": year_month,
                "displayDate": date
            }
        }
        resp, content = H.request(url, "POST", body=json.dumps(record))
        print_error_log()
        assert str(resp.status).startswith("2")
        assert_same_jsons(expected, content)
def test_enrich_list_of_dictionaries_and_strings():
    """Should handle list of dictionaries and strings.

    A spatial list mixing one dictionary with two plain strings is sent to
    enrich_location; the strings must come back wrapped as {"name": ...}.
    """
    def wrap(spatial):
        """Build the record around a spatial list."""
        return {"id": "12345", "sourceResource": {"spatial": spatial}}

    county = {
        "country": "United States",
        "county": "Buncombe",
        "state": "North Carolina"
    }
    payload = wrap([
        county,
        "Rushmore, Mount",
        "Mount Rushmore National Memorial"
    ])
    expected = wrap([
        county,
        {"name": "Rushmore, Mount"},
        {"name": "Mount Rushmore National Memorial"}
    ])

    endpoint = server() + "enrich_location"
    reply, text = H.request(endpoint, "POST", body=json.dumps(payload))
    assert reply.status == 200
    assert json.loads(text) == expected
def test_capitalize_value_exclude():
    """Should capitalize values except those of the excluded property.

    With exclude=sourceResource/subject, the format values are expected
    capitalized and the subject values unchanged.  The base `url` is not
    defined in this function and is taken from the enclosing scope.
    """
    subjects = ["subject", "hi there", "hello"]
    record = {
        "id": "123",
        "sourceResource": {
            "format": ["format1", "format2"],
            "subject": subjects
        }
    }
    wanted = {
        "id": "123",
        "sourceResource": {
            "format": ["Format1", "Format2"],
            "subject": subjects
        }
    }

    response, body = H.request(url + "?exclude=sourceResource/subject",
                               "POST", json.dumps(record))
    assert response.status == 200
    fetched = json.loads(body)
    assert fetched == wanted, DictDiffer(wanted, fetched).diff()
def test_removing_bracket():
    """Should remove bracket from the beginning of the name.

    One semicolon-separated spatial string is expected to be split into
    six named entries, with the stray "[" before "Germany" removed.
    """
    joined = ("Charleston (S.C.); [Germany; Poland; Israel; "
              "New York (N.Y.); Georgia (U.S.)")
    names = [
        "Charleston (S.C.)",
        "Germany",
        "Poland",
        "Israel",
        "New York (N.Y.)",
        "Georgia (U.S.)",
    ]
    record = {
        "id": "12345",
        "sourceResource": {"spatial": [joined]},
        "creator": "Miguel",
    }
    wanted = {
        "id": "12345",
        "sourceResource": {"spatial": [{"name": name} for name in names]},
        "creator": "Miguel",
    }

    response, body = H.request(server() + "enrich_location", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert json.loads(body) == wanted
def test_geocode_set_name_coordinates():
    """Should set the name property to the lowest hierarchy value.

    Coordinates plus the city "Bananas" are supplied; the expected entry
    is named after the city and gains state, county and country.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({
        "coordinates": "37.7771186829, -122.419639587",
        "city": "Bananas"
    })
    wanted = wrap([{
        "coordinates": "37.7771186829, -122.419639587",
        "city": "Bananas",
        "state": "California",
        "name": "Bananas",
        "county": "San Francisco County",
        "country": "United States"
    }])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_artstor_cleanup_creator2():
    """Cleanup the creator field.

    Ten creator strings carrying different prefixes and padding are all
    expected to come back as "bananas"; the padded " Apples" is expected
    as "Apples".
    """
    raw_creators = [
        " and bananas",
        " Artist: bananas",
        "Author: bananas",
        "Binder: bananas",
        "Drawn by bananas",
        "drawn by bananas",
        " illuminator: bananas",
        "Or bananas ",
        "Scribe: bananas",
        "Resolve bananas",
        " Apples"
    ]
    record = {"sourceResource": {"creator": raw_creators}}
    wanted = {
        "sourceResource": {
            "creator": ["bananas"] * 10 + ["Apples"]
        }
    }

    endpoint = server() + "artstor_cleanup_creator"
    response, body = H.request(endpoint, "POST", body=json.dumps(record))
    assert str(response.status).startswith("2")
    fetched = json.loads(body)
    assert fetched == wanted, DictDiffer(fetched, wanted).diff()
def test_geocode_set_name_city():
    """Should set the name property to the city value.

    "Los Angeles" with state "California" is expected to gain county,
    country, coordinates and a name equal to the city.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"city": "Los Angeles", "state": "California"})
    wanted = wrap([{
        "coordinates": '34.05223, -118.24368',
        "city": "Los Angeles",
        'county': 'Los Angeles County',
        "state": "California",
        "country": "United States",
        "name": "Los Angeles"
    }])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def _get_server_response(body):
    """POST body to the oai-to-dpla endpoint.

    Returns whatever H.request returns for that call.
    """
    endpoint = "%s%s" % (server(), "oai-to-dpla")
    result = H.request(endpoint, "POST", body=body)
    return result
def test_geocode_set_name_county():
    """Should set the name property to the county value.

    The county is paired with the nonsense country "Bananas", which is
    expected to be kept while state, coordinates and name are added.
    """
    def wrap(spatial):
        """Build the record around a spatial value."""
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial}
        }

    record = wrap({"county": "Los Angeles County", "country": "Bananas"})
    wanted = wrap([{
        "county": "Los Angeles County",
        "country": "Bananas",
        "name": "Los Angeles County",
        "state": "California",
        # uses bing because geonames wants to match country values
        "coordinates": "33.9934997559, -118.29750824"
    }])

    response, body = H.request(server() + "geocode", "POST",
                               body=json.dumps(record))
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))