def test_physical_format_from_format_and_type():
    """Test physical format appending from format and type fields"""
    INPUT = {
        "format": ["76.8 x 104 cm", "Oil on canvas",
                   "7 1/4 x 6 inches (18.4 x 15.2 cm)",
                   "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)"],
        "type": ["Paintings", "Painting"]
    }
    EXPECTED = {
        "format": ["76.8 x 104 cm", "Oil on canvas",
                   "7 1/4 x 6 inches (18.4 x 15.2 cm)",
                   "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)",
                   "Paintings", "Painting"]
    }

    # First pass through enrich-type appends the type values to "format".
    resp, content = H.request(server() + "enrich-type?prop=type&format_field=format",
                              "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    FETCHED = json.loads(content)
    assert FETCHED == EXPECTED, DictDiffer(EXPECTED, FETCHED).diff()

    # Second pass through enrich-format should leave the document as-is.
    resp, content = H.request(server() + "enrich-format?prop=format&type_field=type",
                              "POST", body=content)
    assert str(resp.status).startswith("2")
    FETCHED = json.loads(content)
    assert FETCHED == EXPECTED, DictDiffer(EXPECTED, FETCHED).diff()
def test_cleanup_enrich_then_lookup1():
    """Should produce both name and iso639_3 language fields"""
    INPUT = [
        "en",
        "English",
        ["eng"],
        ["English"],
        ["en", "English"]
    ]
    EXPECTED = {
        "sourceResource": {
            "language": [{"name": "English", "iso639_3": "eng"}]
        }
    }

    # Iterate the variants directly instead of range(len(...)); also avoid
    # shadowing the builtin `input` as the original did.
    for language in INPUT:
        doc = {"sourceResource": {"language": language}}

        url = server() + "cleanup_language"
        resp, content = H.request(url, "POST", json.dumps(doc))
        assert resp.status == 200

        url = server() + "enrich_language"
        resp, content = H.request(url, "POST", content)
        assert resp.status == 200

        url = server() + "lookup?prop=sourceResource%2Flanguage%2Fname" + \
              "&target=sourceResource%2Flanguage%2Fname&substitution=iso639_3"
        resp, content = H.request(url, "POST", content)
        assert resp.status == 200

        url = server() + "lookup?prop=sourceResource%2Flanguage%2Fname" + \
              "&target=sourceResource%2Flanguage%2Fiso639_3" + \
              "&substitution=iso639_3&inverse=True"
        resp, content = H.request(url, "POST", content)
        assert resp.status == 200

        # Every input variant should normalize to the same enriched value.
        assert_same_jsons(content, EXPECTED)
def test_enrich_date_parse_century_date():
    """Correctly transform a date of format '19th c.'"""
    url = server() + "enrich_earliest_date?prop=date"

    # Abbreviated century form; the trailing period is stripped.
    INPUT = {"date": "19th c."}
    EXPECTED = {
        "date": {
            "begin": None,
            "end": None,
            "displayDate": "19th c"  # period stripped assumed OK
        }
    }
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    result = json.loads(content)
    assert result["date"] == EXPECTED["date"], \
        "%s != %s" % (result["date"], EXPECTED["date"])

    # Spelled-out century form is preserved verbatim.
    INPUT = {"date": "19th century"}
    EXPECTED = {
        "date": {
            "begin": None,
            "end": None,
            "displayDate": "19th century"
        }
    }
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    result = json.loads(content)
    assert result["date"] == EXPECTED["date"], \
        "%s != %s" % (result["date"], EXPECTED["date"])
def test_geocode_set_name_coordinates():
    """Should set the name property to the lowest hierarchy value"""
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {
                "coordinates": "37.7771186829, -122.419639587",
                "city": "Bananas"
            }
        }
    }
    # The city ("Bananas") is the lowest hierarchy level present, so it
    # becomes "name"; geocoding fills in state/county/country.
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "coordinates": "37.7771186829, -122.419639587",
                    "city": "Bananas",
                    "state": "California",
                    "name": "Bananas",
                    "county": "San Francisco County",
                    "country": "United States"
                }
            ]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_uiuc_strip_non_spatial_entries():
    """Format UIUC spatial dictionaries

    Renamed from test_convert_spatial_string_to_dictionary: a later
    function in this module reuses that name, so this definition was
    shadowed and never collected/run by the test runner.
    """
    INPUT = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"name": "Honolulu, HI"},
                {"name": "1972 to Present"}
            ]
        },
        "creator": "David"
    }
    # The date-range string is not a place and should be dropped.
    EXPECTED = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"name": "Honolulu, HI"}
            ]
        },
        "creator": "David"
    }

    url = server() + "uiuc_enrich_location"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert json.loads(content) == EXPECTED
def test_removing_bracket():
    """Should remove bracket from the beginning of the name"""
    INPUT = {
        "id": "12345",
        "sourceResource": {
            "spatial": ["Charleston (S.C.); [Germany; Poland; Israel; "
                        "New York (N.Y.); Georgia (U.S.)"]
        },
        "creator": "Miguel",
    }
    # The ";"-separated string is shredded into dicts and the stray "["
    # before "Germany" is removed.
    EXPECTED = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"name": "Charleston (S.C.)"},
                {"name": "Germany"},
                {"name": "Poland"},
                {"name": "Israel"},
                {"name": "New York (N.Y.)"},
                {"name": "Georgia (U.S.)"},
            ]
        },
        "creator": "Miguel",
    }

    url = server() + "enrich_location"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert json.loads(content) == EXPECTED
def test_enrich_list_of_dictionaries_and_strings():
    """Should handle list of dictionaries and strings"""
    INPUT = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"country": "United States",
                 "county": "Buncombe",
                 "state": "North Carolina"},
                "Rushmore, Mount",
                "Mount Rushmore National Memorial",
            ]
        },
    }
    # Existing dicts pass through; bare strings are wrapped as {"name": ...}.
    EXPECTED = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"country": "United States",
                 "county": "Buncombe",
                 "state": "North Carolina"},
                {"name": "Rushmore, Mount"},
                {"name": "Mount Rushmore National Memorial"},
            ]
        },
    }

    url = server() + "enrich_location"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert json.loads(content) == EXPECTED
def test_geocode_geonames_name_search():
    """Should find a place name."""
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"name": "Portland, OR"}
        }
    }
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{
                "county": "Multnomah County",
                "country": "United States",
                "state": "Oregon",
                "name": "Portland, OR",
                "coordinates": "45.52345, -122.67621"
            }]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_geocode_set_name_county():
    """Should set the name property to the county value"""
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {
                "county": "Los Angeles County",
                "country": "Bananas"
            }
        }
    }
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "county": "Los Angeles County",
                    "country": "Bananas",
                    "name": "Los Angeles County",
                    "state": "California",
                    # uses bing because geonames wants to match country values
                    "coordinates": "33.9934997559, -118.29750824"
                }
            ]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_enrich_location_after_provider_specific_enrich_location4():
    """Previous specific-provider location did not set state."""
    INPUT = {
        "id": "12345",
        "sourceResource": {
            "spatial": [{"city": "Asheville; La Jolla",
                         "county": "Buncombe;San Diego",
                         "country": "United States"}]
        },
        "creator": "Miguel",
    }
    # ";"-delimited values are split pairwise into separate spatial dicts;
    # the country is kept only on the first.
    EXPECTED = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"city": "Asheville",
                 "county": "Buncombe",
                 "country": "United States"},
                {"city": "La Jolla",
                 "county": "San Diego"},
            ]
        },
        "creator": "Miguel",
    }

    url = server() + "enrich_location"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert json.loads(content) == EXPECTED
def test_geocode_do_not_skip_united_states():
    """Should geocode when name value is 'United States' is followed by a '-'
    """
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"name": "United States--California"}
        }
    }
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{
                "coordinates": "37.25022, -119.75126",
                "country": "United States",
                "name": "United States--California",
                "state": "California"
            }]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_strip_non_spatial_entries():
    """Strip out strings that are not locations."""
    INPUT = {
        "id": "12345",
        "sourceResource": {
            "spatial": ["Pictorial works", "Somerville, MA"]
        },
        "creator": "David"
    }
    # "Pictorial works" is a genre heading, not a place, and is removed.
    EXPECTED = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"name": "Somerville, MA"}
            ]
        },
        "creator": "David"
    }

    url = server() + "digital_commonwealth_enrich_location"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert json.loads(content) == EXPECTED
def test_webfeedjson(): from amara.thirdparty import json import json url = server() + "akara.webfeed.json?url=http://feeds.delicious.com/v2/rss/recent%3Fmin=1%26count=15" response = urlopen(url) results = json.load(response) print results
def test_convert_spatial_string_to_dictionary():
    """Convert a spatial string into a dictionary with a key of 'name'"""
    INPUT = {
        "id": "12345",
        "sourceResource": {
            "spatial": [u'42.24 N 71.49 W', u"Bear Park (Reading Mass.)"]
        },
        "creator": "David"
    }
    # Coordinates are normalized ("42.24N 71.49W") and the state
    # abbreviation is standardized ("Mass." -> "MA").
    EXPECTED = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"name": u"42.24N 71.49W"},
                {"name": u"Bear Park (Reading MA)"}
            ]
        },
        "creator": "David"
    }

    url = server() + "digital_commonwealth_enrich_location"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert json.loads(content) == EXPECTED
def _make_log2json_request(query_args):
    # POST the shared Apache log fixture to akara.wwwlog.json with the
    # given query string and return the parsed JSON response.
    from amara.thirdparty import json
    url = server() + "akara.wwwlog.json" + query_args
    req = urllib2.Request(url)
    req.add_header("Content-Type", "text/plain")
    response = urllib2.urlopen(req, _apache_query_data)
    return json.load(response)
def test_geocode_exclude_coordinates_from_countries():
    """Should not include coordinates or smaller administrative units in
    country enhancements
    """
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"name": "Greece"}
        }
    }
    # A bare country gets only the country property -- no coordinates.
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{
                "country": "Greece",
                "name": "Greece"
            }]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_geocode_set_name_state():
    """Should set the name property to the state value"""
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"state": "California"}
        }
    }
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{
                "coordinates": "37.25022, -119.75126",
                "country": "United States",
                "state": "California",
                "name": "California"
            }]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_geocode_coordinate_provided():
    # When "name" already holds a lat/long pair, it is reverse-geocoded:
    # the pair is copied to "coordinates" and state/country are added.
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {"name": "42.358631134, -71.0567016602"}
            ]
        },
        "creator": "David"
    }
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "state": "Massachusetts",
                    "country": "United States",
                    "name": "42.358631134, -71.0567016602",
                    "coordinates": "42.358631134, -71.0567016602"
                }
            ]
        },
        "creator": "David"
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(json.loads(content), EXPECTED)
def test_geocode_set_name_city():
    """Should set the name property to the city value"""
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {
                "city": "Los Angeles",
                "state": "California"
            }
        }
    }
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "coordinates": "34.05223, -118.24368",
                    "city": "Los Angeles",
                    "county": "Los Angeles County",
                    "state": "California",
                    "country": "United States",
                    "name": "Los Angeles"
                }
            ]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_geocode_set_name_region():
    """Should set the name property to the region value"""
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"region": "Ecuador"}
        }
    }
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "region": "Ecuador",
                    "name": "Ecuador",
                    "country": "Ecuador"
                }
            ]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_geocode_geonames_name_search_failure():
    """Shouldn't fall down when nothing is returned."""
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"name": "1234567"}
        }
    }
    # An unresolvable name passes through unchanged (just listified).
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{"name": "1234567"}]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_year_month():
    """Should recognize YYYY-MM and not YYYY-YY"""
    INPUT = [
        "1940/2", "1940/02", "1940 / 2", "1940 / 02",
        "1940-2", "1940-02", "1940 - 2", "1940 - 02",
        "2/1940", "02/1940", "2 / 1940", "02 / 1940",
        "2-1940", "02-1940", "2 - 1940", "02 - 1940",
    ]
    url = server() + "enrich_earliest_date?prop=date"

    # Every spelling above should normalize to 1940-02.
    # (Renamed the loop locals: the original shadowed the builtin `input`.)
    for date in INPUT:
        normalized = "1940-02"
        payload = {"date": date}
        expected = {"date": {"begin": normalized,
                             "end": normalized,
                             "displayDate": date}}
        resp, content = H.request(url, "POST", body=json.dumps(payload))
        print_error_log()
        assert str(resp.status).startswith("2")
        assert_same_jsons(expected, content)
def GET3(name, args=None, data=None):
    # Fetch a server endpoint, optionally with URL-encoded query args and
    # a POST body, returning (status code, headers, body).
    url = server() + name
    if args:
        url += "?" + urllib.urlencode(args)
    f = urlopen(url, data)
    body = f.read()
    return f.code, f.headers, body
def test_geocode_geonames_name_search_context():
    """Should find a place name, only if matching other data."""
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {
                "name": "Portland",
                "state": "Maine"
            }
        }
    }
    # The state context disambiguates Portland, ME from Portland, OR.
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{
                "county": "Cumberland County",
                "country": "United States",
                "state": "Maine",
                "name": "Portland",
                "coordinates": "43.66147, -70.25533"
            }]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_geocode_set_name_by_feature():
    """Should set the name property to the smallest available feature value"""
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {
                "country": "Canada",
                "city": "Bananas"
            }
        }
    }
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{
                "coordinates": "62.8329086304, -95.9133224487",
                "country": "Canada",
                "name": "Bananas",
                "state": "Nunavut",
                "city": "Bananas"
            }]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_range_with_brackets():
    """Should transform date range with brackets."""
    # (raw input, expected displayDate with brackets stripped)
    ranges = [
        ("1960-05-01 - 1960-05-15", "1960-05-01 - 1960-05-15"),
        ("[ 1960-05-01 - 1960-05-15 ]", "1960-05-01 - 1960-05-15"),
        ("[1960-05-01 - 1960-05-15]", "1960-05-01 - 1960-05-15"),
        ("[1960-05-01 / 1960-05-15]", "1960-05-01 / 1960-05-15"),
        ("[1960-05-01/1960-05-15]", "1960-05-01/1960-05-15"),
    ]

    url = server() + "enrich_earliest_date?prop=date"
    for raw, display in ranges:
        INPUT = {"date": raw}
        EXPECTED = {
            u'date': {
                u'begin': u'1960-05-01',
                u'end': u'1960-05-15',
                "displayDate": display
            }
        }
        resp, content = H.request(url, "POST", body=json.dumps(INPUT))
        assert str(resp.status).startswith("2")
        print_error_log()
        assert_same_jsons(EXPECTED, content)
def test_geocode_unicode_utf8_castable():
    """Handles unicode values that can be cast as UTF-8

    Renamed from test_geocode_unicode: a later function in this module
    reuses that name, so this definition was shadowed and never
    collected/run by the test runner.
    """
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"name": u"États-Unis"}
        }
    }
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{
                "country": "United States",
                "name": u"États-Unis"
            }]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_geocode_works_with_dotted_abbreviations():
    """Resolves something like "Greenville (S.C.)" as well as "SC" """
    # Note when retrofitting Twofishes later: Twofishes handles "(S.C.)" just
    # fine, so most of this test's assertion should be kept, but the code that
    # works around this syntax should be altered. When we use Twofishes,
    # we're going to be able to preserve the "S.C." spelling in the "name"
    # property, and when we do this for Ingestion 3 with MAPv4 we'll be able
    # to preserve that spelling in the providedLabel property.
    INPUT = {
        "_id": "foo",
        "sourceResource": {
            "spatial": {"name": "Greenville (S.C.)"}
        }
    }
    EXPECTED = {
        "_id": "foo",
        "sourceResource": {
            "spatial": [
                {
                    "city": "Greenville",
                    "county": "Greenville County",
                    "country": "United States",
                    "state": "South Carolina",
                    "name": "Greenville (S.C.)",
                    "coordinates": "34.85262, -82.39401"
                }
            ]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def test_geocode_unicode():
    """Should handle unicode values"""
    INPUT = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"name": u"États-Unis"}
        }
    }
    # The unicode name survives the round trip; no enhancement expected.
    EXPECTED = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{"name": u"États-Unis"}]
        }
    }

    url = server() + "geocode"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert_same_jsons(EXPECTED, json.loads(content))
def _get_server_response(body, prop=None, to_prop=None):
    # POST `body` to the move_date_values service, passing prop/to_prop as
    # query parameters when given.
    # NOTE(review): `to_prop` is appended with "&", so it assumes `prop`
    # was also supplied -- to_prop alone would build a malformed URL.
    url = server() + "move_date_values"
    if prop:
        url = "%s?prop=%s" % (url, prop)
    if to_prop:
        url = "%s&to_prop=%s" % (url, to_prop)
    return H.request(url, "POST", body=body)
def test_replace_basestring_blank():
    # Replacing "[graphic]" with the empty string should also trim the
    # leftover whitespace from the title.
    INPUT = {'sourceResource': {'title': 'Bicyclist [graphic]'}}
    EXPECTED = {'sourceResource': {'title': 'Bicyclist'}}

    url = server() + "replace_substring"
    url = "{0}?prop=sourceResource%2Ftitle&old=[graphic]&new=".format(url)
    resp, content = _get_server_response_raw_query(url, json.dumps(INPUT))
    TC.assertEqual(resp.status, 200)
    TC.assertEqual(json.loads(content), EXPECTED)
def test_404_error_message(): url = server() + "this_does_not_exist/I_mean_it/Anybody_want_a_peanut?" try: urlopen(url) raise AssertionError("that URL should not be present") except urllib2.HTTPError, err: assert err.code == 404 assert err.headers["Content-Type"] == "text/html", err.headers["Content-Type"] tree = amara.parse(err.fp, standalone=True)
def test_shred5():
    "Shredding multiple keys"
    INPUT = {"p": "a;b;c", "q": "d;e;f"}
    EXPECTED = {"p": ["a", "b", "c"], "q": ["d", "e", "f"]}

    # Both properties named in the comma-separated prop list get shredded.
    url = server() + "shred?prop=p,q"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    assert json.loads(content) == EXPECTED
def test_405_error_message(): url = server() try: f = urlopen(url, "The server ignores this text") raise AssertionError("/ is not supposed to allow a POST") except urllib2.HTTPError, err: assert err.code == 405, err.code assert err.headers["Content-Type"] == "text/html", err.headers["Content-Type"] tree = amara.parse(err.fp, standalone=True)
def test_shred8():
    "Shredding list with one value should return list with one value"
    INPUT = {"p": ["a"]}

    # A single-element list has nothing to split; output equals input.
    url = server() + "shred?prop=p"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    assert json.loads(content) == INPUT
def test_shred3():
    "Shredding with a non-default delimeter"
    # Delimiter is a space (%20), so commas are left intact.
    INPUT = {"p": "a,d,f ,, g"}
    EXPECTED = {"p": ["a,d,f", ",,", "g"]}

    url = server() + "shred?prop=p&delim=%20"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    assert json.loads(content) == EXPECTED
def _get_server_response(body, prop=None, old=None, new=None):
    # Build a replace_substring URL from the given parameters and delegate
    # to the raw-query helper. `old`/`new` assume `prop` is also supplied
    # (they are joined with "&").
    url = server() + "replace_substring"
    if prop:
        url = "%s?prop=%s" % (url, prop)
    if old:
        url = "%s&old=%s" % (url, old)
    if new:
        url = "%s&new=%s" % (url, new)
    return _get_server_response_raw_query(url, body)
def test_unshred2():
    "Unshredding of an unknown property"
    INPUT = {"id": "999", "prop1": ["lets", "go", "bluejays"]}
    EXPECTED = INPUT

    # Unshredding a property that does not exist is a no-op.
    url = server() + "shred?action=unshred&prop=prop9"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    assert json.loads(content) == EXPECTED
def _get_server_response(body, prop=None, old=None, new=None):
    # Build a replace_substring URL and POST the body with a JSON
    # Content-Type header. `old`/`new` assume `prop` is also supplied.
    url = server() + "replace_substring"
    if prop:
        url = "%s?prop=%s" % (url, prop)
    if old:
        url = "%s&old=%s" % (url, old)
    if new:
        url = "%s&new=%s" % (url, new)
    return H.request(url, "POST", body=body, headers=CT_JSON)
def test_scdl_format_to_type3():
    """Should not set sourceResource/type"""
    # An unrecognized format maps to no type; the document is unchanged.
    INPUT = {"sourceResource": {"format": ["bananas"]}}
    EXPECTED = {"sourceResource": {"format": ["bananas"]}}

    url = server() + "scdl_format_to_type"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert resp.status == 200
    assert json.loads(content) == EXPECTED
def test_static_last_modified(): url = server() + "static/README" req = urllib2.Request(url) req.add_header('If-Modified-Since', _modified_since) try: response = urllib2.urlopen(req) raise AssertionError("testing shows that this path isn't taken") except urllib2.HTTPError, err: assert err.code == 304, err.code
def test_xslt():
    # POST XML data to the akara.xslt service with a stylesheet URL and
    # verify the transformed document's title.
    url = server() + "akara.xslt?" + urllib.urlencode({"@xslt": XSLT_URL})
    req = urllib2.Request(url)
    req.add_header("Content-Type", "text/xml")
    response = urllib2.urlopen(req, XML_DATA)
    doc = bindery.parse(response)
    assert str(doc.html.head.title) == "Document Title", repr(
        str(doc.html.head.title))
def _get_server_response(body, field, mode):
    # POST to the collection-registry required-values service with the
    # given field and mode query parameters.
    url = (server() +
           "required-values-from-collection-registry?field={}&mode={}".format(
               field, mode))
    return H.request(url, "POST", body=body)
def test_shred1():
    "Valid shredding"
    INPUT = {"id": "999", "prop1": "lets;go;bluejays"}
    EXPECTED = {"id": "999", "prop1": ["lets", "go", "bluejays"]}

    # Default delimiter is ";".
    url = server() + "shred?prop=prop1"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    assert json.loads(content) == EXPECTED
def test_cleanup_enrich_then_lookup2():
    """Should produce both name and iso639_3 language fields"""
    INPUT = {
        "sourceResource": {
            "language": ["en", "French and arabic", "spanish Spanish",
                         "Ze Germans"]
        }
    }
    # Compound strings are split into individual languages; unrecognized
    # values ("Ze Germans") are dropped.
    EXPECTED = {
        "sourceResource": {
            "language": [
                {"name": "English", "iso639_3": "eng"},
                {"name": "French", "iso639_3": "fre"},
                {"name": "Arabic", "iso639_3": "ara"},
                {"name": "Spanish", "iso639_3": "spa"}
            ]
        }
    }

    url = server() + "cleanup_language"
    resp, content = H.request(url, "POST", json.dumps(INPUT))
    assert resp.status == 200

    url = server() + "enrich_language"
    resp, content = H.request(url, "POST", content)
    assert resp.status == 200

    url = server() + "lookup?prop=sourceResource%2Flanguage%2Fname" + \
          "&target=sourceResource%2Flanguage%2Fname&substitution=iso639_3"
    resp, content = H.request(url, "POST", content)
    assert resp.status == 200

    url = server() + "lookup?prop=sourceResource%2Flanguage%2Fname" + \
          "&target=sourceResource%2Flanguage%2Fiso639_3" + \
          "&substitution=iso639_3&inverse=True"
    resp, content = H.request(url, "POST", content)
    assert resp.status == 200

    assert_same_jsons(content, EXPECTED)
def test_static():
    # Pull up some static files
    url = server() + "resource/atom/entry1.atom"
    response = urllib2.urlopen(url)
    assert response.code == 200
    assert response.headers["Content-Type"] == "application/atom+xml", \
        response.headers["Content-Type"]
    body = response.read()
    assert "Poster Boy @ Flickr" in body

    url = server() + "static/README"
    body = urllib2.urlopen(url).read()
    assert "SECRET MESSAGE" in body

    # Check that that leading "/" is trimmed
    url = server() + "//static////README"
    body = urllib2.urlopen(url).read()
    assert "SECRET MESSAGE" in body
def test_date_with_parentheses_and_question_mark():
    """Should handle date like 1928 (?)"""
    INPUT = {"date": "1928 (?)"}
    # The uncertain year still parses to a 1928..1928 range.
    EXPECTED = {"date": {"begin": "1928",
                         "end": "1928",
                         "displayDate": "1928 (?)"}}

    url = server() + "enrich_earliest_date?prop=date"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    assert_same_jsons(EXPECTED, content)
def test_enrich_dates_square_brackets():
    """Should remove square brackets"""
    INPUT = {"date": "[199?]-"}
    # "199?" expands to the decade 1990-1999.
    EXPECTED = {"date": {"begin": "1990",
                         "end": "1999",
                         "displayDate": "[199?]-"}}

    url = server() + "enrich_earliest_date?prop=date"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    assert_same_jsons(EXPECTED, content)
def test_wordy_date():
    """Should handle very wordy dates"""
    INPUT = {"date": "mid 11th century AH/AD 17th century (Mughal)"}
    # Unparseable prose yields null begin/end but keeps the display text.
    EXPECTED = {"date": {"begin": None,
                         "end": None,
                         "displayDate": INPUT["date"]}}

    url = server() + "enrich_earliest_date?prop=date"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    assert_same_jsons(EXPECTED, content)
def test_reversed_date_range():
    """Should handle reversed date range"""
    INPUT = {"date": "1911/0140"}
    # The two years are sorted so begin precedes end.
    EXPECTED = {"date": {"begin": "0140",
                         "end": "1911",
                         "displayDate": INPUT["date"]}}

    url = server() + "enrich_earliest_date?prop=date"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    assert_same_jsons(EXPECTED, content)
def test_echo_simple_post_negative_content_length(): url = server() + "test_echo_simple_post" req = urllib2.Request(url, data="I was here.", headers={"Content-Length": "-100"}) try: f = urllib2.urlopen(req) raise AssertionError("Not supposed to handle negative lengths") except urllib2.HTTPError, err: assert err.code == 400
def test_rdfa2json_with_date():
    # Fetch RDFa from a live page via akara.rdfa.json and inspect items.
    from amara.thirdparty import json
    url = server() + "akara.rdfa.json?url=http://www.myspace.com/parishilton"
    results = json.load(urllib2.urlopen(url))
    for item in results["items"]:
        if "canonical" in item:
            # NOTE(review): the loop checks for "canonical" but the failure
            # message below mentions myspace:lastLogin, and this assertion is
            # a no-op (`assert True`). Confirm which RDFa property this test
            # was meant to verify -- compare test_rdfa2json, which asserts on
            # the "canonical" value itself.
            assert True
            break
    else:
        raise AssertionError("Could not find myspace:lastLogin")
def test_no_date_field():
    """Handle case where date field doesn't exist"""
    INPUT = {"hat": "fits"}
    EXPECTED = {"hat": "fits"}

    url = server() + "enrich_earliest_date?prop=date"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    result = json.loads(content)
    # The document must pass through unchanged. (The original parsed the
    # response and built EXPECTED but never actually compared them.)
    assert result == EXPECTED
def test_rdfa2json():
    # Fetch RDFa from a live page and check the "canonical" item value.
    from amara.thirdparty import json
    url = server() + "akara.rdfa.json?url=http://zepheira.com/"
    results = json.load(urllib2.urlopen(url))
    for item in results["items"]:
        if "canonical" in item:
            assert "zepheira.com" in item["canonical"]
            break
    else:
        raise AssertionError("Could not find 'canonical'")
def test_oaitodpla_date_parse_format_bogus_string():
    "Deal with a bogus date string"
    INPUT = {"date": "BOGUS!"}

    # An unparseable date must not produce a "temporal" property.
    url = server() + "oai-to-dpla"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    result = json.loads(content)
    assert "temporal" not in result
def test_augment():
    # POST an augmentation fixture to augment.freemix.json and verify the
    # response carries an "items" collection.
    import simplejson
    url = server() + "augment.freemix.json"
    req = urllib2.Request(url)
    data = open(os.path.join(RESOURCE_DIR, "augment",
                             "augment_test1.js")).read()
    response = urllib2.urlopen(req, data)
    results = simplejson.load(response)
    assert "items" in results
def test_enrich_format_cleanup():
    "Test format normalization and removal of non IMT formats with one format"
    INPUT = {"format": "image/JPEG"}
    # The IMT is normalized and its major type is promoted to "type";
    # the non-IMT "format" entry is removed entirely.
    EXPECTED = {u"type": "image"}

    url = server() + "enrich-format?prop=format&type_field=type"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    # Dropped the original's unused `result = json.loads(content)` --
    # assert_same_jsons parses the raw content itself.
    assert_same_jsons(content, EXPECTED)
def test_freemix_json():
    # POST a tiny CSV fixture to freemix.json and verify the response
    # carries an "items" collection.
    import simplejson
    url = server() + "freemix.json"
    req = urllib2.Request(url)
    data = open(os.path.join(RESOURCE_DIR, "load", "tiny.csv")).read()
    response = urllib2.urlopen(req, data)
    results = simplejson.load(response)
    assert "items" in results
def test_enrich_single_subject_reformat_to_dict():
    "Transform a subjects string to an array of dictionaries"
    INPUT = {"subject": "Cats"}
    EXPECTED = {u'subject': [{u'name': u'Cats'}]}

    url = server() + "enrich-subject?prop=subject"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    result = json.loads(content)
    assert result['subject'] == EXPECTED['subject']
def test_artstor_cleanup_creator3():
    """Should do nothing since creator field does not exist"""
    INPUT = {"sourceResource": {"subject": "bananas"}}

    url = server() + "artstor_cleanup_creator"
    resp, content = H.request(url, "POST", body=json.dumps(INPUT))
    assert str(resp.status).startswith("2")
    data = json.loads(content)
    # With no creator present the document must round-trip unchanged.
    assert data == INPUT, DictDiffer(data, INPUT).diff()