def test_cleanup_enrich_then_lookup1():
    """Should produce both name and iso639_3 language fields.

    Each language value in ``variants`` is sent through cleanup_language,
    enrich_language and two lookup calls, in that order.  Every call must
    answer 200, and the body of the final call is compared with EXPECTED.
    """
    variants = ["en", "English", ["eng"], ["English"], ["en", "English"]]
    EXPECTED = {
        "sourceResource": {
            "language": [{"name": "English", "iso639_3": "eng"}]
        }
    }
    # Ordered pipeline: each step is fed the previous step's response body.
    steps = (
        "cleanup_language",
        "enrich_language",
        "lookup?prop=sourceResource%2Flanguage%2Fname"
        "&target=sourceResource%2Flanguage%2Fname&substitution=iso639_3",
        "lookup?prop=sourceResource%2Flanguage%2Fname"
        "&target=sourceResource%2Flanguage%2Fiso639_3"
        "&substitution=iso639_3&inverse=True",
    )

    # Iterate the values directly and avoid shadowing the builtin input().
    for language in variants:
        content = json.dumps({"sourceResource": {"language": language}})
        for step in steps:
            resp, content = H.request(server() + step, "POST", content)
            assert resp.status == 200
        assert_same_jsons(content, EXPECTED)
示例#2
0
def test_physical_format_from_format_and_type():
    """Test physical format appending from format and type fields.

    The document is sent to enrich-type and the response body is then sent
    to enrich-format; after each call the parsed body must equal the same
    expected document (format values followed by the type values).
    """
    format_values = [
        "76.8 x 104 cm",
        "Oil on canvas",
        "7 1/4 x 6 inches (18.4 x 15.2 cm)",
        "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)",
    ]
    type_values = ["Paintings", "Painting"]
    payload = {"format": list(format_values), "type": list(type_values)}
    expected = {"format": format_values + type_values}

    endpoints = (
        "enrich-type?prop=type&format_field=format",
        "enrich-format?prop=format&type_field=type",
    )
    body = json.dumps(payload)
    for endpoint in endpoints:
        # The second call consumes the raw body returned by the first.
        resp, body = H.request(server() + endpoint, "POST", body=body)
        assert str(resp.status).startswith("2")
        fetched = json.loads(body)
        assert fetched == expected, DictDiffer(expected, fetched).diff()
def test_type_set_format():
    """Format gets set correctly given invalid type value

    When send_rejects_to_format is true, format should get populated with the
    type strings that don't exactly match a valid type.
    """
    url = server() + "enrich-type?send_rejects_to_format=true"
    # (request document, expected response document) pairs, run in order.
    cases = [
        (
            {"sourceResource": {"type": "digital photograph"}},
            {
                "sourceResource": {
                    "type": "image",
                    "format": ["digital photograph"]
                }
            },
        ),
        (
            {"sourceResource": {"type": "text"}},
            {"sourceResource": {"type": "text"}},
        ),
        (
            {"sourceResource": {"type": "weird thing"}},
            {"sourceResource": {"format": ["weird thing"]}},
        ),
    ]
    for payload, expected in cases:
        resp, content = H.request(url, "POST", body=json.dumps(payload))
        assert resp.status == 200
        assert_same_jsons(expected, json.loads(content))
def test_enrich_date_dup_start_date_list():
    """Recreate bug found.

    Posts two-element date lists to enrich_earliest_date and compares the
    raw response body with the expected single date entry.
    """
    cases = [
        # Two different years: make sure we don't break it.
        (
            {'date': ['1930', '1938']},
            {'date': [{u'begin': u'1930',
                       u'end': u'1938',
                       u'displayDate': u'1930-1938'}]},
        ),
        # The same year twice: this is the fix.
        (
            {'date': ['1930', '1930']},
            {'date': [{u'begin': u'1930',
                       u'end': u'1930',
                       u'displayDate': u'1930'}]},
        ),
    ]
    for payload, expected in cases:
        url = server() + "enrich_earliest_date?prop=date"
        resp, content = H.request(url, "POST", body=json.dumps(payload))
        assert_same_jsons(expected, content)
示例#5
0
def test_physical_format_from_format_and_type():
    """Test physical format appending from format and type fields.

    Calls enrich-type and then enrich-format (fed with the first response
    body) and checks the parsed body against the same expectation each time.
    """
    payload = {
        "format": [
            "76.8 x 104 cm",
            "Oil on canvas",
            "7 1/4 x 6 inches (18.4 x 15.2 cm)",
            "Sheet: 9 1/2 x 12 1/8 inches (24.1 x 30.8 cm)",
        ],
        "type": ["Paintings", "Painting"],
    }
    # Expected document: the format values with the type values appended.
    expected = {"format": payload["format"] + payload["type"]}

    def post_and_check(endpoint, body):
        # POST body to endpoint, require a 2xx status and the expected JSON,
        # and hand back the raw response body for the next call.
        resp, content = H.request(server() + endpoint, "POST", body=body)
        assert str(resp.status).startswith("2")
        fetched = json.loads(content)
        assert fetched == expected, DictDiffer(expected, fetched).diff()
        return content

    first_body = post_and_check("enrich-type?prop=type&format_field=format",
                                json.dumps(payload))
    post_and_check("enrich-format?prop=format&type_field=type", first_body)
示例#6
0
def test_enrich_date_parse_century_date():
    """Correctly transform a date of format '19th c.'

    Each raw value is posted to enrich_earliest_date; the returned "date"
    must have no begin/end and the listed displayDate.
    """
    url = server() + "enrich_earliest_date?prop=date"
    # (raw date sent, displayDate expected back)
    cases = [
        ("19th c.", "19th c"),  # period stripped assumed OK
        ("19th century", "19th century"),
    ]
    for raw, display in cases:
        expected_date = {"begin": None, "end": None, "displayDate": display}
        resp, content = H.request(url, "POST", body=json.dumps({"date": raw}))
        result = json.loads(content)
        assert result["date"] == expected_date, \
               "%s != %s" % (result["date"], expected_date)
示例#7
0
def test_enrich_date_parse_century_date():
    """Correctly transform a date of format '19th c.'

    Century-style values are expected to come back from
    enrich_earliest_date with begin and end set to None.
    """
    url = server() + "enrich_earliest_date?prop=date"

    def check(raw, display):
        # Post one raw date and compare the returned "date" entry.
        wanted = {"begin": None, "end": None, "displayDate": display}
        resp, content = H.request(url, "POST", body=json.dumps({"date": raw}))
        got = json.loads(content)["date"]
        assert got == wanted, "%s != %s" % (got, wanted)

    check("19th c.", "19th c")  # period stripped assumed OK
    check("19th century", "19th century")
def test_cleanup_enrich_then_lookup1():
    """Should produce both name and iso639_3 language fields.

    For every language value below, the record is posted to
    cleanup_language, enrich_language and then two lookup URLs, each call
    receiving the previous response body.  All calls must return 200 and
    the last body is compared with EXPECTED.
    """
    english = {"name": "English", "iso639_3": "eng"}
    EXPECTED = {"sourceResource": {"language": [english]}}

    lookup = "lookup?prop=sourceResource%2Flanguage%2Fname"
    name_lookup = lookup + \
        "&target=sourceResource%2Flanguage%2Fname&substitution=iso639_3"
    code_lookup = lookup + \
        "&target=sourceResource%2Flanguage%2Fiso639_3" + \
        "&substitution=iso639_3&inverse=True"

    # Direct iteration; the loop variable no longer shadows builtin input().
    for language in ("en", "English", ["eng"], ["English"], ["en", "English"]):
        record = {"sourceResource": {"language": language}}
        resp, content = H.request(server() + "cleanup_language", "POST",
                                  json.dumps(record))
        assert resp.status == 200
        resp, content = H.request(server() + "enrich_language", "POST",
                                  content)
        assert resp.status == 200
        resp, content = H.request(server() + name_lookup, "POST", content)
        assert resp.status == 200
        resp, content = H.request(server() + code_lookup, "POST", content)
        assert resp.status == 200
        assert_same_jsons(content, EXPECTED)
示例#9
0
def test_enrich_temporal_date():
    """Correctly enrich temporal dates.

    The spatial values are posted to move_date_values and the response is
    then posted to enrich_date; the final body must contain the temporal
    entries below and leave only "North Carolina" in spatial.
    """

    def span(begin, end, display):
        # One expected temporal entry.
        return {"begin": begin, "end": end, "displayDate": display}

    payload = {
        "sourceResource": {
            "spatial": [
                "1901-1999",
                " 1901 - 1999 ",
                " 1901 / 01 / 01",
                "1905-04-12",
                "01/01/1901",
                "1901",
                "North Carolina",
            ]
        }
    }
    expected = {
        "sourceResource": {
            "temporal": [
                span("1901", "1999", "1901-1999"),
                span("1901", "1999", "1901 - 1999"),
                span("1901", "1901", "1901"),
                span("1901-01-01", "1901-01-01", "1901 / 01 / 01"),
                span("1901-01-01", "1901-01-01", "01/01/1901"),
                span("1905-04-12", "1905-04-12", "1905-04-12"),
            ],
            "spatial": ["North Carolina"]
        }
    }

    move_url = server() + "move_date_values?prop=sourceResource/spatial"
    resp, content = H.request(move_url, "POST", body=json.dumps(payload))
    print_error_log()
    assert resp.status == 200

    enrich_url = server() + "enrich_date"
    resp, content = H.request(enrich_url, "POST", body=content)
    print_error_log()
    assert resp.status == 200
    assert_same_jsons(expected, content)
示例#10
0
def test_geocode_unicode():
    """Handles unicode values that can be cast as UTF-8.

    Posts a record with a non-ASCII spatial name to the geocode endpoint
    and expects a 200 response carrying the record below.
    """
    place_name = u"États-Unis"
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"name": place_name}},
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{"country": "United States", "name": place_name}]
        },
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#11
0
def test_geocode_geonames_name_search_context():
    """Should find a place name, only if matching other data.

    The request names "Portland" together with the state "Maine".
    """

    def record(spatial):
        # Wrap a spatial value in the envelope shared by both documents.
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial},
        }

    payload = record({"name": "Portland", "state": "Maine"})
    expected = record([{
        "county": "Cumberland County",
        "country": "United States",
        "state": "Maine",
        "name": "Portland",
        "coordinates": "43.66147, -70.25533",
    }])

    geocode_url = server() + "geocode"
    resp, content = H.request(geocode_url, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#12
0
def test_geocode_unicode():
    """Should handle unicode values.

    The expected response holds the same non-ASCII name, wrapped in a list,
    with no additional spatial properties.
    """
    spatial_in = {"name": u"États-Unis"}
    spatial_out = [{"name": u"États-Unis"}]

    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": spatial_in},
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": spatial_out},
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#13
0
def test_geocode_set_name_by_feature():
    """Should set the name property to the smallest available feature value.

    Here the request carries a country and a city, and the expected name is
    the city value.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"country": "Canada", "city": "Bananas"}
        },
    }
    enriched_place = {
        "coordinates": "62.8329086304, -95.9133224487",
        "country": "Canada",
        "name": "Bananas",
        "state": "Nunavut",
        "city": "Bananas",
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [enriched_place]},
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#14
0
def test_geocode_exclude_coordinates_from_countries():
    """Should not include coordinates or smaller administrative units in
    country enhancements.

    For the name "Greece" only "country" is expected to be added.
    """

    def record(spatial):
        # Both documents share ids and differ only in the spatial value.
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial},
        }

    payload = record({"name": "Greece"})
    expected = record([{"country": "Greece", "name": "Greece"}])

    geocode_url = server() + "geocode"
    resp, content = H.request(geocode_url, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
def test_convert_spatial_string_to_dictionary():
    """Format UIUC spatial dictionaries.

    Two spatial entries are posted to uiuc_enrich_location; only the
    "Honolulu, HI" entry is expected in the response.
    """
    honolulu = {"name": "Honolulu, HI"}
    date_like = {"name": "1972 to Present"}

    payload = {
        "id": "12345",
        "sourceResource": {"spatial": [honolulu, date_like]},
        "creator": "David",
    }
    expected = {
        "id": "12345",
        "sourceResource": {"spatial": [{"name": "Honolulu, HI"}]},
        "creator": "David",
    }

    resp, content = H.request(server() + "uiuc_enrich_location", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert json.loads(content) == expected
示例#16
0
def test_geocode_set_name_region():
    """Should set the name property to the region value.

    The request supplies only a region; name and country are expected to
    be present in the response.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"region": "Ecuador"}},
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [
                {"region": "Ecuador", "name": "Ecuador", "country": "Ecuador"}
            ]
        },
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#17
0
def test_geocode_set_name_state():
    """Should set the name property to the state value.

    The request supplies only a state; the expected response also carries
    name, country and coordinates.
    """

    def record(spatial):
        # Shared record envelope around the spatial value.
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial},
        }

    payload = record({"state": "California"})
    expected = record([{
        "coordinates": "37.25022, -119.75126",
        "country": "United States",
        "state": "California",
        "name": "California",
    }])

    geocode_url = server() + "geocode"
    resp, content = H.request(geocode_url, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#18
0
def test_geocode_set_name_by_feature():
    """Should set the name property to the smallest available feature value.

    With a country and a city in the request, the expected name equals the
    city.
    """

    def record(spatial):
        # Shared record envelope around the spatial value.
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial},
        }

    payload = record({"country": "Canada", "city": "Bananas"})
    expected = record([{
        "coordinates": "62.8329086304, -95.9133224487",
        "country": "Canada",
        "name": "Bananas",
        "state": "Nunavut",
        "city": "Bananas",
    }])

    geocode_url = server() + "geocode"
    resp, content = H.request(geocode_url, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#19
0
def test_geocode_set_name_county():
    """Should set the name property to the county value.

    The request pairs a county with the country value "Bananas".
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {
                "county": "Los Angeles County",
                "country": "Bananas",
            }
        },
    }
    enriched_place = {
        "county": "Los Angeles County",
        "country": "Bananas",
        "name": "Los Angeles County",
        "state": "California",
        # uses bing because geonames wants to match country values
        "coordinates": "33.9934997559, -118.29750824",
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [enriched_place]},
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#20
0
def test_geocode_set_name_region():
    """Should set the name property to the region value.

    Only a region is sent; the response is expected to repeat it as both
    name and country.
    """

    def record(spatial):
        # Shared record envelope around the spatial value.
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial},
        }

    payload = record({"region": "Ecuador"})
    expected = record([{
        "region": "Ecuador",
        "name": "Ecuador",
        "country": "Ecuador",
    }])

    geocode_url = server() + "geocode"
    resp, content = H.request(geocode_url, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#21
0
def test_geocode_set_name_city():
    """Should set the name property to the city value.

    The request supplies a city and a state.
    """
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"city": "Los Angeles", "state": "California"}
        },
    }
    enriched_place = {
        "coordinates": "34.05223, -118.24368",
        "city": "Los Angeles",
        "county": "Los Angeles County",
        "state": "California",
        "country": "United States",
        "name": "Los Angeles",
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [enriched_place]},
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#22
0
def test_geocode_set_name_coordinates():
    """Should set the name property to the lowest hierarchy value.

    The request supplies coordinates and a city; the expected name is the
    city value and the coordinates are returned unchanged.
    """
    coordinates = "37.7771186829, -122.419639587"

    def record(spatial):
        # Shared record envelope around the spatial value.
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial},
        }

    payload = record({"coordinates": coordinates, "city": "Bananas"})
    expected = record([{
        "coordinates": coordinates,
        "city": "Bananas",
        "state": "California",
        "name": "Bananas",
        "county": "San Francisco County",
        "country": "United States",
    }])

    geocode_url = server() + "geocode"
    resp, content = H.request(geocode_url, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#23
0
def test_geocode_works_with_dotted_abbreviations():
    """Resolves something like "Greenville (S.C.)" as well as "SC" """
    # Note for a later Twofishes retrofit: Twofishes copes with "(S.C.)"
    # without help, so most of this assertion should survive, but the code
    # that works around this syntax should change.  With Twofishes the
    # "S.C." spelling can be preserved in the "name" property, and for
    # Ingestion 3 with MAPv4 it can be preserved in providedLabel.
    provided_name = "Greenville (S.C.)"
    payload = {
        "_id": "foo",
        "sourceResource": {"spatial": {"name": provided_name}},
    }
    enriched_place = {
        "city": "Greenville",
        "county": "Greenville County",
        "country": "United States",
        "state": "South Carolina",
        "name": provided_name,
        "coordinates": "34.85262, -82.39401",
    }
    expected = {
        "_id": "foo",
        "sourceResource": {"spatial": [enriched_place]},
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
def _get_server_response(body):
    """POST ``body`` to the dpla_mapper endpoint with mapper_type=lapl_oai.

    Returns the result of ``H.request`` unchanged.
    """
    endpoint = "dpla_mapper?mapper_type=lapl_oai"
    return H.request(server() + endpoint, "POST", body=body)
示例#25
0
def test_geocode_do_not_skip_united_states():
    """Should geocode when the name value 'United States' is followed by
    a '-'.
    """
    provided_name = "United States--California"

    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"name": provided_name}},
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{
                "coordinates": "37.25022, -119.75126",
                "country": "United States",
                "name": provided_name,
                "state": "California",
            }]
        },
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
def test_strip_non_spatial_entries():
    """Strip out strings that are not locations.

    Of the two spatial strings posted to
    digital_commonwealth_enrich_location, only "Somerville, MA" is expected
    back, as a dictionary with a "name" key.
    """

    def record(spatial):
        # Both documents share id and creator; only spatial differs.
        return {
            "id": "12345",
            "sourceResource": {"spatial": spatial},
            "creator": "David",
        }

    payload = record(["Pictorial works", "Somerville, MA"])
    expected = record([{"name": "Somerville, MA"}])

    endpoint = server() + "digital_commonwealth_enrich_location"
    resp, content = H.request(endpoint, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert json.loads(content) == expected
示例#27
0
def test_geocode_exclude_coordinates_from_countries():
    """Should not include coordinates or smaller administrative units in
    country enhancements.

    The expected entry for "Greece" holds only country and name.
    """
    country_name = "Greece"
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"name": country_name}},
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{"country": country_name, "name": country_name}]
        },
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
def test_convert_spatial_string_to_dictionary():
    """Convert a spatial string into a dictionary with a key of 'name'.

    The expected names also differ from the posted strings: the coordinate
    string loses its inner spaces and "Mass." becomes "MA".
    """
    # (string sent, name expected back), in order.
    conversions = [
        (u'42.24  N 71.49 W', u"42.24N 71.49W"),
        (u"Bear Park (Reading Mass.)", u"Bear Park (Reading MA)"),
    ]
    payload = {
        "id": "12345",
        "sourceResource": {"spatial": [raw for raw, _ in conversions]},
        "creator": "David",
    }
    expected = {
        "id": "12345",
        "sourceResource": {
            "spatial": [{"name": name} for _, name in conversions]
        },
        "creator": "David",
    }

    endpoint = server() + "digital_commonwealth_enrich_location"
    resp, content = H.request(endpoint, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert json.loads(content) == expected
示例#29
0
def test_geocode_geonames_name_search():
    """Should find a place name.

    The request carries only the name "Portland, OR".
    """
    provided_name = "Portland, OR"
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"name": provided_name}},
    }
    enriched_place = {
        "county": "Multnomah County",
        "country": "United States",
        "state": "Oregon",
        "name": provided_name,
        "coordinates": "45.52345, -122.67621",
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [enriched_place]},
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#30
0
def test_geocode_set_name_state():
    """Should set the name property to the state value.

    Only a state is sent; the expected name repeats it.
    """
    state = "California"
    payload = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"state": state}},
    }
    expected = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": [{
                "coordinates": "37.25022, -119.75126",
                "country": "United States",
                "state": state,
                "name": state,
            }]
        },
    }

    resp, content = H.request(server() + "geocode", "POST",
                              body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
示例#31
0
def test_geocode_coordinate_provided():
    """Geocode a spatial entry whose name is a coordinate pair.

    The expected response keeps the name, repeats it as "coordinates" and
    adds state and country.
    """
    lat_long = "42.358631134, -71.0567016602"

    def record(spatial):
        # Shared record envelope around the spatial value.
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial},
            "creator": "David",
        }

    payload = record([{"name": lat_long}])
    expected = record([{
        "state": "Massachusetts",
        "country": "United States",
        "name": lat_long,
        "coordinates": lat_long,
    }])

    geocode_url = server() + "geocode"
    resp, content = H.request(geocode_url, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(json.loads(content), expected)
示例#32
0
def test_geocode_do_not_skip_united_states():
    """Should geocode when the name value 'United States' is followed by
    a '-'.

    The posted name is "United States--California".
    """

    def record(spatial):
        # Shared record envelope around the spatial value.
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial},
        }

    payload = record({"name": "United States--California"})
    expected = record([{
        "coordinates": "37.25022, -119.75126",
        "country": "United States",
        "name": "United States--California",
        "state": "California",
    }])

    geocode_url = server() + "geocode"
    resp, content = H.request(geocode_url, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
def test_filtering_with_given_keys():
    """Filtering with given keys.

    filter_fields is called with keys=filter_me.  The expected response
    drops the empty and None values inside "filter_me" but keeps the
    top-level "empty_key".
    """
    unfiltered = {
        'notempty': ['a', 'b', 'c'],
        'empty': '',
        'none': None,
        'crumb': ['x', None, 'y', ''],
    }
    filtered = {
        'notempty': ['a', 'b', 'c'],
        'crumb': ['x', 'y'],
    }

    def record(filter_me):
        # Everything outside "filter_me" is identical in both documents.
        return {
            "id": "999",
            "prop1": "value1",
            "empty_key": "",
            "filter_me": filter_me,
        }

    resp, content = H.request(server() + "filter_fields?keys=filter_me",
                              "POST", body=json.dumps(record(unfiltered)))
    assert str(resp.status).startswith("2")

    assert json.loads(content) == record(filtered)
示例#34
0
def test_geocode_geonames_name_search():
    """Should find a place name.

    Posts the name "Portland, OR" to the geocode endpoint.
    """

    def record(spatial):
        # Shared record envelope around the spatial value.
        return {
            "id": "12345",
            "_id": "12345",
            "sourceResource": {"spatial": spatial},
        }

    payload = record({"name": "Portland, OR"})
    expected = record([{
        "county": "Multnomah County",
        "country": "United States",
        "state": "Oregon",
        "name": "Portland, OR",
        "coordinates": "45.52345, -122.67621",
    }])

    geocode_url = server() + "geocode"
    resp, content = H.request(geocode_url, "POST", body=json.dumps(payload))
    assert resp.status == 200
    assert_same_jsons(expected, json.loads(content))
def test_contentdm_identify_object_with_download():
    """Should add a thumbnail URL made of the source URL.

    The expected response is the input plus an "object" URL and an "admin"
    entry with object_status 1.
    """
    handles = ["aaa", "http://repository.clemson.edu/u?/scp,104"]
    thumbnail = ("http://repository.clemson.edu/cgi-bin/"
                 "thumbnail.exe?CISOROOT=/scp&CISOPTR=104")

    payload = {
        u"something": "x",
        u"somethink": "y",
        u"originalRecord": {"handle": list(handles)},
        u"left": "right now!",
    }
    expected = {
        u"something": "x",
        u"somethink": "y",
        u"originalRecord": {"handle": list(handles)},
        u"object": thumbnail,
        u"admin": {u"object_status": 1},
        u"left": "right now!",
    }

    resp, content = H.request(contentdm_url("True"), "POST",
                              body=json.dumps(payload))
    assert str(resp.status).startswith("2")

    assert_same_jsons(expected, json.loads(content))
示例#36
0
def test_geocode_geonames_name_search_failure():
    """Geocoding must not fall over when the lookup returns nothing.

    The spatial name "1234567" is expected back with no added fields; the
    only change is that the spatial value becomes a one-element list.
    """
    unknown_place = {"name": "1234567"}
    record = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": unknown_place},
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [{"name": "1234567"}]},
    }

    response, body = H.request(
        server() + "geocode", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
# Example #37
def test_range_with_brackets():
    """Should transform date ranges that are wrapped in square brackets.

    Each case pairs the raw ``date`` value sent to ``enrich_earliest_date``
    with the ``displayDate`` expected back. ``begin`` and ``end`` are the
    same for every case.
    """
    cases = [
        ("1960-05-01 - 1960-05-15", "1960-05-01 - 1960-05-15"),
        ("[ 1960-05-01 - 1960-05-15 ]", "1960-05-01 - 1960-05-15"),
        ("[1960-05-01 - 1960-05-15]", "1960-05-01 - 1960-05-15"),
        ("[1960-05-01 / 1960-05-15]", "1960-05-01 / 1960-05-15"),
        ("[1960-05-01/1960-05-15]", "1960-05-01/1960-05-15"),
    ]

    # The endpoint does not change between cases, so build it once.
    url = server() + "enrich_earliest_date?prop=date"

    for raw_date, display_date in cases:
        expected = {
            u'date': {
                u'begin': u'1960-05-01',
                u'end': u'1960-05-15',
                "displayDate": display_date
            }
        }

        resp, content = H.request(
            url, "POST", body=json.dumps({"date": raw_date})
        )
        # Call print_error_log() before the status assertion so that it
        # still runs when the request fails; previously a failing status
        # aborted the test before it was reached.
        print_error_log()
        assert str(resp.status).startswith("2")
        assert_same_jsons(expected, content)
# Example #38
def test_geocode_geonames_name_search_context():
    """A place name is resolved only in agreement with the other fields.

    "Portland" is sent together with state "Maine"; the expected result is
    the Cumberland County, Maine entry with its coordinates.
    """
    record = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"name": "Portland", "state": "Maine"},
        },
    }
    geocoded_place = {
        "county": "Cumberland County",
        "country": "United States",
        "state": "Maine",
        "name": "Portland",
        "coordinates": "43.66147, -70.25533",
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]},
    }

    response, body = H.request(
        server() + "geocode", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
# Example #39
def test_range_with_brackets():
    """Should transform date ranges that are wrapped in square brackets.

    Each case pairs the raw ``date`` value sent to ``enrich_earliest_date``
    with the ``displayDate`` expected back. ``begin`` and ``end`` are the
    same for every case.
    """
    cases = [
        ("1960-05-01 - 1960-05-15", "1960-05-01 - 1960-05-15"),
        ("[ 1960-05-01 - 1960-05-15 ]", "1960-05-01 - 1960-05-15"),
        ("[1960-05-01 - 1960-05-15]", "1960-05-01 - 1960-05-15"),
        ("[1960-05-01 / 1960-05-15]", "1960-05-01 / 1960-05-15"),
        ("[1960-05-01/1960-05-15]", "1960-05-01/1960-05-15"),
    ]

    # The endpoint does not change between cases, so build it once.
    url = server() + "enrich_earliest_date?prop=date"

    for raw_date, display_date in cases:
        expected = {
            u'date': {
                u'begin': u'1960-05-01',
                u'end': u'1960-05-15',
                "displayDate": display_date
            }
        }

        resp, content = H.request(
            url, "POST", body=json.dumps({"date": raw_date})
        )
        # Call print_error_log() before the status assertion so that it
        # still runs when the request fails; previously a failing status
        # aborted the test before it was reached.
        print_error_log()
        assert str(resp.status).startswith("2")
        assert_same_jsons(expected, content)
# Example #40
def _get_server_response(body, prop=None, to_prop=None):
    """POST ``body`` to the ``move_date_values`` endpoint.

    ``prop`` and ``to_prop``, when truthy, are appended to the URL as query
    parameters of the same name. Values are inserted verbatim (they are not
    percent-encoded), exactly as before.

    Returns whatever ``H.request`` returns.
    """
    url = server() + "move_date_values"

    params = []
    if prop:
        params.append("prop=%s" % prop)
    if to_prop:
        params.append("to_prop=%s" % to_prop)

    if params:
        # Always open the query string with "?". The previous code produced
        # "...move_date_values&to_prop=x" (no "?") when only to_prop was
        # supplied; URLs built with prop set are unchanged.
        url = "%s?%s" % (url, "&".join(params))

    return H.request(url, "POST", body=body)
# Example #41
def test_geocode_geonames_name_search_failure():
    """Geocoding must not fall over when the lookup returns nothing.

    The spatial name "1234567" is expected back with no added fields; the
    only change is that the spatial value becomes a one-element list.
    """
    unknown_place = {"name": "1234567"}
    record = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": unknown_place},
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [{"name": "1234567"}]},
    }

    response, body = H.request(
        server() + "geocode", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
def test_contentdm_identify_object_usc():
    """Thumbnail URL is built from a ``/cdm/ref/`` style handle URL.

    The request URL comes from ``contentdm_url("False")``. The expected
    response keeps every input field and adds an ``object`` thumbnail URL
    plus ``admin.object_status`` set to 0.
    """
    handle_url = "http://some.url/cdm/ref/12345"
    record = {
        u"something": "x",
        u"somethink": "y",
        u"originalRecord": {"handle": ["aaa", handle_url]},
        u"left": "right now!",
    }
    wanted = {
        u"something": "x",
        u"somethink": "y",
        u"originalRecord": {"handle": ["aaa", handle_url]},
        u"object": "http://some.url/utils/getthumbnail/12345",
        u"admin": {u"object_status": 0},
        u"left": "right now!",
    }

    response, body = H.request(
        contentdm_url("False"), "POST", body=json.dumps(record)
    )
    print_error_log()
    assert str(response.status).startswith("2")

    assert_same_jsons(wanted, json.loads(body))
# Example #43
def test_geocode_unicode():
    """Geocoding copes with a non-ASCII (unicode) place name.

    The name is expected back untouched, with the spatial value wrapped in
    a one-element list and no extra fields added.
    """
    place_name = u"États-Unis"
    record = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": {"name": place_name}},
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [{"name": place_name}]},
    }

    response, body = H.request(
        server() + "geocode", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
# Example #44
def test_strip_html():
    """'strip_html' removes HTML tags and entities at every nesting level.

    Strings inside nested dicts and lists are expected back without tags,
    with entities decoded and surrounding whitespace trimmed. An empty
    dict, an integer and a string containing a bare "&" are expected back
    unchanged.
    """
    nested_strings = [' <i>string</i> <b>one</b> \n \t', 'string &lt; two  ']
    payload = {
        'a': {'b': nested_strings},
        'c': '  \n <p>string three</p>',
        'd': {},
        'e': 1,
        'f': '1 film negative: b&w ;',
    }
    wanted = {
        'a': {'b': [u'string one', u'string < two']},
        'c': u'string three',
        # The remaining three values must come back untouched.
        'd': {},
        'e': 1,
        'f': '1 film negative: b&w ;',
    }

    endpoint = server() + 'strip_html'
    response, body = H.request(endpoint, 'POST', body=json.dumps(payload))
    assert response.status == 200
    assert_same_jsons(wanted, body)
def test_enrich_location_no_provider_specific_enrich_location1():
    """
    Plain string spatial values with no earlier provider-specific location
    enrichment, none of them a state or state abbreviation. Each string is
    expected back wrapped as {"name": <string>}.
    """
    place_names = ["Asheville", "Buncombe", "United States"]
    record = {
        "id": "12345",
        "sourceResource": {"spatial": place_names},
        "creator": "Miguel",
    }
    wanted = {
        "id": "12345",
        "sourceResource": {
            "spatial": [{"name": place} for place in place_names],
        },
        "creator": "Miguel",
    }

    response, body = H.request(
        server() + "enrich_location", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert json.loads(body) == wanted
def test_enrich_location_after_provider_specific_enrich_location4():
    """
    An earlier provider-specific enrichment produced a spatial dict without
    a state. Its semicolon-separated city and county values are expected to
    be split into one dict per place; only the first keeps the country.
    """
    combined_place = {
        "city": "Asheville; La Jolla",
        "county": "Buncombe;San Diego",
        "country": "United States",
    }
    record = {
        "id": "12345",
        "sourceResource": {"spatial": [combined_place]},
        "creator": "Miguel",
    }

    first_place = {
        "city": "Asheville",
        "county": "Buncombe",
        "country": "United States",
    }
    second_place = {
        "city": "La Jolla",
        "county": "San Diego",
    }
    wanted = {
        "id": "12345",
        "sourceResource": {"spatial": [first_place, second_place]},
        "creator": "Miguel",
    }

    response, body = H.request(
        server() + "enrich_location", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert json.loads(body) == wanted
def test_enrich_list_of_dictionaries_and_strings():
    """A spatial list mixing dicts and plain strings is handled.

    The dict entry is expected back unchanged and every string entry is
    expected back wrapped as {"name": <string>}.
    """
    string_places = ["Rushmore, Mount", "Mount Rushmore National Memorial"]
    record = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "country": "United States",
                    "county": "Buncombe",
                    "state": "North Carolina",
                },
            ] + string_places,
        },
    }
    wanted = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {
                    "country": "United States",
                    "county": "Buncombe",
                    "state": "North Carolina",
                },
            ] + [{"name": place} for place in string_places],
        },
    }

    response, body = H.request(
        server() + "enrich_location", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert json.loads(body) == wanted
def test_enrich_location_after_provider_specific_enrich_location4():
    """
    An earlier provider-specific enrichment produced a spatial dict without
    a state. Its semicolon-separated city and county values are expected to
    be split into one dict per place; only the first keeps the country.
    """
    record = {
        "id": "12345",
        "sourceResource": {
            "spatial": [{"city": "Asheville; La Jolla", "county": "Buncombe;San Diego", "country": "United States"}],
        },
        "creator": "Miguel",
    }
    wanted = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"city": "Asheville", "county": "Buncombe", "country": "United States"},
                {"city": "La Jolla", "county": "San Diego"},
            ],
        },
        "creator": "Miguel",
    }

    endpoint = server() + "enrich_location"
    response, body = H.request(endpoint, "POST", body=json.dumps(record))
    assert response.status == 200
    assert json.loads(body) == wanted
# Example #49
def test_year_month():
    """Year/month dates are read as YYYY-MM, never as a YYYY-YY range.

    Every spelling below is expected to yield begin == end == "1940-02",
    with displayDate echoing the raw input string.
    """
    year_month = "1940-02"
    raw_dates = [
        # year first
        "1940/2", "1940/02", "1940 / 2", "1940 / 02",
        "1940-2", "1940-02", "1940 - 2", "1940 - 02",
        # month first
        "2/1940", "02/1940", "2 / 1940", "02 / 1940",
        "2-1940", "02-1940", "2 - 1940", "02 - 1940",
    ]

    endpoint = server() + "enrich_earliest_date?prop=date"
    for raw_date in raw_dates:
        payload = {"date": raw_date}
        wanted = {
            "date": {
                "begin": year_month,
                "end": year_month,
                "displayDate": raw_date,
            }
        }

        response, body = H.request(endpoint, "POST", body=json.dumps(payload))
        print_error_log()
        assert str(response.status).startswith("2")
        assert_same_jsons(wanted, body)
def test_enrich_list_of_dictionaries_and_strings():
    """A spatial list mixing dicts and plain strings is handled.

    The dict entry is expected back unchanged and every string entry is
    expected back wrapped as {"name": <string>}.
    """
    record = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"country": "United States", "county": "Buncombe", "state": "North Carolina"},
                "Rushmore, Mount",
                "Mount Rushmore National Memorial",
            ],
        },
    }
    wanted = {
        "id": "12345",
        "sourceResource": {
            "spatial": [
                {"country": "United States", "county": "Buncombe", "state": "North Carolina"},
                {"name": "Rushmore, Mount"},
                {"name": "Mount Rushmore National Memorial"},
            ],
        },
    }

    endpoint = server() + "enrich_location"
    response, body = H.request(endpoint, "POST", body=json.dumps(record))
    assert response.status == 200
    assert json.loads(body) == wanted
def test_capitalize_value_exclude():
    """Properties named in ``exclude`` are left uncapitalized.

    With ``exclude=sourceResource/subject`` in the query string, the
    ``format`` values are expected back with their first letter
    capitalized while the ``subject`` values are expected back unchanged.
    """
    subjects = ["subject", "hi there", "hello"]
    record = {
        "id": "123",
        "sourceResource": {
            "format": ["format1", "format2"],
            "subject": subjects,
        },
    }
    wanted = {
        "id": "123",
        "sourceResource": {
            "format": ["Format1", "Format2"],
            "subject": ["subject", "hi there", "hello"],
        },
    }

    # NOTE(review): ``url`` is not assigned anywhere in this function, so it
    # must already exist at module level -- confirm it is defined and points
    # at the intended endpoint.
    target = url + "?exclude=sourceResource/subject"
    response, body = H.request(target, "POST", json.dumps(record))
    assert response.status == 200
    fetched = json.loads(body)
    assert fetched == wanted, DictDiffer(wanted, fetched).diff()
def test_removing_bracket():
    """A leading "[" is dropped from a place name.

    One semicolon-separated spatial string, in which "[Germany" starts with
    a bracket, is expected back as six {"name": ...} entries with the
    bracket removed.
    """
    raw_spatial = "Charleston (S.C.); [Germany; Poland; Israel; New York (N.Y.); Georgia (U.S.)"
    clean_names = [
        "Charleston (S.C.)",
        "Germany",
        "Poland",
        "Israel",
        "New York (N.Y.)",
        "Georgia (U.S.)",
    ]
    record = {
        "id": "12345",
        "sourceResource": {"spatial": [raw_spatial]},
        "creator": "Miguel",
    }
    wanted = {
        "id": "12345",
        "sourceResource": {
            "spatial": [{"name": place} for place in clean_names],
        },
        "creator": "Miguel",
    }

    response, body = H.request(
        server() + "enrich_location", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert json.loads(body) == wanted
# Example #53
def test_geocode_set_name_coordinates():
    """The name is set to the lowest hierarchy value (here the city).

    The input supplies coordinates and the city "Bananas". The expected
    result keeps both, sets name to "Bananas" and adds state, county and
    country.
    """
    coordinates = "37.7771186829, -122.419639587"
    record = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"coordinates": coordinates, "city": "Bananas"},
        },
    }
    geocoded_place = {
        "coordinates": coordinates,
        "city": "Bananas",
        "state": "California",
        "name": "Bananas",
        "county": "San Francisco County",
        "country": "United States",
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]},
    }

    response, body = H.request(
        server() + "geocode", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
# Example #54
def test_artstor_cleanup_creator2():
    """
    Creator values are cleaned up: each of the ten prefixed "bananas"
    variants is expected back as plain "bananas", and " Apples" as "Apples".
    """
    raw_creators = [
        " and bananas",
        "   Artist: bananas",
        "Author: bananas",
        "Binder: bananas",
        "Drawn by bananas",
        "drawn by bananas",
        "  illuminator: bananas",
        "Or    bananas  ",
        "Scribe: bananas",
        "Resolve bananas",
        " Apples",
    ]
    record = {"sourceResource": {"creator": raw_creators}}
    # Ten cleaned "bananas" entries followed by the trimmed "Apples".
    wanted = {"sourceResource": {"creator": ["bananas"] * 10 + ["Apples"]}}

    endpoint = server() + "artstor_cleanup_creator"
    response, body = H.request(endpoint, "POST", body=json.dumps(record))
    assert str(response.status).startswith("2")
    fetched = json.loads(body)
    assert fetched == wanted, DictDiffer(fetched, wanted).diff()
# Example #55
def test_geocode_set_name_city():
    """The name is set to the city value.

    The input supplies only city and state. The expected result sets name
    to "Los Angeles" and adds coordinates, county and country.
    """
    city = "Los Angeles"
    record = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"city": city, "state": "California"},
        },
    }
    geocoded_place = {
        "coordinates": "34.05223, -118.24368",
        "city": city,
        "county": "Los Angeles County",
        "state": "California",
        "country": "United States",
        "name": city,
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]},
    }

    response, body = H.request(
        server() + "geocode", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))
# Example #56
def _get_server_response(body):
    """POST ``body`` to the "oai-to-dpla" endpoint.

    Returns whatever ``H.request`` returns.
    """
    endpoint = server() + "oai-to-dpla"
    return H.request(endpoint, "POST", body=body)
# Example #57
def test_geocode_set_name_county():
    """The name is set to the county value.

    The input supplies a county and the country "Bananas". The expected
    result keeps both, sets name to the county and adds state and
    coordinates.
    """
    county = "Los Angeles County"
    record = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {
            "spatial": {"county": county, "country": "Bananas"},
        },
    }
    geocoded_place = {
        "county": county,
        "country": "Bananas",
        "name": county,
        "state": "California",
        # uses bing because geonames wants to match country values
        "coordinates": "33.9934997559, -118.29750824",
    }
    wanted = {
        "id": "12345",
        "_id": "12345",
        "sourceResource": {"spatial": [geocoded_place]},
    }

    response, body = H.request(
        server() + "geocode", "POST", body=json.dumps(record)
    )
    assert response.status == 200
    assert_same_jsons(wanted, json.loads(body))