def test_dataset_license(self):
    """Check that CKAN ``license_*`` keys end up in a ``licenses`` list."""
    # Only `license_id` is given: no license_title nor license_url.
    id_only = {'license_id': 'odc-odbl'}
    assert converter.dataset(id_only) == {
        'licenses': [{'name': 'odc-odbl'}]
    }

    # All three license keys are remapped into a single licenses entry.
    full_license = {
        'license_id': 'cc-by',
        'license_title': 'Creative Commons Attribution',
        'license_url': 'http://www.opendefinition.org/licenses/cc-by',
    }
    expected = {
        'licenses': [{
            'name': 'cc-by',
            'title': 'Creative Commons Attribution',
            'path': 'http://www.opendefinition.org/licenses/cc-by',
        }]
    }
    assert converter.dataset(full_license) == expected
def test_dataset_author_and_maintainer(self):
    """Check how author/maintainer keys are turned into ``contributors``."""
    author_entry = {
        'title': 'World Bank and OECD',
        'email': '*****@*****.**',
        'role': 'author',
    }
    maintainer_entry = {
        'title': 'Datopian',
        'email': '*****@*****.**',
        'role': 'maintainer',
    }

    # Author only.
    author_only = {
        'author': 'World Bank and OECD',
        'author_email': '*****@*****.**',
    }
    result = converter.dataset(author_only)
    assert result == {'contributors': [author_entry]}

    # Author and maintainer: the author entry comes first.
    author_and_maintainer = {
        'author': 'World Bank and OECD',
        'author_email': '*****@*****.**',
        'maintainer': 'Datopian',
        'maintainer_email': '*****@*****.**',
    }
    result = converter.dataset(author_and_maintainer)
    assert result == {'contributors': [author_entry, maintainer_entry]}

    # if we already have contributors use that ...
    with_contributors = {
        'contributors': [{'title': 'Datopians'}],
        'author': 'World Bank and OECD',
    }
    result = converter.dataset(with_contributors)
    assert result == {'contributors': [{'title': 'Datopians'}]}
def test_unjsonify_all_extra_values(self):
    """Check that JSON-encoded extras values come back as nested objects."""
    location_json = (
        '{"country": {"China": {"population": '
        '"1233214331", "capital": "Beijing"}}}'
    )
    numbers_json = '[[[1, 2, 3], [2, 4, 5]], [[7, 6, 0]]]'
    ckan_input = {
        'extras': [
            {'key': 'location', 'value': location_json},
            {'key': 'numbers', 'value': numbers_json},
        ]
    }

    # Each extras key becomes a top-level key holding the decoded value.
    expected = {
        "location": {
            "country": {
                "China": {
                    "population": "1233214331",
                    "capital": "Beijing",
                }
            }
        },
        "numbers": [[[1, 2, 3], [2, 4, 5]], [[7, 6, 0]]],
    }
    assert converter.dataset(ckan_input) == expected
def test_dataset_extras(self):
    """Check that ``extras`` entries are lifted to top-level keys.

    Covers a plain unicode string, a JSON list, a non-string value and a
    JSON object.
    """
    extras = [
        {'key': 'title_cn', 'value': u'國內生產總值'},
        {'key': 'years', 'value': '[2015, 2016]'},
        {'key': 'last_year', 'value': 2016},
        {'key': 'location', 'value': '{"country": "China"}'},
    ]
    expected = {
        'title_cn': u'國內生產總值',
        'years': [2015, 2016],
        'last_year': 2016,
        'location': {'country': 'China'},
    }
    result = converter.dataset({'extras': extras})
    assert result == expected
def test_round_trip_ckan(self):
    """Check that a second CKAN -> Frictionless -> CKAN round trip is stable.

    The fixture is converted to Frictionless and back twice; the result of
    the first round trip must equal the result of the second one.
    """
    # `ckan1` != `ckan2` but `ckan2` == `ckan3`
    inpath = 'tests/fixtures/full_ckan_package.json'
    # Use a context manager so the fixture file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(inpath) as fixture:
        ckan1 = json.load(fixture)
    fd1 = ckan_to_frictionless.dataset(ckan1)
    ckan2 = frictionless_to_ckan.package(fd1)
    fd2 = ckan_to_frictionless.dataset(ckan2)
    ckan3 = frictionless_to_ckan.package(fd2)

    # FIXME: this currently doesn't work for Python 2 due to the way
    # Unicode is handled and because the dictionary keys do not keep
    # the same order.
    # Solution 1: Skip for Python 2 (it's clearly the same dictionary
    # if the build passes on Python 3)
    # Solution 2: Hard code the dicts as in `test_extras_is_converted`
    # in test_frictionless_to_ckan.py instead of loading JSON and
    # sort the keys.
    if not six.PY2:
        assert ckan2 == ckan3
def test_key_mappings(self):
    """Check the renaming of ``notes`` and ``url``."""
    # notes -> description, url -> homepage
    ckan_input = {
        'notes': 'Country, regional and world GDP',
        'url': 'https://datopian.com',
    }
    expected = {
        'description': 'Country, regional and world GDP',
        'homepage': 'https://datopian.com',
    }
    assert converter.dataset(ckan_input) == expected
def test_dataset_license_with_licenses_in_extras(self):
    """Check license handling when ``extras`` also carries ``licenses``.

    The expected output has the ``license_*`` values as the first entry of
    ``licenses`` and keeps the second entry from the extras list.
    """
    odbl = {
        'name': 'odc-odbl',
        'title': 'Open Data Commons Open Database License',
        'path': 'https://opendatacommons.org/licenses/odbl/1-0/index.html',
    }
    cc_by = {
        'name': 'cc-by',
        'title': 'Creative Commons Attribution',
        'path': 'http://www.opendefinition.org/licenses/cc-by',
    }
    odc_by = {
        'name': 'odc-by',
        'title': 'Open Data Commons Attribution License',
        'path': 'https://opendatacommons.org/licenses/by/1-0/index.html',
    }

    ckan_input = {
        'license_id': odbl['name'],
        'license_title': odbl['title'],
        'license_url': odbl['path'],
        'extras': [{
            'key': 'licenses',
            'value': json.dumps([cc_by, odc_by]),
        }],
    }
    result = converter.dataset(ckan_input)
    assert result == {'licenses': [odbl, odc_by]}
def _convert_excluding_path(ckan_dataset):
    """Convert a CKAN dataset to a frictionless package, keeping custom paths.

    frictionless_ckan_mapper will override `path` if URL is set for a
    resource, but we want to preserve `path` if it was previously set.  The
    `path` of every input resource that has one is therefore remembered by
    position and written back onto the converted package.
    """
    # Remember, by resource position, each `path` present before conversion.
    preserved = {}
    for position, resource in enumerate(ckan_dataset.get('resources', [])):
        if 'path' in resource:
            preserved[position] = resource['path']

    package = ctf.dataset(ckan_dataset)

    # Restore the remembered values on the resource at the same position.
    for position, original_path in iteritems(preserved):
        package['resources'][position]['path'] = original_path
    return package
def test_dataset_tags(self):
    """Check that CKAN tag dicts are reduced to a ``keywords`` name list."""
    economy_tag = {
        'display_name': 'economy',
        'id': '9d602a79-7742-44a7-9029-50b9eca38c90',
        'name': 'economy',
        'state': 'active',
    }
    worldbank_tag = {
        'display_name': 'worldbank',
        'id': '3ccc2e3b-f875-49ef-a39d-6601d6c0ef76',
        'name': 'worldbank',
        'state': 'active',
    }
    result = converter.dataset({'tags': [economy_tag, worldbank_tag]})
    assert result == {'keywords': ['economy', 'worldbank']}
def test_keys_are_passed_through(self):
    """Check that these keys, including an arbitrary one, come out as-is."""
    ckan_input = {
        'name': 'gdp',
        'id': 'xxxx',
        'title': 'Countries GDP',
        'version': '1.0',
        # random
        'xxx': 'aldka',
    }
    # Built independently of the input so the comparison does not depend on
    # whether the converter mutates its argument.
    expected = {
        'name': 'gdp',
        'id': 'xxxx',
        'title': 'Countries GDP',
        'version': '1.0',
        'xxx': 'aldka',
    }
    assert converter.dataset(ckan_input) == expected
def migrate_datasets(datasets, metastore_client):
    """Migrate all datasets in an iterable to metastore.

    Items whose ``type`` is not ``'dataset'`` are skipped.  Every remaining
    item is converted with ``ckan_to_frictionless.dataset`` and stored through
    ``metastore_client.create``.  A ``Conflict`` or any other exception raised
    while storing is logged and the loop moves on to the next package.

    :param datasets: iterable of CKAN dataset dicts; each is indexed by
        ``'type'``.
    :param metastore_client: object providing
        ``create(name, package, author=...)``.
    :returns: the number of packages that were stored successfully.
    """
    # Lazy generator: each conversion happens when the loop below asks for
    # the next package, i.e. outside the try block, so a conversion error
    # propagates to the caller rather than being logged and skipped.
    datapackages = (ckan_to_frictionless.dataset(ds)
                    for ds in datasets
                    if ds['type'] == 'dataset')

    stored = 0
    for package in datapackages:
        log.debug("Converted dataset to datapackage: %s", package)
        try:
            author = _get_author(package)
            metastore_client.create(package['name'], package, author=author)
            stored += 1
            log.debug("Successfully stored package: %s", package['name'])
        except Conflict:
            # Already migrated: informational only.
            log.info("Package already exists in metastore backend: %s",
                     package['name'])
        except Exception:
            # Best effort: record the traceback and keep migrating the rest.
            log.exception("Failed storing package: %s", package['name'])

    return stored
def test_resources_are_converted(self):
    """Check resource key renaming: ``url`` -> ``path``, ``size`` -> ``bytes``."""
    ckan_resource = {
        'name': 'data.csv',
        'url': 'http://someplace.com/data.csv',
        'size': 100,
    }
    expected_resource = {
        'name': 'data.csv',
        'path': 'http://someplace.com/data.csv',
        'bytes': 100,
    }
    result = converter.dataset({'name': 'gdp', 'resources': [ckan_resource]})
    assert result == {'name': 'gdp', 'resources': [expected_resource]}
def test_differences_ckan_round_trip(self):
    """Check the first CKAN round trip against its recorded fixture."""
    # When converting ckan1 to fd1 then fd1 to ckan2,
    # ckan1 is bound to differ from ckan2.
    # Those fixtures illustrate the expected differences.
    inpath = 'tests/fixtures/full_ckan_package.json'
    # Context managers close the fixture files deterministically instead of
    # leaving the handles open until garbage collection.
    with open(inpath) as fixture:
        ckan1 = json.load(fixture)
    fd1 = ckan_to_frictionless.dataset(ckan1)
    ckan2 = frictionless_to_ckan.package(fd1)

    inpath_round_trip = ('tests/fixtures/'
                         'full_ckan_package_first_round_trip.json')
    with open(inpath_round_trip) as fixture:
        exp = json.load(fixture)

    # FIXME: this currently doesn't work for Python 2 due to the way
    # Unicode is handled and because the dictionary keys do not keep
    # the same order.
    # Solution 1: Skip for Python 2 (it's clearly the same dictionary
    # if the build passes on Python 3)
    # Solution 2: Hard code the dicts as in `test_extras_is_converted`
    # in test_frictionless_to_ckan.py instead of loading JSON and
    # sort the keys.
    if not six.PY2:
        assert ckan2 == exp
def test_keys_are_removed_that_should_be(self):
    """Check that these CKAN-only keys leave nothing in the output."""
    # NOTE(review): a second test with this exact name appears in this file;
    # if both live in the same class the later definition replaces this
    # one and this test never runs - confirm and rename if so.
    organization = {
        "description": "",
        "title": "primary_care_prescribing_dispensing",
        "created": "2020-03-31T21:51:41.334189",
        "approval_status": "approved",
        "is_organization": True,
        "state": "active",
        "image_url": "",
        "revision_id": "7c86fde3-9899-41d6-b0bb-6c72dd4b6b94",
        "type": "organization",
        "id": "a275814e-6c15-40a8-99fd-af911f1568ef",
        "name": "primary_care_prescribing_dispensing",
    }
    ckan_input = {
        'isopen': True,
        'num_tags': 1,
        'num_resources': 10,
        'state': 'active',
        "organization": organization,
    }
    result = converter.dataset(ckan_input)
    assert result == {}
def test_empty_tags_ignored(self):
    """Check that an empty ``tags`` list yields an empty output dict."""
    result = converter.dataset({"tags": []})
    assert result == {}
def test_null_values_are_stripped(self):
    """Check that a key whose value is ``None`` is absent from the output."""
    ckan_input = {
        'id': '12312',
        'title': 'title here',
        'format': None,
    }
    expected = {
        'id': '12312',
        'title': 'title here',
    }
    assert converter.dataset(ckan_input) == expected
def test_keys_are_removed_that_should_be(self):
    """Check that the CKAN ``state`` key is dropped from the output."""
    # NOTE(review): an earlier test in this file has this exact name; if
    # both live in the same class this definition replaces the earlier
    # one - confirm and rename if so.
    result = converter.dataset({'state': 'active'})
    assert result == {}