def test_dataset_license(self):
        """Check that CKAN license fields are gathered into `licenses`."""
        # Only `license_id` is supplied (no license_title nor license_url),
        # so the resulting license entry carries just a `name`.
        expected = {'licenses': [{'name': 'odc-odbl'}]}
        actual = converter.dataset({'license_id': 'odc-odbl'})
        assert actual == expected

        # All three license keys are remapped into the `licenses` entry.
        ckan_dict = {
            'license_id': 'cc-by',
            'license_title': 'Creative Commons Attribution',
            'license_url': 'http://www.opendefinition.org/licenses/cc-by',
        }
        expected = {
            'licenses': [
                {
                    'name': 'cc-by',
                    'title': 'Creative Commons Attribution',
                    'path': 'http://www.opendefinition.org/licenses/cc-by',
                },
            ],
        }
        actual = converter.dataset(ckan_dict)
        assert actual == expected
    def test_dataset_author_and_maintainer(self):
        """Check how author/maintainer fields are turned into `contributors`."""
        # The author entry is expected in the first two scenarios.
        author_entry = {
            'title': 'World Bank and OECD',
            'email': '*****@*****.**',
            'role': 'author',
        }

        # Scenario 1: author only.
        ckan_dict = {
            'author': 'World Bank and OECD',
            'author_email': '*****@*****.**',
        }
        actual = converter.dataset(ckan_dict)
        assert actual == {'contributors': [author_entry]}

        # Scenario 2: author followed by maintainer.
        ckan_dict = {
            'author': 'World Bank and OECD',
            'author_email': '*****@*****.**',
            'maintainer': 'Datopian',
            'maintainer_email': '*****@*****.**',
        }
        maintainer_entry = {
            'title': 'Datopian',
            'email': '*****@*****.**',
            'role': 'maintainer',
        }
        expected = {'contributors': [author_entry, maintainer_entry]}
        actual = converter.dataset(ckan_dict)
        assert actual == expected

        # Scenario 3: if we already have contributors use that ...
        ckan_dict = {
            'contributors': [{'title': 'Datopians'}],
            'author': 'World Bank and OECD',
        }
        expected = {
            'contributors': [
                {'title': 'Datopians'},
            ],
        }
        actual = converter.dataset(ckan_dict)
        assert actual == expected
 def test_unjsonify_all_extra_values(self):
     """Check that JSON-encoded `extras` values come out as decoded objects."""
     # Nested JSON object, given to the converter as a plain string.
     location_json = ('{"country": {"China": {"population": '
                      '"1233214331", "capital": "Beijing"}}}')
     # Nested JSON array, also given as a plain string.
     numbers_json = '[[[1, 2, 3], [2, 4, 5]], [[7, 6, 0]]]'
     ckan_dict = {
         'extras': [
             {'key': 'location', 'value': location_json},
             {'key': 'numbers', 'value': numbers_json},
         ],
     }
     actual = converter.dataset(ckan_dict)
     # Each extra is expected as a top-level key holding the parsed value.
     expected = {
         "location": {
             "country": {
                 "China": {"population": "1233214331", "capital": "Beijing"},
             },
         },
         "numbers": [[[1, 2, 3], [2, 4, 5]], [[7, 6, 0]]],
     }
     assert actual == expected
 def test_dataset_extras(self):
     """Check that `extras` key/value pairs become top-level keys."""
     # Mix of value kinds: plain unicode text, a JSON array string,
     # a native integer and a JSON object string.
     extras = [
         {'key': 'title_cn', 'value': u'國內生產總值'},
         {'key': 'years', 'value': '[2015, 2016]'},
         {'key': 'last_year', 'value': 2016},
         {'key': 'location', 'value': '{"country": "China"}'},
     ]
     expected = {
         'title_cn': u'國內生產總值',
         'years': [2015, 2016],
         'last_year': 2016,
         'location': {'country': 'China'},
     }
     actual = converter.dataset({'extras': extras})
     assert actual == expected
示例#5
0
    def test_round_trip_ckan(self):
        """Check that repeated CKAN <-> Frictionless conversion is stable.

        The fixture is converted to a Frictionless dataset and back twice;
        the CKAN output of the first round trip (`ckan2`) must equal the
        output of the second round trip (`ckan3`).
        """
        # `ckan1` != `ckan2` but `ckan2` == `ckan3`
        inpath = 'tests/fixtures/full_ckan_package.json'
        # Read the fixture through a context manager so the file handle is
        # closed right away instead of being left open.
        with open(inpath) as fixture:
            ckan1 = json.load(fixture)
        fd1 = ckan_to_frictionless.dataset(ckan1)
        ckan2 = frictionless_to_ckan.package(fd1)
        fd2 = ckan_to_frictionless.dataset(ckan2)
        ckan3 = frictionless_to_ckan.package(fd2)

        # FIXME: this currently doesn't work for Python 2 due to the way
        # Unicode is handled and because the dictionary keys do not keep
        # the same order.
        # Solution 1: Skip for Python 2 (it's clearly the same dictionary
        # if the build passes on Python 3)
        # Solution 2: Hard code the dicts as in `test_extras_is_converted`
        # in test_frictionless_to_ckan.py instead of loading JSON and
        # sort the keys.
        if not six.PY2:
            assert ckan2 == ckan3
 def test_key_mappings(self):
     """Check the renaming of `notes` and `url` on conversion."""
     # `notes` is expected as `description`, `url` as `homepage`.
     expected = {
         'description': 'Country, regional and world GDP',
         'homepage': 'https://datopian.com',
     }
     actual = converter.dataset({
         'notes': 'Country, regional and world GDP',
         'url': 'https://datopian.com',
     })
     assert actual == expected
 def test_dataset_license_with_licenses_in_extras(self):
     """Check license conversion when `extras` also carries `licenses`.

     The input has both the top-level `license_*` fields and a JSON-encoded
     `licenses` list inside `extras`. The expected output lists the license
     built from the top-level fields first, followed by the second license
     from the extras.
     """
     cc_by = {
         'name': 'cc-by',
         'title': 'Creative Commons Attribution',
         'path': 'http://www.opendefinition.org/licenses/cc-by',
     }
     odc_by = {
         'name': 'odc-by',
         'title': 'Open Data Commons Attribution License',
         'path': 'https://opendatacommons.org/licenses/by/1-0/index.html',
     }
     odc_odbl = {
         'name': 'odc-odbl',
         'title': 'Open Data Commons Open Database License',
         'path': 'https://opendatacommons.org/licenses/odbl/1-0/index.html',
     }

     # Serialise the extras licenses before building the input dataset.
     licenses_json = json.dumps([cc_by, odc_by])
     ckan_dict = {
         'license_id': 'odc-odbl',
         'license_title': 'Open Data Commons Open Database License',
         'license_url':
             'https://opendatacommons.org/licenses/odbl/1-0/index.html',
         'extras': [{'key': 'licenses', 'value': licenses_json}],
     }
     expected = {'licenses': [odc_odbl, odc_by]}
     actual = converter.dataset(ckan_dict)
     assert actual == expected
示例#8
0
def _convert_excluding_path(ckan_dataset):
    """Convert a CKAN dataset to a frictionless package, keeping custom `path` values

    frictionless_ckan_mapper will override `path` if URL is set for a resource,
    so any `path` already present on a resource is remembered by resource
    position before the conversion and written back afterwards.
    """
    # Remember, by resource index, every `path` that was set before conversion.
    preserved_paths = {}
    for index, resource in enumerate(ckan_dataset.get('resources', [])):
        if 'path' in resource:
            preserved_paths[index] = resource['path']

    package = ctf.dataset(ckan_dataset)

    # Put the remembered paths back on the converted resources.
    for index, original_path in iteritems(preserved_paths):
        converted_resource = package['resources'][index]
        converted_resource['path'] = original_path

    return package
 def test_dataset_tags(self):
     """Check that CKAN tag objects become a `keywords` list of tag names."""
     economy_tag = {
         'display_name': 'economy',
         'id': '9d602a79-7742-44a7-9029-50b9eca38c90',
         'name': 'economy',
         'state': 'active',
     }
     worldbank_tag = {
         'display_name': 'worldbank',
         'id': '3ccc2e3b-f875-49ef-a39d-6601d6c0ef76',
         'name': 'worldbank',
         'state': 'active',
     }
     actual = converter.dataset({'tags': [economy_tag, worldbank_tag]})
     expected = {'keywords': ['economy', 'worldbank']}
     assert actual == expected
 def test_keys_are_passed_through(self):
     """Check that these keys, including an arbitrary one, pass through as-is."""
     ckan_dict = {
         'name': 'gdp',
         'id': 'xxxx',
         'title': 'Countries GDP',
         'version': '1.0',
         # random
         'xxx': 'aldka',
     }
     # The expected output has the same keys and values as the input.
     expected = {
         'name': 'gdp',
         'id': 'xxxx',
         'title': 'Countries GDP',
         'version': '1.0',
         'xxx': 'aldka',
     }
     actual = converter.dataset(ckan_dict)
     assert actual == expected
示例#11
0
def migrate_datasets(datasets, metastore_client):
    """Migrate all datasets in an iterable to metastore

    Only items whose ``type`` is ``'dataset'`` are converted and stored.
    A ``Conflict`` raised while storing is logged at info level; any other
    exception is logged with its traceback. In both cases the loop moves on
    to the next package.

    :param datasets: iterable of CKAN dataset dicts, each with a ``type`` key
    :param metastore_client: object whose ``create(name, package, author=...)``
        method stores a converted package
    :return: the number of packages that were stored successfully
    """
    # Lazy generator: each dataset is converted only when the loop reaches it.
    datapackages = (ckan_to_frictionless.dataset(ds) for ds in datasets
                    if ds['type'] == 'dataset')
    stored = 0
    for package in datapackages:
        log.debug("Converted dataset to datapackage: %s", package)
        try:
            author = _get_author(package)
            metastore_client.create(package['name'], package, author=author)
            stored += 1
            log.debug("Successfully stored package: %s", package['name'])
        except Conflict:
            log.info("Package already exists in metastore backend: %s",
                     package['name'])
        except Exception:
            # The failure may itself be a missing 'name' key, so look the
            # name up without raising; otherwise this handler would throw a
            # second KeyError and abort the whole migration.
            log.exception("Failed storing package: %s", package.get('name'))
    return stored
 def test_resources_are_converted(self):
     """Check that resource `url`/`size` come out as `path`/`bytes`."""
     ckan_resource = {
         'name': 'data.csv',
         'url': 'http://someplace.com/data.csv',
         'size': 100,
     }
     expected_resource = {
         'name': 'data.csv',
         'path': 'http://someplace.com/data.csv',
         'bytes': 100,
     }
     actual = converter.dataset({'name': 'gdp', 'resources': [ckan_resource]})
     # The dataset-level `name` is expected unchanged.
     expected = {'name': 'gdp', 'resources': [expected_resource]}
     assert actual == expected
示例#13
0
    def test_differences_ckan_round_trip(self):
        """Check the CKAN output of one round trip against a stored fixture.

        The fixture package is converted to a Frictionless dataset and back
        to CKAN, then compared with the expected first-round-trip fixture.
        """
        # When converting ckan1 to fd1 then fd1 to ckan2,
        # ckan1 is bound to differ from ckan2.
        # Those fixtures illustrate the expected differences.
        inpath = 'tests/fixtures/full_ckan_package.json'
        # Read fixtures through context managers so the file handles are
        # closed right away instead of being left open.
        with open(inpath) as fixture:
            ckan1 = json.load(fixture)
        fd1 = ckan_to_frictionless.dataset(ckan1)
        ckan2 = frictionless_to_ckan.package(fd1)
        inpath_round_trip = ('tests/fixtures/'
                             'full_ckan_package_first_round_trip.json')
        with open(inpath_round_trip) as fixture:
            exp = json.load(fixture)

        # FIXME: this currently doesn't work for Python 2 due to the way
        # Unicode is handled and because the dictionary keys do not keep
        # the same order.
        # Solution 1: Skip for Python 2 (it's clearly the same dictionary
        # if the build passes on Python 3)
        # Solution 2: Hard code the dicts as in `test_extras_is_converted`
        # in test_frictionless_to_ckan.py instead of loading JSON and
        # sort the keys.
        if not six.PY2:
            assert ckan2 == exp
 def test_keys_are_removed_that_should_be(self):
     """Check that every key in this input is dropped, leaving an empty dict."""
     organization = {
         "description": "",
         "title": "primary_care_prescribing_dispensing",
         "created": "2020-03-31T21:51:41.334189",
         "approval_status": "approved",
         "is_organization": True,
         "state": "active",
         "image_url": "",
         "revision_id": "7c86fde3-9899-41d6-b0bb-6c72dd4b6b94",
         "type": "organization",
         "id": "a275814e-6c15-40a8-99fd-af911f1568ef",
         "name": "primary_care_prescribing_dispensing",
     }
     ckan_dict = {
         'isopen': True,
         'num_tags': 1,
         'num_resources': 10,
         'state': 'active',
         "organization": organization,
     }
     actual = converter.dataset(ckan_dict)
     expected = {}
     assert actual == expected
 def test_empty_tags_ignored(self):
     """Check that an empty `tags` list yields an empty result."""
     actual = converter.dataset({"tags": []})
     expected = {}
     assert actual == expected
 def test_null_values_are_stripped(self):
     """Check that a key whose value is None is absent from the output."""
     ckan_dict = {
         'id': '12312',
         'title': 'title here',
         'format': None,
     }
     # `format` is None in the input, so only `id` and `title` are expected.
     expected = {
         'id': '12312',
         'title': 'title here',
     }
     actual = converter.dataset(ckan_dict)
     assert actual == expected
 def test_keys_are_removed_that_should_be(self):
     """Check that the `state` key is dropped, leaving an empty result."""
     actual = converter.dataset({'state': 'active'})
     expected = {}
     assert actual == expected