Example #1
def test_attributes_can_be_set():
    descriptor = {
        'profile': 'data-package',
    }
    package = Package(descriptor)
    package.descriptor['title'] = 'bar'
    assert package.to_dict() == {'profile': 'data-package', 'title': 'bar'}
Example #2
def test_it_removes_temporary_directories(datapackage_zip):
    tempdirs_glob = os.path.join(tempfile.gettempdir(), '*-datapackage')
    original_tempdirs = glob.glob(tempdirs_glob)
    package = Package(datapackage_zip)
    package.save(datapackage_zip)
    del package
    assert glob.glob(tempdirs_glob) == original_tempdirs
Example #3
def test_package_save_slugify_fk_resource_name_issue_181():
    descriptor = {
        'resources': [
            {
                'name': 'my-langs',
                'data': [['en'], ['ch']],
                'schema': {
                    'fields': [
                        {'name': 'lang'},
                    ]
                }
            },
            {
                'name': 'my-notes',
                'data': [['1', 'en', 'note1'], [2, 'ch', 'note2']],
                'schema': {
                    'fields': [
                        {'name': 'id', 'type': 'integer'},
                        {'name': 'lang'},
                        {'name': 'note'},
                    ],
                    'foreignKeys': [
                        {'fields': 'lang', 'reference': {'resource': 'my-langs', 'fields': 'lang'}}
                    ]
                }
            }
        ]
    }
    storage = Mock(buckets=['my_langs', 'my_notes'], spec=Storage)
    package = Package(descriptor)
    package.save(storage=storage)
    assert storage.create.call_args[0][0] == ['my_langs', 'my_notes']
    assert storage.create.call_args[0][1][1]['foreignKeys'] == [
        {'fields': 'lang', 'reference': {'resource': 'my_langs', 'fields': 'lang'}}
    ]
Example #4
def test_single_self_field_foreign_key():
    descriptor = deepcopy(FK_DESCRIPTOR)
    descriptor['resources'][0]['schema']['foreignKeys'][0]['fields'] = 'parent_id'
    descriptor['resources'][0]['schema']['foreignKeys'][0]['reference']['resource'] = ''
    descriptor['resources'][0]['schema']['foreignKeys'][0]['reference']['fields'] = 'id'
    resource = Package(descriptor).get_resource('main')
    keyed_rows = resource.read(keyed=True, relations=True)
    assert keyed_rows == [
      {
          'id': '1',
          'name': 'Alex',
          'surname': 'Martin',
          'parent_id': None,
      },
      {
          'id': '2',
          'name': 'John',
          'surname': 'Dockins',
          'parent_id': {'id': '1', 'name': 'Alex', 'surname': 'Martin', 'parent_id': None},
      },
      {
          'id': '3',
          'name': 'Walter',
          'surname': 'White',
          'parent_id': {'id': '2', 'name': 'John', 'surname': 'Dockins', 'parent_id': '1'},
      },
    ]
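The foreign-key tests in this listing (single-field, multi-field, and self-referencing) all start from an FK_DESCRIPTOR fixture that the extract does not show. The dict below is a reconstruction inferred from the assertions (a 'main' resource whose 'name' field references 'firstname' in a 'people' resource); treat it as illustrative, not a verbatim copy of the test suite.

FK_DESCRIPTOR = {
    'resources': [
        {
            'name': 'main',
            'data': [
                ['id', 'name', 'surname', 'parent_id'],
                ['1', 'Alex', 'Martin', ''],
                ['2', 'John', 'Dockins', '1'],
                ['3', 'Walter', 'White', '2'],
            ],
            'schema': {
                'fields': [
                    {'name': 'id'},
                    {'name': 'name'},
                    {'name': 'surname'},
                    {'name': 'parent_id'},
                ],
                'foreignKeys': [
                    {'fields': 'name',
                     'reference': {'resource': 'people', 'fields': 'firstname'}},
                ],
            },
        },
        {
            'name': 'people',
            'data': [
                ['firstname', 'surname'],
                ['Alex', 'Martin'],
                ['John', 'Dockins'],
                ['Walter', 'White'],
            ],
            'schema': {
                'fields': [
                    {'name': 'firstname'},
                    {'name': 'surname'},
                ],
            },
        },
    ],
}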
Example #5
def test_attributes_arent_immutable():
    descriptor = {
        'profile': 'data-package',
        'keywords': [],
    }
    package = Package(descriptor)
    package.descriptor['keywords'].append('foo')
    assert package.to_dict() == {'profile': 'data-package', 'keywords': ['foo']}
Example #6
def test_iter_errors_returns_schemas_iter_errors(profile_mock):
    iter_errors_mock = mock.Mock()
    iter_errors_mock.return_value = 'the iter errors'
    profile_mock.return_value.iter_errors = iter_errors_mock
    descriptor = {'resources': [{'name': 'name', 'data': ['data']}]}
    package = Package(descriptor)
    assert package.iter_errors() == 'the iter errors'
    iter_errors_mock.assert_called_with(package.to_dict())
Example #7
def test_single_field_foreign_key():
    resource = Package(FK_DESCRIPTOR).get_resource('main')
    rows = resource.read(relations=True)
    assert rows == [
      ['1', {'firstname': 'Alex', 'surname': 'Martin'}, 'Martin', None],
      ['2', {'firstname': 'John', 'surname': 'Dockins'}, 'Dockins', '1'],
      ['3', {'firstname': 'Walter', 'surname': 'White'}, 'White', '2'],
    ]
Example #8
def test_descriptor_dict_without_local_resources_is_safe():
    descriptor = {
        'resources': [
            {'data': 42},
            {'url': 'http://someplace.com/data.csv'},
        ]
    }
    package = Package(descriptor, {})
    assert package.safe()
Example #9
def test_to_dict_value_can_be_altered_without_changing_the_package():
    descriptor = {
        'profile': 'data-package',
    }
    package = Package(descriptor)
    package_dict = package.to_dict()
    package_dict['foo'] = 'bar'
    assert package.descriptor == {
        'profile': 'data-package',
    }
Example #10
def test_validate_works_when_setting_attributes_after_creation():
    schema = {
        'properties': {
            'name': {}
        },
        'required': ['name'],
    }
    package = Package(schema=schema)
    package.descriptor['name'] = 'foo'
    package.validate()
Example #11
def test_validate_raises_validation_error_if_invalid():
    schema = {
        'properties': {
            'name': {},
        },
        'required': ['name'],
    }
    package = Package(schema=schema)
    with pytest.raises(exceptions.ValidationError):
        package.validate()
Example #12
def test_single_field_foreign_key_invalid():
    descriptor = deepcopy(FK_DESCRIPTOR)
    descriptor['resources'][1]['data'][2][0] = 'Max'
    resource = Package(descriptor).get_resource('main')
    with pytest.raises(exceptions.RelationError) as excinfo1:
        resource.read(relations=True)
    with pytest.raises(exceptions.RelationError) as excinfo2:
        resource.check_relations()
    assert 'Foreign key' in str(excinfo1.value)
    assert 'Foreign key' in str(excinfo2.value)
Example #13
def test_changing_resources_in_descriptor_changes_datapackage():
    descriptor = {
        'resources': [
            {'data': '万事开头难'}
        ]
    }
    package = Package(descriptor)
    package.descriptor['resources'][0]['name'] = 'saying'
    package.commit()
    assert package.descriptor['resources'][0]['name'] == 'saying'
Example #14
def datapackage_zip(tmpfile):
    descriptor = {
        'name': 'proverbs',
        'resources': [
            {'name': 'name', 'path': 'foo.txt'},
        ]
    }
    package = Package(descriptor, default_base_path='data')
    package.save(tmpfile)
    return tmpfile
Example #15
def test_save_data_to_storage():
    schema = {
        'fields': [{'format': 'default', 'name': 'id', 'type': 'integer'}],
        'missingValues': ['']
    }
    storage = Mock(buckets=['data'], spec=Storage)
    package = Package({'resources': [{'name': 'data', 'data': [['id'], [1], [2], [3]]}]})
    package.save(storage=storage)
    storage.create.assert_called_with(['data'], [schema], force=True)
    storage.write.assert_called_with('data', ANY)
Example #16
def test_can_add_resource_to_descriptor_in_place():
    resource = {
        'data': '万事开头难',
    }
    package = Package()
    resources = package.descriptor.get('resources', [])
    resources.append(resource)
    package.descriptor['resources'] = resources
    package.commit()
    assert len(package.resources) == 1
    assert package.resources[0].source == '万事开头难'
Example #17
def test_should_raise_validation_error_if_datapackage_is_invalid(tmpfile):
    descriptor = {}
    schema = {
        'properties': {
            'name': {},
        },
        'required': ['name'],
    }
    package = Package(descriptor, schema)
    with pytest.raises(exceptions.ValidationError):
        package.save(tmpfile)
Example #18
def test_can_remove_resource_from_descriptor_in_place():
    descriptor = {
        'resources': [
            {'data': '万事开头难'},
            {'data': 'All beginnings are hard'}
        ]
    }
    package = Package(descriptor)
    del package.descriptor['resources'][1]
    package.commit()
    assert len(package.resources) == 1
    assert package.resources[0].source == '万事开头难'
Example #19
def test_multi_field_foreign_key_invalid():
    descriptor = deepcopy(FK_DESCRIPTOR)
    descriptor['resources'][0]['schema']['foreignKeys'][0]['fields'] = ['name', 'surname']
    descriptor['resources'][0]['schema']['foreignKeys'][0]['reference']['fields'] = ['firstname', 'surname']
    descriptor['resources'][1]['data'][2][0] = 'Max'
    resource = Package(descriptor).get_resource('main')
    with pytest.raises(exceptions.RelationError) as excinfo1:
        resource.read(relations=True)
    with pytest.raises(exceptions.RelationError) as excinfo2:
        resource.check_relations()
    assert 'Foreign key' in str(excinfo1.value)
    assert 'Foreign key' in str(excinfo2.value)
Example #20
def test_validate():
    descriptor = {
        'name': 'foo',
    }
    schema = {
        'properties': {
            'name': {},
        },
        'required': ['name'],
    }
    package = Package(descriptor, schema)
    package.validate()
Example #21
def test_adds_datapackage_descriptor_at_zipfile_root(tmpfile):
    descriptor = {
        'name': 'proverbs',
        'resources': [
            {'data': '万事开头难'}
        ]
    }
    schema = {}
    package = Package(descriptor, schema)
    package.save(tmpfile)
    with zipfile.ZipFile(tmpfile, 'r') as z:
        package_json = z.read('datapackage.json').decode('utf-8')
    assert json.loads(package_json) == json.loads(package.to_json())
Example #22
def test_single_self_field_foreign_key_invalid():
    descriptor = deepcopy(FK_DESCRIPTOR)
    descriptor['resources'][0]['schema']['foreignKeys'][0]['fields'] = 'parent_id'
    descriptor['resources'][0]['schema']['foreignKeys'][0]['reference']['resource'] = ''
    descriptor['resources'][0]['schema']['foreignKeys'][0]['reference']['fields'] = 'id'
    descriptor['resources'][0]['data'][2][0] = '0'
    resource = Package(descriptor).get_resource('main')
    with pytest.raises(exceptions.RelationError) as excinfo1:
        resource.read(relations=True)
    with pytest.raises(exceptions.RelationError) as excinfo2:
        resource.check_relations()
    assert 'Foreign key' in str(excinfo1.value)
    assert 'Foreign key' in str(excinfo2.value)
Example #23
def test_generates_filenames_for_named_resources(tmpfile):
    descriptor = {
        'name': 'proverbs',
        'resources': [
            {'name': 'proverbs', 'format': 'TXT', 'path': 'unicode.txt'},
            {'name': 'proverbs_without_format', 'path': 'unicode.txt'}
        ]
    }
    schema = {}
    package = Package(descriptor, schema, default_base_path='data')
    package.save(tmpfile)
    with zipfile.ZipFile(tmpfile, 'r') as z:
        assert 'data/proverbs.txt' in z.namelist()
        assert 'data/proverbs_without_format' in z.namelist()
Example #24
def test_generates_unique_filenames_for_unnamed_resources(tmpfile):
    descriptor = {
        'name': 'proverbs',
        'resources': [
            {'path': 'unicode.txt'},
            {'path': 'foo.txt'}
        ]
    }
    schema = {}
    package = Package(descriptor, schema, default_base_path='data')
    package.save(tmpfile)
    with zipfile.ZipFile(tmpfile, 'r') as z:
        files = z.namelist()
        assert sorted(set(files)) == sorted(files)
Example #25
def test_table_pandas(name):

    # Storage
    storage = Storage.connect('pandas')

    # Save
    package = Package('data/packages/%s/datapackage.json' % name)
    package.save(storage=storage)

    # Load
    package = Package(storage=storage)
    assert package.resources
    for resource in package.resources:
        assert resource.read()
Example #26
def test_fixes_resources_paths_to_be_relative_to_package(tmpfile):
    descriptor = {
        'name': 'proverbs',
        'resources': [
            {'name': 'unicode', 'format': 'txt', 'path': 'unicode.txt'}
        ]
    }
    schema = {}
    package = Package(
        descriptor, schema, default_base_path='data')
    package.save(tmpfile)
    with zipfile.ZipFile(tmpfile, 'r') as z:
        json_string = z.read('datapackage.json').decode('utf-8')
        generated_package_dict = json.loads(json_string)
    assert generated_package_dict['resources'][0]['path'] == 'data/unicode.txt'
Example #27
def test_table_sql(name):

    # Storage
    engine = create_engine('sqlite:///')
    storage = Storage.connect('sql', engine=engine)

    # Save
    package = Package('data/packages/%s/datapackage.json' % name)
    package.save(storage=storage)

    # Load
    package = Package(storage=storage)
    assert package.resources
    for resource in package.resources:
        assert resource.read()
Example #28
def test_adds_resources_inside_data_subfolder(tmpfile):
    descriptor = {
        'name': 'proverbs',
        'resources': [
            {'name': 'name', 'path': 'unicode.txt'}
        ]
    }
    schema = {}
    package = Package(descriptor, schema, default_base_path='data')
    package.save(tmpfile)
    with zipfile.ZipFile(tmpfile, 'r') as z:
        filename = [name for name in z.namelist()
                    if name.startswith('data/')]
        assert len(filename) == 1
        resource_data = z.read(filename[0]).decode('utf-8')
    assert resource_data == '万事开头难\n'
Example #29
def test_load_data_from_storage():
    schema = {
        'fields': [{'format': 'default', 'name': 'id', 'type': 'integer'}],
        'missingValues': ['']
    }
    storage = Mock(
        buckets=['data'],
        describe=lambda bucket: {'fields': [{'name': 'id', 'type': 'integer'}]},
        iter=lambda bucket: [[1], [2], [3]],
        spec=Storage)
    package = Package(storage=storage)
    package.infer()
    resource = package.get_resource('data')
    assert len(package.resources) == 1
    assert resource.descriptor == {
        'name': 'data',
        'path': 'data',
        'encoding': 'utf-8',
        'profile': 'tabular-data-resource',
        'schema': schema}
    assert resource.headers == ['id']
    assert resource.read() == [[1], [2], [3]]
Example #30
def test_table_bigquery(name):

    # Storage
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '.credentials.json'
    credentials = GoogleCredentials.get_application_default()
    service = build('bigquery', 'v2', credentials=credentials)
    project = json.load(io.open('.credentials.json', encoding='utf-8'))['project_id']
    dataset = 'package'
    prefix = '%s_' % uuid.uuid4().hex
    storage = Storage.connect('bigquery',
        service=service, project=project, dataset=dataset, prefix=prefix)

    # Save
    package = Package('data/packages/%s/datapackage.json' % name)
    package.save(storage=storage)

    # Load
    package = Package(storage=storage)
    assert package.resources
    for resource in package.resources:
        assert resource.read()

    # Clean
    storage.delete()
Example #31
File: load.py  Project: jornh/dataflows
    def process_datapackage(self, dp: Package):

        # If loading from datapackage & resource iterator:
        if isinstance(self.load_source, tuple):
            datapackage_descriptor, resource_iterator = self.load_source
            resources = datapackage_descriptor['resources']
            resource_matcher = ResourceMatcher(self.resources, datapackage_descriptor)
            for resource_descriptor in datapackage_descriptor['resources']:
                if resource_matcher.match(resource_descriptor['name']):
                    self.resource_descriptors.append(resource_descriptor)
            self.iterators = (resource for resource, descriptor in zip(resource_iterator, resources)
                              if resource_matcher.match(descriptor['name']))

        # If load_source is string:
        else:
            # Handle Environment vars if necessary:
            if self.load_source.startswith('env://'):
                env_var = self.load_source[6:]
                self.load_source = os.environ.get(env_var)
                if self.load_source is None:
                    raise ValueError(f"Couldn't find value for env var '{env_var}'")

            # Loading from datapackage:
            if os.path.basename(self.load_source) == 'datapackage.json':
                self.load_dp = Package(self.load_source)
                resource_matcher = ResourceMatcher(self.resources, self.load_dp)
                for resource in self.load_dp.resources:
                    if resource_matcher.match(resource.name):
                        self.resource_descriptors.append(resource.descriptor)
                        self.iterators.append(resource.iter(keyed=True, cast=True))

            # Loading for any other source
            else:
                path = os.path.basename(self.load_source)
                path = os.path.splitext(path)[0]
                descriptor = dict(path=self.name or path,
                                  profile='tabular-data-resource')
                self.resource_descriptors.append(descriptor)
                descriptor['name'] = self.name or path
                if 'encoding' in self.options:
                    descriptor['encoding'] = self.options['encoding']
                self.options.setdefault('custom_parsers', {}).setdefault('xml', XMLParser)
                self.options.setdefault('ignore_blank_headers', True)
                self.options.setdefault('headers', 1)
                stream: Stream = Stream(self.load_source, **self.options).open()
                if len(stream.headers) != len(set(stream.headers)):
                    if not self.deduplicate_headers:
                        raise ValueError(
                            'Found duplicate headers. Use the `deduplicate_headers` flag')
                    stream.headers = self.rename_duplicate_headers(stream.headers)
                schema = Schema().infer(
                    stream.sample, headers=stream.headers,
                    confidence=1, guesser_cls=self.guesser)
                if self.override_schema:
                    schema.update(self.override_schema)
                if self.override_fields:
                    fields = schema.get('fields', [])
                    for field in fields:
                        field.update(self.override_fields.get(field['name'], {}))
                descriptor['schema'] = schema
                descriptor['format'] = self.options.get('format', stream.format)
                descriptor['path'] += '.{}'.format(stream.format)
                self.iterators.append(stream.iter(keyed=True))
        dp.descriptor.setdefault('resources', []).extend(self.resource_descriptors)
        return dp
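For context, this load step is normally driven through a dataflows pipeline rather than called directly; a minimal sketch (the CSV path below is hypothetical):

from dataflows import Flow, load, printer

# load() is the step whose process_datapackage() appears above;
# printer() simply echoes the rows that flow through the pipeline.
Flow(
    load('data/mydata.csv', name='mydata'),
    printer(),
).process()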
Example #32
def ts_validate(data_path, schema=None):
    """Validate a given TableSchema using the Datapackage package.

    Arguments:
        data_path (str): Path to the TableSchema JSON or BDBag directory
                or BDBag archive to validate.
        schema (str): The schema to validate against. If not provided,
                the data is only validated against the defined TableSchema.
                Default None.

    Returns:
        dict: The validation results.
            is_valid (bool): Is the TableSchema valid?
            raw_errors (list): The raw Exceptions generated from any validation errors.
            error (str): A formatted error message about any validation errors.
    """
    # If data_path is BDBag archive, unarchive to temp dir
    try:
        data_path = bdbag_api.extract_bag(data_path, temp=True)
    # data_path is not archive
    except RuntimeError:
        pass
    # If data_path is dir (incl. if was unarchived), find JSON desc
    if os.path.isdir(data_path):
        # If 'data' dir present, search there instead
        if "data" in os.listdir(data_path):
            data_path = os.path.join(data_path, "data")
        # Find .json file (cannot be hidden)
        desc_file_list = [
            filename for filename in os.listdir(data_path)
            if filename.endswith(".json") and not filename.startswith(".")
        ]
        if len(desc_file_list) < 1:
            return {
                "is_valid": False,
                "raw_errors": [FileNotFoundError("No TableSchema JSON file found.")],
                "error": "No TableSchema JSON file found."
            }
        elif len(desc_file_list) > 1:
            return {
                "is_valid": False,
                "raw_errors": [RuntimeError("Multiple JSON files found in directory.")],
                "error": "Multiple JSON files found in directory."
            }
        else:
            data_path = os.path.join(data_path, desc_file_list[0])
    # data_path should/must be file now (JSON desc)
    if not os.path.isfile(data_path):
        return {
            "is_valid": False,
            "raw_errors": [ValueError("Path '{}' does not refer to a file".format(data_path))],
            "error": "Path '{}' does not refer to a file".format(data_path)
        }

    # Read into Package (identical to DataPackage), return error on failure
    try:
        pkg = Package(descriptor=data_path, strict=True)
    except Exception as e:
        return {
            "is_valid": False,
            "raw_errors": e.errors,
            "error": "\n".join([str(err) for err in pkg.errors])
        }
    # Check and return package validity based on non-Exception-throwing Package validation
    if not pkg.valid:
        return {
            "is_valid": pkg.valid,
            "raw_errors": pkg.errors,
            "error": "\n".join([str(err) for err in pkg.errors])
        }
    # Perform manual validation as well
    for resource in pkg.resources:
        try:
            resource.read()
        except CastError as e:
            return {
                "is_valid": False,
                "raw_errors": e.errors,
                "error": "\n".join([str(err) for err in e.errors])
            }
        except Exception as e:
            return {"is_valid": False, "raw_errors": repr(e), "error": str(e)}
    return {"is_valid": True, "raw_errors": [], "error": None}
Example #33
def build(config: Dict) -> Package:
    """Builds a datapackage.Datapackage object from a config dictionary.

    The configuration dictionary should contain the following keys:
    "metadata", "files".

    Information about the corresponding study can be placed in metadata.
    Example:
        {
            'metadata': {
                'name': 'ddionrails-study',
                'id': 'doi'
            }
        }
    The desired files to be included in the Tabular Data Package can be placed in 'files':
    Example:
        {
            'files': [
                'concepts.csv'
            ]
        }

    See: examples/example-config.yml

    The resulting Tabular Data Package is written to disk as 'datapackage.json' in
    the directory the command line tool is run.

    Args:
        config: The configuration of the Datapackage to be created.

    """

    if "metadata" not in config or "files" not in config:
        raise ValueError("Config must contain 'metadata' and 'files'")

    # Read the descriptor base dictionary from disk
    # and update it with values from the config file
    descriptor = read_yaml(DATAPACKAGE_BASE_FILE)
    descriptor["name"] = config["metadata"].get("name")
    descriptor["id"] = config["metadata"].get("id")
    descriptor["title"] = config["metadata"].get("title")
    # Remove empty keys from the dictionary
    descriptor = {key: value for key, value in descriptor.items() if value}

    # Create a Datapackage object from the descriptor dictionary
    package = Package(descriptor=descriptor)
    wanted_files = [file.split(".")[0] for file in config["files"]]
    for file in wanted_files:
        # If a filename ends with "_strict"
        # create the basic Tabular Data Resource first
        # then add the "stricter" rules from the "_strict" file
        if "_strict" in file:
            basic_file = file.replace("_strict", "")
            resource = read_tabular_data_resource(basic_file)
            strict_resource = read_tabular_data_resource(file)
            merge(resource, strict_resource)
        else:
            resource = read_tabular_data_resource(file)
        package.add_resource(resource)
    package.commit()
    if not package.valid:
        for error in package.errors:
            LOGGER.error(error)
    return package
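A minimal usage sketch for build() (the metadata values echo the docstring example; the call assumes the module's own helpers, such as read_tabular_data_resource and DATAPACKAGE_BASE_FILE, are available):

config = {
    "metadata": {"name": "ddionrails-study", "id": "doi"},
    "files": ["concepts.csv"],
}
package = build(config)
# Write the resulting Tabular Data Package descriptor to disk
package.save("datapackage.json")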
Example #34
def test_attributes_return_defaults_id_descriptor_is_empty():
    descriptor = {}
    schema = {}
    package = Package(descriptor, schema)
    assert package.attributes == ('profile', )
Example #35
def test_schema():
    descriptor = {}
    schema = {'foo': 'bar'}
    package = Package(descriptor, schema=schema)
    assert package.schema.to_dict() == schema
Example #36
def test_init_raises_if_url_doesnt_exist():
    url = 'http://someplace.com/datapackage.json'
    httpretty.register_uri(httpretty.GET, url, status=404)
    with pytest.raises(exceptions.DataPackageException):
        Package(url)
Example #37
def test_local_with_relative_resources_paths_is_safe():
    package = Package('data/datapackage_with_foo.txt_resource.json', {})
    assert package.safe()
Example #38
def test_should_raise_if_path_doesnt_exist():
    package = Package({}, {})
    with pytest.raises(exceptions.DataPackageException):
        package.save('/non/existent/file/path')
Example #39
def test_works_with_resources_with_relative_paths(tmpfile):
    package = Package('data/datapackage_with_foo.txt_resource.json')
    package.save(tmpfile)
    with zipfile.ZipFile(tmpfile, 'r') as z:
        assert len(z.filelist) == 2
Example #40
def test_init_raises_if_path_json_isnt_a_dict():
    with pytest.raises(exceptions.DataPackageException):
        Package('data/empty_array.json')
Example #41
def test_init_raises_if_path_is_a_bad_json():
    with pytest.raises(exceptions.DataPackageException) as excinfo:
        Package('data/bad_json.json')
    message = str(excinfo.value)
    assert 'Unable to parse JSON' in message
    assert 'line 2 column 5 (char 6)' in message
Example #42
def test_saves_as_zip(tmpfile):
    package = Package(schema={})
    package.save(tmpfile)
    assert zipfile.is_zipfile(tmpfile)
Example #43
def test_changing_resources_in_descriptor_changes_datapackage():
    descriptor = {'resources': [{'data': '万事开头难'}]}
    package = Package(descriptor)
    package.descriptor['resources'][0]['name'] = 'saying'
    package.commit()
    assert package.descriptor['resources'][0]['name'] == 'saying'
Example #44
def test_should_raise_if_zipfile_raised_LargeZipFile(zipfile_mock, tmpfile):
    zipfile_mock.side_effect = zipfile.LargeZipFile()
    package = Package({}, {})
    with pytest.raises(exceptions.DataPackageException):
        package.save(tmpfile)
Example #45
def test_zip_with_relative_resources_paths_is_safe(datapackage_zip):
    package = Package(datapackage_zip.name, {})
    assert package.safe()
Example #46
def test_init_raises_if_filelike_object_isnt_a_json():
    invalid_json = six.StringIO('{"foo"}')
    with pytest.raises(exceptions.DataPackageException):
        Package(invalid_json)
Example #47
def test_base_path_defaults_to_none():
    assert Package().base_path is None
Example #48
def test_it_works_with_local_paths(datapackage_zip):
    package = Package(datapackage_zip.name)
    assert package.descriptor['name'] == 'proverbs'
    assert len(package.resources) == 1
    assert package.resources[0].data == b'foo\n'
Example #49
def test_schema_raises_registryerror_if_registry_raised(registry_mock):
    registry_mock.side_effect = exceptions.RegistryError
    with pytest.raises(exceptions.RegistryError):
        Package()
Example #50
def test_it_works_with_file_objects(datapackage_zip):
    package = Package(datapackage_zip)
    assert package.descriptor['name'] == 'proverbs'
    assert len(package.resources) == 1
    assert package.resources[0].data == b'foo\n'
Example #51
from datapackage import Package
from datapackage.exceptions import RelationError

# Dereference relations

package = Package('data/ferc1-test/datapackage.json')
try:
    keyed_rows = package.get_resource('fuel_ferc1').read(keyed=True,
                                                         relations=True)
    print(keyed_rows[0])
except RelationError as exception:
    print(exception)
Example #52
def test_local_data_path(datapackage_zip):
    package = Package(datapackage_zip)
    assert package.resources[0].local_data_path is not None
    with open('data/foo.txt') as data_file:
        with open(package.resources[0].local_data_path) as local_data_file:
            assert local_data_file.read() == data_file.read()
Example #53
import click

from datapackage import Package
from tableschema.exceptions import RelationError

STUDY = "soep-core"
VERSION = "v33"

package = Package("metadata/datapackage.json")


@click.command()
@click.argument("entity")
def validate_entity(entity):
    resource = package.get_resource(entity)
    resource.check_relations()


if __name__ == "__main__":
    validate_entity()
Example #54
def test_it_breaks_if_theres_no_datapackage_json(tmpfile):
    with zipfile.ZipFile(tmpfile.name, 'w') as z:
        z.writestr('data.txt', 'foobar')
    with pytest.raises(exceptions.DataPackageException):
        Package(tmpfile.name, {})
Example #55
    'contractors',
    'amounts',
]
factors = [
    1000,
    1000,
    1000,
    1000,
    1000,
    1,
    1,
    1,
]

budget_fixes = Package(
    '/var/datapackages/budget/national/changes/current-year-fixes/datapackage.json'
)
budget_fixes = list(budget_fixes.resources[0].iter(keyed=True))
budget_fixes = dict(((x['year'], x['code']), x) for x in budget_fixes)
logging.info('GOT %s budget fixes', len(budget_fixes))

codes_and_titles = [('admin_cls_code_%d' % l, 'admin_cls_title_%d' % l)
                    for l in range(2, 10, 2)]

phases = {'מקורי': 'allocated', 'מאושר': 'revised', 'ביצוע': 'executed'}

resource = datapackage['resources'][0]
fields = resource['schema']['fields']
new_fields = [{
    'name': 'code',
    'type': 'string'
Example #56
def test_init_uses_base_schema_by_default():
    package = Package()
    assert package.schema.title == 'Data Package'
Example #57
def test_schema_gets_from_registry_if_available(registry_class_mock):
    schema = {'foo': 'bar'}
    registry_mock = mock.MagicMock()
    registry_mock.get.return_value = schema
    registry_class_mock.return_value = registry_mock
    assert Package().schema.to_dict() == schema
Example #58
def test_without_resources_is_safe():
    descriptor = {}
    package = Package(descriptor, {})
    assert package.safe()
Example #59
def test_accepts_file_paths(tmpfile):
    package = Package(schema={})
    package.save(tmpfile.name)
    assert zipfile.is_zipfile(tmpfile.name)
Example #60
def test_init_raises_if_path_isnt_a_json():
    with pytest.raises(exceptions.DataPackageException):
        Package('data/not_a_json')