def create(self, bucket, descriptor, force=False):
    """Create one or more buckets (tables) from JSON Table Schema descriptors.

    Parameters
    ----------
    bucket: str/list
        Bucket name or list of bucket names.
    descriptor: dict/list
        Descriptor or list of descriptors, paired positionally with
        ``bucket``.
    force: bool
        When true, requested buckets that already exist are deleted and
        re-created instead of raising.

    Raises
    ------
    RuntimeError
        If a requested bucket already exists and ``force`` is false.

    """

    # Make lists
    buckets = [bucket] if isinstance(bucket, six.string_types) else bucket
    descriptors = [descriptor] if isinstance(descriptor, dict) else descriptor
    pairs = list(zip(buckets, descriptors))

    # Check buckets for existence. The storage's bucket list is walked in
    # reverse and deletions below follow that same order.
    existing = [name for name in reversed(self.buckets) if name in buckets]
    if existing and not force:
        message = 'Bucket "%s" already exists.' % existing[0]
        raise RuntimeError(message)

    # Validate every descriptor before anything is deleted or registered, so
    # an invalid descriptor cannot drop an existing table or leave a stale
    # entry behind.
    for _, item in pairs:
        jsontableschema.validate(item)

    # Drop the buckets that are being replaced
    for name in existing:
        self.delete(name)

    # Define buckets
    for name, item in pairs:

        # Add to schemas
        self.__descriptors[name] = item

        # Create table
        tablename = mappers.bucket_to_tablename(self.__prefix, name)
        columns, constraints = mappers.descriptor_to_columns_and_constraints(
            self.__prefix, name, item)
        Table(tablename, self.__metadata, *(columns + constraints))

    # Create tables, update metadata
    self.__metadata.create_all()
Exemplo n.º 2
0
 def clean(self):
     """
     Validate the data descriptor and the schema of every resource.

     Raises ValidationError when the data package does not validate, when
     no resource is defined, or when a resource has no schema or an invalid
     schema.
     """
     #  Validate the data package
     validator = datapackage.DataPackage(self.data_descriptor)
     try:
         validator.validate()
     except Exception:
         # Exceptions are only subscriptable on Python 2; ``args[0]`` reads
         # the same value there and also works on Python 3.
         messages = [
             str(error.args[0]) for error in validator.iter_errors()
         ]
         raise ValidationError(
             'Data package errors: {}'.format(messages))
     # Check that there is at least one resources defined (not required by the standard)
     if len(self.resources) == 0:
         raise ValidationError('You must define at least one resource')
     # Validate the schema for all resources
     for resource in self.resources:
         if 'schema' not in resource:
             raise ValidationError("Resource without a 'schema'.")
         schema = resource.get('schema')
         try:
             jsontableschema.validate(schema)
         except Exception:
             resource_name = resource.get('name')
             messages = [
                 str(error.args[0])
                 for error in jsontableschema.validator.iter_errors(schema)
             ]
             raise ValidationError(
                 'Schema errors for resource "{}": {}'.format(
                     resource_name, messages))
Exemplo n.º 3
0
 def clean(self):
     """
     Validate the data descriptor, then each resource's table schema.

     A ValidationError is raised for an invalid data package, for a
     descriptor without resources, and for any resource whose schema is
     missing or does not validate.
     """
     #  Validate the data package
     validator = datapackage.DataPackage(self.data_descriptor)
     try:
         validator.validate()
     except Exception:
         # ``error[0]`` only works on Python 2 exceptions; ``error.args[0]``
         # yields the same value and is valid on Python 3 as well.
         package_errors = [str(error.args[0]) for error in validator.iter_errors()]
         raise ValidationError('Data package errors: {}'.format(package_errors))
     # Check that there is at least one resources defined (not required by the standard)
     if len(self.resources) == 0:
         raise ValidationError('You must define at least one resource')
     # Validate the schema for all resources
     for resource in self.resources:
         if 'schema' not in resource:
             raise ValidationError("Resource without a 'schema'.")
         schema = resource.get('schema')
         try:
             jsontableschema.validate(schema)
         except Exception:
             resource_name = resource.get('name')
             schema_errors = [
                 str(error.args[0])
                 for error in jsontableschema.validator.iter_errors(schema)]
             raise ValidationError(
                 'Schema errors for resource "{}": {}'.format(
                     resource_name, schema_errors))
Exemplo n.º 4
0
    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """Create table by schema.

        Parameters
        ----------
        bucket: str/list
            Bucket name or list of bucket names.
        descriptor: dict/list
            JSONTableSchema descriptor or list of descriptors.
        force: bool
            When true, requested buckets that already exist are deleted and
            re-created instead of raising.
        indexes_fields: list
            list of tuples containing field names, or list of such lists

        Raises
        ------
        RuntimeError
            If table already exists and ``force`` is false.

        """

        # Make lists
        buckets = [bucket] if isinstance(bucket, six.string_types) else bucket
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        else:
            # Decide between "indexes for one bucket" and "one list of indexes
            # per bucket" from the first non-empty entry; looking only at
            # indexes_fields[0][0] fails when the first entry is empty.
            sample = next((entry for entry in indexes_fields if entry), None)
            if sample is not None and not isinstance(sample[0], (list, tuple)):
                indexes_fields = [indexes_fields]
        assert len(indexes_fields) == len(descriptors)
        assert len(buckets) == len(descriptors)
        triples = list(zip(buckets, descriptors, indexes_fields))

        # Check buckets for existence. The storage's bucket list is walked in
        # reverse and deletions below follow that same order.
        existing = [name for name in reversed(self.buckets) if name in buckets]
        if existing and not force:
            message = 'Bucket "%s" already exists.' % existing[0]
            raise RuntimeError(message)

        # Validate every descriptor before anything is deleted or registered,
        # so an invalid descriptor cannot drop an existing table.
        for _, item, _ in triples:
            jsontableschema.validate(item)

        # Drop the buckets that are being replaced
        for name in existing:
            self.delete(name)

        # Define buckets
        for name, item, index_fields in triples:

            # Add to schemas
            self.__descriptors[name] = item

            # Create table
            tablename = mappers.bucket_to_tablename(self.__prefix, name)
            columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
                self.__prefix, name, item, index_fields,
                self.__autoincrement)
            Table(tablename, self.__metadata,
                  *(columns + constraints + indexes))

        # Create tables, update metadata
        self.__metadata.create_all()
Exemplo n.º 5
0
    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """Create table by schema.

        Parameters
        ----------
        bucket: str/list
            Bucket name or list of bucket names.
        descriptor: dict/list
            JSONTableSchema descriptor or list of descriptors.
        force: bool
            When true, requested buckets that already exist are deleted and
            re-created instead of raising.
        indexes_fields: list
            list of tuples containing field names, or list of such lists

        Raises
        ------
        RuntimeError
            If table already exists and ``force`` is false.

        """

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = [descriptor] if isinstance(descriptor, dict) else descriptor
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        else:
            # The first non-empty entry tells whether this is the index list
            # of a single bucket (its items are field names) or already one
            # list per bucket. Inspecting indexes_fields[0][0] directly raises
            # IndexError when the first entry is empty.
            probe = next((entry for entry in indexes_fields if entry), None)
            if probe is not None and not isinstance(probe[0], (list, tuple)):
                indexes_fields = [indexes_fields]
        assert len(indexes_fields) == len(descriptors)
        assert len(buckets) == len(descriptors)
        triples = list(zip(buckets, descriptors, indexes_fields))

        # Check buckets for existence (reverse order of the storage's list)
        existing = [name for name in reversed(self.buckets) if name in buckets]
        if existing and not force:
            message = 'Bucket "%s" already exists.' % existing[0]
            raise RuntimeError(message)

        # Validate up front: nothing is deleted or registered unless every
        # descriptor is valid.
        for _, item, _ in triples:
            jsontableschema.validate(item)

        # Drop the buckets that are being replaced
        for name in existing:
            self.delete(name)

        # Define buckets
        for name, item, index_fields in triples:

            # Add to schemas
            self.__descriptors[name] = item

            # Create table
            tablename = mappers.bucket_to_tablename(self.__prefix, name)
            columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
                self.__prefix, name, item, index_fields)
            Table(tablename, self.__metadata, *(columns+constraints+indexes))

        # Create tables, update metadata
        self.__metadata.create_all()
Exemplo n.º 6
0
 def validate_data_package(data_package, dataset_type):
     """
     Check a data package and the schema of its single resource.
     Raises a ValidationError at the first problem found.
     :param data_package: descriptor read with ``.get('resources', [])``
     :param dataset_type: compared against the ``Dataset.TYPE_*`` constants
         to pick the schema class used for the final check
     :return: nothing
     """
     package = datapackage.DataPackage(data_package)
     try:
         package.validate()
     except Exception:
         package_errors = "<br>".join(
             [error.message for error in package.iter_errors()])
         raise ValidationError(
             'Data package errors:<br>{}'.format(package_errors))

     # Exactly one resource is accepted (the minimum is not required by the standard)
     resources = data_package.get('resources', [])
     resource_count = len(resources)
     if resource_count == 0:
         raise ValidationError('You must define at least one resource')
     if resource_count > 1:
         raise ValidationError('Only one resource per DataSet')

     # Validate the schema
     resource = resources[0]
     if 'schema' not in resource:
         raise ValidationError("Resource without a 'schema'.")
     schema = resource.get('schema', {})

     # First pass: the frictionless validator
     try:
         jsontableschema.validate(schema)
     except Exception:
         resource_name = resource.get('name')
         schema_errors = "<br>".join(
             [error.message
              for error in jsontableschema.validator.iter_errors(schema)])
         raise ValidationError(
             'Schema errors for resource "{}":<br>{}'.format(
                 resource_name, schema_errors))

     # Second pass: our own schema classes, whose constructors should raise
     # an exception on error
     try:
         if dataset_type == Dataset.TYPE_SPECIES_OBSERVATION:
             schema_class = SpeciesObservationSchema
         elif dataset_type == Dataset.TYPE_OBSERVATION:
             schema_class = ObservationSchema
         else:
             schema_class = GenericSchema
         schema_class(schema)
     except Exception as e:
         raise ValidationError(
             'Schema errors for resource "{}": {}'.format(
                 resource.get('name'), e))
 def test_schema_valid_fk_array(self):
     # Load the fixture from the test data directory
     schema_path = os.path.join(self.data_dir, 'schema_valid_fk_array.json')
     with io.open(schema_path) as handle:
         descriptor = json.load(handle)
     # The validation result must be truthy for this fixture
     self.assertTrue(jsontableschema.validate(descriptor))
Exemplo n.º 8
0
    def create(self, table, schema):
        """Create table by schema.

        Parameters
        ----------
        table: str/list
            Table name or list of table names.
        schema: dict/list
            JSONTableSchema schema or list of schemas, paired positionally
            with ``table``.

        Raises
        ------
        RuntimeError
            If table already exists.

        """

        # Make lists
        tables = [table] if isinstance(table, six.string_types) else table
        schemas = [schema] if isinstance(schema, dict) else schema
        pairs = list(zip(tables, schemas))

        # Check tables for existence
        for name in tables:
            if self.check(name):
                message = 'Table "%s" already exists.' % name
                raise RuntimeError(message)

        # Validate every schema before registering anything, so an invalid
        # schema is never left behind in self.__schemas or in the metadata.
        for _, item in pairs:
            jsontableschema.validate(item)

        # Define tables
        for name, item in pairs:

            # Add to schemas
            self.__schemas[name] = item

            # Create sa table
            # NOTE(review): convert_schema receives the already converted
            # table name, exactly as before - confirm this is intended.
            sa_name = mappers.convert_table(self.__prefix, name)
            columns, constraints = mappers.convert_schema(
                    self.__prefix, sa_name, item)
            Table(sa_name, self.__metadata, *(columns+constraints))

        # Create tables, update metadata
        self.__metadata.create_all()
Exemplo n.º 9
0
def validate(schema):

    """Validate that a supposed schema is in fact a JSON Table Schema."""

    # The validation result is unpacked as a (valid, errors) pair
    is_valid, problems = jsontableschema.validate(schema)

    # Echo the validity flag first, then the errors
    for output in (is_valid, problems):
        click.echo(output)
 def _filter_row(self, row, **kwargs):
     # Remove "id" from the row (as an int) before the values are extracted
     row_id = None
     if "id" in row:
         row_id = int(row.pop("id"))
     values = self._get_values(row)

     # Define and create the table the first time a row comes through
     if self.db_table is None:
         jsontableschema.validate(self._table_schema)
         prefix = ""
         bucket = self.table_name
         tablename = mappers.bucket_to_tablename(prefix, bucket)
         # No index fields and no autoincrement setting are passed
         columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
             prefix, bucket, self._table_schema, [], None)
         table_args = columns + constraints + indexes
         self.db_table = Table(tablename, self.db_meta, *table_args)
         self.db_table.create()

     # Only truthy upsert results are yielded
     outcome = self._upsert(row_id, values)
     if outcome:
         yield outcome
    def create(self, bucket, descriptor, force=False):
        """Create one or more buckets as tables through ``self.__service``.

        Parameters
        ----------
        bucket: str/list
            Bucket name or list of bucket names.
        descriptor: dict/list
            Descriptor or list of descriptors, paired positionally with
            ``bucket``.
        force: bool
            When true, an already existing bucket is deleted and re-created
            instead of raising.

        Raises
        ------
        RuntimeError
            If a bucket already exists and ``force`` is false.

        """

        # Make lists
        buckets = [bucket] if isinstance(bucket, six.string_types) else bucket
        descriptors = [descriptor] if isinstance(descriptor, dict) else descriptor

        # Iterate over buckets/descriptors
        for name, item in zip(buckets, descriptors):

            # Existent bucket
            exists = name in self.buckets
            if exists and not force:
                message = 'Bucket "%s" already exists' % name
                raise RuntimeError(message)

            # Validate before the destructive delete and before registering,
            # so an invalid descriptor cannot remove an existing bucket.
            jsontableschema.validate(item)
            if exists:
                self.delete(name)

            # Add to schemas
            self.__descriptors[name] = item

            # Prepare job body
            tablename = mappers.bucket_to_tablename(self.__prefix, name)
            nativedesc = mappers.descriptor_to_nativedesc(item)
            body = {
                'tableReference': {
                    'projectId': self.__project,
                    'datasetId': self.__dataset,
                    'tableId': tablename,
                },
                'schema': nativedesc,
            }

            # Make request
            self.__service.tables().insert(
                projectId=self.__project,
                datasetId=self.__dataset,
                body=body).execute()

        # Remove buckets cache
        self.__buckets = None
Exemplo n.º 12
0
    def create(self, bucket, descriptor, force=False):
        """Create buckets by issuing one ``tables().insert`` request each.

        Parameters
        ----------
        bucket: str/list
            Bucket name or list of bucket names.
        descriptor: dict/list
            Descriptor or list of descriptors, matched to ``bucket`` by
            position.
        force: bool
            Delete and re-create a bucket that already exists instead of
            raising.

        Raises
        ------
        RuntimeError
            If a bucket already exists and ``force`` is false.

        """

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]

        # Iterate over buckets/descriptors
        for current, current_descriptor in zip(buckets, descriptors):

            # Existent bucket
            already_there = current in self.buckets
            if already_there and not force:
                message = 'Bucket "%s" already exists' % current
                raise RuntimeError(message)

            # An invalid descriptor must fail here, before the existing
            # bucket is deleted and before the descriptor is registered.
            jsontableschema.validate(current_descriptor)
            if already_there:
                self.delete(current)

            # Add to schemas
            self.__descriptors[current] = current_descriptor

            # Prepare job body
            tablename = mappers.bucket_to_tablename(self.__prefix, current)
            nativedesc = mappers.descriptor_to_nativedesc(current_descriptor)
            body = {
                'tableReference': {
                    'projectId': self.__project,
                    'datasetId': self.__dataset,
                    'tableId': tablename,
                },
                'schema': nativedesc,
            }

            # Make request
            self.__service.tables().insert(projectId=self.__project,
                                           datasetId=self.__dataset,
                                           body=body).execute()

        # Remove buckets cache
        self.__buckets = None
Exemplo n.º 13
0
 def clean(self):
     """
     Validate the data package, its single resource and that resource's
     schema, raising ValidationError at the first problem.
     """
     # Validate the data package
     package = datapackage.DataPackage(self.data_package)
     try:
         package.validate()
     except Exception:
         package_errors = [error.message for error in package.iter_errors()]
         raise ValidationError(
             'Data package errors: {}'.format(package_errors))

     # Exactly one resource is accepted (the minimum is not required by the standard)
     if len(self.resources) == 0:
         raise ValidationError('You must define at least one resource')
     if len(self.resources) > 1:
         raise ValidationError('Only one resource per DataSet')

     # Validate the schema
     if 'schema' not in self.resource:
         raise ValidationError("Resource without a 'schema'.")
     schema = self.schema

     # First pass: the frictionless validator
     try:
         jsontableschema.validate(schema)
     except Exception:
         resource_name = self.resource.get('name')
         schema_errors = [
             error.message
             for error in jsontableschema.validator.iter_errors(schema)]
         raise ValidationError(
             'Schema errors for resource "{}": {}'.format(
                 resource_name, schema_errors))

     # Second pass: our own schema classes, whose constructors should raise
     # an exception on error
     try:
         if self.type == self.TYPE_SPECIES_OBSERVATION:
             schema_class = SpeciesObservationSchema
         elif self.type == self.TYPE_OBSERVATION:
             schema_class = ObservationSchema
         else:
             schema_class = GenericSchema
         schema_class(schema)
     except Exception as e:
         raise ValidationError(
             'Schema errors for resource "{}": {}'.format(
                 self.resource.get('name'), e))
Exemplo n.º 14
0
def table(source, schema=None, **options):
    """Build the ``(errors, tables)`` pair for a single table source.

    When ``schema`` is given it is loaded, validated and wrapped in
    ``Schema``; each error carried by a ``MultipleInvalid`` exception becomes
    one entry in ``errors``. A table entry is only added when no errors were
    collected.
    """
    errors = []
    tables = []

    # Prepare schema
    if schema is not None:
        schema_source = schema
        try:
            # https://github.com/frictionlessdata/jsontableschema-py/issues/113
            from jsontableschema.helpers import load_json_source
            loaded = load_json_source(schema)
            validate(loaded, no_fail_fast=True)
            schema = Schema(loaded)
        except jsontableschema.exceptions.MultipleInvalid as exception:
            for problem in exception.errors:
                # Error message should contain schema source (often it's path)
                template = spec['errors']['jsontableschema-error']['message']
                detail = '{problem} [{source}]'.format(
                    problem=str(problem).splitlines()[0],
                    source=str(schema_source))
                errors.append({
                    'code': 'jsontableschema-error',
                    'message': template.format(error_message=detail),
                    'row-number': None,
                    'column-number': None,
                })

    # Nothing is added when the schema produced errors
    if errors:
        return errors, tables

    # Add table
    options.setdefault('headers', 1)
    entry = {
        'source': str(source),
        'stream': Stream(source, **options),
        'schema': schema,
        'extra': {},
    }
    tables.append(entry)
    return errors, tables
Exemplo n.º 15
0
    def create(self, bucket, descriptor, force=False):
        """Create one or more buckets, each backed by an empty DataFrame.

        Parameters
        ----------
        bucket: str/list
            Bucket name or list of bucket names.
        descriptor: dict/list
            Descriptor or list of descriptors, paired positionally with
            ``bucket``.
        force: bool
            When true, requested buckets that already exist are deleted and
            re-created instead of raising.

        Raises
        ------
        RuntimeError
            If a requested bucket already exists and ``force`` is false.

        """

        # Make lists
        buckets = [bucket] if isinstance(bucket, six.string_types) else bucket
        descriptors = [descriptor] if isinstance(descriptor, dict) else descriptor
        pairs = list(zip(buckets, descriptors))

        # Check buckets for existence
        if not force:
            for name in buckets:
                if name in self.buckets:
                    raise RuntimeError('Bucket "%s" already exists' % name)

        # Validate every descriptor before anything is deleted or registered,
        # so an invalid descriptor cannot destroy an existing bucket.
        for _, item in pairs:
            jsontableschema.validate(item)

        # Drop the buckets that are being replaced
        for name in buckets:
            if name in self.buckets:
                self.delete(name)

        # Define dataframes
        for name, item in pairs:
            self.__descriptors[name] = item
            self.__dataframes[name] = pd.DataFrame()
Exemplo n.º 16
0
 def test_schema_invalid_pk_string(self):
     # Load the fixture from the test data directory
     schema_path = os.path.join(
         self.data_dir, 'schema_invalid_pk_string.json')
     with io.open(schema_path) as handle:
         descriptor = json.load(handle)
     # The result is unpacked as (valid, errors); only the flag is asserted
     outcome = jsontableschema.validate(descriptor)
     is_valid, _errors = outcome
     self.assertFalse(is_valid)