def create(self, bucket, descriptor, force=False):
    """Create bucket(s) (SQL tables) from JSONTableSchema descriptor(s).

    Parameters
    ----------
    bucket: str/list
        Bucket name or list of bucket names.
    descriptor: dict/list
        JSONTableSchema descriptor or list of descriptors.
    force: bool
        Delete and re-create any bucket that already exists.

    Raises
    ------
    RuntimeError
        If a bucket already exists and ``force`` is not set.
    ValueError
        If the number of buckets does not match the number of descriptors.
    """
    # Make lists
    buckets = bucket
    if isinstance(bucket, six.string_types):
        buckets = [bucket]
    descriptors = descriptor
    if isinstance(descriptor, dict):
        descriptors = [descriptor]

    # Fail loudly instead of letting zip() below silently truncate
    # (the sibling create() overloads assert this invariant too)
    if len(buckets) != len(descriptors):
        raise ValueError('Number of buckets and descriptors must match')

    # Check buckets for existence
    for bucket in reversed(self.buckets):
        if bucket in buckets:
            if not force:
                message = 'Bucket "%s" already exists.' % bucket
                raise RuntimeError(message)
            self.delete(bucket)

    # Define buckets
    for bucket, descriptor in zip(buckets, descriptors):
        # Add to schemas
        self.__descriptors[bucket] = descriptor
        # Create table
        jsontableschema.validate(descriptor)
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        columns, constraints = mappers.descriptor_to_columns_and_constraints(
            self.__prefix, bucket, descriptor)
        Table(tablename, self.__metadata, *(columns + constraints))

    # Create tables, update metadata
    self.__metadata.create_all()
def clean(self):
    """Validate the data descriptor."""
    # Validate the data package as a whole
    validator = datapackage.DataPackage(self.data_descriptor)
    try:
        validator.validate()
    except Exception as e:
        messages = [str(e[0]) for e in validator.iter_errors()]
        raise ValidationError('Data package errors: {}'.format(messages))
    # At least one resource must be declared (not required by the standard)
    if not self.resources:
        raise ValidationError('You must define at least one resource')
    # Every resource must carry a valid JSONTableSchema
    for resource in self.resources:
        if 'schema' not in resource:
            raise ValidationError("Resource without a 'schema'.")
        schema = resource.get('schema')
        try:
            jsontableschema.validate(schema)
        except Exception as e:
            messages = [
                str(e[0])
                for e in jsontableschema.validator.iter_errors(schema)
            ]
            raise ValidationError(
                'Schema errors for resource "{}": {}'.format(
                    resource.get('name'), messages))
def clean(self):
    """Validate the data descriptor."""
    # Package-level validation first
    validator = datapackage.DataPackage(self.data_descriptor)
    try:
        validator.validate()
    except Exception as e:
        details = [str(e[0]) for e in validator.iter_errors()]
        raise ValidationError('Data package errors: {}'.format(details))
    # Require at least one resource (the standard itself does not)
    if not self.resources:
        raise ValidationError('You must define at least one resource')
    # Per-resource schema validation
    for resource in self.resources:
        if 'schema' not in resource:
            raise ValidationError("Resource without a 'schema'.")
        schema = resource.get('schema')
        try:
            jsontableschema.validate(schema)
        except Exception as e:
            details = [
                str(e[0])
                for e in jsontableschema.validator.iter_errors(schema)
            ]
            raise ValidationError(
                'Schema errors for resource "{}": {}'.format(
                    resource.get('name'), details))
def create(self, bucket, descriptor, force=False, indexes_fields=None):
    """Create table by schema.

    Parameters
    ----------
    bucket: str/list
        Table name or list of table names.
    descriptor: dict/list
        JSONTableSchema schema or list of schemas.
    force: bool
        Delete and re-create any bucket that already exists.
    indexes_fields: list
        list of tuples containing field names, or list of such lists

    Raises
    ------
    RuntimeError
        If table already exists and ``force`` is not set.
    ValueError
        If buckets/descriptors/indexes_fields counts do not match.
    """
    # Make lists
    buckets = bucket
    if isinstance(bucket, six.string_types):
        buckets = [bucket]
    descriptors = descriptor
    if isinstance(descriptor, dict):
        descriptors = [descriptor]
    if not indexes_fields:
        indexes_fields = [()] * len(descriptors)
    elif indexes_fields[0] and not isinstance(indexes_fields[0][0], (list, tuple)):
        # A flat list of index tuples applies to a single descriptor.
        # Guard the empty-first-entry case (was an IndexError) and use
        # isinstance instead of exact type comparison.
        indexes_fields = [indexes_fields]
    # Explicit checks instead of assert, which is stripped under "python -O"
    if len(indexes_fields) != len(descriptors):
        raise ValueError('indexes_fields and descriptors must have equal length')
    if len(buckets) != len(descriptors):
        raise ValueError('buckets and descriptors must have equal length')

    # Check buckets for existence
    for bucket in reversed(self.buckets):
        if bucket in buckets:
            if not force:
                message = 'Bucket "%s" already exists.' % bucket
                raise RuntimeError(message)
            self.delete(bucket)

    # Define buckets
    for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields):
        # Add to schemas
        self.__descriptors[bucket] = descriptor
        # Create table
        jsontableschema.validate(descriptor)
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
            self.__prefix, bucket, descriptor, index_fields, self.__autoincrement)
        Table(tablename, self.__metadata, *(columns + constraints + indexes))

    # Create tables, update metadata
    self.__metadata.create_all()
def create(self, bucket, descriptor, force=False, indexes_fields=None):
    """Create table by schema.

    Parameters
    ----------
    bucket: str/list
        Table name or list of table names.
    descriptor: dict/list
        JSONTableSchema schema or list of schemas.
    force: bool
        Delete and re-create any bucket that already exists.
    indexes_fields: list
        list of tuples containing field names, or list of such lists

    Raises
    ------
    RuntimeError
        If table already exists and ``force`` is not set.
    ValueError
        If buckets/descriptors/indexes_fields counts do not match.
    """
    # Make lists
    buckets = bucket
    if isinstance(bucket, six.string_types):
        buckets = [bucket]
    descriptors = descriptor
    if isinstance(descriptor, dict):
        descriptors = [descriptor]
    if not indexes_fields:
        indexes_fields = [()] * len(descriptors)
    elif indexes_fields[0] and not isinstance(indexes_fields[0][0], (list, tuple)):
        # A flat list of index tuples applies to a single descriptor.
        # Guard the empty-first-entry case (was an IndexError) and use
        # isinstance instead of exact type comparison.
        indexes_fields = [indexes_fields]
    # Explicit checks instead of assert, which is stripped under "python -O"
    if len(indexes_fields) != len(descriptors):
        raise ValueError('indexes_fields and descriptors must have equal length')
    if len(buckets) != len(descriptors):
        raise ValueError('buckets and descriptors must have equal length')

    # Check buckets for existence
    for bucket in reversed(self.buckets):
        if bucket in buckets:
            if not force:
                message = 'Bucket "%s" already exists.' % bucket
                raise RuntimeError(message)
            self.delete(bucket)

    # Define buckets
    for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields):
        # Add to schemas
        self.__descriptors[bucket] = descriptor
        # Create table
        jsontableschema.validate(descriptor)
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
            self.__prefix, bucket, descriptor, index_fields)
        Table(tablename, self.__metadata, *(columns + constraints + indexes))

    # Create tables, update metadata
    self.__metadata.create_all()
def validate_data_package(data_package, dataset_type):
    """Will throw a validation error if any problem
    :param data_package:
    :param dataset_type:
    :return:
    """
    # Whole-package validation
    validator = datapackage.DataPackage(data_package)
    try:
        validator.validate()
    except Exception:
        details = "<br>".join([e.message for e in validator.iter_errors()])
        raise ValidationError('Data package errors:<br>{}'.format(details))
    # Exactly one resource is required (the standard itself requires none)
    resources = data_package.get('resources', [])
    if not resources:
        raise ValidationError('You must define at least one resource')
    if len(resources) > 1:
        raise ValidationError('Only one resource per DataSet')
    # Validate the schema of the single resource
    resource = resources[0]
    if 'schema' not in resource:
        raise ValidationError("Resource without a 'schema'.")
    schema = resource.get('schema', {})
    try:
        # use frictionless validator
        jsontableschema.validate(schema)
    except Exception:
        details = "<br>".join(
            [e.message for e in jsontableschema.validator.iter_errors(schema)])
        raise ValidationError(
            'Schema errors for resource "{}":<br>{}'.format(
                resource.get('name'), details))
    try:
        # use our own schema class to validate.
        # The constructor should raise an exception if error
        if dataset_type == Dataset.TYPE_SPECIES_OBSERVATION:
            SpeciesObservationSchema(schema)
        elif dataset_type == Dataset.TYPE_OBSERVATION:
            ObservationSchema(schema)
        else:
            GenericSchema(schema)
    except Exception as e:
        raise ValidationError(
            'Schema errors for resource "{}": {}'.format(
                resource.get('name'), e))
def test_schema_valid_fk_array(self):
    """A schema with an array foreign key should validate successfully."""
    filepath = os.path.join(self.data_dir, 'schema_valid_fk_array.json')
    with io.open(filepath) as stream:
        schema = json.load(stream)
    self.assertTrue(jsontableschema.validate(schema))
def create(self, table, schema):
    """Create table by schema.

    Parameters
    ----------
    table: str/list
        Table name or list of table names.
    schema: dict/list
        JSONTableSchema schema or list of schemas.

    Raises
    ------
    RuntimeError
        If table already exists.
    """
    # Normalize both arguments to lists
    tables = [table] if isinstance(table, six.string_types) else table
    schemas = [schema] if isinstance(schema, dict) else schema
    # Refuse to overwrite existing tables
    for table in tables:
        if self.check(table):
            raise RuntimeError('Table "%s" already exists.' % table)
    # Register schemas and build the SQLAlchemy tables
    for table, schema in zip(tables, schemas):
        self.__schemas[table] = schema
        # Create the SQLAlchemy table
        table = mappers.convert_table(self.__prefix, table)
        jsontableschema.validate(schema)
        columns, constraints = mappers.convert_schema(
            self.__prefix, table, schema)
        Table(table, self.__metadata, *(columns + constraints))
    # Create tables, update metadata
    self.__metadata.create_all()
def validate(schema):
    """Validate that a supposed schema is in fact a JSON Table Schema."""
    is_valid, error_list = jsontableschema.validate(schema)
    click.echo(is_valid)
    click.echo(error_list)
def _filter_row(self, row, **kwargs):
    """Upsert a single row into the backing DB table, creating the table
    lazily from the JSONTableSchema descriptor on first use.

    Parameters
    ----------
    row: dict
        Incoming row; an "id" key, if present, is popped and used as the
        key for the upsert.

    Yields
    ------
    The upsert result, when `_upsert` produces one.
    """
    # Renamed from `id`, which shadowed the builtin
    row_id = int(row.pop("id")) if "id" in row else None
    values = self._get_values(row)
    if self.db_table is None:
        # First row: validate the schema and create the table
        jsontableschema.validate(self._table_schema)
        prefix, bucket = "", self.table_name
        index_fields = []
        autoincrement = None
        tablename = mappers.bucket_to_tablename(prefix, bucket)
        columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
            prefix, bucket, self._table_schema, index_fields, autoincrement)
        self.db_table = Table(tablename, self.db_meta, *(columns + constraints + indexes))
        self.db_table.create()
    res = self._upsert(row_id, values)
    if res:
        yield res
def create(self, bucket, descriptor, force=False):
    """Create BigQuery table(s) from JSONTableSchema descriptor(s)."""
    # Normalize both arguments to lists
    buckets = [bucket] if isinstance(bucket, six.string_types) else bucket
    descriptors = [descriptor] if isinstance(descriptor, dict) else descriptor
    # Iterate over buckets/descriptors
    for bucket, descriptor in zip(buckets, descriptors):
        # Handle a pre-existing bucket
        if bucket in self.buckets:
            if not force:
                raise RuntimeError('Bucket "%s" already exists' % bucket)
            self.delete(bucket)
        # Register the descriptor
        self.__descriptors[bucket] = descriptor
        # Build the table-insert job body
        jsontableschema.validate(descriptor)
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        nativedesc = mappers.descriptor_to_nativedesc(descriptor)
        body = {
            'tableReference': {
                'projectId': self.__project,
                'datasetId': self.__dataset,
                'tableId': tablename,
            },
            'schema': nativedesc,
        }
        # Issue the API request
        request = self.__service.tables().insert(
            projectId=self.__project,
            datasetId=self.__dataset,
            body=body)
        request.execute()
    # Invalidate the buckets cache
    self.__buckets = None
def create(self, bucket, descriptor, force=False):
    """Create BigQuery table(s) from JSONTableSchema descriptor(s)."""
    # Accept scalars or lists for both arguments
    buckets = [bucket] if isinstance(bucket, six.string_types) else bucket
    descriptors = [descriptor] if isinstance(descriptor, dict) else descriptor
    # Process each bucket/descriptor pair
    for bucket, descriptor in zip(buckets, descriptors):
        # Existing bucket: fail unless force, otherwise drop it first
        if bucket in self.buckets:
            if not force:
                raise RuntimeError('Bucket "%s" already exists' % bucket)
            self.delete(bucket)
        # Remember the descriptor
        self.__descriptors[bucket] = descriptor
        # Assemble the insert-job payload
        jsontableschema.validate(descriptor)
        tablename = mappers.bucket_to_tablename(self.__prefix, bucket)
        nativedesc = mappers.descriptor_to_nativedesc(descriptor)
        body = {
            'tableReference': {
                'projectId': self.__project,
                'datasetId': self.__dataset,
                'tableId': tablename,
            },
            'schema': nativedesc,
        }
        # Fire the API call
        self.__service.tables().insert(
            projectId=self.__project,
            datasetId=self.__dataset,
            body=body).execute()
    # Drop the cached bucket list so it is re-fetched next time
    self.__buckets = None
def clean(self):
    """Validate the data descriptor."""
    # Whole-package validation
    validator = datapackage.DataPackage(self.data_package)
    try:
        validator.validate()
    except Exception as e:
        messages = [e.message for e in validator.iter_errors()]
        raise ValidationError('Data package errors: {}'.format(messages))
    # Exactly one resource is required (not mandated by the standard)
    if not self.resources:
        raise ValidationError('You must define at least one resource')
    if len(self.resources) > 1:
        raise ValidationError('Only one resource per DataSet')
    # Schema validation
    if 'schema' not in self.resource:
        raise ValidationError("Resource without a 'schema'.")
    schema = self.schema
    try:
        # use frictionless validator
        jsontableschema.validate(schema)
    except Exception as e:
        messages = [
            e.message
            for e in jsontableschema.validator.iter_errors(schema)
        ]
        raise ValidationError(
            'Schema errors for resource "{}": {}'.format(
                self.resource.get('name'), messages))
    try:
        # use our own schema class to validate.
        # The constructor should raise an exception if error
        if self.type == self.TYPE_SPECIES_OBSERVATION:
            SpeciesObservationSchema(schema)
        elif self.type == self.TYPE_OBSERVATION:
            ObservationSchema(schema)
        else:
            GenericSchema(schema)
    except Exception as e:
        raise ValidationError(
            'Schema errors for resource "{}": {}'.format(
                self.resource.get('name'), e))
def table(source, schema=None, **options):
    """Build the (errors, tables) pair for a tabular source, validating the
    optional JSON Table Schema first and reporting schema problems as
    error dicts instead of raising."""
    errors = []
    tables = []
    # Prepare schema
    if schema is not None:
        descriptor = schema
        try:
            # https://github.com/frictionlessdata/jsontableschema-py/issues/113
            from jsontableschema.helpers import load_json_source
            loaded_descriptor = load_json_source(schema)
            validate(loaded_descriptor, no_fail_fast=True)
            schema = Schema(loaded_descriptor)
        except jsontableschema.exceptions.MultipleInvalid as exception:
            for error in exception.errors:
                # Error message should contain schema source (often it's path)
                problem = str(error).splitlines()[0]
                template = spec['errors']['jsontableschema-error']['message']
                message = template.format(
                    error_message='{problem} [{source}]'.format(
                        problem=problem, source=str(descriptor)))
                errors.append({
                    'code': 'jsontableschema-error',
                    'message': message,
                    'row-number': None,
                    'column-number': None,
                })
    # Add the table only when the schema was clean
    if not errors:
        options.setdefault('headers', 1)
        tables.append({
            'source': str(source),
            'stream': Stream(source, **options),
            'schema': schema,
            'extra': {},
        })
    return errors, tables
def create(self, bucket, descriptor, force=False):
    """Create in-memory pandas bucket(s) from JSONTableSchema descriptor(s)."""
    # Normalize both arguments to lists
    buckets = [bucket] if isinstance(bucket, six.string_types) else bucket
    descriptors = [descriptor] if isinstance(descriptor, dict) else descriptor
    # Refuse existing buckets unless force, in which case drop them
    for bucket in buckets:
        if bucket in self.buckets:
            if not force:
                raise RuntimeError('Bucket "%s" already exists' % bucket)
            self.delete(bucket)
    # Register each descriptor with a fresh empty dataframe
    for bucket, descriptor in zip(buckets, descriptors):
        jsontableschema.validate(descriptor)
        self.__descriptors[bucket] = descriptor
        self.__dataframes[bucket] = pd.DataFrame()
def test_schema_invalid_pk_string(self):
    """A schema with an invalid string primary key must fail validation."""
    filepath = os.path.join(self.data_dir, 'schema_invalid_pk_string.json')
    with io.open(filepath) as stream:
        schema = json.load(stream)
    valid, _errors = jsontableschema.validate(schema)
    self.assertFalse(valid)