def test_update_field():
    """Check ``Schema.update_field`` results and their effect after commit.

    The test expects ``update_field`` to return ``True`` for the ``id`` and
    ``height`` fields and ``False`` for a field named ``unknown``, and the
    updated fields to report the new type once the schema is committed.
    """
    schema = Schema(DESCRIPTOR_MIN)

    # (field name, expected return value of update_field)
    expectations = (
        ('id', True),
        ('height', True),
        ('unknown', False),
    )
    for field_name, expected in expectations:
        # A fresh descriptor dict per call, so no state is shared between updates.
        assert schema.update_field(field_name, {'type': 'number'}) is expected

    schema.commit()

    for field_name in ('id', 'height'):
        assert schema.get_field(field_name).type == 'number'
def add_foreign_key(schema: Schema,
                    fields: Union[str, List[str]],
                    referenced_table: str,
                    referenced_fields: Union[str, List[str]],
                    description: str = None) -> None:
    """Append a foreign key entry to the schema descriptor and commit it.

    The entry is added to the descriptor's ``foreignKeys`` list, which is
    created when it does not exist yet. The schema is then committed with
    ``strict=True``.

    Args:
        schema: Schema whose descriptor is modified in place.
        fields: Local field name(s) stored under ``fields``.
        referenced_table: Value stored under ``reference.resource``.
        referenced_fields: Value stored under ``reference.fields``.
        description: Optional text; only stored when truthy.
    """
    reference = {
        'resource': referenced_table,
        'fields': referenced_fields,
    }
    entry = {
        'fields': fields,
        'reference': reference,
    }
    if description:
        entry['description'] = description

    # setdefault creates the list on first use and returns the stored list,
    # so the append always lands in the descriptor itself.
    schema.descriptor.setdefault('foreignKeys', []).append(entry)
    schema.commit(strict=True)
def __inspect_table(self, table):
    """Run the configured checks against one table and build its report.

    Args:
        table: Mapping with the keys ``source``, ``stream``, ``schema`` and
            ``extra``, plus an optional ``checks`` entry that overrides the
            inspector-level checks.

    Returns:
        A ``(warnings, report)`` tuple. ``warnings`` is a list of messages
        about reached row/error limits; ``report`` is a copy of ``extra``
        updated with timing, validity, counts, stream details and the
        sorted list of errors.
    """

    # Start timer
    start = datetime.datetime.now()

    # Prepare vars
    errors = []
    warnings = []
    headers = []
    row_number = 0
    fatal_error = False
    # Tracks whether `stream.open()` succeeded so the stream can be closed
    # on the paths that never reach the `with stream` block further down.
    stream_opened = False
    source = table['source']
    stream = table['stream']
    schema = table['schema']
    extra = table['extra']

    # Prepare checks
    checks = registry.compile_checks(
        table.get('checks', self.__checks),
        self.__skip_checks,
        order_fields=self.__order_fields,
        infer_fields=self.__infer_fields)

    # Prepare table
    try:
        stream.open()
        stream_opened = True
        sample = stream.sample
        headers = stream.headers
        if headers is None:
            # No header row: use one `None` placeholder per sample column
            headers = [None] * len(sample[0]) if sample else []
        if _filter_checks(checks, type='schema'):
            if schema is None and self.__infer_schema:
                schema = Schema()
                schema.infer(sample, headers=headers)
        if schema is None:
            # Without a schema the schema-type checks cannot run
            checks = _filter_checks(checks, type='schema', inverse=True)
    except Exception as exception:
        fatal_error = True
        error = _compose_error_from_exception(exception)
        errors.append(error)

    # Prepare schema
    if not fatal_error:
        if schema:
            if schema.primary_key:
                # Flag primary key fields directly on their descriptors
                for field in schema.descriptor.get('fields', []):
                    if field.get('name') in schema.primary_key:
                        field['primaryKey'] = True
                schema.commit()
            for error in schema.errors:
                fatal_error = True
                error = _compose_error_from_schema_error(error)
                errors.append(error)

    # Prepare cells
    if not fatal_error:
        cells = []
        fields = [None] * len(headers)
        if schema is not None:
            fields = schema.fields
        # Pad the shorter side so extra headers/fields still yield a cell
        iterator = zip_longest(headers, fields, fillvalue=_FILLVALUE)
        for number, (header, field) in enumerate(iterator, start=1):
            cell = {'number': number}
            if header is not _FILLVALUE:
                cell['header'] = header
                cell['value'] = header
            if field is not _FILLVALUE:
                cell['field'] = field
            cells.append(cell)

    # Head checks
    if not fatal_error:
        if None not in headers:
            head_checks = _filter_checks(checks, context='head')
            for check in head_checks:
                if not cells:
                    break
                check_func = getattr(check['func'], 'check_headers', check['func'])
                check_func(errors, cells, sample)
            # Every error collected so far is header-level: it has no row
            for error in errors:
                error['row'] = None

    # Body checks
    if not fatal_error:
        # Built after the head checks, so it reflects the `cells` list as
        # those checks left it.
        cellmap = {cell['number']: cell for cell in cells}
        body_checks = _filter_checks(checks, context='body')
        with stream:
            extended_rows = stream.iter(extended=True)
            while True:
                try:
                    row_number, _, row = next(extended_rows)
                except StopIteration:
                    break
                except Exception as exception:
                    fatal_error = True
                    error = _compose_error_from_exception(exception)
                    errors.append(error)
                    break
                cells = []
                iterator = zip_longest(headers, row, fillvalue=_FILLVALUE)
                for number, (header, value) in enumerate(iterator, start=1):
                    cellref = cellmap.get(number, {})
                    cell = {'number': number}
                    if header is not _FILLVALUE:
                        cell['header'] = cellref.get('header', header)
                    if 'field' in cellref:
                        cell['field'] = cellref['field']
                    if value is not _FILLVALUE:
                        cell['value'] = value
                    cells.append(cell)
                for check in body_checks:
                    if not cells:
                        break
                    check_func = getattr(check['func'], 'check_row', check['func'])
                    check_func(errors, cells, row_number)
                # Walk back over the newest errors (those still lacking a
                # 'row' key) and attach the current row to each of them.
                for error in reversed(errors):
                    if 'row' in error:
                        break
                    error['row'] = row
                if row_number >= self.__row_limit:
                    warnings.append(
                        'Table "%s" inspection has reached %s row(s) limit' %
                        (source, self.__row_limit))
                    break
                if len(errors) >= self.__error_limit:
                    warnings.append(
                        'Table "%s" inspection has reached %s error(s) limit' %
                        (source, self.__error_limit))
                    break
    elif stream_opened:
        # A fatal error after a successful open() (failed sampling or
        # inference, or an invalid schema) skips the `with stream` block
        # above, which is otherwise what closes the stream.
        stream.close()

    # Table checks
    if not fatal_error:
        for check in checks:
            check_func = getattr(check['func'], 'check_table', None)
            if check_func:
                check_func(errors)

    # Stop timer
    stop = datetime.datetime.now()

    # Compose report
    headers = headers if None not in headers else None
    errors = errors[:self.__error_limit]
    errors = _sort_errors(errors)
    report = copy(extra)
    report.update({
        'time': round((stop - start).total_seconds(), 3),
        'valid': not bool(errors),
        'error-count': len(errors),
        'row-count': row_number,
        'source': source,
        'headers': headers,
        'scheme': stream.scheme,
        'format': stream.format,
        'encoding': stream.encoding,
        'schema': 'table-schema' if schema else None,
        'errors': errors,
    })

    return warnings, report
def add_primary_key(schema: Schema,
                    primary_key: Union[str, List[str]]) -> None:
    """Store ``primary_key`` under ``primaryKey`` in the descriptor and commit.

    Args:
        schema: Schema whose descriptor is modified in place.
        primary_key: Field name, or list of field names, to store as the
            primary key. An existing ``primaryKey`` entry is overwritten.
    """
    descriptor = schema.descriptor
    descriptor['primaryKey'] = primary_key
    schema.commit()