Exemplo n.º 1
0
def test_update_field():
    """Check that update_field reports success only for fields that exist.

    Known fields ('id', 'height') must return True and, after commit(),
    expose the new type; an unknown field name must return False.
    """
    schema = Schema(DESCRIPTOR_MIN)

    # (field name, expected return value of update_field), in call order.
    expectations = (
        ('id', True),
        ('height', True),
        ('unknown', False),
    )
    for field_name, expected in expectations:
        assert schema.update_field(field_name, {'type': 'number'}) is expected

    schema.commit()

    # Only the fields that were actually updated are re-read after commit.
    for field_name in ('id', 'height'):
        assert schema.get_field(field_name).type == 'number'
Exemplo n.º 2
0
def add_foreign_key(schema: Schema, fields: Union[str, List[str]], referenced_table: str,
                    referenced_fields: Union[str, List[str]], description: str = None) -> None:
    """Append a foreign key entry to the schema descriptor and commit it.

    Args:
        schema: Schema whose descriptor is modified in place.
        fields: Local field name(s) that make up the foreign key.
        referenced_table: Stored as the 'resource' of the reference.
        referenced_fields: Field name(s) stored as the 'fields' of the
            reference.
        description: Optional text; only stored when truthy.

    The descriptor's 'foreignKeys' list is created if it does not exist yet.
    The change is committed with ``strict=True``.
    """
    reference = {
        'resource': referenced_table,
        'fields': referenced_fields,
    }
    entry = {'fields': fields, 'reference': reference}
    if description:
        entry['description'] = description

    # Reuse the existing list when present, otherwise start a new one.
    foreign_keys = schema.descriptor.setdefault('foreignKeys', [])
    foreign_keys.append(entry)
    schema.commit(strict=True)
Exemplo n.º 3
0
    def __inspect_table(self, table):
        """Inspect one table and build its report.

        The inspection runs in phases: open the stream and resolve the
        schema, validate the schema, run head checks against the header
        cells, run body checks row by row, run table-level checks, and
        finally compose the report. An exception while preparing the table
        or reading rows, or any schema error, is recorded as an error and
        marks the inspection as fatal, which skips the phases guarded by
        ``if not fatal_error``.

        Args:
            table: Mapping with the keys 'source', 'stream', 'schema' and
                'extra', and optionally 'checks' (defaults to the
                inspector's own checks).

        Returns:
            A ``(warnings, report)`` tuple. ``warnings`` is a list of
            messages emitted when the row or error limit is reached.
            ``report`` is a copy of ``table['extra']`` updated with the keys
            'time', 'valid', 'error-count', 'row-count', 'source',
            'headers', 'scheme', 'format', 'encoding', 'schema' and
            'errors'.
        """

        # Start timer
        start = datetime.datetime.now()

        # Prepare vars
        errors = []
        warnings = []
        headers = []
        row_number = 0
        fatal_error = False
        source = table['source']
        stream = table['stream']
        schema = table['schema']
        extra = table['extra']

        # Prepare checks
        checks = registry.compile_checks(table.get('checks', self.__checks),
                                         self.__skip_checks,
                                         order_fields=self.__order_fields,
                                         infer_fields=self.__infer_fields)

        # Prepare table
        # NOTE(review): the stream is opened here but only leaves its context
        # manager in the body-checks phase; on the fatal-error paths it looks
        # like it is never closed by this method - confirm.
        try:
            stream.open()
            sample = stream.sample
            headers = stream.headers
            if headers is None:
                # No header row: use one None placeholder per column of the
                # first sample row (or no columns if the sample is empty).
                headers = [None] * len(sample[0]) if sample else []
            if _filter_checks(checks, type='schema'):
                # Infer a schema only when schema checks are active, none was
                # provided, and inference is enabled on the inspector.
                if schema is None and self.__infer_schema:
                    schema = Schema()
                    schema.infer(sample, headers=headers)
            if schema is None:
                # Without a schema the schema-type checks are dropped.
                checks = _filter_checks(checks, type='schema', inverse=True)
        except Exception as exception:
            fatal_error = True
            error = _compose_error_from_exception(exception)
            errors.append(error)

        # Prepare schema
        if not fatal_error:
            if schema:
                if schema.primary_key:
                    # Flag each primary-key field directly in its descriptor,
                    # then commit the modified descriptor.
                    for field in schema.descriptor.get('fields', []):
                        if field.get('name') in schema.primary_key:
                            field['primaryKey'] = True
                    schema.commit()
                # Any schema error makes the whole inspection fatal.
                for error in schema.errors:
                    fatal_error = True
                    error = _compose_error_from_schema_error(error)
                    errors.append(error)

        # Prepare cells
        if not fatal_error:
            cells = []
            fields = [None] * len(headers)
            if schema is not None:
                fields = schema.fields
            # Pad the shorter of headers/fields with the _FILLVALUE sentinel so
            # a cell without a header or without a field simply lacks that key.
            iterator = zip_longest(headers, fields, fillvalue=_FILLVALUE)
            for number, (header, field) in enumerate(iterator, start=1):
                cell = {'number': number}
                if header is not _FILLVALUE:
                    cell['header'] = header
                    cell['value'] = header
                if field is not _FILLVALUE:
                    cell['field'] = field
                cells.append(cell)

        # Head checks
        if not fatal_error:
            # Head checks are skipped entirely when any header is None.
            if None not in headers:
                head_checks = _filter_checks(checks, context='head')
                for check in head_checks:
                    if not cells:
                        break
                    # Prefer the check's check_headers attribute; otherwise
                    # call the check object itself.
                    check_func = getattr(check['func'], 'check_headers',
                                         check['func'])
                    check_func(errors, cells, sample)
                # Errors collected so far can only come from head checks; give
                # them an explicit 'row' of None. The body loop relies on the
                # presence of the 'row' key to tell old errors from new ones.
                for error in errors:
                    error['row'] = None

        # Body checks
        if not fatal_error:
            # Header cells indexed by column number, for per-row lookup.
            cellmap = {cell['number']: cell for cell in cells}
            body_checks = _filter_checks(checks, context='body')
            with stream:
                extended_rows = stream.iter(extended=True)
                while True:
                    # next() is called by hand so an exception raised while
                    # reading a row can be turned into a fatal error.
                    try:
                        row_number, _, row = next(extended_rows)
                    except StopIteration:
                        break
                    except Exception as exception:
                        fatal_error = True
                        error = _compose_error_from_exception(exception)
                        errors.append(error)
                        break
                    # Build this row's cells, padding with _FILLVALUE when the
                    # row is shorter or longer than the headers.
                    cells = []
                    iterator = zip_longest(headers, row, fillvalue=_FILLVALUE)
                    for number, (header, value) in enumerate(iterator,
                                                             start=1):
                        cellref = cellmap.get(number, {})
                        cell = {'number': number}
                        if header is not _FILLVALUE:
                            cell['header'] = cellref.get('header', header)
                        if 'field' in cellref:
                            cell['field'] = cellref['field']
                        if value is not _FILLVALUE:
                            cell['value'] = value
                        cells.append(cell)
                    for check in body_checks:
                        if not cells:
                            break
                        # Prefer the check's check_row attribute; otherwise
                        # call the check object itself.
                        check_func = getattr(check['func'], 'check_row',
                                             check['func'])
                        check_func(errors, cells, row_number)
                    # Walk the errors backwards and attach the current row's
                    # values (`row`, not `row_number`) to those that have no
                    # 'row' key yet; stop at the first error that has one.
                    for error in reversed(errors):
                        if 'row' in error:
                            break
                        error['row'] = row
                    # Stop early, with a warning, once either limit is reached.
                    if row_number >= self.__row_limit:
                        warnings.append(
                            'Table "%s" inspection has reached %s row(s) limit'
                            % (source, self.__row_limit))
                        break
                    if len(errors) >= self.__error_limit:
                        warnings.append(
                            'Table "%s" inspection has reached %s error(s) limit'
                            % (source, self.__error_limit))
                        break

        # Table checks
        if not fatal_error:
            # Every compiled check (not only head/body ones) may expose an
            # optional check_table hook that receives the error list.
            for check in checks:
                check_func = getattr(check['func'], 'check_table', None)
                if check_func:
                    check_func(errors)

        # Stop timer
        stop = datetime.datetime.now()

        # Compose report
        # Headers are reported as None if any of them is None.
        headers = headers if None not in headers else None
        # Truncate to the error limit first, then sort what remains.
        errors = errors[:self.__error_limit]
        errors = _sort_errors(errors)
        report = copy(extra)
        report.update({
            'time': round((stop - start).total_seconds(), 3),
            'valid': not bool(errors),
            'error-count': len(errors),
            'row-count': row_number,
            'source': source,
            'headers': headers,
            'scheme': stream.scheme,
            'format': stream.format,
            'encoding': stream.encoding,
            'schema': 'table-schema' if schema else None,
            'errors': errors,
        })

        return warnings, report
Exemplo n.º 4
0
def add_primary_key(schema: Schema, primary_key: Union[str, List[str]]) -> None:
    """Set the schema's primary key and commit the descriptor.

    Args:
        schema: Schema whose descriptor is modified in place.
        primary_key: Field name, or list of field names, stored under the
            descriptor's 'primaryKey' entry (replacing any existing value).
    """
    descriptor = schema.descriptor
    descriptor['primaryKey'] = primary_key
    schema.commit()