def test_check_type_or_format_error(log):
    """A castable value produces no error and is replaced by its cast result."""
    integer_field = Field({'name': 'name', 'type': 'integer'})
    cell = {
        'number': 1,
        'header': 'name1',
        'value': '1',
        'field': integer_field,
    }
    cells = [cell]
    errors = []

    type_or_format_error(errors, cells, 1)

    # Nothing is reported, the cell is kept, and '1' has become the int 1.
    assert log(errors) == []
    assert len(cells) == 1
    assert cells[0]['value'] == 1
Пример #2
0
def _process_field(field: Field, rules, ret, prefix):
    schema_type = field['type']
    if schema_type == 'array':
        field = copy(field)
        field['type'] = field['es:itemType']
        return _process_field(field, rules, ret, prefix)
    enabled = field.get('es:index', True)
    subschema = {'fields': []}
    if enabled and schema_type == 'object':
        subschema = field['es:schema']
        _process_schema(subschema, rules, ret, prefix + field['name'] + '.')
    elif schema_type == 'string':
        if field['name'] not in ('doc_id', ):
            search_field = prefix + field['name']
            for suffix in rules.get(
                ('es:title' in field, 'es:keyword' in field), ['']):
                ret.append(search_field + suffix)
Пример #3
0
def test_check_extra_header_infer_with_empty_data(log):
    """An extra header whose sample column is all empty strings is inferred as a string field."""
    known_cell = goodtables.cells.create_cell(
        'name1', field=Field({'name': 'name1'}), column_number=1)
    extra_cell = goodtables.cells.create_cell('name2', column_number=2)
    cells = [known_cell, extra_cell]
    # The second column, the one without a field, holds only empty values.
    sample = [[first, ''] for first in ('123', '456', '789')]

    check = ExtraHeader(infer_fields=True)
    errors = check.check_headers(cells, sample=sample)

    assert log(errors) == []
    assert len(cells) == 2
    inferred = cells[1]['field']
    assert inferred.name == 'name2'
    assert cells[1]['field'].type == 'string'
def test_check_type_or_format_error_problem(log):
    """A value that cannot be cast is reported and its cell is removed from ``cells``."""
    integer_field = Field({'name': 'name', 'type': 'integer'})
    bad_cell = {
        'number': 1,
        'header': 'name1',
        'value': 'value1',
        'field': integer_field,
    }
    cells = [bad_cell]
    errors = []

    type_or_format_error(errors, cells, 1)

    expected = [(1, 1, 'type-or-format-error')]
    assert log(errors) == expected
    assert len(cells) == 0
Пример #5
0
def test_check_extra_header_infer_with_empty_data(log):
    """An extra header whose sample column is all empty strings is inferred as a string field."""
    known_cell = {
        'number': 1,
        'header': 'name1',
        'field': Field({'name': 'name1'}),
    }
    extra_cell = {'number': 2, 'header': 'name2'}
    cells = [known_cell, extra_cell]
    errors = []
    # The second column, the one without a field, holds only empty values.
    sample = [[first, ''] for first in ('123', '456', '789')]

    check = ExtraHeader(infer_fields=True)
    check.check_headers(errors, cells, sample=sample)

    assert log(errors) == []
    assert len(cells) == 2
    inferred = cells[1]['field']
    assert inferred.name == 'name2'
    assert cells[1]['field'].type == 'string'
Пример #6
0
def test_test_value_required():
    """Only the configured missing values (and ``None``) violate ``required``."""
    descriptor = {
        'name': 'name',
        'type': 'string',
        'constraints': {'required': True},
    }
    field = Field(descriptor, missing_values=['', 'NA', 'N/A'])
    test = partial(field.test_value, constraints=['required'])

    # (input, expected outcome) pairs, checked in the order listed.
    expectations = (
        ('test', True),
        ('null', True),
        ('none', True),
        ('nil', True),
        ('nan', True),
        ('NA', False),
        ('N/A', False),
        ('-', True),
        ('', False),
        (None, False),
    )
    for candidate, expected in expectations:
        assert test(candidate) == expected
Пример #7
0
def test_test_value_constraints_false():
    """With ``constraints=False`` an empty string is accepted for DESCRIPTOR_MIN."""
    field = Field(DESCRIPTOR_MIN)
    outcome = field.test_value('', constraints=False)
    assert outcome == True
Пример #8
0
def test_test_value():
    """For DESCRIPTOR_MAX, ``test_value`` accepts '1' and rejects 'string' and ''."""
    expectations = (
        ('1', True),
        ('string', False),
        ('', False),
    )
    for candidate, expected in expectations:
        # A fresh Field per value, as each check is independent.
        assert Field(DESCRIPTOR_MAX).test_value(candidate) == expected
Пример #9
0
def test_cast_value_null_with_missing_values():
    """A value listed in ``missing_values`` is cast to ``None``."""
    field = Field({'name': 'name', 'type': 'number'}, missing_values=['null'])
    # Identity check: ``== None`` would also pass for any object whose
    # __eq__ happens to accept None, which is not what this test means.
    assert field.cast_value('null') is None
Пример #10
0
def test_cast_value_constraint_error():
    """Casting an empty string with DESCRIPTOR_MAX raises ``CastError``."""
    field = Field(DESCRIPTOR_MAX)
    with pytest.raises(exceptions.CastError):
        field.cast_value('')
Пример #11
0
def test_format():
    """Both reference descriptors report the 'default' format."""
    for descriptor in (DESCRIPTOR_MIN, DESCRIPTOR_MAX):
        assert Field(descriptor).format == 'default'
Пример #12
0
def test_type():
    """DESCRIPTOR_MIN reports type 'string'; DESCRIPTOR_MAX reports 'integer'."""
    minimal = Field(DESCRIPTOR_MIN)
    assert minimal.type == 'string'
    maximal = Field(DESCRIPTOR_MAX)
    assert maximal.type == 'integer'
Пример #13
0
def test_name():
    """``Field.name`` is 'id' for DESCRIPTOR_MIN."""
    field = Field(DESCRIPTOR_MIN)
    assert field.name == 'id'
Пример #14
0
def test_descriptor(apply_defaults):
    """``Field.descriptor`` equals the input descriptor after ``apply_defaults``."""
    actual = Field(DESCRIPTOR_MIN).descriptor
    assert actual == apply_defaults(DESCRIPTOR_MIN)
 def caster(v):
     # Build a Field from ``__field`` (resolved from the surrounding scope)
     # on every call and return the cast of ``v``.
     return Field(__field).cast_value(v)
Пример #16
0
def test_missing_values():
    """``missing_values`` is [''] unless the constructor argument overrides it."""
    default_field = Field(DESCRIPTOR_MIN)
    assert default_field.missing_values == ['']

    custom_field = Field(DESCRIPTOR_MIN, missing_values=['-'])
    assert custom_field.missing_values == ['-']
Пример #17
0
def test_cast_value_null_with_missing_values():
    """A value listed in ``missing_values`` is cast to ``None``."""
    field = Field({'name': 'name', 'type': 'number'}, missing_values=['null'])
    # Identity check: ``== None`` would also pass for any object whose
    # __eq__ happens to accept None, which is not what this test means.
    assert field.cast_value('null') is None
Пример #18
0
def test_constraints():
    """DESCRIPTOR_MIN has no constraints; DESCRIPTOR_MAX has only ``required``."""
    minimal = Field(DESCRIPTOR_MIN)
    assert minimal.constraints == {}
    maximal = Field(DESCRIPTOR_MAX)
    assert maximal.constraints == {'required': True}
Пример #19
0
def test_required():
    """``required`` is False for DESCRIPTOR_MIN and True for DESCRIPTOR_MAX."""
    expectations = (
        (DESCRIPTOR_MIN, False),
        (DESCRIPTOR_MAX, True),
    )
    for descriptor, expected in expectations:
        assert Field(descriptor).required == expected
Пример #20
0
def test_cast_value():
    """DESCRIPTOR_MAX casts the string '1' to the integer 1."""
    field = Field(DESCRIPTOR_MAX)
    assert field.cast_value('1') == 1
Пример #21
0
class SchemaField:
    """
    Utility class for a field in a schema.
    Uses a tableschema.Field (https://github.com/frictionlessdata/tableschema-py/blob/master/tableschema/field.py)
    for help. It doesn't extend that class but composes with it, mostly for the use of the cast_value method.
    """
    DATETIME_TYPES = ['date', 'datetime']
    # Default spellings accepted for boolean fields (see __curate_descriptor).
    # The two lists mirror each other: upper, lower and capitalised forms.
    TRUE_VALUES = ['TRUE', 'true', 'True', 'YES', 'yes', 'y', 'Y', 'Yes']
    FALSE_VALUES = ['FALSE', 'false', 'False', 'NO', 'no', 'n', 'N', 'No']
    # Format reported when the descriptor does not declare one.
    DEFAULT_FORMAT = 'default'

    def __init__(self, descriptor):
        """
        :param descriptor: the field descriptor (dict). It is curated in place
            (see __curate_descriptor) and kept as ``self.descriptor``.
        :raises FieldSchemaError: if the descriptor has no (or an empty) name.
        """
        self.descriptor = self.__curate_descriptor(descriptor)
        self.name = self.descriptor.get('name')
        # We want to throw an exception if there is no name
        if not self.name:
            raise FieldSchemaError("A field without a name: {}".format(
                json.dumps(descriptor)))
        # the tableschema field.
        self.tableschema_field = TableField(self.descriptor)
        # biosys specific
        self.biosys = BiosysSchema(
            self.descriptor.get(BiosysSchema.BIOSYS_KEY_NAME))
        self.constraints = SchemaConstraints(
            self.descriptor.get('constraints', {}))

    # implement some dict like methods
    def __getitem__(self, item):
        return self.descriptor.__getitem__(item)

    def get(self, k, d=None):
        return self.descriptor.get(k, d)

    @property
    def title(self):
        return self.descriptor.get('title')

    @property
    def type(self):
        return self.descriptor.get('type')

    @property
    def column_name(self):
        return self.name

    @property
    def required(self):
        return self.constraints.required

    @property
    def aliases(self):
        """The descriptor's 'aliases' entry, or an empty list when absent."""
        return self.descriptor.get('aliases', [])

    @property
    def is_datetime_types(self):
        return self.type in self.DATETIME_TYPES

    @property
    def is_date_type(self):
        return self.type == 'date'

    @property
    def is_numeric(self):
        return self.type in ['number', 'integer']

    @property
    def format(self):
        """
        The declared format, or 'default' when the descriptor has none.
        A plain key lookup here used to raise KeyError, which made cast() fail
        for every date/datetime field that did not spell out its format.
        """
        return self.descriptor.get('format', self.DEFAULT_FORMAT)

    def has_alias(self, name, icase=False):
        """
        Test if one of the aliases matches name.
        :param name: the alias to look for
        :param icase: if True the comparison ignores case
        :return: True if an alias matches
        """
        return any(
            alias == name or (icase and alias.lower() == name.lower())
            for alias in self.aliases)

    def has_name_or_alias(self, name, alias, icase=False):
        """
        Test if the field is named name or has the alias alias.
        :param name: the field name to look for
        :param alias: the alias to look for
        :param icase: if True the comparisons ignore case
        :return: True if either the name or the alias matches
        """
        has_name = (self.name
                    == name) or (icase and self.name.lower() == name.lower())
        return has_name or self.has_alias(alias, icase=icase)

    def cast(self, value):
        """
        Returns a native Python object of the expected format. Will throw an exception
        if the value doesn't comply with the constraints.
        This method delegates most of the cast to the tableschema.Field.cast_value. Except for
        - date and datetime with format='any'. This is because tableschema.Field.cast_value interprets an ambiguous
        day/month/year date as month/day/year (american way)
        :param value: the raw value; strings are stripped before casting
        :return: the cast value
        """
        # we want to strip strings
        if isinstance(value, six.string_types):
            value = value.strip()
            # TODO: remove that when running in Python3
            if not isinstance(value, six.text_type):
                # ensure we only deal with unicode
                value = six.u(value).strip()
        # date or datetime with format='any'
        if self.is_datetime_types and self.format == 'any' and value:
            return cast_date_any_format(
                value) if self.is_date_type else cast_datetime_any_format(
                    value)
        # delegates to tableschema.Field.cast_value
        return self.tableschema_field.cast_value(value, constraints=True)

    def validation_error(self, value):
        """
        Return an error message if the value is not valid according to the schema.
        It relies on the exception thrown by the 'cast' method.
        :param value: the raw value to validate
        :return: None if value is valid or an error message string
        """
        # override the integer validation. The default message is a bit cryptic if there's an error casting a string
        # like '1.2' into an int.
        if self.type == 'integer' and not is_blank_value(value):
            try:
                # there's also the case where a float 1.2 is successfully casted in 1 (ex: int(1.2) = 1),
                # hence the textual comparison. cast() strips strings, so compare against the stripped text
                # to avoid rejecting a padded but otherwise valid integer such as ' 12 '.
                is_whole_number = str(self.cast(value)) == str(value).strip()
            except Exception:
                is_whole_number = False
            if not is_whole_number:
                return 'The field "{}" must be a whole number.'.format(
                    self.name)
        try:
            self.cast(value)
        except Exception as e:
            error = "{}".format(e)
            # Override the default enum exception message to include all possible values.
            # This is a substring test: the previous str.find() returned -1 (truthy) when the text was absent,
            # so every cast error on a field with an enum was replaced by the enum message.
            # NOTE(review): assumes tableschema mentions 'enum' in the message of an enum violation; the exact
            # wording differs between releases - TODO confirm against the pinned version.
            if 'enum' in error and self.constraints.enum:
                values = [str(v) for v in self.constraints.enum]
                error = "The value must be one the following: {}".format(
                    values)
            return error
        return None

    def __curate_descriptor(self, descriptor):
        """
        Apply some changes to the descriptor (in place) and return it:

        - Change default values for boolean (adding 'yes' and 'no')
        Since TableSchema V1.0 the default true values are [ "true", "True", "TRUE", "1" ]
        We want to be sure that 'yes' and 'no' (and variations) are included by default.
        The schema specifications allows to override the true and false values with 'trueValues' and 'falseValues'
        (see https://frictionlessdata.io/specs/table-schema/)
        """
        if descriptor.get('type') == 'boolean':
            # Hand out copies so that editing one descriptor cannot alter the class-level defaults.
            descriptor['trueValues'] = descriptor.get(
                'trueValues', list(self.TRUE_VALUES))
            descriptor['falseValues'] = descriptor.get(
                'falseValues', list(self.FALSE_VALUES))
        return descriptor

    def __str__(self):
        return '{}'.format(self.name)
Пример #22
0
 def caster(v):
     # Build a Field from ``__field`` (resolved from the surrounding scope)
     # on every call and return the cast of ``v``.
     return Field(__field).cast_value(v)