Пример #1
0
    def validate_length(self, value):
        """
        Check the number of characters in ``value`` against
        :py:attr:`~cutplace.fields.AbstractFieldFormat.length`.

        Nothing is checked if no length is set or if ``value`` is empty and
        the field is allowed to be empty.

        :raises cutplace.errors.FieldValueError: if ``value`` is too short \
          or too long
        """
        assert value is not None

        if self.length is None:
            return
        if self.is_allowed_to_be_empty and value == '':
            return

        try:
            if self.data_format.format != data.FORMAT_FIXED:
                description = "length of '%s' with value %s" % (self.field_name, _compat.text_repr(value))
                self.length.validate(description, len(value))
            else:
                # For fixed format the length only acts as an upper bound;
                # shorter values have to be padded later.
                actual_length = len(value)
                maximum_length = self.length.lower_limit
                if actual_length > maximum_length:
                    message = 'fixed format field must have at most %d characters instead of %d: %s' % (
                        maximum_length, actual_length, _compat.text_repr(value))
                    raise errors.FieldValueError(message)
        except errors.RangeValueError as error:
            raise errors.FieldValueError(six.text_type(error))
Пример #2
0
def validated_field_name(supposed_field_name, location=None):
    """
    Same as ``supposed_field_name`` except with surrounding white space removed.

    The name must not be empty, must not be a Python keyword, must begin with
    a character from ``_ASCII_LETTERS`` and must continue only with characters
    from ``_ASCII_LETTERS_DIGITS_AND_UNDERSCORE``.

    :param str supposed_field_name: the field name to validate
    :param cutplace.errors.Location location: location used in case of errors
    :raise cutplace.errors.InterfaceError: if ``supposed_field_name`` is \
      invalid
    :return: the stripped field name
    """
    field_name = supposed_field_name.strip()
    basic_requirements_text = 'field name must be a valid Python name consisting of ASCII letters, ' \
                              'underscore (_) and digits'
    if field_name == '':
        # The leading blank separates the requirements from the actual finding.
        raise errors.InterfaceError(basic_requirements_text + ' but is empty', location)
    if keyword.iskeyword(field_name):
        raise errors.InterfaceError("field name must not be a Python keyword but is: '%s'" % field_name, location)
    if field_name[0] not in _ASCII_LETTERS:
        raise errors.InterfaceError(
            "field name must begin with a lower-case letter but is: %s"
            % _compat.text_repr(field_name), location)
    if any(character not in _ASCII_LETTERS_DIGITS_AND_UNDERSCORE for character in field_name[1:]):
        raise errors.InterfaceError(
            basic_requirements_text + ' but is: %s' % _compat.text_repr(field_name), location)
    return field_name
Пример #3
0
    def add_data_format_row(self, row_data):
        """
        Apply the name and value found in the first two items of ``row_data``
        to :py:attr:`~cutplace.interface.Cid.data_format`.

        While no data format exists yet, the row must set the format property
        and results in a new :py:class:`cutplace.data.DataFormat`. Any later
        row is passed on to
        :py:meth:`~cutplace.data.DataFormat.set_property`.

        :param list row_data: a list with at least 2 items for name and value \
            that can be passed to \
            :py:meth:`cutplace.data.DataFormat.set_property()`.
        :raises cutplace.errors.InterfaceError: if the name is empty, the \
            first row does not set the format or the format is set again
        """
        assert row_data is not None
        assert len(row_data) >= 2

        name, value = row_data[:2]
        lower_name = name.lower()
        self._location.advance_cell()
        if name == '':
            raise errors.InterfaceError('name of data format property must be specified', self._location)
        self._location.advance_cell()

        sets_format = (lower_name == data.KEY_FORMAT)
        if self._data_format is None:
            if not sets_format:
                message = 'first data format row must set property %s instead of %s' % (
                    _compat.text_repr(data.KEY_FORMAT), _compat.text_repr(name))
                raise errors.InterfaceError(message, self._location)
        elif sets_format:
            message = 'data format already is %s and must be set only once' % _compat.text_repr(
                self._data_format.format)
            raise errors.InterfaceError(message, self._location)

        lower_value = value.lower()
        if self._data_format is not None:
            self._data_format.set_property(lower_name, value, self._location)
        else:
            self._data_format = data.DataFormat(lower_value, self._location)
Пример #4
0
    def validated_value(self, value):
        """
        ``value`` converted to a :py:class:`decimal.Decimal` and checked
        against the valid range of the field.

        Before the conversion, the decimal separator of the field is
        translated to ``'.'`` and thousands separators are removed. A
        thousands separator must not show up after the decimal separator.

        :raises cutplace.errors.FieldValueError: if ``value`` contains \
          misplaced separators, cannot be converted to a decimal number or \
          is outside of the valid range
        """
        assert value

        # Collect the characters in a list and join them once instead of
        # repeatedly concatenating strings.
        translated_characters = []
        found_decimal_separator = False
        for character_to_process in value:
            if character_to_process == self.decimal_separator:
                if found_decimal_separator:
                    raise errors.FieldValueError(
                        "decimal field must contain only one decimal separator (%s): %s"
                        % (_compat.text_repr(self.decimal_separator), _compat.text_repr(value)))
                translated_characters.append(".")
                found_decimal_separator = True
            elif self.thousands_separator and (character_to_process == self.thousands_separator):
                # Thousands separators are dropped but only valid in the integer part.
                if found_decimal_separator:
                    raise errors.FieldValueError(
                        "decimal field must contain thousands separator (%r) only before "
                        "decimal separator (%r): %r "
                        % (self.thousands_separator, self.decimal_separator, value))
            else:
                translated_characters.append(character_to_process)
        translated_value = "".join(translated_characters)

        try:
            result = decimal.Decimal(translated_value)
        except decimal.InvalidOperation as error:
            # Text that is not a number makes decimal.Decimal signal InvalidOperation.
            message = "value is %r but must be a decimal number: %s" % (value, error)
            raise errors.FieldValueError(message)

        try:
            self.valid_range.validate(self._field_name, result)
        except errors.RangeValueError as error:
            raise errors.FieldValueError(str(error))

        return result
Пример #5
0
    def validated_value(self, value):
        """
        ``value`` converted to a :py:class:`decimal.Decimal`.

        Before the conversion, the decimal separator of the field is
        translated to ``'.'`` and thousands separators are removed. A
        thousands separator must not show up after the decimal separator.

        :raises cutplace.errors.FieldValueError: if ``value`` contains \
          misplaced separators or cannot be converted to a decimal number
        """
        assert value

        # Collect the characters in a list and join them once instead of
        # repeatedly concatenating strings.
        translated_characters = []
        found_decimal_separator = False
        # Positions reported in error messages start at 1.
        for position, character_to_process in enumerate(value, 1):
            if character_to_process == self.decimalSeparator:
                if found_decimal_separator:
                    raise errors.FieldValueError(
                        "decimal field must contain only one decimal separator (%s): %s"
                        % (_compat.text_repr(self.decimalSeparator), _compat.text_repr(value)))
                translated_characters.append(".")
                found_decimal_separator = True
            elif self.thousandsSeparator and (character_to_process == self.thousandsSeparator):
                # Thousands separators are dropped but only valid in the integer part.
                if found_decimal_separator:
                    raise errors.FieldValueError(
                        "decimal field must contain thousands separator (%r) only before "
                        "decimal separator (%r): %r (position %d)"
                        % (self.thousandsSeparator, self.decimalSeparator, value, position))
            else:
                translated_characters.append(character_to_process)
        translated_value = "".join(translated_characters)
        try:
            result = decimal.Decimal(translated_value)
        except decimal.InvalidOperation as error:
            # Text that is not a number makes decimal.Decimal signal InvalidOperation.
            message = "value is %r but must be a decimal number: %s" % (value, error)
            raise errors.FieldValueError(message)

        return result
Пример #6
0
    def validate_length(self, value):
        """
        Check the number of characters in ``value`` against
        :py:attr:`~cutplace.fields.AbstractFieldFormat.length`.

        Nothing is checked if no length is set or if ``value`` is empty and
        the field is allowed to be empty.

        :raises cutplace.errors.FieldValueError: if ``value`` is too short \
          or too long
        """
        assert value is not None

        if self.length is None:
            return
        if self.is_allowed_to_be_empty and value == '':
            return

        try:
            if self.data_format.format != data.FORMAT_FIXED:
                description = "length of '%s' with value %s" % (self.field_name, _compat.text_repr(value))
                self.length.validate(description, len(value))
            else:
                # For fixed format the length only acts as an upper bound;
                # shorter values have to be padded later.
                actual_length = len(value)
                maximum_length = self.length.lower_limit
                if actual_length > maximum_length:
                    message = 'fixed format field must have at most %d characters instead of %d: %s' % (
                        maximum_length, actual_length, _compat.text_repr(value))
                    raise errors.FieldValueError(message)
        except errors.RangeValueError as error:
            raise errors.FieldValueError(six.text_type(error))
Пример #7
0
    def add_data_format_row(self, row_data):
        """
        Apply the name and value found in the first two items of ``row_data``
        to :py:attr:`~cutplace.interface.Cid.data_format`.

        While no data format exists yet, the row must set the format property
        and results in a new :py:class:`cutplace.data.DataFormat`. Any later
        row is passed on to
        :py:meth:`~cutplace.data.DataFormat.set_property`.

        :param list row_data: a list with at least 2 items for name and value \
            that can be passed to \
            :py:meth:`cutplace.data.DataFormat.set_property()`.
        :raises cutplace.errors.InterfaceError: if the name is empty, the \
            first row does not set the format or the format is set again
        """
        assert row_data is not None
        assert len(row_data) >= 2

        name, value = row_data[:2]
        lower_name = name.lower()
        self._location.advance_cell()
        if name == '':
            raise errors.InterfaceError('name of data format property must be specified', self._location)
        self._location.advance_cell()

        sets_format = (lower_name == data.KEY_FORMAT)
        if self._data_format is None:
            if not sets_format:
                message = 'first data format row must set property %s instead of %s' % (
                    _compat.text_repr(data.KEY_FORMAT), _compat.text_repr(name))
                raise errors.InterfaceError(message, self._location)
        elif sets_format:
            message = 'data format already is %s and must be set only once' % _compat.text_repr(
                self._data_format.format)
            raise errors.InterfaceError(message, self._location)

        lower_value = value.lower()
        if self._data_format is not None:
            self._data_format.set_property(lower_name, value, self._location)
        else:
            self._data_format = data.DataFormat(lower_value, self._location)
Пример #8
0
    def __init__(self, field_name, is_allowed_to_be_empty, length, rule, data_format):
        """
        Set up a constant field whose only valid value is the single token
        found in ``rule``.

        :raises cutplace.errors.InterfaceError: if ``rule`` holds more than \
          one token, if emptiness of the rule and \
          ``is_allowed_to_be_empty`` contradict each other or if \
          ``length`` does not match the length of the constant
        """
        super(ConstantFieldFormat, self).__init__(
            field_name, is_allowed_to_be_empty, length, rule, data_format, empty_value='')

        # The constant is the one and only token of the rule.
        rule_tokens = _tools.tokenize_without_space(rule)
        first_token = next(rule_tokens)
        if not _tools.is_eof_token(first_token):
            self._constant = _tools.token_text(first_token)
            trailing_token = next(rule_tokens)
            if not _tools.is_eof_token(trailing_token):
                unexpected_text = _compat.text_repr(_tools.token_text(trailing_token))
                raise errors.InterfaceError(
                    _('constant rule must be a single Python token but also found: %s') % unexpected_text)
        else:
            # Without a rule the field must always be empty.
            self._constant = ''

        has_empty_rule = (rule == '')
        if self.is_allowed_to_be_empty:
            if not has_empty_rule:
                raise errors.InterfaceError(
                    _('to describe a Constant that can be empty, use a Choice field with a single choice'))
        elif has_empty_rule:
            raise errors.InterfaceError(
                _('field must be marked as empty to describe a constant empty value'))

        constant_length = len(self._constant)
        try:
            self.length.validate(
                _('rule of constant field %s') % _compat.text_repr(self.field_name), constant_length)
        except errors.RangeValueError:
            raise errors.InterfaceError(
                _('length is %s but must be %d to match constant %s')
                % (self.length, len(self._constant), _compat.text_repr(self._constant)))
Пример #9
0
    def validated_value(self, value):
        """
        ``value`` converted to a :py:class:`decimal.Decimal` and checked
        against the valid range of the field.

        Before the conversion, the decimal separator of the field is
        translated to ``'.'`` and thousands separators are removed. A
        thousands separator must not show up after the decimal separator.

        :raises cutplace.errors.FieldValueError: if ``value`` contains \
          misplaced separators, cannot be converted to a decimal number or \
          is outside of the valid range
        """
        assert value

        # Collect the characters in a list and join them once instead of
        # repeatedly concatenating strings.
        translated_characters = []
        found_decimal_separator = False
        for character_to_process in value:
            if character_to_process == self.decimal_separator:
                if found_decimal_separator:
                    raise errors.FieldValueError(
                        _("decimal field must contain only one decimal separator (%s): %s")
                        % (_compat.text_repr(self.decimal_separator), _compat.text_repr(value)))
                translated_characters.append(".")
                found_decimal_separator = True
            elif self.thousands_separator and (character_to_process == self.thousands_separator):
                # Thousands separators are dropped but only valid in the integer part.
                if found_decimal_separator:
                    raise errors.FieldValueError(_(
                        "decimal field must contain thousands separator (%r) only before "
                        "decimal separator (%r): %r "
                        ) % (self.thousands_separator, self.decimal_separator, value))
            else:
                translated_characters.append(character_to_process)
        translated_value = "".join(translated_characters)

        try:
            result = decimal.Decimal(translated_value)
        except decimal.InvalidOperation as error:
            # Text that is not a number makes decimal.Decimal signal InvalidOperation.
            message = "value is %r but must be a decimal number: %s" % (value, error)
            raise errors.FieldValueError(message)

        try:
            self.valid_range.validate(self._field_name, result)
        except errors.RangeValueError as error:
            raise errors.FieldValueError(str(error))

        return result
Пример #10
0
def validated_field_name(supposed_field_name, location=None):
    """
    Same as ``supposed_field_name`` except with surrounding white space removed.

    The name must not be empty, must not be a Python keyword, must begin with
    a character from ``_ASCII_LETTERS`` and must continue only with characters
    from ``_ASCII_LETTERS_DIGITS_AND_UNDERSCORE``.

    :param str supposed_field_name: the field name to validate
    :param cutplace.errors.Location location: location used in case of errors
    :raise cutplace.errors.InterfaceError: if ``supposed_field_name`` is \
      invalid
    :return: the stripped field name
    """
    field_name = supposed_field_name.strip()
    basic_requirements_text = 'field name must be a valid Python name consisting of ASCII letters, ' \
                              'underscore (_) and digits'
    if field_name == '':
        # The leading blank separates the requirements from the actual finding.
        raise errors.InterfaceError(basic_requirements_text + ' but is empty',
                                    location)
    if keyword.iskeyword(field_name):
        raise errors.InterfaceError(
            "field name must not be a Python keyword but is: '%s'" %
            field_name, location)
    if field_name[0] not in _ASCII_LETTERS:
        raise errors.InterfaceError(
            "field name must begin with a lower-case letter but is: %s"
            % _compat.text_repr(field_name), location)
    if any(character not in _ASCII_LETTERS_DIGITS_AND_UNDERSCORE for character in field_name[1:]):
        raise errors.InterfaceError(
            basic_requirements_text +
            ' but is: %s' % _compat.text_repr(field_name), location)
    return field_name
Пример #11
0
    def __init__(self, field_name, is_allowed_to_be_empty, length, rule, data_format):
        """
        Set up a constant field whose only valid value is the single token
        found in ``rule``.

        :raises cutplace.errors.InterfaceError: if ``rule`` holds more than \
          one token, if emptiness of the rule and \
          ``is_allowed_to_be_empty`` contradict each other or if \
          ``length`` does not match the length of the constant
        """
        super(ConstantFieldFormat, self).__init__(
            field_name, is_allowed_to_be_empty, length, rule, data_format, empty_value='')

        # The constant is the one and only token of the rule.
        rule_tokens = _tools.tokenize_without_space(rule)
        first_token = next(rule_tokens)
        if not _tools.is_eof_token(first_token):
            self._constant = _tools.token_text(first_token)
            trailing_token = next(rule_tokens)
            if not _tools.is_eof_token(trailing_token):
                unexpected_text = _compat.text_repr(_tools.token_text(trailing_token))
                raise errors.InterfaceError(
                    'constant rule must be a single Python token but also found: %s' % unexpected_text)
        else:
            # Without a rule the field must always be empty.
            self._constant = ''

        has_empty_rule = (rule == '')
        if self.is_allowed_to_be_empty:
            if not has_empty_rule:
                raise errors.InterfaceError(
                    'to describe a Constant that can be empty, use a Choice field with a single choice')
        elif has_empty_rule:
            raise errors.InterfaceError(
                'field must be marked as empty to describe a constant empty value')

        constant_length = len(self._constant)
        try:
            self.length.validate(
                'rule of constant field %s' % _compat.text_repr(self.field_name), constant_length)
        except errors.RangeValueError:
            raise errors.InterfaceError(
                'length is %s but must be %d to match constant %s'
                % (self.length, len(self._constant), _compat.text_repr(self._constant)))
Пример #12
0
    def validated_value(self, value):
        """
        ``value`` unchanged, provided it equals the constant of this field.

        :raises cutplace.errors.FieldValueError: if ``value`` differs from \
          the constant
        """
        assert value

        if value == self._constant:
            return value
        actual_text = _compat.text_repr(value)
        expected_text = _compat.text_repr(self._constant)
        raise errors.FieldValueError(
            "value is %s but must be constant: %s" % (actual_text, expected_text))
Пример #13
0
    def validated_value(self, value):
        """
        ``value`` unchanged, provided the regular expression of this field
        matches it.

        :raises cutplace.errors.FieldValueError: if ``value`` does not match
        """
        assert value

        if self.regex.match(value):
            return value
        message = 'value %s must match pattern: %s (regex %s)' % (
            _compat.text_repr(value), _compat.text_repr(self.rule), _compat.text_repr(self.pattern))
        raise errors.FieldValueError(message)
Пример #14
0
    def validated_value(self, value):
        """
        ``value`` unchanged, provided it equals the constant of this field.

        :raises cutplace.errors.FieldValueError: if ``value`` differs from \
          the constant
        """
        assert value

        if value == self._constant:
            return value
        actual_text = _compat.text_repr(value)
        expected_text = _compat.text_repr(self._constant)
        raise errors.FieldValueError(
            "value is %s but must be constant: %s" % (actual_text, expected_text))
Пример #15
0
    def validated_value(self, value):
        """
        ``value`` unchanged, provided the regular expression of this field
        matches it.

        :raises cutplace.errors.FieldValueError: if ``value`` does not match
        """
        assert value

        if self.regex.match(value):
            return value
        message = "value %s must match regular expression: %s" % (
            _compat.text_repr(value), _compat.text_repr(self.rule))
        raise errors.FieldValueError(message)
Пример #16
0
    def validated_value(self, value):
        """
        ``value`` unchanged, provided the regular expression of this field
        matches it.

        :raises cutplace.errors.FieldValueError: if ``value`` does not match
        """
        assert value

        if self.regex.match(value):
            return value
        message = _('value %s must match pattern: %s (regex %s)') % (
            _compat.text_repr(value), _compat.text_repr(self.rule), _compat.text_repr(self.pattern))
        raise errors.FieldValueError(message)
Пример #17
0
    def validated_value(self, value):
        """
        ``value`` unchanged, provided the regular expression of this field
        matches it.

        :raises cutplace.errors.FieldValueError: if ``value`` does not match
        """
        assert value

        if self.regex.match(value):
            return value
        message = "value %s must match regular expression: %s" % (
            _compat.text_repr(value), _compat.text_repr(self.rule))
        raise errors.FieldValueError(message)
Пример #18
0
    def _validated_character(key, value, location):
        r"""
        The single character described by ``value``, intended for the data
        format property ``key``. ``value`` can be:

        * a decimal or hex number (prefixed with ``'0x'``) referring to the ASCII/Unicode of the character
        * a string containing a single character such as ``'\t'``.
        * a symbolic name from :py:const:`cutplace.errors.NAME_TO_ASCII_CODE_MAP` such as ``tab``.

        :raises cutplace.errors.InterfaceError: on any broken ``value``
        """
        assert key
        assert value is not None

        name_for_errors = 'data format property %s' % _compat.text_repr(key)
        stripped_value = value.strip()
        is_single_non_digit = (len(stripped_value) == 1) and (stripped_value not in string.digits)
        if is_single_non_digit:
            # A lone character that is not a digit stands for itself.
            result_code = ord(stripped_value)
        else:
            tokens = tokenize.generate_tokens(io.StringIO(value).readline)
            first_token = next(tokens)
            if _tools.is_eof_token(first_token):
                raise errors.InterfaceError(
                    _("value for %s must be specified") % name_for_errors,
                    location)
            token_kind = first_token[0]
            token_text = first_token[1]
            # Names, numbers and strings each have their own converter.
            code_for_kind = {
                token.NAME: ranges.code_for_symbolic_token,
                token.NUMBER: ranges.code_for_number_token,
                token.STRING: ranges.code_for_string_token,
            }.get(token_kind)
            if code_for_kind is not None:
                result_code = code_for_kind(name_for_errors, token_text, location)
            elif (len(token_text) == 1) and not _tools.is_eof_token(first_token):
                result_code = ord(token_text)
            else:
                raise errors.InterfaceError(
                    _('value for %s must a number, a single character or a symbolic name but is: %s'
                      ) % (name_for_errors, _compat.text_repr(value)),
                    location)
            # Only the end of input or a line break may follow the first token.
            trailing_token = next(tokens)
            is_valid_end = _tools.is_eof_token(trailing_token) or (trailing_token[0] == tokenize.NEWLINE)
            if not is_valid_end:
                raise errors.InterfaceError(
                    _('value for %s must be a single character but is: %s') %
                    (name_for_errors, _compat.text_repr(value)), location)
        # TODO: Handle 'none' properly.
        assert result_code is not None
        assert result_code >= 0
        result = six.unichr(result_code)
        assert result is not None
        return result
Пример #19
0
    def validate_row(self, row):
        """
        Validate a single ``row`` in three steps:

        1. The number of items in ``row`` must match the number of fields
           in the CID.
        2. Every item must conform to its field format (as defined by
           :py:class:`cutplace.fields.AbstractFieldFormat` and its
           descendants).
        3. The row must conform to all row checks (as defined by
           :py:meth:`cutplace.checks.AbstractCheck.check_row`).

        The caller is responsible for :py:attr:`~.location` pointing to the
        correct row in the data while ``validate_row`` takes care of calling
        :py:meth:`cutplace.errors.Location.set_cell` appropriately.
        """
        assert row is not None
        assert self.location is not None

        # Step 1: number of items.
        actual_count = len(row)
        expected_count = self._expected_item_count
        if actual_count < expected_count:
            message = 'row must contain %d fields but only has %d: %s' % (expected_count, actual_count, row)
            raise errors.DataError(message, self.location)
        if actual_count > expected_count:
            message = 'row must contain %d fields but has %d, additional values are: %s' % (
                expected_count, actual_count, row[expected_count:])
            raise errors.DataError(message, self.location)

        # Step 2: each item against its field format.
        for field_index, field_value in enumerate(row):
            self.location.set_cell(field_index)
            field_format = self.cid.field_formats[field_index]
            try:
                if isinstance(field_value, six.text_type):
                    field_format.validated(field_value)
                else:
                    raise errors.FieldValueError(
                        'type must be %s instead of %s: %s' %
                        (six.text_type.__name__, type(field_value).__name__,
                         _compat.text_repr(field_value)))
            except errors.FieldValueError as error:
                # Add the name of the field and the location, then pass the error on.
                field_text = _compat.text_repr(field_format.field_name)
                error.prepend_message('cannot accept field %s' % field_text, self.location)
                raise

        # Step 3: the row as a whole against the row checks.
        self.location.set_cell(0)
        field_map = _create_field_map(self.cid.field_names, row)
        for check_name in self.cid.check_names:
            row_check = self.cid.check_map[check_name]
            row_check.check_row(field_map, self.location)
Пример #20
0
    def validate_row(self, row):
        """
        Validate a single ``row`` in three steps:

        1. The number of items in ``row`` must match the number of fields
           in the CID.
        2. Every item must conform to its field format (as defined by
           :py:class:`cutplace.fields.AbstractFieldFormat` and its
           descendants).
        3. The row must conform to all row checks (as defined by
           :py:meth:`cutplace.checks.AbstractCheck.check_row`).

        The caller is responsible for :py:attr:`~.location` pointing to the
        correct row in the data while ``validate_row`` takes care of calling
        :py:meth:`cutplace.errors.Location.set_cell` appropriately.
        """
        assert row is not None
        assert self.location is not None

        # Step 1: number of items.
        actual_count = len(row)
        expected_count = self._expected_item_count
        if actual_count < expected_count:
            message = 'row must contain %d fields but only has %d: %s' % (expected_count, actual_count, row)
            raise errors.DataError(message, self.location)
        if actual_count > expected_count:
            message = 'row must contain %d fields but has %d, additional values are: %s' % (
                expected_count, actual_count, row[expected_count:])
            raise errors.DataError(message, self.location)

        # Step 2: each item against its field format.
        for field_index, field_value in enumerate(row):
            self.location.set_cell(field_index)
            field_format = self.cid.field_formats[field_index]
            try:
                if isinstance(field_value, six.text_type):
                    field_format.validated(field_value)
                else:
                    raise errors.FieldValueError(
                        'type must be %s instead of %s: %s'
                        % (six.text_type.__name__, type(field_value).__name__, _compat.text_repr(field_value)))
            except errors.FieldValueError as error:
                # Add the name of the field and the location, then pass the error on.
                field_text = _compat.text_repr(field_format.field_name)
                error.prepend_message('cannot accept field %s' % field_text, self.location)
                raise

        # Step 3: the row as a whole against the row checks.
        self.location.set_cell(0)
        field_map = _create_field_map(self.cid.field_names, row)
        for check_name in self.cid.check_names:
            row_check = self.cid.check_map[check_name]
            row_check.check_row(field_map, self.location)
Пример #21
0
 def _validated_int_at_least_0(key, value, location):
     assert key
     assert value is not None
     try:
         result = int(value)
     except ValueError:
         raise errors.InterfaceError(
             'data format property %s is %s but must be a number'
             % (_compat.text_repr(key), _compat.text_repr(value)), location)
     if result < 0:
         raise errors.InterfaceError(
             'data format property %s is %d but must be at least 0' % (_compat.text_repr(key), result), location)
     return result
Пример #22
0
    def _validated_character(key, value, location):
        r"""
        A single character intended as value for data format property ``key``
        derived from ``value``, which can be:

        * a decimal or hex number (prefixed with ``'0x'``) referring to the ASCII/Unicode of the character
        * a string containing a single character such as ``'\t'``.
        * a symbolic name from :py:const:`cutplace.errors.NAME_TO_ASCII_CODE_MAP` such as ``tab``.

        :param key: name of the data format property, used in error messages
        :param value: the text describing the character
        :param location: location passed on to raised errors
        :raises cutplace.errors.InterfaceError: on any broken ``value``
        """
        assert key
        assert value is not None

        name_for_errors = 'data format property %s' % _compat.text_repr(key)
        stripped_value = value.strip()
        if (len(stripped_value) == 1) and (stripped_value not in string.digits):
            # A lone character that is not a digit stands for itself.
            result_code = ord(stripped_value)
        else:
            tokens = tokenize.generate_tokens(io.StringIO(value).readline)
            next_token = next(tokens)
            if _tools.is_eof_token(next_token):
                raise errors.InterfaceError(
                    "value for %s must be specified" % name_for_errors, location)
            next_type = next_token[0]
            next_value = next_token[1]
            if next_type == token.NAME:
                result_code = ranges.code_for_symbolic_token(name_for_errors, next_value, location)
            elif next_type == token.NUMBER:
                result_code = ranges.code_for_number_token(name_for_errors, next_value, location)
            elif next_type == token.STRING:
                result_code = ranges.code_for_string_token(name_for_errors, next_value, location)
            elif (len(next_value) == 1) and not _tools.is_eof_token(next_token):
                result_code = ord(next_value)
            else:
                raise errors.InterfaceError(
                    'value for %s must a number, a single character or a symbolic name but is: %s'
                    % (name_for_errors, _compat.text_repr(value)), location)
            # Ensure there are no further tokens. Newer Python versions emit
            # an implicit NEWLINE token before the end marker even if the
            # input has no trailing line break, so NEWLINE is a valid end too.
            next_token = next(tokens)
            if (not _tools.is_eof_token(next_token)) and (next_token[0] != tokenize.NEWLINE):
                raise errors.InterfaceError(
                    'value for %s must be a single character but is: %s'
                    % (name_for_errors, _compat.text_repr(value)), location)
        # TODO: Handle 'none' properly.
        assert result_code is not None
        assert result_code >= 0
        result = six.unichr(result_code)
        assert result is not None
        return result
Пример #23
0
 def _validated_int_at_least_0(key, value, location):
     assert key
     assert value is not None
     try:
         result = int(value)
     except ValueError:
         raise errors.InterfaceError(
             _('data format property %s is %s but must be a number') %
             (_compat.text_repr(key), _compat.text_repr(value)), location)
     if result < 0:
         raise errors.InterfaceError(
             _('data format property %s is %d but must be at least 0') %
             (_compat.text_repr(key), result), location)
     return result
Пример #24
0
def code_for_string_token(name, value, location):
    """
    The numeric code for text representing an string with a single character in ``value``.

    ``value`` includes its surrounding quotes. If the text between the
    quotes is not exactly one character, it is interpreted as an escape
    sequence that has to resolve to exactly one character.

    :param str name: the name of the value as it is known to the end user
    :param str value: the text that represents a string with a single character
    :param cutplace.errors.Location location: the location of ``value`` or ``None``
    :raises cutplace.errors.InterfaceError: if the text between the quotes \
      does not describe exactly one character; this includes malformed \
      escape sequences
    """
    assert name is not None
    assert value is not None
    assert len(value) >= 2
    left_quote = value[0]
    right_quote = value[-1]
    assert left_quote in "\"\'", "left_quote=%r" % left_quote
    assert right_quote in "\"\'", "right_quote=%r" % right_quote

    value_without_quotes = value[1:-1]
    if len(value_without_quotes) != 1:
        try:
            value_without_quotes = value_without_quotes.encode('utf-8').decode(
                'unicode_escape')
        except UnicodeDecodeError:
            # A malformed escape sequence (for example a truncated hex escape)
            # cannot describe a single character. Report it with the same
            # InterfaceError as any other invalid text instead of leaking the
            # codec error to the caller.
            value_without_quotes = ''
        if len(value_without_quotes) != 1:
            raise errors.InterfaceError(
                _('text for %s must be a single character but is: %s') %
                (name, _compat.text_repr(value)), location)
    return ord(value_without_quotes)
Пример #25
0
    def _validated_choice(key, value, choices, location, ignore_case=False):
        """
        Same as ``value``, or ``value.lower()`` if ``ignore_case`` is
        ``True``, provided the result is one of ``choices``.

        :param key: name of the data format property, used in error messages
        :param value: the value to look up in ``choices``
        :param choices: the non empty collection of accepted values
        :param location: location passed on to a possible error
        :raises errors.InterfaceError: if the (possibly lower cased) value \
          is not one of ``choices``
        """
        assert key
        assert value is not None
        assert choices

        if ignore_case:
            candidate = value.lower()
        else:
            candidate = value
        if candidate in choices:
            return candidate
        raise errors.InterfaceError(
            'data format property %s is %s but must be one of: %s'
            % (_compat.text_repr(key), _compat.text_repr(value), _tools.human_readable_list(choices)), location)
Пример #26
0
    def __init__(self, field_name, is_allowed_to_be_empty, length, rule,
                 data_format):
        """
        Set up a choice field and collect its valid choices from ``rule``.

        ``rule`` is split into tokens (ignoring white space) and must consist
        of choice values separated by commas. The resulting values are stored
        in ``self.choices`` in the order they appear in ``rule``.

        :raises errors.InterfaceError: if a comma is not preceded by a \
          choice, a choice is empty, two choices are not separated by a \
          comma, ``rule`` ends with a comma, or there are no choices at all \
          although the field is not allowed to be empty
        """
        super(ChoiceFieldFormat, self).__init__(field_name,
                                                is_allowed_to_be_empty,
                                                length,
                                                rule,
                                                data_format,
                                                empty_value='')
        self.choices = []

        # Split rule into tokens, ignoring white space.
        tokens = _tools.tokenize_without_space(rule)

        # Extract choices from rule tokens. Remember the most recently
        # consumed token so the error about a misplaced comma can show what
        # actually preceded it.
        previous_toky = None
        toky = next(tokens)
        while not _tools.is_eof_token(toky):
            if _tools.is_comma_token(toky):
                # Handle comma after comma without choice.
                if previous_toky:
                    previous_toky_text = previous_toky[1]
                else:
                    previous_toky_text = None
                raise errors.InterfaceError(
                    "choice value must precede a comma (,) but found: %s" %
                    _compat.text_repr(previous_toky_text))
            choice = _tools.token_text(toky)
            if not choice:
                raise errors.InterfaceError(
                    "choice field must be allowed to be empty instead of containing an empty choice"
                )
            self.choices.append(choice)
            previous_toky = toky
            toky = next(tokens)
            if not _tools.is_eof_token(toky):
                if not _tools.is_comma_token(toky):
                    raise errors.InterfaceError(
                        "comma (,) must follow choice value %s but found: %s" %
                        (_compat.text_repr(choice), _compat.text_repr(
                            toky[1])))
                # Process next choice after comma.
                previous_toky = toky
                toky = next(tokens)
                if _tools.is_eof_token(toky):
                    raise errors.InterfaceError(
                        "trailing comma (,) must be removed")
        if not self.is_allowed_to_be_empty and not self.choices:
            raise errors.InterfaceError(
                "choice field without any choices must be allowed to be empty")
Пример #27
0
    def _has_data_after_skipped_line_delimiter():
        """
        If `fixed_file` has data, assume they are a line delimiter as specified
        by `line_delimiter` and read and validate them.

        The result is ``False`` if the end of `fixed_file` is reached while
        reading the delimiter, otherwise ``True``. In case `line_delimiter`
        is `None`, nothing is read and the result is always ``True`` even if
        the input has already reached its end.

        For the delimiter ``'any'``, a character read after a carriage return
        that is not a linefeed is stored in
        `unread_character_after_line_delimiter` instead of being dropped.

        :raises errors.DataFormatError: if the text read is not the \
          expected line delimiter
        """
        assert location is not None
        assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
        assert unread_character_after_line_delimiter[0] is None

        if line_delimiter is None:
            return True

        if line_delimiter == '\r\n':
            characters_to_read = 2
        else:
            assert line_delimiter in ('\n', '\r', 'any')
            characters_to_read = 1
        found_delimiter = fixed_file.read(characters_to_read)
        if found_delimiter == '':
            # End of input, there is nothing to validate.
            return False

        if line_delimiter != 'any':
            if found_delimiter != line_delimiter:
                raise errors.DataFormatError(
                    'line delimiter is %s but must be %s' %
                    (_compat.text_repr(found_delimiter),
                     _compat.text_repr(line_delimiter)), location)
            return True

        has_data = True
        if found_delimiter == '\r':
            # Process the optional '\n' for 'any'.
            character_after_return = fixed_file.read(1)
            if character_after_return == '\n':
                found_delimiter += character_after_return
            elif character_after_return == '':
                has_data = False
            else:
                # Unread the previous character because it is unrelated to line delimiters.
                unread_character_after_line_delimiter[0] = character_after_return
        if found_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
            accepted_delimiters = _tools.human_readable_list(
                _VALID_FIXED_ANY_LINE_DELIMITERS)
            raise errors.DataFormatError(
                'line delimiter is %s but must be one of: %s' %
                (_compat.text_repr(found_delimiter), accepted_delimiters),
                location)
        return has_data
Пример #28
0
    def _validated_choice(key, value, choices, location, ignore_case=False):
        """
        Same as ``value``, or its lower case form if ``ignore_case`` is set,
        after ensuring it is one of the available ``choices``.

        :param key: name of the data format property, used in error messages
        :param value: the value to look up in ``choices``
        :param choices: the non empty collection of accepted values
        :param location: location passed on to a possible error
        :raises errors.InterfaceError: if the (possibly lower cased) value \
          is not one of ``choices``
        """
        assert key
        assert value is not None
        assert choices

        normalized_value = value.lower() if ignore_case else value
        is_valid_choice = normalized_value in choices
        if not is_valid_choice:
            message = _('data format property %s is %s but must be one of: %s') % (
                _compat.text_repr(key), _compat.text_repr(value), _tools.human_readable_list(choices))
            raise errors.InterfaceError(message, location)
        return normalized_value
Пример #29
0
    def validated_value(self, value):
        """
        Same as ``value``, provided it is one of ``self.choices``.

        :raises errors.FieldValueError: if ``value`` is not one of the choices
        """
        assert value

        if value in self.choices:
            return value
        raise errors.FieldValueError(
            "value is %s but must be one of: %s"
            % (_compat.text_repr(value), _tools.human_readable_list(self.choices)))
Пример #30
0
    def validated_value(self, value):
        """
        Check that ``value`` is among ``self.choices`` and return it unchanged.

        :raises errors.FieldValueError: if ``value`` is not one of the choices
        """
        assert value

        is_known_choice = value in self.choices
        if not is_known_choice:
            available_choices = _tools.human_readable_list(self.choices)
            raise errors.FieldValueError(
                "value is %s but must be one of: %s" % (_compat.text_repr(value), available_choices))
        return value
Пример #31
0
 def check_distinct(name1, name2):
     """
     Ensure that the attributes ``'_' + name1`` and ``'_' + name2`` of the
     enclosing ``self`` have different values.

     The names must be passed in sorted order to obtain a consistent error
     message.

     :raises errors.InterfaceError: if both attributes have the same value
     """
     assert name1 is not None
     assert name2 is not None
     assert name1 < name2, 'names must be sorted for consistent error message: %r, %r' % (name1, name2)
     attributes = self.__dict__
     first_value = attributes['_' + name1]
     second_value = attributes['_' + name2]
     if first_value != second_value:
         return
     raise errors.InterfaceError(
         "'%s' and '%s' are both %s but must be different from each other"
         % (name1, name2, _compat.text_repr(first_value)))
Пример #32
0
    def _has_data_after_skipped_line_delimiter():
        """
        If `fixed_file` has data, assume they are a line delimiter as specified
        by `line_delimiter` and read and validate them.

        The result is ``False`` if `fixed_file` ends while reading the
        delimiter, otherwise ``True``. In case `line_delimiter` is `None`,
        the result is always ``True`` even if the input has already reached
        its end.

        :raises errors.DataFormatError: if the text read is not the \
          expected line delimiter
        """
        assert location is not None
        assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
        assert unread_character_after_line_delimiter[0] is None

        has_more_data = True
        if line_delimiter is not None:
            if line_delimiter == '\r\n':
                delimiter_read = fixed_file.read(2)
            else:
                assert line_delimiter in ('\n', '\r', 'any')
                delimiter_read = fixed_file.read(1)
            accepts_any_delimiter = line_delimiter == 'any'
            if delimiter_read == '':
                has_more_data = False
            elif not accepts_any_delimiter:
                if delimiter_read != line_delimiter:
                    raise errors.DataFormatError(
                        'line delimiter is %s but must be %s'
                        % (_compat.text_repr(delimiter_read), _compat.text_repr(line_delimiter)), location)
            else:
                if delimiter_read == '\r':
                    # A carriage return may be followed by an optional linefeed.
                    possible_linefeed = fixed_file.read(1)
                    if possible_linefeed == '\n':
                        delimiter_read += possible_linefeed
                    elif possible_linefeed == '':
                        has_more_data = False
                    else:
                        # Keep the character for later because it is unrelated to line delimiters.
                        unread_character_after_line_delimiter[0] = possible_linefeed
                if delimiter_read not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                    delimiters_text = _tools.human_readable_list(_VALID_FIXED_ANY_LINE_DELIMITERS)
                    raise errors.DataFormatError(
                        'line delimiter is %s but must be one of: %s' %
                        (_compat.text_repr(delimiter_read), delimiters_text), location)
        return has_more_data
Пример #33
0
 def check_distinct(name1, name2):
     """
     Ensure that the attributes ``'_' + name1`` and ``'_' + name2`` of the
     enclosing ``self`` differ from each other.

     The names must be passed in sorted order to obtain a consistent error
     message.

     :raises errors.InterfaceError: if both attributes have the same value
     """
     assert name1 is not None
     assert name2 is not None
     assert name1 < name2, 'names must be sorted for consistent error message: %r, %r' % (name1, name2)
     value_of_name1, value_of_name2 = [self.__dict__['_' + name] for name in (name1, name2)]
     if value_of_name1 == value_of_name2:
         message = _("'%s' and '%s' are both %s but must be different from each other") % (
             name1, name2, _compat.text_repr(value_of_name1))
         raise errors.InterfaceError(message)
Пример #34
0
    def validated_value(self, value):
        """
        The ``time.struct_time`` obtained by parsing ``value`` according to
        ``self.strptimeFormat``.

        :raises errors.FieldValueError: if ``value`` does not match the format
        """
        assert value

        try:
            return time.strptime(value, self.strptimeFormat)
        except ValueError as error:
            # Include the parser's own message as the trailing detail.
            raise errors.FieldValueError(
                "date must match format %s (%s) but is: %s (%s)"
                % (self.human_readable_format, self.strptimeFormat, _compat.text_repr(value), error))
Пример #35
0
    def write_row(self, row_to_write):
        """
        Write a row of fixed length strings.

        The items of ``row_to_write`` are concatenated and written to the
        target stream, followed by the line separator unless it is ``None``.
        Finally the location advances to the next line.

        :param list row_to_write: a list of str where each item must have \
          exactly the same length as the corresponding entry in \
          :py:attr:`~.field_lengths`
        :raises AssertionError: if ``row_to_write`` is not a list of \
          strings with each matching the corresponding ``field_lengths`` \
          as specified to :py:meth:`~.__init__`.
        :raises cutplace.errors.DataFormatError: if writing the row fails \
          with a ``UnicodeEncodeError``
        """
        assert row_to_write is not None
        item_count = len(row_to_write)
        assert item_count == self._expected_row_item_count, \
            '%s: row must have %d items instead of %d: %s' \
            % (self.location, self._expected_row_item_count, item_count, row_to_write)
        if __debug__:
            # Check type and length of each item against its declared field.
            for cell_index, cell_text in enumerate(row_to_write):
                self.location.set_cell(cell_index)
                field_name, required_length = self._field_names_and_lengths[cell_index]
                assert isinstance(cell_text, six.text_type), \
                    '%s: field %s must be of type %s instead of %s: %r' \
                    % (self.location, _compat.text_repr(field_name), six.text_type.__name__,
                       type(cell_text).__name__, cell_text)
                cell_length = len(cell_text)
                assert cell_length == required_length, \
                    '%s: field %s must have exactly %d characters instead of %d: %r' \
                    % (self.location, _compat.text_repr(field_name), required_length, cell_length, cell_text)
            self.location.set_cell(0)

        row_text = ''.join(row_to_write)
        try:
            self._target_stream.write(row_text)
        except UnicodeEncodeError as error:
            raise errors.DataFormatError(
                'cannot write data row: %s; row=%s' % (error, row_to_write),
                self.location)
        line_separator = self._line_separator
        if line_separator is not None:
            self._target_stream.write(line_separator)
        self.location.advance_line()
Пример #36
0
    def validated_value(self, value):
        """
        Same as ``value`` converted to ``int``, provided it is accepted by
        ``self.valid_range``.

        :raises errors.FieldValueError: if ``value`` is not an integer \
          number or ``self.valid_range`` rejects it
        """
        assert value

        try:
            number = int(value)
        except ValueError:
            raise errors.FieldValueError("value must be an integer number: %s" % _compat.text_repr(value))
        try:
            self.valid_range.validate("value", number)
        except errors.RangeValueError as range_error:
            # Report range violations as field errors with the same message.
            raise errors.FieldValueError(six.text_type(range_error))
        return number
Пример #37
0
    def validated_value(self, value):
        """
        Convert ``value`` to ``int`` and ensure ``self.valid_range`` accepts
        the result, which is then returned.

        :raises errors.FieldValueError: if ``value`` is not an integer \
          number or ``self.valid_range`` rejects it
        """
        assert value

        try:
            integer_value = int(value)
        except ValueError:
            raise errors.FieldValueError("value must be an integer number: %s" % _compat.text_repr(value))
        else:
            try:
                self.valid_range.validate("value", integer_value)
            except errors.RangeValueError as error:
                # Turn the range error into a field error, keeping its text.
                raise errors.FieldValueError(six.text_type(error))
        return integer_value
Пример #38
0
    def write_row(self, row_to_write):
        """
        Write a row of fixed length strings.

        After the concatenated items, the line separator is written unless it
        is ``None``, and the location advances to the next line.

        :param list row_to_write: a list of str where each item must have \
          exactly the same length as the corresponding entry in \
          :py:attr:`~.field_lengths`
        :raises AssertionError: if ``row_to_write`` is not a list of \
          strings with each matching the corresponding ``field_lengths`` \
          as specified to :py:meth:`~.__init__`.
        :raises cutplace.errors.DataFormatError: if writing the row fails \
          with a ``UnicodeEncodeError``
        """
        assert row_to_write is not None
        actual_item_count = len(row_to_write)
        assert actual_item_count == self._expected_row_item_count, \
            '%s: row must have %d items instead of %d: %s' \
            % (self.location, self._expected_row_item_count, actual_item_count, row_to_write)
        if __debug__:
            # Validate every item while pointing the location at its cell.
            for item_index, item in enumerate(row_to_write):
                self.location.set_cell(item_index)
                name_and_length = self._field_names_and_lengths[item_index]
                item_name, expected_length = name_and_length
                assert isinstance(item, six.text_type), \
                    '%s: field %s must be of type %s instead of %s: %r' \
                    % (self.location, _compat.text_repr(item_name), six.text_type.__name__, type(item).__name__,
                       item)
                actual_length = len(item)
                assert actual_length == expected_length, \
                    '%s: field %s must have exactly %d characters instead of %d: %r' \
                    % (self.location, _compat.text_repr(item_name), expected_length, actual_length,
                       item)
            self.location.set_cell(0)

        target_stream = self._target_stream
        try:
            target_stream.write(''.join(row_to_write))
        except UnicodeEncodeError as encode_error:
            message = 'cannot write data row: %s; row=%s' % (encode_error, row_to_write)
            raise errors.DataFormatError(message, self.location)
        if self._line_separator is not None:
            self._target_stream.write(self._line_separator)
        self.location.advance_line()
Пример #39
0
    def add_check_row(self, possibly_incomplete_items):
        """
        Add a check as declared in ``possibly_incomplete_items``, which
        ideally is a list composed of 3 elements:

        1. description ('customer_id_must_be_unique')
        2. type (e.g. 'IsUnique'  mapping to :py:class:`cutplace.checks.IsUniqueCheck`)
        3. rule (e.g. 'customer_id')

        Missing items are interpreted as empty string (``''``), additional
        items are ignored.

        :raises cutplace.errors.InterfaceError: on broken \
          ``possibly_incomplete_items``
        """
        assert possibly_incomplete_items is not None

        cells = list(possibly_incomplete_items)
        # HACK: Ignore possible concatenated (empty) cells between description and type.
        while (len(cells) >= 2) and (cells[1].strip() == ''):
            del cells[1]

        # Pad with empty strings so there are always at least 3 cells to unpack.
        padded_cells = cells + 3 * ['']
        check_description, check_type, check_rule = padded_cells[:3]

        self._location.advance_cell()
        if check_description == '':
            raise errors.InterfaceError('check description must be specified',
                                        self._location)

        self._location.advance_cell()
        check_class_name = check_type + "Check"
        if check_class_name not in self._check_name_to_class_map:
            known_class_names = sorted(self._check_name_to_class_map.keys())
            raise errors.InterfaceError(
                "check type is '%s' but must be one of: %s" %
                (check_type, _tools.human_readable_list(known_class_names)), self._location)

        _log.debug("create check: %s(%r, %r)", check_type, check_description,
                   check_rule)
        check_class = self._create_check_class(check_type)
        # Create and initialize the check in two explicit steps.
        init_arguments = (check_description, check_rule, self._field_names, self._location)
        check = check_class.__new__(check_class, *init_arguments)
        check.__init__(*init_arguments)

        self._location.set_cell(1)
        first_check = self._check_name_to_check_map.get(check_description)
        if first_check is not None:
            raise errors.InterfaceError(
                "check description must be used only once: %s" %
                _compat.text_repr(check_description), self._location,
                "first declaration", first_check.location)
        self._check_name_to_check_map[check_description] = check
        self._check_names.append(check_description)
        assert len(self.check_names) == len(self._check_name_to_check_map)
Пример #40
0
    def __init__(self, field_name, is_allowed_to_be_empty, length, rule, data_format):
        """
        Set up a choice field and collect its valid choices from ``rule``.

        ``rule`` is split into tokens (ignoring white space) and must consist
        of choice values separated by commas. The resulting values are stored
        in ``self.choices`` in the order they appear in ``rule``.

        :raises errors.InterfaceError: if a comma is not preceded by a \
          choice, a choice is empty, two choices are not separated by a \
          comma, ``rule`` ends with a comma, or there are no choices at all \
          although the field is not allowed to be empty
        """
        super(ChoiceFieldFormat, self).__init__(
            field_name, is_allowed_to_be_empty, length, rule, data_format, empty_value='')
        self.choices = []

        # Split rule into tokens, ignoring white space.
        tokens = _tools.tokenize_without_space(rule)

        # Extract choices from rule tokens. Remember the most recently
        # consumed token so the error about a misplaced comma can show what
        # actually preceded it.
        previous_toky = None
        toky = next(tokens)
        while not _tools.is_eof_token(toky):
            if _tools.is_comma_token(toky):
                # Handle comma after comma without choice.
                if previous_toky:
                    previous_toky_text = previous_toky[1]
                else:
                    previous_toky_text = None
                raise errors.InterfaceError(
                    "choice value must precede a comma (,) but found: %s" % _compat.text_repr(previous_toky_text))
            choice = _tools.token_text(toky)
            if not choice:
                raise errors.InterfaceError(
                    "choice field must be allowed to be empty instead of containing an empty choice")
            self.choices.append(choice)
            previous_toky = toky
            toky = next(tokens)
            if not _tools.is_eof_token(toky):
                if not _tools.is_comma_token(toky):
                    raise errors.InterfaceError(
                        "comma (,) must follow choice value %s but found: %s"
                        % (_compat.text_repr(choice), _compat.text_repr(toky[1])))
                # Process next choice after comma.
                previous_toky = toky
                toky = next(tokens)
                if _tools.is_eof_token(toky):
                    raise errors.InterfaceError("trailing comma (,) must be removed")
        if not self.is_allowed_to_be_empty and not self.choices:
            raise errors.InterfaceError("choice field without any choices must be allowed to be empty")
Пример #41
0
    def validated_value(self, value):
        """
        The ``time.struct_time`` obtained by parsing ``value`` according to
        ``self.strptime_format``.

        For fields without time in Excel data, a trailing
        ``DateTimeFieldFormat._NO_EXCEL_TIME`` is cut off before parsing.

        :raises errors.FieldValueError: if the text to parse does not match \
          the format
        """
        assert value

        text_to_parse = value
        if not self._has_time and (self.data_format.format == data.FORMAT_EXCEL):
            if value.endswith(DateTimeFieldFormat._NO_EXCEL_TIME):
                text_to_parse = value[:-DateTimeFieldFormat._NO_EXCEL_TIME_LENGTH]

        try:
            return time.strptime(text_to_parse, self.strptime_format)
        except ValueError as error:
            # Include the parser's own message as the trailing detail.
            raise errors.FieldValueError(
                "date must match format %s (%s) but is: %s (%s)"
                % (self.human_readable_format, self.strptime_format, _compat.text_repr(text_to_parse), error))
Пример #42
0
def human_readable_list(items, final_separator='or'):
    """
    All values in ``items`` in a human readable form. This is meant to be
    used in error messages, where dumping ``"%r"`` to the user does not cut
    it.

    Items are separated by a comma and a blank, except for the last two,
    which are separated by ``final_separator`` surrounded by blanks. Without
    any items, the result is an empty string.

    :param items: a sequence supporting ``len()`` and access by index
    :param str final_separator: the word between the last two items
    """
    assert items is not None
    assert final_separator is not None
    item_count = len(items)
    if item_count == 0:
        return ''
    if item_count == 1:
        result = _compat.text_repr(items[0])
    else:
        item_texts = [_compat.text_repr(items[index]) for index in range(item_count)]
        leading_text = ', '.join(item_texts[:-1])
        result = leading_text + ' ' + final_separator + ' ' + item_texts[-1]
        assert result
    assert result is not None
    return result
Пример #43
0
    def validated_value(self, value):
        """
        Parse ``value`` according to ``self.strptime_format`` and return the
        resulting ``time.struct_time``.

        For fields without time in Excel data, a trailing
        ``DateTimeFieldFormat._NO_EXCEL_TIME`` is cut off before parsing.

        :raises errors.FieldValueError: if the text to parse does not match \
          the format
        """
        assert value

        has_excel_time_to_strip = (
            not self._has_time
            and (self.data_format.format == data.FORMAT_EXCEL)
            and (value.endswith(DateTimeFieldFormat._NO_EXCEL_TIME))
        )
        if has_excel_time_to_strip:
            date_text = value[:-DateTimeFieldFormat._NO_EXCEL_TIME_LENGTH]
        else:
            date_text = value

        try:
            parsed_time = time.strptime(date_text, self.strptime_format)
        except ValueError as error:
            message = _("date must match format %s (%s) but is: %s (%s)") % (
                self.human_readable_format, self.strptime_format, _compat.text_repr(date_text), error)
            raise errors.FieldValueError(message)
        return parsed_time
Пример #44
0
def human_readable_list(items, final_separator='or'):
    """
    All values in ``items`` in a human readable form. This is meant to be
    used in error messages, where dumping ``"%r"`` to the user does not cut
    it.

    Items are separated by a comma and a blank, except for the last two,
    which are separated by ``final_separator`` surrounded by blanks. Without
    any items, the result is an empty string.

    :param items: a sequence supporting ``len()`` and access by index
    :param str final_separator: the word between the last two items
    """
    assert items is not None
    assert final_separator is not None
    item_count = len(items)
    if item_count == 0:
        result = ''
    elif item_count == 1:
        result = _compat.text_repr(items[0])
    else:
        last_index = item_count - 1
        parts = []
        for index in range(item_count):
            # Put the proper separator in front of every item but the first.
            if index == last_index:
                parts.append(' ' + final_separator + ' ')
            elif index > 0:
                parts.append(', ')
            parts.append(_compat.text_repr(items[index]))
        result = ''.join(parts)
        assert result
    assert result is not None
    return result
Пример #45
0
    def add_check_row(self, possibly_incomplete_items):
        """
        Add a check as declared in ``possibly_incomplete_items``, which
        ideally is a list composed of 3 elements:

        1. description ('customer_id_must_be_unique')
        2. type (e.g. 'IsUnique'  mapping to :py:class:`cutplace.checks.IsUniqueCheck`)
        3. rule (e.g. 'customer_id')

        Missing items are interpreted as empty string (``''``), additional
        items are ignored.

        :raises cutplace.errors.InterfaceError: on broken \
          ``possibly_incomplete_items``
        """
        assert possibly_incomplete_items is not None

        row_items = list(possibly_incomplete_items)
        # HACK: Ignore possible concatenated (empty) cells between description and type.
        while (len(row_items) >= 2) and (row_items[1].strip() == ''):
            del row_items[1]

        # Append empty strings to account for missing items before unpacking.
        check_description, check_type, check_rule = (row_items + 3 * [''])[:3]
        location = self._location
        location.advance_cell()
        if check_description == '':
            raise errors.InterfaceError(
                'check description must be specified', location)
        location.advance_cell()
        check_class_name = check_type + "Check"
        if check_class_name not in self._check_name_to_class_map:
            available_check_types_text = _tools.human_readable_list(sorted(self._check_name_to_class_map.keys()))
            message = "check type is '%s' but must be one of: %s" % (check_type, available_check_types_text)
            raise errors.InterfaceError(message, location)
        _log.debug("create check: %s(%r, %r)", check_type, check_description, check_rule)
        check_class = self._create_check_class(check_type)
        # Create and initialize the check in two explicit steps.
        new_check = check_class.__new__(check_class, check_description, check_rule, self._field_names, location)
        new_check.__init__(check_description, check_rule, self._field_names, location)
        location.set_cell(1)
        previous_check = self._check_name_to_check_map.get(check_description)
        if previous_check is not None:
            raise errors.InterfaceError(
                "check description must be used only once: %s" % _compat.text_repr(check_description),
                location, "first declaration", previous_check.location)
        self._check_name_to_check_map[check_description] = new_check
        self._check_names.append(check_description)
        assert len(self.check_names) == len(self._check_name_to_check_map)
Пример #46
0
    def validate(self, name, value, location=None):
        """
        Validate that ``value`` is within the specified range.

        ``value`` is converted to :py:class:`decimal.Decimal` (unless it
        already is one) and has to be within at least one of the
        ``(lower, upper)`` pairs in ``self._items``, where ``None`` as limit
        means that this side is unbounded. If ``self._items`` is ``None``,
        any value is accepted.

        :param str name: the name of ``value`` known to the end user for \
          usage in possible error messages
        :param int value: the value to validate
        :param cutplace.errors.Location location: the location to refer to \
          in possible error messages
        :raises cutplace.errors.RangeValueError: if ``value`` is out of range
        """
        assert name is not None
        assert name
        assert value is not None

        if isinstance(value, decimal.Decimal):
            value_as_decimal = value
        else:
            try:
                value_as_decimal = decimal.Decimal(value)
            except decimal.DecimalException:
                raise errors.RangeValueError(
                    _("value must be decimal but is %s") %
                    _compat.text_repr(value), location)

        if self._items is None:
            return

        for lower, upper in self._items:
            if lower is None:
                assert upper is not None
                is_within_item = value_as_decimal <= upper
            elif upper is None:
                is_within_item = value_as_decimal >= lower
            else:
                is_within_item = (value_as_decimal >= lower) and (value_as_decimal <= upper)
            if is_within_item:
                # The first matching item is enough.
                break
        else:
            raise errors.RangeValueError(
                _("%s is %r but must be within range: %r") %
                (name, value_as_decimal, self), location)
Пример #47
0
def code_for_number_token(name, value, location):
    """
    The numeric code for text representing an :py:class:`int` in ``value``.

    :param str name: the name of the value as it is known to the end user
    :param str value: the text that represents an :py:class:`int`
    :param cutplace.errors.Location location: the location of ``value`` or ``None``
    :raises cutplace.errors.InterfaceError: if ``value`` cannot be \
      converted to an integer number
    """
    assert name is not None
    assert value is not None

    try:
        # Base 0 makes int() derive the actual base from prefixes like 0x.
        return int(value, 0)
    except ValueError:
        message = 'numeric value for %s must be an integer number but is: %s' % (name, _compat.text_repr(value))
        raise errors.InterfaceError(message, location)
Пример #48
0
    def validate(self, name, value, location=None):
        """
        Validate that ``value`` is within the specified range.

        ``value`` is converted to :py:class:`decimal.Decimal` (unless it
        already is one) and has to be within at least one of the
        ``(lower, upper)`` pairs in ``self._items``, where ``None`` as limit
        means that this side is unbounded. If ``self._items`` is ``None``,
        any value is accepted.

        :param str name: the name of ``value`` known to the end user for \
          usage in possible error messages
        :param int value: the value to validate
        :param cutplace.errors.Location location: the location to refer to \
          in possible error messages
        :raises cutplace.errors.RangeValueError: if ``value`` is out of range
        """
        assert name is not None
        assert name
        assert value is not None

        value_as_decimal = value
        if not isinstance(value, decimal.Decimal):
            try:
                value_as_decimal = decimal.Decimal(value)
            except decimal.DecimalException:
                raise errors.RangeValueError(
                    "value must be decimal but is %s" % _compat.text_repr(value), location)

        def is_within(lower, upper):
            # ``None`` as limit means that the respective side is unbounded.
            if lower is None:
                assert upper is not None
                return value_as_decimal <= upper
            if upper is None:
                return value_as_decimal >= lower
            return (value_as_decimal >= lower) and (value_as_decimal <= upper)

        if self._items is not None:
            # any() stops at the first item containing the value.
            is_valid = any(is_within(lower, upper) for lower, upper in self._items)
            if not is_valid:
                raise errors.RangeValueError(
                    "%s is %r but must be within range: %r" % (name, value_as_decimal, self), location)
Пример #49
0
def code_for_symbolic_token(name, value, location):
    """
    The numeric code for the symbolic name in ``value``, which (ignoring
    case) has to be one of the keys in
    :py:const:`cutplace.errors.NAME_TO_ASCII_CODE_MAP`.

    :param str name: the name of the value as it is known to the end user
    :param str value: the text that represents a symbolic name
    :param cutplace.errors.Location location: the location of ``value`` or ``None``
    :raises cutplace.errors.InterfaceError: if ``value`` is not a known \
      symbolic name
    """
    assert name is not None
    assert value is not None

    symbol = value.lower()
    if symbol not in errors.NAME_TO_ASCII_CODE_MAP:
        # List all known symbols in sorted order to help the user to pick a valid one.
        known_symbols = sorted(errors.NAME_TO_ASCII_CODE_MAP)
        message = 'symbolic name %s for %s must be one of: %s' % (
            _compat.text_repr(value), name, _tools.human_readable_list(known_symbols))
        raise errors.InterfaceError(message, location)
    return errors.NAME_TO_ASCII_CODE_MAP[symbol]
Пример #50
0
    def validate_characters(self, value):
        """
        Check each character of ``value`` against
        :py:attr:`~cutplace.data.DataFormat.allowed_characters` of the data
        format; without such a range every character is accepted.

        :raises cutplace.errors.FieldValueError: if any character in \
          ``value`` is not allowed
        """
        allowed_range = self.data_format.allowed_characters
        if allowed_range is None:
            return

        # Columns reported to the user start at 1.
        for column, char in enumerate(value, 1):
            code_point = ord(char)
            try:
                allowed_range.validate("character", code_point)
            except errors.RangeValueError:
                message_template = _(
                    "character %s (code point U+%04x, decimal %d) in field '%s' at column %d must be an allowed "
                    "character: %s")
                raise errors.FieldValueError(message_template % (
                    _compat.text_repr(char), code_point, code_point, self.field_name, column, allowed_range))
Пример #51
0
    def validate_characters(self, value):
        """
        Check that ``value`` consists only of characters within
        :py:attr:`~cutplace.data.DataFormat.allowed_characters`. If the data
        format does not restrict the characters, nothing is checked.

        :raises cutplace.errors.FieldValueError: if any character in \
          ``value`` is not allowed
        """
        permitted = self.data_format.allowed_characters
        if permitted is None:
            return

        column = 0
        for symbol in value:
            # Count columns starting with 1 for error messages.
            column += 1
            code = ord(symbol)
            try:
                permitted.validate("character", code)
            except errors.RangeValueError:
                details = (_compat.text_repr(symbol), code, code, self.field_name, column, permitted)
                raise errors.FieldValueError(
                    "character %s (code point U+%04x, decimal %d) in field '%s' at column %d must be an allowed "
                    "character: %s" % details)
Пример #52
0
def field_name_index(field_name_to_look_up, available_field_names, location):
    """
    The position of ``field_name_to_look_up`` within
    ``available_field_names``. The name to look up must not have any
    leading or trailing white space.

    :param cutplace.errors.Location location: location used in case of errors
    :raise cutplace.errors.InterfaceError: if ``field_name_to_look_up`` is \
      not part of ``available_field_names``
    """
    assert field_name_to_look_up is not None
    assert field_name_to_look_up == field_name_to_look_up.strip()
    assert available_field_names

    wanted_name = field_name_to_look_up.strip()
    try:
        return available_field_names.index(wanted_name)
    except ValueError:
        message = 'unknown field name %s must be replaced by one of: %s' % (
            _compat.text_repr(wanted_name), _tools.human_readable_list(available_field_names))
        raise errors.InterfaceError(message, location)
Пример #53
0
def field_name_index(field_name_to_look_up, available_field_names, location):
    """
    Where ``field_name_to_look_up`` can be found in
    ``available_field_names``, expressed as index starting with 0. The
    name must already be free of surrounding white space.

    :param cutplace.errors.Location location: location used in case of errors
    :raise cutplace.errors.InterfaceError: if ``field_name_to_look_up`` is \
      not part of ``available_field_names``
    """
    assert field_name_to_look_up is not None
    assert field_name_to_look_up == field_name_to_look_up.strip()
    assert available_field_names

    stripped_name = field_name_to_look_up.strip()
    try:
        position = available_field_names.index(stripped_name)
    except ValueError:
        name_for_message = _compat.text_repr(stripped_name)
        alternatives = _tools.human_readable_list(available_field_names)
        raise errors.InterfaceError(
            'unknown field name %s must be replaced by one of: %s' % (name_for_message, alternatives),
            location)
    else:
        return position
Пример #54
0
def code_for_string_token(name, value, location):
    """
    The numeric code for text representing a string with a single character in ``value``.

    The text between the quotes is either a single character, which is
    used as is, or an escape sequence (for example ``\\t`` or ``\\x41``)
    that has to describe exactly one character.

    :param str name: the name of the value as it is known to the end user
    :param str value: the text that represents a string with a single \
      character, including the surrounding quotes
    :param cutplace.errors.Location location: the location of ``value`` or ``None``
    :raises cutplace.errors.InterfaceError: if the text between the quotes \
      does not describe exactly one character, including malformed escape \
      sequences
    """
    assert name is not None
    assert value is not None
    assert len(value) >= 2
    left_quote = value[0]
    right_quote = value[-1]
    assert left_quote in "\"\'", "left_quote=%r" % left_quote
    assert right_quote in "\"\'", "right_quote=%r" % right_quote

    value_without_quotes = value[1:-1]
    if len(value_without_quotes) != 1:
        # Anything but a single character must be an escape sequence describing one.
        try:
            value_without_quotes = value_without_quotes.encode('utf-8').decode('unicode_escape')
        except UnicodeDecodeError:
            # Malformed escape sequences such as a truncated '\x' cannot be decoded; report them
            # like any other invalid text instead of leaking the low level decoding error.
            value_without_quotes = None
        if (value_without_quotes is None) or (len(value_without_quotes) != 1):
            raise errors.InterfaceError(
                'text for %s must be a single character but is: %s' % (name, _compat.text_repr(value)), location)
    return ord(value_without_quotes)
Пример #55
0
    def add_field_format_row(self, possibly_incomplete_items):
        """
        Add field as described by `possibly_incomplete_items`, which is a
        list consisting of:

        1) field name
        2) optional: example value (can be empty)
        3) optional: empty flag ("X" = field is allowed to be empty)
        4) optional: length (using the syntax of :py:class:`cutplace.ranges.Range`)
        5) optional: field type (e.g. 'Integer' for :py:class:`cutplace.fields.IntegerFieldFormat`)
        6) optional: rule to validate field (depending on type)

        Any missing items are interpreted as empty string (``''``).
        Additional items are ignored.

        While the items are processed, ``self._location`` is moved from cell
        to cell so that a possible error refers to the item that caused it.
        On success, the resulting field format is passed to
        ``add_field_format()``.

        :param list possibly_incomplete_items: the items of the row that \
          describes the field
        :raises cutplace.errors.InterfaceError: on broken \
          ``possibly_incomplete_items`` or if no data format has been \
          specified yet
        """
        assert possibly_incomplete_items is not None
        assert self._location is not None

        if self._data_format is None:
            raise errors.InterfaceError("data format must be specified before first field", self._location)

        # Assert that the various lists and maps related to fields are in a consistent state.
        # Ideally this would be a class invariant, but this is Python, not Eiffel.
        field_count = len(self.field_names)
        assert len(self._field_formats) == field_count
        assert len(self._field_name_to_format_map) == field_count
        assert len(self._field_name_to_index_map) == field_count

        # Pad with empty strings and cut off surplus items so there are always exactly 6 items.
        items = (possibly_incomplete_items + 6 * [''])[:6]

        # Obtain field name.
        field_name = fields.validated_field_name(items[0], self._location)
        if field_name in self._field_name_to_format_map:
            # TODO: Add see_also_location pointing to previous declaration.
            raise errors.InterfaceError(
                'duplicate field name must be changed to a unique one: %s' % field_name, self._location)

        # Obtain example.
        # The example is only stored here; it is applied after the field format has been created.
        self._location.advance_cell()
        field_example = items[1]

        # Obtain "empty" mark.
        # The mark is lower-cased before the comparison, so it is matched case-insensitively
        # (presumably ``_EMPTY_INDICATOR`` is lower case -- TODO confirm).
        self._location.advance_cell()
        field_is_allowed_to_be_empty_text = items[2].strip().lower()
        if field_is_allowed_to_be_empty_text == '':
            field_is_allowed_to_be_empty = False
        elif field_is_allowed_to_be_empty_text == self._EMPTY_INDICATOR:
            field_is_allowed_to_be_empty = True
        else:
            raise errors.InterfaceError(
                "mark for empty field must be %s or empty but is %s"
                % (self._EMPTY_INDICATOR, field_is_allowed_to_be_empty_text), self._location)

        # Obtain length.
        # The raw text is passed on to the field format; the resulting length is validated further below.
        self._location.advance_cell()
        field_length = items[3]

        # Obtain field type and rule.
        self._location.advance_cell()
        field_type_item = items[4].strip()
        if field_type_item == '':
            # Fields without an explicit type are text fields.
            field_type = 'Text'
        else:
            # The type can be a dotted name; validate each part separately and join the validated
            # parts with "." again.
            field_type = ''
            field_type_parts = field_type_item.split(".")
            try:
                for part in field_type_parts:
                    if field_type:
                        field_type += "."
                    field_type += _tools.validated_python_name("field type part", part)
                assert field_type, "empty field type must be detected by validated_python_name()"
            except NameError as error:
                raise errors.InterfaceError(six.text_type(error), self._location)
        field_class = self._create_field_format_class(field_type)
        self._location.advance_cell()
        field_rule = items[5].strip()
        _log.debug("create field: %s(%r, %r, %r)", field_class.__name__, field_name, field_type, field_rule)
        try:
            # Create and initialize the field format in two explicit steps; unlike ``__new__()``,
            # ``__init__()`` additionally receives the data format.
            field_format = field_class.__new__(
                field_class, field_name, field_is_allowed_to_be_empty, field_length, field_rule)
            field_format.__init__(
                field_name, field_is_allowed_to_be_empty, field_length, field_rule, self._data_format)
        except errors.InterfaceError as error:
            # Add the name of the field to the message and fall back to the current location in case
            # the error does not have one of its own.
            error_location = error.location if error.location is not None else self._location
            error.prepend_message('cannot declare field %s' % _compat.text_repr(field_name), error_location)
            raise error

        # Validate field length.
        # TODO #82: Cleanup validation for declared field formats.
        # Point the location at cell 4 for errors concerning the length.
        self._location.set_cell(4)
        field_length = field_format.length
        if self._data_format.format == data.FORMAT_FIXED:
            # Fixed format requires an explicit length that is a single number of at least 1.
            if field_length.items is None:
                raise errors.InterfaceError(
                    "length of field %s must be specified with fixed data format" % _compat.text_repr(field_name),
                    self._location)
            if field_length.lower_limit != field_length.upper_limit:
                raise errors.InterfaceError(
                    "length of field %s for fixed data format must be a specific number but is: %s"
                    % (_compat.text_repr(field_name), field_format.length), self._location)
            if field_length.lower_limit < 1:
                raise errors.InterfaceError(
                    "length of field %s for fixed data format must be at least 1 but is: %d"
                    % (_compat.text_repr(field_name), field_format.length.lower_limit), self._location)
        elif field_length.lower_limit is not None:
            if field_length.lower_limit < 0:
                raise errors.InterfaceError(
                    "lower limit for length of field %s must be at least 0 but is: %d"
                    % (_compat.text_repr(field_name), field_format.length.lower_limit), self._location)
        elif field_length.upper_limit is not None:
            # The upper limit is only examined in case there is no lower limit.
            # Note: 0 as upper limit is valid for a field that must always be empty.
            if field_length.upper_limit < 0:
                raise errors.InterfaceError(
                    "upper limit for length of field %s must be at least 0 but is: %d"
                    % (_compat.text_repr(field_name), field_format.length.upper_limit), self._location)

        # Set and validate example in case there is one.
        if field_example != '':
            try:
                field_format.example = field_example
            except errors.FieldValueError as error:
                # Point the location at cell 2 for errors concerning the example.
                self._location.set_cell(2)
                raise errors.InterfaceError(
                    "cannot validate example for field %s: %s" % (_compat.text_repr(field_name), error),
                    self._location)

        # Set the location to cell 1 before the field format is added.
        self._location.set_cell(1)

        assert field_name
        assert field_type
        assert field_rule is not None

        self.add_field_format(field_format)
Пример #56
0
 def __str__(self):
     """
     Text showing the class name followed by the field name, the flag
     for empty values, the length and the rule.
     """
     details = (
         self.__class__.__name__,
         _compat.text_repr(self.field_name),
         self.is_allowed_to_be_empty,
         _compat.text_repr(self.length),
         _compat.text_repr(self.rule),
     )
     return "%s(%s, %s, %s, %s)" % details
Пример #57
0
def fixed_rows(fixed_source,
               encoding,
               field_name_and_lengths,
               line_delimiter='any'):
    r"""
    Rows found in file ``fixed_source`` using ``encoding``. The name and
    (fixed) length of the fields for each row are specified as a list of
    tuples ``(name, length)``. Each row can end with a line feed unless
    ``line_delimiter`` equals ``None``. Valid values are: ``'\n'``, ``'\r'``
    and ``'\r\n'``, in which case other values result in an
    `errors.DataFormatError`. Additionally ``'any'`` accepts any of the
    previous values.

    This is a generator that yields each row as a list of strings, one
    string per field.

    :param fixed_source: either the path of the file to read, in which \
      case the file is opened here and closed once the generator finishes, \
      or an already opened text stream, which is left open
    :param str encoding: the encoding used to open ``fixed_source`` in \
      case it is a path
    :param list field_name_and_lengths: tuples ``(name, length)`` for each \
      field where ``length`` is at least 1
    :param line_delimiter: the expected delimiter between rows as \
      described above
    :raises errors.DataFormatError: if the input ends within a row, a \
      field has fewer characters than required or a row does not end with \
      the expected line delimiter
    """
    assert fixed_source is not None
    assert encoding is not None
    for name, length in field_name_and_lengths:
        assert name is not None
        assert length >= 1, 'length for %s must be at least 1 but is %s' % (
            name, length)
    assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS, \
        'line_delimiter=%s but must be one of: %s' % (_compat.text_repr(line_delimiter), _VALID_FIXED_LINE_DELIMITERS)

    # Predefine variable for access in local function.
    location = errors.Location(fixed_source, has_column=True)
    fixed_file = None
    # HACK: list with at most 1 character to be unread after a line feed. We
    # need to use a list so `_has_data_after_skipped_line_delimiter` can
    # modify its contents.
    unread_character_after_line_delimiter = [None]

    def _has_data_after_skipped_line_delimiter():
        """
        If `fixed_file` has data, assume they are a line delimiter as specified
        by `line_delimiter` and read and validate them.

        In case `line_delimiter` is `None`, the result is always ``True`` even
        if the input has already reached its end.

        With `line_delimiter` being ``'any'``, a character read after a
        carriage return that turns out to be unrelated to the line delimiter
        is stored in `unread_character_after_line_delimiter` for the next
        field to pick up.
        """
        assert location is not None
        assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
        assert unread_character_after_line_delimiter[0] is None

        result = True
        if line_delimiter is not None:
            # Only '\r\n' takes 2 characters; for 'any' a possible second character is read below.
            if line_delimiter == '\r\n':
                actual_line_delimiter = fixed_file.read(2)
            else:
                assert line_delimiter in ('\n', '\r', 'any')
                actual_line_delimiter = fixed_file.read(1)
            if actual_line_delimiter == '':
                # End of input reached right after the last field of the row.
                result = False
            elif line_delimiter == 'any':
                if actual_line_delimiter == '\r':
                    # Process the optional '\n' for 'any'.
                    anticipated_linefeed = fixed_file.read(1)
                    if anticipated_linefeed == '\n':
                        actual_line_delimiter += anticipated_linefeed
                    elif anticipated_linefeed == '':
                        # End of input reached right after the carriage return.
                        result = False
                    else:
                        # Unread the previous character because it is unrelated to line delimiters.
                        unread_character_after_line_delimiter[
                            0] = anticipated_linefeed
                if actual_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                    valid_line_delimiters = _tools.human_readable_list(
                        _VALID_FIXED_ANY_LINE_DELIMITERS)
                    raise errors.DataFormatError(
                        'line delimiter is %s but must be one of: %s' %
                        (_compat.text_repr(actual_line_delimiter),
                         valid_line_delimiters), location)
            elif actual_line_delimiter != line_delimiter:
                raise errors.DataFormatError(
                    'line delimiter is %s but must be %s' %
                    (_compat.text_repr(actual_line_delimiter),
                     _compat.text_repr(line_delimiter)), location)
        return result

    # A text source is considered the path of a file to open (and close later on);
    # anything else is used as an already opened stream.
    if isinstance(fixed_source, six.string_types):
        fixed_file = io.open(fixed_source, 'r', encoding=encoding)
        is_opened = True
    else:
        fixed_file = fixed_source
        is_opened = False

    has_data = True
    try:
        while has_data:
            field_index = 0
            row = []
            for field_name, field_length in field_name_and_lengths:
                if unread_character_after_line_delimiter[0] is None:
                    item = fixed_file.read(field_length)
                else:
                    # Start the field with the character read ahead while processing the previous
                    # line delimiter and read only the remaining characters.
                    assert len(unread_character_after_line_delimiter) == 1
                    item = unread_character_after_line_delimiter[0]
                    if field_length >= 2:
                        item += fixed_file.read(field_length - 1)
                    unread_character_after_line_delimiter[0] = None
                assert unread_character_after_line_delimiter[0] is None
                if not is_opened:
                    # Ensure that the input is a text file, `io.StringIO` or something similar. Binary files,
                    # `io.BytesIO` and the like cannot be used because the return bytes instead of strings.
                    # NOTE: We do not need to use _compat.text_repr(item) because type `unicode` does not fail here.
                    assert isinstance(item, six.text_type), \
                        '%s: fixed_source must yield strings but got type %s, value %r' % (location, type(item), item)
                item_length = len(item)
                if item_length == 0:
                    if field_index > 0:
                        # The input ended after at least one field of the row has been read, so
                        # the row is incomplete.
                        names = [name for name, _ in field_name_and_lengths]
                        lengths = [
                            length for _, length in field_name_and_lengths
                        ]
                        previous_field_index = field_index - 1
                        characters_needed_count = sum(lengths[field_index:])
                        list_of_missing_field_names = _tools.human_readable_list(
                            names[field_index:], 'and')
                        raise errors.DataFormatError(
                            "after field '%s' %d characters must follow for: %s"
                            % (names[previous_field_index],
                               characters_needed_count,
                               list_of_missing_field_names), location)
                    # End of input reached.
                    has_data = False
                elif item_length == field_length:
                    row.append(item)
                    location.advance_column(field_length)
                    field_index += 1
                else:
                    # Some characters could be read but not enough to fill the field.
                    raise errors.DataFormatError(
                        "cannot read field '%s': need %d characters but found only %d: %s"
                        % (field_name, field_length, item_length,
                           _compat.text_repr(item)), location)
            if has_data and not _has_data_after_skipped_line_delimiter():
                has_data = False
            # A row read right before the end of the input still has to be yielded.
            if len(row) > 0:
                yield row
                location.advance_line()
    finally:
        # Only close files that have been opened by this function.
        if is_opened:
            fixed_file.close()
Пример #58
0
    def set_property(self, name, value, location=None):
        r"""
        Set data format property ``name`` to ``value`` possibly translating ``value`` from
        a human readable representation to an internal one.

        :param str name: any of the ``KEY_*`` constants
        :param value: the value to set the property to as it would show up in a CID. \
            In some cases, the value will be translated to an internal representation. \
            For example ``set_property(KEY_LINE_DELIMITER, 'lf')`` results in \
            :py:attr:`cutplace.data.line_delimiter` being ``'\n'``.
        :type value: str or None
        :param cutplace.errors.Location location: the location to refer to in possible error messages

        :raises cutplace.errors.InterfaceError: if ``name`` is not a valid property name for this data format
        :raises cutplace.errors.InterfaceError: if ``value`` is invalid for the specified property
        """
        assert not self.is_valid, 'after validate() has been called property %r cannot be set anymore' % name
        assert name is not None
        assert name == name.lower(), 'property name must be lower case: %r' % name
        assert (value is not None) or (name in (KEY_ALLOWED_CHARACTERS, KEY_LINE_DELIMITER))

        # Properties are stored in instance attributes named like the property but with blanks
        # replaced by underscores and an underscore as prefix. Consequently only properties for
        # which such an attribute already exists can be set.
        name = name.replace(' ', '_')
        property_attribute_name = '_' + name
        if property_attribute_name not in self.__dict__:
            valid_property_names = _tools.human_readable_list(list(self.__dict__.keys()))
            raise errors.InterfaceError(
                'data format property %s for format %s is %s but must be one of %s'
                % (_compat.text_repr(name), self.format, _compat.text_repr(value), valid_property_names), location)

        if name == KEY_ENCODING:
            try:
                codecs.lookup(value)
            except LookupError:
                # Report the rejected value; the current encoding remains unchanged and would be
                # misleading in the error message.
                raise errors.InterfaceError(
                    'value for data format property %s is %s but must be a valid encoding'
                    % (_compat.text_repr(KEY_ENCODING), _compat.text_repr(value)), location)
            self.encoding = value
        elif name == KEY_HEADER:
            self.header = DataFormat._validated_int_at_least_0(name, value, location)
        elif name == KEY_ALLOWED_CHARACTERS:
            try:
                self._allowed_characters = ranges.Range(value)
            except errors.InterfaceError as error:
                raise errors.InterfaceError(
                    'data format property %s must be a valid range: %s'
                    % (_compat.text_repr(KEY_ALLOWED_CHARACTERS), error), location)
        elif name == KEY_DECIMAL_SEPARATOR:
            self.decimal_separator = DataFormat._validated_choice(
                KEY_DECIMAL_SEPARATOR, value, _VALID_DECIMAL_SEPARATORS, location)
        elif name == KEY_ESCAPE_CHARACTER:
            self.escape_character = DataFormat._validated_choice(
                KEY_ESCAPE_CHARACTER, value, _VALID_ESCAPE_CHARACTERS, location)
        elif name == KEY_ITEM_DELIMITER:
            item_delimiter = DataFormat._validated_character(KEY_ITEM_DELIMITER, value, location)
            if item_delimiter == '\x00':
                raise errors.InterfaceError(
                    "data format property %s must not be 0 (to avoid zero termindated strings in Python's C based CSV reader)"
                    % _compat.text_repr(KEY_ITEM_DELIMITER), location)
            self.item_delimiter = item_delimiter
        elif name == KEY_LINE_DELIMITER:
            # NOTE(review): the assertion above permits ``None`` as value for the line delimiter but
            # ``value.lower()`` would fail with an AttributeError in that case -- confirm whether
            # callers ever pass ``None`` here.
            try:
                self.line_delimiter = _TEXT_TO_LINE_DELIMITER_MAP[value.lower()]
            except KeyError:
                raise errors.InterfaceError(
                    'line delimiter %s must be changed to one of: %s'
                    % (_compat.text_repr(value), _tools.human_readable_list(self._VALID_LINE_DELIMITER_TEXTS)),
                    location)
        elif name == KEY_QUOTE_CHARACTER:
            self.quote_character = DataFormat._validated_choice(
                KEY_QUOTE_CHARACTER, value, _VALID_QUOTE_CHARACTERS, location)
        elif name == KEY_SHEET:
            self.sheet = DataFormat._validated_int_at_least_0(KEY_SHEET, value, location)
        elif name == KEY_SKIP_INITIAL_SPACE:
            self.skip_initial_space = DataFormat._validated_bool(KEY_SKIP_INITIAL_SPACE, value, location)
        elif name == KEY_THOUSANDS_SEPARATOR:
            self.thousands_separator = DataFormat._validated_choice(
                KEY_THOUSANDS_SEPARATOR, value, _VALID_THOUSANDS_SEPARATORS, location)
        else:
            # Every property with a matching attribute must be handled by one of the branches above.
            assert False, 'name=%r' % name