def validate_length(self, value):
    """
    Check that the number of characters in ``value`` agrees with
    :py:attr:`~cutplace.fields.AbstractFieldFormat.length`.

    Nothing is checked when no length is declared or when ``value`` is an
    empty string and the field is allowed to be empty.

    :raises cutplace.errors.FieldValueError: if ``value`` is too short
      or too long
    """
    assert value is not None

    if self.length is None:
        return
    if self.is_allowed_to_be_empty and (value == ''):
        return
    try:
        if self.data_format.format != data.FORMAT_FIXED:
            description = "length of '%s' with value %s" % (self.field_name, _compat.text_repr(value))
            self.length.validate(description, len(value))
        else:
            # For fixed data the declared length only acts as upper limit
            # because shorter values get padded later.
            actual_length = len(value)
            maximum_length = self.length.lower_limit
            if actual_length > maximum_length:
                raise errors.FieldValueError(
                    'fixed format field must have at most %d characters instead of %d: %s'
                    % (maximum_length, actual_length, _compat.text_repr(value)))
    except errors.RangeValueError as error:
        raise errors.FieldValueError(six.text_type(error))
def validated_field_name(supposed_field_name, location=None):
    """
    Same as ``supposed_field_name`` except with surrounding white space
    removed.

    :param str supposed_field_name: the field name as found in the interface
    :param cutplace.errors.Location location: location used in case of errors
    :return: ``supposed_field_name`` without leading and trailing white space
    :raise cutplace.errors.InterfaceError: if ``supposed_field_name`` is
      empty, a Python keyword, does not start with a letter or contains
      characters other than ASCII letters, digits and underscore
    """
    field_name = supposed_field_name.strip()
    basic_requirements_text = 'field name must be a valid Python name consisting of ASCII letters, ' \
        'underscore (_) and digits'
    if field_name == '':
        # The separating blank is required, otherwise the message reads "...digitsbut is empty".
        raise errors.InterfaceError(basic_requirements_text + ' but is empty', location)
    if keyword.iskeyword(field_name):
        raise errors.InterfaceError(
            "field name must not be a Python keyword but is: '%s'" % field_name, location)
    # NOTE(review): the message demands a lower-case letter while the check
    # uses _ASCII_LETTERS; confirm whether that constant includes upper case.
    if field_name[0] not in _ASCII_LETTERS:
        raise errors.InterfaceError(
            "field name must begin with a lower-case letter but is: %s" % _compat.text_repr(field_name),
            location)
    for character in field_name[1:]:
        if character not in _ASCII_LETTERS_DIGITS_AND_UNDERSCORE:
            raise errors.InterfaceError(
                basic_requirements_text + ' but is: %s' % _compat.text_repr(field_name), location)
    return field_name
def add_data_format_row(self, row_data):
    """
    Take the name and value of a data format property from the first two
    items of ``row_data`` and apply them to
    :py:attr:`~cutplace.interface.Cid.data_format`.

    The first row processed must set the format itself, which creates the
    :py:class:`~cutplace.data.DataFormat`; all following rows are passed to
    :py:meth:`~cutplace.data.DataFormat.set_property`.

    :param list row_data: a list with at least 2 items for name and value
      that can be passed to
      :py:meth:`cutplace.data.DataFormat.set_property()`.
    :raises cutplace.errors.InterfaceError: if the name is empty, the first
      row does not set the format or the format is set more than once
    """
    assert row_data is not None
    assert len(row_data) >= 2

    name, value = row_data[:2]
    lower_name = name.lower()
    location = self._location

    location.advance_cell()
    if name == '':
        raise errors.InterfaceError('name of data format property must be specified', location)

    location.advance_cell()
    has_data_format = self._data_format is not None
    sets_format = lower_name == data.KEY_FORMAT
    if not has_data_format and not sets_format:
        raise errors.InterfaceError(
            'first data format row must set property %s instead of %s'
            % (_compat.text_repr(data.KEY_FORMAT), _compat.text_repr(name)),
            location)
    if has_data_format and sets_format:
        raise errors.InterfaceError(
            'data format already is %s and must be set only once'
            % _compat.text_repr(self._data_format.format),
            location)

    lower_value = value.lower()
    if has_data_format:
        self._data_format.set_property(lower_name, value, location)
    else:
        self._data_format = data.DataFormat(lower_value, location)
def validated_value(self, value):
    """
    Convert the text ``value`` to a :py:class:`decimal.Decimal` and check
    it against ``self.valid_range``.

    ``self.decimal_separator`` is translated to ``'.'`` and
    ``self.thousands_separator`` (if set) is removed before the conversion.

    :param str value: the non empty text to convert
    :return: the number represented by ``value``
    :rtype: decimal.Decimal
    :raises cutplace.errors.FieldValueError: if ``value`` contains more
      than one decimal separator, a thousands separator after the decimal
      separator, cannot be converted to a decimal number or is outside of
      the valid range
    """
    assert value

    # Collect the characters in a list and join them once instead of
    # repeatedly concatenating strings.
    translated_characters = []
    found_decimal_separator = False
    for character_to_process in value:
        if character_to_process == self.decimal_separator:
            if found_decimal_separator:
                raise errors.FieldValueError(
                    "decimal field must contain only one decimal separator (%s): %s"
                    % (_compat.text_repr(self.decimal_separator), _compat.text_repr(value)))
            translated_characters.append(".")
            found_decimal_separator = True
        elif self.thousands_separator and (character_to_process == self.thousands_separator):
            # Thousands separators are dropped but must precede the decimal separator.
            if found_decimal_separator:
                raise errors.FieldValueError(
                    "decimal field must contain thousands separator (%r) only before "
                    "decimal separator (%r): %r "
                    % (self.thousands_separator, self.decimal_separator, value))
        else:
            translated_characters.append(character_to_process)
    translated_value = "".join(translated_characters)

    try:
        result = decimal.Decimal(translated_value)
    except (decimal.DecimalException, ValueError) as error:
        # Only conversion problems are turned into a field error; anything
        # else indicates a programming error and should surface as is.
        message = "value is %r but must be a decimal number: %s" % (value, error)
        raise errors.FieldValueError(message)

    try:
        self.valid_range.validate(self._field_name, result)
    except errors.RangeValueError as error:
        raise errors.FieldValueError(str(error))
    return result
def validated_value(self, value):
    """
    Convert the text ``value`` to a :py:class:`decimal.Decimal`.

    ``self.decimalSeparator`` is translated to ``'.'`` and
    ``self.thousandsSeparator`` (if set) is removed before the conversion.

    :param str value: the non empty text to convert
    :return: the number represented by ``value``
    :rtype: decimal.Decimal
    :raises cutplace.errors.FieldValueError: if ``value`` contains more
      than one decimal separator, a thousands separator after the decimal
      separator or cannot be converted to a decimal number
    """
    assert value

    # Collect the characters in a list and join them once instead of
    # repeatedly concatenating strings.
    translated_characters = []
    found_decimal_separator = False
    for value_index, character_to_process in enumerate(value):
        if character_to_process == self.decimalSeparator:
            if found_decimal_separator:
                raise errors.FieldValueError(
                    "decimal field must contain only one decimal separator (%s): %s"
                    % (_compat.text_repr(self.decimalSeparator), _compat.text_repr(value)))
            translated_characters.append(".")
            found_decimal_separator = True
        elif self.thousandsSeparator and (character_to_process == self.thousandsSeparator):
            if found_decimal_separator:
                # Positions in messages are 1-based for the end user.
                raise errors.FieldValueError(
                    "decimal field must contain thousands separator (%r) only before "
                    "decimal separator (%r): %r (position %d)"
                    % (self.thousandsSeparator, self.decimalSeparator, value, value_index + 1))
        else:
            translated_characters.append(character_to_process)

    try:
        result = decimal.Decimal("".join(translated_characters))
    except (decimal.DecimalException, ValueError) as error:
        # Only conversion problems are turned into a field error; anything
        # else indicates a programming error and should surface as is.
        message = "value is %r but must be a decimal number: %s" % (value, error)
        raise errors.FieldValueError(message)
    return result
def validate_length(self, value):
    """
    Ensure that the length of ``value`` matches
    :py:attr:`~cutplace.fields.AbstractFieldFormat.length`.

    Fields without a declared length and empty values of fields that are
    allowed to be empty are accepted without further checks.

    :raises cutplace.errors.FieldValueError: if ``value`` is too short
      or too long
    """
    assert value is not None

    needs_check = (self.length is not None) \
        and (not self.is_allowed_to_be_empty or (value != ''))
    if needs_check:
        try:
            if self.data_format.format == data.FORMAT_FIXED:
                # Fixed data may be shorter than declared because they are
                # padded later, so only the upper limit matters.
                character_count = len(value)
                limit = self.length.lower_limit
                if character_count > limit:
                    message = 'fixed format field must have at most %d characters instead of %d: %s' \
                        % (limit, character_count, _compat.text_repr(value))
                    raise errors.FieldValueError(message)
            else:
                name_for_range = "length of '%s' with value %s" \
                    % (self.field_name, _compat.text_repr(value))
                self.length.validate(name_for_range, len(value))
        except errors.RangeValueError as error:
            raise errors.FieldValueError(six.text_type(error))
def add_data_format_row(self, row_data):
    """
    Apply the data format property described by the first two items of
    ``row_data`` (name and value) to
    :py:attr:`~cutplace.interface.Cid.data_format`.

    The very first row has to declare the format and creates the
    :py:class:`~cutplace.data.DataFormat`; later rows call
    :py:meth:`~cutplace.data.DataFormat.set_property`.

    :param list row_data: a list with at least 2 items for name and value
      that can be passed to
      :py:meth:`cutplace.data.DataFormat.set_property()`.
    :raises cutplace.errors.InterfaceError: if the name is missing, the
      format is not declared first or is declared twice
    """
    assert row_data is not None
    assert len(row_data) >= 2

    name, value = row_data[:2]
    lower_name = name.lower()

    # Move on to the cell containing the name.
    self._location.advance_cell()
    if name == '':
        raise errors.InterfaceError(
            'name of data format property must be specified', self._location)

    # Move on to the cell containing the value.
    self._location.advance_cell()
    is_first_row = self._data_format is None
    is_format_key = lower_name == data.KEY_FORMAT
    if is_first_row:
        if not is_format_key:
            message = 'first data format row must set property %s instead of %s' \
                % (_compat.text_repr(data.KEY_FORMAT), _compat.text_repr(name))
            raise errors.InterfaceError(message, self._location)
    elif is_format_key:
        message = 'data format already is %s and must be set only once' \
            % _compat.text_repr(self._data_format.format)
        raise errors.InterfaceError(message, self._location)

    lower_value = value.lower()
    if is_first_row:
        self._data_format = data.DataFormat(lower_value, self._location)
    else:
        self._data_format.set_property(lower_name, value, self._location)
def __init__(self, field_name, is_allowed_to_be_empty, length, rule, data_format):
    """
    Set up a field that always has to contain the constant described by
    ``rule``, which must consist of at most one Python token. An empty
    ``rule`` describes a field that is always empty.

    :raises cutplace.errors.InterfaceError: if ``rule`` has more than one
      token, the combination of ``rule`` and ``is_allowed_to_be_empty`` is
      contradictory or ``length`` does not match the constant
    """
    super(ConstantFieldFormat, self).__init__(
        field_name, is_allowed_to_be_empty, length, rule, data_format, empty_value='')

    # Extract constant from rule tokens.
    rule_tokens = _tools.tokenize_without_space(rule)
    first_token = next(rule_tokens)
    if not _tools.is_eof_token(first_token):
        self._constant = _tools.token_text(first_token)
        second_token = next(rule_tokens)
        if not _tools.is_eof_token(second_token):
            surplus_text = _compat.text_repr(_tools.token_text(second_token))
            raise errors.InterfaceError(
                _('constant rule must be a single Python token but also found: %s') % surplus_text)
    else:
        # No rule means that the field must always be empty.
        self._constant = ''

    # An empty rule requires a field that may be empty and vice versa.
    has_empty_rule = (rule == '')
    if has_empty_rule:
        if not self.is_allowed_to_be_empty:
            raise errors.InterfaceError(
                _('field must be marked as empty to describe a constant empty value'))
    elif self.is_allowed_to_be_empty:
        raise errors.InterfaceError(
            _('to describe a Constant that can be empty, use a Choice field with a single choice'))

    constant_length = len(self._constant)
    try:
        self.length.validate(
            _('rule of constant field %s') % _compat.text_repr(self.field_name), constant_length)
    except errors.RangeValueError:
        raise errors.InterfaceError(
            _('length is %s but must be %d to match constant %s')
            % (self.length, len(self._constant), _compat.text_repr(self._constant)))
def validated_value(self, value):
    """
    Convert the text ``value`` to a :py:class:`decimal.Decimal` and check
    it against ``self.valid_range``.

    ``self.decimal_separator`` is translated to ``'.'`` and
    ``self.thousands_separator`` (if set) is removed before the conversion.

    :param str value: the non empty text to convert
    :return: the number represented by ``value``
    :rtype: decimal.Decimal
    :raises cutplace.errors.FieldValueError: if ``value`` contains more
      than one decimal separator, a thousands separator after the decimal
      separator, cannot be converted to a decimal number or is outside of
      the valid range
    """
    assert value

    # Build the text for decimal.Decimal from parts joined at the end
    # instead of concatenating strings character by character.
    parts = []
    found_decimal_separator = False
    for character_to_process in value:
        if character_to_process == self.decimal_separator:
            if found_decimal_separator:
                raise errors.FieldValueError(
                    _("decimal field must contain only one decimal separator (%s): %s")
                    % (_compat.text_repr(self.decimal_separator), _compat.text_repr(value)))
            parts.append(".")
            found_decimal_separator = True
        elif self.thousands_separator and (character_to_process == self.thousands_separator):
            # Thousands separators are dropped but must precede the decimal separator.
            if found_decimal_separator:
                raise errors.FieldValueError(_(
                    "decimal field must contain thousands separator (%r) only before "
                    "decimal separator (%r): %r "
                ) % (self.thousands_separator, self.decimal_separator, value))
        else:
            parts.append(character_to_process)
    translated_value = "".join(parts)

    try:
        result = decimal.Decimal(translated_value)
    except (decimal.DecimalException, ValueError) as error:
        # Only conversion problems are turned into a field error; anything
        # else indicates a programming error and should surface as is.
        message = "value is %r but must be a decimal number: %s" % (value, error)
        raise errors.FieldValueError(message)

    try:
        self.valid_range.validate(self._field_name, result)
    except errors.RangeValueError as error:
        raise errors.FieldValueError(str(error))
    return result
def validated_field_name(supposed_field_name, location=None):
    """
    Same as ``supposed_field_name`` except with surrounding white space
    removed.

    :param str supposed_field_name: the field name as found in the interface
    :param cutplace.errors.Location location: location used in case of errors
    :return: ``supposed_field_name`` without leading and trailing white space
    :raise cutplace.errors.InterfaceError: if ``supposed_field_name`` is
      empty, a Python keyword, does not start with a letter or contains
      characters other than ASCII letters, digits and underscore
    """
    field_name = supposed_field_name.strip()
    basic_requirements_text = 'field name must be a valid Python name consisting of ASCII letters, ' \
        'underscore (_) and digits'
    if field_name == '':
        # Without the leading blank the message would read "...digitsbut is empty".
        raise errors.InterfaceError(basic_requirements_text + ' but is empty', location)
    if keyword.iskeyword(field_name):
        raise errors.InterfaceError(
            "field name must not be a Python keyword but is: '%s'" % field_name, location)
    for character_index, character in enumerate(field_name):
        if character_index == 0:
            # NOTE(review): the message demands a lower-case letter while the
            # check uses _ASCII_LETTERS; confirm the content of that constant.
            if character not in _ASCII_LETTERS:
                raise errors.InterfaceError(
                    "field name must begin with a lower-case letter but is: %s"
                    % _compat.text_repr(field_name),
                    location)
        elif character not in _ASCII_LETTERS_DIGITS_AND_UNDERSCORE:
            raise errors.InterfaceError(
                basic_requirements_text + ' but is: %s' % _compat.text_repr(field_name), location)
    return field_name
def __init__(self, field_name, is_allowed_to_be_empty, length, rule, data_format):
    """
    Create a field format that accepts only the constant given by
    ``rule``, which may hold a single Python token or be empty in order to
    describe a field that is always empty.

    :raises cutplace.errors.InterfaceError: if ``rule`` contains further
      tokens, contradicts ``is_allowed_to_be_empty`` or has a length that
      does not fit ``length``
    """
    super(ConstantFieldFormat, self).__init__(
        field_name, is_allowed_to_be_empty, length, rule, data_format, empty_value='')

    # Extract constant from rule tokens.
    token_iterator = _tools.tokenize_without_space(rule)
    current_token = next(token_iterator)
    if _tools.is_eof_token(current_token):
        # No rule means that the field must always be empty.
        self._constant = ''
    else:
        self._constant = _tools.token_text(current_token)
        current_token = next(token_iterator)
        if not _tools.is_eof_token(current_token):
            unexpected_text = _tools.token_text(current_token)
            raise errors.InterfaceError(
                'constant rule must be a single Python token but also found: %s'
                % _compat.text_repr(unexpected_text))

    rule_is_empty = (rule == '')
    if self.is_allowed_to_be_empty and not rule_is_empty:
        message = 'to describe a Constant that can be empty, use a Choice field with a single choice'
        raise errors.InterfaceError(message)
    if not self.is_allowed_to_be_empty and rule_is_empty:
        message = 'field must be marked as empty to describe a constant empty value'
        raise errors.InterfaceError(message)

    # The declared length must allow for exactly the constant.
    name_for_range = 'rule of constant field %s' % _compat.text_repr(self.field_name)
    try:
        self.length.validate(name_for_range, len(self._constant))
    except errors.RangeValueError:
        raise errors.InterfaceError(
            'length is %s but must be %d to match constant %s'
            % (self.length, len(self._constant), _compat.text_repr(self._constant)))
def validated_value(self, value):
    """
    Same as ``value`` provided it equals the constant of this field.

    :raises cutplace.errors.FieldValueError: if ``value`` differs from the
      constant
    """
    assert value

    expected_constant = self._constant
    if value != expected_constant:
        message = "value is %s but must be constant: %s" % (
            _compat.text_repr(value), _compat.text_repr(expected_constant))
        raise errors.FieldValueError(message)
    return value
def validated_value(self, value):
    """
    Same as ``value`` provided it matches ``self.regex``.

    :raises cutplace.errors.FieldValueError: if ``value`` does not match
    """
    assert value

    if self.regex.match(value):
        return value
    details = (
        _compat.text_repr(value),
        _compat.text_repr(self.rule),
        _compat.text_repr(self.pattern),
    )
    raise errors.FieldValueError('value %s must match pattern: %s (regex %s)' % details)
def validated_value(self, value):
    """
    Same as ``value`` provided it matches the regular expression
    ``self.regex``.

    :raises cutplace.errors.FieldValueError: if ``value`` does not match
    """
    assert value

    matches_rule = self.regex.match(value)
    if not matches_rule:
        message = "value %s must match regular expression: %s" % (
            _compat.text_repr(value), _compat.text_repr(self.rule))
        raise errors.FieldValueError(message)
    return value
def validated_value(self, value):
    """
    Same as ``value`` provided it matches ``self.regex``, the regular
    expression belonging to the pattern in ``self.rule``.

    :raises cutplace.errors.FieldValueError: if ``value`` does not match
    """
    assert value

    if not self.regex.match(value):
        value_text = _compat.text_repr(value)
        rule_text = _compat.text_repr(self.rule)
        pattern_text = _compat.text_repr(self.pattern)
        raise errors.FieldValueError(
            _('value %s must match pattern: %s (regex %s)') % (value_text, rule_text, pattern_text))
    return value
def _validated_character(key, value, location):
    r"""
    The single character described by ``value`` for data format property
    ``key``. Possible descriptions are:

    * a decimal or hex number (prefixed with ``'0x'``) referring to the
      ASCII/Unicode of the character
    * a string containing a single character such as ``'\t'``.
    * a symbolic name from :py:const:`cutplace.errors.NAME_TO_ASCII_CODE_MAP`
      such as ``tab``.

    :raises cutplace.errors.InterfaceError: on any broken ``value``
    """
    assert key
    assert value is not None

    name_for_errors = 'data format property %s' % _compat.text_repr(key)
    text = value.strip()
    if (len(text) == 1) and (text not in string.digits):
        # A single character other than a digit stands for itself.
        result_code = ord(text)
    else:
        token_stream = tokenize.generate_tokens(io.StringIO(value).readline)
        first_token = next(token_stream)
        if _tools.is_eof_token(first_token):
            raise errors.InterfaceError(
                _("value for %s must be specified") % name_for_errors, location)
        token_type = first_token[0]
        token_text = first_token[1]
        # Map the kind of token to the function computing its code.
        code_functions = {
            token.NAME: ranges.code_for_symbolic_token,
            token.NUMBER: ranges.code_for_number_token,
            token.STRING: ranges.code_for_string_token,
        }
        code_function = code_functions.get(token_type)
        if code_function is not None:
            result_code = code_function(name_for_errors, token_text, location)
        elif (len(token_text) == 1) and not _tools.is_eof_token(first_token):
            result_code = ord(token_text)
        else:
            raise errors.InterfaceError(
                _('value for %s must a number, a single character or a symbolic name but is: %s'
                  ) % (name_for_errors, _compat.text_repr(value)), location)
        # Ensure there are no further tokens; a NEWLINE token is tolerated.
        following_token = next(token_stream)
        is_at_end = _tools.is_eof_token(following_token) or (following_token[0] == tokenize.NEWLINE)
        if not is_at_end:
            raise errors.InterfaceError(
                _('value for %s must be a single character but is: %s')
                % (name_for_errors, _compat.text_repr(value)), location)
    # TODO: Handle 'none' properly.
    assert result_code is not None
    assert result_code >= 0
    result = six.unichr(result_code)
    assert result is not None
    return result
def validate_row(self, row):
    """
    Validate a single ``row``:

    1. Check if the number of items in ``row`` matches the number of
       fields in the CID
    2. Check that all fields conform to their field format (as defined by
       :py:class:`cutplace.fields.AbstractFieldFormat` and its descendants)
    3. Check that the row conforms to all row checks (as defined by
       :py:meth:`cutplace.checks.AbstractCheck.check_row`)

    The caller is responsible for :py:attr:`~.location` pointing to the
    correct row in the data while ``validate_row`` takes care of calling
    :py:meth:`cutplace.errors.Location.set_cell` appropriately.
    """
    assert row is not None
    assert self.location is not None

    # Validate the number of fields.
    expected_count = self._expected_item_count
    actual_count = len(row)
    if actual_count < expected_count:
        raise errors.DataError(
            'row must contain %d fields but only has %d: %s' % (expected_count, actual_count, row),
            self.location)
    if actual_count > expected_count:
        surplus_values = row[expected_count:]
        raise errors.DataError(
            'row must contain %d fields but has %d, additional values are: %s'
            % (expected_count, actual_count, surplus_values),
            self.location)

    # Validate each field according to its format.
    for cell_index, cell_value in enumerate(row):
        self.location.set_cell(cell_index)
        field_format = self.cid.field_formats[cell_index]
        try:
            if not isinstance(cell_value, six.text_type):
                raise errors.FieldValueError(
                    'type must be %s instead of %s: %s'
                    % (six.text_type.__name__, type(cell_value).__name__, _compat.text_repr(cell_value)))
            field_format.validated(cell_value)
        except errors.FieldValueError as error:
            # Add the name of the field and the location to the error before passing it on.
            error.prepend_message(
                'cannot accept field %s' % _compat.text_repr(field_format.field_name), self.location)
            raise

    # Validate the whole row according to row checks.
    self.location.set_cell(0)
    field_map = _create_field_map(self.cid.field_names, row)
    for check_name in self.cid.check_names:
        check = self.cid.check_map[check_name]
        check.check_row(field_map, self.location)
def _validated_int_at_least_0(key, value, location):
    """
    The integer number represented by ``value`` for data format property
    ``key``.

    :raises cutplace.errors.InterfaceError: if ``value`` is not an integer
      number or less than 0
    """
    assert key
    assert value is not None

    try:
        number = int(value)
    except ValueError:
        message = 'data format property %s is %s but must be a number' % (
            _compat.text_repr(key), _compat.text_repr(value))
        raise errors.InterfaceError(message, location)
    if number >= 0:
        return number
    raise errors.InterfaceError(
        'data format property %s is %d but must be at least 0' % (_compat.text_repr(key), number),
        location)
def _validated_character(key, value, location):
    r"""
    A single character intended as value for data format property ``key``
    derived from ``value``, which can be:

    * a decimal or hex number (prefixed with ``'0x'``) referring to the
      ASCII/Unicode of the character
    * a string containing a single character such as ``'\t'``.
    * a symbolic name from :py:const:`cutplace.errors.NAME_TO_ASCII_CODE_MAP`
      such as ``tab``.

    :param str key: name of the data format property, used in error messages
    :param str value: the text describing the character
    :param cutplace.errors.Location location: location used in case of errors
    :return: the character described by ``value``
    :raises cutplace.errors.InterfaceError: on any broken ``value``
    """
    assert key
    assert value is not None

    name_for_errors = 'data format property %s' % _compat.text_repr(key)
    stripped_value = value.strip()
    if (len(stripped_value) == 1) and (stripped_value not in string.digits):
        # A single character other than a digit stands for itself.
        result_code = ord(stripped_value)
    else:
        tokens = tokenize.generate_tokens(io.StringIO(value).readline)
        next_token = next(tokens)
        if _tools.is_eof_token(next_token):
            raise errors.InterfaceError(
                "value for %s must be specified" % name_for_errors, location)
        next_type = next_token[0]
        next_value = next_token[1]
        if next_type == token.NAME:
            result_code = ranges.code_for_symbolic_token(name_for_errors, next_value, location)
        elif next_type == token.NUMBER:
            result_code = ranges.code_for_number_token(name_for_errors, next_value, location)
        elif next_type == token.STRING:
            result_code = ranges.code_for_string_token(name_for_errors, next_value, location)
        elif (len(next_value) == 1) and not _tools.is_eof_token(next_token):
            result_code = ord(next_value)
        else:
            raise errors.InterfaceError(
                'value for %s must a number, a single character or a symbolic name but is: %s'
                % (name_for_errors, _compat.text_repr(value)), location)
        # Ensure there are no further tokens. Current versions of tokenize
        # emit an implicit NEWLINE token before the end marker when the
        # input lacks a trailing line break, so that token must be accepted.
        next_token = next(tokens)
        if (not _tools.is_eof_token(next_token)) and (next_token[0] != tokenize.NEWLINE):
            raise errors.InterfaceError(
                'value for %s must be a single character but is: %s'
                % (name_for_errors, _compat.text_repr(value)), location)
    # TODO: Handle 'none' properly.
    assert result_code is not None
    assert result_code >= 0
    result = six.unichr(result_code)
    assert result is not None
    return result
def _validated_int_at_least_0(key, value, location):
    """
    Convert ``value`` for data format property ``key`` to an integer
    number of at least 0.

    :raises cutplace.errors.InterfaceError: if ``value`` is not an integer
      number or less than 0
    """
    assert key
    assert value is not None

    key_text = _compat.text_repr(key)
    try:
        result = int(value)
    except ValueError:
        raise errors.InterfaceError(
            _('data format property %s is %s but must be a number') % (key_text, _compat.text_repr(value)),
            location)
    is_negative = result < 0
    if is_negative:
        raise errors.InterfaceError(
            _('data format property %s is %d but must be at least 0') % (key_text, result),
            location)
    return result
def code_for_string_token(name, value, location):
    """
    The numeric code for text representing a string with a single
    character in ``value``.

    :param str name: the name of the value as it is known to the end user
    :param str value: the text that represents a string with a single
      character, including the surrounding quotes; escape sequences such
      as ``\\t`` are resolved
    :param cutplace.errors.Location location: the location of ``value`` or
      ``None``
    :return: the code of the character
    :rtype: int
    :raises cutplace.errors.InterfaceError: if the text between the quotes
      does not describe exactly one character
    """
    assert name is not None
    assert value is not None
    assert len(value) >= 2
    left_quote = value[0]
    right_quote = value[-1]
    assert left_quote in "\"\'", "left_quote=%r" % left_quote
    assert right_quote in "\"\'", "right_quote=%r" % right_quote

    value_without_quotes = value[1:-1]
    if len(value_without_quotes) != 1:
        # More than one character between the quotes can still describe a
        # single character using an escape sequence.
        try:
            value_without_quotes = value_without_quotes.encode('utf-8').decode('unicode_escape')
        except UnicodeDecodeError:
            # A malformed escape sequence such as '\x' cannot describe a
            # character; report it like any other broken text below.
            value_without_quotes = ''
        if len(value_without_quotes) != 1:
            raise errors.InterfaceError(
                _('text for %s must be a single character but is: %s') % (name, _compat.text_repr(value)),
                location)
    return ord(value_without_quotes)
def _validated_choice(key, value, choices, location, ignore_case=False):
    """
    Same as ``value`` or ``value.lower()`` in case ``ignore_case`` is set
    to ``True``.

    If the supposed result is not one of the available ``choices``, raise
    `errors.InterfaceError`.
    """
    assert key
    assert value is not None
    assert choices

    if ignore_case:
        result = value.lower()
    else:
        result = value
    if result in choices:
        return result
    raise errors.InterfaceError(
        'data format property %s is %s but must be one of: %s'
        % (_compat.text_repr(key), _compat.text_repr(value), _tools.human_readable_list(choices)),
        location)
def __init__(self, field_name, is_allowed_to_be_empty, length, rule, data_format):
    """
    Set up a field that accepts the choices listed in ``rule``, which is a
    comma separated list of tokens. The texts of the tokens are collected
    in ``self.choices``.

    :raises cutplace.errors.InterfaceError: if ``rule`` has a comma without
      preceding choice, an empty choice, choices not separated by a comma
      or a trailing comma, or if there are no choices at all despite the
      field not being allowed to be empty
    """
    super(ChoiceFieldFormat, self).__init__(
        field_name, is_allowed_to_be_empty, length, rule, data_format, empty_value='')
    self.choices = []

    # Split rule into tokens, ignoring white space.
    rule_tokens = _tools.tokenize_without_space(rule)

    # Extract choices from rule tokens.
    # NOTE(review): ``previous_token`` is never reassigned, so the message
    # about a comma without choice always reports ``None``.
    previous_token = None
    current_token = next(rule_tokens)
    while not _tools.is_eof_token(current_token):
        if _tools.is_comma_token(current_token):
            # Handle comma after comma without choice.
            previous_token_text = previous_token[1] if previous_token else None
            raise errors.InterfaceError(
                "choice value must precede a comma (,) but found: %s"
                % _compat.text_repr(previous_token_text))
        choice = _tools.token_text(current_token)
        if not choice:
            raise errors.InterfaceError(
                "choice field must be allowed to be empty instead of containing an empty choice")
        self.choices.append(choice)

        current_token = next(rule_tokens)
        if _tools.is_eof_token(current_token):
            continue
        if not _tools.is_comma_token(current_token):
            raise errors.InterfaceError(
                "comma (,) must follow choice value %s but found: %s"
                % (_compat.text_repr(choice), _compat.text_repr(current_token[1])))
        # Process next choice after comma.
        current_token = next(rule_tokens)
        if _tools.is_eof_token(current_token):
            raise errors.InterfaceError("trailing comma (,) must be removed")

    if not self.is_allowed_to_be_empty and not self.choices:
        raise errors.InterfaceError("choice field without any choices must be allowed to be empty")
def _has_data_after_skipped_line_delimiter():
    """
    If `fixed_file` has data, assume they are a line delimiter as
    specified by `line_delimiter` and read and validate them.

    In case `line_delimiter` is `None`, the result is always ``True``
    even if the input has already reached its end.

    The names ``fixed_file``, ``line_delimiter``, ``location`` and
    ``unread_character_after_line_delimiter`` are not parameters; they are
    resolved from the surrounding scope.

    :return: ``False`` if reading the line delimiter hit the end of the
      input, otherwise ``True``
    :raises cutplace.errors.DataFormatError: if the characters read are
      not the expected line delimiter
    """
    assert location is not None
    assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
    # Any character unread by a previous call must have been consumed by now.
    assert unread_character_after_line_delimiter[0] is None

    result = True
    if line_delimiter is not None:
        # Read as many characters as the expected delimiter has; for 'any'
        # start with a single character.
        if line_delimiter == '\r\n':
            actual_line_delimiter = fixed_file.read(2)
        else:
            assert line_delimiter in ('\n', '\r', 'any')
            actual_line_delimiter = fixed_file.read(1)
        if actual_line_delimiter == '':
            # Nothing left to read.
            result = False
        elif line_delimiter == 'any':
            if actual_line_delimiter == '\r':
                # Process the optional '\n' for 'any'.
                anticipated_linefeed = fixed_file.read(1)
                if anticipated_linefeed == '\n':
                    actual_line_delimiter += anticipated_linefeed
                elif anticipated_linefeed == '':
                    # The '\r' was the last character of the input.
                    result = False
                else:
                    # Unread the previous character because it is unrelated to line delimiters.
                    unread_character_after_line_delimiter[0] = anticipated_linefeed
            if actual_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                valid_line_delimiters = _tools.human_readable_list(
                    _VALID_FIXED_ANY_LINE_DELIMITERS)
                raise errors.DataFormatError(
                    'line delimiter is %s but must be one of: %s'
                    % (_compat.text_repr(actual_line_delimiter),
                       valid_line_delimiters), location)
        elif actual_line_delimiter != line_delimiter:
            raise errors.DataFormatError(
                'line delimiter is %s but must be %s'
                % (_compat.text_repr(actual_line_delimiter),
                   _compat.text_repr(line_delimiter)), location)
    return result
def _validated_choice(key, value, choices, location, ignore_case=False):
    """
    Same as ``value`` or ``value.lower()`` in case ``ignore_case`` is set
    to ``True``.

    If the supposed result is not one of the available ``choices``, raise
    `errors.InterfaceError`.
    """
    assert key
    assert value is not None
    assert choices

    result = value.lower() if ignore_case else value
    is_valid_choice = result in choices
    if not is_valid_choice:
        details = (
            _compat.text_repr(key),
            _compat.text_repr(value),
            _tools.human_readable_list(choices),
        )
        raise errors.InterfaceError(
            _('data format property %s is %s but must be one of: %s') % details, location)
    return result
def validated_value(self, value):
    """
    Same as ``value`` provided it is one of ``self.choices``.

    :raises cutplace.errors.FieldValueError: if ``value`` is not among the
      choices
    """
    assert value

    available_choices = self.choices
    if value not in available_choices:
        message = "value is %s but must be one of: %s" % (
            _compat.text_repr(value), _tools.human_readable_list(available_choices))
        raise errors.FieldValueError(message)
    return value
def check_distinct(name1, name2):
    # Ensure that the attributes of ``self`` stored as ``'_' + name1`` and
    # ``'_' + name2`` have different values; ``self`` is taken from the
    # surrounding scope.
    assert name1 is not None
    assert name2 is not None
    assert name1 < name2, \
        'names must be sorted for consistent error message: %r, %r' % (name1, name2)

    attributes = self.__dict__
    first_value = attributes['_' + name1]
    second_value = attributes['_' + name2]
    values_are_equal = first_value == second_value
    if values_are_equal:
        message = "'%s' and '%s' are both %s but must be different from each other" % (
            name1, name2, _compat.text_repr(first_value))
        raise errors.InterfaceError(message)
def _has_data_after_skipped_line_delimiter():
    """
    If `fixed_file` has data, assume they are a line delimiter as
    specified by `line_delimiter` and read and validate them.

    In case `line_delimiter` is `None`, the result is always ``True``
    even if the input has already reached its end.

    This function takes no parameters: ``fixed_file``, ``line_delimiter``,
    ``location`` and ``unread_character_after_line_delimiter`` come from
    the surrounding scope.

    :return: ``True`` unless the end of the input was reached while
      reading the line delimiter
    :raises cutplace.errors.DataFormatError: if the characters read differ
      from the expected line delimiter
    """
    assert location is not None
    assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
    # A character unread earlier must already have been processed.
    assert unread_character_after_line_delimiter[0] is None

    result = True
    if line_delimiter is not None:
        # '\r\n' needs two characters, everything else starts with one.
        if line_delimiter == '\r\n':
            actual_line_delimiter = fixed_file.read(2)
        else:
            assert line_delimiter in ('\n', '\r', 'any')
            actual_line_delimiter = fixed_file.read(1)
        if actual_line_delimiter == '':
            # End of input instead of a line delimiter.
            result = False
        elif line_delimiter == 'any':
            if actual_line_delimiter == '\r':
                # Process the optional '\n' for 'any'.
                anticipated_linefeed = fixed_file.read(1)
                if anticipated_linefeed == '\n':
                    actual_line_delimiter += anticipated_linefeed
                elif anticipated_linefeed == '':
                    # The input ended right after the '\r'.
                    result = False
                else:
                    # Unread the previous character because it is unrelated to line delimiters.
                    unread_character_after_line_delimiter[0] = anticipated_linefeed
            if actual_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                valid_line_delimiters = _tools.human_readable_list(_VALID_FIXED_ANY_LINE_DELIMITERS)
                raise errors.DataFormatError(
                    'line delimiter is %s but must be one of: %s'
                    % (_compat.text_repr(actual_line_delimiter), valid_line_delimiters),
                    location)
        elif actual_line_delimiter != line_delimiter:
            raise errors.DataFormatError(
                'line delimiter is %s but must be %s'
                % (_compat.text_repr(actual_line_delimiter), _compat.text_repr(line_delimiter)),
                location)
    return result
def check_distinct(name1, name2):
    # Raise an interface error in case the values ``self`` (from the
    # surrounding scope) stores for ``'_' + name1`` and ``'_' + name2`` are
    # the same.
    assert name1 is not None
    assert name2 is not None
    assert name1 < name2, 'names must be sorted for consistent error message: %r, %r' % (
        name1, name2)

    value1, value2 = [self.__dict__['_' + name] for name in (name1, name2)]
    if not (value1 == value2):
        return
    raise errors.InterfaceError(
        _("'%s' and '%s' are both %s but must be different from each other")
        % (name1, name2, _compat.text_repr(value1)))
def validated_value(self, value):
    """
    The result of parsing ``value`` with :py:func:`time.strptime` using
    ``self.strptimeFormat``.

    :raises cutplace.errors.FieldValueError: if ``value`` does not match
      the format
    """
    assert value

    try:
        return time.strptime(value, self.strptimeFormat)
    except ValueError as error:
        details = (
            self.human_readable_format,
            self.strptimeFormat,
            _compat.text_repr(value),
            error,
        )
        raise errors.FieldValueError("date must match format %s (%s) but is: %s (%s)" % details)
def write_row(self, row_to_write):
    """
    Write a row of fixed length strings followed by the line separator (if
    there is one) and advance the location by one line.

    :param list row_to_write: a list of str where each item must have
      exactly the same length as the corresponding entry in
      :py:attr:`~.field_lengths`
    :raises AssertionError: if ``row_to_write`` is not a list of
      strings with each matching the corresponding ``field_lengths``
      as specified to :py:meth:`~.__init__`.
    :raises cutplace.errors.DataFormatError: if the row cannot be encoded
      for the target stream
    """
    assert row_to_write is not None
    item_count = len(row_to_write)
    assert item_count == self._expected_row_item_count, \
        '%s: row must have %d items instead of %d: %s' \
        % (self.location, self._expected_row_item_count, item_count, row_to_write)

    if __debug__:
        # Check type and length of each item only if assertions are enabled.
        for item_index, item in enumerate(row_to_write):
            self.location.set_cell(item_index)
            name_and_length = self._field_names_and_lengths[item_index]
            field_name, expected_length = name_and_length
            assert isinstance(item, six.text_type), \
                '%s: field %s must be of type %s instead of %s: %r' \
                % (self.location, _compat.text_repr(field_name), six.text_type.__name__,
                   type(item).__name__, item)
            actual_length = len(item)
            assert actual_length == expected_length, \
                '%s: field %s must have exactly %d characters instead of %d: %r' \
                % (self.location, _compat.text_repr(field_name), expected_length,
                   actual_length, item)
        self.location.set_cell(0)

    target_stream = self._target_stream
    try:
        target_stream.write(''.join(row_to_write))
    except UnicodeEncodeError as error:
        raise errors.DataFormatError(
            'cannot write data row: %s; row=%s' % (error, row_to_write), self.location)
    line_separator = self._line_separator
    if line_separator is not None:
        target_stream.write(line_separator)
    self.location.advance_line()
def validated_value(self, value):
    """
    The integer number represented by ``value`` after checking it against
    ``self.valid_range``.

    :raises cutplace.errors.FieldValueError: if ``value`` is not an integer
      number or is outside of the valid range
    """
    assert value

    try:
        number = int(value)
    except ValueError:
        message = "value must be an integer number: %s" % _compat.text_repr(value)
        raise errors.FieldValueError(message)
    try:
        self.valid_range.validate("value", number)
    except errors.RangeValueError as error:
        raise errors.FieldValueError(six.text_type(error))
    return number
def write_row(self, row_to_write):
    """
    Write a row of fixed length strings.

    The items are joined without separator and written to the target
    stream, followed by the line separator unless it is ``None``.
    Afterwards the location is advanced by one line.

    :param list row_to_write: a list of str where each item must have
      exactly the same length as the corresponding entry in
      :py:attr:`~.field_lengths`
    :raises AssertionError: if ``row_to_write`` is not a list of
      strings with each matching the corresponding ``field_lengths``
      as specified to :py:meth:`~.__init__`.
    :raises cutplace.errors.DataFormatError: if writing the row causes a
      ``UnicodeEncodeError``
    """
    assert row_to_write is not None
    row_to_write_item_count = len(row_to_write)
    assert row_to_write_item_count == self._expected_row_item_count, \
        '%s: row must have %d items instead of %d: %s' \
        % (self.location, self._expected_row_item_count, row_to_write_item_count, row_to_write)
    # The checks for each item consist of assertions only, so the loop is
    # skipped altogether when assertions are disabled.
    if __debug__:
        for field_index, field_value in enumerate(row_to_write):
            # Point the location to the current item so assertion messages refer to it.
            self.location.set_cell(field_index)
            field_name, expected_field_length = self._field_names_and_lengths[field_index]
            assert isinstance(field_value, six.text_type), \
                '%s: field %s must be of type %s instead of %s: %r' \
                % (self.location, _compat.text_repr(field_name), six.text_type.__name__,
                   type(field_value).__name__, field_value)
            actual_field_length = len(field_value)
            assert actual_field_length == expected_field_length, \
                '%s: field %s must have exactly %d characters instead of %d: %r' \
                % (self.location, _compat.text_repr(field_name), expected_field_length,
                   actual_field_length, field_value)
        # NOTE(review): assumed to belong to the ``if __debug__`` block; the
        # original indentation is not available - confirm.
        self.location.set_cell(0)
    try:
        self._target_stream.write(''.join(row_to_write))
    except UnicodeEncodeError as error:
        raise errors.DataFormatError(
            'cannot write data row: %s; row=%s' % (error, row_to_write), self.location)
    if self._line_separator is not None:
        self._target_stream.write(self._line_separator)
    self.location.advance_line()
def add_check_row(self, possibly_incomplete_items):
    """
    Declare a check from the cells in ``possibly_incomplete_items``, which
    ideally is a list composed of 3 elements:

    1. description ('customer_id_must_be_unique')
    2. type (e.g. 'IsUnique' mapping to :py:class:`cutplace.checks.IsUniqueCheck`)
    3. rule (e.g. 'customer_id')

    Missing items are interpreted as empty string (``''``), additional items
    are ignored.

    :raises cutplace.errors.InterfaceError: on broken
      ``possibly_incomplete_items``
    """
    assert possibly_incomplete_items is not None
    cells = list(possibly_incomplete_items)
    # HACK: Ignore possible concatenated (empty) cells between description and type.
    while len(cells) >= 2 and cells[1].strip() == '':
        cells.pop(1)
    description, type_name, rule = (cells + ['', '', ''])[:3]

    # First cell: the description.
    self._location.advance_cell()
    if description == '':
        raise errors.InterfaceError('check description must be specified', self._location)

    # Second cell: the type, which must map to a known check class.
    self._location.advance_cell()
    known_check_classes = self._check_name_to_class_map
    if (type_name + "Check") not in known_check_classes:
        available_types_text = _tools.human_readable_list(sorted(known_check_classes.keys()))
        raise errors.InterfaceError(
            "check type is '%s' but must be one of: %s" % (type_name, available_types_text),
            self._location)

    _log.debug("create check: %s(%r, %r)", type_name, description, rule)
    check_class = self._create_check_class(type_name)
    check = check_class.__new__(check_class, description, rule, self._field_names, self._location)
    check.__init__(description, rule, self._field_names, self._location)

    # Descriptions act as unique key for checks.
    self._location.set_cell(1)
    first_check = self._check_name_to_check_map.get(description)
    if first_check is not None:
        raise errors.InterfaceError(
            "check description must be used only once: %s" % _compat.text_repr(description),
            self._location, "first declaration", first_check.location)
    self._check_name_to_check_map[description] = check
    self._check_names.append(description)
    assert len(self.check_names) == len(self._check_name_to_check_map)
def __init__(self, field_name, is_allowed_to_be_empty, length, rule, data_format):
    """
    Create a choice field whose valid values are the comma separated
    choices listed in ``rule``. The parsed choices are stored in
    ``self.choices`` in the order they have been declared.

    :raises cutplace.errors.InterfaceError: if ``rule`` contains a comma
      without a preceding choice, an empty choice, two choices without a
      comma between them or a trailing comma, or if there are no choices
      at all while the field must not be empty
    """
    super(ChoiceFieldFormat, self).__init__(
        field_name, is_allowed_to_be_empty, length, rule, data_format, empty_value='')
    self.choices = []

    # Split rule into tokens, ignoring white space.
    tokens = _tools.tokenize_without_space(rule)

    # Extract choices from rule tokens.
    # Remember the most recently consumed token so the error message for a
    # misplaced comma can show what actually preceded it.
    previous_toky = None
    toky = next(tokens)
    while not _tools.is_eof_token(toky):
        if _tools.is_comma_token(toky):
            # Handle comma after comma without choice.
            previous_toky_text = previous_toky[1] if previous_toky else None
            raise errors.InterfaceError(
                "choice value must precede a comma (,) but found: %s" % _compat.text_repr(previous_toky_text))
        choice = _tools.token_text(toky)
        if not choice:
            raise errors.InterfaceError(
                "choice field must be allowed to be empty instead of containing an empty choice")
        self.choices.append(choice)
        previous_toky = toky
        toky = next(tokens)
        if not _tools.is_eof_token(toky):
            if not _tools.is_comma_token(toky):
                raise errors.InterfaceError(
                    "comma (,) must follow choice value %s but found: %s"
                    % (_compat.text_repr(choice), _compat.text_repr(toky[1])))
            # Process next choice after comma.
            previous_toky = toky
            toky = next(tokens)
            if _tools.is_eof_token(toky):
                raise errors.InterfaceError("trailing comma (,) must be removed")
    if not self.is_allowed_to_be_empty and not self.choices:
        raise errors.InterfaceError("choice field without any choices must be allowed to be empty")
def validated_value(self, value):
    """
    The ``time.struct_time`` obtained by parsing ``value`` according to
    ``self.strptime_format``. For Excel data and fields without time, a
    trailing ``DateTimeFieldFormat._NO_EXCEL_TIME`` is removed before
    parsing.

    :raises cutplace.errors.FieldValueError: if ``value`` does not match
      the format
    """
    assert value
    has_excel_time_to_strip = (
        not self._has_time
        and (self.data_format.format == data.FORMAT_EXCEL)
        and value.endswith(DateTimeFieldFormat._NO_EXCEL_TIME)
    )
    if has_excel_time_to_strip:
        text_to_parse = value[:-DateTimeFieldFormat._NO_EXCEL_TIME_LENGTH]
    else:
        text_to_parse = value
    try:
        return time.strptime(text_to_parse, self.strptime_format)
    except ValueError as error:
        raise errors.FieldValueError(
            "date must match format %s (%s) but is: %s (%s)"
            % (self.human_readable_format, self.strptime_format, _compat.text_repr(text_to_parse), error))
def human_readable_list(items, final_separator='or'):
    """
    All values in ``items`` in a human readable form: the text
    representations of the items separated by commas, except for the last
    two, which are separated by ``final_separator``. An empty ``items``
    results in an empty string.

    This is meant to be used in error messages, where dumping ``"%r"`` to
    the user does not cut it.
    """
    assert items is not None
    assert final_separator is not None
    item_count = len(items)
    if item_count == 0:
        result = ''
    elif item_count == 1:
        result = _compat.text_repr(items[0])
    else:
        leading_texts = [_compat.text_repr(item) for item in items[:-1]]
        last_text = _compat.text_repr(items[-1])
        result = ', '.join(leading_texts) + ' ' + final_separator + ' ' + last_text
        assert result
    assert result is not None
    return result
def validated_value(self, value):
    """
    Parse ``value`` according to ``self.strptime_format`` and return the
    resulting ``time.struct_time``. For Excel data and fields without
    time, a trailing ``DateTimeFieldFormat._NO_EXCEL_TIME`` is cut off
    first.

    :raises cutplace.errors.FieldValueError: if ``value`` does not match
      the format
    """
    assert value
    value_to_validate = value
    if not self._has_time:
        if self.data_format.format == data.FORMAT_EXCEL:
            if value.endswith(DateTimeFieldFormat._NO_EXCEL_TIME):
                value_to_validate = value[:-DateTimeFieldFormat._NO_EXCEL_TIME_LENGTH]
    try:
        result = time.strptime(value_to_validate, self.strptime_format)
    except ValueError:
        parse_error = sys.exc_info()[1]
        message_arguments = (
            self.human_readable_format,
            self.strptime_format,
            _compat.text_repr(value_to_validate),
            parse_error,
        )
        raise errors.FieldValueError(
            _("date must match format %s (%s) but is: %s (%s)") % message_arguments)
    return result
def add_check_row(self, possibly_incomplete_items):
    """
    Add the check described by ``possibly_incomplete_items``, which
    ideally is a list composed of 3 elements:

    1. description ('customer_id_must_be_unique')
    2. type (e.g. 'IsUnique' mapping to :py:class:`cutplace.checks.IsUniqueCheck`)
    3. rule (e.g. 'customer_id')

    Missing items are interpreted as empty string (``''``), additional items
    are ignored.

    :raises cutplace.errors.InterfaceError: on broken
      ``possibly_incomplete_items``
    """
    assert possibly_incomplete_items is not None
    row = list(possibly_incomplete_items)
    # HACK: Ignore possible concatenated (empty) cells between description and type.
    while (len(row) > 1) and not row[1].strip():
        del row[1]
    # Pad the private copy so the first 3 cells always exist.
    row.extend([''] * 3)
    check_description, check_type, check_rule = row[:3]
    location = self._location

    location.advance_cell()
    if check_description == '':
        raise errors.InterfaceError('check description must be specified', location)

    location.advance_cell()
    check_class_name = check_type + "Check"
    if check_class_name not in self._check_name_to_class_map:
        available_names = sorted(self._check_name_to_class_map.keys())
        raise errors.InterfaceError(
            "check type is '%s' but must be one of: %s"
            % (check_type, _tools.human_readable_list(available_names)),
            location)

    _log.debug("create check: %s(%r, %r)", check_type, check_description, check_rule)
    check_class = self._create_check_class(check_type)
    new_check = check_class.__new__(check_class, check_description, check_rule, self._field_names, location)
    new_check.__init__(check_description, check_rule, self._field_names, location)

    # Reject a description that has already been used by an earlier check.
    location.set_cell(1)
    previous_check = self._check_name_to_check_map.get(check_description)
    if previous_check is not None:
        raise errors.InterfaceError(
            "check description must be used only once: %s" % _compat.text_repr(check_description),
            location, "first declaration", previous_check.location)
    self._check_name_to_check_map[check_description] = new_check
    self._check_names.append(check_description)
    assert len(self.check_names) == len(self._check_name_to_check_map)
def validate(self, name, value, location=None):
    """
    Validate that ``value`` is within at least one of the (lower, upper)
    pairs in ``self._items``. Without any items (``None``) every value
    that can be converted to a decimal is accepted.

    :param str name: the name of ``value`` known to the end user for
      usage in possible error messages
    :param int value: the value to validate
    :param cutplace.errors.Location location: the location to refer to
      in possible error messages
    :raises cutplace.errors.RangeValueError: if ``value`` is out of range
      or cannot be converted to a decimal
    """
    assert name is not None
    assert name
    assert value is not None

    if isinstance(value, decimal.Decimal):
        value_as_decimal = value
    else:
        try:
            value_as_decimal = decimal.Decimal(value)
        except decimal.DecimalException:
            raise errors.RangeValueError(
                _("value must be decimal but is %s") % _compat.text_repr(value), location)

    if self._items is not None:
        for lower, upper in self._items:
            if lower is None:
                # Open lower end: only the upper limit matters.
                assert upper is not None
                is_within_item = value_as_decimal <= upper
            elif upper is None:
                # Open upper end: only the lower limit matters.
                is_within_item = value_as_decimal >= lower
            else:
                is_within_item = (value_as_decimal >= lower) and (value_as_decimal <= upper)
            if is_within_item:
                break
        else:
            # None of the items matched.
            raise errors.RangeValueError(
                _("%s is %r but must be within range: %r") % (name, value_as_decimal, self), location)
def code_for_number_token(name, value, location):
    """
    The numeric code for text representing an :py:class:`int` in ``value``.

    :param str name: the name of the value as it is known to the end user
    :param str value: the text that represents an :py:class:`int`
    :param cutplace.errors.Location location: the location of ``value`` or ``None``
    :raises cutplace.errors.InterfaceError: if ``value`` is not an integer number
    """
    assert name is not None
    assert value is not None
    try:
        # Note: base 0 automatically handles prefixes like 0x.
        return int(value, 0)
    except ValueError:
        message = 'numeric value for %s must be an integer number but is: %s' % (name, _compat.text_repr(value))
        raise errors.InterfaceError(message, location)
def validate(self, name, value, location=None):
    """
    Validate that ``value`` is within the specified range, i.e. matches
    at least one of the (lower, upper) pairs in ``self._items``. If
    ``self._items`` is ``None``, only the conversion to a decimal is
    checked.

    :param str name: the name of ``value`` known to the end user for
      usage in possible error messages
    :param int value: the value to validate
    :param cutplace.errors.Location location: the location to refer to
      in possible error messages
    :raises cutplace.errors.RangeValueError: if ``value`` is out of range
      or cannot be converted to a decimal
    """
    assert name is not None
    assert name
    assert value is not None

    value_as_decimal = value
    if not isinstance(value, decimal.Decimal):
        try:
            value_as_decimal = decimal.Decimal(value)
        except decimal.DecimalException:
            raise errors.RangeValueError(
                "value must be decimal but is %s" % _compat.text_repr(value), location)

    if self._items is None:
        # No limits at all.
        return

    is_valid = False
    for lower, upper in self._items:
        if lower is None:
            assert upper is not None
            is_valid = value_as_decimal <= upper
        elif upper is None:
            is_valid = value_as_decimal >= lower
        else:
            is_valid = (value_as_decimal >= lower) and (value_as_decimal <= upper)
        if is_valid:
            # The first matching item settles it.
            break
    if not is_valid:
        raise errors.RangeValueError(
            "%s is %r but must be within range: %r" % (name, value_as_decimal, self), location)
def code_for_symbolic_token(name, value, location):
    """
    The numeric code for text representing a symbolic name in ``value``,
    which has to be one of the keys in
    :py:const:`cutplace.errors.NAME_TO_ASCII_CODE_MAP`. The lookup uses
    the lower case form of ``value``.

    :param str name: the name of the value as it is known to the end user
    :param str value: the text that represents a symbolic name
    :param cutplace.errors.Location location: the location of ``value`` or ``None``
    :raises cutplace.errors.InterfaceError: if ``value`` is not a known symbolic name
    """
    assert name is not None
    assert value is not None
    symbol_to_code_map = errors.NAME_TO_ASCII_CODE_MAP
    try:
        return symbol_to_code_map[value.lower()]
    except KeyError:
        valid_symbols = _tools.human_readable_list(sorted(symbol_to_code_map.keys()))
        message = 'symbolic name %s for %s must be one of: %s' % (_compat.text_repr(value), name, valid_symbols)
        raise errors.InterfaceError(message, location)
def validate_characters(self, value):
    """
    Validate that the code of each character in ``value`` is accepted by
    :py:attr:`~cutplace.data.DataFormat.allowed_characters`. Without
    allowed characters (``None``) nothing is checked.

    :raises cutplace.errors.FieldValueError: if any character in
      ``value`` is not allowed
    """
    allowed_range = self.data_format.allowed_characters
    if allowed_range is None:
        return
    # Columns in error messages are counted starting with 1.
    for column, character in enumerate(value, 1):
        code = ord(character)
        try:
            allowed_range.validate("character", code)
        except errors.RangeValueError:
            message_template = _(
                "character %s (code point U+%04x, decimal %d) in field '%s' at column %d must be an allowed "
                "character: %s")
            raise errors.FieldValueError(message_template % (
                _compat.text_repr(character), code, code, self.field_name, column, allowed_range))
def validate_characters(self, value):
    """
    Validate that all characters in ``value`` are within
    :py:attr:`~cutplace.data.DataFormat.allowed_characters`. If no
    allowed characters are set (``None``), every character is accepted.

    :raises cutplace.errors.FieldValueError: if any character in
      ``value`` is not allowed
    """
    valid_character_range = self.data_format.allowed_characters
    if valid_character_range is not None:
        # Column for error messages, starting with 1 for the first character.
        character_column = 0
        for character in value:
            character_column += 1
            character_code = ord(character)
            try:
                valid_character_range.validate("character", character_code)
            except errors.RangeValueError:
                details = (
                    _compat.text_repr(character), character_code, character_code, self.field_name,
                    character_column, valid_character_range)
                raise errors.FieldValueError(
                    "character %s (code point U+%04x, decimal %d) in field '%s' at column %d must be an allowed "
                    "character: %s" % details)
def field_name_index(field_name_to_look_up, available_field_names, location):
    """
    The index of ``field_name_to_look_up`` (without leading or trailing
    white space) in ``available_field_names``.

    :param cutplace.errors.Location location: location used in case of errors
    :raise cutplace.errors.InterfaceError: if ``field_name_to_look_up`` is
      not part of ``available_field_names``
    """
    assert field_name_to_look_up is not None
    assert field_name_to_look_up == field_name_to_look_up.strip()
    assert available_field_names

    name_to_find = field_name_to_look_up.strip()
    try:
        return available_field_names.index(name_to_find)
    except ValueError:
        message = 'unknown field name %s must be replaced by one of: %s' % (
            _compat.text_repr(name_to_find), _tools.human_readable_list(available_field_names))
        raise errors.InterfaceError(message, location)
def code_for_string_token(name, value, location):
    """
    The numeric code for text representing a string with a single
    character in ``value``.

    ``value`` must include its surrounding quotes. If the text between the
    quotes is not exactly one character, it is interpreted as an escape
    sequence (for example ``\\t`` or ``\\x41``) that must resolve to a
    single character.

    :param str name: the name of the value as it is known to the end user
    :param str value: the text that represents a string with a single character
    :param cutplace.errors.Location location: the location of ``value`` or ``None``
    :raises cutplace.errors.InterfaceError: if the text between the quotes
      is neither a single character nor an escape sequence resolving to
      one, including malformed escape sequences
    """
    assert name is not None
    assert value is not None
    assert len(value) >= 2
    left_quote = value[0]
    right_quote = value[-1]
    assert left_quote in "\"\'", "left_quote=%r" % left_quote
    assert right_quote in "\"\'", "right_quote=%r" % right_quote
    value_without_quotes = value[1:-1]
    if len(value_without_quotes) != 1:
        try:
            unescaped_value = value_without_quotes.encode('utf-8').decode('unicode_escape')
        except UnicodeDecodeError:
            # Malformed escape sequence, e.g. a truncated '\x'; report it
            # like any other invalid text instead of leaking the codec error.
            unescaped_value = None
        if unescaped_value is None or len(unescaped_value) != 1:
            raise errors.InterfaceError(
                'text for %s must be a single character but is: %s' % (name, _compat.text_repr(value)), location)
        value_without_quotes = unescaped_value
    return ord(value_without_quotes)
def add_field_format_row(self, possibly_incomplete_items):
    """
    Add field as described by `possibly_incomplete_items`, which is a
    sequence consisting of:

    1) field name
    2) optional: example value (can be empty)
    3) optional: empty flag ("X" = field is allowed to be empty)
    4) optional: length (using the syntax of :py:class:`cutplace.ranges.Range`)
    5) optional: field type (e.g. 'Integer' for :py:class:`cutplace.fields.IntegerFieldFormat`)
    6) optional: rule to validate field (depending on type)

    Any missing items are interpreted as empty string (``''``).
    Additional items are ignored.

    While processing the items, ``self._location`` is moved to the cell
    currently examined so that errors point to the offending cell.

    :raises cutplace.errors.InterfaceError: on broken
      ``possibly_incomplete_items`` or if no data format has been
      specified yet
    """
    assert possibly_incomplete_items is not None
    assert self._location is not None

    if self._data_format is None:
        raise errors.InterfaceError("data format must be specified before first field", self._location)

    # Assert that the various lists and maps related to fields are in a consistent state.
    # Ideally this would be a class invariant, but this is Python, not Eiffel.
    field_count = len(self.field_names)
    assert len(self._field_formats) == field_count
    assert len(self._field_name_to_format_map) == field_count
    assert len(self._field_name_to_index_map) == field_count

    # Copy to a list first so tuples and other sequences can be padded too.
    items = (list(possibly_incomplete_items) + 6 * [''])[:6]

    # Obtain field name.
    field_name = fields.validated_field_name(items[0], self._location)
    if field_name in self._field_name_to_format_map:
        # TODO: Add see_also_location pointing to previous declaration.
        raise errors.InterfaceError(
            'duplicate field name must be changed to a unique one: %s' % field_name, self._location)

    # Obtain example.
    self._location.advance_cell()
    field_example = items[1]

    # Obtain "empty" mark.
    self._location.advance_cell()
    field_is_allowed_to_be_empty_text = items[2].strip().lower()
    if field_is_allowed_to_be_empty_text == '':
        field_is_allowed_to_be_empty = False
    elif field_is_allowed_to_be_empty_text == self._EMPTY_INDICATOR:
        field_is_allowed_to_be_empty = True
    else:
        raise errors.InterfaceError(
            "mark for empty field must be %s or empty but is %s"
            % (self._EMPTY_INDICATOR, field_is_allowed_to_be_empty_text), self._location)

    # Obtain length.
    self._location.advance_cell()
    field_length = items[3]

    # Obtain field type and rule.
    self._location.advance_cell()
    field_type_item = items[4].strip()
    if field_type_item == '':
        field_type = 'Text'
    else:
        # Rebuild the possibly dotted type name from its validated parts.
        field_type = ''
        field_type_parts = field_type_item.split(".")
        try:
            for part in field_type_parts:
                if field_type:
                    field_type += "."
                field_type += _tools.validated_python_name("field type part", part)
            assert field_type, "empty field type must be detected by validated_python_name()"
        except NameError as error:
            raise errors.InterfaceError(six.text_type(error), self._location)
    field_class = self._create_field_format_class(field_type)
    self._location.advance_cell()
    field_rule = items[5].strip()
    _log.debug("create field: %s(%r, %r, %r)", field_class.__name__, field_name, field_type, field_rule)
    try:
        field_format = field_class.__new__(
            field_class, field_name, field_is_allowed_to_be_empty, field_length, field_rule)
        field_format.__init__(
            field_name, field_is_allowed_to_be_empty, field_length, field_rule, self._data_format)
    except errors.InterfaceError as error:
        # Errors without a location of their own refer to the current cell.
        error_location = error.location if error.location is not None else self._location
        error.prepend_message('cannot declare field %s' % _compat.text_repr(field_name), error_location)
        raise error

    # Validate field length.
    # TODO #82: Cleanup validation for declared field formats.
    self._location.set_cell(4)
    field_length = field_format.length
    if self._data_format.format == data.FORMAT_FIXED:
        if field_length.items is None:
            raise errors.InterfaceError(
                "length of field %s must be specified with fixed data format" % _compat.text_repr(field_name),
                self._location)
        if field_length.lower_limit != field_length.upper_limit:
            raise errors.InterfaceError(
                "length of field %s for fixed data format must be a specific number but is: %s"
                % (_compat.text_repr(field_name), field_format.length), self._location)
        if field_length.lower_limit < 1:
            raise errors.InterfaceError(
                "length of field %s for fixed data format must be at least 1 but is: %d"
                % (_compat.text_repr(field_name), field_format.length.lower_limit), self._location)
    elif field_length.lower_limit is not None:
        if field_length.lower_limit < 0:
            raise errors.InterfaceError(
                "lower limit for length of field %s must be at least 0 but is: %d"
                % (_compat.text_repr(field_name), field_format.length.lower_limit), self._location)
    elif field_length.upper_limit is not None:
        # Note: 0 as upper limit is valid for a field that must always be empty.
        if field_length.upper_limit < 0:
            raise errors.InterfaceError(
                "upper limit for length of field %s must be at least 0 but is: %d"
                % (_compat.text_repr(field_name), field_format.length.upper_limit), self._location)

    # Set and validate example in case there is one.
    if field_example != '':
        try:
            field_format.example = field_example
        except errors.FieldValueError as error:
            self._location.set_cell(2)
            raise errors.InterfaceError(
                "cannot validate example for field %s: %s" % (_compat.text_repr(field_name), error),
                self._location)

    self._location.set_cell(1)

    assert field_name
    assert field_type
    assert field_rule is not None

    self.add_field_format(field_format)
def __str__(self):
    """
    Text of the form ``ClassName(field_name, is_allowed_to_be_empty,
    length, rule)`` where name, length and rule are shown using
    ``_compat.text_repr()``.
    """
    parts = (
        self.__class__.__name__,
        _compat.text_repr(self.field_name),
        self.is_allowed_to_be_empty,
        _compat.text_repr(self.length),
        _compat.text_repr(self.rule),
    )
    return "%s(%s, %s, %s, %s)" % parts
def fixed_rows(fixed_source, encoding, field_name_and_lengths, line_delimiter='any'):
    r"""
    Rows found in file ``fixed_source`` using ``encoding``. The name and
    (fixed) length of the fields for each row are specified as a list of
    tuples ``(name, length)``. Each row can end with a line feed unless
    ``line_delimiter`` equals ``None``. Valid values are: ``'\n'``, ``'\r'``
    and ``'\r\n'``, in which case other values result in a
    `errors.DataFormatError`. Additionally ``'any'`` accepts any of the
    previous values.

    This is a generator that yields each row as a list of strings, one
    string per field.

    :param fixed_source: either the path of the file to read (which is
      opened using ``encoding`` and closed when done) or an already open
      text stream (which is left open)
    :raises cutplace.errors.DataFormatError: if a row ends before all its
      fields could be read or a line delimiter does not match
      ``line_delimiter``
    """
    assert fixed_source is not None
    assert encoding is not None
    for name, length in field_name_and_lengths:
        assert name is not None
        assert length >= 1, 'length for %s must be at least 1 but is %s' % (
            name, length)
    assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS, \
        'line_delimiter=%s but must be one of: %s' % (_compat.text_repr(line_delimiter), _VALID_FIXED_LINE_DELIMITERS)

    # Predefine variable for access in local function.
    location = errors.Location(fixed_source, has_column=True)
    fixed_file = None
    # HACK: list with at most 1 character to be unread after a line feed. We
    # need to use a list so `_has_data_after_skipped_line_delimiter` can
    # modify its contents.
    unread_character_after_line_delimiter = [None]

    def _has_data_after_skipped_line_delimiter():
        """
        If `fixed_file` has data, assume they are a line delimiter as
        specified by `line_delimiter` and read and validate them.

        In case `line_delimiter` is `None`, the result is always ``True``
        even if the input has already reached its end.
        """
        assert location is not None
        assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
        assert unread_character_after_line_delimiter[0] is None

        result = True
        if line_delimiter is not None:
            # Read as many characters as the expected delimiter can have.
            if line_delimiter == '\r\n':
                actual_line_delimiter = fixed_file.read(2)
            else:
                assert line_delimiter in ('\n', '\r', 'any')
                actual_line_delimiter = fixed_file.read(1)
            if actual_line_delimiter == '':
                # End of input instead of a line delimiter.
                result = False
            elif line_delimiter == 'any':
                if actual_line_delimiter == '\r':
                    # Process the optional '\n' for 'any'.
                    anticipated_linefeed = fixed_file.read(1)
                    if anticipated_linefeed == '\n':
                        actual_line_delimiter += anticipated_linefeed
                    elif anticipated_linefeed == '':
                        result = False
                    else:
                        # Unread the previous character because it is unrelated to line delimiters.
                        unread_character_after_line_delimiter[
                            0] = anticipated_linefeed
                if actual_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                    valid_line_delimiters = _tools.human_readable_list(
                        _VALID_FIXED_ANY_LINE_DELIMITERS)
                    raise errors.DataFormatError(
                        'line delimiter is %s but must be one of: %s' %
                        (_compat.text_repr(actual_line_delimiter), valid_line_delimiters), location)
            elif actual_line_delimiter != line_delimiter:
                raise errors.DataFormatError(
                    'line delimiter is %s but must be %s' %
                    (_compat.text_repr(actual_line_delimiter), _compat.text_repr(line_delimiter)), location)
        return result

    # A string is taken as path of a file to open here (and close in the
    # ``finally`` below); anything else is used as already open stream.
    if isinstance(fixed_source, six.string_types):
        fixed_file = io.open(fixed_source, 'r', encoding=encoding)
        is_opened = True
    else:
        fixed_file = fixed_source
        is_opened = False

    has_data = True
    try:
        while has_data:
            field_index = 0
            row = []
            for field_name, field_length in field_name_and_lengths:
                if unread_character_after_line_delimiter[0] is None:
                    item = fixed_file.read(field_length)
                else:
                    # The first character of this field has already been
                    # consumed while looking for a line delimiter.
                    assert len(unread_character_after_line_delimiter) == 1
                    item = unread_character_after_line_delimiter[0]
                    if field_length >= 2:
                        item += fixed_file.read(field_length - 1)
                    unread_character_after_line_delimiter[0] = None
                assert unread_character_after_line_delimiter[0] is None
                if not is_opened:
                    # Ensure that the input is a text file, `io.StringIO` or something similar. Binary files,
                    # `io.BytesIO` and the like cannot be used because they return bytes instead of strings.
                    # NOTE: We do not need to use _compat.text_repr(item) because type `unicode` does not fail here.
                    assert isinstance(item, six.text_type), \
                        '%s: fixed_source must yield strings but got type %s, value %r' % (location, type(item), item)
                item_length = len(item)
                if item_length == 0:
                    if field_index > 0:
                        # The input ended in the middle of a row.
                        names = [name for name, _ in field_name_and_lengths]
                        lengths = [
                            length for _, length in field_name_and_lengths
                        ]
                        previous_field_index = field_index - 1
                        characters_needed_count = sum(lengths[field_index:])
                        list_of_missing_field_names = _tools.human_readable_list(
                            names[field_index:], 'and')
                        raise errors.DataFormatError(
                            "after field '%s' %d characters must follow for: %s" %
                            (names[previous_field_index], characters_needed_count, list_of_missing_field_names),
                            location)
                    # End of input reached.
                    has_data = False
                elif item_length == field_length:
                    row.append(item)
                    location.advance_column(field_length)
                    field_index += 1
                else:
                    # Fewer characters than the field requires.
                    raise errors.DataFormatError(
                        "cannot read field '%s': need %d characters but found only %d: %s" %
                        (field_name, field_length, item_length, _compat.text_repr(item)), location)
            if has_data and not _has_data_after_skipped_line_delimiter():
                has_data = False
            if len(row) > 0:
                yield row
                location.advance_line()
    finally:
        if is_opened:
            fixed_file.close()
def set_property(self, name, value, location=None):
    r"""
    Set data format property ``name`` to ``value`` possibly translating
    ``value`` from a human readable representation to an internal one.

    :param str name: any of the ``KEY_*`` constants
    :param value: the value to set the property to as it would show up in a CID.
        In some cases, the value will be translated to an internal representation.
        For example ``set_property(KEY_LINE_DELIMITER, 'lf')`` results in
        :py:attr:`cutplace.data.line_delimiter` being ``'\n'``.
    :type value: str or None
    :param cutplace.errors.Location location: the location to refer to in
        possible error messages

    :raises cutplace.errors.InterfaceError: if ``name`` is not a valid property name for this data format
    :raises cutplace.errors.InterfaceError: if ``value`` is invalid for the specified property
    """
    assert not self.is_valid, 'after validate() has been called property %r cannot be set anymore' % name
    assert name is not None
    assert name == name.lower(), 'property name must be lower case: %r' % name
    assert (value is not None) or (name in (KEY_ALLOWED_CHARACTERS, KEY_LINE_DELIMITER))

    # A property is available if the instance has a matching attribute
    # with a leading underscore.
    name = name.replace(' ', '_')
    property_attribute_name = '_' + name
    if property_attribute_name not in self.__dict__:
        valid_property_names = _tools.human_readable_list(list(self.__dict__.keys()))
        raise errors.InterfaceError(
            'data format property %s for format %s is %s but must be one of %s'
            % (_compat.text_repr(name), self.format, _compat.text_repr(value), valid_property_names),
            location)

    if name == KEY_ENCODING:
        try:
            codecs.lookup(value)
        except LookupError:
            # Report the rejected value, not the encoding currently set.
            raise errors.InterfaceError(
                'value for data format property %s is %s but must be a valid encoding'
                % (_compat.text_repr(KEY_ENCODING), _compat.text_repr(value)),
                location)
        self.encoding = value
    elif name == KEY_HEADER:
        self.header = DataFormat._validated_int_at_least_0(name, value, location)
    elif name == KEY_ALLOWED_CHARACTERS:
        try:
            self._allowed_characters = ranges.Range(value)
        except errors.InterfaceError as error:
            raise errors.InterfaceError(
                'data format property %s must be a valid range: %s'
                % (_compat.text_repr(KEY_ALLOWED_CHARACTERS), error), location)
    elif name == KEY_DECIMAL_SEPARATOR:
        self.decimal_separator = DataFormat._validated_choice(
            KEY_DECIMAL_SEPARATOR, value, _VALID_DECIMAL_SEPARATORS, location)
    elif name == KEY_ESCAPE_CHARACTER:
        self.escape_character = DataFormat._validated_choice(
            KEY_ESCAPE_CHARACTER, value, _VALID_ESCAPE_CHARACTERS, location)
    elif name == KEY_ITEM_DELIMITER:
        item_delimiter = DataFormat._validated_character(KEY_ITEM_DELIMITER, value, location)
        if item_delimiter == '\x00':
            raise errors.InterfaceError(
                "data format property %s must not be 0 (to avoid zero termindated strings in Python's C based CSV reader)"
                % _compat.text_repr(KEY_ITEM_DELIMITER), location)
        self.item_delimiter = item_delimiter
    elif name == KEY_LINE_DELIMITER:
        # NOTE(review): the assertion above admits ``value=None`` for this
        # property, but ``None.lower()`` would raise AttributeError here;
        # confirm that callers never pass None for the line delimiter.
        try:
            self.line_delimiter = _TEXT_TO_LINE_DELIMITER_MAP[value.lower()]
        except KeyError:
            raise errors.InterfaceError(
                'line delimiter %s must be changed to one of: %s'
                % (_compat.text_repr(value), _tools.human_readable_list(self._VALID_LINE_DELIMITER_TEXTS)),
                location)
    elif name == KEY_QUOTE_CHARACTER:
        self.quote_character = DataFormat._validated_choice(
            KEY_QUOTE_CHARACTER, value, _VALID_QUOTE_CHARACTERS, location)
    elif name == KEY_SHEET:
        self.sheet = DataFormat._validated_int_at_least_0(KEY_SHEET, value, location)
    elif name == KEY_SKIP_INITIAL_SPACE:
        self.skip_initial_space = DataFormat._validated_bool(KEY_SKIP_INITIAL_SPACE, value, location)
    elif name == KEY_THOUSANDS_SEPARATOR:
        self.thousands_separator = DataFormat._validated_choice(
            KEY_THOUSANDS_SEPARATOR, value, _VALID_THOUSANDS_SEPARATORS, location)
    else:
        assert False, 'name=%r' % name