def validated_value(self, value):
    """
    ``value`` unchanged, provided it is one of ``self.choices``.

    :raises cutplace.errors.FieldValueError: if ``value`` is not an \
        allowed choice
    """
    assert value

    is_allowed = value in self.choices
    if not is_allowed:
        raise errors.FieldValueError(
            "value is %s but must be one of: %s"
            % (_compat.text_repr(value), _tools.human_readable_list(self.choices)))
    return value
def field_index(self, field_name):
    """
    The zero-based column index of the field named ``field_name``.
    """
    name_to_index = self._field_name_to_index_map
    assert field_name in name_to_index, \
        "unknown field name '%s' must be replaced by one of: %s" \
        % (field_name, _tools.human_readable_list(sorted(self.field_names)))
    return name_to_index[field_name]
def add_check_row(self, possibly_incomplete_items):
    """
    Add a check as declared in ``possibly_incomplete_items``, which
    ideally is a list composed of 3 elements:

    1. description ('customer_id_must_be_unique')
    2. type (e.g. 'IsUnique' mapping to
       :py:class:`cutplace.checks.IsUniqueCheck`)
    3. rule (e.g. 'customer_id')

    Missing items are interpreted as empty string (``''``), additional
    items are ignored.

    :raises cutplace.errors.InterfaceError: on broken \
      ``possibly_incomplete_items``
    """
    assert possibly_incomplete_items is not None

    items = list(possibly_incomplete_items)
    # HACK: Ignore possible concatenated (empty) cells between description and type.
    while (len(items) >= 2) and (items[1].strip() == ''):
        del items[1]
    # Pad with empty strings so missing trailing cells become '' and slice
    # away anything beyond the 3 cells we care about.
    check_description, check_type, check_rule = (items + 3 * [''])[:3]
    self._location.advance_cell()
    if check_description == '':
        raise errors.InterfaceError('check description must be specified', self._location)
    self._location.advance_cell()
    # Map e.g. type 'IsUnique' to the registered class name 'IsUniqueCheck'.
    check_class_name = check_type + "Check"
    if check_class_name not in self._check_name_to_class_map:
        list_of_available_check_types = _tools.human_readable_list(
            sorted(self._check_name_to_class_map.keys()))
        raise errors.InterfaceError(
            "check type is '%s' but must be one of: %s" % (check_type, list_of_available_check_types),
            self._location)
    _log.debug("create check: %s(%r, %r)", check_type, check_description, check_rule)
    check_class = self._create_check_class(check_type)
    # NOTE(review): the check is built via an explicit __new__ followed by
    # __init__ instead of a plain check_class(...) call -- presumably
    # deliberate; confirm before simplifying.
    check = check_class.__new__(check_class, check_description, check_rule, self._field_names, self._location)
    check.__init__(check_description, check_rule, self._field_names, self._location)
    # Point the location back at cell 1; presumably so the duplicate
    # description error below references the description cell -- confirm.
    self._location.set_cell(1)
    existing_check = self._check_name_to_check_map.get(check_description)
    if existing_check is not None:
        raise errors.InterfaceError(
            "check description must be used only once: %s" % _compat.text_repr(check_description),
            self._location, "first declaration", existing_check.location)
    self._check_name_to_check_map[check_description] = check
    self._check_names.append(check_description)
    # Both registries must stay in sync.
    assert len(self.check_names) == len(self._check_name_to_check_map)
def _create_class(self, name_to_class_map, class_qualifier, class_name_appendix, type_name): assert name_to_class_map assert class_qualifier assert class_name_appendix assert type_name class_name = class_qualifier.split(".")[-1] + class_name_appendix result = name_to_class_map.get(class_name) if result is None: raise errors.InterfaceError( "cannot find class for %s %s: related class is %s but must be one of: %s" % ( type_name, class_qualifier, class_name, _tools.human_readable_list(sorted(name_to_class_map.keys()))), self._location) return result
def main(arguments):
    """
    Command line entry point: convert an ODS file to CSV or reStructuredText.

    :param list arguments: command line arguments to parse (without the \
        program name)

    Exits the process with status 1 if the conversion fails.
    """
    assert arguments is not None

    _FORMAT_CSV = "csv"
    _FORMAT_RST = "rst"
    _FORMATS = [_FORMAT_CSV, _FORMAT_RST]
    _DEFAULT_FORMAT = _FORMAT_CSV
    _DEFAULT_SHEET = 1

    argument_parser = argparse.ArgumentParser(description='convert ODS file to other formats')
    argument_parser.add_argument(
        "-f", "--format", metavar="FORMAT", default=_DEFAULT_FORMAT, choices=sorted(_FORMATS), dest="format",
        help="target format: %s (default: %s)" % (_tools.human_readable_list(_FORMATS), _DEFAULT_FORMAT))
    argument_parser.add_argument(
        "-1", "--heading", action="store_true", dest="firstRowIsHeading", help="render first row as heading")
    argument_parser.add_argument(
        "-s", "--sheet", metavar="SHEET", default=_DEFAULT_SHEET, type=int, dest="sheet",
        help="sheet to convert (default: %d)" % _DEFAULT_SHEET)
    argument_parser.add_argument('source_ods_path', metavar='ODS-FILE', help='the ODS file to convert')
    argument_parser.add_argument('target_path', metavar='TARGET-FILE', nargs='?', help='the target file to write')
    args = argument_parser.parse_args(arguments)

    # Checks that argparse cannot express declaratively.
    if args.sheet < 1:
        argument_parser.error("option --sheet is %d but must be at least 1" % args.sheet)
    if args.firstRowIsHeading and (args.format == _FORMAT_CSV):
        argument_parser.error("option --heading can not be used with --format=csv")

    # Derive the target path from the source path when none was given.
    if args.target_path is None:
        assert args.format in _FORMATS
        args.target_path = _tools.with_suffix(args.source_ods_path, '.' + args.format)

    _log.info("convert %r to %r using format %r", args.source_ods_path, args.target_path, args.format)
    try:
        if args.format == _FORMAT_CSV:
            to_csv(args.source_ods_path, args.target_path, sheet=args.sheet)
        elif args.format == _FORMAT_RST:
            to_rst(args.source_ods_path, args.target_path, first_row_is_heading=args.firstRowIsHeading, sheet=args.sheet)
        else:  # pragma: no cover
            raise NotImplementedError("format=%r" % args.format)
    except (EnvironmentError, OSError) as error:
        # I/O problems are expected failure modes: log without a traceback.
        _log.error("cannot convert ods: %s", error)
        sys.exit(1)
    except Exception as error:
        # Anything else is a bug worth a full traceback in the log.
        _log.exception("cannot convert ods: %s", error)
        sys.exit(1)
def add_check_row(self, possibly_incomplete_items):
    """
    Add a check as declared in ``possibly_incomplete_items``, which
    ideally is a list composed of 3 elements:

    1. description ('customer_id_must_be_unique')
    2. type (e.g. 'IsUnique' mapping to
       :py:class:`cutplace.checks.IsUniqueCheck`)
    3. rule (e.g. 'customer_id')

    Missing items are interpreted as empty string (``''``), additional
    items are ignored.

    :raises cutplace.errors.InterfaceError: on broken \
      ``possibly_incomplete_items``
    """
    assert possibly_incomplete_items is not None

    items = list(possibly_incomplete_items)
    # HACK: Ignore possible concatenated (empty) cells between description and type.
    while (len(items) >= 2) and (items[1].strip() == ''):
        del items[1]
    # Default any missing trailing cells to '' and drop surplus cells.
    check_description, check_type, check_rule = (items + 3 * [''])[:3]
    self._location.advance_cell()
    if check_description == '':
        raise errors.InterfaceError(
            'check description must be specified', self._location)
    self._location.advance_cell()
    # The registry keys are class names, e.g. 'IsUnique' -> 'IsUniqueCheck'.
    check_class_name = check_type + "Check"
    if check_class_name not in self._check_name_to_class_map:
        list_of_available_check_types = _tools.human_readable_list(sorted(self._check_name_to_class_map.keys()))
        raise errors.InterfaceError(
            "check type is '%s' but must be one of: %s" % (check_type, list_of_available_check_types),
            self._location)
    _log.debug("create check: %s(%r, %r)", check_type, check_description, check_rule)
    check_class = self._create_check_class(check_type)
    # NOTE(review): instantiation is split into __new__ + __init__ rather
    # than calling check_class(...) directly -- confirm this is required
    # before changing it.
    check = check_class.__new__(check_class, check_description, check_rule, self._field_names, self._location)
    check.__init__(check_description, check_rule, self._field_names, self._location)
    # Reset the location cursor to cell 1 before reporting duplicates;
    # presumably so the error points at the description cell -- confirm.
    self._location.set_cell(1)
    existing_check = self._check_name_to_check_map.get(check_description)
    if existing_check is not None:
        raise errors.InterfaceError(
            "check description must be used only once: %s" % _compat.text_repr(check_description),
            self._location, "first declaration", existing_check.location)
    self._check_name_to_check_map[check_description] = check
    self._check_names.append(check_description)
    # The ordered name list and the lookup map must agree in size.
    assert len(self.check_names) == len(self._check_name_to_check_map)
def _validated_choice(key, value, choices, location, ignore_case=False): """ Same as ``value`` or ``value.lower()`` in case ``ignore_case`` is set to ``True``. If the supposed result is not on of the available ``choices``, raise `errors.InterfaceError`. """ assert key assert value is not None assert choices result = value if not ignore_case else value.lower() if result not in choices: raise errors.InterfaceError( 'data format property %s is %s but must be one of: %s' % (_compat.text_repr(key), _compat.text_repr(value), _tools.human_readable_list(choices)), location) return result
def _has_data_after_skipped_line_delimiter():
    """
    If `fixed_file` has data, assume they are a line delimiter as
    specified by `line_delimiter` and read and validate them. In case
    `line_delimiter` is `None`, the result is always ``True`` even if
    the input has already reached its end.

    Closure: reads ``fixed_file``, ``line_delimiter``, ``location`` and
    ``unread_character_after_line_delimiter`` as free variables from the
    enclosing scope (presumably ``fixed_rows`` -- confirm against the
    surrounding file).
    """
    assert location is not None
    assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
    # The 1-item unread buffer must be empty before consuming a delimiter.
    assert unread_character_after_line_delimiter[0] is None

    result = True
    if line_delimiter is not None:
        # '\r\n' is the only 2-character delimiter; all others read 1 char.
        if line_delimiter == '\r\n':
            actual_line_delimiter = fixed_file.read(2)
        else:
            assert line_delimiter in ('\n', '\r', 'any')
            actual_line_delimiter = fixed_file.read(1)
        if actual_line_delimiter == '':
            # End of input: no delimiter means no further rows.
            result = False
        elif line_delimiter == 'any':
            if actual_line_delimiter == '\r':
                # Process the optional '\n' for 'any'.
                anticipated_linefeed = fixed_file.read(1)
                if anticipated_linefeed == '\n':
                    actual_line_delimiter += anticipated_linefeed
                elif anticipated_linefeed == '':
                    result = False
                else:
                    # Unread the previous character because it is unrelated to line delimiters.
                    unread_character_after_line_delimiter[0] = anticipated_linefeed
            if actual_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                valid_line_delimiters = _tools.human_readable_list(_VALID_FIXED_ANY_LINE_DELIMITERS)
                raise errors.DataFormatError(
                    'line delimiter is %s but must be one of: %s'
                    % (_compat.text_repr(actual_line_delimiter), valid_line_delimiters), location)
        elif actual_line_delimiter != line_delimiter:
            # A specific delimiter was configured but something else was read.
            raise errors.DataFormatError(
                'line delimiter is %s but must be %s'
                % (_compat.text_repr(actual_line_delimiter), _compat.text_repr(line_delimiter)), location)
    return result
def _validated_choice(key, value, choices, location, ignore_case=False): """ Same as ``value`` or ``value.lower()`` in case ``ignore_case`` is set to ``True``. If the supposed result is not on of the available ``choices``, raise `errors.InterfaceError`. """ assert key assert value is not None assert choices result = value if not ignore_case else value.lower() if result not in choices: raise errors.InterfaceError( _('data format property %s is %s but must be one of: %s') % (_compat.text_repr(key), _compat.text_repr(value), _tools.human_readable_list(choices)), location) return result
def _has_data_after_skipped_line_delimiter():
    """
    If `fixed_file` has data, assume they are a line delimiter as
    specified by `line_delimiter` and read and validate them. In case
    `line_delimiter` is `None`, the result is always ``True`` even if
    the input has already reached its end.

    NOTE: this is a closure; ``fixed_file``, ``line_delimiter``,
    ``location`` and ``unread_character_after_line_delimiter`` come from
    the enclosing scope (presumably ``fixed_rows`` -- confirm).
    """
    assert location is not None
    assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
    # Any previously unread character must have been consumed by now.
    assert unread_character_after_line_delimiter[0] is None

    result = True
    if line_delimiter is not None:
        # Read 2 characters only for the '\r\n' delimiter, otherwise 1.
        if line_delimiter == '\r\n':
            actual_line_delimiter = fixed_file.read(2)
        else:
            assert line_delimiter in ('\n', '\r', 'any')
            actual_line_delimiter = fixed_file.read(1)
        if actual_line_delimiter == '':
            # Nothing left to read: signal end of data.
            result = False
        elif line_delimiter == 'any':
            if actual_line_delimiter == '\r':
                # Process the optional '\n' for 'any'.
                anticipated_linefeed = fixed_file.read(1)
                if anticipated_linefeed == '\n':
                    actual_line_delimiter += anticipated_linefeed
                elif anticipated_linefeed == '':
                    result = False
                else:
                    # Unread the previous character because it is unrelated to line delimiters.
                    unread_character_after_line_delimiter[0] = anticipated_linefeed
            if actual_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                valid_line_delimiters = _tools.human_readable_list(_VALID_FIXED_ANY_LINE_DELIMITERS)
                raise errors.DataFormatError(
                    'line delimiter is %s but must be one of: %s'
                    % (_compat.text_repr(actual_line_delimiter), valid_line_delimiters), location)
        elif actual_line_delimiter != line_delimiter:
            # A fixed delimiter was expected but a different one was found.
            raise errors.DataFormatError(
                'line delimiter is %s but must be %s'
                % (_compat.text_repr(actual_line_delimiter), _compat.text_repr(line_delimiter)), location)
    return result
def field_value_for(self, field_name, row):
    """
    The value for field ``field_name`` in ``row``.

    :param list row: the row to obtain the :py:class:`str` value for \
      ``field_name`` from
    :raises AssertionError: if ``field_name`` is not part of the CID
    :raises AssertionError: if ``row`` does not have the expected number of items
    """
    name_to_index = self._field_name_to_index_map
    assert field_name in name_to_index, \
        "unknown field name %r must be replaced by one of: %s" \
        % (field_name, _tools.human_readable_list(sorted(self.field_names)))
    assert row is not None
    actual_item_count = len(row)
    expected_item_count = len(self.field_names)
    assert actual_item_count == expected_item_count, \
        "row must have %d items but has %d: %s" % (expected_item_count, actual_item_count, row)
    return row[name_to_index[field_name]]
def code_for_symbolic_token(name, value, location):
    """
    The numeric code for text representing a symbolic name in ``value``,
    which has to be one of the values in
    :py:const:`cutplace.errors.NAME_TO_ASCII_CODE_MAP`.

    :param str name: the name of the value as it is known to the end user
    :param str value: the text that represents a symbolic name
    :param cutplace.errors.Location location: the location of ``value`` or ``None``
    """
    assert name is not None
    assert value is not None

    symbolic_name = value.lower()
    try:
        return errors.NAME_TO_ASCII_CODE_MAP[symbolic_name]
    except KeyError:
        valid_symbols = _tools.human_readable_list(sorted(errors.NAME_TO_ASCII_CODE_MAP.keys()))
        raise errors.InterfaceError(
            'symbolic name %s for %s must be one of: %s'
            % (_compat.text_repr(value), name, valid_symbols), location)
def field_name_index(field_name_to_look_up, available_field_names, location):
    """
    The index of ``field_name_to_look_up`` (without leading or trailing
    white space) in ``available_field_names``.

    :param cutplace.errors.Location location: location used in case of errors
    :raise cutplace.errors.InterfaceError: if ``field_name_to_look_up`` is \
      not part of ``available_field_names``
    """
    assert field_name_to_look_up is not None
    assert field_name_to_look_up == field_name_to_look_up.strip()
    assert available_field_names

    normalized_name = field_name_to_look_up.strip()
    try:
        return available_field_names.index(normalized_name)
    except ValueError:
        raise errors.InterfaceError(
            'unknown field name %s must be replaced by one of: %s'
            % (_compat.text_repr(normalized_name), _tools.human_readable_list(available_field_names)),
            location)
def fixed_rows(fixed_source, encoding, field_name_and_lengths, line_delimiter='any'):
    r"""
    Rows found in file ``fixed_source`` using ``encoding``. The name and
    (fixed) length of the fields for each row are specified as a list of
    tuples ``(name, length)``. Each row can end with a line feed unless
    ``line_delimiter`` equals ``None``. Valid values are: ``'\n'``,
    ``'\r'`` and ``'\r\n'``, in which case other values result in a
    `errors.DataFormatError`. Additionally ``'any'`` accepts any of the
    previous values.

    :param fixed_source: path to a file or a readable text stream
    :param str encoding: encoding used when ``fixed_source`` is a path
    :param field_name_and_lengths: list of ``(name, length)`` tuples
    :raises cutplace.errors.DataFormatError: on truncated fields or \
      unexpected line delimiters
    """
    assert fixed_source is not None
    assert encoding is not None
    for name, length in field_name_and_lengths:
        assert name is not None
        assert length >= 1, 'length for %s must be at least 1 but is %s' % (name, length)
    assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS, \
        'line_delimiter=%s but must be one of: %s' % (_compat.text_repr(line_delimiter), _VALID_FIXED_LINE_DELIMITERS)

    # Predefine variable for access in local function.
    location = errors.Location(fixed_source, has_column=True)
    fixed_file = None
    # HACK: list with at most 1 character to be unread after a line feed. We
    # need to use a list so `_has_data_after_skipped_line_delimiter` can
    # modify its contents.
    unread_character_after_line_delimiter = [None]

    def _has_data_after_skipped_line_delimiter():
        """
        If `fixed_file` has data, assume they are a line delimiter as
        specified by `line_delimiter` and read and validate them. In case
        `line_delimiter` is `None`, the result is always ``True`` even if
        the input has already reached its end.
        """
        assert location is not None
        assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
        assert unread_character_after_line_delimiter[0] is None

        result = True
        if line_delimiter is not None:
            # '\r\n' is the only 2-character delimiter; all others read 1.
            if line_delimiter == '\r\n':
                actual_line_delimiter = fixed_file.read(2)
            else:
                assert line_delimiter in ('\n', '\r', 'any')
                actual_line_delimiter = fixed_file.read(1)
            if actual_line_delimiter == '':
                # End of input: no delimiter means no further rows.
                result = False
            elif line_delimiter == 'any':
                if actual_line_delimiter == '\r':
                    # Process the optional '\n' for 'any'.
                    anticipated_linefeed = fixed_file.read(1)
                    if anticipated_linefeed == '\n':
                        actual_line_delimiter += anticipated_linefeed
                    elif anticipated_linefeed == '':
                        result = False
                    else:
                        # Unread the previous character because it is unrelated to line delimiters.
                        unread_character_after_line_delimiter[0] = anticipated_linefeed
                if actual_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                    valid_line_delimiters = _tools.human_readable_list(_VALID_FIXED_ANY_LINE_DELIMITERS)
                    raise errors.DataFormatError(
                        'line delimiter is %s but must be one of: %s'
                        % (_compat.text_repr(actual_line_delimiter), valid_line_delimiters), location)
            elif actual_line_delimiter != line_delimiter:
                raise errors.DataFormatError(
                    'line delimiter is %s but must be %s'
                    % (_compat.text_repr(actual_line_delimiter), _compat.text_repr(line_delimiter)), location)
        return result

    # A string is treated as a path to open; anything else is assumed to be
    # an already-open readable text stream owned by the caller.
    if isinstance(fixed_source, six.string_types):
        fixed_file = io.open(fixed_source, 'r', encoding=encoding)
        is_opened = True
    else:
        fixed_file = fixed_source
        is_opened = False

    has_data = True
    try:
        while has_data:
            field_index = 0
            row = []
            for field_name, field_length in field_name_and_lengths:
                if unread_character_after_line_delimiter[0] is None:
                    item = fixed_file.read(field_length)
                else:
                    # Consume the single character unread by the line
                    # delimiter scan before reading the remainder.
                    assert len(unread_character_after_line_delimiter) == 1
                    item = unread_character_after_line_delimiter[0]
                    if field_length >= 2:
                        item += fixed_file.read(field_length - 1)
                    unread_character_after_line_delimiter[0] = None
                assert unread_character_after_line_delimiter[0] is None
                if not is_opened:
                    # Ensure that the input is a text file, `io.StringIO` or something similar. Binary files,
                    # `io.BytesIO` and the like cannot be used because the return bytes instead of strings.
                    # NOTE: We do not need to use _compat.text_repr(item) because type `unicode` does not fail here.
                    assert isinstance(item, six.text_type), \
                        '%s: fixed_source must yield strings but got type %s, value %r' % (location, type(item), item)
                item_length = len(item)
                if item_length == 0:
                    if field_index > 0:
                        # Input ended in the middle of a row: report what is missing.
                        names = [name for name, _ in field_name_and_lengths]
                        lengths = [length for _, length in field_name_and_lengths]
                        previous_field_index = field_index - 1
                        characters_needed_count = sum(lengths[field_index:])
                        list_of_missing_field_names = _tools.human_readable_list(names[field_index:], 'and')
                        raise errors.DataFormatError(
                            "after field '%s' %d characters must follow for: %s"
                            % (names[previous_field_index], characters_needed_count, list_of_missing_field_names),
                            location)
                    # End of input reached.
                    has_data = False
                elif item_length == field_length:
                    row.append(item)
                    location.advance_column(field_length)
                    field_index += 1
                else:
                    # A partial read means the field was truncated.
                    raise errors.DataFormatError(
                        "cannot read field '%s': need %d characters but found only %d: %s"
                        % (field_name, field_length, item_length, _compat.text_repr(item)), location)
            if has_data and not _has_data_after_skipped_line_delimiter():
                has_data = False
            if len(row) > 0:
                yield row
                location.advance_line()
    finally:
        # Only close streams this function opened itself.
        if is_opened:
            fixed_file.close()
def fixed_rows(fixed_source, encoding, field_name_and_lengths, line_delimiter='any'):
    r"""
    Rows found in file ``fixed_source`` using ``encoding``. The name and
    (fixed) length of the fields for each row are specified as a list of
    tuples ``(name, length)``. Each row can end with a line feed unless
    ``line_delimiter`` equals ``None``. Valid values are: ``'\n'``,
    ``'\r'`` and ``'\r\n'``, in which case other values result in a
    `errors.DataFormatError`. Additionally ``'any'`` accepts any of the
    previous values.

    :param fixed_source: path to a file or a readable text stream
    :param str encoding: encoding used when ``fixed_source`` is a path
    :param field_name_and_lengths: list of ``(name, length)`` tuples
    :raises cutplace.errors.DataFormatError: on truncated fields or \
      unexpected line delimiters
    """
    assert fixed_source is not None
    assert encoding is not None
    for name, length in field_name_and_lengths:
        assert name is not None
        assert length >= 1, 'length for %s must be at least 1 but is %s' % (name, length)
    assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS, \
        'line_delimiter=%s but must be one of: %s' % (_compat.text_repr(line_delimiter), _VALID_FIXED_LINE_DELIMITERS)

    # Predefine variable for access in local function.
    location = errors.Location(fixed_source, has_column=True)
    fixed_file = None
    # HACK: list with at most 1 character to be unread after a line feed. We
    # need to use a list so `_has_data_after_skipped_line_delimiter` can
    # modify its contents.
    unread_character_after_line_delimiter = [None]

    def _has_data_after_skipped_line_delimiter():
        """
        If `fixed_file` has data, assume they are a line delimiter as
        specified by `line_delimiter` and read and validate them. In case
        `line_delimiter` is `None`, the result is always ``True`` even if
        the input has already reached its end.
        """
        assert location is not None
        assert line_delimiter in _VALID_FIXED_LINE_DELIMITERS
        assert unread_character_after_line_delimiter[0] is None

        result = True
        if line_delimiter is not None:
            # Only '\r\n' takes 2 characters; every other delimiter reads 1.
            if line_delimiter == '\r\n':
                actual_line_delimiter = fixed_file.read(2)
            else:
                assert line_delimiter in ('\n', '\r', 'any')
                actual_line_delimiter = fixed_file.read(1)
            if actual_line_delimiter == '':
                # Nothing left to read: signal end of data.
                result = False
            elif line_delimiter == 'any':
                if actual_line_delimiter == '\r':
                    # Process the optional '\n' for 'any'.
                    anticipated_linefeed = fixed_file.read(1)
                    if anticipated_linefeed == '\n':
                        actual_line_delimiter += anticipated_linefeed
                    elif anticipated_linefeed == '':
                        result = False
                    else:
                        # Unread the previous character because it is unrelated to line delimiters.
                        unread_character_after_line_delimiter[0] = anticipated_linefeed
                if actual_line_delimiter not in _VALID_FIXED_ANY_LINE_DELIMITERS:
                    valid_line_delimiters = _tools.human_readable_list(_VALID_FIXED_ANY_LINE_DELIMITERS)
                    raise errors.DataFormatError(
                        'line delimiter is %s but must be one of: %s'
                        % (_compat.text_repr(actual_line_delimiter), valid_line_delimiters), location)
            elif actual_line_delimiter != line_delimiter:
                raise errors.DataFormatError(
                    'line delimiter is %s but must be %s'
                    % (_compat.text_repr(actual_line_delimiter), _compat.text_repr(line_delimiter)), location)
        return result

    # Open paths ourselves; other sources are assumed to be readable text
    # streams owned (and closed) by the caller.
    if isinstance(fixed_source, six.string_types):
        fixed_file = io.open(fixed_source, 'r', encoding=encoding)
        is_opened = True
    else:
        fixed_file = fixed_source
        is_opened = False

    has_data = True
    try:
        while has_data:
            field_index = 0
            row = []
            for field_name, field_length in field_name_and_lengths:
                if unread_character_after_line_delimiter[0] is None:
                    item = fixed_file.read(field_length)
                else:
                    # First use up the character unread by the delimiter scan.
                    assert len(unread_character_after_line_delimiter) == 1
                    item = unread_character_after_line_delimiter[0]
                    if field_length >= 2:
                        item += fixed_file.read(field_length - 1)
                    unread_character_after_line_delimiter[0] = None
                assert unread_character_after_line_delimiter[0] is None
                if not is_opened:
                    # Ensure that the input is a text file, `io.StringIO` or something similar. Binary files,
                    # `io.BytesIO` and the like cannot be used because the return bytes instead of strings.
                    # NOTE: We do not need to use _compat.text_repr(item) because type `unicode` does not fail here.
                    assert isinstance(item, six.text_type), \
                        '%s: fixed_source must yield strings but got type %s, value %r' % (location, type(item), item)
                item_length = len(item)
                if item_length == 0:
                    if field_index > 0:
                        # Input ended mid-row: name the fields still missing.
                        names = [name for name, _ in field_name_and_lengths]
                        lengths = [length for _, length in field_name_and_lengths]
                        previous_field_index = field_index - 1
                        characters_needed_count = sum(lengths[field_index:])
                        list_of_missing_field_names = _tools.human_readable_list(names[field_index:], 'and')
                        raise errors.DataFormatError(
                            "after field '%s' %d characters must follow for: %s"
                            % (names[previous_field_index], characters_needed_count, list_of_missing_field_names),
                            location)
                    # End of input reached.
                    has_data = False
                elif item_length == field_length:
                    row.append(item)
                    location.advance_column(field_length)
                    field_index += 1
                else:
                    # Fewer characters than the field requires: truncated data.
                    raise errors.DataFormatError(
                        "cannot read field '%s': need %d characters but found only %d: %s"
                        % (field_name, field_length, item_length, _compat.text_repr(item)), location)
            if has_data and not _has_data_after_skipped_line_delimiter():
                has_data = False
            if len(row) > 0:
                yield row
                location.advance_line()
    finally:
        # Close only what this function opened.
        if is_opened:
            fixed_file.close()
def test_can_build_human_readable_list(self):
    # Table-driven: each entry pairs an input list with its expected rendering.
    expected_text_by_items = [
        ([], ""),
        (["a"], "'a'"),
        (["a", "b"], "'a' or 'b'"),
        (["a", "b", "c"], "'a', 'b' or 'c'"),
    ]
    for items, expected_text in expected_text_by_items:
        self.assertEqual(_tools.human_readable_list(items), expected_text)
def set_property(self, name, value, location=None):
    r"""
    Set data format property ``name`` to ``value`` possibly translating
    ``value`` from a human readable representation to an internal one.

    :param str name: any of the ``KEY_*`` constants
    :param value: the value to set the property to as it would show up in a CID. \
        In some cases, the value will be translated to an internal representation. \
        For example ``set_property(KEY_LINE_DELIMITER, 'lf')`` results in \
        :py:attr:`cutplace.data.line_delimiter` being ``'\n'``.
    :type value: str or None
    :raises cutplace.errors.InterfaceError: if ``name`` is not a valid property name for this data format
    :raises cutplace.errors.InterfaceError: if ``value`` is invalid for the specified property
    """
    assert not self.is_valid, 'after validate() has been called property %r cannot be set anymore' % name
    assert name is not None
    assert name == name.lower(), 'property name must be lower case: %r' % name
    # Only these two properties may be cleared with a None value.
    assert (value is not None) or (name in (KEY_ALLOWED_CHARACTERS, KEY_LINE_DELIMITER))

    # Normalize e.g. 'line delimiter' to 'line_delimiter' and check the
    # matching private attribute exists on this format.
    name = name.replace(' ', '_')
    property_attribute_name = '_' + name
    if property_attribute_name not in self.__dict__:
        valid_property_names = _tools.human_readable_list(list(self.__dict__.keys()))
        raise errors.InterfaceError(
            _('data format property %s for format %s is %s but must be one of %s')
            % (_compat.text_repr(name), self.format, _compat.text_repr(value), valid_property_names),
            location)
    # Dispatch on the property name; each branch validates and possibly
    # translates the value before assigning it. `_(...)` is presumably a
    # gettext-style translation hook defined elsewhere in the module.
    if name == KEY_ENCODING:
        try:
            codecs.lookup(value)
        except LookupError:
            # NOTE(review): the message reports self.encoding instead of the
            # rejected value -- possibly intended to be `value`; confirm.
            raise errors.InterfaceError(
                _('value for data format property %s is %s but must be a valid encoding')
                % (_compat.text_repr(KEY_ENCODING), _compat.text_repr(self.encoding)),
                location)
        self.encoding = value
    elif name == KEY_HEADER:
        self.header = DataFormat._validated_int_at_least_0(name, value, location)
    elif name == KEY_VALIDATE_HEADER_ROW_AGAINST_FIELD_NAMES:
        self.validate_header_row_against_field_names = DataFormat._validated_bool(
            KEY_VALIDATE_HEADER_ROW_AGAINST_FIELD_NAMES, value, location)
    elif name == KEY_ALLOWED_CHARACTERS:
        try:
            # Stored directly on the private attribute, unlike the other
            # branches which go through property setters.
            self._allowed_characters = ranges.Range(value)
        except errors.InterfaceError as error:
            raise errors.InterfaceError(
                _('data format property %s must be a valid range: %s')
                % (_compat.text_repr(KEY_ALLOWED_CHARACTERS), error),
                location)
    elif name == KEY_DECIMAL_SEPARATOR:
        self.decimal_separator = DataFormat._validated_choice(
            KEY_DECIMAL_SEPARATOR, value, _VALID_DECIMAL_SEPARATORS, location)
    elif name == KEY_ESCAPE_CHARACTER:
        self.escape_character = DataFormat._validated_choice(
            KEY_ESCAPE_CHARACTER, value, _VALID_ESCAPE_CHARACTERS, location)
    elif name == KEY_ITEM_DELIMITER:
        item_delimiter = DataFormat._validated_character(KEY_ITEM_DELIMITER, value, location)
        if item_delimiter == '\x00':
            raise errors.InterfaceError(
                _("data format property %s must not be 0 (to avoid zero termindated strings in Python's C based CSV reader)")
                % _compat.text_repr(KEY_ITEM_DELIMITER),
                location)
        self.item_delimiter = item_delimiter
    elif name == KEY_LINE_DELIMITER:
        try:
            # Translate e.g. 'lf' to '\n' via the module level map.
            self.line_delimiter = _TEXT_TO_LINE_DELIMITER_MAP[value.lower()]
        except KeyError:
            raise errors.InterfaceError(
                _('line delimiter %s must be changed to one of: %s')
                % (_compat.text_repr(value), _tools.human_readable_list(self._VALID_LINE_DELIMITER_TEXTS)),
                location)
    elif name == KEY_QUOTE_CHARACTER:
        self.quote_character = DataFormat._validated_choice(
            KEY_QUOTE_CHARACTER, value, _VALID_QUOTE_CHARACTERS, location)
    elif name == KEY_SHEET:
        self.sheet = DataFormat._validated_int_at_least_0(KEY_SHEET, value, location)
    elif name == KEY_SKIP_INITIAL_SPACE:
        self.skip_initial_space = DataFormat._validated_bool(KEY_SKIP_INITIAL_SPACE, value, location)
    elif name == KEY_THOUSANDS_SEPARATOR:
        self.thousands_separator = DataFormat._validated_choice(
            KEY_THOUSANDS_SEPARATOR, value, _VALID_THOUSANDS_SEPARATORS, location)
    elif name == KEY_QUOTING:
        # Case-insensitive choice translated to the csv module's quoting constant.
        result = DataFormat._validated_choice(KEY_QUOTING, value, _VALID_QUOTING, location, ignore_case=True)
        self.quoting = READABLE_TO_CSV_QUOTING_FORMAT[result]
    elif name == KEY_STRICT_FIELD_NAMES:
        self.strict_field_names = DataFormat._validated_bool(KEY_STRICT_FIELD_NAMES, value, location)
    else:
        # The attribute existence check above should make this unreachable.
        assert False, 'name=%r' % name
def set_property(self, name, value, location=None):
    r"""
    Set data format property ``name`` to ``value`` possibly translating
    ``value`` from a human readable representation to an internal one.

    :param str name: any of the ``KEY_*`` constants
    :param value: the value to set the property to as it would show up in a CID. \
        In some cases, the value will be translated to an internal representation. \
        For example ``set_property(KEY_LINE_DELIMITER, 'lf')`` results in \
        :py:attr:`cutplace.data.line_delimiter` being ``'\n'``.
    :type value: str or None
    :raises cutplace.errors.InterfaceError: if ``name`` is not a valid property name for this data format
    :raises cutplace.errors.InterfaceError: if ``value`` is invalid for the specified property
    """
    assert not self.is_valid, 'after validate() has been called property %r cannot be set anymore' % name
    assert name is not None
    assert name == name.lower(), 'property name must be lower case: %r' % name
    # Only these two properties accept a None value (meaning "unset").
    assert (value is not None) or (name in (KEY_ALLOWED_CHARACTERS, KEY_LINE_DELIMITER))

    # Normalize spaces to underscores and verify a matching private
    # attribute exists for this data format.
    name = name.replace(' ', '_')
    property_attribute_name = '_' + name
    if property_attribute_name not in self.__dict__:
        valid_property_names = _tools.human_readable_list(list(self.__dict__.keys()))
        raise errors.InterfaceError(
            'data format property %s for format %s is %s but must be one of %s'
            % (_compat.text_repr(name), self.format, _compat.text_repr(value), valid_property_names),
            location)
    # Dispatch on the property name; each branch validates and possibly
    # translates the value before assigning it.
    if name == KEY_ENCODING:
        try:
            codecs.lookup(value)
        except LookupError:
            # NOTE(review): the message reports self.encoding rather than the
            # rejected value -- possibly intended to be `value`; confirm.
            raise errors.InterfaceError(
                'value for data format property %s is %s but must be a valid encoding'
                % (_compat.text_repr(KEY_ENCODING), _compat.text_repr(self.encoding)),
                location)
        self.encoding = value
    elif name == KEY_HEADER:
        self.header = DataFormat._validated_int_at_least_0(name, value, location)
    elif name == KEY_ALLOWED_CHARACTERS:
        try:
            # Stored on the private attribute directly, unlike the other
            # branches which use property setters.
            self._allowed_characters = ranges.Range(value)
        except errors.InterfaceError as error:
            raise errors.InterfaceError(
                'data format property %s must be a valid range: %s'
                % (_compat.text_repr(KEY_ALLOWED_CHARACTERS), error),
                location)
    elif name == KEY_DECIMAL_SEPARATOR:
        self.decimal_separator = DataFormat._validated_choice(
            KEY_DECIMAL_SEPARATOR, value, _VALID_DECIMAL_SEPARATORS, location)
    elif name == KEY_ESCAPE_CHARACTER:
        self.escape_character = DataFormat._validated_choice(
            KEY_ESCAPE_CHARACTER, value, _VALID_ESCAPE_CHARACTERS, location)
    elif name == KEY_ITEM_DELIMITER:
        item_delimiter = DataFormat._validated_character(KEY_ITEM_DELIMITER, value, location)
        if item_delimiter == '\x00':
            raise errors.InterfaceError(
                "data format property %s must not be 0 (to avoid zero termindated strings in Python's C based CSV reader)"
                % _compat.text_repr(KEY_ITEM_DELIMITER),
                location)
        self.item_delimiter = item_delimiter
    elif name == KEY_LINE_DELIMITER:
        try:
            # Translate e.g. 'lf' to '\n' via the module level map.
            self.line_delimiter = _TEXT_TO_LINE_DELIMITER_MAP[value.lower()]
        except KeyError:
            raise errors.InterfaceError(
                'line delimiter %s must be changed to one of: %s'
                % (_compat.text_repr(value), _tools.human_readable_list(self._VALID_LINE_DELIMITER_TEXTS)),
                location)
    elif name == KEY_QUOTE_CHARACTER:
        self.quote_character = DataFormat._validated_choice(
            KEY_QUOTE_CHARACTER, value, _VALID_QUOTE_CHARACTERS, location)
    elif name == KEY_SHEET:
        self.sheet = DataFormat._validated_int_at_least_0(KEY_SHEET, value, location)
    elif name == KEY_SKIP_INITIAL_SPACE:
        self.skip_initial_space = DataFormat._validated_bool(KEY_SKIP_INITIAL_SPACE, value, location)
    elif name == KEY_THOUSANDS_SEPARATOR:
        self.thousands_separator = DataFormat._validated_choice(
            KEY_THOUSANDS_SEPARATOR, value, _VALID_THOUSANDS_SEPARATORS, location)
    else:
        # The attribute existence check above should make this unreachable.
        assert False, 'name=%r' % name
def test_can_build_human_readable_list(self):
    # Exercise the empty, single, pair and triple cases in one loop.
    inputs = [[], ['a'], ['a', 'b'], ['a', 'b', 'c']]
    expected_texts = ['', "'a'", "'a' or 'b'", "'a', 'b' or 'c'"]
    for input_items, expected_text in zip(inputs, expected_texts):
        self.assertEqual(_tools.human_readable_list(input_items), expected_text)
def main(arguments):
    """
    Command line entry point: convert an ODS file to CSV or reStructuredText.

    :param list arguments: command line arguments to parse (without the \
        program name)

    Exits the process with status 1 if the conversion fails.
    """
    assert arguments is not None

    # Supported target formats and defaults.
    _FORMAT_CSV = "csv"
    _FORMAT_RST = "rst"
    _FORMATS = [_FORMAT_CSV, _FORMAT_RST]
    _DEFAULT_FORMAT = _FORMAT_CSV
    _DEFAULT_SHEET = 1

    parser = argparse.ArgumentParser(description='convert ODS file to other formats')
    parser.add_argument(
        "-f", "--format", metavar="FORMAT", default=_DEFAULT_FORMAT, choices=sorted(_FORMATS), dest="format",
        help="target format: %s (default: %s)" % (_tools.human_readable_list(_FORMATS), _DEFAULT_FORMAT))
    parser.add_argument(
        "-1", "--heading", action="store_true", dest="firstRowIsHeading", help="render first row as heading")
    parser.add_argument(
        "-s", "--sheet", metavar="SHEET", default=_DEFAULT_SHEET, type=int, dest="sheet",
        help="sheet to convert (default: %d)" % _DEFAULT_SHEET)
    parser.add_argument('source_ods_path', metavar='ODS-FILE', help='the ODS file to convert')
    parser.add_argument('target_path', metavar='TARGET-FILE', nargs='?', help='the target file to write')
    args = parser.parse_args(arguments)

    # Additional command line argument validation.
    if args.sheet < 1:
        parser.error("option --sheet is %d but must be at least 1" % args.sheet)
    if (args.format == _FORMAT_CSV) and args.firstRowIsHeading:
        parser.error("option --heading can not be used with --format=csv")

    # Derive the target path from the source path when none was specified.
    if args.target_path is None:
        assert args.format in _FORMATS
        suffix = '.' + args.format
        args.target_path = _tools.with_suffix(args.source_ods_path, suffix)

    _log.info("convert %r to %r using format %r", args.source_ods_path, args.target_path, args.format)
    try:
        if args.format == _FORMAT_CSV:
            toCsv(args.source_ods_path, args.target_path, sheet=args.sheet)
        elif args.format == _FORMAT_RST:
            toRst(args.source_ods_path, args.target_path, firstRowIsHeading=args.firstRowIsHeading, sheet=args.sheet)
        else:  # pragma: no cover
            raise NotImplementedError("format=%r" % args.format)
    except EnvironmentError as error:
        # I/O problems are expected failure modes: log without a traceback.
        _log.error("cannot convert ods: %s", error)
        sys.exit(1)
    except Exception as error:
        # Anything else is unexpected: log with full traceback.
        _log.exception("cannot convert ods: %s", error)
        sys.exit(1)