Exemplo n.º 1
0
 def test_create_data_format(self):
     # Each listed format name must yield a truthy data format whose
     # ``__str__()`` result is truthy as well.
     format_names = (data.FORMAT_DELIMITED, data.FORMAT_FIXED, data.FORMAT_EXCEL)
     for name in format_names:
         created_format = data.DataFormat(name)
         self.assertTrue(created_format)
         self.assertTrue(created_format.__str__())
Exemplo n.º 2
0
    def test_fails_on_broken_item_delimiter(self):
        # Setting the item delimiter to 'broken-item-delimiter' must be rejected.
        broken_format = data.DataFormat(data.FORMAT_DELIMITED)
        with self.assertRaises(errors.InterfaceError):
            broken_format.set_property(data.KEY_ITEM_DELIMITER, 'broken-item-delimiter')

        # An empty item delimiter must be rejected by the character validation.
        with self.assertRaises(errors.InterfaceError):
            data.DataFormat._validated_character(data.KEY_ITEM_DELIMITER, '', self._location)
Exemplo n.º 3
0
def auto_rows(source):
    """
    Guess the basic data format of ``source`` using heuristics and return its
    contents.

    A string ``source`` is treated as path to a file: the suffix ``ods`` is
    read with ``ods_rows()``, the suffixes ``xls`` and ``xlsx`` are read with
    ``excel_rows()``. Any other path, and any other source that is not an
    :py:class:`io.BytesIO`, is read with ``delimited_rows()`` as UTF-8 encoded
    text with comma as item delimiter.

    :raises NotImplementedError: if ``source`` is an :py:class:`io.BytesIO`
    """
    rows = None
    if isinstance(source, six.string_types):
        readers_by_suffix = {'ods': ods_rows, 'xls': excel_rows, 'xlsx': excel_rows}
        path_suffix = os.path.splitext(source)[1].lstrip('.').lower()
        read_rows = readers_by_suffix.get(path_suffix)
        if read_rows is not None:
            rows = read_rows(source)
    elif isinstance(source, io.BytesIO):
        # TODO: Assume ODS; cannot use XLS and XLSX (at least not without temp file) because the readers need a file.
        raise NotImplementedError('ODS from io.BytesIO')
    if rows is not None:
        return rows

    # No specialized reader delivered rows: fall back to delimited data.
    fallback_format = data.DataFormat(data.FORMAT_DELIMITED)
    # TODO: Use chardet to figure out an encoding.
    fallback_format.set_property(data.KEY_ENCODING, 'utf-8')
    # TODO: Determine delimiter by counting common delimiters with the first 4096 bytes and choosing the maximum one.
    fallback_format.set_property(data.KEY_ITEM_DELIMITER, ',')
    fallback_format.validate()
    return delimited_rows(source, fallback_format)
Exemplo n.º 4
0
    def add_data_format_row(self, row_data):
        """
        Apply the name and value found in the first 2 items of ``row_data``
        to :py:attr:`~cutplace.interface.Cid.data_format`.

        The first row processed must set the property ``data.KEY_FORMAT``,
        which creates the :py:class:`~cutplace.data.DataFormat`. Every
        following row is passed to
        :py:meth:`~cutplace.data.DataFormat.set_property` and must not set
        the format again.

        :param list row_data: a list with at least 2 items for name and value \
            that can be passed to \
            :py:meth:`cutplace.data.DataFormat.set_property()`.
        :raises cutplace.errors.InterfaceError: if the name is empty, the \
            first row does not set the format, or a later row sets the \
            format again
        """
        assert row_data is not None
        assert len(row_data) >= 2

        name = row_data[0]
        value = row_data[1]
        lower_name = name.lower()
        self._location.advance_cell()
        if name == '':
            raise errors.InterfaceError(
                'name of data format property must be specified',
                self._location)
        self._location.advance_cell()

        if self._data_format is None:
            if lower_name != data.KEY_FORMAT:
                raise errors.InterfaceError(
                    'first data format row must set property %s instead of %s' %
                    (_compat.text_repr(data.KEY_FORMAT), _compat.text_repr(name)),
                    self._location)
        elif lower_name == data.KEY_FORMAT:
            raise errors.InterfaceError(
                'data format already is %s and must be set only once' %
                _compat.text_repr(self._data_format.format), self._location)

        lower_value = value.lower()
        if self._data_format is not None:
            self._data_format.set_property(lower_name, value, self._location)
        else:
            self._data_format = data.DataFormat(lower_value, self._location)
Exemplo n.º 5
0
 def test_can_set_excel_properties(self):
     # All properties below must be accepted by an Excel data format,
     # which must still validate afterwards.
     workbook_format = data.DataFormat(data.FORMAT_EXCEL)
     accepted_properties = (
         (data.KEY_ENCODING, DataFormatTest._TEST_ENCODING),
         (data.KEY_ALLOWED_CHARACTERS, None),
         (data.KEY_HEADER, 0),
         (data.KEY_SHEET, 1),
     )
     for key, value in accepted_properties:
         workbook_format.set_property(key, value)
     workbook_format.validate()
Exemplo n.º 6
0
 def test_can_read_empty_fixed_rows(self):
     # Reading from an empty text stream must yield no rows.
     fixed_format = data.DataFormat(data.FORMAT_FIXED)
     fixed_format.validate()
     field_names_and_lengths = (('dummy', 1),)
     with io.StringIO('') as empty_stream:
         row_reader = rowio.fixed_rows(
             empty_stream, fixed_format.encoding, field_names_and_lengths, fixed_format.line_delimiter)
         actual_rows = list(row_reader)
     self.assertEqual([], actual_rows)
Exemplo n.º 7
0
 def test_can_set_ods_properties(self):
     # All properties below must be accepted by an ODS data format,
     # which must still validate afterwards.
     spreadsheet_format = data.DataFormat(data.FORMAT_ODS)
     accepted_properties = (
         (data.KEY_ENCODING, DataFormatTest._TEST_ENCODING),
         (data.KEY_ALLOWED_CHARACTERS, None),
         (data.KEY_HEADER, 0),
         (data.KEY_SHEET, 1),
     )
     for key, value in accepted_properties:
         spreadsheet_format.set_property(key, value)
     spreadsheet_format.validate()
Exemplo n.º 8
0
 def test_fails_on_unsupported_ods_property(self):
     # Each of these properties must be rejected with an InterfaceError
     # when set on an ODS data format.
     ods_format = data.DataFormat(data.FORMAT_ODS)
     rejected_properties = (
         (data.KEY_DECIMAL_SEPARATOR, ','),
         (data.KEY_ESCAPE_CHARACTER, '\\'),
         (data.KEY_LINE_DELIMITER, '\n'),
         (data.KEY_QUOTE_CHARACTER, '"'),
         (data.KEY_ITEM_DELIMITER, ';'),
         (data.KEY_SKIP_INITIAL_SPACE, True),
     )
     for key, value in rejected_properties:
         with self.assertRaises(errors.InterfaceError):
             ods_format.set_property(key, value)
Exemplo n.º 9
0
 def test_fails_on_unsupported_fixed_property(self):
     # Each of these properties must be rejected with an InterfaceError
     # when set on a fixed data format.
     fixed_format = data.DataFormat(data.FORMAT_FIXED)
     rejected_properties = (
         (data.KEY_SHEET, 1),
         (data.KEY_ITEM_DELIMITER, ','),
         (data.KEY_SKIP_INITIAL_SPACE, True),
     )
     for key, value in rejected_properties:
         with self.assertRaises(errors.InterfaceError):
             fixed_format.set_property(key, value)
Exemplo n.º 10
0
 def test_can_validate_german_decimals(self):
     # NOTE(review): this data format is configured here but is not passed
     # to ``_create_german_decimal_format()`` below; confirm whether it is
     # only meant to check that the separators are accepted.
     german_data_format = data.DataFormat(data.FORMAT_DELIMITED)
     german_data_format.set_property(data.KEY_DECIMAL_SEPARATOR, ",")
     german_data_format.set_property(data.KEY_THOUSANDS_SEPARATOR, ".")

     field_format = _create_german_decimal_format()
     # Pairs of (text to validate, expected decimal value).
     expectations = (
         ("17,23", decimal.Decimal("17.23")),
         ("12.345.678", decimal.Decimal("12345678")),
         ("171.234.567,89", decimal.Decimal("171234567.89")),
     )
     for german_text, expected_value in expectations:
         self.assertEqual(expected_value, field_format.validated(german_text))
Exemplo n.º 11
0
 def test_can_write_fixed_data_without_line_delimiter(self):
     # With line delimiter 'none' the rows must be written back to back.
     undelimited_format = data.DataFormat(data.FORMAT_FIXED)
     undelimited_format.set_property(data.KEY_LINE_DELIMITER, 'none')
     undelimited_format.validate()
     field_names_and_lengths = [('x', 1)]
     rows_to_write = [['1'], ['2'], ['3']]
     with io.StringIO() as target:
         with rowio.FixedRowWriter(target, undelimited_format, field_names_and_lengths) as writer:
             writer.write_rows(rows_to_write)
         # Fetch the text while the target stream is still open.
         written_text = target.getvalue()
     self.assertEqual(written_text, '123')
Exemplo n.º 12
0
 def test_can_validate_allowed_characters(self):
     lowercase_format = data.DataFormat(data.FORMAT_DELIMITED)
     lowercase_format.set_property(data.KEY_ALLOWED_CHARACTERS, '"a"..."z"')
     allowed = lowercase_format.allowed_characters
     # The range must be stored as the code points of 'a' and 'z'.
     self.assertEqual([(97, 122)], allowed.items)
     # A code point inside the range validates without error ...
     allowed.validate('x', ord('a'))
     # ... while a code point outside the range is rejected.
     with self.assertRaises(errors.RangeValueError):
         allowed.validate('x', ord('*'))
Exemplo n.º 13
0
 def test_can_set_fixed_properties(self):
     # All properties below must be accepted by a fixed data format,
     # which must still validate afterwards.
     fixed_format = data.DataFormat(data.FORMAT_FIXED)
     accepted_properties = (
         (data.KEY_ENCODING, DataFormatTest._TEST_ENCODING),
         (data.KEY_ALLOWED_CHARACTERS, None),
         (data.KEY_HEADER, 0),
         (data.KEY_DECIMAL_SEPARATOR, ','),
         (data.KEY_LINE_DELIMITER, data.CRLF),
         (data.KEY_THOUSANDS_SEPARATOR, '.'),
     )
     for key, value in accepted_properties:
         fixed_format.set_property(key, value)
     fixed_format.validate()
Exemplo n.º 14
0
 def test_can_write_delimited_data_to_string_io(self):
     # Write 3 rows (including an empty one) to an in-memory text stream
     # and compare the text written after unifying its newlines.
     default_format = data.DataFormat(data.FORMAT_DELIMITED)
     default_format.validate()
     rows_to_write = (
         ['a', 'b', _EURO_SIGN],
         [],
         [1, 2, 'end'],
     )
     with io.StringIO() as target:
         with rowio.DelimitedRowWriter(target, default_format) as writer:
             for row in rows_to_write:
                 writer.write_row(row)
         data_written = dev_test.unified_newlines(target.getvalue())
     # Compare the representations so that a failure shows escaped text.
     expected_repr = '%r' % 'a,b,\u20ac\n\n1,2,end\n'
     self.assertEqual('%r' % data_written, expected_repr)
Exemplo n.º 15
0
 def test_can_write_fixed_data_to_string(self):
     # Write 2 rows of fixed data to an in-memory text stream and compare
     # the text written after unifying its newlines.
     utf8_format = data.DataFormat(data.FORMAT_FIXED)
     utf8_format.set_property(data.KEY_ENCODING, 'utf-8')
     utf8_format.validate()
     field_names_and_lengths = [('a', 1), ('b', 3)]
     rows_to_write = (
         ['a', 'bcd'],
         [_EURO_SIGN, '   '],
     )
     with io.StringIO() as target:
         with rowio.FixedRowWriter(target, utf8_format, field_names_and_lengths) as writer:
             for row in rows_to_write:
                 writer.write_row(row)
         data_written = dev_test.unified_newlines(target.getvalue())
     # Compare the representations so that a failure shows escaped text.
     expected_repr = '%r' % 'abcd\n\u20ac   \n'
     self.assertEqual('%r' % data_written, expected_repr)
Exemplo n.º 16
0
 def test_fails_on_invalid_character(self):
     restricted_format = data.DataFormat(data.FORMAT_DELIMITED)
     restricted_format.set_property(data.KEY_ALLOWED_CHARACTERS, '"a"..."c"')
     field_format = fields.AbstractFieldFormat('something', False, '3...5', '', restricted_format)

     # A value consisting only of allowed characters must pass.
     field_format.validate_characters('cba')

     # The 'x' at column 3 must be reported with its code point.
     expected_message_pattern = (
         "character 'x' (code point U+0078, decimal 120) in field 'something' at column 3 "
         "must be an allowed character: 97...99"
     )
     dev_test.assert_raises_and_fnmatches(
         self, errors.FieldValueError, expected_message_pattern,
         field_format.validate_characters, 'abxba')
Exemplo n.º 17
0
 def test_can_set_delimited_properties(self):
     # All properties below must be accepted by a delimited data format,
     # which must still validate afterwards. The order is significant:
     # "skip initial space" is deliberately set twice.
     delimited_format = data.DataFormat(data.FORMAT_DELIMITED)
     accepted_properties = (
         (data.KEY_ALLOWED_CHARACTERS, None),
         (data.KEY_ENCODING, DataFormatTest._TEST_ENCODING),
         (data.KEY_HEADER, '0'),
         (data.KEY_ITEM_DELIMITER, ','),
         (data.KEY_SKIP_INITIAL_SPACE, 'True'),
         (data.KEY_SKIP_INITIAL_SPACE, 'False'),
         (data.KEY_DECIMAL_SEPARATOR, ','),
         (data.KEY_ESCAPE_CHARACTER, '\\'),
         (data.KEY_LINE_DELIMITER, data.CRLF),
         (data.KEY_QUOTE_CHARACTER, '\"'),
         (data.KEY_THOUSANDS_SEPARATOR, '.'),
     )
     for key, value in accepted_properties:
         delimited_format.set_property(key, value)
     delimited_format.validate()
Exemplo n.º 18
0
 def test_can_write_delimited_data_to_path(self):
     # Write 3 rows (including an empty one) to a file, then read the file
     # back and compare its contents.
     utf8_format = data.DataFormat(data.FORMAT_DELIMITED)
     utf8_format.set_property(data.KEY_ENCODING, 'utf-8')
     utf8_format.validate()
     target_path = dev_test.path_to_test_result('test_can_write_delimited_to_path.csv')
     rows_to_write = (
         ['a', 'b', _EURO_SIGN],
         [],
         [1, 2, 'end'],
     )
     with io.open(target_path, 'w', newline='', encoding=utf8_format.encoding) as target_stream:
         with rowio.DelimitedRowWriter(target_stream, utf8_format) as writer:
             for row in rows_to_write:
                 writer.write_row(row)
     with io.open(target_path, 'r', encoding=utf8_format.encoding) as source_stream:
         # Note: all kinds of newline characters are translated to '\n' because of newline=None.
         data_written = source_stream.read()
     # Compare the representations so that a failure shows escaped text.
     expected_repr = '%r' % 'a,b,\u20ac\n\n1,2,end\n'
     self.assertEqual('%r' % data_written, expected_repr)
Exemplo n.º 19
0
 def test_fails_on_unicode_error_during_fixed_write(self):
     # Writing the euro sign with an ASCII data format must raise a
     # DataError whose message refers to the second row.
     ascii_format = data.DataFormat(data.FORMAT_FIXED)
     ascii_format.set_property(data.KEY_ENCODING, 'ascii')
     ascii_format.validate()
     target_path = dev_test.path_to_test_result('test_fails_on_unicode_error_during_fixed_write.txt')
     with rowio.FixedRowWriter(target_path, ascii_format, [('x', 1)]) as writer:
         writer.write_row(['a'])
         try:
             writer.write_row([_EURO_SIGN])
         except errors.DataError as anticipated_error:
             dev_test.assert_fnmatches(
                 self, str(anticipated_error), "*.txt (R2C1): cannot write data row: *; row=*")
         else:
             # The write went through although it had to fail.
             self.fail()
Exemplo n.º 20
0
 def _create_fixed_data_format_and_fields_for_name_and_height(line_delimiter='any', validate=True):
     """
     Build a fixed data format together with the description of its 2 fields
     ``name`` (length 4) and ``size`` (length 3).

     :param line_delimiter: value to set for the line delimiter property of \
         the data format
     :param bool validate: if true, validate the data format before \
         returning it
     :return: a tuple ``(data_format, field_names_and_lengths)``
     """
     fixed_format = data.DataFormat(data.FORMAT_FIXED)
     fixed_format.set_property(data.KEY_LINE_DELIMITER, line_delimiter)
     if validate:
         fixed_format.validate()
     return fixed_format, (('name', 4), ('size', 3))
Exemplo n.º 21
0
 def test_fails_on_unicode_error_during_delimited_write(self):
     # Writing the euro sign to an ASCII encoded stream must raise a
     # DataError whose message refers to the second row.
     ascii_format = data.DataFormat(data.FORMAT_DELIMITED)
     ascii_format.set_property(data.KEY_ENCODING, 'ascii')
     ascii_format.validate()
     target_path = dev_test.path_to_test_result('test_fails_on_unicode_error_during_delimited_write.csv')
     with io.open(target_path, 'w', newline='', encoding=ascii_format.encoding) as target_stream:
         with rowio.DelimitedRowWriter(target_stream, ascii_format) as writer:
             try:
                 writer.write_row(['a'])
                 writer.write_row(['b', _EURO_SIGN])
             except errors.DataError as anticipated_error:
                 dev_test.assert_fnmatches(
                     self, str(anticipated_error), "*.csv (R2C1): cannot write data row: *; row=*'b', *")
             else:
                 # Both writes went through although the second had to fail.
                 self.fail()
Exemplo n.º 22
0
 def test_can_map_csv_to_delimited_format(self):
     # The format name 'csv' must result in the delimited format.
     actual_format_name = data.DataFormat('csv').format
     self.assertEqual(data.FORMAT_DELIMITED, actual_format_name)
Exemplo n.º 23
0
 def test_fails_on_same_item_delimiter_and_quote_character(self):
     # Setting both properties to '"' is accepted one by one, but
     # validating the resulting data format must fail.
     conflicting_format = data.DataFormat(data.FORMAT_DELIMITED)
     for key in (data.KEY_ITEM_DELIMITER, data.KEY_QUOTE_CHARACTER):
         conflicting_format.set_property(key, '"')
     with self.assertRaises(errors.InterfaceError):
         conflicting_format.validate()
Exemplo n.º 24
0
 def test_fails_on_same_decimal_and_thousands_separator(self):
     # Setting both separators to '.' is accepted one by one, but
     # validating the resulting data format must fail.
     conflicting_format = data.DataFormat(data.FORMAT_DELIMITED)
     for key in (data.KEY_DECIMAL_SEPARATOR, data.KEY_THOUSANDS_SEPARATOR):
         conflicting_format.set_property(key, '.')
     with self.assertRaises(errors.InterfaceError):
         conflicting_format.validate()
Exemplo n.º 25
0
 def test_fails_on_non_numeric_sheet(self):
     # The sheet value 'xxx' must be rejected by an Excel data format.
     workbook_format = data.DataFormat(data.FORMAT_EXCEL)
     with self.assertRaises(errors.InterfaceError):
         workbook_format.set_property(data.KEY_SHEET, 'xxx')
Exemplo n.º 26
0
 def test_can_set_sheet(self):
     # A sheet given as text '1' must be available as the number 1.
     workbook_format = data.DataFormat(data.FORMAT_EXCEL)
     workbook_format.set_property(data.KEY_SHEET, '1')
     actual_sheet = workbook_format.sheet
     self.assertEqual(actual_sheet, 1)
Exemplo n.º 27
0
 def test_fails_on_broken_thousands_separator(self):
     # The value 'broken-thousands-separator' must be rejected.
     broken_format = data.DataFormat(data.FORMAT_DELIMITED)
     with self.assertRaises(errors.InterfaceError):
         broken_format.set_property(data.KEY_THOUSANDS_SEPARATOR, 'broken-thousands-separator')
Exemplo n.º 28
0
 def test_fails_on_unsupported_delimited_property(self):
     # Setting a sheet must be rejected by a delimited data format.
     sheetless_format = data.DataFormat(data.FORMAT_DELIMITED)
     with self.assertRaises(errors.InterfaceError):
         sheetless_format.set_property(data.KEY_SHEET, '2')
Exemplo n.º 29
0
 def test_can_set_thousands_separator(self):
     # An empty thousands separator must be accepted and stored as is.
     plain_format = data.DataFormat(data.FORMAT_DELIMITED)
     plain_format.set_property(data.KEY_THOUSANDS_SEPARATOR, '')
     actual_separator = plain_format.thousands_separator
     self.assertEqual(actual_separator, '')
Exemplo n.º 30
0
 def test_fails_on_same_line_and_item_delimiter(self):
     # Line delimiter ``data.LF`` combined with item delimiter '\n' is
     # accepted property by property, but validating must fail.
     conflicting_format = data.DataFormat(data.FORMAT_DELIMITED)
     conflicting_properties = (
         (data.KEY_LINE_DELIMITER, data.LF),
         (data.KEY_ITEM_DELIMITER, '\n'),
     )
     for key, value in conflicting_properties:
         conflicting_format.set_property(key, value)
     with self.assertRaises(errors.InterfaceError):
         conflicting_format.validate()