Пример #1
0
  def test_read_single_file_without_striping_eol_crlf(self):
    # Writes records terminated with CRLF and checks that every record read
    # back still carries its '\r\n' suffix. The fourth positional argument
    # (False) is presumably the strip-trailing-newlines flag; confirm against
    # the TextSource signature.
    file_name, written_data = write_data(TextSourceTest.DEFAULT_NUM_RECORDS,
                                         eol=EOL.CRLF)
    # Fixture sanity check. A TestCase assertion is used instead of a bare
    # `assert` so the check still runs under `python -O` and reports both
    # values when it fails.
    self.assertEqual(TextSourceTest.DEFAULT_NUM_RECORDS, len(written_data))
    source = TextSource(file_name, 0, CompressionTypes.UNCOMPRESSED,
                        False, coders.StrUtf8Coder())

    range_tracker = source.get_range_tracker(None, None)
    read_data = list(source.read(range_tracker))
    # assertCountEqual ignores ordering, so only the set of records (with
    # multiplicity) is compared.
    self.assertCountEqual([line + '\r\n' for line in written_data], read_data)
Пример #2
0
 def _run_read_test(self, file_or_pattern, expected_data,
                    buffer_size=DEFAULT_NUM_RECORDS,
                    compression=CompressionTypes.UNCOMPRESSED):
   """Reads everything from file_or_pattern and compares it to expected_data.

   The comparison uses assertCountEqual, so the order of records is ignored.
   """
   # The default buffer size is deliberately small: a record usually takes
   # more than one byte, so the file is larger than the buffer. That makes
   # reads hit the buffer boundary, which increases test coverage.
   source_args = (file_or_pattern, 0, compression, True,
                  coders.StrUtf8Coder(), buffer_size)
   text_source = TextSource(*source_args)
   tracker = text_source.get_range_tracker(None, None)
   self.assertCountEqual(expected_data, list(text_source.read(tracker)))
Пример #3
0
    def _read_skip_header_lines(self, file_or_pattern, skip_header_lines):
        """Build a TextSource over file_or_pattern and return all it reads.

        skip_header_lines is forwarded unchanged to TextSource as the keyword
        argument of the same name. The records are returned as a list.
        """
        positional = (file_or_pattern, 0, CompressionTypes.UNCOMPRESSED, True,
                      coders.StrUtf8Coder())
        text_source = TextSource(*positional,
                                 skip_header_lines=skip_header_lines)
        tracker = text_source.get_range_tracker(None, None)
        records = list(text_source.read(tracker))
        return records
Пример #4
0
    def test_read_single_file_without_striping_eol_crlf(self):
        # Writes records terminated with CRLF and checks that every record
        # read back still carries its '\r\n' suffix. The fourth positional
        # argument (False) is presumably the strip-trailing-newlines flag;
        # confirm against the TextSource signature.
        file_name, written_data = write_data(
            TextSourceTest.DEFAULT_NUM_RECORDS, eol=EOL.CRLF)
        # Fixture sanity check. A TestCase assertion is used instead of a
        # bare `assert` so the check still runs under `python -O` and reports
        # both values when it fails.
        self.assertEqual(TextSourceTest.DEFAULT_NUM_RECORDS,
                         len(written_data))
        source = TextSource(file_name, 0, CompressionTypes.UNCOMPRESSED, False,
                            coders.StrUtf8Coder())

        range_tracker = source.get_range_tracker(None, None)
        read_data = list(source.read(range_tracker))
        # assertCountEqual ignores ordering, so only the set of records (with
        # multiplicity) is compared.
        self.assertCountEqual([line + '\r\n' for line in written_data],
                              read_data)
Пример #5
0
 def _run_read_test(self, file_or_pattern, expected_data,
                    buffer_size=DEFAULT_NUM_RECORDS,
                    compression=CompressionTypes.UNCOMPRESSED):
   """Asserts that a TextSource over file_or_pattern yields expected_data.

   Records are compared with assertCountEqual, i.e. without regard to order.
   """
   # Why the default buffer_size is small: each record usually takes more
   # than one byte, so the buffer is smaller than the whole file. Reads then
   # hit the buffer boundary, which is the case this helper wants covered.
   reader = TextSource(
       file_or_pattern, 0, compression, True, coders.StrUtf8Coder(),
       buffer_size)
   actual_data = list(reader.read(reader.get_range_tracker(None, None)))
   self.assertCountEqual(expected_data, actual_data)
Пример #6
0
  def _read_skip_header_lines(self, file_or_pattern, skip_header_lines):
    """Returns, as a list, every record a TextSource reads from the input.

    The TextSource is created over file_or_pattern, and skip_header_lines is
    passed straight through as the keyword argument of the same name.
    """
    reader = TextSource(
        file_or_pattern, 0, CompressionTypes.UNCOMPRESSED, True,
        coders.StrUtf8Coder(), skip_header_lines=skip_header_lines)
    tracker = reader.get_range_tracker(None, None)
    return list(reader.read(tracker))