def test_read_delimited_chunks_empty_input(self):
    """Test empty input for read_delimited_chunks."""
    # Splitting an empty file yields no chunks at all, similar to
    # how a bytestring's splitlines method yields an empty list for
    # an empty bytestring.
    chunks = read_delimited_chunks(io.BytesIO(b""), 1000)
    self.assertEqual(list(chunks), [])
def test_read_delimited_chunks_long_lines(self):
    """Test lines longer than chunksize, data longer than chunk_size."""
    chunk_size = 100
    # Fixture precondition: at least one line must exceed chunk_size,
    # so that at least one emitted chunk has to exceed it as well.
    longest_line = max(map(len, csv_test_data.splitlines()))
    self.assertTrue(longest_line > chunk_size)
    # The actual test.
    chunks = list(
        read_delimited_chunks(io.BytesIO(csv_test_data), chunk_size))
    # Reassembling the chunks reproduces the input byte-for-byte.
    self.assertEqual(csv_test_data, b"".join(chunks))
    # Oversized chunks may contain a newline only as their final byte.
    oversized = [piece for piece in chunks if len(piece) > chunk_size]
    self.assertTrue(len(oversized) > 0)
    for piece in oversized:
        body = piece[:-1] if piece.endswith(b"\n") else piece
        self.assertFalse(b"\n" in body)
def test_read_delimited_chunks_short_lines(self):
    """Test lines shorter than chunksize, data longer than chunk_size."""
    chunk_size = 500
    # Fixture precondition 1: every line must be shorter than
    # chunk_size, so that every chunk can stay below chunk_size.
    longest_line = max(map(len, csv_test_data.splitlines()))
    self.assertTrue(longest_line < chunk_size)
    # Fixture precondition 2: the data must be longer than chunk_size
    # so that chunking actually happens.
    self.assertTrue(len(csv_test_data) > chunk_size)
    # The actual test.
    chunks = list(
        read_delimited_chunks(io.BytesIO(csv_test_data), chunk_size))
    # Reassembling the chunks reproduces the input byte-for-byte.
    self.assertEqual(csv_test_data, b"".join(chunks))
    # No chunk exceeds chunk_size.
    self.assertTrue(max(len(piece) for piece in chunks) <= chunk_size)
    # Every split happened at a line separator: flattening the chunks'
    # lines reproduces the original line sequence.
    flattened = list(itertools.chain.from_iterable(
        piece.splitlines() for piece in chunks))
    self.assertEqual(csv_test_data.splitlines(), flattened)
def test_read_delimited_chunks_long_lines(self):
    """Test lines longer than chunksize, data longer than chunk_size."""
    # NOTE(review): duplicate definition — a token-identical
    # test_read_delimited_chunks_long_lines appears earlier in this
    # file; Python keeps only the later binding, so one copy never
    # runs. Consider removing one of them.
    chunk_size = 100
    # Make sure the test data fits the testcase:
    #
    # 1. At least one line must be longer than the chunk_size, so
    #    that at least one chunk has to be longer as well
    self.assertTrue(max(map(len, csv_test_data.splitlines())) > chunk_size)
    # The actual test
    chunked = list(read_delimited_chunks(io.BytesIO(csv_test_data), chunk_size))
    # Appending all chunks yields the original unchunked data
    self.assertEqual(csv_test_data, b"".join(chunked))
    # Chunks longer than chunk_size have newlines only at the end.
    long_chunks = list(filter(lambda chunk: len(chunk) > chunk_size, chunked))
    self.assertTrue(len(long_chunks) > 0)
    for chunk in long_chunks:
        if chunk.endswith(b"\n"):
            chunk = chunk[:-1]
        self.assertFalse(b"\n" in chunk)
def test_read_delimited_chunks_short_lines(self):
    """Test lines shorter than chunksize, data longer than chunk_size."""
    # NOTE(review): duplicate definition — a token-identical
    # test_read_delimited_chunks_short_lines appears earlier in this
    # file; Python keeps only the later binding, so one copy never
    # runs. Consider removing one of them.
    chunk_size = 500
    # Make sure the test data fits the testcase:
    #
    # 1. The lines must be shorter than the chunk_size, so that all
    #    chunks can be shorter than chunk_size
    self.assertTrue(max(map(len, csv_test_data.splitlines())) < chunk_size)
    # 2. The data must be longer than chunk_size so that chunking is
    #    useful
    self.assertTrue(len(csv_test_data) > chunk_size)
    # The actual test
    chunked = list(read_delimited_chunks(io.BytesIO(csv_test_data), chunk_size))
    # Appending all chunks yields the original unchunked data
    self.assertEqual(csv_test_data, b"".join(chunked))
    # All chunks are shorter than chunk_size
    self.assertTrue(max(map(len, chunked)) <= chunk_size)
    # All chunks were split at line separators
    self.assertEqual(
        csv_test_data.splitlines(),
        list(itertools.chain.from_iterable(
            chunk.splitlines() for chunk in chunked)))