def test_block_sizes(self):
    """Exercise the JSON reader at tiny block sizes.

    A block too small to fit a single row must raise a ValueError that
    hints at increasing the block size; any block size large enough must
    yield the full table regardless of a trailing newline or the
    newlines_in_values setting.
    """
    payload = b'{"a": 1}\n{"a": 2}\n{"a": 3}'
    read_options = ReadOptions()
    parse_options = ParseOptions()
    for source in (payload, payload + b'\n'):
        for allow_newlines in (False, True):
            parse_options.newlines_in_values = allow_newlines
            # 4 bytes cannot hold one complete row: expect a clear error.
            read_options.block_size = 4
            with pytest.raises(ValueError,
                               match="try to increase block size"):
                self.read_bytes(source, read_options=read_options,
                                parse_options=parse_options)
            # Validate reader behavior with various block sizes.
            # There used to be bugs in this area.
            for size in range(9, 20):
                read_options.block_size = size
                result = self.read_bytes(source,
                                         read_options=read_options,
                                         parse_options=parse_options)
                assert result.to_pydict() == {'a': [1, 2, 3]}
def test_stress_block_sizes(self):
    """Stress block stitching with a range of small block sizes.

    Reads randomly generated JSON data under several awkward block
    sizes (with and without a trailing line ending, and with
    newlines_in_values toggled) and checks the result matches the
    expected table exactly.
    """
    raw, expected = make_random_json(num_cols=2, num_rows=100)
    read_options = ReadOptions()
    parse_options = ParseOptions()
    for source in (raw, raw.rstrip(b'\r\n')):
        for allow_newlines in (False, True):
            parse_options.newlines_in_values = allow_newlines
            for size in (22, 23, 37):
                read_options.block_size = size
                result = self.read_bytes(source,
                                         read_options=read_options,
                                         parse_options=parse_options)
                assert result.schema == expected.schema
                if not result.equals(expected):
                    # Dict comparison gives a far more readable diff
                    # than Table.equals on failure.
                    assert result.to_pydict() == expected.to_pydict()