def test_unknown_compression_raises():
    """Constructing or querying a codec by a bogus name must raise.

    ``Codec(None)`` is a type error (name must be a string); an
    unrecognized name is a value error for both the constructor and
    ``Codec.is_available``.
    """
    with pytest.raises(TypeError):
        Codec(None)
    with pytest.raises(ValueError):
        Codec('unknown')
    with pytest.raises(ValueError):
        Codec.is_available('unknown')
def test_compression_detection(path, expected_compression):
    """Codec.detect must infer the codec from *path*.

    When the expected codec was compiled into the Arrow build, detection
    yields a ``Codec`` whose name matches; otherwise detection raises
    ``ArrowNotImplementedError``.
    """
    if Codec.is_available(expected_compression):
        detected = Codec.detect(path)
        assert isinstance(detected, Codec)
        assert detected.name == expected_compression
    else:
        # Codec not built in: detection still recognizes the extension
        # but cannot instantiate the codec.
        with pytest.raises(pa.lib.ArrowNotImplementedError):
            Codec.detect(path)
def test_compress_decompress(compression):
    """Round-trip random bytes through pa.compress / pa.decompress.

    Exercises both the Buffer-returning and ``asbytes=True`` paths, and
    checks that decompressing raw bytes without an explicit
    ``decompressed_size`` raises ``ValueError`` (the size cannot be
    inferred from a plain bytes payload).
    """
    if not Codec.is_available(compression):
        pytest.skip("{} support is not built".format(compression))
    INPUT_SIZE = 10000
    # BUG FIX: ndarray.tostring() was deprecated in NumPy 1.19 and
    # removed in NumPy 2.0 -- tobytes() is the exact replacement.
    test_data = (np.random.randint(0, 255, size=INPUT_SIZE)
                 .astype(np.uint8)
                 .tobytes())
    test_buf = pa.py_buffer(test_data)

    compressed_buf = pa.compress(test_buf, codec=compression)
    compressed_bytes = pa.compress(test_data, codec=compression,
                                   asbytes=True)
    assert isinstance(compressed_bytes, bytes)

    decompressed_buf = pa.decompress(compressed_buf, INPUT_SIZE,
                                     codec=compression)
    decompressed_bytes = pa.decompress(compressed_bytes, INPUT_SIZE,
                                       codec=compression, asbytes=True)
    assert isinstance(decompressed_bytes, bytes)

    assert decompressed_buf.equals(test_buf)
    assert decompressed_bytes == test_data

    # Omitting the decompressed size is only valid for Buffer inputs.
    with pytest.raises(ValueError):
        pa.decompress(compressed_bytes, codec=compression)
def test_output_stream_constructor(tmpdir):
    """CompressedOutputStream accepts both a path and an open file object."""
    if not Codec.is_available("gzip"):
        pytest.skip("gzip support is not built")
    # Construct directly from a filesystem path.
    with pa.CompressedOutputStream(tmpdir / "ctor.gz", "gzip") as out:
        out.write(b"test")
    # Construct by wrapping an already-open binary file object.
    with (tmpdir / "ctor2.gz").open("wb") as fh, \
            pa.CompressedOutputStream(fh, "gzip") as out:
        out.write(b"test")
def test_compressed_roundtrip(compression):
    """Data written through a compressed stream reads back unchanged."""
    if not Codec.is_available(compression):
        pytest.skip("{} support is not built".format(compression))
    payload = b"some test data\n" * 10 + b"eof\n"

    sink = pa.BufferOutputStream()
    with pa.CompressedOutputStream(sink, compression) as out:
        out.write(payload)
    compressed = sink.getvalue()
    # The repetitive payload must actually shrink under compression.
    assert len(compressed) < len(payload)

    source = pa.BufferReader(compressed)
    with pa.CompressedInputStream(source, compression) as inp:
        restored = inp.read()
    assert restored == payload
def test_compressed_recordbatch_stream(compression):
    """Round-trip a RecordBatch through a compressed IPC stream."""
    if not Codec.is_available(compression):
        pytest.skip("{} support is not built".format(compression))
    # ARROW-4836: roundtrip a RecordBatch through a compressed stream
    table = pa.Table.from_arrays([pa.array([1, 2, 3, 4, 5])], ['a'])
    sink = pa.BufferOutputStream()
    wrapped = pa.CompressedOutputStream(sink, compression)
    writer = pa.RecordBatchStreamWriter(wrapped, table.schema)
    writer.write_table(table, max_chunksize=3)
    # Close order matters: the IPC writer must finish before the
    # compressed stream flushes its trailing data.
    writer.close()
    wrapped.close()  # Flush data
    payload = sink.getvalue()
    reader_stream = pa.CompressedInputStream(pa.BufferReader(payload),
                                             compression)
    got_table = pa.RecordBatchStreamReader(reader_stream).read_all()
    assert got_table == table
'nopandas', 'orc', 'pandas', 'parquet', 'plasma', 's3', 'snappy', 'tensorflow', 'flight', 'slow', 'requires_testing_data', 'zstd', ] defaults = { 'brotli': Codec.is_available('brotli'), 'bz2': Codec.is_available('bz2'), 'cython': False, 'dataset': False, 'fastparquet': False, 'flight': False, 'gandiva': False, 'gdb': True, 'gzip': Codec.is_available('gzip'), 'hdfs': False, 'hypothesis': False, 'large_memory': False, 'lz4': Codec.is_available('lz4'), 'memory_leak': False, 'nopandas': False, 'orc': False,