def __iter__(self):
    """Write each item of ``self.parent`` to ``self.filename`` as a tensor.

    An output stream is opened on ``self.filename``; every item is converted
    with ``pa.Tensor.from_numpy`` and appended to the stream with
    ``pa.write_tensor``.

    Yields:
        None, once after each item has been written.
    """
    # NOTE(review): the original line layout was lost; this assumes the bare
    # ``yield`` sits inside the loop and the close follows the loop - confirm.
    stream = pa.output_stream(self.filename)
    try:
        for item in self.parent:
            pa.write_tensor(pa.Tensor.from_numpy(item), stream)
            yield
    finally:
        # Runs on exhaustion, on an error while writing, and when the
        # generator is closed before being fully consumed, so the stream
        # is never left open.
        stream.close()
def test_tensor_ipc_strided(tmpdir):
    """Check that writing a strided tensor raises ``ValueError``."""
    data = np.random.randn(10, 4)
    # Every other row of the array: a strided view rather than a copy.
    tensor = pa.Tensor.from_numpy(data[::2])

    path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-strided')
    # Create the map outside the ``raises`` block so that only a ValueError
    # coming from write_tensor itself can satisfy the expectation.
    mmap = pa.create_memory_map(path, 1024)
    with pytest.raises(ValueError):
        pa.write_tensor(tensor, mmap)
def test_tensor_ipc_strided():
    """Check that writing a strided tensor raises ``ValueError``.

    The backing file is created in the current working directory and is
    removed with ``_try_delete`` whether or not the test passes.
    """
    data = np.random.randn(10, 4)
    # Every other row of the array: a strided view rather than a copy.
    tensor = pa.Tensor.from_numpy(data[::2])

    path = 'pyarrow-tensor-ipc-strided'
    try:
        # Create the map outside the ``raises`` block so that only a
        # ValueError coming from write_tensor itself can satisfy the
        # expectation.
        mmap = pa.create_memory_map(path, 1024)
        with pytest.raises(ValueError):
            pa.write_tensor(tensor, mmap)
    finally:
        _try_delete(path)
def test_read_tensor(tmpdir):
    """Write a tensor through one memory map and read it back via another."""
    # Create and write the tensor
    expected = np.random.randn(10, 4)
    source = pa.Tensor.from_numpy(expected)
    required_size = pa.get_tensor_size(source)
    target = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-read-tensor')
    sink = pa.create_memory_map(target, required_size)
    pa.write_tensor(source, sink)

    # Read the tensor back through a separate read-only map
    reader = pa.memory_map(target, mode='r')
    restored = pa.read_tensor(reader).to_numpy()
    np.testing.assert_equal(expected, restored)
def test_tensor_ipc_roundtrip(tmpdir):
    """Write a tensor to a memory map and read the same tensor back."""
    values = np.random.randn(10, 4)
    expected = pa.Tensor.from_numpy(values)

    target = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-roundtrip')
    mapped = pa.create_memory_map(target, 1024)

    pa.write_tensor(expected, mapped)
    # Rewind so the read starts at the beginning of what was just written.
    mapped.seek(0)
    restored = pa.read_tensor(mapped)

    assert restored.equals(expected)
def test_tensor_ipc_read_from_compressed(tempdir):
    """Round-trip a tensor through a gzip-compressed file (ARROW-5910)."""
    # ARROW-5910
    data = np.random.randn(10, 4)
    tensor = pa.Tensor.from_numpy(data)

    path = tempdir / 'tensor-compressed-file'

    # Context managers close both streams even when a call in between
    # raises; the output stream is closed before the file is read back.
    with pa.output_stream(path, compression='gzip') as out_stream:
        pa.write_tensor(tensor, out_stream)

    with pa.input_stream(path, compression='gzip') as in_stream:
        result = pa.read_tensor(in_stream)
        assert result.equals(tensor)
def test_store_arrow_objects(self):
    """Store a tensor in the plasma client and read an equal array back."""
    expected = np.random.randn(10, 4)

    # Write an arrow object.
    object_id = random_object_id()
    source = pa.Tensor.from_numpy(expected)
    required_size = pa.get_tensor_size(source)
    buf = self.plasma_client.create(object_id, required_size)
    writer = pa.FixedSizeBufferWriter(buf)
    pa.write_tensor(source, writer)
    self.plasma_client.seal(object_id)

    # Read the arrow object.
    [stored] = self.plasma_client.get_buffers([object_id])
    restored = pa.read_tensor(pa.BufferReader(stored)).to_numpy()

    # Assert that they are equal.
    np.testing.assert_equal(expected, restored)
def test_tensor_ipc_roundtrip():
    """Write a tensor to a memory map and read the same tensor back.

    The backing file lives in the current working directory and is removed
    with ``_try_delete`` regardless of the outcome.
    """
    values = np.random.randn(10, 4)
    expected = pa.Tensor.from_numpy(values)

    path = 'pyarrow-tensor-ipc-roundtrip'
    try:
        mapped = pa.create_memory_map(path, 1024)
        pa.write_tensor(expected, mapped)
        # Rewind so the read starts at the beginning of what was written.
        mapped.seek(0)
        restored = pa.read_tensor(mapped)
        assert restored.equals(expected)
    finally:
        _try_delete(path)
def test_tensor_ipc_strided(tmpdir):
    """Round-trip two strided tensors through the same memory map."""
    # A 2-D array strided along its first axis
    matrix = np.random.randn(10, 4)
    row_strided = pa.Tensor.from_numpy(matrix[::2])

    # A 3-D array strided along its middle axis
    cube = np.random.randn(10, 6, 4)
    middle_strided = pa.Tensor.from_numpy(cube[:, ::2, :])

    target = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-strided')
    mapped = pa.create_memory_map(target, 2048)

    for expected in (row_strided, middle_strided):
        # Each tensor is written from offset 0 and read back from offset 0.
        mapped.seek(0)
        pa.write_tensor(expected, mapped)

        mapped.seek(0)
        restored = pa.read_tensor(mapped)
        assert restored.equals(expected)
#!/usr/bin/env python
"""Print a random 10x6 array and write it as a tensor to /tmp/tensor.arrow."""
import numpy as np
import pyarrow as pa

random_matrix = np.random.randn(10, 6)
print(random_matrix)

# Build the tensor before opening the file, then write it in binary mode.
tensor = pa.Tensor.from_numpy(random_matrix)
with pa.OSFile("/tmp/tensor.arrow", "wb") as sink:
    pa.write_tensor(tensor, sink)