def setup(self, size):
    """Start a plasma store, connect a client and prepare the random payload.

    The store is created with ``plasma_store_memory=10**9`` and its context
    is entered by hand, so the context object is kept on
    ``self.plasma_store_ctx`` for whoever exits it later.

    ``size`` is divided by 8 to get the number of values generated with
    ``np.random.randn`` (presumably ``size`` is a byte count -- confirm).
    """
    store_ctx = plasma.start_plasma_store(plasma_store_memory=10**9)
    self.plasma_store_ctx = store_ctx
    socket_name, _proc = store_ctx.__enter__()
    self.plasma_client = plasma.connect(socket_name, "", 64)

    element_count = size // 8
    self.data = np.random.randn(element_count)
def run_transfer_worker(pool_address, session_id, chunk_keys, spill_dir, msg_queue):
    """Run a transfer test worker backed by its own plasma store.

    Steps performed:

    1. Point ``options.worker.spill_directory`` at ``spill_dir`` and start a
       10 MiB plasma store whose socket is written to
       ``options.worker.plasma_socket``.
    2. Open the transfer test pool at ``pool_address`` and create two
       sender and two receiver actors.
    3. Write every chunk except the last seven as a spill file, and put the
       last seven into the plasma store, registering each with the chunk
       holder actor.
    4. Publish the plasma socket on ``msg_queue``, then keep polling the
       queue; ``SystemError`` is raised once the queue has been empty past
       the 60 second mark.
    """
    options.worker.spill_directory = spill_dir
    plasma_size = 1024 * 1024 * 10
    # Index of the first chunk that goes to plasma instead of a spill file.
    first_plasma_idx = len(chunk_keys) - 7

    # don't use multiple with-statement as we need the options be forked
    with plasma.start_plasma_store(plasma_size) as store_args:
        plasma_socket = store_args[0]
        options.worker.plasma_socket = plasma_socket
        plasma_client = plasma.connect(plasma_socket, '', 0)

        with start_transfer_test_pool(address=pool_address, plasma_size=plasma_size) as pool:
            chunk_holder_ref = pool.actor_ref(ChunkHolderActor.default_name())
            mapper_ref = pool.actor_ref(PlasmaKeyMapActor.default_name())
            plasma_store = PlasmaChunkStore(plasma_client, mapper_ref)

            for _ in range(2):
                sender_uid = '%s' % str(uuid.uuid4())
                pool.create_actor(SenderActor, uid=sender_uid)
                receiver_uid = '%s' % str(uuid.uuid4())
                pool.create_actor(ReceiverActor, uid=receiver_uid)

            for idx in range(0, first_plasma_idx):
                data = np.ones((640 * 1024,), dtype=np.int16) * idx
                write_spill_file(chunk_keys[idx], data)

            for idx in range(first_plasma_idx, len(chunk_keys)):
                chunk_key = chunk_keys[idx]
                data = np.ones((640 * 1024,), dtype=np.int16) * idx
                plasma_store.put(session_id, chunk_key, data)
                chunk_holder_ref.register_chunk(session_id, chunk_key)

            msg_queue.put(plasma_socket)

            started_at = time.time()
            while True:
                try:
                    msg_queue.get_nowait()
                except Empty:
                    if time.time() > started_at + 60:
                        raise SystemError('Transfer finish timed out.')
                    pool.sleep(0.1)
def test_use_huge_pages():
    """Create a 100000000-sized object in a store started with huge pages.

    The store uses ``/mnt/hugepages`` as its plasma directory with
    ``use_hugepages=True``.
    """
    import pyarrow.plasma as plasma

    store_options = dict(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
        plasma_directory="/mnt/hugepages",
        use_hugepages=True,
    )
    with plasma.start_plasma_store(**store_options) as (socket_name, _proc):
        client = plasma.connect(socket_name, "", 64)
        create_object(client, 100000000)
def test_use_huge_pages():
    """Create a 10**8-sized object in a store started with huge pages.

    The store is given ``2*10**9`` of memory and uses ``/mnt/hugepages`` as
    its plasma directory with ``use_hugepages=True``.
    """
    import pyarrow.plasma as plasma

    store_options = dict(
        plasma_store_memory=2*10**9,
        plasma_directory="/mnt/hugepages",
        use_hugepages=True,
    )
    with plasma.start_plasma_store(**store_options) as (socket_name, _proc):
        client = plasma.connect(socket_name)
        create_object(client, 10**8)
def test_use_huge_pages():
    """Create a 10**8-sized object in a store started with huge pages.

    The store is given ``2 * 10**9`` of memory and uses ``/mnt/hugepages``
    as its plasma directory with ``use_hugepages=True``.
    """
    import pyarrow.plasma as plasma

    store_options = dict(
        plasma_store_memory=2 * 10**9,
        plasma_directory="/mnt/hugepages",
        use_hugepages=True,
    )
    with plasma.start_plasma_store(**store_options) as (socket_name, _proc):
        client = plasma.connect(socket_name, "", 64)
        create_object(client, 10**8)
def run_transfer_worker(pool_address, session_id, chunk_keys, spill_dir, msg_queue):
    """Run a transfer test worker with its own plasma store and actor pool.

    The function sets ``options.worker.spill_directory`` to ``spill_dir``,
    starts a 10 MiB plasma store (its socket is written to
    ``options.worker.plasma_socket``), and creates a two-process gevent
    actor pool at ``pool_address`` populated with the worker actors needed
    for transfer tests.

    It then registers ``chunk_keys`` for ``session_id`` through a
    ``WorkerRegistrationTestActor``, publishes the plasma socket on
    ``msg_queue`` once registration finishes, and waits until the holder
    actor's ``obtain()`` returns a truthy value.

    Raises:
        SystemError: if either wait exceeds 60 seconds.
    """
    from mars.config import options

    options.worker.spill_directory = spill_dir
    plasma_size = 1024 * 1024 * 10

    # don't use multiple with-statement as we need the options be forked
    with plasma.start_plasma_store(plasma_size) as store_args:
        options.worker.plasma_socket = plasma_socket = store_args[0]

        with create_actor_pool(n_process=2, backend='gevent',
                               distributor=WorkerDistributor(2),
                               address=pool_address) as pool:
            # Bound up front so the ``finally`` clause cannot hit an unbound
            # name (and mask the real error) when an actor created before
            # the chunk holder fails.
            chunk_holder_ref = None
            try:
                pool.create_actor(ClusterInfoActor, schedulers=[pool_address],
                                  uid=ClusterInfoActor.default_name())
                pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
                pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
                pool.create_actor(QuotaActor, 1024 * 1024 * 20,
                                  uid=MemQuotaActor.default_name())
                holder_ref = pool.create_actor(HolderActor, uid='HolderActor')
                chunk_holder_ref = pool.create_actor(
                    ChunkHolderActor, plasma_size, uid=ChunkHolderActor.default_name())
                pool.create_actor(SpillActor)

                pool.create_actor(SenderActor, uid='%s' % str(uuid.uuid4()))
                pool.create_actor(SenderActor, uid='%s' % str(uuid.uuid4()))

                pool.create_actor(ReceiverActor, uid='%s' % str(uuid.uuid4()))
                pool.create_actor(ReceiverActor, uid='%s' % str(uuid.uuid4()))

                register_actor = pool.create_actor(WorkerRegistrationTestActor)
                register_actor.register(session_id, chunk_keys)

                # Wait for the registration actor to report completion.
                check_time = time.time()
                while not register_actor.get_finished():
                    gevent.sleep(0.5)
                    if time.time() - check_time > 60:
                        raise SystemError('Wait result timeout')
                register_actor.destroy()

                # Tell the parent which plasma socket this worker uses.
                msg_queue.put(plasma_socket)

                # Wait until the holder actor has been handed a value.
                check_time = time.time()
                while not holder_ref.obtain():
                    gevent.sleep(1)
                    if time.time() - check_time > 60:
                        raise SystemError('Wait result timeout')
            finally:
                if chunk_holder_ref is not None:
                    pool.destroy_actor(chunk_holder_ref)
def setup_method(self, test_method):
    """Start a plasma store and open two clients against it.

    The store context is entered manually and stored on
    ``self.plasma_store_ctx``; the yielded socket name and second value are
    kept as ``self.plasma_store_name`` and ``self.p``.
    """
    import pyarrow.plasma as plasma

    # Launch the store.
    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
        use_valgrind=USE_VALGRIND,
    )
    self.plasma_store_ctx = store_ctx
    self.plasma_store_name, self.p = store_ctx.__enter__()

    # Open both client connections, in order.
    socket_name = self.plasma_store_name
    self.plasma_client = plasma.connect(socket_name)
    self.plasma_client2 = plasma.connect(socket_name)
def setup_method(self, test_method):
    """Start a plasma store with an external store and connect one client.

    The store gets ``1000 * 1024`` of memory and ``EXTERNAL_STORE`` as its
    external store. Its context is entered manually and stored on
    ``self.plasma_store_ctx``.
    """
    import pyarrow.plasma as plasma

    # Launch the store.
    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=1000 * 1024,
        use_valgrind=USE_VALGRIND,
        external_store=EXTERNAL_STORE,
    )
    self.plasma_store_ctx = store_ctx
    self.plasma_store_name, self.p = store_ctx.__enter__()

    # Open the client connection.
    self.plasma_client = plasma.connect(self.plasma_store_name)
def plasma_store(monkeypatch):
    """Yield the socket name of a temporary plasma store.

    While the store is running, ``orchest.Config.STORE_SOCKET_NAME`` is
    patched to its socket name. After the consumer is done, the per-step
    data directories under ``tests/userdir/.data`` are removed (missing
    directories are ignored).
    """
    with plasma.start_plasma_store(PLASMA_STORE_CAPACITY) as (socket_name, _proc):
        monkeypatch.setattr(orchest.Config, "STORE_SOCKET_NAME", socket_name)
        yield socket_name

    # Remove whatever the steps wrote to disk.
    for step_uuid in (
        "uuid-1______________",
        "uuid-2______________",
        "uuid-3______________",
    ):
        shutil.rmtree(f"tests/userdir/.data/{step_uuid}", ignore_errors=True)
def run_transfer_worker(pool_address, session_id, chunk_keys, spill_dir, msg_queue):
    """Run a transfer test worker that stores chunks via the storage client.

    Steps performed:

    1. Configure the spill directory and the plasma directory (``/tmp`` on
       macOS, ``/dev/shm`` otherwise) and start a 10 MiB plasma store.
    2. Open the transfer test pool at ``pool_address`` and create two
       sender and two receiver worker actors.
    3. Put every chunk except the last seven into ``PROC_MEMORY`` and the
       last seven into ``SHARED_MEMORY``, then wait until all chunks have
       reported locations.
    4. Copy the first group to ``DISK`` and wait until each of them lists
       ``(0, DataStorageDevice.DISK)`` among its locations.
    5. Publish the plasma socket on ``msg_queue``, then keep polling the
       queue; ``SystemError`` is raised once the queue has been empty past
       the 60 second mark.
    """
    from pyarrow import plasma

    options.worker.spill_directory = spill_dir
    if sys.platform == 'darwin':
        options.worker.plasma_dir = '/tmp'
    else:
        options.worker.plasma_dir = '/dev/shm'
    plasma_size = 1024 * 1024 * 10
    # Index of the first chunk that goes to shared memory.
    first_shared_idx = len(chunk_keys) - 7

    # don't use multiple with-statement as we need the options be forked
    with plasma.start_plasma_store(plasma_size) as store_args:
        plasma_socket = store_args[0]
        options.worker.plasma_socket = plasma_socket

        with start_transfer_test_pool(address=pool_address, plasma_size=plasma_size) as pool:
            storage_client_ref = pool.create_actor(StorageClientActor)

            for _ in range(2):
                pool.create_actor(SenderActor, uid=str(uuid.uuid4()))
                pool.create_actor(ReceiverWorkerActor, uid=str(uuid.uuid4()))

            for idx in range(0, first_shared_idx):
                data = np.ones((640 * 1024, ), dtype=np.int16) * idx
                storage_client_ref.put_objects(
                    session_id, [chunk_keys[idx]], [data],
                    [DataStorageDevice.PROC_MEMORY])

            for idx in range(first_shared_idx, len(chunk_keys)):
                data = np.ones((640 * 1024, ), dtype=np.int16) * idx
                storage_client_ref.put_objects(
                    session_id, [chunk_keys[idx]], [data],
                    [DataStorageDevice.SHARED_MEMORY])

            # Block until every chunk reports at least one location.
            while True:
                all_locations = storage_client_ref.get_data_locations(
                    session_id, chunk_keys)
                if all(all_locations):
                    break
                pool.sleep(0.1)

            for idx in range(0, first_shared_idx):
                storage_client_ref.copy_to(
                    session_id, [chunk_keys[idx]], [DataStorageDevice.DISK])

            # Block until every copied chunk shows up on disk.
            while True:
                copied_locations = storage_client_ref.get_data_locations(
                    session_id, chunk_keys[:-7])
                if all((0, DataStorageDevice.DISK) in locations
                       for locations in copied_locations):
                    break
                pool.sleep(0.1)

            msg_queue.put(plasma_socket)

            started_at = time.time()
            while True:
                try:
                    msg_queue.get_nowait()
                except Empty:
                    if time.time() > started_at + 60:
                        raise SystemError('Transfer finish timed out.')
                    pool.sleep(0.1)
def test_plasma_client_sharing():
    """Check a fetched buffer stays usable after its client is deleted.

    The client is deleted first, the buffer is compared afterwards, and the
    buffer is deleted last; that final deletion is the step that used to
    segfault before ARROW-2448.
    """
    import pyarrow.plasma as plasma

    store = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY)
    with store as (socket_name, _proc):
        client = plasma.connect(socket_name)
        zeros_id = client.put(np.zeros(3))
        fetched = client.get(zeros_id)
        del client
        expected = np.zeros(3)
        assert (fetched == expected).all()
        del fetched  # This segfaulted pre ARROW-2448.
def test_plasma_client_sharing():
    """Check a fetched buffer stays usable after its client is deleted.

    The client is deleted first, the buffer is compared afterwards, and the
    buffer is deleted last; that final deletion is the step that used to
    segfault before ARROW-2448.
    """
    import pyarrow.plasma as plasma

    store = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY)
    with store as (socket_name, _proc):
        client = plasma.connect(socket_name, "", 64)
        zeros_id = client.put(np.zeros(3))
        fetched = client.get(zeros_id)
        del client
        expected = np.zeros(3)
        assert (fetched == expected).all()
        del fetched  # This segfaulted pre ARROW-2448.
def setUpClass(cls):
    """Start the class-wide plasma store, client and KV store.

    The store context is entered manually and kept on ``cls._plasma_store``.
    Its socket is published both on ``cls.plasma_socket`` and on
    ``options.worker.plasma_socket``.
    """
    import pyarrow.plasma as plasma
    from mars import kvstore

    store_ctx = plasma.start_plasma_store(cls.plasma_storage_size)
    cls._plasma_store = store_ctx

    socket_name = store_ctx.__enter__()[0]
    cls.plasma_socket = socket_name
    options.worker.plasma_socket = socket_name

    options.worker.spill_directory = cls.spill_dir

    cls._plasma_client = plasma.connect(options.worker.plasma_socket, '', 0)
    cls._kv_store = kvstore.get(options.kv_store)
def initialize(cls, shm_size_mb=SHM_SIZE_MB, nb_workers=NB_WORKERS, progress_bar=False):
    """
    Initialize Pandarallel shared memory.

    Starts a plasma store of ``int(shm_size_mb * 1e6)`` bytes, connects a
    client to it and installs the ``parallel_*`` methods on the pandas
    DataFrame, Series, Rolling, DataFrameGroupBy and RollingGroupby classes.
    The store context is entered manually and kept on the class.

    Parameters
    ----------
    shm_size_mb : int, optional
        Size of Pandarallel shared memory
    nb_workers : int, optional
        Number of worker used for parallelisation
    progress_bar : bool, optional
        Display a progress bar
        WARNING: Progress bar is an experimental feature.
                 This can lead to a considerable performance loss.
        It is not forwarded to the group-by variants.
    """
    print("New pandarallel memory created - Size:", shm_size_mb, "MB")
    print("Pandarallel will run on", nb_workers, "workers")

    if progress_bar:
        # Adjacent literals instead of a backslash continuation inside the
        # string, so no stray backslash or whitespace ends up in the output.
        print("WARNING: Progress bar is an experimental feature. This "
              "can lead to a considerable performance loss.")
        tqdm_notebook().pandas()

    cls.__store_ctx = _plasma.start_plasma_store(int(shm_size_mb * 1e6))
    plasma_store_name, _ = cls.__store_ctx.__enter__()

    plasma_client = _plasma.connect(plasma_store_name)

    args = plasma_store_name, nb_workers, plasma_client

    _pd.DataFrame.parallel_apply = _DataFrame.apply(*args, progress_bar)
    _pd.DataFrame.parallel_applymap = _DataFrame.applymap(*args, progress_bar)

    _pd.Series.parallel_map = _Series.map(*args, progress_bar)
    _pd.Series.parallel_apply = _Series.apply(*args, progress_bar)

    _pd.core.window.Rolling.parallel_apply = _SeriesRolling.apply(
        *args, progress_bar)

    # The group-by variants are built without the progress bar flag.
    _pd.core.groupby.DataFrameGroupBy.parallel_apply = _DataFrameGroupBy.apply(
        *args)
    _pd.core.window.RollingGroupby.parallel_apply = _RollingGroupby.apply(
        *args)
def setup_method(self, test_method):
    """Start a plasma store and open two clients against it.

    The single-memory-mapped-file mode is switched on only when the test
    about to run is ``test_use_one_memory_mapped_file``. The store context
    is entered manually and stored on ``self.plasma_store_ctx``.
    """
    import pyarrow.plasma as plasma

    single_mapped_file = (
        test_method == self.test_use_one_memory_mapped_file)

    # Launch the store.
    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
        use_valgrind=USE_VALGRIND,
        use_one_memory_mapped_file=single_mapped_file,
    )
    self.plasma_store_ctx = store_ctx
    self.plasma_store_name, self.p = store_ctx.__enter__()

    # Open both client connections, in order.
    self.plasma_client = plasma.connect(self.plasma_store_name, "", 64)
    self.plasma_client2 = plasma.connect(self.plasma_store_name, "", 0)
def setup_method(self, test_method):
    """Start a plasma store and open two clients against it.

    The single-memory-mapped-file mode is switched on only when the test
    about to run is ``test_use_one_memory_mapped_file``. The store context
    is entered manually and stored on ``self.plasma_store_ctx``; the socket
    name is only kept locally.
    """
    import pyarrow.plasma as plasma

    single_mapped_file = (
        test_method == self.test_use_one_memory_mapped_file)

    # Launch the store.
    store_ctx = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY,
        use_valgrind=USE_VALGRIND,
        use_one_memory_mapped_file=single_mapped_file,
    )
    self.plasma_store_ctx = store_ctx
    socket_name, self.p = store_ctx.__enter__()

    # Open both client connections, in order.
    self.plasma_client = plasma.connect(socket_name, "", 64)
    self.plasma_client2 = plasma.connect(socket_name, "", 0)
def test_plasma_tf_op(use_gpu=False):
    """Run the plasma TensorFlow op test once per supported dtype.

    The test is skipped when the op is unavailable after building it.
    """
    import pyarrow.plasma as plasma
    import tensorflow as tf

    plasma.build_plasma_tensorflow_op()
    if plasma.tf_plasma_op is None:
        pytest.skip("TensorFlow Op not found")

    dtypes = (np.float32, np.float64,
              np.int8, np.int16, np.int32, np.int64)

    with plasma.start_plasma_store(10**8) as (socket_name, _proc):
        client = plasma.connect(socket_name, "", 0)
        for dtype in dtypes:
            run_tensorflow_test_with_dtype(
                tf, plasma, socket_name, client, use_gpu, dtype)
def test_plasma_tf_op(use_gpu=False):
    """Run the plasma TensorFlow op test once per supported dtype.

    The test is skipped when the op is unavailable after building it.
    """
    import pyarrow.plasma as plasma
    import tensorflow as tf

    plasma.build_plasma_tensorflow_op()
    if plasma.tf_plasma_op is None:
        pytest.skip("TensorFlow Op not found")

    dtypes = (np.float32, np.float64,
              np.int8, np.int16, np.int32, np.int64)

    with plasma.start_plasma_store(10**8) as (socket_name, _proc):
        client = plasma.connect(socket_name, "", 0)
        for dtype in dtypes:
            run_tensorflow_test_with_dtype(
                tf, plasma, socket_name, client, use_gpu, dtype)
def test_plasma_list():
    """Check the fields reported by ``plasma_client.list()``.

    Covers data/metadata sizes, the reference count while a fetched value
    is alive, the ``created`` -> ``sealed`` state transition, and the
    creation timestamp / construction duration (with 1.5 s of slack).
    """
    import pyarrow.plasma as plasma

    store = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY)
    with store as (socket_name, _proc):
        client = plasma.connect(socket_name)

        # --- sizes ---
        sized_id, _, _ = create_object(client, 11, metadata_size=7, seal=False)
        sizes_listing = client.list()
        assert sizes_listing[sized_id]["data_size"] == 11
        assert sizes_listing[sized_id]["metadata_size"] == 7

        # --- ref_count ---
        put_id = client.put(np.zeros(3))
        # The ref count has already been released at this point, but the
        # "ref_count == 0 right after put" check is flaky and therefore
        # disabled (ARROW-3344).
        held_value = client.get(put_id)
        held_listing = client.list()
        assert held_listing[put_id]["ref_count"] == 1
        del held_value

        # --- state ---
        state_id, _, _ = create_object(client, 3, metadata_size=0, seal=False)
        before_seal = client.list()
        assert before_seal[state_id]["state"] == "created"
        client.seal(state_id)
        after_seal = client.list()
        assert after_seal[state_id]["state"] == "sealed"

        # --- timestamps ---
        slack = 1.5  # seconds
        create_begin = time.time()
        timed_id, _, _ = create_object(client, 3, metadata_size=0, seal=False)
        create_end = time.time()
        created_listing = client.list()
        create_time = created_listing[timed_id]["create_time"]
        assert create_begin - slack <= create_time <= create_end + slack

        time.sleep(2.0)
        seal_begin = time.time()
        client.seal(timed_id)
        seal_end = time.time()
        sealed_listing = client.list()
        assert (seal_begin - create_end - slack
                <= sealed_listing[timed_id]["construct_duration"])
        assert (sealed_listing[timed_id]["construct_duration"]
                <= seal_end - create_begin + slack)
def test_plasma_list():
    """Check the fields reported by ``plasma_client.list()``.

    Covers data/metadata sizes, the reference count before and while a
    fetched value is alive, the ``created`` -> ``sealed`` state transition,
    and the creation timestamp / construction duration (bounds rounded
    outwards to whole seconds).
    """
    import pyarrow.plasma as plasma

    store = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY)
    with store as (socket_name, _proc):
        client = plasma.connect(socket_name, "", 0)

        # --- sizes ---
        sized_id, _, _ = create_object(client, 11, metadata_size=7, seal=False)
        sizes_listing = client.list()
        assert sizes_listing[sized_id]["data_size"] == 11
        assert sizes_listing[sized_id]["metadata_size"] == 7

        # --- ref_count ---
        put_id = client.put(np.zeros(3))
        released_listing = client.list()
        # Ref count has already been released
        assert released_listing[put_id]["ref_count"] == 0
        held_value = client.get(put_id)
        held_listing = client.list()
        assert held_listing[put_id]["ref_count"] == 1
        del held_value

        # --- state ---
        state_id, _, _ = create_object(client, 3, metadata_size=0, seal=False)
        before_seal = client.list()
        assert before_seal[state_id]["state"] == "created"
        client.seal(state_id)
        after_seal = client.list()
        assert after_seal[state_id]["state"] == "sealed"

        # --- timestamps ---
        create_begin = time.time()
        timed_id, _, _ = create_object(client, 3, metadata_size=0, seal=False)
        create_end = time.time()
        created_listing = client.list()
        create_time = created_listing[timed_id]["create_time"]
        assert math.floor(create_begin) <= create_time <= math.ceil(create_end)

        time.sleep(2.0)
        seal_begin = time.time()
        client.seal(timed_id)
        seal_end = time.time()
        sealed_listing = client.list()
        assert (math.floor(seal_begin - create_end)
                <= sealed_listing[timed_id]["construct_duration"])
        assert (sealed_listing[timed_id]["construct_duration"]
                <= math.ceil(seal_end - create_begin))
def test_plasma_list():
    """Check the fields reported by ``plasma_client.list()``.

    Covers data/metadata sizes, the reference count before and while a
    fetched value is alive, the ``created`` -> ``sealed`` state transition,
    and the creation timestamp / construction duration (bounds rounded
    outwards to whole seconds).
    """
    import pyarrow.plasma as plasma

    store = plasma.start_plasma_store(
        plasma_store_memory=DEFAULT_PLASMA_STORE_MEMORY)
    with store as (socket_name, _proc):
        client = plasma.connect(socket_name, "", 0)

        # --- sizes ---
        sized_id, _, _ = create_object(client, 11, metadata_size=7, seal=False)
        sizes_listing = client.list()
        assert sizes_listing[sized_id]["data_size"] == 11
        assert sizes_listing[sized_id]["metadata_size"] == 7

        # --- ref_count ---
        put_id = client.put(np.zeros(3))
        released_listing = client.list()
        # Ref count has already been released
        assert released_listing[put_id]["ref_count"] == 0
        held_value = client.get(put_id)
        held_listing = client.list()
        assert held_listing[put_id]["ref_count"] == 1
        del held_value

        # --- state ---
        state_id, _, _ = create_object(client, 3, metadata_size=0, seal=False)
        before_seal = client.list()
        assert before_seal[state_id]["state"] == "created"
        client.seal(state_id)
        after_seal = client.list()
        assert after_seal[state_id]["state"] == "sealed"

        # --- timestamps ---
        create_begin = time.time()
        timed_id, _, _ = create_object(client, 3, metadata_size=0, seal=False)
        create_end = time.time()
        created_listing = client.list()
        create_time = created_listing[timed_id]["create_time"]
        assert math.floor(create_begin) <= create_time <= math.ceil(create_end)

        time.sleep(2.0)
        seal_begin = time.time()
        client.seal(timed_id)
        seal_end = time.time()
        sealed_listing = client.list()
        assert (math.floor(seal_begin - create_end)
                <= sealed_listing[timed_id]["construct_duration"])
        assert (sealed_listing[timed_id]["construct_duration"]
                <= math.ceil(seal_end - create_begin))
def test_plasma_tf_op(use_gpu=False):
    """Run the plasma TensorFlow op test once per supported dtype.

    The test is skipped when the op is unavailable after building it.
    After all dtypes have run, every object listed by the client must have
    a reference count of zero.
    """
    import pyarrow.plasma as plasma
    import tensorflow as tf

    plasma.build_plasma_tensorflow_op()
    if plasma.tf_plasma_op is None:
        pytest.skip("TensorFlow Op not found")

    dtypes = (np.float32, np.float64,
              np.int8, np.int16, np.int32, np.int64)

    with plasma.start_plasma_store(10**8) as (socket_name, _proc):
        client = plasma.connect(socket_name)
        for dtype in dtypes:
            run_tensorflow_test_with_dtype(
                tf, plasma, socket_name, client, use_gpu, dtype)

        # Make sure the objects have been released.
        for info in client.list().values():
            assert info['ref_count'] == 0
def test_plasma_tf_op(use_gpu=False):
    """Run the plasma TensorFlow op test once per supported dtype.

    The test is skipped when the op is unavailable after building it.
    After all dtypes have run, every object listed by the client must have
    a reference count of zero.
    """
    import pyarrow.plasma as plasma
    import tensorflow as tf

    plasma.build_plasma_tensorflow_op()
    if plasma.tf_plasma_op is None:
        pytest.skip("TensorFlow Op not found")

    dtypes = (np.float32, np.float64,
              np.int8, np.int16, np.int32, np.int64)

    with plasma.start_plasma_store(10**8) as (socket_name, _proc):
        client = plasma.connect(socket_name, "", 0)
        for dtype in dtypes:
            run_tensorflow_test_with_dtype(
                tf, plasma, socket_name, client, use_gpu, dtype)

        # Make sure the objects have been released.
        for info in client.list().values():
            assert info['ref_count'] == 0
async def setup(cls, **kwargs) -> Tuple[Dict, Dict]:
    """Start a plasma store and return its init and teardown parameters.

    Accepted keyword arguments:

    * ``store_memory`` -- passed to ``plasma.start_plasma_store`` as the
      store size (defaults to ``None``).
    * ``plasma_directory`` -- directory handed to the store (default
      ``None``).
    * ``check_dir_size`` -- forwarded untouched in the init parameters
      (default ``True``).

    Returns:
        ``(init_params, teardown_params)``. ``init_params`` holds
        ``plasma_socket``, ``plasma_directory``, ``check_dir_size`` and the
        measured ``capacity``. ``teardown_params`` holds the entered store
        context under ``plasma_store`` so the caller can exit it later.

    Raises:
        TypeError: if any unknown keyword argument is given.
    """
    store_memory = kwargs.pop('store_memory', None)
    plasma_directory = kwargs.pop('plasma_directory', None)
    check_dir_size = kwargs.pop('check_dir_size', True)

    if kwargs:
        raise TypeError(f'PlasmaStorage got unexpected config: {",".join(kwargs)}')

    plasma_store = plasma.start_plasma_store(store_memory,
                                             plasma_directory=plasma_directory)
    plasma_socket = plasma_store.__enter__()[0]
    try:
        client = plasma.connect(plasma_socket)
        actual_capacity = get_actual_capacity(client)
    except BaseException:
        # The context was entered by hand, so nothing else will shut the
        # store down if connecting or probing the capacity fails.
        plasma_store.__exit__(None, None, None)
        raise

    init_params = dict(plasma_socket=plasma_socket,
                       plasma_directory=plasma_directory,
                       check_dir_size=check_dir_size,
                       capacity=actual_capacity)
    teardown_params = dict(plasma_store=plasma_store)
    return init_params, teardown_params
def setUpClass(cls):
    """Start the class-wide plasma store, client and KV store.

    The plasma directory is ``/tmp`` on macOS and ``/dev/shm`` elsewhere.
    The store context is entered manually and kept on ``cls._plasma_store``.
    Its socket is published both on ``cls.plasma_socket`` and on
    ``options.worker.plasma_socket``.
    """
    import pyarrow.plasma as plasma
    from mars import kvstore

    options.worker.plasma_dir = '/tmp' if sys.platform == 'darwin' else '/dev/shm'

    store_ctx = plasma.start_plasma_store(
        cls.plasma_storage_size, plasma_directory=options.worker.plasma_dir)
    cls._plasma_store = store_ctx

    socket_name = store_ctx.__enter__()[0]
    cls.plasma_socket = socket_name
    options.worker.plasma_socket = socket_name

    options.worker.spill_directory = cls.spill_dir

    # Try the single-argument connect first and fall back to the
    # three-argument form when it raises TypeError.
    try:
        cls._plasma_client = plasma.connect(options.worker.plasma_socket)
    except TypeError:
        cls._plasma_client = plasma.connect(options.worker.plasma_socket, '', 0)

    cls._kv_store = kvstore.get(options.kv_store)
async def setup(cls, **kwargs) -> Tuple[Dict, Dict]:
    """Start a plasma store and return its init and teardown parameters.

    Accepted keyword arguments:

    * ``store_memory`` -- required; resolved with ``calc_size_by_str``
      against the machine's total virtual memory before being passed to the
      store.
    * ``plasma_directory`` -- directory handed to the store (default
      ``None``).
    * ``check_dir_size`` -- forwarded untouched in the init parameters
      (default ``True``).

    The capacity probe runs in the loop's default executor so it does not
    block the event loop.

    Returns:
        ``(init_params, teardown_params)``. ``init_params`` holds
        ``plasma_socket``, ``plasma_directory``, ``check_dir_size`` and the
        measured ``capacity``. ``teardown_params`` holds the entered store
        context under ``plasma_store`` so the caller can exit it later.

    Raises:
        KeyError: if ``store_memory`` is missing.
        TypeError: if any unknown keyword argument is given.
    """
    loop = asyncio.get_running_loop()
    store_memory = kwargs.pop('store_memory')
    plasma_directory = kwargs.pop('plasma_directory', None)
    check_dir_size = kwargs.pop('check_dir_size', True)

    if kwargs:
        raise TypeError(
            f'PlasmaStorage got unexpected config: {",".join(kwargs)}')

    store_memory = calc_size_by_str(store_memory, virtual_memory().total)
    plasma_store = plasma.start_plasma_store(
        store_memory, plasma_directory=plasma_directory)
    plasma_socket = plasma_store.__enter__()[0]
    try:
        client = plasma.connect(plasma_socket)
        actual_capacity = await loop.run_in_executor(
            None, get_actual_capacity, client)
    except BaseException:
        # The context was entered by hand, so nothing else will shut the
        # store down if connecting or probing fails, or if this coroutine
        # is cancelled while awaiting the executor.
        plasma_store.__exit__(None, None, None)
        raise

    init_params = dict(plasma_socket=plasma_socket,
                       plasma_directory=plasma_directory,
                       check_dir_size=check_dir_size,
                       capacity=actual_capacity)
    teardown_params = dict(plasma_store=plasma_store)
    return init_params, teardown_params
def start_plasma(self):
    """Start the plasma store and publish its socket in the worker options.

    The store is sized by ``self._cache_mem_limit`` and uses
    ``self._plasma_dir``. Its context is entered manually and kept on
    ``self._plasma_store``.
    """
    store_ctx = plasma.start_plasma_store(
        self._cache_mem_limit, plasma_directory=self._plasma_dir)
    self._plasma_store = store_ctx
    socket_name, _proc = store_ctx.__enter__()
    options.worker.plasma_socket = socket_name
def setup(self):
    """Start a plasma store and connect a client to it.

    The store is created with ``plasma_store_memory=10**9`` and its context
    is entered by hand, so the context object is kept on
    ``self.plasma_store_ctx`` for whoever exits it later.
    """
    store_ctx = plasma.start_plasma_store(plasma_store_memory=10**9)
    self.plasma_store_ctx = store_ctx
    socket_name, _proc = store_ctx.__enter__()
    self.plasma_client = plasma.connect(socket_name, "", 64)
def testPlasmaSharedStore(self):
    # Exercises PlasmaSharedStore against a real 10 MiB plasma store and a
    # single-process actor pool hosting the key-map actor: missing keys,
    # stale key-map entries, failed writes, a create/seal round trip and
    # filling the store until StorageFull.
    import pyarrow
    from pyarrow import plasma

    store_size = 10 * 1024**2
    test_addr = f'127.0.0.1:{get_next_port()}'
    with plasma.start_plasma_store(store_size) as (sckt, _), \
            create_actor_pool(n_process=1, address=test_addr) as pool:
        km_ref = pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        # Try the single-argument connect first; fall back to the
        # three-argument form when it raises TypeError.
        try:
            plasma_client = plasma.connect(sckt)
        except TypeError:
            plasma_client = plasma.connect(sckt, '', 0)
        store = PlasmaSharedStore(plasma_client, km_ref)

        # The reported capacity must exceed half of the requested size.
        self.assertGreater(store.get_actual_capacity(store_size), store_size / 2)

        session_id = str(uuid.uuid4())
        data_list = [
            np.random.randint(0, 32767, (655360, ), np.int16) for _ in range(20)
        ]
        key_list = [str(uuid.uuid4()) for _ in range(20)]

        # Keys that were never stored: not contained, and lookups raise
        # KeyError.
        self.assertFalse(store.contains(session_id, str(uuid.uuid4())))
        with self.assertRaises(KeyError):
            store.get(session_id, str(uuid.uuid4()))
        with self.assertRaises(KeyError):
            store.get_actual_size(session_id, str(uuid.uuid4()))
        with self.assertRaises(KeyError):
            store.seal(session_id, str(uuid.uuid4()))

        # A key-map entry pointing at a random object id: each store
        # operation below must fail with KeyError and leave no mapping
        # behind for the key.
        fake_data_key = str(uuid.uuid4())
        km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
        self.assertFalse(store.contains(session_id, fake_data_key))
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.get(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.seal(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.get_actual_size(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.get_buffer(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        store.delete(session_id, fake_data_key)

        # Failed writes must not leave a mapping behind either: an object
        # whose put raises SerializationFailed, a non-numeric size, and a
        # size of twice the store.
        with self.assertRaises(SerializationFailed):
            non_serial = type('non_serial', (object, ), dict(nbytes=10))
            store.put(session_id, fake_data_key, non_serial())
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(Exception):
            store.create(session_id, fake_data_key, 'abcd')
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(StorageFull):
            store.create(session_id, fake_data_key, store_size * 2)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))

        # Round trip: create a buffer, write the serialized array into it,
        # seal it, then read it back through both get() and get_buffer().
        arrow_ser = pyarrow.serialize(data_list[0])
        buf = store.create(session_id, key_list[0], arrow_ser.total_bytes)
        writer = pyarrow.FixedSizeBufferWriter(buf)
        arrow_ser.write_to(writer)
        writer.close()
        store.seal(session_id, key_list[0])

        self.assertTrue(store.contains(session_id, key_list[0]))
        self.assertEqual(store.get_actual_size(session_id, key_list[0]),
                         arrow_ser.total_bytes)
        assert_allclose(store.get(session_id, key_list[0]), data_list[0])
        assert_allclose(
            pyarrow.deserialize(store.get_buffer(session_id, key_list[0])),
            data_list[0])

        # Creating the same key again is rejected and the existing mapping
        # is kept.
        with self.assertRaises(StorageDataExists):
            store.create(session_id, key_list[0], arrow_ser.total_bytes)
        self.assertIsNotNone(km_ref.get(session_id, key_list[0]))
        store.delete(session_id, key_list[0])
        del buf

        # Put arrays, keeping every returned value referenced, until the
        # store reports StorageFull; then drop all references at once.
        bufs = []
        for key, data in zip(key_list, data_list):
            try:
                bufs.append(store.put(session_id, key, data))
            except StorageFull:
                break
        del bufs
def start_plasma(self):
    """Start the plasma store and publish its socket in the worker options.

    The store is sized by ``self._cache_mem_size`` and uses
    ``options.worker.plasma_dir``. Its context is entered manually and kept
    on ``self._plasma_store``.
    """
    from pyarrow import plasma

    store_ctx = plasma.start_plasma_store(
        self._cache_mem_size, plasma_directory=options.worker.plasma_dir)
    self._plasma_store = store_ctx
    socket_name, _proc = store_ctx.__enter__()
    options.worker.plasma_socket = socket_name
def start_plasma(self, mem_limit, one_mapped_file=False):
    """Start the plasma store and publish its socket in the worker options.

    ``mem_limit`` is converted with ``int()`` before being used as the
    store size; ``one_mapped_file`` is forwarded as
    ``use_one_memory_mapped_file``. The store context is entered manually
    and kept on ``self._plasma_store``.
    """
    store_bytes = int(mem_limit)
    store_ctx = plasma.start_plasma_store(
        store_bytes, use_one_memory_mapped_file=one_mapped_file)
    self._plasma_store = store_ctx
    socket_name, _proc = store_ctx.__enter__()
    options.worker.plasma_socket = socket_name
def test_store_capacity():
    """A store started with 10000 of memory must report that capacity."""
    import pyarrow.plasma as plasma

    requested = 10000
    with plasma.start_plasma_store(plasma_store_memory=requested) as (socket_name, _proc):
        client = plasma.connect(socket_name)
        reported = client.store_capacity()
        assert reported == 10000