def testDiskLoad(self, *_):
    test_addr = f'127.0.0.1:{get_next_port()}'
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        data2 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))

        # load from bytes io
        shared_handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))
        with shared_handler.create_bytes_writer(
                session_id, data_key1, ser_data1.total_bytes) as writer:
            ser_data1.write_to(writer)

        handler.load_from_bytes_io(session_id, [data_key1], shared_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                         [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

        shared_handler.delete(session_id, [data_key1])
        handler.delete(session_id, [data_key1])

        # load from object io
        ref_data2 = weakref.ref(data2)
        proc_handler = storage_client.get_storage_handler((0, DataStorageDevice.PROC_MEMORY))
        proc_handler.put_objects(session_id, [data_key2], [data2])
        del data2

        handler.load_from_object_io(session_id, [data_key2], proc_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key2])[0]),
                         [(0, DataStorageDevice.PROC_MEMORY), (0, DataStorageDevice.DISK)])

        proc_handler.delete(session_id, [data_key2])
        self.assertIsNone(ref_data2())
        handler.delete(session_id, [data_key2])
def testSharedPutAndGet(self, *_):
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        data2 = np.random.random((10, 10))
        ser_data2 = dataserializer.serialize(data2)
        bytes_data2 = ser_data2.to_buffer()

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

        # put and get a plain object
        handler.put_objects(session_id, [data_key1], [data1])
        self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                         [(0, DataStorageDevice.SHARED_MEMORY)])
        assert_allclose(data1, handler.get_objects(session_id, [data_key1])[0])

        # delete and make sure the key is gone
        handler.delete(session_id, [data_key1])
        self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]), [])
        with self.assertRaises(KeyError):
            handler.get_objects(session_id, [data_key1])

        # put pre-serialized data
        handler.put_objects(session_id, [data_key2], [ser_data2], serialize=True)
        assert_allclose(data2, handler.get_objects(session_id, [data_key2])[0])
        handler.delete(session_id, [data_key2])

        # put a serialized byte buffer
        handler.put_objects(session_id, [data_key2], [bytes_data2], serialize=True)
        assert_allclose(data2, handler.get_objects(session_id, [data_key2])[0])
        handler.delete(session_id, [data_key2])
def testLoadStoreInOtherProcess(self):
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=3, address=test_addr,
                          distributor=MarsDistributor(3)) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor1')
        pool.create_actor(InProcHolderActor, uid='w:2:InProcHolderActor2')
        pool.create_actor(IORunnerActor, lock_free=True, dispatched=False,
                          uid=IORunnerActor.gen_uid(1))

        test_ref = pool.create_actor(OtherProcessTestActor, uid='w:0:OtherProcTest')

        def _get_result():
            start_time = time.time()
            while test_ref.get_result() is None:
                pool.sleep(0.5)
                if time.time() - start_time > 10:
                    raise TimeoutError

        # shared memory (proc 0) -> proc memory (proc 1)
        test_ref.run_copy_test((0, DataStorageDevice.SHARED_MEMORY),
                               (1, DataStorageDevice.PROC_MEMORY), _tell=True)
        _get_result()

        # proc memory (proc 1) -> shared memory (proc 0)
        test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                               (0, DataStorageDevice.SHARED_MEMORY), _tell=True)
        _get_result()

        # proc memory (proc 1) -> proc memory (proc 2)
        test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                               (2, DataStorageDevice.PROC_MEMORY), _tell=True)
        _get_result()
def testSharedLoadFromObjects(self, *_):
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

        # load from object io
        ref_data1 = weakref.ref(data1)

        proc_handler = storage_client.get_storage_handler((0, DataStorageDevice.PROC_MEMORY))
        proc_handler.put_objects(session_id, [data_key1], [data1])
        del data1

        handler.load_from_object_io(session_id, [data_key1], proc_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                         [(0, DataStorageDevice.PROC_MEMORY), (0, DataStorageDevice.SHARED_MEMORY)])

        proc_handler.delete(session_id, [data_key1])
        self.assertIsNone(ref_data1())
        handler.delete(session_id, [data_key1])
def _start_shared_holder_pool(self):
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerClusterInfoActor, [test_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(StatusActor, test_addr, uid=StatusActor.default_uid())

        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        yield pool, test_actor
def create_standard_actors(cls, pool, address, quota_size=None, with_daemon=True,
                           with_status=True, with_resource=False):
    quota_size = quota_size or (1024 * 1024)

    pool.create_actor(SchedulerClusterInfoActor, [address],
                      uid=SchedulerClusterInfoActor.default_uid())
    pool.create_actor(WorkerClusterInfoActor, [address],
                      uid=WorkerClusterInfoActor.default_uid())

    pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
    pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
    if with_resource:
        pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
    if with_daemon:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
    if with_status:
        pool.create_actor(StatusActor, address, uid=StatusActor.default_uid())

    pool.create_actor(SharedHolderActor, cls.plasma_storage_size,
                      uid=SharedHolderActor.default_uid())
    pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
    pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
    pool.create_actor(QuotaActor, quota_size, uid=MemQuotaActor.default_uid())
    pool.create_actor(ExecutionActor, uid=ExecutionActor.default_uid())
def testClientReadAndWrite(self):
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        options.worker.lock_free_fileio = True
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(IORunnerActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client

            file_names = []

            def _write_data(ser, writer):
                file_names.append(writer.filename)
                self.assertEqual(writer.nbytes, ser_data1.total_bytes)
                with writer:
                    ser.write_to(writer)

            # test creating non-promised writer and write
            with storage_client.create_writer(
                    session_id, data_key1, ser_data1.total_bytes,
                    (DataStorageDevice.DISK,), _promise=False) as writer:
                _write_data(ser_data1, writer)

            self.assertTrue(os.path.exists(file_names[0]))
            self.assertEqual(sorted(storage_client.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.DISK)])

            storage_client.delete(session_id, [data_key1])

            # test creating promised writer and write
            file_names[:] = []
            self.waitp(
                storage_client.create_writer(session_id, data_key2, ser_data1.total_bytes,
                                             (DataStorageDevice.DISK,))
                    .then(functools.partial(_write_data, ser_data1))
            )
            self.assertTrue(os.path.exists(file_names[0]))
            self.assertEqual(sorted(storage_client.get_data_locations(session_id, [data_key2])[0]),
                             [(0, DataStorageDevice.DISK)])

            def _read_data(reader):
                with reader:
                    return dataserializer.deserialize(reader.read())

            # test creating reader when data exist in location
            result = self.waitp(
                storage_client.create_reader(session_id, data_key2, (DataStorageDevice.DISK,))
                    .then(_read_data)
            )[0]
            assert_allclose(result, data1)

            # test creating reader when no data in location (should raise)
            with self.assertRaises(IOError):
                storage_client.create_reader(session_id, data_key2,
                                             (DataStorageDevice.SHARED_MEMORY,),
                                             _promise=False)

            # test creating reader when copy needed
            self.waitp(
                storage_client.create_reader(session_id, data_key2,
                                             (DataStorageDevice.SHARED_MEMORY,))
                    .then(_read_data)
            )
            self.assertEqual(sorted(storage_client.get_data_locations(session_id, [data_key2])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

            storage_client.delete(session_id, [data_key2])
            while os.path.exists(file_names[0]):
                test_actor.ctx.sleep(0.05)
            self.assertFalse(os.path.exists(file_names[0]))
def testClientSpill(self, *_):
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(IORunnerActor)

        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        session_id = str(uuid.uuid4())
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client
            idx = 0

            shared_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))
            proc_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))

            def _fill_data():
                i = 0
                for i, (key, data) in enumerate(zip(data_keys[idx:], data_list)):
                    try:
                        shared_handler.put_objects(session_id, [key], [data])
                    except StorageFull:
                        break
                return i + idx

            idx = _fill_data()

            # test copying non-existing keys
            storage_client.copy_to(session_id, ['non-exist-key'], [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            with self.assertRaises(KeyError):
                self.get_result(5)

            # test copying into containing locations
            storage_client.copy_to(session_id, [data_keys[0]], [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_keys[0]])[0]),
                             [(0, DataStorageDevice.SHARED_MEMORY)])

            # test unsuccessful copy when no data at target
            def _mock_load_from(*_, **__):
                return promise.finished(*build_exc_info(SystemError), _accept=False)

            with patch_method(StorageHandler.load_from, _mock_load_from), \
                    self.assertRaises(SystemError):
                storage_client.copy_to(session_id, [data_keys[0]], [DataStorageDevice.DISK]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

            # test successful copy for multiple objects
            storage_client.delete(session_id, [data_keys[idx - 1]])
            ref_data = weakref.ref(data_list[idx])
            ref_data2 = weakref.ref(data_list[idx + 1])

            proc_handler.put_objects(session_id, data_keys[idx:idx + 2], data_list[idx:idx + 2])
            data_list[idx:idx + 2] = [None, None]

            storage_client.copy_to(session_id, data_keys[idx:idx + 2],
                                   [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.DISK]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            proc_handler.delete(session_id, data_keys[idx:idx + 2])

            self.assertEqual(storage_manager_ref.get_data_locations(session_id, data_keys[idx:idx + 2]),
                             [{(0, DataStorageDevice.SHARED_MEMORY)}, {(0, DataStorageDevice.DISK)}])
            self.assertIsNone(ref_data())
            self.assertIsNone(ref_data2())

            # test copy with spill
            idx += 2
            proc_handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])

            storage_client.copy_to(session_id, [data_keys[idx]], [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_keys[idx]])[0]),
                             [(0, DataStorageDevice.PROC_MEMORY), (0, DataStorageDevice.SHARED_MEMORY)])
def testClientPutAndGet(self):
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(IORunnerActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())
        pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor')

        session_id = str(uuid.uuid4())
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]
        data_dict = dict(zip(data_keys, data_list))

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client

            # check batch object put with size exceeds
            storage_client.put_objects(session_id, data_keys, data_list,
                                       [DataStorageDevice.SHARED_MEMORY, DataStorageDevice.PROC_MEMORY]) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            locations = storage_client.get_data_locations(session_id, data_keys)
            loc_to_keys = defaultdict(list)
            for key, location in zip(data_keys, locations):
                self.assertEqual(len(location), 1)
                loc_to_keys[list(location)[0][-1]].append(key)
            self.assertGreater(len(loc_to_keys[DataStorageDevice.PROC_MEMORY]), 1)
            self.assertGreater(len(loc_to_keys[DataStorageDevice.SHARED_MEMORY]), 1)

            # check get object with all cases
            with self.assertRaises(IOError):
                first_shared_key = loc_to_keys[DataStorageDevice.SHARED_MEMORY][0]
                storage_client.get_object(session_id, first_shared_key,
                                          [DataStorageDevice.PROC_MEMORY], _promise=False)

            shared_objs = storage_client.get_objects(
                session_id, [first_shared_key], [DataStorageDevice.SHARED_MEMORY], _promise=False)
            self.assertEqual(len(shared_objs), 1)
            assert_allclose(shared_objs[0], data_dict[first_shared_key])

            storage_client.get_object(session_id, first_shared_key,
                                      [DataStorageDevice.PROC_MEMORY], _promise=True) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data_dict[first_shared_key])

            storage_client.delete(session_id, data_keys)
            time.sleep(0.5)
            ref = weakref.ref(data_dict[data_keys[0]])
            storage_client.put_objects(session_id, data_keys[:1], [ref()],
                                       [DataStorageDevice.SHARED_MEMORY])

            data_list[:] = []
            data_dict.clear()
            self.assertIsNone(ref())
def testPlasmaSharedStore(self):
    import pyarrow
    from pyarrow import plasma

    store_size = 10 * 1024 ** 2
    test_addr = f'127.0.0.1:{get_next_port()}'
    with plasma.start_plasma_store(store_size) as (sckt, _), \
            create_actor_pool(n_process=1, address=test_addr) as pool:
        km_ref = pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        try:
            plasma_client = plasma.connect(sckt)
        except TypeError:
            plasma_client = plasma.connect(sckt, '', 0)
        store = PlasmaSharedStore(plasma_client, km_ref)

        self.assertGreater(store.get_actual_capacity(store_size), store_size / 2)

        session_id = str(uuid.uuid4())
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        key_list = [str(uuid.uuid4()) for _ in range(20)]

        self.assertFalse(store.contains(session_id, str(uuid.uuid4())))
        with self.assertRaises(KeyError):
            store.get(session_id, str(uuid.uuid4()))
        with self.assertRaises(KeyError):
            store.get_actual_size(session_id, str(uuid.uuid4()))
        with self.assertRaises(KeyError):
            store.seal(session_id, str(uuid.uuid4()))

        fake_data_key = str(uuid.uuid4())
        km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
        self.assertFalse(store.contains(session_id, fake_data_key))
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.get(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.seal(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.get_actual_size(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(KeyError):
            km_ref.put(session_id, fake_data_key, plasma.ObjectID.from_random())
            store.get_buffer(session_id, fake_data_key)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        store.delete(session_id, fake_data_key)

        with self.assertRaises(SerializationFailed):
            non_serial = type('non_serial', (object,), dict(nbytes=10))
            store.put(session_id, fake_data_key, non_serial())
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(Exception):
            store.create(session_id, fake_data_key, 'abcd')
        self.assertIsNone(km_ref.get(session_id, fake_data_key))
        with self.assertRaises(StorageFull):
            store.create(session_id, fake_data_key, store_size * 2)
        self.assertIsNone(km_ref.get(session_id, fake_data_key))

        arrow_ser = pyarrow.serialize(data_list[0])
        buf = store.create(session_id, key_list[0], arrow_ser.total_bytes)
        writer = pyarrow.FixedSizeBufferWriter(buf)
        arrow_ser.write_to(writer)
        writer.close()
        store.seal(session_id, key_list[0])
        self.assertTrue(store.contains(session_id, key_list[0]))
        self.assertEqual(store.get_actual_size(session_id, key_list[0]), arrow_ser.total_bytes)
        assert_allclose(store.get(session_id, key_list[0]), data_list[0])
        assert_allclose(pyarrow.deserialize(store.get_buffer(session_id, key_list[0])), data_list[0])

        with self.assertRaises(StorageDataExists):
            store.create(session_id, key_list[0], arrow_ser.total_bytes)
        self.assertIsNotNone(km_ref.get(session_id, key_list[0]))
        store.delete(session_id, key_list[0])
        del buf

        bufs = []
        for key, data in zip(key_list, data_list):
            try:
                bufs.append(store.put(session_id, key, data))
            except StorageFull:
                break
        del bufs
def testSharedReadAndWrite(self, *_):
    test_addr = '127.0.0.1:%d' % get_next_port()
    io_size = dataserializer.HEADER_LENGTH * 2
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((100, 100))
        ser_data1 = dataserializer.serialize(data1)

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

        # write the whole serialized object at once
        def _write_data(ser, writer):
            self.assertEqual(writer.nbytes, ser_data1.total_bytes)
            with writer:
                ser.write_to(writer)

        handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
            .then(functools.partial(_write_data, ser_data1)) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                         [(0, DataStorageDevice.SHARED_MEMORY)])
        handler.delete(session_id, [data_key1])

        # write the serialized buffer in chunks of io_size
        def _write_data(ser, writer):
            with writer:
                for start in range(0, len(ser), io_size):
                    writer.write(ser[start:start + io_size])

        handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes, _promise=True) \
            .then(functools.partial(_write_data, ser_data1.to_buffer())) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                         [(0, DataStorageDevice.SHARED_MEMORY)])

        # read the whole buffer at once
        def _read_data_all(reader):
            with reader:
                return dataserializer.deserialize(reader.read())

        handler.create_bytes_reader(session_id, data_key1, _promise=True) \
            .then(_read_data_all) \
            .then(functools.partial(test_actor.set_result),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        assert_allclose(self.get_result(5), data1)

        # read the buffer in chunks of io_size
        def _read_data_batch(reader):
            bio = BytesIO()
            with reader:
                while True:
                    buf = reader.read(io_size)
                    if buf:
                        bio.write(buf)
                    else:
                        break
            return dataserializer.deserialize(bio.getvalue())

        handler.create_bytes_reader(session_id, data_key1, _promise=True) \
            .then(_read_data_batch) \
            .then(functools.partial(test_actor.set_result),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        assert_allclose(self.get_result(5), data1)
        handler.delete(session_id, [data_key1])
def testSharedSpill(self, *_):
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        holder_ref = pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                                       uid=SharedHolderActor.default_uid())

        session_id = str(uuid.uuid4())
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))
        idx = 0

        def _fill_data():
            i = 0
            for i, (key, data) in enumerate(zip(data_keys[idx:], data_list)):
                try:
                    handler.put_objects(session_id, [key], [data])
                except StorageFull:
                    break
            return i + idx

        def _do_spill():
            data_size = storage_manager_ref.get_data_sizes(session_id, [data_keys[0]])[0]
            handler.spill_size(2 * data_size) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

        # test lift data key
        idx = _fill_data()
        handler.lift_data_keys(session_id, [data_keys[0]])
        _do_spill()

        self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[0]])[0]),
                         [(0, DataStorageDevice.SHARED_MEMORY)])
        self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[1]])[0]),
                         [(0, DataStorageDevice.DISK)])

        handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])
        self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[idx]])[0]),
                         [(0, DataStorageDevice.SHARED_MEMORY)])
        idx += 1

        # test pin data key
        idx = _fill_data()
        holder_ref.lift_data_keys(session_id, [data_keys[0]], last=False)
        pin_token = str(uuid.uuid4())
        pinned_keys = handler.pin_data_keys(session_id, (data_keys[0],), pin_token)
        self.assertIn(data_keys[0], pinned_keys)
        _do_spill()

        self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[0]])[0]),
                         [(0, DataStorageDevice.SHARED_MEMORY)])
        self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[1]])[0]),
                         [(0, DataStorageDevice.DISK)])

        handler.put_objects(session_id, [data_keys[idx]], [data_list[idx]])
        self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[idx]])[0]),
                         [(0, DataStorageDevice.SHARED_MEMORY)])
        idx += 1

        # test unpin data key
        idx = _fill_data()
        handler.unpin_data_keys(session_id, (data_keys[0],), pin_token)
        _do_spill()
        self.assertEqual(list(storage_manager_ref.get_data_locations(session_id, [data_keys[0]])[0]),
                         [(0, DataStorageDevice.DISK)])
def testSharedLoadFromBytes(self, *_):
    import logging
    logging.basicConfig(level=logging.DEBUG)
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

        # load from bytes io
        disk_handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))
        with disk_handler.create_bytes_writer(
                session_id, data_key1, ser_data1.total_bytes) as writer:
            ser_data1.write_to(writer)

        handler.load_from_bytes_io(session_id, [data_key1], disk_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                         [(0, DataStorageDevice.SHARED_MEMORY), (0, DataStorageDevice.DISK)])

        disk_handler.delete(session_id, [data_key1])
        handler.delete(session_id, [data_key1])

        # load from bytes io till no capacity
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]
        for key, data in zip(data_keys, data_list):
            ser_data = dataserializer.serialize(data)
            with disk_handler.create_bytes_writer(
                    session_id, key, ser_data.total_bytes) as writer:
                ser_data.write_to(writer)

        handler.load_from_bytes_io(session_id, data_keys, disk_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))

        affected_keys = set()
        try:
            self.get_result(5)
        except StorageFull as ex:
            affected_keys.update(ex.affected_keys)

        storage_client.delete(session_id, data_keys, [DataStorageDevice.DISK])

        self.assertLess(len(affected_keys), len(data_keys))
        self.assertGreater(len(affected_keys), 1)
        for k, size in zip(data_keys, storage_client.get_data_sizes(session_id, data_keys)):
            if k in affected_keys:
                self.assertIsNone(size)
            else:
                self.assertIsNotNone(size)