def start_transfer_test_pool(**kwargs):
    """Context generator: boot a single-process actor pool with all actors a
    transfer test needs and yield it.

    Expects ``address`` and ``plasma_size`` in ``kwargs``; remaining kwargs
    are forwarded to ``create_actor_pool``.
    """
    address = kwargs.pop('address')
    plasma_size = kwargs.pop('plasma_size')
    with create_actor_pool(n_process=1, backend='gevent',
                           address=address, **kwargs) as pool:
        pool.create_actor(SchedulerClusterInfoActor, schedulers=[address],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor, schedulers=[address],
                          uid=WorkerClusterInfoActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(QuotaActor, 1024 * 1024 * 20,
                          uid=MemQuotaActor.default_uid())
        chunk_holder_ref = pool.create_actor(
            ChunkHolderActor, plasma_size, uid=ChunkHolderActor.default_uid())
        pool.create_actor(SpillActor)
        pool.create_actor(StatusActor, address, uid=StatusActor.default_uid())

        try:
            yield pool
        finally:
            # run even when the test body raises, so plasma-backed
            # resources are always released (matches the newer variant
            # of this helper elsewhere in the file)
            chunk_holder_ref.destroy()
def start_transfer_test_pool(**kwargs):
    """Context generator: boot a single-process actor pool with the full set
    of storage/transfer actors and yield it.

    Expects ``address`` and ``plasma_size`` in ``kwargs``; remaining kwargs
    are forwarded to ``create_actor_pool``.  The shared holder is destroyed
    on exit even if the test body raises.
    """
    address = kwargs.pop('address')
    plasma_size = kwargs.pop('plasma_size')
    with create_actor_pool(n_process=1, backend='gevent',
                           address=address, **kwargs) as pool:
        # cluster-info actors must exist before workers that look them up
        pool.create_actor(SchedulerClusterInfoActor, [address],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor, [address],
                          uid=WorkerClusterInfoActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        # 20 MB memory quota for the test pool
        pool.create_actor(QuotaActor, 1024 * 1024 * 20,
                          uid=MemQuotaActor.default_uid())
        shared_holder_ref = pool.create_actor(
            SharedHolderActor, plasma_size, uid=SharedHolderActor.default_uid())
        pool.create_actor(StatusActor, address, uid=StatusActor.default_uid())

        pool.create_actor(IORunnerActor)
        pool.create_actor(StorageClientActor, uid=StorageClientActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(ReceiverManagerActor, uid=ReceiverManagerActor.default_uid())

        try:
            yield pool
        finally:
            # always free plasma-backed storage, even on test failure
            shared_holder_ref.destroy()
def testDaemon(self):
    """Exercise WorkerDaemonActor process supervision: killing a supervised
    actor's process, restart handling, and failure callback delivery."""
    mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=2, backend='gevent',
                           distributor=MarsDistributor(2, 'w:0:'),
                           address=mock_scheduler_addr) as pool:
        daemon_ref = pool.create_actor(
            WorkerDaemonActor, uid=WorkerDaemonActor.default_name())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_name())

        # actors created through the daemon are placed in process 1
        # (per their 'w:1:' uids) and tracked for restart
        sleeper_ref = daemon_ref.create_actor(DaemonSleeperActor,
                                              uid='w:1:DaemonSleeperActor')
        daemon_ref.create_actor(ProcessHelperActor, uid='w:1:ProcHelper')
        test_actor = pool.create_actor(DaemonTestActor)
        daemon_ref.register_callback(test_actor, 'handle_process_down')

        # start a long (10s) sleep call on the supervised actor
        test_actor.run_test_sleep(sleeper_ref, 10, _tell=True)
        self.assertTrue(daemon_ref.is_actor_process_alive(sleeper_ref))

        pool.sleep(0.5)

        daemon_ref.kill_actor_process(sleeper_ref)
        # repeated kill shall not produce errors
        daemon_ref.kill_actor_process(sleeper_ref)
        self.assertFalse(daemon_ref.is_actor_process_alive(sleeper_ref))

        pool.restart_process(1)
        daemon_ref.handle_process_down([1])
        pool.sleep(1)
        # the daemon should have re-created the sleeper in the new process
        self.assertTrue(pool.has_actor(sleeper_ref))
        # the in-flight sleep call must surface as WorkerProcessStopped
        with self.assertRaises(WorkerProcessStopped):
            test_actor.get_result()

        # a fresh call after restart should complete normally
        test_actor.run_test_sleep(sleeper_ref, 1)
        pool.sleep(1.5)
        test_actor.get_result()
def testCalcProcessFailure(self):
    """Kill the process hosting a calc actor mid-computation and check the
    failure is reported and the cpu slot is recovered after restart."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])
    with create_actor_pool(n_process=2, backend='gevent',
                           address=pool_address,
                           distributor=MarsDistributor(2, 'w:0:')) as pool:
        self.create_standard_actors(pool, pool_address, with_status=False)

        daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid())
        dispatch_ref = pool.actor_ref(DispatchActor.default_uid())

        # calc actor lives in process 1 (per its 'w:1:' uid) and is
        # supervised by the daemon; its mock calc takes 10s
        calc_ref = daemon_ref.create_actor(
            MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a')
        daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a')

        test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor')
        test_actor.run_simple_calc(session_id, _tell=True)

        pool.sleep(2)
        proc_id = pool.distributor.distribute(calc_ref.uid)
        daemon_ref.kill_actor_process(calc_ref)
        # unittest assertion for consistency with the other daemon tests
        # (a bare ``assert`` is also stripped under ``python -O``)
        self.assertFalse(daemon_ref.is_actor_process_alive(calc_ref))
        pool.restart_process(proc_id)
        daemon_ref.handle_process_down([proc_id])

        # the interrupted calc must surface as WorkerProcessStopped
        with self.assertRaises(WorkerProcessStopped):
            self.wait_for_result(pool, test_actor)
        # the cpu slot must have been re-registered after restart
        self.assertEqual(len(dispatch_ref.get_slots('cpu')), 1)
def post_create(self):
    """Register this actor as a free 'receiver' slot on the dispatcher and
    locate the receiver manager, if one exists in this pool."""
    super().post_create()

    dispatch_ref = self.ctx.actor_ref(DispatchActor.default_uid())
    dispatch_ref.register_free_slot(self.uid, 'receiver')
    self._dispatch_ref = dispatch_ref

    manager_ref = self.ctx.actor_ref(ReceiverManagerActor.default_uid())
    # the manager is optional: fall back to None when it is not running
    self._receiver_manager_ref = \
        manager_ref if self.ctx.has_actor(manager_ref) else None
def _start_calc_pool(self):
    """Context generator: boot a single-process pool carrying every actor
    CpuCalcActor depends on, then yield ``(pool, test_actor)``."""
    mock_addr = f'127.0.0.1:{get_next_port()}'
    with self.create_pool(n_process=1, backend='gevent', address=mock_addr) as pool:
        pool.create_actor(SchedulerClusterInfoActor, [mock_addr],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor, [mock_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(StatusActor, mock_addr, uid=StatusActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(IORunnerActor)
        # 1 MB memory quota
        pool.create_actor(QuotaActor, 1024**2, uid=MemQuotaActor.default_uid())
        shared_holder_ref = pool.create_actor(
            SharedHolderActor, uid=SharedHolderActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(CpuCalcActor, uid=CpuCalcActor.default_uid())

        with self.run_actor_test(pool) as test_actor:
            try:
                yield pool, test_actor
            finally:
                # free plasma-backed storage even if the test body raised
                shared_holder_ref.destroy()
def testLoadStoreInOtherProcess(self):
    """Copy data between storage devices that live in different pool
    processes and wait for each copy to complete."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=3, address=test_addr,
                          distributor=MarsDistributor(3)) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())

        pool.create_actor(QuotaActor, 1024**2, uid=MemQuotaActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        # per-process in-proc holders (processes 1 and 2)
        pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor1')
        pool.create_actor(InProcHolderActor, uid='w:2:InProcHolderActor2')
        pool.create_actor(IORunnerActor, lock_free=True, dispatched=False,
                          uid=IORunnerActor.gen_uid(1))

        test_ref = pool.create_actor(OtherProcessTestActor,
                                     uid='w:0:OtherProcTest')

        def _get_result():
            # poll until the test actor publishes a result, 10s timeout
            start_time = time.time()
            while test_ref.get_result() is None:
                pool.sleep(0.5)
                if time.time() - start_time > 10:
                    raise TimeoutError

        # shared memory (proc 0) -> proc memory in proc 1
        test_ref.run_copy_test((0, DataStorageDevice.SHARED_MEMORY),
                               (1, DataStorageDevice.PROC_MEMORY), _tell=True)
        _get_result()

        # proc memory in proc 1 -> shared memory (proc 0)
        test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                               (0, DataStorageDevice.SHARED_MEMORY), _tell=True)
        _get_result()

        # proc memory in proc 1 -> proc memory in proc 2
        test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                               (2, DataStorageDevice.PROC_MEMORY), _tell=True)
        _get_result()
def testWorkerProcessRestart(self):
    """Kill a CPU calc actor's host process and wait until the worker
    daemon brings it back up."""
    with self._start_worker_process() as (pool, worker_endpoint):
        daemon_ref = pool.actor_ref(
            WorkerDaemonActor.default_uid(), address=worker_endpoint)
        dispatch_ref = pool.actor_ref(
            DispatchActor.default_uid(), address=worker_endpoint)

        # use the first registered cpu slot as the victim actor
        first_cpu_slot = dispatch_ref.get_slots('cpu')[0]
        calc_ref = pool.actor_ref(first_cpu_slot, address=worker_endpoint)

        daemon_ref.kill_actor_process(calc_ref)

        # poll until the daemon reports the process alive again (10s cap)
        deadline = time.time() + 10
        while not daemon_ref.is_actor_process_alive(calc_ref):
            gevent.sleep(0.1)
            if time.time() > deadline:
                raise TimeoutError('Check process restart timeout')
def testWorkerProcessRestart(self):
    """Start a real worker subprocess, kill one of its calc actor
    processes, and check the worker daemon restarts it.

    The worker subprocess is always terminated (SIGINT, then kill) and the
    plasma socket removed in the ``finally`` block.
    """
    mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
    # initialize before the try block: if pool/actor setup raises before
    # Popen runs, the finally clause must not hit an unbound name
    proc = None
    try:
        with create_actor_pool(n_process=1, backend='gevent',
                               address=mock_scheduler_addr) as pool:
            pool.create_actor(SchedulerClusterInfoActor,
                              schedulers=[mock_scheduler_addr],
                              uid=SchedulerClusterInfoActor.default_name())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())

            proc = subprocess.Popen([sys.executable, '-m', 'mars.worker',
                                     '-a', '127.0.0.1',
                                     '--schedulers', mock_scheduler_addr,
                                     '--cpu-procs', '1',
                                     '--cache-mem', '10m',
                                     '--spill-dir', self._spill_dir,
                                     '--ignore-avail-mem'])
            worker_endpoint = self._wait_worker_ready(proc, resource_ref)

            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_name(),
                                        address=worker_endpoint)
            dispatch_ref = pool.actor_ref(DispatchActor.default_name(),
                                          address=worker_endpoint)

            cpu_slots = dispatch_ref.get_slots('cpu')
            calc_ref = pool.actor_ref(cpu_slots[0], address=worker_endpoint)
            daemon_ref.kill_actor_process(calc_ref)

            # poll until the daemon restarts the process (10s cap)
            check_start = time.time()
            while not daemon_ref.is_actor_process_alive(calc_ref):
                gevent.sleep(0.1)
                if time.time() - check_start > 10:
                    raise TimeoutError('Check process restart timeout')
    finally:
        if proc is not None:
            if proc.poll() is None:
                proc.send_signal(signal.SIGINT)
            # give the worker up to 5s to exit gracefully
            check_time = time.time()
            while True:
                time.sleep(0.1)
                if proc.poll() is not None or time.time() - check_time >= 5:
                    break
            if proc.poll() is None:
                proc.kill()
        if os.path.exists(options.worker.plasma_socket):
            os.unlink(options.worker.plasma_socket)
def testMemQuotaAllocation(self):
    """A quota request larger than available system memory must block until
    the (mocked) memory statistics report enough free memory."""
    from mars import resource
    from mars.utils import AttributeDict

    # start with only 50 units available out of a 300-unit total
    mock_mem_stat = AttributeDict(dict(total=300, available=50, used=0, free=50))
    local_pool_addr = 'localhost:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool, \
            patch_method(resource.virtual_memory, new=lambda: mock_mem_stat):
        pool.create_actor(WorkerClusterInfoActor, schedulers=[local_pool_addr],
                          uid=WorkerClusterInfoActor.default_name())
        pool.create_actor(StatusActor, local_pool_addr,
                          uid=StatusActor.default_name())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
        pool.create_actor(ProcessHelperActor, uid=ProcessHelperActor.default_name())
        # refresh_time=0.1 makes the quota actor re-read memory stats fast
        quota_ref = pool.create_actor(MemQuotaActor, 300, refresh_time=0.1,
                                      uid=MemQuotaActor.default_name())

        time_recs = []
        with self.run_actor_test(pool) as test_actor:
            ref = test_actor.promise_ref(quota_ref)
            time_recs.append(time.time())

            def actual_exec(x):
                # records when the pending request was finally granted
                ref.release_quota(x)
                time_recs.append(time.time())
                test_actor.set_result(None)

            ref.request_quota('req', 100, _promise=True) \
                .then(functools.partial(actual_exec, 'req'))

            pool.sleep(0.5)
            # raise mocked free memory so the blocked request can proceed
            mock_mem_stat['available'] = 150
            mock_mem_stat['free'] = 150
            self.get_result(2)

        # the grant must have been delayed by the memory shortage
        self.assertGreater(abs(time_recs[0] - time_recs[1]), 0.4)
def create_standard_actors(cls, pool, address, quota_size=None, with_daemon=True,
                           with_status=True, with_resource=False):
    """Create the standard set of worker actors used by the execution tests.

    Creation order matters: cluster-info and storage actors are created
    before the actors that resolve them during their own ``post_create``.

    :param pool: actor pool to create the actors in
    :param address: scheduler/worker advertise address
    :param quota_size: memory quota in bytes; defaults to 1 MB when falsy
    :param with_daemon: also create WorkerDaemonActor
    :param with_status: also create StatusActor
    :param with_resource: also create ResourceActor
    """
    quota_size = quota_size or (1024 * 1024)  # default quota: 1 MB

    pool.create_actor(SchedulerClusterInfoActor, [address],
                      uid=SchedulerClusterInfoActor.default_uid())
    pool.create_actor(WorkerClusterInfoActor, [address],
                      uid=WorkerClusterInfoActor.default_uid())

    pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
    pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
    if with_resource:
        pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
    if with_daemon:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
    if with_status:
        pool.create_actor(StatusActor, address, uid=StatusActor.default_uid())

    pool.create_actor(SharedHolderActor, cls.plasma_storage_size,
                      uid=SharedHolderActor.default_uid())
    pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
    pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
    pool.create_actor(QuotaActor, quota_size, uid=MemQuotaActor.default_uid())
    pool.create_actor(ExecutionActor, uid=ExecutionActor.default_uid())
def post_create(self):
    """Advertise this actor as a free 'sender' slot on the dispatcher."""
    super().post_create()
    dispatcher = self.promise_ref(DispatchActor.default_uid())
    dispatcher.register_free_slot(self.uid, 'sender')
    self._dispatch_ref = dispatcher
def post_create(self):
    """Advertise this actor as a free 'cpu' slot on the dispatcher."""
    # NOTE(review): every sibling post_create in this file calls the base
    # hook first; added here for consistency — confirm the base class hook
    # has no unwanted side effect for calc actors
    super().post_create()
    self._dispatch_ref = self.promise_ref(DispatchActor.default_uid())
    self._dispatch_ref.register_free_slot(self.uid, 'cpu')
def testSimpleTransfer(self):
    """Spawn a remote transfer worker process, pull two of its chunks into
    the local pool via sender/receiver actors, and compare the data on
    both sides through the storage client."""
    session_id = str(uuid.uuid4())

    local_pool_addr = 'localhost:%d' % get_next_port()
    remote_pool_addr = 'localhost:%d' % get_next_port()
    remote_chunk_keys = [str(uuid.uuid4()) for _ in range(9)]
    msg_queue = multiprocessing.Queue()

    remote_spill_dir = tempfile.mkdtemp(prefix='mars_test_simple_transfer_')

    proc = multiprocessing.Process(
        target=run_transfer_worker,
        args=(remote_pool_addr, session_id, remote_chunk_keys,
              remote_spill_dir, msg_queue))
    proc.start()
    try:
        # the worker posts its plasma socket path once it is ready
        remote_plasma_socket = msg_queue.get(timeout=30)
    except Empty:
        if proc.is_alive():
            proc.terminate()
        raise

    with start_transfer_test_pool(address=local_pool_addr,
                                  plasma_size=self.plasma_storage_size) as pool:
        sender_refs, receiver_refs = [], []
        for _ in range(2):
            sender_refs.append(
                pool.create_actor(SenderActor, uid=str(uuid.uuid4())))
            receiver_refs.append(
                pool.create_actor(ReceiverActor, uid=str(uuid.uuid4())))

        try:
            # transfer the last and then the first remote chunk
            for data_id in (-1, 0):
                chunk_key = remote_chunk_keys[data_id]

                with self.run_actor_test(pool) as test_actor:
                    remote_dispatch_ref = test_actor.promise_ref(
                        DispatchActor.default_uid(), address=remote_pool_addr)

                    def _call_send_data(sender_uid):
                        # ask the chosen remote sender to push the chunk here
                        sender_ref = test_actor.promise_ref(
                            sender_uid, address=remote_pool_addr)
                        return sender_ref.send_data(session_id, chunk_key,
                                                    local_pool_addr, _promise=True)

                    def _test_data_exist(*_):
                        # fetch the chunk from both storage clients and
                        # verify the payloads are equal
                        local_client_ref = test_actor.promise_ref(
                            StorageClientActor.default_uid())
                        remote_client_ref = test_actor.promise_ref(
                            StorageClientActor.default_uid(),
                            address=remote_pool_addr)
                        targets = [DataStorageDevice.PROC_MEMORY]

                        return local_client_ref.get_object(
                            session_id, chunk_key, targets, _promise=True) \
                            .then(lambda local_data: remote_client_ref.get_object(
                                session_id, chunk_key, targets, _promise=True)
                                  .then(lambda remote_data: assert_array_equal(
                                      local_data, remote_data)))

                    remote_dispatch_ref.get_free_slot('sender', _promise=True) \
                        .then(_call_send_data) \
                        .then(_test_data_exist) \
                        .then(
                            lambda *_: test_actor.set_result(chunk_key),
                            lambda *exc: test_actor.set_result(exc, False),
                        )

                self.assertEqual(self.get_result(60), chunk_key)
            # tell the remote worker it may exit now
            msg_queue.put(1)
        finally:
            [pool.destroy_actor(ref) for ref in sender_refs + receiver_refs]

            os.unlink(remote_plasma_socket)
            os.kill(proc.pid, signal.SIGINT)

            # give the remote worker ~2s to honor SIGINT, then force-kill
            t = time.time()
            while proc.is_alive() and time.time() < t + 2:
                time.sleep(1)
            if proc.is_alive():
                proc.terminate()

            self.rm_spill_dirs(remote_spill_dir)
def post_create(self):
    """Register the mock receiver into the dispatcher's 'receiver' group."""
    super(MockReceiverActor, self).post_create()
    dispatcher = self.ctx.actor_ref(DispatchActor.default_name())
    dispatcher.register_free_slot(self.uid, 'receiver')
    self._dispatch_ref = dispatcher
def testSimpleTransfer(self):
    """Spawn a remote transfer worker process, pull two of its chunks into
    the local pool, and compare the data by reading both plasma stores
    (falling back to spill files when a chunk was spilled)."""
    session_id = str(uuid.uuid4())

    local_pool_addr = 'localhost:%d' % get_next_port()
    remote_pool_addr = 'localhost:%d' % get_next_port()
    remote_chunk_keys = [str(uuid.uuid4()) for _ in range(9)]
    msg_queue = multiprocessing.Queue()

    remote_spill_dir = os.path.join(
        tempfile.gettempdir(),
        'mars_spill_%d_%d' % (os.getpid(), id(run_transfer_worker)))

    proc = multiprocessing.Process(
        target=run_transfer_worker,
        args=(remote_pool_addr, session_id, remote_chunk_keys,
              remote_spill_dir, msg_queue)
    )
    proc.start()
    try:
        # the worker posts its plasma socket path once it is ready
        remote_plasma_socket = msg_queue.get(timeout=30)
    except Empty:
        if proc.is_alive():
            proc.terminate()
        raise

    with start_transfer_test_pool(address=local_pool_addr,
                                  plasma_size=self.plasma_storage_size) as pool:
        sender_refs, receiver_refs = [], []
        for _ in range(2):
            sender_refs.append(
                pool.create_actor(SenderActor, uid=str(uuid.uuid4())))
            receiver_refs.append(
                pool.create_actor(ReceiverActor, uid=str(uuid.uuid4())))

        try:
            # transfer the last and then the first remote chunk
            for data_id in (-1, 0):
                chunk_key = remote_chunk_keys[data_id]

                with self.run_actor_test(pool) as test_actor:
                    remote_dispatch_ref = test_actor.promise_ref(
                        DispatchActor.default_name(), address=remote_pool_addr)
                    remote_mapper_ref = pool.actor_ref(
                        PlasmaKeyMapActor.default_name(), address=remote_pool_addr)
                    # connect straight into the remote worker's plasma store
                    remote_plasma_client = plasma.connect(remote_plasma_socket, '', 0)
                    remote_store = PlasmaChunkStore(remote_plasma_client,
                                                    remote_mapper_ref)

                    def _call_send_data(sender_uid):
                        # ask the chosen remote sender to push the chunk here
                        sender_ref = test_actor.promise_ref(
                            sender_uid, address=remote_pool_addr)
                        return sender_ref.send_data(session_id, chunk_key,
                                                    local_pool_addr, _promise=True)

                    def _test_data_exist(*_):
                        # read local copy, falling back to the spill file
                        try:
                            local_data = test_actor._chunk_store.get(
                                session_id, chunk_key)
                        except KeyError:
                            with open(build_spill_file_name(chunk_key), 'rb') as spill_file:
                                local_data = dataserializer.load(spill_file)

                        # read remote copy the same way
                        try:
                            remote_data = remote_store.get(session_id, chunk_key)
                        except KeyError:
                            with open(build_spill_file_name(chunk_key, remote_spill_dir),
                                      'rb') as spill_file:
                                remote_data = dataserializer.load(spill_file)
                        assert_array_equal(local_data, remote_data)

                        del local_data, remote_data

                    remote_dispatch_ref.get_free_slot('sender', _promise=True) \
                        .then(_call_send_data) \
                        .then(_test_data_exist) \
                        .then(
                            lambda *_: test_actor.set_result(chunk_key),
                            lambda *exc: test_actor.set_result(exc, False),
                        )

                self.assertEqual(self.get_result(60), chunk_key)
            # tell the remote worker it may exit now
            msg_queue.put(1)
        finally:
            [pool.destroy_actor(ref) for ref in sender_refs + receiver_refs]

            os.unlink(remote_plasma_socket)
            os.kill(proc.pid, signal.SIGINT)

            # give the remote worker ~2s to honor SIGINT, then force-kill
            t = time.time()
            while proc.is_alive() and time.time() < t + 2:
                time.sleep(1)
            if proc.is_alive():
                proc.terminate()
def testClientReadAndWrite(self):
    """Exercise StorageClient reader/writer paths: direct and promised
    writers to disk, readers with and without a device copy, and deletion
    of the backing file."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        options.worker.lock_free_fileio = True
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(IORunnerActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client
            file_names = []

            def _write_data(ser, writer):
                # remember the backing file so its deletion can be checked
                file_names.append(writer.filename)
                self.assertEqual(writer.nbytes, ser_data1.total_bytes)
                with writer:
                    ser.write_to(writer)

            # test creating non-promised writer and write
            with storage_client.create_writer(
                    session_id, data_key1, ser_data1.total_bytes,
                    (DataStorageDevice.DISK, ), _promise=False) as writer:
                _write_data(ser_data1, writer)
            self.assertTrue(os.path.exists(file_names[0]))
            self.assertEqual(
                sorted(storage_client.get_data_locations(
                    session_id, [data_key1])[0]),
                [(0, DataStorageDevice.DISK)])

            storage_client.delete(session_id, [data_key1])

            # test creating promised writer and write
            file_names[:] = []
            self.waitp(
                storage_client.create_writer(
                    session_id, data_key2, ser_data1.total_bytes,
                    (DataStorageDevice.DISK, ))
                .then(functools.partial(_write_data, ser_data1)))
            self.assertTrue(os.path.exists(file_names[0]))
            self.assertEqual(
                sorted(storage_client.get_data_locations(
                    session_id, [data_key2])[0]),
                [(0, DataStorageDevice.DISK)])

            def _read_data(reader):
                with reader:
                    return dataserializer.deserialize(reader.read())

            # test creating reader when data exist in location
            result = self.waitp(
                storage_client.create_reader(
                    session_id, data_key2, (DataStorageDevice.DISK, ))
                .then(_read_data))[0]
            assert_allclose(result, data1)

            # test creating reader when no data in location (should raise)
            with self.assertRaises(IOError):
                storage_client.create_reader(
                    session_id, data_key2, (DataStorageDevice.SHARED_MEMORY, ),
                    _promise=False)

            # test creating reader when copy needed
            self.waitp(
                storage_client.create_reader(
                    session_id, data_key2, (DataStorageDevice.SHARED_MEMORY, ))
                .then(_read_data))
            self.assertEqual(
                sorted(storage_client.get_data_locations(
                    session_id, [data_key2])[0]),
                [(0, DataStorageDevice.SHARED_MEMORY),
                 (0, DataStorageDevice.DISK)])

            storage_client.delete(session_id, [data_key2])
            # deletion of the disk file is asynchronous; poll until gone
            while os.path.exists(file_names[0]):
                test_actor.ctx.sleep(0.05)
            self.assertFalse(os.path.exists(file_names[0]))
def testClientSpill(self, *_):
    """Exercise StorageClient.copy_to: missing keys, already-present
    targets, failed loads, multi-object copies and copies that force a
    spill out of shared memory."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(IORunnerActor)

        pool.create_actor(QuotaActor, 1024**2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        session_id = str(uuid.uuid4())
        # ~1.25 MB per array so that filling soon exhausts shared memory
        data_list = [np.random.randint(0, 32767, (655360, ), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client
            idx = 0

            shared_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.SHARED_MEMORY))
            proc_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))

            def _fill_data():
                # put data into shared memory until it is full; returns the
                # index of the first key NOT stored
                i = 0
                for i, (key, data) in enumerate(zip(data_keys[idx:], data_list)):
                    try:
                        shared_handler.put_objects(session_id, [key], [data])
                    except StorageFull:
                        break
                return i + idx

            idx = _fill_data()

            # test copying non-existing keys
            storage_client.copy_to(session_id, ['non-exist-key'],
                                   [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            with self.assertRaises(KeyError):
                self.get_result(5)

            # test copying into containing locations
            storage_client.copy_to(session_id, [data_keys[0]],
                                   [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_keys[0]])[0]),
                [(0, DataStorageDevice.SHARED_MEMORY)])

            # test unsuccessful copy when no data at target
            def _mock_load_from(*_, **__):
                # always fail the device-to-device load with SystemError
                return promise.finished(*build_exc_info(SystemError),
                                        _accept=False)

            with patch_method(StorageHandler.load_from, _mock_load_from), \
                    self.assertRaises(SystemError):
                storage_client.copy_to(session_id, [data_keys[0]],
                                       [DataStorageDevice.DISK]) \
                    .then(lambda *_: test_actor.set_result(None),
                          lambda *exc: test_actor.set_result(exc, accept=False))
                self.get_result(5)

            # test successful copy for multiple objects
            storage_client.delete(session_id, [data_keys[idx - 1]])
            # weakrefs prove the source objects are freed after the copy
            ref_data = weakref.ref(data_list[idx])
            ref_data2 = weakref.ref(data_list[idx + 1])
            proc_handler.put_objects(session_id, data_keys[idx:idx + 2],
                                     data_list[idx:idx + 2])
            data_list[idx:idx + 2] = [None, None]

            storage_client.copy_to(session_id, data_keys[idx:idx + 2],
                                   [DataStorageDevice.SHARED_MEMORY,
                                    DataStorageDevice.DISK]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            proc_handler.delete(session_id, data_keys[idx:idx + 2])

            self.assertEqual(
                storage_manager_ref.get_data_locations(
                    session_id, data_keys[idx:idx + 2]),
                [{(0, DataStorageDevice.SHARED_MEMORY)},
                 {(0, DataStorageDevice.DISK)}])
            self.assertIsNone(ref_data())
            self.assertIsNone(ref_data2())

            # test copy with spill
            idx += 2
            proc_handler.put_objects(session_id, [data_keys[idx]],
                                     [data_list[idx]])

            storage_client.copy_to(session_id, [data_keys[idx]],
                                   [DataStorageDevice.SHARED_MEMORY]) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)

            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(
                    session_id, [data_keys[idx]])[0]),
                [(0, DataStorageDevice.PROC_MEMORY),
                 (0, DataStorageDevice.SHARED_MEMORY)])
def testClientPutAndGet(self):
    """Exercise StorageClient batch put/get: device fallback when shared
    memory fills up, promised and non-promised gets, and reference release
    after delete."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(IORunnerActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())
        pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor')

        session_id = str(uuid.uuid4())
        # ~1.25 MB per array: the batch cannot all fit in shared memory
        data_list = [np.random.randint(0, 32767, (655360, ), np.int16)
                     for _ in range(20)]
        data_keys = [str(uuid.uuid4()) for _ in range(20)]
        data_dict = dict(zip(data_keys, data_list))

        with self.run_actor_test(pool) as test_actor:
            storage_client = test_actor.storage_client

            # check batch object put with size exceeds
            storage_client.put_objects(
                session_id, data_keys, data_list,
                [DataStorageDevice.SHARED_MEMORY,
                 DataStorageDevice.PROC_MEMORY]) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            locations = storage_client.get_data_locations(session_id, data_keys)
            loc_to_keys = defaultdict(list)
            for key, location in zip(data_keys, locations):
                self.assertEqual(len(location), 1)
                loc_to_keys[list(location)[0][-1]].append(key)
            # the batch must have spread over both devices
            self.assertGreater(len(loc_to_keys[DataStorageDevice.PROC_MEMORY]), 1)
            self.assertGreater(len(loc_to_keys[DataStorageDevice.SHARED_MEMORY]), 1)

            # check get object with all cases
            with self.assertRaises(IOError):
                first_shared_key = loc_to_keys[DataStorageDevice.SHARED_MEMORY][0]
                # non-promised get from a device that lacks the data raises
                storage_client.get_object(session_id, first_shared_key,
                                          [DataStorageDevice.PROC_MEMORY],
                                          _promise=False)

            shared_objs = storage_client.get_objects(
                session_id, [first_shared_key],
                [DataStorageDevice.SHARED_MEMORY], _promise=False)
            self.assertEqual(len(shared_objs), 1)
            assert_allclose(shared_objs[0], data_dict[first_shared_key])

            # promised get may copy the data to the requested device
            storage_client.get_object(session_id, first_shared_key,
                                      [DataStorageDevice.PROC_MEMORY],
                                      _promise=True) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data_dict[first_shared_key])

            storage_client.delete(session_id, data_keys)
            time.sleep(0.5)
            ref = weakref.ref(data_dict[data_keys[0]])
            # shared memory copies the payload, so the original object
            # becomes collectable once local references are dropped
            storage_client.put_objects(session_id, data_keys[:1], [ref()],
                                       [DataStorageDevice.SHARED_MEMORY])

            data_list[:] = []
            data_dict.clear()
            self.assertIsNone(ref())
def testDispatch(self, *_):
    """Exercise DispatchActor slot bookkeeping: registration counts, hash
    slots, dead-callback handling, and promise-based slot acquisition with
    queueing when all slots are busy."""
    call_records = dict()
    group_size = 4

    mock_scheduler_addr = f'127.0.0.1:{get_next_port()}'
    with create_actor_pool(n_process=1, backend='gevent',
                           address=mock_scheduler_addr) as pool:
        dispatch_ref = pool.create_actor(
            DispatchActor, uid=DispatchActor.default_uid())
        # actors of g1
        [pool.create_actor(TaskActor, 'g1', call_records)
         for _ in range(group_size)]
        [pool.create_actor(TaskActor, 'g2', call_records)
         for _ in range(group_size)]

        self.assertEqual(len(dispatch_ref.get_slots('g1')), group_size)
        self.assertEqual(len(dispatch_ref.get_slots('g2')), group_size)
        # unknown group has no slots
        self.assertEqual(len(dispatch_ref.get_slots('g3')), 0)

        # hash slot assignment must be deterministic for the same key
        self.assertEqual(dispatch_ref.get_hash_slot('g1', 'hash_str'),
                         dispatch_ref.get_hash_slot('g1', 'hash_str'))

        # a callback pointing at a non-existing actor must not leak a slot
        dispatch_ref.acquire_free_slot(
            'g1', callback=(('NonExist', mock_scheduler_addr), '_non_exist', {}))
        self.assertEqual(dispatch_ref.get_free_slots_num().get('g1'), group_size)

        # tasks within [0, group_size - 1] will run almost simultaneously,
        # while the last one will be delayed due to lack of slots
        delay = 1
        with self.run_actor_test(pool) as test_actor:
            p = promise.finished()
            _dispatch_ref = test_actor.promise_ref(DispatchActor.default_uid())

            def _call_on_dispatched(uid, key=None):
                if uid is None:
                    # no slot available for the group at all
                    call_records[key] = 'NoneUID'
                else:
                    test_actor.promise_ref(uid).queued_call(
                        key, delay, _tell=True, _wait=False)

            # chain group_size + 1 acquisitions on each group; partial()
            # binds idx eagerly, avoiding the late-binding closure pitfall
            for idx in range(group_size + 1):
                p = p.then(lambda *_: _dispatch_ref.acquire_free_slot('g1', _promise=True)) \
                    .then(partial(_call_on_dispatched, key=f'{idx}_1')) \
                    .then(lambda *_: _dispatch_ref.acquire_free_slot('g2', _promise=True)) \
                    .then(partial(_call_on_dispatched, key=f'{idx}_2'))

            p.then(lambda *_: _dispatch_ref.acquire_free_slot('g3', _promise=True)) \
                .then(partial(_call_on_dispatched, key='N_1')) \
                .then(lambda *_: test_actor.set_result(None))
            self.get_result(20)

        self.assertEqual(call_records['N_1'], 'NoneUID')
        # first group_size calls start close together...
        self.assertLess(
            sum(abs(call_records[f'{idx}_1'] - call_records['0_1'])
                for idx in range(group_size)),
            delay * 0.5)
        # ...while the extra call waits roughly one task duration
        self.assertGreater(
            call_records[f'{group_size}_1'] - call_records['0_1'],
            delay * 0.5)
        self.assertLess(
            call_records[f'{group_size}_1'] - call_records['0_1'],
            delay * 1.5)

        dispatch_ref.destroy()
def testProcMemLoad(self):
    """Load data into process memory from a disk bytes reader and from a
    shared-memory object reader, checking locations and that deleted data
    is actually released."""
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        pool.create_actor(QuotaActor, 1024**2, uid=MemQuotaActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(IORunnerActor)

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

        data1 = np.random.random((10, 10))
        data2 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)

        session_id = str(uuid.uuid4())
        data_key1 = str(uuid.uuid4())
        data_key2 = str(uuid.uuid4())

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.PROC_MEMORY))

        # load from bytes io
        disk_handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.DISK))
        with disk_handler.create_bytes_writer(
                session_id, data_key1, ser_data1.total_bytes) as writer:
            ser_data1.write_to(writer)

        handler.load_from_bytes_io(session_id, [data_key1], disk_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        # data must now be on both devices
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(
                session_id, [data_key1])[0]),
            [(0, DataStorageDevice.PROC_MEMORY), (0, DataStorageDevice.DISK)])

        disk_handler.delete(session_id, [data_key1])

        # deleting from the handler must drop the last reference
        data_load = handler.get_objects(session_id, [data_key1])[0]
        ref_data = weakref.ref(data_load)
        del data_load
        handler.delete(session_id, [data_key1])
        self.assertIsNone(ref_data())

        # load from object io
        shared_handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.SHARED_MEMORY))
        shared_handler.put_objects(session_id, [data_key2], [data2])

        handler.load_from_object_io(session_id, [data_key2], shared_handler) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        self.assertEqual(
            sorted(storage_manager_ref.get_data_locations(
                session_id, [data_key2])[0]),
            [(0, DataStorageDevice.PROC_MEMORY),
             (0, DataStorageDevice.SHARED_MEMORY)])

        shared_handler.delete(session_id, [data_key2])

        data_load = handler.get_objects(session_id, [data_key2])[0]
        ref_data = weakref.ref(data_load)
        del data_load
        handler.delete(session_id, [data_key2])
        self.assertIsNone(ref_data())
def testSharedHolderSpill(self):
    """Exercise SharedHolderActor spilling: oversized requests, pinning,
    lift ordering, no-op spills, fully-pinned failure and error
    propagation from the (mock) IO runner."""
    with self._start_shared_holder_pool() as (pool, test_actor):
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        # the mock IO runner lets the test trigger each spill manually
        pool.create_actor(MockIORunnerActor, uid=MockIORunnerActor.default_uid())

        manager_ref = pool.actor_ref(StorageManagerActor.default_uid())
        shared_holder_ref = pool.actor_ref(SharedHolderActor.default_uid())
        mock_runner_ref = pool.actor_ref(MockIORunnerActor.default_uid())
        status_ref = pool.actor_ref(StatusActor.default_uid())

        storage_client = test_actor.storage_client
        shared_handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.SHARED_MEMORY))

        cache_allocations = status_ref.get_cache_allocations()
        self.assertGreater(cache_allocations['total'], 0)

        session_id = str(uuid.uuid4())
        data_list = [np.random.randint(0, 32767, (655360, ), np.int16)
                     for _ in range(20)]
        key_list = [str(uuid.uuid4()) for _ in range(20)]

        self._fill_shared_storage(session_id, key_list, data_list)
        data_size = manager_ref.get_data_sizes(session_id, [key_list[0]])[0]

        # spill huge sizes
        with self.assertRaises(SpillSizeExceeded):
            self.waitp(
                shared_handler.spill_size(self.plasma_storage_size * 2),
            )

        # spill size of two data chunks
        keys_before = [tp[1] for tp in shared_holder_ref.dump_keys()]

        pin_token = str(uuid.uuid4())
        shared_holder_ref.pin_data_keys(session_id, key_list[1:2], pin_token)

        # key 0 is lifted to most-recent and key 1 pinned, so the spill
        # should pick keys 2 and 3
        expect_spills = key_list[2:4]

        shared_holder_ref.lift_data_keys(session_id, [key_list[0]])
        shared_handler.spill_size(data_size * 2) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))

        pool.sleep(0.5)
        # when the key is in spill (here we trigger it manually in mock),
        # it cannot be spilled
        with self.assertRaises(PinDataKeyFailed):
            shared_holder_ref.pin_data_keys(session_id, key_list[2:3],
                                            str(uuid.uuid4()))

        for k in key_list[2:6]:
            mock_runner_ref.submit_item(session_id, k)
        self.get_result(5)

        shared_holder_ref.unpin_data_keys(session_id, key_list[1:2], pin_token)
        keys_after = [tp[1] for tp in shared_holder_ref.dump_keys()]
        self.assertSetEqual(set(keys_before) - set(keys_after),
                            set(expect_spills))

        # spill size of a single chunk, should return immediately
        keys_before = [tp[1] for tp in shared_holder_ref.dump_keys()]

        shared_handler.spill_size(data_size) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)

        keys_after = [tp[1] for tp in shared_holder_ref.dump_keys()]
        self.assertSetEqual(set(keys_before), set(keys_after))

        # when all pinned, nothing can be spilled
        # and spill_size() should raises an error
        pin_token = str(uuid.uuid4())
        shared_holder_ref.pin_data_keys(session_id, key_list, pin_token)
        shared_handler.spill_size(data_size * 3) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        with self.assertRaises(NoDataToSpill):
            self.get_result(5)
        shared_holder_ref.unpin_data_keys(session_id, key_list, pin_token)

        # when some errors raise when spilling,
        # spill_size() should report it
        mock_runner_ref.clear_submissions()
        shared_handler.spill_size(data_size * 3) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))

        pool.sleep(0.5)
        spill_keys = mock_runner_ref.get_request_keys()
        # fail the first spill item, let the rest succeed
        mock_runner_ref.submit_item(session_id, spill_keys[0],
                                    build_exc_info(SystemError))
        for k in spill_keys[1:]:
            mock_runner_ref.submit_item(session_id, k)
        with self.assertRaises(SystemError):
            self.get_result(5)
def post_create(self):
    """Advertise this IO runner as a free 'iorunner' slot on the dispatcher."""
    super().post_create()
    self.ctx.actor_ref(DispatchActor.default_uid()) \
        .register_free_slot(self.uid, 'iorunner')