def testEstimateGraphFinishTime(self):
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False)

        status_ref = pool.actor_ref(StatusActor.default_uid())
        execution_ref = pool.actor_ref(ExecutionActor.default_uid())
        pool.create_actor(CpuCalcActor)

        import mars.tensor as mt
        arr = mt.ones((10, 8), chunk_size=10)
        graph = arr.build_graph(compose=False, tiled=True)
        arr = get_tiled(arr)

        graph_key = str(uuid.uuid4())

        for _ in range(options.optimize.min_stats_count + 1):
            status_ref.update_mean_stats(
                'calc_speed.' + type(arr.chunks[0].op).__name__, 10)
            status_ref.update_mean_stats('disk_read_speed', 10)
            status_ref.update_mean_stats('disk_write_speed', 10)
            status_ref.update_mean_stats('net_transfer_speed', 10)

        execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                    dict(chunks=[arr.chunks[0].key]), None)
        execution_ref.estimate_graph_finish_time(session_id, graph_key)

        stats_dict = status_ref.get_stats(
            ['min_est_finish_time', 'max_est_finish_time'])
        self.assertIsNotNone(stats_dict.get('min_est_finish_time'))
        self.assertIsNotNone(stats_dict.get('max_est_finish_time'))
# decorator assumed (requires `import contextlib`): the yield-plus-cleanup
# shape below is the contextlib.contextmanager pattern
@contextlib.contextmanager
def start_transfer_test_pool(**kwargs):
    address = kwargs.pop('address')
    plasma_size = kwargs.pop('plasma_size')
    with create_actor_pool(n_process=1, backend='gevent', address=address, **kwargs) as pool:
        pool.create_actor(SchedulerClusterInfoActor, schedulers=[address],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor, schedulers=[address],
                          uid=WorkerClusterInfoActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(QuotaActor, 1024 * 1024 * 20, uid=MemQuotaActor.default_uid())
        chunk_holder_ref = pool.create_actor(
            ChunkHolderActor, plasma_size, uid=ChunkHolderActor.default_uid())
        pool.create_actor(SpillActor)
        pool.create_actor(StatusActor, address, uid=StatusActor.default_uid())

        try:
            yield pool
        finally:
            chunk_holder_ref.destroy()
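# A minimal usage sketch for the helper above, assuming the restored
# @contextlib.contextmanager decorator; the plasma_size value is illustrative
# and `example_transfer_pool_usage` is not a name from the original suite.
def example_transfer_pool_usage():
    address = '127.0.0.1:%d' % get_next_port()
    # entering the block starts the pool and registers the standard actors;
    # leaving it destroys the chunk holder, even if the body raises
    with start_transfer_test_pool(address=address, plasma_size=1024 ** 3) as pool:
        status_ref = pool.actor_ref(StatusActor.default_uid())
        assert status_ref is not None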
# decorator assumed (requires `import contextlib`), matching the
# yield-plus-cleanup shape of the other pool helpers
@contextlib.contextmanager
def _start_calc_pool(self):
    mock_addr = f'127.0.0.1:{get_next_port()}'
    with self.create_pool(n_process=1, backend='gevent', address=mock_addr) as pool:
        pool.create_actor(SchedulerClusterInfoActor, [mock_addr],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor, [mock_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(StatusActor, mock_addr, uid=StatusActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(IORunnerActor)
        pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
        shared_holder_ref = pool.create_actor(
            SharedHolderActor, uid=SharedHolderActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(CpuCalcActor, uid=CpuCalcActor.default_uid())

        with self.run_actor_test(pool) as test_actor:
            try:
                yield pool, test_actor
            finally:
                shared_holder_ref.destroy()
# decorator assumed (requires `import contextlib`), as for the variant above
@contextlib.contextmanager
def start_transfer_test_pool(**kwargs):
    address = kwargs.pop('address')
    plasma_size = kwargs.pop('plasma_size')
    with create_actor_pool(n_process=1, backend='gevent', address=address, **kwargs) as pool:
        pool.create_actor(SchedulerClusterInfoActor, [address],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(WorkerClusterInfoActor, [address],
                          uid=WorkerClusterInfoActor.default_uid())

        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(QuotaActor, 1024 * 1024 * 20, uid=MemQuotaActor.default_uid())
        shared_holder_ref = pool.create_actor(SharedHolderActor, plasma_size,
                                              uid=SharedHolderActor.default_uid())
        pool.create_actor(StatusActor, address, uid=StatusActor.default_uid())
        pool.create_actor(IORunnerActor)
        pool.create_actor(StorageClientActor, uid=StorageClientActor.default_uid())
        pool.create_actor(InProcHolderActor)
        pool.create_actor(ReceiverManagerActor, uid=ReceiverManagerActor.default_uid())

        try:
            yield pool
        finally:
            shared_holder_ref.destroy()
def testStatus(self):
    pool_address = '127.0.0.1:%d' % get_next_port()
    old_spill_dir = options.worker.spill_directory
    dir_name = options.worker.spill_directory = tempfile.mkdtemp(prefix='temp-mars-spill-')
    try:
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            pool.create_actor(SchedulerClusterInfoActor, schedulers=[pool_address],
                              uid=SchedulerClusterInfoActor.default_uid())
            pool.create_actor(WorkerClusterInfoActor, schedulers=[pool_address],
                              uid=WorkerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(ChunkHolderActor, self.plasma_storage_size,
                              uid=ChunkHolderActor.default_uid())
            status_ref = pool.create_actor(StatusActor, pool_address,
                                           uid=StatusActor.default_uid())
            status_ref.enable_status_upload()

            status_ref.update_slots(dict(cpu=4))
            status_ref.update_stats(dict(min_est_finish_time=10))

            def delay_read():
                gevent.sleep(1.5)
                return resource_ref.get_workers_meta()

            gl = gevent.spawn(delay_read)
            gl.join()
            v = gl.value
            self.assertIsNotNone(v)

            pool.destroy_actor(status_ref)
    finally:
        options.worker.spill_directory = old_spill_dir
        shutil.rmtree(dir_name)
def testDiskReadAndWritePacked(self, *_):
    test_addr = f'127.0.0.1:{get_next_port()}'
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerClusterInfoActor, [test_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(StatusActor, test_addr, uid=StatusActor.default_uid())
        pool.create_actor(EventsActor, uid=EventsActor.default_uid())
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        session_id = str(uuid.uuid4())
        data1 = np.random.random((10, 10))
        ser_data1 = dataserializer.serialize(data1)

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))

        for handler._compress in self._get_compress_types():
            data_key1 = str(uuid.uuid4())

            storage_client.delete(session_id, [data_key1])
            self.rm_spill_dirs()

            block_data1 = dataserializer.dumps(data1, compress=handler._compress)

            def _write_data(ser, writer):
                with writer:
                    writer.write(ser)
                return writer.filename

            handler.create_bytes_writer(session_id, data_key1, ser_data1.total_bytes,
                                        packed=True, _promise=True) \
                .then(functools.partial(_write_data, block_data1)) \
                .then(test_actor.set_result,
                      lambda *exc: test_actor.set_result(exc, accept=False))
            file_name = self.get_result(5)
            self.assertEqual(
                sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                [(0, DataStorageDevice.DISK)])
            self.assertTrue(os.path.exists(file_name))

            def _read_data(reader):
                with reader:
                    return dataserializer.loads(reader.read())

            handler.create_bytes_reader(session_id, data_key1, packed=True, _promise=True) \
                .then(_read_data) \
                .then(functools.partial(test_actor.set_result),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            assert_allclose(self.get_result(5), data1)
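# The callbacks threaded through .then() above receive the writer/reader that
# the storage handler produces. A self-contained sketch of the same callback
# shape, using plain file objects instead of handler-created ones (the
# _sketch names are hypothetical, for illustration only):
import os
import tempfile

def _write_data_sketch(ser, writer):
    # close the writer deterministically and hand its name to the next step
    with writer:
        writer.write(ser)
    return writer.name

def _read_data_sketch(reader):
    with reader:
        return reader.read()

_fd, _fn = tempfile.mkstemp()
os.close(_fd)
_name = _write_data_sketch(b'payload', open(_fn, 'wb'))
assert _read_data_sketch(open(_name, 'rb')) == b'payload'
os.unlink(_name)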
# decorator assumed (requires `import contextlib`); testSharedHolderSpill
# below consumes this helper as a context manager
@contextlib.contextmanager
def _start_shared_holder_pool(self):
    test_addr = '127.0.0.1:%d' % get_next_port()
    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerClusterInfoActor, [test_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(StatusActor, test_addr, uid=StatusActor.default_uid())

        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
        pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                          uid=SharedHolderActor.default_uid())

        yield pool, test_actor
def testMemQuotaAllocation(self):
    from mars import resource
    from mars.utils import AttributeDict

    mock_mem_stat = AttributeDict(dict(total=300, available=50, used=0, free=50))
    local_pool_addr = 'localhost:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool, \
            patch_method(resource.virtual_memory, new=lambda: mock_mem_stat):
        pool.create_actor(WorkerClusterInfoActor, [local_pool_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(StatusActor, local_pool_addr, uid=StatusActor.default_uid())

        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(ProcessHelperActor, uid=ProcessHelperActor.default_uid())
        quota_ref = pool.create_actor(MemQuotaActor, 300, refresh_time=0.1,
                                      uid=MemQuotaActor.default_uid())

        time_recs = []
        with self.run_actor_test(pool) as test_actor:
            ref = test_actor.promise_ref(quota_ref)
            time_recs.append(time.time())

            def actual_exec(x):
                ref.release_quota(x)
                time_recs.append(time.time())
                test_actor.set_result(None)

            ref.request_quota('req', 100, _promise=True) \
                .then(functools.partial(actual_exec, 'req'))

            pool.sleep(0.5)
            mock_mem_stat['available'] = 150
            mock_mem_stat['free'] = 150

            self.get_result(2)
            self.assertGreater(abs(time_recs[0] - time_recs[1]), 0.4)
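# testMemQuotaAllocation works by patching mars.resource.virtual_memory with a
# closure over a mutable AttributeDict, so "free memory" changes mid-test when
# the dict is mutated in place. The mechanism in isolation (standard library
# only; _Stats is an illustrative stand-in for AttributeDict):
class _Stats(dict):
    __getattr__ = dict.__getitem__      # attribute access like AttributeDict

def _sketch_mock_memory():
    stats = _Stats(total=300, available=50, used=0, free=50)

    def virtual_memory():               # stand-in for the patched function
        return stats

    assert virtual_memory().available == 50
    stats['available'] = 150            # in-place mutation is seen by callers
    assert virtual_memory().available == 150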
@classmethod   # assumed from the `cls` first parameter
def create_standard_actors(cls, pool, address, quota_size=None, with_daemon=True,
                           with_status=True, with_resource=False):
    quota_size = quota_size or (1024 * 1024)

    pool.create_actor(SchedulerClusterInfoActor, [address],
                      uid=SchedulerClusterInfoActor.default_uid())
    pool.create_actor(WorkerClusterInfoActor, [address],
                      uid=WorkerClusterInfoActor.default_uid())

    pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
    pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid())
    if with_resource:
        pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
    if with_daemon:
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
    if with_status:
        pool.create_actor(StatusActor, address, uid=StatusActor.default_uid())

    pool.create_actor(SharedHolderActor, cls.plasma_storage_size,
                      uid=SharedHolderActor.default_uid())
    pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
    pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
    pool.create_actor(QuotaActor, quota_size, uid=MemQuotaActor.default_uid())
    pool.create_actor(ExecutionActor, uid=ExecutionActor.default_uid())
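# A hedged sketch of a typical call site for create_standard_actors, mirroring
# testEstimateGraphFinishTime above; `test_case` stands in for the test
# instance and `example_standard_actor_setup` is not from the original suite.
def example_standard_actor_setup(test_case):
    address = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=address) as pool:
        test_case.create_standard_actors(pool, address, with_daemon=False)
        # the shared infrastructure is now addressable through default uids
        assert pool.actor_ref(ExecutionActor.default_uid()) is not None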
def testDiskReadAndWriteMerger(self):
    import logging
    logging.basicConfig(level=logging.DEBUG)

    test_addr = f'127.0.0.1:{get_next_port()}'
    options.worker.filemerger.max_file_size = 2400
    options.worker.filemerger.concurrency = 16

    with self.create_pool(n_process=1, address=test_addr) as pool, \
            self.run_actor_test(pool) as test_actor:
        pool.create_actor(WorkerClusterInfoActor, [test_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(StatusActor, test_addr, uid=StatusActor.default_uid())
        pool.create_actor(EventsActor, uid=EventsActor.default_uid())
        disk_file_merger_ref = pool.create_actor(
            DiskFileMergerActor, uid=DiskFileMergerActor.default_uid())
        pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
        storage_manager_ref = pool.create_actor(
            StorageManagerActor, uid=StorageManagerActor.default_uid())

        session_id = str(uuid.uuid4())
        data_count = 30
        data = [np.random.rand(random.randint(10, 30), random.randint(10, 30))
                for _ in range(data_count)]
        ser_data = [dataserializer.serialize(d) for d in data]

        storage_client = test_actor.storage_client
        handler = storage_client.get_storage_handler((0, DataStorageDevice.DISK))

        for handler._compress in self._get_compress_types():
            data_keys = [str(uuid.uuid4()) for _ in range(data_count)]

            promises = []
            for idx in range(data_count):
                block_data = dataserializer.dumps(data[idx], compress=handler._compress)

                def _write_data(ser, writer):
                    with writer:
                        writer.write(ser)
                    return writer.filename

                promises.append(
                    handler.create_bytes_writer(
                        session_id, data_keys[idx], ser_data[idx].total_bytes,
                        packed=True, with_merger_lock=True, _promise=True)
                    .then(functools.partial(_write_data, block_data)))
            promise.all_(promises).then(
                lambda *_: test_actor.set_result(0),
                lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(50)

            for key in data_keys:
                self.assertEqual(
                    sorted(storage_manager_ref.get_data_locations(session_id, [key])[0]),
                    [(0, DataStorageDevice.DISK)])

            dump_result = disk_file_merger_ref.dump_info()
            written_files = list(dump_result[2])
            for fn in written_files:
                self.assertTrue(os.path.exists(fn))

            data_store = [None] * len(data)
            promises = []
            for idx in range(data_count):
                def _read_data(reader, idx):
                    with reader:
                        data_store[idx] = dataserializer.loads(reader.read())

                promises.append(
                    handler.create_bytes_reader(
                        session_id, data_keys[idx], with_merger_lock=True,
                        packed=True, _promise=True)
                    .then(functools.partial(_read_data, idx=idx)))
            promise.all_(promises).then(
                lambda *_: test_actor.set_result(0),
                lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(50)
            for true_data, read_data in zip(data, data_store):
                assert_allclose(true_data, read_data)

            data_store = [None] * len(data)
            promises = []
            for idx in range(data_count):
                def _read_data(reader, idx):
                    with reader:
                        data_store[idx] = dataserializer.deserialize(reader.read())

                promises.append(
                    handler.create_bytes_reader(session_id, data_keys[idx], _promise=True)
                    .then(functools.partial(_read_data, idx=idx)))
            promise.all_(promises).then(
                lambda *_: test_actor.set_result(0),
                lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(50)
            for true_data, read_data in zip(data, data_store):
                assert_allclose(true_data, read_data)

            storage_client.delete(session_id, data_keys)
            pool.sleep(0.1)
            for fn in written_files:
                self.assertFalse(os.path.exists(fn))
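# promise.all_(promises) above fans out N asynchronous writes and resolves
# once every one settles. The same rendezvous with the standard library,
# substituting concurrent.futures for the Mars promise API (illustrative
# sketch, not the code under test):
from concurrent.futures import ThreadPoolExecutor

def _sketch_fan_out(payloads):
    with ThreadPoolExecutor(max_workers=16) as executor:
        # submit all "writes", then block until each future completes,
        # mirroring promise.all_(...) followed by self.get_result(50)
        futures = [executor.submit(len, p) for p in payloads]
        return [f.result(timeout=50) for f in futures]

assert _sketch_fan_out([b'a', b'bb', b'ccc']) == [1, 2, 3]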
def testQuota(self):
    def _raiser(*_, **__):
        raise ValueError

    local_pool_addr = 'localhost:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool:
        pool.create_actor(WorkerClusterInfoActor, [local_pool_addr],
                          uid=WorkerClusterInfoActor.default_uid())
        pool.create_actor(StatusActor, local_pool_addr, uid=StatusActor.default_uid())

        quota_ref = pool.create_actor(QuotaActor, 300, uid=QuotaActor.default_uid())

        # actions on a non-existent key should not raise
        quota_ref.process_quota('non_exist')
        quota_ref.hold_quota('non_exist')
        quota_ref.release_quota('non_exist')

        with self.assertRaises(ValueError):
            quota_ref.request_quota('ERROR', 1000)

        self.assertTrue(quota_ref.request_quota('0', 100))
        self.assertTrue(quota_ref.request_quota('0', 50))
        self.assertTrue(quota_ref.request_quota('0', 200))

        quota_ref.process_quota('0')
        self.assertIn('0', quota_ref.dump_data().proc_sizes)
        quota_ref.alter_allocation('0', 190, new_key=('0', 0))
        self.assertEqual(quota_ref.dump_data().allocations[('0', 0)], 190)

        quota_ref.hold_quota(('0', 0))
        self.assertIn(('0', 0), quota_ref.dump_data().hold_sizes)
        quota_ref.alter_allocation(('0', 0), new_key=('0', 1))
        self.assertEqual(quota_ref.dump_data().allocations[('0', 1)], 190)

        with self.run_actor_test(pool) as test_actor:
            ref = test_actor.promise_ref(QuotaActor.default_uid())

            ref.request_quota('1', 150, _promise=True) \
                .then(lambda *_: test_actor.set_result(True)) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))

            pool.sleep(0.5)

            self.assertFalse(quota_ref.request_quota('2', 50))
            self.assertFalse(quota_ref.request_quota('3', 200))
            self.assertFalse(quota_ref.request_quota('3', 180))
            self.assertNotIn('2', quota_ref.dump_data().allocations)

            ref.cancel_requests(('1',), reject_exc=build_exc_info(OSError))
            with self.assertRaises(OSError):
                self.get_result(5)

            with patch_method(QuotaActor._request_quota, new=_raiser):
                ref.request_quota('err_raise', 1, _promise=True) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(ValueError):
                    self.get_result(5)

                ref.request_batch_quota({'err_raise': 1}, _promise=True) \
                    .catch(lambda *exc: test_actor.set_result(exc, accept=False))
                with self.assertRaises(ValueError):
                    self.get_result(5)

            self.assertNotIn('1', quota_ref.dump_data().requests)
            self.assertIn('2', quota_ref.dump_data().allocations)
            self.assertNotIn('3', quota_ref.dump_data().allocations)

            quota_ref.release_quotas([('0', 1)])
            self.assertIn('3', quota_ref.dump_data().allocations)

            self.assertFalse(quota_ref.request_quota('4', 180))
            quota_ref.alter_allocations(['3'], [50])
            self.assertIn('4', quota_ref.dump_data().allocations)

        with self.run_actor_test(pool) as test_actor:
            ref = test_actor.promise_ref(QuotaActor.default_uid())
            ref.request_quota('5', 50, _promise=True) \
                .catch(lambda *exc: test_actor.set_result(exc, accept=False))

            with patch_method(QuotaActor.alter_allocation, new=_raiser):
                quota_ref.release_quota('2')
                with self.assertRaises(ValueError):
                    self.get_result(5)
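# The assertions in testQuota trace a fixed lifecycle: request reserves a
# size, process/hold mark it in use, release frees it for waiting requests.
# A toy dictionary model of that budget invariant (not the QuotaActor
# implementation; ToyQuota is purely illustrative):
class ToyQuota:
    def __init__(self, total):
        self._total = total
        self._allocations = {}

    def request(self, key, size):
        if size > self._total:
            # mirrors request_quota('ERROR', 1000) raising ValueError
            raise ValueError('single request exceeds total quota')
        if sum(self._allocations.values()) + size > self._total:
            return False            # caller waits, as with _promise=True
        self._allocations[key] = size
        return True

    def release(self, key):
        self._allocations.pop(key, None)

_q = ToyQuota(300)
assert _q.request('0', 100)
assert not _q.request('1', 250)     # would exceed the 300 budget
_q.release('0')
assert _q.request('1', 250)         # freed quota satisfies the request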
def testSharedHolderSpill(self):
    with self._start_shared_holder_pool() as (pool, test_actor):
        pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
        pool.create_actor(MockIORunnerActor, uid=MockIORunnerActor.default_uid())

        manager_ref = pool.actor_ref(StorageManagerActor.default_uid())
        shared_holder_ref = pool.actor_ref(SharedHolderActor.default_uid())
        mock_runner_ref = pool.actor_ref(MockIORunnerActor.default_uid())
        status_ref = pool.actor_ref(StatusActor.default_uid())

        storage_client = test_actor.storage_client
        shared_handler = storage_client.get_storage_handler(
            (0, DataStorageDevice.SHARED_MEMORY))

        cache_allocations = status_ref.get_cache_allocations()
        self.assertGreater(cache_allocations['total'], 0)

        session_id = str(uuid.uuid4())
        data_list = [np.random.randint(0, 32767, (655360,), np.int16)
                     for _ in range(20)]
        key_list = [str(uuid.uuid4()) for _ in range(20)]

        self._fill_shared_storage(session_id, key_list, data_list)
        data_size = manager_ref.get_data_sizes(session_id, [key_list[0]])[0]

        # spilling more than the store can ever hold should fail
        with self.assertRaises(SpillSizeExceeded):
            self.waitp(
                shared_handler.spill_size(self.plasma_storage_size * 2),
            )

        # spill the size of two data chunks
        keys_before = [tp[1] for tp in shared_holder_ref.dump_keys()]

        pin_token = str(uuid.uuid4())
        shared_holder_ref.pin_data_keys(session_id, key_list[1:2], pin_token)

        expect_spills = key_list[2:4]

        shared_holder_ref.lift_data_keys(session_id, [key_list[0]])
        shared_handler.spill_size(data_size * 2) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))

        pool.sleep(0.5)
        # while a key is being spilled (triggered manually via the mock here),
        # it cannot be pinned
        with self.assertRaises(PinDataKeyFailed):
            shared_holder_ref.pin_data_keys(session_id, key_list[2:3], str(uuid.uuid4()))

        for k in key_list[2:6]:
            mock_runner_ref.submit_item(session_id, k)
        self.get_result(5)

        shared_holder_ref.unpin_data_keys(session_id, key_list[1:2], pin_token)
        keys_after = [tp[1] for tp in shared_holder_ref.dump_keys()]
        self.assertSetEqual(set(keys_before) - set(keys_after), set(expect_spills))

        # spilling the size of a single chunk should return immediately
        keys_before = [tp[1] for tp in shared_holder_ref.dump_keys()]
        shared_handler.spill_size(data_size) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        self.get_result(5)
        keys_after = [tp[1] for tp in shared_holder_ref.dump_keys()]
        self.assertSetEqual(set(keys_before), set(keys_after))

        # when all keys are pinned, nothing can be spilled
        # and spill_size() should raise an error
        pin_token = str(uuid.uuid4())
        shared_holder_ref.pin_data_keys(session_id, key_list, pin_token)
        shared_handler.spill_size(data_size * 3) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        with self.assertRaises(NoDataToSpill):
            self.get_result(5)
        shared_holder_ref.unpin_data_keys(session_id, key_list, pin_token)

        # when an error occurs during spilling, spill_size() should report it
        mock_runner_ref.clear_submissions()
        shared_handler.spill_size(data_size * 3) \
            .then(lambda *_: test_actor.set_result(None),
                  lambda *exc: test_actor.set_result(exc, accept=False))
        pool.sleep(0.5)
        spill_keys = mock_runner_ref.get_request_keys()
        mock_runner_ref.submit_item(session_id, spill_keys[0], build_exc_info(SystemError))
        for k in spill_keys[1:]:
            mock_runner_ref.submit_item(session_id, k)
        with self.assertRaises(SystemError):
            self.get_result(5)