def testStopGraphCalc(self): pool_address = '127.0.0.1:%d' % get_next_port() session_id = str(uuid.uuid4()) mock_data = np.array([1, 2, 3, 4]) with create_actor_pool(n_process=2, backend='gevent', address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool: self.create_standard_actors(pool, pool_address, with_status=False) daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid()) execution_ref = pool.actor_ref(ExecutionActor.default_uid()) calc_ref = daemon_ref.create_actor( MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a') daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a') test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor') test_actor.run_simple_calc(session_id, _tell=True) pool.sleep(2) proc_id = pool.distributor.distribute(calc_ref.uid) execution_ref.stop_execution(session_id, test_actor.get_graph_key(), _tell=True) while daemon_ref.is_actor_process_alive(calc_ref): pool.sleep(0.1) pool.restart_process(proc_id) daemon_ref.handle_process_down([proc_id]) with self.assertRaises(ExecutionInterrupted): self.wait_for_result(pool, test_actor)
def testDaemon(self): mock_scheduler_addr = '127.0.0.1:%d' % get_next_port() with create_actor_pool(n_process=2, backend='gevent', distributor=MarsDistributor(2, 'w:0:'), address=mock_scheduler_addr) as pool: daemon_ref = pool.create_actor( WorkerDaemonActor, uid=WorkerDaemonActor.default_name()) pool.create_actor(DispatchActor, uid=DispatchActor.default_name()) sleeper_ref = daemon_ref.create_actor(DaemonSleeperActor, uid='w:1:DaemonSleeperActor') daemon_ref.create_actor(ProcessHelperActor, uid='w:1:ProcHelper') test_actor = pool.create_actor(DaemonTestActor) daemon_ref.register_callback(test_actor, 'handle_process_down') test_actor.run_test_sleep(sleeper_ref, 10, _tell=True) self.assertTrue(daemon_ref.is_actor_process_alive(sleeper_ref)) pool.sleep(0.5) daemon_ref.kill_actor_process(sleeper_ref) # repeated kill shall not produce errors daemon_ref.kill_actor_process(sleeper_ref) self.assertFalse(daemon_ref.is_actor_process_alive(sleeper_ref)) pool.restart_process(1) daemon_ref.handle_process_down([1]) pool.sleep(1) self.assertTrue(pool.has_actor(sleeper_ref)) with self.assertRaises(WorkerProcessStopped): test_actor.get_result() test_actor.run_test_sleep(sleeper_ref, 1) pool.sleep(1.5) test_actor.get_result()
def testCalcProcessFailure(self): pool_address = '127.0.0.1:%d' % get_next_port() session_id = str(uuid.uuid4()) mock_data = np.array([1, 2, 3, 4]) with create_actor_pool(n_process=2, backend='gevent', address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool: self.create_standard_actors(pool, pool_address, with_status=False) daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid()) dispatch_ref = pool.actor_ref(DispatchActor.default_uid()) calc_ref = daemon_ref.create_actor( MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a') daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a') test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor') test_actor.run_simple_calc(session_id, _tell=True) pool.sleep(2) proc_id = pool.distributor.distribute(calc_ref.uid) daemon_ref.kill_actor_process(calc_ref) assert not daemon_ref.is_actor_process_alive(calc_ref) pool.restart_process(proc_id) daemon_ref.handle_process_down([proc_id]) with self.assertRaises(WorkerProcessStopped): self.wait_for_result(pool, test_actor) self.assertEqual(len(dispatch_ref.get_slots('cpu')), 1)
def testSendTargets(self): pool_address = '127.0.0.1:%d' % get_next_port() session_id = str(uuid.uuid4()) mock_data = np.array([1, 2, 3, 4]) with create_actor_pool(n_process=1, backend='gevent', address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool: self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False) pool.create_actor(CpuCalcActor) import mars.tensor as mt arr = mt.ones((4,), chunk_size=4) arr_add = mt.array(mock_data) result_tensor = arr + arr_add graph = result_tensor.build_graph(compose=False, tiled=True) result_key = result_tensor.chunks[0].key pool.create_actor(MockSenderActor, mock_data + np.ones((4,)), 'out', uid='w:mock_sender') with self.run_actor_test(pool) as test_actor: def _validate(_): data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key) assert_array_equal(data, mock_data + np.ones((4,))) graph_key = str(uuid.uuid4()) execution_ref = test_actor.promise_ref(ExecutionActor.default_name()) execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph), dict(chunks=[result_tensor.chunks[0].key]), None, send_addresses={result_key: (pool_address,)}, _promise=True) \ .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \ .then(_validate) \ .then(lambda *_: test_actor.set_result(None)) \ .catch(lambda *exc: test_actor.set_result(exc, False)) self.get_result()
def testLoadStoreInOtherProcess(self): test_addr = '127.0.0.1:%d' % get_next_port() with self.create_pool(n_process=3, address=test_addr, distributor=MarsDistributor(3)) as pool: pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid()) pool.create_actor(StorageManagerActor, uid=StorageManagerActor.default_uid()) pool.create_actor(DispatchActor, uid=DispatchActor.default_uid()) pool.create_actor(QuotaActor, 1024**2, uid=MemQuotaActor.default_uid()) pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid()) pool.create_actor(SharedHolderActor, self.plasma_storage_size, uid=SharedHolderActor.default_uid()) pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor1') pool.create_actor(InProcHolderActor, uid='w:2:InProcHolderActor2') pool.create_actor(IORunnerActor, lock_free=True, dispatched=False, uid=IORunnerActor.gen_uid(1)) test_ref = pool.create_actor(OtherProcessTestActor, uid='w:0:OtherProcTest') def _get_result(): start_time = time.time() while test_ref.get_result() is None: pool.sleep(0.5) if time.time() - start_time > 10: raise TimeoutError test_ref.run_copy_test((0, DataStorageDevice.SHARED_MEMORY), (1, DataStorageDevice.PROC_MEMORY), _tell=True) _get_result() test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY), (0, DataStorageDevice.SHARED_MEMORY), _tell=True) _get_result() test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY), (2, DataStorageDevice.PROC_MEMORY), _tell=True) _get_result()
def testReExecuteExisting(self): pool_address = '127.0.0.1:%d' % get_next_port() session_id = str(uuid.uuid4()) mock_data = np.array([1, 2, 3, 4]) with create_actor_pool(n_process=1, backend='gevent', address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool: self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False) pool.create_actor(CpuCalcActor, uid='w:1:cpu-calc') pool.create_actor(InProcHolderActor, uid='w:1:inproc-holder') import mars.tensor as mt arr = mt.ones((4, ), chunk_size=4) arr_add = mt.array(mock_data) result_tensor = arr + arr_add graph = result_tensor.build_graph(compose=False, tiled=True) result_tensor = get_tiled(result_tensor) def _validate(*_): data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key) assert_array_equal(data, mock_data + np.ones((4, ))) with self.run_actor_test(pool) as test_actor: graph_key = str(uuid.uuid4()) execution_ref = test_actor.promise_ref( ExecutionActor.default_uid()) execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph), dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \ .then(_validate) \ .then(lambda *_: test_actor.set_result(None)) \ .catch(lambda *exc: test_actor.set_result(exc, False)) self.get_result() with self.run_actor_test(pool) as test_actor: execution_ref = test_actor.promise_ref( ExecutionActor.default_uid()) execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph), dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \ .then(_validate) \ .then(lambda *_: test_actor.set_result(None)) \ .catch(lambda *exc: test_actor.set_result(exc, False)) self.get_result()
def testFetchRemoteData(self): pool_address = '127.0.0.1:%d' % get_next_port() session_id = str(uuid.uuid4()) mock_data = np.array([1, 2, 3, 4]) with create_actor_pool(n_process=1, backend='gevent', address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool: self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False, with_resource=True) pool.create_actor(CpuCalcActor) pool.create_actor(InProcHolderActor) pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender') import mars.tensor as mt from mars.tensor.fetch import TensorFetch arr = mt.ones((4,), chunk_size=4) arr_add = mt.array(mock_data) result_tensor = arr + arr_add graph = result_tensor.build_graph(compose=False, tiled=True) arr_add = get_tiled(arr_add) result_tensor = get_tiled(result_tensor) modified_chunk = arr_add.chunks[0] arr_add.chunks[0]._op = TensorFetch( dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs], _key=modified_chunk.op.key) with self.run_actor_test(pool) as test_actor: graph_key = str(uuid.uuid4()) execution_ref = test_actor.promise_ref(ExecutionActor.default_uid()) execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph), dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True) execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \ .then(lambda *_: test_actor.set_result(None)) \ .catch(lambda *exc: test_actor.set_result(exc, False)) with self.assertRaises(DependencyMissing): self.get_result() metas = {modified_chunk.key: WorkerMeta(mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234',))} with self.run_actor_test(pool) as test_actor: graph_key = str(uuid.uuid4()) execution_ref = test_actor.promise_ref(ExecutionActor.default_uid()) execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph), dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True) execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \ .then(lambda *_: test_actor.set_result(None)) \ .catch(lambda *exc: test_actor.set_result(exc, False)) with self.assertRaises(DependencyMissing): self.get_result() metas[modified_chunk.key] = WorkerMeta( mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost'))) with self.run_actor_test(pool) as test_actor: def _validate(_): data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key) assert_array_equal(data, mock_data + np.ones((4,))) graph_key = str(uuid.uuid4()) execution_ref = test_actor.promise_ref(ExecutionActor.default_uid()) execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph), dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True) execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \ .then(_validate) \ .then(lambda *_: test_actor.set_result(None)) \ .catch(lambda *exc: test_actor.set_result(exc, False)) self.get_result()
def testDistributor(self): distributor = MarsDistributor(1) self.assertEqual(distributor.distribute('Actor'), 0) distributor = MarsDistributor(3) self.assertEqual(distributor.distribute('w:1:Actor'), 1) self.assertEqual(distributor.distribute('w:-1:Actor'), 2) uid = 'w:h:Actor' self.assertEqual(distributor.distribute(uid), mmh_hash(uid) % distributor.n_process) uid = 'w:h1:Actor' self.assertEqual(distributor.distribute(uid), 1 + mmh_hash(uid) % (distributor.n_process - 1)) with self.assertRaises(ValueError): distributor.distribute('Actor') with self.assertRaises(ValueError): distributor.distribute('w:x:Actor') distributor = MarsDistributor(3, 'w:2:') self.assertEqual(distributor.distribute('Actor'), 2) self.assertEqual(distributor.distribute('w:x:Actor'), 2) distributor = MarsDistributor(3) ref_uid = 'w:h1:Actor2' new_uid = distributor.make_same_process('w::Actor', ref_uid) self.assertEqual( new_uid, 'w:%d:Actor' % (1 + mmh_hash(ref_uid) % (distributor.n_process - 1))) with self.assertRaises(ValueError): distributor.make_same_process('Actor', ref_uid) distributor = MarsDistributor(3, 'w:2:') self.assertEqual('w:2:' + repr('Actor'), distributor.make_same_process('Actor', 'w:2:Actor'))
def testFetchRemoteData(self): pool_address = '127.0.0.1:%d' % get_next_port() session_id = str(uuid.uuid4()) mock_data = np.array([1, 2, 3, 4]) with create_actor_pool(n_process=1, backend='gevent', address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool: self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False, with_resource=True) pool.create_actor(CpuCalcActor) pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender') cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_name()) chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref) import mars.tensor as mt from mars.tensor.expressions.fetch import TensorFetch arr = mt.ones((4,), chunk_size=4) arr_add = mt.array(mock_data) result_tensor = arr + arr_add graph = result_tensor.build_graph(compose=False, tiled=True) modified_chunk = arr_add.chunks[0] arr_add.chunks[0]._op = TensorFetch( dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs], _key=modified_chunk.op.key) with self.run_actor_test(pool) as test_actor: graph_key = str(uuid.uuid4()) execution_ref = test_actor.promise_ref(ExecutionActor.default_name()) execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph), dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \ .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \ .then(lambda *_: test_actor.set_result(None)) \ .catch(lambda *exc: test_actor.set_result(exc, False)) with self.assertRaises(DependencyMissing): self.get_result() chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes, shape=mock_data.shape, workers=('0.0.0.0:1234',)) with self.run_actor_test(pool) as test_actor: graph_key = str(uuid.uuid4()) execution_ref = test_actor.promise_ref(ExecutionActor.default_name()) execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph), dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \ .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \ .then(lambda *_: test_actor.set_result(None)) \ .catch(lambda *exc: test_actor.set_result(exc, False)) with self.assertRaises(DependencyMissing): self.get_result() chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes, shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address)) with self.run_actor_test(pool) as test_actor: def _validate(_): data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key) assert_array_equal(data, mock_data + np.ones((4,))) graph_key = str(uuid.uuid4()) execution_ref = test_actor.promise_ref(ExecutionActor.default_name()) execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph), dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \ .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \ .then(_validate) \ .then(lambda *_: test_actor.set_result(None)) \ .catch(lambda *exc: test_actor.set_result(exc, False)) self.get_result()