def testDestroyCalcActor(self):
    """Check removal of a CpuCalcActor after ``mark_destroy``.

    An idle actor is marked and asserted to be gone after a short sleep.
    Then an actor is marked while a deliberately delayed calculation is
    running: it must still exist right after the mark and be gone once the
    submitted promises have completed.
    """
    import gevent.event

    # Case 1: nothing is running, so the actor disappears shortly after the mark.
    with self._start_calc_pool() as (_pool, test_actor):
        idle_ref = _pool.actor_ref(CpuCalcActor.default_uid())
        idle_ref.mark_destroy()
        gevent.sleep(0.8)
        self.assertFalse(_pool.has_actor(idle_ref))

    # Case 2: the mark arrives while a calculation is still in flight.
    with self._start_calc_pool() as (_pool, test_actor):
        calc_ref = test_actor.promise_ref(CpuCalcActor.default_uid())

        session_id = str(uuid.uuid4())
        data_list = [np.random.random((10, 10)) for _ in range(2)]
        exec_graph, fetch_chunks, add_chunk = self._build_test_graph(data_list)
        exec_graph2, fetch_chunks2, add_chunk2 = self._build_test_graph(data_list[::-1])

        storage_client = test_actor.storage_client

        def _put_inputs(chunks, arrays):
            # Store every input array under the key of its fetch chunk.
            for chunk, array in zip(chunks, arrays):
                self.waitp(
                    storage_client.put_objects(
                        session_id, [chunk.key], [array],
                        [DataStorageDevice.SHARED_MEMORY]))

        def _calc_and_store(graph, chunk):
            # Submit a calculation followed by storing its result.
            calc_promise = calc_ref.calc(
                session_id, chunk.op.key, serialize_graph(graph),
                [chunk.key], _promise=True)
            return calc_promise.then(
                lambda *_: calc_ref.store_results(
                    session_id, chunk.op.key, [chunk.key], None, _promise=True))

        _put_inputs(fetch_chunks, data_list)
        _put_inputs(fetch_chunks2, data_list[::-1])

        orig_calc_results = CpuCalcActor._calc_results
        start_event = gevent.event.Event()

        def _mock_calc_delayed(actor_obj, *args, **kwargs):
            # Signal that the calculation has started, then stall before
            # delegating to the real implementation.
            start_event.set()
            gevent.sleep(1)
            return orig_calc_results(actor_obj, *args, **kwargs)

        with patch_method(CpuCalcActor._calc_results, _mock_calc_delayed):
            first_promise = _calc_and_store(exec_graph, add_chunk)
            start_event.wait()
            calc_ref.mark_destroy()

            second_promise = _calc_and_store(exec_graph2, add_chunk2)

            self.assertTrue(_pool.has_actor(calc_ref._ref))
            self.waitp(first_promise)
            self.waitp(second_promise)

        gevent.sleep(0.8)
        self.assertFalse(_pool.has_actor(calc_ref._ref))
def testPrepareSpilled(self):
    """Execute a graph whose fetched input is available only as a spill file.

    The first run is made without chunk meta and must fail with
    ``DependencyMissing``. After chunk meta is registered and the data is
    written with ``write_spill_file``, a second run must succeed and store
    ``mock_data + 1`` for the result chunk.

    The temporary spill directory created for the test is removed and the
    previous ``options.worker.spill_directory`` value is restored on exit,
    so neither the directory nor the global option leaks out of the test.
    """
    import shutil

    from mars.worker.spill import write_spill_file

    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    # ``options`` is process-wide state: remember the old value so that it
    # can be put back whatever happens in the body of the test.
    previous_spill_dir = options.worker.spill_directory
    spill_dir = tempfile.mkdtemp(prefix='mars_worker_prep_spilled-')
    options.worker.spill_directory = spill_dir

    try:
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(SpillActor)
            pool.create_actor(CpuCalcActor)
            cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            pool.actor_ref(ChunkHolderActor.default_uid())

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            # Replace the data-source op by a fetch op so that the input has
            # to be loaded instead of being computed.
            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype,
                _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # test meta missing
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]),
                                            None, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key,
                                             size=mock_data.nbytes, shape=mock_data.shape,
                                             workers=('0.0.0.0:1234', pool_address))
            write_spill_file(modified_chunk.key, mock_data)

            # test read from spilled file
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]),
                                            None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
    finally:
        options.worker.spill_directory = previous_spill_dir
        # mkdtemp leaves deletion to the caller; best-effort removal so that
        # a cleanup problem never masks the actual test outcome.
        shutil.rmtree(spill_dir, ignore_errors=True)
def testReExecuteExisting(self):
    """Enqueue and start the same graph key twice and validate both runs."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    distributor = WorkerDistributor(2)
    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address, distributor=distributor) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
        pool.create_actor(CpuCalcActor)

        import mars.tensor as mt
        ones = mt.ones((4, ), chunk_size=4)
        addend = mt.array(mock_data)
        result_tensor = ones + addend
        graph = result_tensor.build_graph(compose=False, tiled=True)

        pool.create_actor(MockSenderActor, mock_data + np.ones((4, )), 'out',
                          uid='w:mock_sender')

        def _validate(_):
            # ``test_actor`` is looked up when the callback fires, i.e. it is
            # the actor of whichever run is currently active.
            stored = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
            assert_array_equal(stored, mock_data + np.ones((4, )))

        def _enqueue_and_start(key):
            # Build the enqueue -> start -> validate -> report promise chain.
            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())
            enqueued = execution_ref.enqueue_graph(
                session_id, key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True)
            (enqueued
                .then(lambda *_: execution_ref.start_execution(session_id, key, _promise=True))
                .then(_validate)
                .then(lambda *_: test_actor.set_result(None))
                .catch(lambda *exc: test_actor.set_result(exc, False)))

        # first execution
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            _enqueue_and_start(graph_key)

        self.get_result()

        # second execution with the very same graph key
        with self.run_actor_test(pool) as test_actor:
            _enqueue_and_start(graph_key)

        self.get_result()
def testReExecuteExisting(self):
    """Run ``execute_graph`` twice with the same graph key and validate both runs."""
    pool_address = f'127.0.0.1:{get_next_port()}'
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    distributor = MarsDistributor(2, 'w:0:')
    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address, distributor=distributor) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
        pool.create_actor(CpuCalcActor, uid='w:1:cpu-calc')
        pool.create_actor(InProcHolderActor, uid='w:1:inproc-holder')

        import mars.tensor as mt
        ones = mt.ones((4, ), chunk_size=4)
        addend = mt.array(mock_data)
        result_tensor = ones + addend
        graph = result_tensor.build_graph(fuse_enabled=False, tiled=True)
        result_tensor = get_tiled(result_tensor)

        def _validate(*_):
            # ``test_actor`` is resolved when the callback fires, so it is the
            # actor of the run that is currently active.
            stored = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
            assert_array_equal(stored, mock_data + np.ones((4, )))

        def _execute(key):
            # Build the execute -> validate -> report promise chain.
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            executed = execution_ref.execute_graph(
                session_id, key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True)
            (executed
                .then(_validate)
                .then(lambda *_: test_actor.set_result(None))
                .catch(lambda *exc: test_actor.set_result(exc, False)))

        # first execution
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            _execute(graph_key)

        self.get_result()

        # second execution with the very same graph key
        with self.run_actor_test(pool) as test_actor:
            _execute(graph_key)

        self.get_result()
def _prepare_test_graph(self, session_id, graph_key, mock_workers):
    """Yield ``(pool, graph_ref)`` for a prepared and analyzed test graph.

    A tileable graph of a split random sum is built, scheduler-side actors
    are created in a fresh pool, every address in ``mock_workers`` is
    registered with mock hardware meta, and operand actors are created
    without being started.
    """
    addr = f'127.0.0.1:{get_next_port()}'

    lhs = mt.random.random((100,))
    rhs = mt.random.random((100,))
    first_half, second_half = mt.split(lhs + rhs, 2)

    graph = TileableGraph([first_half.data, second_half.data])
    builder = TileableGraphBuilder(graph)
    # Advance the builder by one step; the produced value itself is not used.
    next(iter(builder.build()))

    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                          uid=SchedulerClusterInfoActor.default_uid())
        resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(AssignerActor, uid=AssignerActor.gen_uid(session_id))
        graph_ref = pool.create_actor(
            GraphActor, session_id, graph_key, serialize_graph(graph),
            uid=GraphActor.gen_uid(session_id, graph_key))

        for worker in mock_workers:
            worker_meta = dict(hardware=dict(cpu=4, cpu_total=4, memory=1600))
            resource_ref.set_worker_meta(worker, worker_meta)

        graph_ref.prepare_graph()
        graph_ref.analyze_graph()
        graph_ref.create_operand_actors(_start=False)

        yield pool, graph_ref
def testEmptyGraph(self, *_):
    """Executing an empty DAG must leave the graph in the SUCCEEDED state."""
    session_id = str(uuid.uuid4())
    addr = '127.0.0.1:%d' % get_next_port()

    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                          uid=SchedulerClusterInfoActor.default_uid())
        resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(AssignerActor, uid=AssignerActor.gen_uid(session_id))

        # register two mock workers
        for worker in ('localhost:12345', 'localhost:23456'):
            resource_ref.set_worker_meta(worker, dict(hardware=dict(cpu_total=4)))

        graph_key = str(uuid.uuid4())
        serialized_graph = serialize_graph(DAG())
        graph_uid = GraphActor.gen_uid(session_id, graph_key)
        graph_ref = pool.create_actor(GraphActor, session_id, graph_key,
                                      serialized_graph, uid=graph_uid)

        graph_ref.execute_graph()
        self.assertEqual(graph_ref.get_state(), GraphState.SUCCEEDED)
def execute_case():
    """Start the graph, request a stop after about 0.8s and wait for a final state.

    Relies on ``pool``, ``session_id``, ``graph_key`` and ``graph`` from the
    enclosing scope. Raises ``SystemError`` if no final state is reached
    within 30 seconds.
    """
    pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                      uid=ClusterInfoActor.default_name())
    resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
    pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
    pool.create_actor(AssignerActor, uid=AssignerActor.gen_name(session_id))
    graph_ref = pool.create_actor(
        GraphActor, session_id, graph_key, serialize_graph(graph),
        uid=GraphActor.gen_name(session_id, graph_key))
    execution_ref = pool.create_actor(FakeExecutionActor, sleep=1)

    # handle mock objects
    OperandActor._get_raw_execution_ref.side_effect = lambda: execution_ref

    mock_resource = dict(hardware=dict(cpu=4, cpu_total=4, memory=512))

    def write_mock_meta():
        for worker in ('localhost:12345', 'localhost:23456'):
            resource_ref.set_worker_meta(worker, mock_resource)

    # write the worker meta from a separate greenlet and wait for it
    gevent.spawn(write_mock_meta).join()

    graph_ref.prepare_graph()
    fetched_graph = graph_ref.get_chunk_graph()

    graph_ref.scan_node()
    graph_ref.place_initial_chunks()

    # op keys of chunks without successors (collected but not used further)
    final_keys = {c.op.key for c in fetched_graph
                  if fetched_graph.count_successors(c) == 0}

    graph_ref.create_operand_actors()

    graph_meta_ref = pool.actor_ref(GraphMetaActor.gen_name(session_id, graph_key))
    final_states = (GraphState.SUCCEEDED, GraphState.FAILED, GraphState.CANCELLED)

    start_time = time.time()
    cancel_called = False
    while True:
        gevent.sleep(0.1)
        if not cancel_called and time.time() > start_time + 0.8:
            # fire the stop request exactly once
            cancel_called = True
            graph_ref.stop_graph(_tell=True)
        if time.time() - start_time > 30:
            raise SystemError('Wait for execution finish timeout')
        if graph_meta_ref.get_state() in final_states:
            break
def testCpuCalcErrorInRunning(self):
    """A ``ValueError`` raised by ``_calc_results`` must surface through the promise."""
    with self._start_calc_pool() as (_pool, test_actor):
        calc_ref = test_actor.promise_ref(CpuCalcActor.default_uid())

        session_id = str(uuid.uuid4())
        data_list = [np.random.random((10, 10)) for _ in range(2)]
        exec_graph, fetch_chunks, add_chunk = self._build_test_graph(data_list)

        # store the inputs under the keys of their fetch chunks
        storage_client = test_actor.storage_client
        for chunk, array in zip(fetch_chunks, data_list):
            put_promise = storage_client.put_objects(
                session_id, [chunk.key], [array], [DataStorageDevice.SHARED_MEMORY])
            self.waitp(put_promise)

        def _mock_calc_results_error(*_, **__):
            raise ValueError

        with patch_method(CpuCalcActor._calc_results, _mock_calc_results_error):
            with self.assertRaises(ValueError):
                calc_promise = calc_ref.calc(
                    session_id, add_chunk.op.key, serialize_graph(exec_graph),
                    [add_chunk.key], _promise=True)
                self.waitp(calc_promise.then(
                    lambda *_: calc_ref.store_results(
                        session_id, add_chunk.op.key, [add_chunk.key], None,
                        _promise=True)))
def testEstimateGraphFinishTime(self):
    """After estimation, both min and max finish-time stats must be present."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())

    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False)

        status_ref = pool.actor_ref(StatusActor.default_uid())
        execution_ref = pool.actor_ref(ExecutionActor.default_uid())
        pool.create_actor(CpuCalcActor)

        import mars.tensor as mt
        arr = mt.ones((10, 8), chunk_size=10)
        graph = arr.build_graph(compose=False, tiled=True)
        arr = get_tiled(arr)

        graph_key = str(uuid.uuid4())

        # feed one sample more than ``min_stats_count`` for every speed stat
        stat_names = (
            'calc_speed.' + type(arr.chunks[0].op).__name__,
            'disk_read_speed',
            'disk_write_speed',
            'net_transfer_speed',
        )
        for _ in range(options.optimize.min_stats_count + 1):
            for stat_name in stat_names:
                status_ref.update_mean_stats(stat_name, 10)

        io_meta = dict(chunks=[arr.chunks[0].key])
        execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                    io_meta, None)
        execution_ref.estimate_graph_finish_time(session_id, graph_key)

        stats_dict = status_ref.get_stats(['min_est_finish_time', 'max_est_finish_time'])
        self.assertIsNotNone(stats_dict.get('min_est_finish_time'))
        self.assertIsNotNone(stats_dict.get('max_est_finish_time'))
def _prepare_test_graph(self, session_id, graph_key, mock_workers):
    """Yield ``(pool, graph_ref)`` for a prepared and analyzed test graph.

    Both halves of a split random sum are built into one DAG,
    scheduler-side actors are created in a fresh pool, every address in
    ``mock_workers`` is registered with mock hardware meta, and operand
    actors are created without being started.
    """
    addr = '127.0.0.1:%d' % get_next_port()

    lhs = mt.random.random((100,))
    rhs = mt.random.random((100,))
    halves = mt.split(lhs + rhs, 2)
    first_half, second_half = halves

    # both halves contribute to the same graph object
    graph = DAG()
    for half in (first_half, second_half):
        half.build_graph(graph=graph, compose=False)

    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                          uid=ClusterInfoActor.default_name())
        resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
        graph_ref = pool.create_actor(
            GraphActor, session_id, graph_key, serialize_graph(graph),
            uid=GraphActor.gen_name(session_id, graph_key))

        for worker in mock_workers:
            resource_ref.set_worker_meta(worker, dict(hardware=dict(cpu_total=4)))

        graph_ref.prepare_graph()
        graph_ref.analyze_graph()
        graph_ref.create_operand_actors(_start=False)

        yield pool, graph_ref
def testExecute(self):
    """Execute ``ones + ones`` with both inputs pre-stored and check the result is all 2s."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        pool.create_actor(ClusterInfoActor, schedulers=[pool_address],
                          uid=ClusterInfoActor.default_name())
        cache_ref = pool.create_actor(ChunkHolderActor, self.plasma_storage_size,
                                      uid=ChunkHolderActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(TaskQueueActor, uid=TaskQueueActor.default_name())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
        pool.create_actor(QuotaActor, 1024 * 1024, uid=MemQuotaActor.default_name())
        pool.create_actor(CpuCalcActor)
        pool.create_actor(ExecutionActor, uid=ExecutionActor.default_name())

        try:
            with self.run_actor_test(pool) as test_actor:
                import mars.tensor as mt
                from mars.tensor.expressions.datasource import TensorOnes, TensorFetchChunk

                arr = mt.ones((10, 8), chunk_size=10)
                arr_add = mt.ones((10, 8), chunk_size=10)
                arr2 = arr + arr_add
                graph = arr2.build_graph(compose=False, tiled=True)

                # swap every ``TensorOnes`` source for a fetch op so that the
                # inputs have to come from the chunk store
                for chunk in graph:
                    if not isinstance(chunk.op, TensorOnes):
                        continue
                    chunk._op = TensorFetchChunk(
                        dtype=chunk.dtype,
                        _outputs=[weakref.ref(o) for o in chunk.op.outputs],
                        _key=chunk.op.key)

                session_id = str(uuid.uuid4())
                chunk_holder_ref = test_actor.promise_ref(ChunkHolderActor.default_name())

                def _store_ones(chunk_key):
                    # Put an int16 block of ones, register it with the holder,
                    # then release the local reference.
                    refs = test_actor._chunk_store.put(
                        session_id, chunk_key, np.ones((10, 8), dtype=np.int16))
                    chunk_holder_ref.register_chunk(session_id, chunk_key)
                    del refs

                _store_ones(arr.chunks[0].key)
                _store_ones(arr_add.chunks[0].key)

                executor_ref = test_actor.promise_ref(ExecutionActor.default_name())

                def _validate(_):
                    stored = test_actor._chunk_store.get(session_id, arr2.chunks[0].key)
                    assert_array_equal(stored, 2 * np.ones((10, 8)))

                enqueued = executor_ref.enqueue_graph(
                    session_id, str(id(graph)), serialize_graph(graph),
                    dict(chunks=[arr2.chunks[0].key]), None, _promise=True)
                (enqueued
                    .then(lambda *_: executor_ref.start_execution(
                        session_id, str(id(graph)), _promise=True))
                    .then(_validate)
                    .then(lambda *_: test_actor.set_result(None))
                    .catch(lambda *exc: test_actor.set_result(exc, False)))

            self.get_result()
        finally:
            pool.destroy_actor(cache_ref)
def testErrorOnPrepare(self, *_):
    """Check graph state after an error or a cancel during preparation.

    Three scenarios are run against fresh graph actors:
    ``create_operand_actors`` raising (state FAILED), the state being set
    to CANCELLING inside ``create_operand_actors`` (state CANCELLED), and
    the same happening inside ``GraphAnalyzer.calc_operand_assignments``
    (state CANCELLED).
    """
    session_id = str(uuid.uuid4())
    addr = '127.0.0.1:%d' % get_next_port()

    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                          uid=ClusterInfoActor.default_name())
        resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(AssignerActor, uid=AssignerActor.default_name())

        for worker in ('localhost:12345', 'localhost:23456'):
            resource_ref.set_worker_meta(worker, dict(hardware=dict(cpu_total=4)))

        def _create_graph_actor(key):
            return pool.create_actor(GraphActor, session_id, key, serialized_graph,
                                     uid=GraphActor.gen_name(session_id, key))

        def _mark_cancelling(key):
            meta_ref = pool.actor_ref(GraphMetaActor.gen_name(session_id, key))
            meta_ref.set_state(GraphState.CANCELLING)

        # error occurred in create_operand_actors
        graph_key = str(uuid.uuid4())
        expr = mt.random.random((8, 2), chunk_size=2) + 1
        graph = expr.build_graph(compose=False)
        serialized_graph = serialize_graph(graph)
        graph_ref = _create_graph_actor(graph_key)

        def _mock_raises(*_, **__):
            raise RuntimeError

        with patch_method(GraphActor.create_operand_actors, new=_mock_raises):
            with self.assertRaises(RuntimeError):
                graph_ref.execute_graph()
        self.assertEqual(graph_ref.get_state(), GraphState.FAILED)
        graph_ref.destroy()

        # interrupted during create_operand_actors
        graph_key = str(uuid.uuid4())
        graph_ref = _create_graph_actor(graph_key)

        def _mock_cancel_on_create(*_, **__):
            _mark_cancelling(graph_key)

        with patch_method(GraphActor.create_operand_actors, new=_mock_cancel_on_create):
            graph_ref.execute_graph()
        self.assertEqual(graph_ref.get_state(), GraphState.CANCELLED)

        # interrupted during previous steps
        graph_key = str(uuid.uuid4())
        graph_ref = _create_graph_actor(graph_key)

        def _mock_cancel_on_assign(*_, **__):
            _mark_cancelling(graph_key)
            return dict()

        with patch_method(GraphAnalyzer.calc_operand_assignments, new=_mock_cancel_on_assign):
            graph_ref.execute_graph()
        self.assertEqual(graph_ref.get_state(), GraphState.CANCELLED)
def testOperandActorWithCancel(self, *_):
    """Stop a running graph after about 0.3s and expect the CANCELLED state.

    Raises ``SystemError`` if no final state is reached within 30 seconds.
    """
    lhs = mt.random.randint(10, size=(10, 8), chunk_size=4)
    rhs = mt.random.randint(10, size=(10, 8), chunk_size=4)
    total = lhs + rhs

    session_id = str(uuid.uuid4())
    graph_key = str(uuid.uuid4())
    graph = total.build_graph(compose=False)

    with create_actor_pool(n_process=1, backend='gevent') as pool:
        pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                          uid=SchedulerClusterInfoActor.default_uid())
        resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(AssignerActor, uid=AssignerActor.gen_uid(session_id))
        graph_ref = pool.create_actor(
            GraphActor, session_id, graph_key, serialize_graph(graph),
            uid=GraphActor.gen_uid(session_id, graph_key))

        def _build_mock_ref(uid=None, address=None):
            # Create the fake execution actor for ``address`` on first use and
            # hand out a plain reference on later calls.
            try:
                return pool.create_actor(
                    FakeExecutionActor, exec_delay=0.2,
                    uid=FakeExecutionActor.gen_uid(address))
            except ActorAlreadyExist:
                return pool.actor_ref(FakeExecutionActor.gen_uid(address))

        # handle mock objects
        OperandActor._get_raw_execution_ref.side_effect = _build_mock_ref

        mock_resource = dict(hardware=dict(cpu=4, cpu_total=4, memory=512))

        # twenty mock workers on consecutive ports
        for port in range(12345, 12345 + 20):
            resource_ref.set_worker_meta('localhost:%d' % port, mock_resource)

        graph_ref.prepare_graph(compose=False)
        fetched_graph = graph_ref.get_chunk_graph()

        graph_ref.analyze_graph()

        # op keys of chunks without successors (collected but not used further)
        final_keys = {c.op.key for c in fetched_graph
                      if fetched_graph.count_successors(c) == 0}

        graph_ref.create_operand_actors()
        graph_meta_ref = pool.actor_ref(GraphMetaActor.gen_uid(session_id, graph_key))
        final_states = (GraphState.SUCCEEDED, GraphState.FAILED, GraphState.CANCELLED)

        start_time = time.time()
        cancel_called = False
        while True:
            pool.sleep(0.05)
            if not cancel_called and time.time() > start_time + 0.3:
                # fire the stop request exactly once
                cancel_called = True
                graph_ref.stop_graph(_tell=True)
            if time.time() - start_time > 30:
                raise SystemError('Wait for execution finish timeout')
            if graph_meta_ref.get_state() in final_states:
                break

        self.assertEqual(graph_meta_ref.get_state(), GraphState.CANCELLED)
def _run_operand_case(session_id, graph_key, tensor, execution_creator):
    """Run the graph of ``tensor`` against mock execution actors until it ends.

    ``execution_creator(pool, uid)`` is called to create the execution
    actor for a worker; when that raises ``ActorAlreadyExist`` a reference
    to the existing actor is used instead. Raises ``SystemError`` if no
    final graph state is reached within 30 seconds.
    """
    graph = tensor.build_graph(compose=False)

    with create_actor_pool(n_process=1, backend='gevent') as pool:
        pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                          uid=SchedulerClusterInfoActor.default_uid())
        resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(AssignerActor, uid=AssignerActor.gen_uid(session_id))
        graph_ref = pool.create_actor(
            GraphActor, session_id, graph_key, serialize_graph(graph),
            uid=GraphActor.gen_uid(session_id, graph_key))

        def _build_mock_ref(uid=None, address=None):
            try:
                return execution_creator(pool, FakeExecutionActor.gen_uid(address))
            except ActorAlreadyExist:
                return pool.actor_ref(FakeExecutionActor.gen_uid(address))

        # handle mock objects
        OperandActor._get_raw_execution_ref.side_effect = _build_mock_ref

        mock_resource = dict(hardware=dict(cpu=4, cpu_total=4, memory=512))
        for worker in ('localhost:12345', 'localhost:23456'):
            resource_ref.set_worker_meta(worker, mock_resource)

        graph_ref.prepare_graph()
        fetched_graph = graph_ref.get_chunk_graph()

        graph_ref.analyze_graph()

        # op keys of chunks without successors (collected but not used further)
        final_keys = {c.op.key for c in fetched_graph
                      if fetched_graph.count_successors(c) == 0}

        graph_ref.create_operand_actors()

        graph_meta_ref = pool.actor_ref(GraphMetaActor.gen_uid(session_id, graph_key))
        final_states = (GraphState.SUCCEEDED, GraphState.FAILED, GraphState.CANCELLED)

        start_time = time.time()
        while True:
            pool.sleep(0.1)
            if time.time() - start_time > 30:
                raise SystemError('Wait for execution finish timeout')
            if graph_meta_ref.get_state() in final_states:
                break
def run_test(self):
    """Execute ``ones + ones`` from pre-stored inputs and record the outcome.

    On failure the exception info is stored in ``self._exc_info``; in
    either case ``self._finished`` is set to ``True`` at the end of the
    promise chain.
    """
    import mars.tensor as mt
    from mars.tensor.expressions.datasource import TensorOnes, TensorFetchChunk

    arr = mt.ones((10, 8), chunks=10)
    arr_add = mt.ones((10, 8), chunks=10)
    arr2 = arr + arr_add
    graph = arr2.build_graph(compose=False, tiled=True)

    # swap every ``TensorOnes`` source for a fetch op so that the inputs
    # have to come from the chunk store
    for chunk in graph:
        if not isinstance(chunk.op, TensorOnes):
            continue
        chunk._op = TensorFetchChunk(
            dtype=chunk.dtype,
            _outputs=[weakref.ref(o) for o in chunk.op.outputs],
            _key=chunk.op.key)

    session_id = str(uuid.uuid4())
    op_key = str(uuid.uuid4())

    graph_path = '/sessions/%s/operands/%s/execution_graph' % (session_id, op_key)
    self._kv_store.write(graph_path, serialize_graph(graph))

    chunk_holder_ref = self.promise_ref('ChunkHolderActor')

    def _store_ones(chunk_key):
        # Put an int16 block of ones, register it with the holder, then
        # release the local reference.
        refs = self._chunk_store.put(session_id, chunk_key,
                                     np.ones((10, 8), dtype=np.int16))
        chunk_holder_ref.register_chunk(session_id, chunk_key)
        del refs

    _store_ones(arr.chunks[0].key)
    _store_ones(arr_add.chunks[0].key)

    executor_ref = self.promise_ref('ExecutionActor')

    def _validate(_):
        stored = self._chunk_store.get(session_id, arr2.chunks[0].key)
        assert_array_equal(stored, 2 * np.ones((10, 8)))

    executed = executor_ref.execute_graph(
        session_id, str(id(graph)), serialize_graph(graph),
        dict(chunks=[arr2.chunks[0].key]), None, _promise=True)
    (executed
        .then(_validate)
        .catch(lambda *exc: setattr(self, '_exc_info', exc))
        .then(lambda *_: setattr(self, '_finished', True)))
def execute_case():
    """Run the graph with a mock execution actor and poll the kv store for its end.

    Relies on ``pool``, ``session_id``, ``graph_key``, ``graph`` and
    ``execution_creator`` from the enclosing scope. Raises ``SystemError``
    if no final state is seen within 30 seconds.
    """
    pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                      uid=ClusterInfoActor.default_name())
    resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
    kv_store_ref = pool.create_actor(KVStoreActor, uid=KVStoreActor.default_name())
    pool.create_actor(AssignerActor, uid=AssignerActor.gen_name(session_id))
    graph_ref = pool.create_actor(
        GraphActor, session_id, graph_key, serialize_graph(graph),
        uid=GraphActor.gen_name(session_id, graph_key))
    execution_ref = execution_creator(pool)

    # handle mock objects
    OperandActor._get_raw_execution_ref.side_effect = lambda: execution_ref

    mock_resource = dict(hardware=dict(cpu=4, cpu_total=4, memory=512))

    def write_mock_meta():
        for worker in ('localhost:12345', 'localhost:23456'):
            resource_ref.set_worker_meta(worker, mock_resource)

    # write the worker meta from a separate greenlet and wait for it
    gevent.spawn(write_mock_meta).join()

    graph_ref.prepare_graph()

    chunk_graph_path = '/sessions/%s/graphs/%s/chunk_graph' % (session_id, graph_key)
    graph_data = kv_store_ref.read(chunk_graph_path).value
    fetched_graph = deserialize_graph(graph_data)

    graph_ref.scan_node()
    graph_ref.place_initial_chunks()

    # op keys of chunks without successors (collected but not used further)
    final_keys = {c.op.key for c in fetched_graph
                  if fetched_graph.count_successors(c) == 0}

    graph_ref.create_operand_actors()

    final_states = ('succeeded', 'failed', 'cancelled')
    start_time = time.time()
    while True:
        gevent.sleep(0.1)
        if time.time() - start_time > 30:
            raise SystemError('Wait for execution finish timeout')
        state_path = '/sessions/%s/graph/%s/state' % (session_id, graph_key)
        if kv_store_ref.read(state_path).value.lower() in final_states:
            break
def testFetchRemoteData(self):
    """Execute a graph with one input turned into a fetch op and validate the sum."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    distributor = WorkerDistributor(2)
    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address, distributor=distributor) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
        pool.create_actor(CpuCalcActor)
        pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender')
        chunk_meta_ref = pool.actor_ref(ChunkMetaActor.default_name())

        import mars.tensor as mt
        from mars.tensor.expressions.datasource import TensorFetch
        ones = mt.ones((4, ), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = ones + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)

        # replace the data-source op by a fetch op so that the input has to
        # be obtained instead of being computed
        modified_chunk = arr_add.chunks[0]
        arr_add.chunks[0]._op = TensorFetch(
            dtype=modified_chunk.dtype,
            _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
            _key=modified_chunk.op.key)

        chunk_meta_ref.set_chunk_meta(
            session_id, modified_chunk.key,
            size=mock_data.nbytes, shape=mock_data.shape,
            workers=('0.0.0.0:1234', pool_address))

        with self.run_actor_test(pool) as test_actor:
            def _validate(_):
                stored = test_actor._chunk_store.get(
                    session_id, result_tensor.chunks[0].key)
                assert_array_equal(stored, mock_data + np.ones((4, )))

            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())

            enqueued = execution_ref.enqueue_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True)
            (enqueued
                .then(lambda *_: execution_ref.start_execution(
                    session_id, graph_key, _promise=True))
                .then(_validate)
                .then(lambda *_: test_actor.set_result(None))
                .catch(lambda *exc: test_actor.set_result(exc, False)))

        self.get_result()
def testPrepareQuota(self, *_):
    """Execution must not finish before chunk pinning starts to succeed.

    ``pin_chunks`` is mocked to raise ``PinChunkFailed`` until a background
    thread flips a flag after one second; the recorded finish time must be
    at least one second after the start.
    """
    # one-element list so that the closures below can flip the flag
    pinned = [True]

    def _mock_pin(_graph_key, chunk_keys):
        from mars.errors import PinChunkFailed
        if not pinned[0]:
            return chunk_keys
        raise PinChunkFailed

    ChunkHolderActor.pin_chunks.side_effect = _mock_pin

    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
        pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender')
        pool.create_actor(CpuCalcActor)
        cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
        chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)

        import mars.tensor as mt
        from mars.tensor.fetch import TensorFetch
        ones = mt.ones((4,), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = ones + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)

        # replace the data-source op by a fetch op so that the input has to
        # be obtained instead of being computed
        modified_chunk = arr_add.chunks[0]
        arr_add.chunks[0]._op = TensorFetch(
            dtype=modified_chunk.dtype,
            _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
            _key=modified_chunk.op.key)

        chunk_meta_client.set_chunk_meta(
            session_id, modified_chunk.key,
            size=mock_data.nbytes, shape=mock_data.shape,
            workers=('0.0.0.0:1234', pool_address))

        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

            start_time = time.time()

            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

            finished = execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True)
            (finished
                .then(lambda *_: test_actor.set_result(time.time()))
                .catch(lambda *exc: test_actor.set_result(exc, False)))

            def _delay_fun():
                # allow pinning to succeed after one second
                time.sleep(1)
                pinned[0] = False

            releaser = threading.Thread(target=_delay_fun)
            releaser.start()

        finish_time = self.get_result()
        self.assertGreaterEqual(finish_time, start_time + 1)
def testSendTargets(self):
    """Execute a graph, request sending its result to a worker and validate the data."""
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    distributor = MarsDistributor(2, 'w:0:')
    with create_actor_pool(n_process=1, backend='gevent',
                           address=pool_address, distributor=distributor) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
        pool.create_actor(CpuCalcActor)
        pool.create_actor(InProcHolderActor)

        import mars.tensor as mt
        ones = mt.ones((4, ), chunk_size=4)
        addend = mt.array(mock_data)
        result_tensor = ones + addend
        graph = result_tensor.build_graph(compose=False, tiled=True)
        result_key = result_tensor.chunks[0].key

        pool.create_actor(MockSenderActor, mock_data + np.ones((4, )), 'out',
                          uid='w:mock_sender')

        with self.run_actor_test(pool) as test_actor:
            def _validate(_):
                stored = test_actor.shared_store.get(
                    session_id, result_tensor.chunks[0].key)
                assert_array_equal(stored, mock_data + np.ones((4, )))

            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

            # fire-and-forget: start the execution, then name the send targets
            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)
            send_targets = {result_key: (pool_address, )}
            execution_ref.send_data_to_workers(
                session_id, graph_key, send_targets, _tell=True)

            finished = execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True)
            (finished
                .then(_validate)
                .then(lambda *_: test_actor.set_result(None))
                .catch(lambda *exc: test_actor.set_result(exc, False)))

        self.get_result()
def run_simple_calc(self, session_id):
    """Enqueue and start ``ones((4,)) + 1`` and append the outcome to ``self._results``.

    ``(True,)`` is appended on success and ``(False, exc)`` on failure.
    The session id, result chunk key and graph key are kept on ``self``.
    """
    self._session_id = session_id

    import mars.tensor as mt
    tensor = mt.ones((4, ), chunk_size=4) + 1
    graph = tensor.build_graph(compose=False, tiled=True)

    self._array_key = tensor.chunks[0].key
    self._graph_key = str(uuid.uuid4())
    graph_key = self._graph_key

    execution_ref = self.promise_ref(ExecutionActor.default_name())
    enqueued = execution_ref.enqueue_graph(
        session_id, graph_key, serialize_graph(graph),
        dict(chunks=[tensor.chunks[0].key]), None, _promise=True)
    (enqueued
        .then(lambda *_: execution_ref.start_execution(
            session_id, graph_key, _promise=True))
        .then(lambda *_: self._results.append((True,)))
        .catch(lambda *exc: self._results.append((False, exc))))
def run_test(self, worker):
    """Submit a random matrix product to the execution actor at ``worker``.

    ``self._replied`` is set to ``True`` once the execution promise resolves.
    """
    import mars.tensor as mt
    from mars.worker import ExecutionActor

    session_id = str(uuid.uuid4())

    lhs = mt.random.rand(100, 50, chunk_size=30)
    rhs = mt.random.rand(50, 200, chunk_size=30)
    result = lhs.dot(rhs)

    graph = result.build_graph(tiled=True)

    executor_ref = self.promise_ref(ExecutionActor.default_uid(), address=worker)
    io_meta = dict(chunks=[chunk.key for chunk in result.chunks])

    graph_key = str(id(graph))
    executed = executor_ref.execute_graph(
        session_id, graph_key, serialize_graph(graph), io_meta, None, _promise=True)
    executed.then(lambda *_: setattr(self, '_replied', True))
def run_simple_calc(self, session_id):
    """Execute a tiny ``ones + 1`` graph and record the outcome.

    Stores the session id, the key of the first tiled result chunk and the
    generated graph key on ``self``. Appends ``(True,)`` to ``self._results``
    when the finish callback resolves and ``(False, exc)`` when it fails.
    """
    self._session_id = session_id

    import mars.tensor as mt
    tensor = mt.ones((4, ), chunk_size=4) + 1
    graph = tensor.build_graph(fuse_enabled=False, tiled=True)

    # Chunks are read from the tiled counterpart of the tensor.
    tiled = get_tiled(tensor)
    array_key = tiled.chunks[0].key
    self._array_key = array_key

    graph_key = str(uuid.uuid4())
    self._graph_key = graph_key

    def _on_success(*_):
        return self._results.append((True,))

    def _on_failure(*exc):
        return self._results.append((False, exc))

    execution_ref = self.promise_ref(ExecutionActor.default_uid())
    execution_ref.execute_graph(
        session_id, graph_key, serialize_graph(graph),
        {'chunks': [array_key]}, None, _tell=True)

    finished = execution_ref.add_finish_callback(session_id, graph_key, _promise=True)
    finished.then(_on_success).catch(_on_failure)
def testFetchRemoteData(self):
    """Run a graph whose input chunk has been turned into a fetch operand.

    Three submissions are made:

    1. without any chunk meta, expecting ``DependencyMissing``;
    2. with meta listing only ``'0.0.0.0:1234'`` as a worker, again expecting
       ``DependencyMissing``;
    3. with meta that also lists this pool under a ``localhost`` alias,
       expecting the result in the shared store to equal ``mock_data + 1``.
    """
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    pool_ctx = create_actor_pool(n_process=1, backend='gevent', address=pool_address,
                                 distributor=MarsDistributor(2, 'w:0:'))
    with pool_ctx as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False,
                                    with_status=False, with_resource=True)
        pool.create_actor(CpuCalcActor)
        pool.create_actor(InProcHolderActor)
        pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')

        import mars.tensor as mt
        from mars.tensor.fetch import TensorFetch
        arr = mt.ones((4,), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)

        arr_add = get_tiled(arr_add)
        result_tensor = get_tiled(result_tensor)

        # Replace the data-source operand of the input chunk with a fetch
        # operand that keeps the same key and outputs.
        modified_chunk = arr_add.chunks[0]
        source_op = modified_chunk.op
        arr_add.chunks[0]._op = TensorFetch(
            dtype=modified_chunk.dtype,
            _outputs=[weakref.ref(out) for out in source_op.outputs],
            _key=source_op.key)

        def _submit(chunk_metas, validator=None):
            # ``test_actor`` is looked up in the enclosing scope at call time,
            # so the callbacks address whichever test actor is current.
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), chunk_metas, _tell=True)

            finished = execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True)
            if validator is not None:
                finished = finished.then(validator)
            finished.then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

        # 1. no meta at all
        with self.run_actor_test(pool) as test_actor:
            _submit(None)

        with self.assertRaises(DependencyMissing):
            self.get_result()

        # 2. meta listing only the '0.0.0.0:1234' worker
        metas = {modified_chunk.key: WorkerMeta(mock_data.nbytes, mock_data.shape,
                                                ('0.0.0.0:1234',))}
        with self.run_actor_test(pool) as test_actor:
            _submit(metas)

        with self.assertRaises(DependencyMissing):
            self.get_result()

        # 3. meta that additionally lists this pool under a localhost alias
        metas[modified_chunk.key] = WorkerMeta(
            mock_data.nbytes, mock_data.shape,
            ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost')))
        with self.run_actor_test(pool) as test_actor:
            def _validate(_):
                stored = test_actor.shared_store.get(
                    session_id, result_tensor.chunks[0].key)
                assert_array_equal(stored, mock_data + np.ones((4,)))

            _submit(metas, _validate)

        self.get_result()
def testPrepareSpilled(self):
    """Run a graph whose fetched input lives only on the DISK storage device.

    The first submission carries no chunk meta and must raise
    ``DependencyMissing``. For the second one the input data is put into
    process memory, copied to disk and removed from process memory before the
    graph is executed with meta; the result must equal ``mock_data + 1``.
    """
    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_worker_prep_spilled-')

    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address,
                                    with_daemon=False, with_status=False)
        for actor_cls in (IORunnerActor, CpuCalcActor, InProcHolderActor):
            pool.create_actor(actor_cls)

        import mars.tensor as mt
        from mars.tensor.fetch import TensorFetch
        arr = mt.ones((4,), chunk_size=4)
        arr_add = mt.array(mock_data)
        result_tensor = arr + arr_add
        graph = result_tensor.build_graph(compose=False, tiled=True)

        arr_add = get_tiled(arr_add)
        result_tensor = get_tiled(result_tensor)

        # Swap the input chunk's operand for a fetch operand with the same key.
        modified_chunk = arr_add.chunks[0]
        source_op = modified_chunk.op
        arr_add.chunks[0]._op = TensorFetch(
            dtype=modified_chunk.dtype,
            _outputs=[weakref.ref(out) for out in source_op.outputs],
            _key=source_op.key)

        def _report_success(*_):
            return test_actor.set_result(None)

        def _report_failure(*exc):
            return test_actor.set_result(exc, False)

        # test meta missing
        with self.run_actor_test(pool) as test_actor:
            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            submitted = execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True)
            submitted.then(_report_success).catch(_report_failure)

        with self.assertRaises(DependencyMissing):
            self.get_result()

        metas = {modified_chunk.key: WorkerMeta(
            mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234', pool_address))}

        # test read from spilled file
        with self.run_actor_test(pool) as test_actor:
            def _copy_to_disk(*_):
                return test_actor.storage_client.copy_to(
                    session_id, [modified_chunk.key], [DataStorageDevice.DISK])

            stored = test_actor.storage_client.put_objects(
                session_id, [modified_chunk.key], [mock_data],
                [DataStorageDevice.PROC_MEMORY])
            self.waitp(stored.then(_copy_to_disk))

            # Leave the disk copy as the only remaining location.
            test_actor.storage_client.delete(
                session_id, [modified_chunk.key], [DataStorageDevice.PROC_MEMORY])

            def _validate(_):
                data = test_actor.shared_store.get(
                    session_id, result_tensor.chunks[0].key)
                assert_array_equal(data, mock_data + np.ones((4,)))

            graph_key = str(uuid.uuid4())
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
            submitted = execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[result_tensor.chunks[0].key]), metas, _promise=True)
            submitted.then(_validate).then(_report_success).catch(_report_failure)

        self.get_result()
def testPrepareQuota(self, *_):
    """Check that execution does not finish while pinning data keys fails.

    ``SharedHolderActor.pin_data_keys`` is patched to raise
    ``PinDataKeyFailed`` for as long as ``pinned`` is true. A background
    thread clears the flag after 0.5 seconds, and the recorded finish time
    must be at least 0.5 seconds after the start time.
    """
    pinned = True
    orig_pin = SharedHolderActor.pin_data_keys

    def _mock_pin(self, session_id, chunk_keys, token):
        from mars.errors import PinDataKeyFailed
        if not pinned:
            return orig_pin(self, session_id, chunk_keys, token)
        raise PinDataKeyFailed

    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())
    mock_data = np.array([1, 2, 3, 4])

    with patch_method(SharedHolderActor.pin_data_keys, new=_mock_pin):
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address,
                                        with_daemon=False, with_status=False)
            pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)
            pool.actor_ref(WorkerClusterInfoActor.default_uid())

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            # Swap the input chunk's operand for a fetch operand with the same key.
            modified_chunk = arr_add.chunks[0]
            source_op = modified_chunk.op
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype,
                _outputs=[weakref.ref(out) for out in source_op.outputs],
                _key=source_op.key)

            local_alias = pool_address.replace('127.0.0.1', 'localhost')
            metas = {modified_chunk.key: WorkerMeta(
                mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234', local_alias))}

            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                start_time = time.time()

                execution_ref.execute_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                def _report_finish_time(*_):
                    return test_actor.set_result(time.time())

                def _report_failure(*exc):
                    return test_actor.set_result(exc, False)

                finished = execution_ref.add_finish_callback(
                    session_id, graph_key, _promise=True)
                finished.then(_report_finish_time).catch(_report_failure)

                def _release_pin():
                    # Let pinning succeed after half a second.
                    nonlocal pinned
                    time.sleep(0.5)
                    pinned = False

                threading.Thread(target=_release_pin).start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 0.5)
def testSimpleExecution(self):
    """Execute ``ones + ones`` with the data sources replaced by fetch operands.

    The input data is put into shared memory up front. The result is
    validated once through the ``execute_graph`` promise and once more through
    a finish callback registered afterwards for the same graph key.
    """
    pool_address = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False)
        pool.create_actor(CpuCalcActor, uid='w:1:calc-a')
        pool.create_actor(InProcHolderActor)

        import mars.tensor as mt
        from mars.tensor.datasource import TensorOnes
        from mars.tensor.fetch import TensorFetch
        arr = mt.ones((10, 8), chunk_size=10)
        arr_add = mt.ones((10, 8), chunk_size=10)
        arr2 = arr + arr_add
        graph = arr2.build_graph(compose=False, tiled=True)

        arr = get_tiled(arr)
        arr2 = get_tiled(arr2)

        metas = dict()
        for chunk in graph:
            if not isinstance(chunk.op, TensorOnes):
                continue
            ones_op = chunk.op
            chunk._op = TensorFetch(
                dtype=chunk.dtype,
                _outputs=[weakref.ref(out) for out in ones_op.outputs],
                _key=ones_op.key)
            # NOTE(review): sibling tests pass a tuple of addresses as the
            # third WorkerMeta field; here a bare string is passed - confirm.
            metas[chunk.key] = WorkerMeta(chunk.nbytes, chunk.shape, pool_address)

        # The callbacks below read ``test_actor`` and ``session_id`` from the
        # enclosing scope when they run, so they serve both test actors.
        def _validate(_):
            data = test_actor.shared_store.get(session_id, arr2.chunks[0].key)
            assert_array_equal(data, 2 * np.ones((10, 8)))

        def _report_success(*_):
            return test_actor.set_result(None)

        def _report_failure(*exc):
            return test_actor.set_result(exc, False)

        with self.run_actor_test(pool) as test_actor:
            session_id = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            self.waitp(
                storage_client.put_objects(
                    session_id, [arr.chunks[0].key],
                    [np.ones((10, 8), dtype=np.int16)],
                    [DataStorageDevice.SHARED_MEMORY]),
            )

            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

            graph_key = str(uuid.uuid4())
            submitted = execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[arr2.chunks[0].key]), metas, _promise=True)
            submitted.then(_validate).then(_report_success).catch(_report_failure)

        self.get_result()

        # Register a callback for the same graph key with a fresh test actor.
        with self.run_actor_test(pool) as test_actor:
            execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

            finished = execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True)
            finished.then(_validate).then(_report_success).catch(_report_failure)

        self.get_result()
def testCpuCalcSingleFetches(self):
    """Run a calc whose second and third inputs are stored on disk only.

    All three inputs are put into shared memory; inputs 1 and 2 are then
    copied to disk and removed from shared memory. ``PlasmaSharedStore.create``
    is patched to raise ``StorageFull`` for the third input's key while the
    calc runs. Afterwards the test asserts that no live object matches the
    recorded ``(id, type)`` of the in-process result, that the quota dump is
    empty, and that every chunk is at its expected storage location.
    """
    import gc
    with self._start_calc_pool() as (_pool, test_actor):
        quota_ref = test_actor.promise_ref(MemQuotaActor.default_uid())
        calc_ref = test_actor.promise_ref(CpuCalcActor.default_uid())

        session_id = str(uuid.uuid4())
        data_list = [np.random.random((10, 10)) for _ in range(3)]
        exec_graph, fetch_chunks, add_chunk = self._build_test_graph(data_list)

        storage_client = test_actor.storage_client

        def _locations(data_key):
            return storage_client.get_data_locations(session_id, [data_key])[0]

        for fetch_chunk, raw in zip(fetch_chunks, data_list):
            self.waitp(
                storage_client.put_objects(
                    session_id, [fetch_chunk.key], [raw],
                    [DataStorageDevice.SHARED_MEMORY]),
            )
        self.assertEqual(list(_locations(fetch_chunks[0].key)),
                         [(0, DataStorageDevice.SHARED_MEMORY)])

        quota_batch = {
            build_quota_key(session_id, add_chunk.key, add_chunk.op.key):
                data_list[0].nbytes,
        }

        for idx in [1, 2]:
            quota_key = build_quota_key(
                session_id, fetch_chunks[idx].key, add_chunk.op.key)
            quota_batch[quota_key] = data_list[idx].nbytes

            def _drop_shared_copy(*_):
                # Runs before ``waitp`` returns, i.e. within this iteration.
                return storage_client.delete(
                    session_id, [fetch_chunks[idx].key],
                    [DataStorageDevice.SHARED_MEMORY])

            copied = storage_client.copy_to(
                session_id, [fetch_chunks[idx].key], [DataStorageDevice.DISK])
            self.waitp(copied.then(_drop_shared_copy))

            self.assertEqual(list(_locations(fetch_chunks[idx].key)),
                             [(0, DataStorageDevice.DISK)])

        self.waitp(
            quota_ref.request_batch_quota(quota_batch, _promise=True),
        )

        o_create = PlasmaSharedStore.create

        def _mock_plasma_create(store, session_id, data_key, size):
            if data_key != fetch_chunks[2].key:
                return o_create(store, session_id, data_key, size)
            raise StorageFull

        id_type_set = set()

        def _extract_value_ref(*_):
            inproc_handler = storage_client.get_storage_handler(
                (0, DataStorageDevice.PROC_MEMORY))
            value = inproc_handler.get_objects(session_id, [add_chunk.key])[0]
            id_type_set.add((id(value), type(value)))
            # Drop the local reference right away; the gc check below relies
            # on this callback not holding the object.
            del value

        def _store_results(*_):
            return calc_ref.store_results(
                session_id, add_chunk.op.key, [add_chunk.key], None, _promise=True)

        with patch_method(PlasmaSharedStore.create, _mock_plasma_create):
            self.waitp(
                calc_ref.calc(session_id, add_chunk.op.key,
                              serialize_graph(exec_graph), [add_chunk.key],
                              _promise=True)
                .then(_extract_value_ref)
                .then(_store_results)
            )

        self.assertTrue(
            all((id(obj), type(obj)) not in id_type_set
                for obj in gc.get_objects()))

        quota_dump = quota_ref.dump_data()
        self.assertEqual(len(quota_dump.allocations), 0)
        self.assertEqual(len(quota_dump.requests), 0)
        self.assertEqual(len(quota_dump.proc_sizes), 0)
        self.assertEqual(len(quota_dump.hold_sizes), 0)

        expected_devices = [
            (fetch_chunks[0].key, DataStorageDevice.SHARED_MEMORY),
            (fetch_chunks[1].key, DataStorageDevice.DISK),
            (fetch_chunks[2].key, DataStorageDevice.DISK),
            (add_chunk.key, DataStorageDevice.SHARED_MEMORY),
        ]
        for data_key, device in expected_devices:
            self.assertEqual(sorted(_locations(data_key)), [(0, device)])
def prepare_graph_in_pool(self, expr, clean_io_meta=True, compose=False):
    """Create a graph actor for ``expr`` in a fresh pool and yield it.

    The generator builds the scheduler-side actors, prepares and analyzes the
    graph, registers two workers, creates the operand actors and checks the
    resulting operand infos along the way. It finally yields
    ``(pool, graph_ref)`` while the pool is still open.

    NOTE(review): presumably wrapped as a context manager by a decorator that
    is not visible here - confirm.

    :param expr: expression whose ``build_graph`` is called
    :param clean_io_meta: passed as ``_clean_info`` to
        ``create_operand_actors``; when false, the io meta of every operand is
        compared against the fetched chunk graph
    :param compose: forwarded to ``build_graph`` and ``prepare_graph``
    """
    session_id = str(uuid.uuid4())
    graph_key = str(uuid.uuid4())

    serialized_graph = serialize_graph(expr.build_graph(compose=compose))
    chunked_graph = expr.build_graph(compose=compose, tiled=True)

    addr = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                          uid=SchedulerClusterInfoActor.default_uid())
        resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(AssignerActor, uid=AssignerActor.gen_uid(session_id))
        graph_ref = pool.create_actor(
            GraphActor, session_id, graph_key, serialized_graph,
            uid=GraphActor.gen_uid(session_id, graph_key))

        graph_ref.prepare_graph(compose=compose)
        fetched_graph = graph_ref.get_chunk_graph()
        self.assertIsNotNone(fetched_graph)
        self.assertEqual(len(chunked_graph), len(fetched_graph))

        # Analysis without placement must fill in the optimization fields.
        graph_ref.analyze_graph(do_placement=False)
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            for field in ('depth', 'successor_size', 'descendant_size'):
                self.assertIsNotNone(op_infos[node.op.key]['optimize'][field])

        for worker in ('localhost:12345', 'localhost:23456'):
            resource_ref.set_worker_meta(worker, dict(hardware=dict(cpu_total=4)))

        # With workers registered, nodes without predecessors need a target.
        graph_ref.analyze_graph()
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            if fetched_graph.count_predecessors(node) == 0:
                self.assertIsNotNone(op_infos[node.op.key]['target_worker'])

        graph_ref.create_operand_actors(_clean_info=clean_io_meta)
        op_infos = graph_ref.get_operand_info()

        if not clean_io_meta:
            # Rebuild the expected io meta per operand from the chunk graph.
            orig_metas = dict()
            for node in fetched_graph:
                op_key = node.op.key
                if op_key not in orig_metas:
                    orig_metas[op_key] = dict(
                        predecessors=set(), successors=set(),
                        input_chunks=set(), chunks=set())
                meta = orig_metas[op_key]

                meta['predecessors'].update(
                    [pred.op.key for pred in fetched_graph.iter_predecessors(node)])
                meta['successors'].update(
                    [succ.op.key for succ in fetched_graph.iter_successors(node)])
                meta['input_chunks'].update(
                    [pred.key for pred in fetched_graph.iter_predecessors(node)])
                meta['chunks'].update([out.key for out in node.op.outputs])

            for node in fetched_graph:
                self.assertEqual(op_infos[node.op.key]['op_name'],
                                 type(node.op).__name__)

                io_meta = op_infos[node.op.key]['io_meta']
                orig_io_meta = orig_metas[node.op.key]
                for field in ('predecessors', 'successors', 'input_chunks', 'chunks'):
                    self.assertSetEqual(set(io_meta[field]), set(orig_io_meta[field]))

        yield pool, graph_ref
def testPrepushGraph(self):
    """Enqueue a graph together with the graphs producing its inputs.

    An ``add`` graph is built by hand on top of two fetch chunks whose keys
    match the chunks of two input tensors. Two scenarios are run, each in its
    own actor pool:

    * the add graph is enqueued before either input graph has been enqueued;
    * the first input graph has already been executed when the add graph and
      the second input graph are enqueued.

    In both scenarios the stored result must equal the sum of the inputs.
    """
    import mars.tensor as mt
    from mars.graph import DAG
    from mars.tensor.expressions.datasource import TensorFetch

    data_inputs = [np.random.random((4, )) for _ in range(2)]

    arr_inputs = [mt.tensor(di, chunk_size=4) for di in data_inputs]
    arr_add = arr_inputs[0] + arr_inputs[1]

    graph_inputs = [a.build_graph(tiled=True) for a in arr_inputs]
    graph_input_op_keys = [a.chunks[0].op.key for a in arr_inputs]
    arr_add.build_graph(tiled=True)

    # Hand-build the add graph: fetch chunks (carrying the input chunk keys)
    # feeding a copy of the add operand.
    graph_add = DAG()
    input_chunks = []
    for a in arr_inputs:
        fetch_op = TensorFetch(dtype=a.dtype)
        inp_chunk = fetch_op.new_chunk(None, a.shape, _key=a.chunks[0].key).data
        input_chunks.append(inp_chunk)

    new_op = arr_add.chunks[0].op.copy()
    new_add_chunk = new_op.new_chunk(input_chunks, arr_add.shape,
                                     index=arr_add.chunks[0].index,
                                     dtype=arr_add.dtype, _key=arr_add.chunks[0].key)
    graph_add.add_node(new_add_chunk)
    for inp_chunk in input_chunks:
        graph_add.add_node(inp_chunk)
        graph_add.add_edge(inp_chunk, new_add_chunk)

    graph_add_key = arr_add.chunks[0].op.key

    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())

    def _validate(_):
        # ``test_actor`` and ``session_id`` are read from the enclosing scope
        # when the callback runs, so this serves both scenarios.
        data = test_actor._chunk_store.get(session_id, arr_add.chunks[0].key)
        assert_array_equal(data, data_inputs[0] + data_inputs[1])

    # ``mkdtemp`` takes ``suffix`` as its first positional parameter, so the
    # name must be passed as ``prefix=`` to act as a prefix.
    options.worker.spill_directory = tempfile.mkdtemp(
        prefix='mars_worker_prep_spilled-')

    # register when all predecessors unfinished
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address,
                                    with_daemon=False, with_status=False)
        pool.create_actor(SpillActor)
        pool.create_actor(CpuCalcActor)

        with self.run_actor_test(pool) as test_actor:
            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())
            execution_ref.enqueue_graph(
                session_id, graph_add_key, serialize_graph(graph_add),
                dict(chunks=[new_add_chunk.key]), None,
                pred_keys=graph_input_op_keys, _promise=True) \
                .then(lambda *_: execution_ref.start_execution(
                    session_id, graph_add_key, _promise=True)) \
                .then(_validate) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

            for ginput, op_key, gtensor in zip(graph_inputs, graph_input_op_keys,
                                               arr_inputs):
                def _start_exec_promise(session_id, op_key, *_):
                    return execution_ref.start_execution(
                        session_id, op_key, _promise=True)

                # ``partial`` freezes this iteration's ``op_key``.
                execution_ref.enqueue_graph(
                    session_id, op_key, serialize_graph(ginput),
                    dict(chunks=[gtensor.chunks[0].key]), None,
                    succ_keys=[new_add_chunk.op.key], _promise=True) \
                    .then(functools.partial(_start_exec_promise, session_id, op_key))

            self.get_result()

    pool_address = '127.0.0.1:%d' % get_next_port()
    session_id = str(uuid.uuid4())

    # register when part of predecessors unfinished
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address,
                                    with_daemon=False, with_status=False)
        pool.create_actor(SpillActor)
        pool.create_actor(CpuCalcActor)

        with self.run_actor_test(pool) as test_actor:
            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())

            # Run the first input graph to completion. ``destroy=False`` is
            # passed because the same test actor is used again below.
            execution_ref.enqueue_graph(
                session_id, graph_input_op_keys[0], serialize_graph(graph_inputs[0]),
                dict(chunks=[input_chunks[0].key]), None,
                succ_keys=[new_add_chunk.op.key], _promise=True) \
                .then(lambda *_: execution_ref.start_execution(
                    session_id, graph_input_op_keys[0], _promise=True)) \
                .then(lambda *_: test_actor.set_result(None, destroy=False)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))
            self.get_result()

            execution_ref.enqueue_graph(
                session_id, graph_add_key, serialize_graph(graph_add),
                dict(chunks=[new_add_chunk.key]), None,
                pred_keys=graph_input_op_keys, _promise=True) \
                .then(lambda *_: execution_ref.start_execution(
                    session_id, graph_add_key, _promise=True)) \
                .then(_validate) \
                .then(lambda *_: test_actor.set_result(None)) \
                .catch(lambda *exc: test_actor.set_result(exc, False))

            execution_ref.enqueue_graph(
                session_id, graph_input_op_keys[1], serialize_graph(graph_inputs[1]),
                dict(chunks=[input_chunks[1].key]), None,
                succ_keys=[new_add_chunk.op.key], _promise=True) \
                .then(lambda *_: execution_ref.start_execution(
                    session_id, graph_input_op_keys[1], _promise=True))
            self.get_result()
def testSimpleExecution(self):
    """Execute ``ones + ones`` with the data sources replaced by fetch operands.

    Both input chunks are put into the chunk store and registered with the
    chunk holder before the graph is enqueued and started. The result is
    validated through a finish callback, and once more through a second
    finish callback registered afterwards for the same graph key.
    """
    pool_address = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
        self.create_standard_actors(pool, pool_address, with_daemon=False)
        pool.create_actor(CpuCalcActor, uid='w:1:calc-a')

        import mars.tensor as mt
        from mars.tensor.expressions.datasource import TensorOnes, TensorFetch
        arr = mt.ones((10, 8), chunk_size=10)
        arr_add = mt.ones((10, 8), chunk_size=10)
        arr2 = arr + arr_add
        graph = arr2.build_graph(compose=False, tiled=True)

        for chunk in graph:
            if not isinstance(chunk.op, TensorOnes):
                continue
            ones_op = chunk.op
            chunk._op = TensorFetch(
                dtype=chunk.dtype,
                _outputs=[weakref.ref(out) for out in ones_op.outputs],
                _key=ones_op.key)

        # The callbacks below read ``test_actor`` and ``session_id`` from the
        # enclosing scope when they run, so they serve both test actors.
        def _validate(_):
            data = test_actor._chunk_store.get(session_id, arr2.chunks[0].key)
            assert_array_equal(data, 2 * np.ones((10, 8)))

        def _report_success(*_):
            return test_actor.set_result(None)

        def _report_failure(*exc):
            return test_actor.set_result(exc, False)

        with self.run_actor_test(pool) as test_actor:
            session_id = str(uuid.uuid4())
            chunk_holder_ref = test_actor.promise_ref(ChunkHolderActor.default_name())

            # Store and register each input, dropping the handle returned by
            # ``put`` right after registration.
            for source in (arr, arr_add):
                refs = test_actor._chunk_store.put(
                    session_id, source.chunks[0].key,
                    np.ones((10, 8), dtype=np.int16))
                chunk_holder_ref.register_chunk(session_id, source.chunks[0].key)
                del refs

            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())

            graph_key = str(uuid.uuid4())

            def _start(*_):
                return execution_ref.start_execution(session_id, graph_key, _tell=True)

            enqueued = execution_ref.enqueue_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[arr2.chunks[0].key]), None, _promise=True)
            enqueued.then(_start)

            finished = execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True)
            finished.then(_validate).then(_report_success).catch(_report_failure)

        self.get_result()

        # Register a callback for the same graph key with a fresh test actor.
        with self.run_actor_test(pool) as test_actor:
            execution_ref = test_actor.promise_ref(ExecutionActor.default_name())

            finished = execution_ref.add_finish_callback(
                session_id, graph_key, _promise=True)
            finished.then(_validate).then(_report_success).catch(_report_failure)

        self.get_result()