Пример #1
0
    def testStopGraphCalc(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=2, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_status=False)

            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid())
            execution_ref = pool.actor_ref(ExecutionActor.default_uid())

            calc_ref = daemon_ref.create_actor(
                MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a')
            daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a')

            test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor')
            test_actor.run_simple_calc(session_id, _tell=True)

            pool.sleep(2)
            proc_id = pool.distributor.distribute(calc_ref.uid)
            execution_ref.stop_execution(session_id, test_actor.get_graph_key(), _tell=True)
            while daemon_ref.is_actor_process_alive(calc_ref):
                pool.sleep(0.1)
            pool.restart_process(proc_id)
            daemon_ref.handle_process_down([proc_id])

            with self.assertRaises(ExecutionInterrupted):
                self.wait_for_result(pool, test_actor)
Пример #2
0
    def testDaemon(self):
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=2,
                               backend='gevent',
                               distributor=MarsDistributor(2, 'w:0:'),
                               address=mock_scheduler_addr) as pool:
            daemon_ref = pool.create_actor(
                WorkerDaemonActor, uid=WorkerDaemonActor.default_name())
            pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
            sleeper_ref = daemon_ref.create_actor(DaemonSleeperActor,
                                                  uid='w:1:DaemonSleeperActor')
            daemon_ref.create_actor(ProcessHelperActor, uid='w:1:ProcHelper')
            test_actor = pool.create_actor(DaemonTestActor)
            daemon_ref.register_callback(test_actor, 'handle_process_down')

            test_actor.run_test_sleep(sleeper_ref, 10, _tell=True)
            self.assertTrue(daemon_ref.is_actor_process_alive(sleeper_ref))

            pool.sleep(0.5)

            daemon_ref.kill_actor_process(sleeper_ref)
            # repeated kill shall not produce errors
            daemon_ref.kill_actor_process(sleeper_ref)
            self.assertFalse(daemon_ref.is_actor_process_alive(sleeper_ref))

            pool.restart_process(1)
            daemon_ref.handle_process_down([1])
            pool.sleep(1)
            self.assertTrue(pool.has_actor(sleeper_ref))
            with self.assertRaises(WorkerProcessStopped):
                test_actor.get_result()

            test_actor.run_test_sleep(sleeper_ref, 1)
            pool.sleep(1.5)
            test_actor.get_result()
Пример #3
0
    def testCalcProcessFailure(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=2, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_status=False)

            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid())
            dispatch_ref = pool.actor_ref(DispatchActor.default_uid())
            calc_ref = daemon_ref.create_actor(
                MockCpuCalcActor, session_id, mock_data, 10, uid='w:1:cpu-calc-a')
            daemon_ref.create_actor(ProcessHelperActor, uid='w:1:proc-helper-a')

            test_actor = pool.create_actor(ExecutionTestActor, uid='w:0:test_actor')
            test_actor.run_simple_calc(session_id, _tell=True)

            pool.sleep(2)
            proc_id = pool.distributor.distribute(calc_ref.uid)
            daemon_ref.kill_actor_process(calc_ref)
            assert not daemon_ref.is_actor_process_alive(calc_ref)
            pool.restart_process(proc_id)
            daemon_ref.handle_process_down([proc_id])

            with self.assertRaises(WorkerProcessStopped):
                self.wait_for_result(pool, test_actor)
            self.assertEqual(len(dispatch_ref.get_slots('cpu')), 1)
Пример #4
0
    def testSendTargets(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(CpuCalcActor)

            import mars.tensor as mt
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)
            result_key = result_tensor.chunks[0].key

            pool.create_actor(MockSenderActor, mock_data + np.ones((4,)), 'out', uid='w:mock_sender')
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None,
                                            send_addresses={result_key: (pool_address,)}, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Пример #5
0
    def testLoadStoreInOtherProcess(self):
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=3,
                              address=test_addr,
                              distributor=MarsDistributor(3)) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())

            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor1')
            pool.create_actor(InProcHolderActor, uid='w:2:InProcHolderActor2')
            pool.create_actor(IORunnerActor,
                              lock_free=True,
                              dispatched=False,
                              uid=IORunnerActor.gen_uid(1))

            test_ref = pool.create_actor(OtherProcessTestActor,
                                         uid='w:0:OtherProcTest')

            def _get_result():
                start_time = time.time()
                while test_ref.get_result() is None:
                    pool.sleep(0.5)
                    if time.time() - start_time > 10:
                        raise TimeoutError

            test_ref.run_copy_test((0, DataStorageDevice.SHARED_MEMORY),
                                   (1, DataStorageDevice.PROC_MEMORY),
                                   _tell=True)
            _get_result()

            test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                                   (0, DataStorageDevice.SHARED_MEMORY),
                                   _tell=True)
            _get_result()

            test_ref.run_copy_test((1, DataStorageDevice.PROC_MEMORY),
                                   (2, DataStorageDevice.PROC_MEMORY),
                                   _tell=True)
            _get_result()
Пример #6
0
    def testReExecuteExisting(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address,
                               distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False)
            pool.create_actor(CpuCalcActor, uid='w:1:cpu-calc')
            pool.create_actor(InProcHolderActor, uid='w:1:inproc-holder')

            import mars.tensor as mt
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            result_tensor = get_tiled(result_tensor)

            def _validate(*_):
                data = test_actor.shared_store.get(session_id,
                                                   result_tensor.chunks[0].key)
                assert_array_equal(data, mock_data + np.ones((4, )))

            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Пример #7
0
    def testFetchRemoteData(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False,
                                        with_resource=True)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)
            pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            metas = {modified_chunk.key: WorkerMeta(mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234',))}
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            metas[modified_chunk.key] = WorkerMeta(
                mock_data.nbytes, mock_data.shape,
                ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost')))
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Пример #8
0
    def testDistributor(self):
        distributor = MarsDistributor(1)
        self.assertEqual(distributor.distribute('Actor'), 0)

        distributor = MarsDistributor(3)
        self.assertEqual(distributor.distribute('w:1:Actor'), 1)
        self.assertEqual(distributor.distribute('w:-1:Actor'), 2)

        uid = 'w:h:Actor'
        self.assertEqual(distributor.distribute(uid),
                         mmh_hash(uid) % distributor.n_process)

        uid = 'w:h1:Actor'
        self.assertEqual(distributor.distribute(uid),
                         1 + mmh_hash(uid) % (distributor.n_process - 1))

        with self.assertRaises(ValueError):
            distributor.distribute('Actor')
        with self.assertRaises(ValueError):
            distributor.distribute('w:x:Actor')

        distributor = MarsDistributor(3, 'w:2:')
        self.assertEqual(distributor.distribute('Actor'), 2)
        self.assertEqual(distributor.distribute('w:x:Actor'), 2)

        distributor = MarsDistributor(3)
        ref_uid = 'w:h1:Actor2'
        new_uid = distributor.make_same_process('w::Actor', ref_uid)
        self.assertEqual(
            new_uid, 'w:%d:Actor' % (1 + mmh_hash(ref_uid) %
                                     (distributor.n_process - 1)))

        with self.assertRaises(ValueError):
            distributor.make_same_process('Actor', ref_uid)

        distributor = MarsDistributor(3, 'w:2:')
        self.assertEqual('w:2:' + repr('Actor'),
                         distributor.make_same_process('Actor', 'w:2:Actor'))
Пример #9
0
    def testFetchRemoteData(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False,
                                        with_resource=True)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender')
            cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_name())
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)

            import mars.tensor as mt
            from mars.tensor.expressions.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                             shape=mock_data.shape, workers=('0.0.0.0:1234',))
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                             shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()