Exemplo n.º 1
0
    def testPrepareQuota(self, *_):
        """
        Check that a graph does not finish while pinning of chunks keeps failing.

        A side effect is installed on ``ChunkHolderActor.pin_chunks`` (presumably
        already replaced with a mock by a decorator outside this block) which
        raises ``PinChunkFailed`` until a helper thread clears a flag after one
        second.  The time reported on success must therefore be at least one
        second later than the moment right before the graph was enqueued.
        """
        # single-element list acting as a mutable cell shared with the closures
        pin_blocked = [True]

        def _fake_pin(graph_key, chunk_keys):
            from mars.errors import PinChunkFailed
            if not pin_blocked[0]:
                return chunk_keys
            raise PinChunkFailed

        ChunkHolderActor.pin_chunks.side_effect = _fake_pin

        worker_addr = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        fetch_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent', address=worker_addr) as pool:
            self.create_standard_actors(pool, worker_addr, with_daemon=False, with_status=False)
            pool.create_actor(MockSenderActor, fetch_data, 'in', uid='w:mock_sender')
            meta_ref = pool.actor_ref(ChunkMetaActor.default_name())

            import mars.tensor as mt
            from mars.tensor.expressions.fetch import TensorFetch
            ones = mt.ones((4,), chunk_size=4)
            fetched = mt.array(fetch_data)
            total = ones + fetched
            graph = total.build_graph(compose=False, tiled=True)

            # swap the op of the data chunk for a fetch op carrying the same key
            # and outputs, and register meta for that chunk
            fetch_chunk = fetched.chunks[0]
            output_refs = [weakref.ref(out) for out in fetch_chunk.op.outputs]
            fetched.chunks[0]._op = TensorFetch(
                dtype=fetch_chunk.dtype, _outputs=output_refs, _key=fetch_chunk.op.key)
            meta_ref.set_chunk_meta(
                session_id, fetch_chunk.key, size=fetch_data.nbytes, shape=fetch_data.shape,
                workers=('0.0.0.0:1234', worker_addr))

            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_name())

                def _on_success(*_):
                    # report the completion time as the test result
                    return test_actor.set_result(time.time())

                def _on_failure(*exc):
                    return test_actor.set_result(exc, False)

                def _release_pin():
                    time.sleep(1)
                    pin_blocked[0] = False

                start_time = time.time()

                enqueue_promise = execution_ref.enqueue_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[total.chunks[0].key]), None, _promise=True)
                enqueue_promise.then(_on_success).catch(_on_failure)

                threading.Thread(target=_release_pin).start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 1)
Exemplo n.º 2
0
    def testWorkerProcessRestart(self):
        """
        Start a worker in a subprocess, kill the process behind its first cpu
        slot via ``WorkerDaemonActor.kill_actor_process`` and poll
        ``is_actor_process_alive`` until it returns a truthy value.

        :raises TimeoutError: when the process is not reported alive within 10 seconds
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # bind ``proc`` before entering ``try``: if the pool or the worker fails
        # to start, the cleanup below must not raise a NameError that would hide
        # the original error
        proc = None
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(SchedulerClusterInfoActor,
                                  schedulers=[mock_scheduler_addr],
                                  uid=SchedulerClusterInfoActor.default_name())

                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())

                proc = subprocess.Popen([
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--spill-dir', self._spill_dir,
                    '--ignore-avail-mem'
                ])
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                daemon_ref = pool.actor_ref(WorkerDaemonActor.default_name(),
                                            address=worker_endpoint)
                dispatch_ref = pool.actor_ref(DispatchActor.default_name(),
                                              address=worker_endpoint)
                cpu_slots = dispatch_ref.get_slots('cpu')
                calc_ref = pool.actor_ref(cpu_slots[0],
                                          address=worker_endpoint)
                daemon_ref.kill_actor_process(calc_ref)

                check_start = time.time()
                while not daemon_ref.is_actor_process_alive(calc_ref):
                    gevent.sleep(0.1)
                    if time.time() - check_start > 10:
                        raise TimeoutError('Check process restart timeout')
        finally:
            if proc is not None and proc.poll() is None:
                # ask the worker to exit, wait up to 5 seconds, then kill it
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
Exemplo n.º 3
0
    def _run_operand_case(session_id, graph_key, tensor, execution_creator):
        """
        Run the graph of ``tensor`` in a fresh local actor pool and poll until the
        state returned by the graph meta actor is SUCCEEDED, FAILED or CANCELLED.

        The function takes no ``self``; presumably it is declared as a static
        method outside the visible lines.

        :param session_id: session id handed to ``GraphActor``
        :param graph_key: graph key handed to ``GraphActor``
        :param tensor: object whose ``build_graph`` yields the graph to run
        :param execution_creator: callable receiving the pool; invoked once per
                                  distinct address to build an execution ref
        :raises SystemError: when no such state is observed within 30 seconds
        """
        tensor_graph = tensor.build_graph(compose=False)

        with create_actor_pool(n_process=1, backend='gevent') as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
            pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialize_graph(tensor_graph),
                uid=GraphActor.gen_name(session_id, graph_key))

            # execution refs already built, keyed by the requested address
            refs_by_address = dict()

            def _cached_execution_ref(uid=None, address=None):
                try:
                    return refs_by_address[address]
                except KeyError:
                    created = refs_by_address[address] = execution_creator(pool)
                    return created

            # handle mock objects
            OperandActor._get_raw_execution_ref.side_effect = _cached_execution_ref

            mock_resource = {'hardware': {'cpu': 4, 'cpu_total': 4, 'memory': 512}}
            for worker in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(worker, mock_resource)

            graph_ref.prepare_graph()
            fetched_graph = graph_ref.get_chunk_graph()

            graph_ref.analyze_graph()

            # op keys of chunks without successors; not used further in this block
            final_keys = {
                chunk.op.key for chunk in fetched_graph
                if fetched_graph.count_successors(chunk) == 0
            }

            graph_ref.create_operand_actors()

            graph_meta_ref = pool.actor_ref(GraphMetaActor.gen_name(session_id, graph_key))
            terminal_states = (GraphState.SUCCEEDED, GraphState.FAILED, GraphState.CANCELLED)
            wait_start = time.time()
            while True:
                pool.sleep(0.1)
                if time.time() - wait_start > 30:
                    raise SystemError('Wait for execution finish timeout')
                if graph_meta_ref.get_state() in terminal_states:
                    break
Exemplo n.º 4
0
def start_transfer_test_pool(**kwargs):
    """
    Generator yielding an actor pool populated with the actors created below.

    Presumably wrapped by ``contextlib.contextmanager`` outside the visible
    lines, so that it can be used in a ``with`` statement.

    :param kwargs: must contain ``address`` (pool address, also passed to
                   ``ClusterInfoActor`` and ``StatusActor``) and ``plasma_size``
                   (argument of ``ChunkHolderActor``); the remaining items are
                   forwarded to ``create_actor_pool``
    :raises KeyError: when ``address`` or ``plasma_size`` is missing
    """
    address = kwargs.pop('address')
    plasma_size = kwargs.pop('plasma_size')
    with create_actor_pool(n_process=1, backend='gevent', address=address, **kwargs) as pool:
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_name())
        pool.create_actor(ClusterInfoActor, schedulers=[address],
                          uid=ClusterInfoActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
        pool.create_actor(QuotaActor, 1024 * 1024 * 20, uid=MemQuotaActor.default_name())
        chunk_holder_ref = pool.create_actor(ChunkHolderActor,
                                             plasma_size, uid=ChunkHolderActor.default_name())
        # destroy the chunk holder even when a later step or the consumer of the
        # pool raises; without ``finally`` the cleanup would be skipped
        try:
            pool.create_actor(SpillActor)
            pool.create_actor(StatusActor, address, uid=StatusActor.default_name())

            yield pool
        finally:
            chunk_holder_ref.destroy()
Exemplo n.º 5
0
    def testExecuteWorker(self):
        """
        Start a worker subprocess attached to a mock scheduler pool, ask a
        ``WorkerProcessTestActor`` to run its test against the worker endpoint
        and poll until ``get_reply`` returns a truthy value.

        :raises TimeoutError: when no reply is available within 20 seconds
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # bind ``proc`` before entering ``try``: if the pool or the worker fails
        # to start, the cleanup below must not raise a NameError that would hide
        # the original error
        proc = None
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(SchedulerClusterInfoActor,
                                  schedulers=[mock_scheduler_addr],
                                  uid=SchedulerClusterInfoActor.default_name())

                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())

                proc = subprocess.Popen([
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--spill-dir', self._spill_dir,
                    '--ignore-avail-mem'
                ])
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                test_ref = pool.create_actor(WorkerProcessTestActor)
                test_ref.run_test(worker_endpoint, _tell=True)

                check_time = time.time()
                while not test_ref.get_reply():
                    gevent.sleep(0.1)
                    if time.time() - check_time > 20:
                        raise TimeoutError('Check reply timeout')
        finally:
            if proc is not None and proc.poll() is None:
                # ask the worker to exit, wait up to 5 seconds, then kill it
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
Exemplo n.º 6
0
    def testFailoverMessage(self):
        """
        Check what happens after ``ResourceActor.detect_dead_workers`` runs on a
        worker whose ``update_time`` is older than the status timeout.

        The test asserts that the mocked graph actor received the worker in its
        removal list together with the chunk registered on it, that the worker
        is absent from the workers meta and stays absent when meta is reported
        right away, and that it shows up again when meta is reported after a
        further 0.4 second sleep.
        """
        mock_session_id = str(uuid.uuid4())
        mock_graph_key = str(uuid.uuid4())
        mock_chunk_key = str(uuid.uuid4())
        addr = '127.0.0.1:%d' % get_next_port()
        mock_worker_addr = '127.0.0.1:54132'

        # ``options`` is process-wide: remember the previous value and restore it
        # afterwards so that the shortened setting does not leak into other tests
        old_blacklist_time = options.scheduler.worker_blacklist_time
        options.scheduler.worker_blacklist_time = 0.5
        try:
            with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
                pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                                  uid=ClusterInfoActor.default_name())
                pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
                session_manager_ref = pool.create_actor(
                    SessionManagerActor, uid=SessionManagerActor.default_name())
                resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
                chunk_meta_ref = pool.create_actor(
                    ChunkMetaActor, uid=ChunkMetaActor.default_name())

                session_ref = pool.actor_ref(session_manager_ref.create_session(mock_session_id))
                chunk_meta_ref.set_chunk_meta(mock_session_id, mock_chunk_key,
                                              size=80, shape=(10,), workers=(mock_worker_addr,))

                with mock.patch(GraphActor.__module__ + '.' + GraphActor.__name__, new=MockGraphActor):
                    session_ref.submit_tensor_graph(None, mock_graph_key)
                    graph_ref = pool.actor_ref(GraphActor.gen_name(mock_session_id, mock_graph_key))

                    # register the worker with an update time already past the status timeout
                    expire_time = time.time() - options.scheduler.status_timeout - 1
                    resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=expire_time))

                    resource_ref.detect_dead_workers(_tell=True)
                    pool.sleep(0.2)

                    _, removes, lost_chunks = graph_ref.get_worker_change_args()
                    self.assertListEqual(removes, [mock_worker_addr])
                    self.assertListEqual(lost_chunks, [mock_chunk_key])

                    # meta reported right after the removal is expected to be ignored
                    self.assertNotIn(mock_worker_addr, resource_ref.get_workers_meta())
                    resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=time.time()))
                    self.assertNotIn(mock_worker_addr, resource_ref.get_workers_meta())

                    # 0.2 + 0.4 seconds of sleeping exceed the 0.5 configured above
                    pool.sleep(0.4)
                    resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=time.time()))
                    self.assertIn(mock_worker_addr, resource_ref.get_workers_meta())
        finally:
            options.scheduler.worker_blacklist_time = old_blacklist_time
Exemplo n.º 7
0
    def create_standard_actors(cls, pool, address, quota_size=None, with_daemon=True,
                               with_status=True, with_resource=False):
        """
        Create the fixed set of actors listed below inside ``pool``.

        :param pool: actor pool in which the actors are created
        :param address: passed as the only scheduler of ``ClusterInfoActor`` and
                        as the argument of ``StatusActor``
        :param quota_size: argument of ``QuotaActor``; any falsy value (``None``
                           or ``0``) selects ``1024 * 1024``
        :param with_daemon: create ``WorkerDaemonActor`` when true
        :param with_status: create ``StatusActor`` when true
        :param with_resource: create ``ResourceActor`` when true
        """
        if not quota_size:
            quota_size = 1024 * 1024

        def _create_named(actor_cls, *args, **kwargs):
            # register the actor under the default name of its own class
            return pool.create_actor(actor_cls, *args, uid=actor_cls.default_name(), **kwargs)

        _create_named(PlasmaKeyMapActor)
        _create_named(ClusterInfoActor, schedulers=[address])

        # optional actors, created in this order when their switch is on
        optional_actors = (
            (with_resource, ResourceActor, ()),
            (with_daemon, WorkerDaemonActor, ()),
            (with_status, StatusActor, (address,)),
        )
        for enabled, actor_cls, actor_args in optional_actors:
            if enabled:
                _create_named(actor_cls, *actor_args)

        _create_named(ChunkHolderActor, cls.plasma_storage_size)
        _create_named(ChunkMetaActor)
        _create_named(TaskQueueActor)
        _create_named(DispatchActor)
        # the quota actor is the only one registered under another class' default name
        pool.create_actor(QuotaActor, quota_size, uid=MemQuotaActor.default_name())
        _create_named(ExecutionActor)
Exemplo n.º 8
0
    def testReadyState(self, *_):
        """
        Check which state the middle operand ends up in once all of its input
        operands are started as FINISHED, under three conditions: no chunk meta
        registered for the inputs (UNSCHEDULED expected), chunk sizes fetched as
        ``None`` (UNSCHEDULED expected), and meta available (READY expected).
        """
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())
        mock_workers = ['localhost:12345', 'localhost:23456']

        with self._prepare_test_graph(session_id, graph_key, mock_workers) as (pool, graph_ref):
            input_op_keys, mid_op_key, output_op_keys = self._filter_graph_level_op_keys(graph_ref)
            meta_ref = pool.actor_ref(ChunkMetaActor.default_name())
            mid_ref = pool.actor_ref(OperandActor.gen_uid(session_id, mid_op_key))

            input_refs = [
                pool.actor_ref(OperandActor.gen_uid(session_id, op_key))
                for op_key in input_op_keys
            ]

            def _check_state_after_inputs_finish(expected_state):
                # reset: drop recorded predecessors and put every operand back
                # into the UNSCHEDULED state
                for op_key in input_op_keys:
                    mid_ref.remove_finished_predecessor(op_key)

                mid_ref.start_operand(OperandState.UNSCHEDULED)
                for input_ref in input_refs:
                    input_ref.start_operand(OperandState.UNSCHEDULED)

                # finish the inputs one after another; before each of them is
                # finished the middle operand must still be UNSCHEDULED
                for input_ref in input_refs:
                    self.assertEqual(mid_ref.get_state(), OperandState.UNSCHEDULED)
                    input_ref.start_operand(OperandState.FINISHED)
                pool.sleep(0.5)
                self.assertEqual(expected_state, mid_ref.get_state())

            def _sizes_unavailable(*_):
                return [None, None]

            # test entering state with no input meta
            _check_state_after_inputs_finish(OperandState.UNSCHEDULED)

            # fill meta
            input_chunk_keys, _, _ = self._filter_graph_level_chunk_keys(graph_ref)
            for chunk_key in input_chunk_keys:
                meta_ref.set_chunk_meta(
                    session_id, chunk_key, workers=('localhost:12345',), size=800)

            # test entering state with failure in fetching sizes
            with patch_method(ChunkMetaActor.batch_get_chunk_size, new=_sizes_unavailable):
                _check_state_after_inputs_finish(OperandState.UNSCHEDULED)

            # test successful entering state
            _check_state_after_inputs_finish(OperandState.READY)
Exemplo n.º 9
0
    def _prepare_test_graph(self, session_id, graph_key, mock_workers):
        """
        Generator yielding ``(pool, graph_ref)`` for a graph built from the sum
        of two random tensors of 100 elements split into two parts.

        Callers in this file use it in a ``with`` statement, so it is presumably
        wrapped by ``contextlib.contextmanager`` outside the visible lines.

        :param session_id: session id handed to ``GraphActor``
        :param graph_key: graph key handed to ``GraphActor``
        :param mock_workers: worker addresses registered in ``ResourceActor``,
                             each with ``cpu_total=4``
        """
        pool_addr = '127.0.0.1:%d' % get_next_port()
        lhs = mt.random.random((100, ))
        rhs = mt.random.random((100, ))
        first_part, second_part = mt.split(lhs + rhs, 2)

        # both parts are collected into one shared graph
        graph = DAG()
        for part in (first_part, second_part):
            part.build_graph(graph=graph, compose=False)

        with create_actor_pool(n_process=1, backend='gevent', address=pool_addr) as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
            pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialize_graph(graph),
                uid=GraphActor.gen_name(session_id, graph_key))

            for worker in mock_workers:
                resource_ref.set_worker_meta(worker, {'hardware': {'cpu_total': 4}})

            graph_ref.prepare_graph()
            graph_ref.scan_node()
            graph_ref.place_initial_chunks()
            # operand actors are created with ``_start=False``
            graph_ref.create_operand_actors(_start=False)

            yield pool, graph_ref
Exemplo n.º 10
0
    def testGetTensorNsplits(self, *_):
        """
        Check ``self.api.get_tensor_nsplits`` against values served through the
        ``side_effect`` lists set on ``GraphActor.get_tensor_chunk_indexes`` and
        ``ChunkMetaActor.batch_get_chunk_shape``: first a 1-d layout of four
        chunks, then a 2x2 layout.
        """
        session_id = 'mock_session_id'
        graph_key = 'mock_graph_key'
        tensor_key = 'mock_tensor_key'
        serialized_graph = 'mock_serialized_graph'

        graph_uid = GraphActor.gen_name(session_id, graph_key)
        # argument order follows the other ``GraphActor`` creations in this file:
        # session id, graph key, then the serialized graph
        self.pool.create_actor(GraphActor,
                               session_id,
                               graph_key,
                               serialized_graph,
                               uid=graph_uid)
        self.pool.create_actor(ChunkMetaActor,
                               uid=ChunkMetaActor.default_name())

        chunk_keys = ['chunk_key1', 'chunk_key2', 'chunk_key3', 'chunk_key4']
        mock_indexes = [
            OrderedDict(zip(chunk_keys, [(0, ), (1, ), (2, ), (3, )])),
            OrderedDict(zip(chunk_keys, [(0, 0), (0, 1), (1, 0), (1, 1)])),
        ]
        mock_shapes = [[(3, ), (4, ), (5, ), (6, )],
                       [(3, 4), (3, 2), (2, 4), (2, 2)]]

        # consecutive calls consume consecutive items of each list
        GraphActor.get_tensor_chunk_indexes.side_effect = mock_indexes
        ChunkMetaActor.batch_get_chunk_shape.side_effect = mock_shapes

        nsplits = self.api.get_tensor_nsplits(session_id, graph_key,
                                              tensor_key)
        self.assertEqual(((3, 4, 5, 6), ), nsplits)

        nsplits = self.api.get_tensor_nsplits(session_id, graph_key,
                                              tensor_key)
        self.assertEqual(((3, 2), (4, 2)), nsplits)
Exemplo n.º 11
0
    def testSimpleTransfer(self):
        """
        Transfer two chunks from a pool running in a separate process into a
        local pool and compare the data seen on both sides.

        The remote process is started with ``run_transfer_worker`` and sends one
        item through a queue, which is used here as the path of its plasma
        socket.  For each selected chunk key a free ``'sender'`` slot is
        requested from the remote dispatcher, ``send_data`` is invoked on it
        towards the local pool, and the local and remote copies (read from the
        chunk store or, on ``KeyError``, from the spill file) are compared.
        """
        import tempfile
        session_id = str(uuid.uuid4())

        local_pool_addr = 'localhost:%d' % get_next_port()
        remote_pool_addr = 'localhost:%d' % get_next_port()
        remote_chunk_keys = [str(uuid.uuid4()) for _ in range(9)]
        msg_queue = multiprocessing.Queue()

        remote_spill_dir = os.path.join(
            tempfile.gettempdir(),
            'mars_spill_%d_%d' % (os.getpid(), id(run_transfer_worker)))

        proc = multiprocessing.Process(target=run_transfer_worker,
                                       args=(remote_pool_addr, session_id,
                                             remote_chunk_keys,
                                             remote_spill_dir, msg_queue))
        proc.start()
        try:
            # ``timeout`` must be given by keyword: the first positional argument
            # of ``Queue.get`` is ``block``, so ``get(30)`` would wait forever
            remote_plasma_socket = msg_queue.get(timeout=30)
        except BaseException:
            # do not leave the remote process behind, then report the error
            if proc.is_alive():
                proc.terminate()
            raise

        with create_actor_pool(n_process=1,
                               distributor=WorkerDistributor(1),
                               backend='gevent',
                               address=local_pool_addr) as pool:
            pool.create_actor(ClusterInfoActor,
                              schedulers=[local_pool_addr],
                              uid=ClusterInfoActor.default_name())
            pool.create_actor(ChunkMetaActor,
                              uid=ChunkMetaActor.default_name())
            pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
            pool.create_actor(QuotaActor,
                              1024 * 1024 * 20,
                              uid=MemQuotaActor.default_name())
            cache_ref = pool.create_actor(ChunkHolderActor,
                                          self.plasma_storage_size,
                                          uid=ChunkHolderActor.default_name())
            pool.create_actor(SpillActor)

            sender_refs = [
                pool.create_actor(SenderActor,
                                  uid='w:1:%s' % str(uuid.uuid4())),
                pool.create_actor(SenderActor,
                                  uid='w:2:%s' % str(uuid.uuid4())),
            ]

            receiver_refs = [
                pool.create_actor(ReceiverActor,
                                  uid='w:1:%s' % str(uuid.uuid4())),
                pool.create_actor(ReceiverActor,
                                  uid='w:1:%s' % str(uuid.uuid4())),
                pool.create_actor(ReceiverActor,
                                  uid='w:2:%s' % str(uuid.uuid4())),
                pool.create_actor(ReceiverActor,
                                  uid='w:2:%s' % str(uuid.uuid4())),
            ]

            try:
                # the last and the second of the remote chunk keys are transferred
                for data_id in (-1, 1):
                    chunk_key = remote_chunk_keys[data_id]

                    with self.run_actor_test(pool) as test_actor:
                        from mars.worker.spill import build_spill_file_name
                        from mars.serialize import dataserializer
                        from numpy.testing import assert_array_equal

                        remote_dispatch_ref = test_actor.promise_ref(
                            DispatchActor.default_name(),
                            address=remote_pool_addr)
                        remote_plasma_client = plasma.connect(
                            remote_plasma_socket, '', 0)
                        remote_store = PlasmaChunkStore(remote_plasma_client)

                        def _call_send_data(sender_uid):
                            sender_ref = test_actor.promise_ref(
                                sender_uid, address=remote_pool_addr)
                            return sender_ref.send_data(session_id,
                                                        chunk_key,
                                                        local_pool_addr,
                                                        _promise=True)

                        def _test_data_exist(*_):
                            # on each side fall back to the spill file when the
                            # chunk store raises KeyError
                            try:
                                local_data = test_actor._chunk_store.get(
                                    session_id, chunk_key)
                            except KeyError:
                                with open(build_spill_file_name(chunk_key),
                                          'rb') as spill_file:
                                    local_data = dataserializer.load(
                                        spill_file)

                            try:
                                remote_data = remote_store.get(
                                    session_id, chunk_key)
                            except KeyError:
                                with open(
                                        build_spill_file_name(
                                            chunk_key, remote_spill_dir),
                                        'rb') as spill_file:
                                    remote_data = dataserializer.load(
                                        spill_file)
                            assert_array_equal(local_data, remote_data)

                            del local_data, remote_data

                        remote_dispatch_ref.get_free_slot('sender', _promise=True) \
                            .then(_call_send_data) \
                            .then(_test_data_exist) \
                            .then(
                            lambda *_: test_actor.set_result(chunk_key),
                            lambda *exc: test_actor.set_result(exc, False),
                        )
                    self.assertEqual(self.get_result(60), chunk_key)

                remote_holder_ref = pool.actor_ref('HolderActor',
                                                   address=remote_pool_addr)
                remote_holder_ref.trigger()
            finally:
                for ref in sender_refs:
                    pool.destroy_actor(ref)
                for ref in receiver_refs:
                    pool.destroy_actor(ref)
                pool.destroy_actor(cache_ref)

                os.unlink(remote_plasma_socket)

                # ask the remote process to stop, wait up to 5 seconds, then terminate it
                os.kill(proc.pid, signal.SIGINT)
                t = time.time()
                while proc.is_alive() and time.time() < t + 5:
                    time.sleep(1)
                if proc.is_alive():
                    proc.terminate()
Exemplo n.º 12
0
    def testExecuteWorker(self):
        """
        Start a worker subprocess attached to a mock scheduler pool, wait until
        ``ResourceActor.get_workers_meta`` returns a non-empty value, then ask a
        ``WorkerProcessTestActor`` to run its test against the first reported
        worker and poll until ``get_reply`` returns a truthy value.

        :raises SystemError: when the worker process exits early, when no worker
                             meta shows up within 20 seconds, or when no reply is
                             available within 20 seconds
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # bind ``proc`` before entering ``try``: if the pool or the worker fails
        # to start, the cleanup below must not raise a NameError that would hide
        # the original error
        proc = None
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(ClusterInfoActor,
                                  schedulers=[mock_scheduler_addr],
                                  uid=ClusterInfoActor.default_name())
                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())

                proc = subprocess.Popen([
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--ignore-avail-mem'
                ])
                worker_ips = []

                def waiter():
                    check_time = time.time()
                    while not resource_ref.get_workers_meta():
                        gevent.sleep(0.5)
                        if proc.poll() is not None:
                            raise SystemError('Worker dead. exit code %s' %
                                              proc.poll())
                        if time.time() - check_time > 20:
                            raise SystemError(
                                'Check meta_timestamp timeout')
                    val = resource_ref.get_workers_meta()
                    worker_ips.extend(val.keys())

                gl = gevent.spawn(waiter)
                # ``get`` waits like ``join`` but re-raises an error raised inside
                # the greenlet, instead of continuing to an IndexError below
                gl.get()

                test_ref = pool.create_actor(WorkerProcessTestActor)
                test_ref.run_test(worker_ips[0], _tell=True)

                check_time = time.time()
                while not test_ref.get_reply():
                    gevent.sleep(0.1)
                    if time.time() - check_time > 20:
                        raise SystemError('Check reply timeout')
        finally:
            if proc is not None and proc.poll() is None:
                # ask the worker to exit, wait up to 5 seconds, then kill it
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(1)
                    if proc.poll() is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
Exemplo n.º 13
0
    def prepare_graph_in_pool(self, expr, clean_io_meta=True, compose=False):
        """
        Generator that submits the graph of ``expr`` to a ``GraphActor`` in a new
        local pool, checks the results of the preparation steps and finally
        yields ``(pool, graph_ref)``.

        Presumably wrapped by ``contextlib.contextmanager`` outside the visible
        lines.

        :param expr: object whose ``build_graph`` yields the graph to submit
        :param clean_io_meta: forwarded as ``_clean_io_meta`` when the operand
                              actors are created; when false, the ``io_meta`` of
                              every operand is compared with the chunk graph
        :param compose: forwarded to ``build_graph`` and ``prepare_graph``
        """
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())

        serialized_graph = serialize_graph(expr.build_graph(compose=compose))
        chunked_graph = expr.build_graph(compose=compose, tiled=True)

        addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
            pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialized_graph,
                uid=GraphActor.gen_name(session_id, graph_key))

            graph_ref.prepare_graph(compose=compose)
            fetched_graph = graph_ref.get_chunk_graph()
            self.assertIsNotNone(fetched_graph)
            self.assertEqual(len(chunked_graph), len(fetched_graph))

            # analysis without placement must fill the optimize info of every operand
            graph_ref.analyze_graph(do_placement=False)
            op_infos = graph_ref.get_operand_info()
            for node in fetched_graph:
                optimize_info = op_infos[node.op.key]['optimize']
                for field in ('depth', 'successor_size', 'descendant_size'):
                    self.assertIsNotNone(optimize_info[field])

            for worker in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(worker, dict(hardware=dict(cpu_total=4)))

            # with workers registered, every node without predecessors must get a target worker
            graph_ref.analyze_graph()
            op_infos = graph_ref.get_operand_info()

            for node in fetched_graph:
                if fetched_graph.count_predecessors(node) == 0:
                    self.assertIsNotNone(op_infos[node.op.key]['target_worker'])

            graph_ref.create_operand_actors(_clean_io_meta=clean_io_meta)
            op_infos = graph_ref.get_operand_info()

            if not clean_io_meta:
                # expected io meta per op key, accumulated over all chunks of the same op
                expected_metas = dict()
                for node in fetched_graph:
                    expected = expected_metas.setdefault(node.op.key, dict(
                        predecessors=set(), successors=set(), input_chunks=set(), chunks=set()
                    ))
                    predecessors = list(fetched_graph.iter_predecessors(node))
                    expected['predecessors'].update(pred.op.key for pred in predecessors)
                    expected['successors'].update(
                        succ.op.key for succ in fetched_graph.iter_successors(node))
                    expected['input_chunks'].update(pred.key for pred in predecessors)
                    expected['chunks'].update(out.key for out in node.op.outputs)

                for node in fetched_graph:
                    op_info = op_infos[node.op.key]
                    self.assertEqual(op_info['op_name'], type(node.op).__name__)

                    io_meta = op_info['io_meta']
                    expected = expected_metas[node.op.key]
                    for field in ('predecessors', 'successors', 'input_chunks', 'chunks'):
                        self.assertSetEqual(set(io_meta[field]), set(expected[field]))

            yield pool, graph_ref
Exemplo n.º 14
0
    def testExecuteWorker(self):
        """Run a tiled ``dot`` graph on a worker started as a subprocess.

        A mock scheduler pool hosting ``ClusterInfoActor``, ``ChunkMetaActor``
        and ``ResourceActor`` is created, a worker is launched with
        ``python -m mars.worker`` pointing at that pool, and the test waits
        until ``ResourceActor`` reports worker metadata.  The graph is then
        handed to the worker's ``ExecutionActor`` and the test polls a
        ``PromiseReplyTestActor`` until a reply is recorded.

        Raises:
            SystemError: if the worker process exits while waiting, does not
                show up in the worker metadata within 20 seconds, or no reply
                arrives within 20 seconds after submitting the graph.
        """
        import mars.tensor as mt
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # Bound before the ``try`` so the ``finally`` clause can tell whether
        # the worker was ever started; otherwise a failure while creating the
        # pool would be masked by a NameError raised during cleanup.
        proc = None
        try:
            session_id = str(uuid.uuid4())
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(ClusterInfoActor,
                                  schedulers=[mock_scheduler_addr],
                                  uid=ClusterInfoActor.default_name())
                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())

                proc = subprocess.Popen([
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--ignore-avail-mem'
                ])
                worker_ips = []

                def waiter():
                    # Poll every 0.5s until some worker metadata is reported,
                    # giving up when the process dies or 20s have elapsed.
                    check_time = time.time()
                    while not resource_ref.get_workers_meta():
                        gevent.sleep(0.5)
                        if proc.poll() is not None:
                            raise SystemError('Worker dead. exit code %s' %
                                              proc.poll())
                        if time.time() - check_time > 20:
                            raise SystemError(
                                'Check meta_timestamp timeout')
                    val = resource_ref.get_workers_meta()
                    worker_ips.extend(val.keys())

                gl = gevent.spawn(waiter)
                # ``get`` blocks like ``join`` but re-raises whatever the
                # greenlet raised, so a dead worker or a timeout surfaces here
                # instead of as an IndexError on ``worker_ips[0]`` below.
                gl.get()

                a = mt.ones((100, 50), chunk_size=30)
                b = mt.ones((50, 200), chunk_size=30)
                result = a.dot(b)

                graph = result.build_graph(tiled=True)

                reply_ref = pool.create_actor(PromiseReplyTestActor)
                reply_callback = ((reply_ref.uid, reply_ref.address), 'reply')

                executor_ref = pool.actor_ref(ExecutionActor.default_name(),
                                              address=worker_ips[0])
                io_meta = dict(chunks=[c.key for c in result.chunks])
                executor_ref.execute_graph(session_id,
                                           str(id(graph)),
                                           serialize_graph(graph),
                                           io_meta,
                                           None,
                                           callback=reply_callback)

                check_time = time.time()
                while not reply_ref.get_reply():
                    gevent.sleep(0.1)
                    if time.time() - check_time > 20:
                        raise SystemError('Check reply timeout')
        finally:
            if proc is not None and proc.poll() is None:
                # Ask the worker to stop with SIGINT, poll once per second for
                # up to about five seconds, then kill it if still running.
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(1)
                    if proc.poll() is not None \
                            or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
Exemplo n.º 15
0
    def testAssignerActor(self):
        """Apply for a resource through ``AssignerActor`` and check the reply.

        Two mock workers are registered with identical resources.  Two of the
        three input chunks are recorded on ``endpoint1`` and one on
        ``endpoint2``; the test expects the value delivered to the reply
        actor to be ``endpoint1``.
        """
        scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent',
                               address=scheduler_addr) as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())
            chunk_meta_ref = pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_name())

            endpoint1, endpoint2 = 'localhost:12345', 'localhost:23456'
            res = dict(hardware=dict(cpu=4, memory=4096))

            def write_mock_meta():
                # Both workers advertise the same resource dictionary.
                for endpoint in (endpoint1, endpoint2):
                    resource_ref.set_worker_meta(endpoint, res)

            gevent.spawn(write_mock_meta).join()

            assigner_ref = pool.create_actor(AssignerActor,
                                             uid='AssignerActor')

            session_id = str(uuid.uuid4())
            op_key = str(uuid.uuid4())
            chunk_keys = [str(uuid.uuid4()) for _ in range(3)]

            optimize_info = {
                'depth': 0,
                'demand_depths': (),
                'successor_size': 1,
                'descendant_size': 0
            }
            op_info = {
                'op_name': 'test_op',
                'io_meta': dict(input_chunks=list(chunk_keys)),
                'output_size': 512,
                'retries': 0,
                'optimize': optimize_info
            }

            # First two chunks live on endpoint1, the third on endpoint2.
            holders = (endpoint1, endpoint1, endpoint2)
            for chunk_key, holder in zip(chunk_keys, holders):
                chunk_meta_ref.set_chunk_meta(session_id, chunk_key,
                                              size=512, workers=(holder, ))

            reply_ref = pool.create_actor(PromiseReplyTestActor)
            reply_callback = ((reply_ref.uid, reply_ref.address), 'reply')
            assigner_ref.apply_for_resource(session_id, op_key, op_info,
                                            callback=reply_callback)

            # Poll until the reply actor has recorded something.
            while not reply_ref.get_reply():
                gevent.sleep(0.1)
            _, ret_value = reply_ref.get_reply()
            self.assertEqual(ret_value, endpoint1)
Exemplo n.º 16
0
    def testFetchRemoteData(self):
        """Execute a graph whose input chunk is a ``TensorFetch`` operand.

        The same graph is submitted three times:

        1. with no chunk meta recorded for the fetched chunk,
        2. with chunk meta listing only ``'0.0.0.0:1234'`` as worker,
        3. with chunk meta listing ``'0.0.0.0:1234'`` and the pool address.

        The first two submissions are expected to end with
        ``DependencyMissing``; the third is expected to store a result equal
        to ``mock_data + 1``.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address,
                               distributor=WorkerDistributor(2)) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False,
                                        with_resource=True)
            pool.create_actor(CpuCalcActor)
            # NOTE(review): presumably this actor serves ``mock_data`` to the
            # fetch step - confirm against MockSenderActor.
            pool.create_actor(MockSenderActor,
                              mock_data,
                              'in',
                              uid='w:mock_sender')
            chunk_meta_ref = pool.actor_ref(ChunkMetaActor.default_name())

            import mars.tensor as mt
            from mars.tensor.expressions.datasource import TensorFetch
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            # Replace the operand of the data-source chunk with a TensorFetch
            # that reuses the original operand's key and outputs.
            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype,
                _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # Run 1: no chunk meta has been set for the fetched chunk.
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            # Run 2: chunk meta lists only '0.0.0.0:1234' as a worker.
            chunk_meta_ref.set_chunk_meta(session_id,
                                          modified_chunk.key,
                                          size=mock_data.nbytes,
                                          shape=mock_data.shape,
                                          workers=('0.0.0.0:1234', ))
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            # Run 3: chunk meta additionally lists this pool's own address.
            chunk_meta_ref.set_chunk_meta(session_id,
                                          modified_chunk.key,
                                          size=mock_data.nbytes,
                                          shape=mock_data.shape,
                                          workers=('0.0.0.0:1234',
                                                   pool_address))
            with self.run_actor_test(pool) as test_actor:

                def _validate(_):
                    # The stored result must equal the fetched data plus ones.
                    data = test_actor._chunk_store.get(
                        session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4, )))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            # No exception is expected from the final run.
            self.get_result()
Exemplo n.º 17
0
            def execute_case():
                """Create the scheduler actors, start the graph and stop it.

                Relies on ``pool``, ``session_id``, ``graph_key`` and
                ``graph`` from the enclosing scope.  ``stop_graph`` is sent
                once, about 0.8 seconds after polling starts, and the loop
                ends when the graph meta reports SUCCEEDED, FAILED or
                CANCELLED.

                Raises:
                    SystemError: if no such state is reached within 30
                        seconds.
                """
                pool.create_actor(ClusterInfoActor,
                                  [pool.cluster_info.address],
                                  uid=ClusterInfoActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())
                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                pool.create_actor(AssignerActor,
                                  uid=AssignerActor.default_name())
                graph_ref = pool.create_actor(GraphActor,
                                              session_id,
                                              graph_key,
                                              serialize_graph(graph),
                                              uid=GraphActor.gen_name(
                                                  session_id, graph_key))
                # Cache of fake execution actors, one per worker address.
                addr_dict = dict()

                def _build_mock_ref(uid=None, address=None):
                    # Return the cached fake actor for ``address`` or create
                    # one on first use; ``uid`` is ignored.
                    if address in addr_dict:
                        return addr_dict[address]
                    else:
                        r = addr_dict[address] = pool.create_actor(
                            FakeExecutionActor, sleep=1)
                        return r

                # handle mock objects
                OperandActor._get_raw_execution_ref.side_effect = _build_mock_ref

                mock_resource = dict(
                    hardware=dict(cpu=4, cpu_total=4, memory=512))

                def write_mock_meta():
                    # Register two mock workers with identical resources.
                    resource_ref.set_worker_meta('localhost:12345',
                                                 mock_resource)
                    resource_ref.set_worker_meta('localhost:23456',
                                                 mock_resource)

                v = gevent.spawn(write_mock_meta)
                v.join()

                graph_ref.prepare_graph()
                fetched_graph = graph_ref.get_chunk_graph()

                graph_ref.scan_node()
                graph_ref.place_initial_chunks()

                # Operand keys of chunks without successors; not read again
                # within this function.
                final_keys = set()
                for c in fetched_graph:
                    if fetched_graph.count_successors(c) == 0:
                        final_keys.add(c.op.key)

                graph_ref.create_operand_actors()
                graph_meta_ref = pool.actor_ref(
                    GraphMetaActor.gen_name(session_id, graph_key))
                start_time = time.time()
                cancel_called = False
                while True:
                    gevent.sleep(0.1)
                    # Send the stop request exactly once, after ~0.8s.
                    if not cancel_called and time.time() > start_time + 0.8:
                        cancel_called = True
                        graph_ref.stop_graph(_tell=True)
                    if time.time() - start_time > 30:
                        raise SystemError('Wait for execution finish timeout')
                    if graph_meta_ref.get_state() in (GraphState.SUCCEEDED,
                                                      GraphState.FAILED,
                                                      GraphState.CANCELLED):
                        break
Exemplo n.º 18
0
 def post_create(self):
     """Store a reference to the actor named by ``ChunkMetaActor.default_name()``."""
     meta_uid = ChunkMetaActor.default_name()
     self._chunk_meta_ref = self.ctx.actor_ref(meta_uid)
Exemplo n.º 19
0
    def testAssignerActor(self):
        """Query ``AssignerActor.get_worker_assignments`` for a mock operand.

        Two mock workers are registered with identical resources.  Chunk meta
        written through ``ChunkMetaClient`` places two of the three input
        chunks on ``endpoint1`` and one on ``endpoint2``; the first returned
        worker is expected to be ``endpoint1``.
        """
        scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent',
                               address=scheduler_addr) as pool:
            cluster_info_ref = pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(ChunkMetaActor,
                              uid=ChunkMetaActor.default_name())

            endpoint1, endpoint2 = 'localhost:12345', 'localhost:23456'
            res = dict(hardware=dict(cpu=4, memory=4096))

            def write_mock_meta():
                # Both workers advertise the same resource dictionary.
                for endpoint in (endpoint1, endpoint2):
                    resource_ref.set_worker_meta(endpoint, res)

            gevent.spawn(write_mock_meta).join()

            assigner_ref = pool.create_actor(
                AssignerActor, uid=AssignerActor.default_name())

            session_id = str(uuid.uuid4())
            chunk_keys = [str(uuid.uuid4()) for _ in range(3)]

            optimize_info = {
                'depth': 0,
                'demand_depths': (),
                'successor_size': 1,
                'descendant_size': 0
            }
            op_info = {
                'op_name': 'test_op',
                'io_meta': dict(input_chunks=list(chunk_keys)),
                'retries': 0,
                'optimize': optimize_info
            }

            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            # First two chunks live on endpoint1, the third on endpoint2.
            holders = (endpoint1, endpoint1, endpoint2)
            for chunk_key, holder in zip(chunk_keys, holders):
                chunk_meta_client.set_chunk_meta(session_id, chunk_key,
                                                 size=512, workers=(holder, ))

            workers = assigner_ref.get_worker_assignments(session_id, op_info)
            self.assertEqual(workers[0], endpoint1)
Exemplo n.º 20
0
    def testErrorOnPrepare(self, *_):
        """Check the graph state after failures or cancellation while preparing.

        Three ``GraphActor`` instances are created from the same serialized
        graph:

        * ``create_operand_actors`` patched to raise ``RuntimeError``:
          ``execute_graph`` must raise and the state must be ``FAILED``;
        * ``create_operand_actors`` patched to set the graph meta state to
          ``CANCELLING``: the state must end up ``CANCELLED``;
        * ``GraphAnalyzer.calc_initial_assignments`` patched to do the same
          and return an empty dict: the state must end up ``CANCELLED``.
        """
        session_id = str(uuid.uuid4())
        pool_addr = '127.0.0.1:%d' % get_next_port()

        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_addr) as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(ChunkMetaActor,
                              uid=ChunkMetaActor.default_name())
            pool.create_actor(AssignerActor, uid=AssignerActor.default_name())

            for worker_endpoint in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(
                    worker_endpoint, dict(hardware=dict(cpu_total=4)))

            def _spawn_graph_actor(key):
                # Every scenario uses a fresh GraphActor over the same graph.
                return pool.create_actor(
                    GraphActor, session_id, key, serialized_graph,
                    uid=GraphActor.gen_name(session_id, key))

            def _flag_cancelling(key):
                # Mark the graph identified by ``key`` as being cancelled.
                meta_ref = pool.actor_ref(
                    GraphMetaActor.gen_name(session_id, key))
                meta_ref.set_state(GraphState.CANCELLING)

            # error occurred in create_operand_actors
            graph_key = str(uuid.uuid4())
            expr = mt.random.random((8, 2), chunk_size=2) + 1
            serialized_graph = serialize_graph(expr.build_graph(compose=False))
            graph_ref = _spawn_graph_actor(graph_key)

            def _mock_raises(*_, **__):
                raise RuntimeError

            with patch_method(GraphActor.create_operand_actors,
                              new=_mock_raises), \
                    self.assertRaises(RuntimeError):
                graph_ref.execute_graph()
            self.assertEqual(graph_ref.get_state(), GraphState.FAILED)
            graph_ref.destroy()

            # interrupted during create_operand_actors
            graph_key = str(uuid.uuid4())
            graph_ref = _spawn_graph_actor(graph_key)

            def _mock_cancels(*_, **__):
                _flag_cancelling(graph_key)

            with patch_method(GraphActor.create_operand_actors,
                              new=_mock_cancels):
                graph_ref.execute_graph()
            self.assertEqual(graph_ref.get_state(), GraphState.CANCELLED)

            # interrupted during previous steps
            graph_key = str(uuid.uuid4())
            graph_ref = _spawn_graph_actor(graph_key)

            def _mock_cancels(*_, **__):
                _flag_cancelling(graph_key)
                return dict()

            with patch_method(GraphAnalyzer.calc_initial_assignments,
                              new=_mock_cancels):
                graph_ref.execute_graph()
            self.assertEqual(graph_ref.get_state(), GraphState.CANCELLED)
Exemplo n.º 21
0
    def testOperandActorWithCancel(self, *_):
        """Stop a graph shortly after its operand actors start; expect CANCELLED.

        Execution is routed to ``FakeExecutionActor`` instances (one per
        worker address) by overriding the side effect of
        ``OperandActor._get_raw_execution_ref``.  ``stop_graph`` is sent
        once, about 0.3 seconds after polling begins.

        Raises:
            SystemError: if the graph meta reaches none of SUCCEEDED, FAILED
                or CANCELLED within 30 seconds.
        """
        import logging
        logging.basicConfig(level=logging.DEBUG)

        lhs = mt.random.randint(10, size=(10, 8), chunk_size=4)
        rhs = mt.random.randint(10, size=(10, 8), chunk_size=4)
        total = lhs + rhs

        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())

        graph = total.build_graph(compose=False)
        terminal_states = (GraphState.SUCCEEDED, GraphState.FAILED,
                           GraphState.CANCELLED)

        with create_actor_pool(n_process=1, backend='gevent') as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(ChunkMetaActor,
                              uid=ChunkMetaActor.default_name())
            pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialize_graph(graph),
                uid=GraphActor.gen_name(session_id, graph_key))
            exec_refs = dict()

            def _build_mock_ref(uid=None, address=None):
                # One fake execution actor per address, created lazily.
                try:
                    return exec_refs[address]
                except KeyError:
                    ref = exec_refs[address] = pool.create_actor(
                        FakeExecutionActor, exec_delay=0.2)
                    return ref

            # handle mock objects
            OperandActor._get_raw_execution_ref.side_effect = _build_mock_ref

            mock_resource = dict(hardware=dict(cpu=4, cpu_total=4, memory=512))

            # Twenty mock workers on consecutive ports starting at 12345.
            for port in range(12345, 12345 + 20):
                resource_ref.set_worker_meta('localhost:%d' % port,
                                             mock_resource)

            graph_ref.prepare_graph(compose=False)
            fetched_graph = graph_ref.get_chunk_graph()

            graph_ref.analyze_graph()

            # Operand keys of chunks that have no successors.
            final_keys = {
                chunk.op.key for chunk in fetched_graph
                if fetched_graph.count_successors(chunk) == 0
            }

            graph_ref.create_operand_actors()
            graph_meta_ref = pool.actor_ref(
                GraphMetaActor.gen_name(session_id, graph_key))
            start_time = time.time()
            cancel_called = False
            while True:
                pool.sleep(0.05)
                if not cancel_called and time.time() > start_time + 0.3:
                    cancel_called = True
                    graph_ref.stop_graph(_tell=True)
                if time.time() - start_time > 30:
                    raise SystemError('Wait for execution finish timeout')
                if graph_meta_ref.get_state() in terminal_states:
                    break
            self.assertEqual(graph_meta_ref.get_state(), GraphState.CANCELLED)
Exemplo n.º 22
0
    def testPrepareSpilled(self):
        """Execute a graph whose fetched input chunk is written as a spill file.

        The graph is submitted three times:

        1. with no chunk meta for the fetched chunk: ``DependencyMissing``
           is expected;
        2. after recording chunk meta and writing the chunk with
           ``write_spill_file``: the result must equal ``mock_data + 1``;
        3. after destroying and re-creating ``ChunkMetaActor`` and
           ``ChunkHolderActor`` and submitting with an io-meta that names
           the fetched chunk in ``input_chunks`` and
           ``shared_input_chunks``: the result must again equal
           ``mock_data + 1``.
        """
        from mars.worker.spill import write_spill_file

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])

        # The label has to be passed as ``prefix``: the first positional
        # parameter of ``mkdtemp`` is ``suffix``, which would put the
        # dash-terminated label at the end of the directory name.
        # NOTE(review): the directory is not removed and the option is not
        # restored by this test - confirm whether tearDown takes care of it.
        options.worker.spill_directory = tempfile.mkdtemp(
            prefix='mars_worker_prep_spilled-')

        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False)
            pool.create_actor(SpillActor)
            pool.create_actor(CpuCalcActor)
            chunk_meta_ref = pool.actor_ref(ChunkMetaActor.default_name())
            chunk_holder_ref = pool.actor_ref(ChunkHolderActor.default_name())

            import mars.tensor as mt
            from mars.tensor.expressions.datasource import TensorFetchChunk
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            # Replace the operand of the data-source chunk with a
            # TensorFetchChunk reusing the original operand's key and outputs.
            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetchChunk(
                dtype=modified_chunk.dtype,
                _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # Run 1: nothing is known about the fetched chunk yet.
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            # Run 2: record chunk meta and write the chunk as a spill file.
            chunk_meta_ref.set_chunk_meta(session_id,
                                          modified_chunk.key,
                                          size=mock_data.nbytes,
                                          shape=mock_data.shape,
                                          workers=('0.0.0.0:1234',
                                                   pool_address))
            write_spill_file(modified_chunk.key, mock_data)

            with self.run_actor_test(pool) as test_actor:

                def _validate(_):
                    # The stored result must equal the spilled data plus ones.
                    data = test_actor._chunk_store.get(
                        session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4, )))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

            # Run 3: start over with fresh meta/holder actors, then record
            # the meta and write the spill file again.
            chunk_meta_ref.destroy()
            chunk_holder_ref.destroy()
            pool.create_actor(ChunkHolderActor,
                              uid=ChunkHolderActor.default_name())
            chunk_meta_ref = pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_name())
            chunk_meta_ref.set_chunk_meta(session_id,
                                          modified_chunk.key,
                                          size=mock_data.nbytes,
                                          shape=mock_data.shape,
                                          workers=('0.0.0.0:1234',
                                                   pool_address))
            write_spill_file(modified_chunk.key, mock_data)

            with self.run_actor_test(pool) as test_actor:

                def _validate(_):
                    # The stored result must equal the spilled data plus ones.
                    data = test_actor._chunk_store.get(
                        session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4, )))

                graph_key = str(uuid.uuid4())
                # This submission also names the fetched chunk as an input
                # and as a shared input.
                op_meta = dict(
                    chunks=[result_tensor.chunks[0].key],
                    input_chunks=[modified_chunk.key],
                    shared_input_chunks=[modified_chunk.key],
                )
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            op_meta, None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Exemplo n.º 23
0
    def testGraphActor(self):
        # Drives a GraphActor through its preparation stages on a local
        # single-process gevent actor pool and checks the operand info it
        # reports after each stage: prepare_graph -> scan_node ->
        # place_initial_chunks -> create_operand_actors.
        session_id, graph_key = str(uuid.uuid4()), str(uuid.uuid4())

        lhs = mt.random.randint(10, size=(10, 8), chunk_size=4)
        rhs = mt.random.randint(10, size=(10, 8), chunk_size=4)
        total = lhs + rhs

        # The untiled graph is what gets shipped to the actor; the tiled one
        # is built locally as the reference for the chunk graph size.
        tensor_graph = total.build_graph(compose=False)
        serialized_graph = serialize_graph(tensor_graph)
        tiled_graph = total.build_graph(compose=False, tiled=True)

        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            pool.create_actor(
                ClusterInfoActor, [pool.cluster_info.address], uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
            pool.create_actor(AssignerActor, uid=AssignerActor.gen_name(session_id))
            graph_uid = GraphActor.gen_name(session_id, graph_key)
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialized_graph, uid=graph_uid)

            # Stage 1: the actor's chunk graph must exist and have as many
            # nodes as the locally tiled graph.
            graph_ref.prepare_graph(compose=False)
            chunk_graph = graph_ref.get_chunk_graph()
            self.assertIsNotNone(chunk_graph)
            self.assertEqual(len(tiled_graph), len(chunk_graph))

            # Stage 2: after scanning, every operand carries its optimize
            # statistics.
            graph_ref.scan_node()
            op_infos = graph_ref.get_operand_info()
            for chunk in chunk_graph:
                optimize_info = op_infos[chunk.op.key]['optimize']
                for field in ('depth', 'successor_size', 'descendant_size'):
                    self.assertIsNotNone(optimize_info[field])

            # Register two workers before asking for initial placement.
            for worker in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(worker, dict(hardware=dict(cpu_total=4)))

            # Stage 3: every chunk without predecessors gets a target worker.
            graph_ref.place_initial_chunks()
            op_infos = graph_ref.get_operand_info()
            for chunk in chunk_graph:
                if chunk_graph.count_predecessors(chunk) == 0:
                    self.assertIsNotNone(op_infos[chunk.op.key]['target_worker'])

            # Stage 4: operand info must mirror the chunk graph topology.
            graph_ref.create_operand_actors(_clean_io_meta=False)
            op_infos = graph_ref.get_operand_info()
            for chunk in chunk_graph:
                op_info = op_infos[chunk.op.key]
                self.assertEqual(op_info['op_name'], type(chunk.op).__name__)

                io_meta = op_info['io_meta']
                predecessors = list(chunk_graph.iter_predecessors(chunk))
                successors = list(chunk_graph.iter_successors(chunk))
                # Order is irrelevant, so each io_meta field is compared as a
                # set against what the graph itself says.
                expected_io_meta = (
                    ('predecessors', {pred.op.key for pred in predecessors}),
                    ('successors', {succ.op.key for succ in successors}),
                    ('input_chunks', {pred.key for pred in predecessors}),
                    ('chunks', {out.key for out in chunk.op.outputs}),
                )
                for field, expected in expected_io_meta:
                    self.assertSetEqual(set(io_meta[field]), expected)

                output_bytes = sum(out.nbytes for out in chunk.op.outputs)
                self.assertEqual(op_info['output_size'], output_bytes)