Example #1
async def test_assign_gpu_tasks(actor_pool):
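    # two fetch inputs feed a TensorTreeAdd marked gpu=True; the assigner is expected to return a GPU band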
    pool, session_id, assigner_ref, cluster_api, meta_api = actor_pool

    input1 = TensorFetch(key='a', source_key='a',
                         dtype=np.dtype(int)).new_chunk([])
    input2 = TensorFetch(key='b', source_key='b',
                         dtype=np.dtype(int)).new_chunk([])
    result_chunk = TensorTreeAdd(args=[input1, input2], gpu=True) \
        .new_chunk([input1, input2])

    chunk_graph = ChunkGraph([result_chunk])
    chunk_graph.add_node(input1)
    chunk_graph.add_node(input2)
    chunk_graph.add_node(result_chunk)
    chunk_graph.add_edge(input1, result_chunk)
    chunk_graph.add_edge(input2, result_chunk)

    await meta_api.set_chunk_meta(input1,
                                  memory_size=200,
                                  store_size=200,
                                  bands=[('address0', 'numa-0')])
    await meta_api.set_chunk_meta(input2,
                                  memory_size=200,
                                  store_size=200,
                                  bands=[('address0', 'numa-0')])

    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    [result] = await assigner_ref.assign_subtasks([subtask])
    assert result[1].startswith('gpu')
Example #2
    def testMockExecuteSize(self):
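        # mock execution estimates result sizes and peak memory without actually running the graph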
        import mars.tensor as mt
        from mars.core.graph import DAG
        from mars.tensor.fetch import TensorFetch
        from mars.tensor.arithmetic import TensorTreeAdd

        graph_add = DAG()
        input_chunks = []
        for _ in range(2):
            fetch_op = TensorFetch(dtype=np.dtype('int64'))
            inp_chunk = fetch_op.new_chunk(None, shape=(100, 100)).data
            input_chunks.append(inp_chunk)

        add_op = TensorTreeAdd(args=input_chunks, dtype=np.dtype('int64'))
        add_chunk = add_op.new_chunk(input_chunks,
                                     shape=(100, 100),
                                     dtype=np.dtype('int64')).data
        graph_add.add_node(add_chunk)
        for inp_chunk in input_chunks:
            graph_add.add_node(inp_chunk)
            graph_add.add_edge(inp_chunk, add_chunk)

        executor = Executor()
        res = executor.execute_graph(graph_add, [add_chunk.key],
                                     compose=False,
                                     mock=True)[0]
        self.assertEqual(res, (80000, 80000))
        self.assertEqual(executor.mock_max_memory, 80000)

        for _ in range(3):
            new_add_op = TensorTreeAdd(args=[add_chunk],
                                       dtype=np.dtype('int64'))
            new_add_chunk = new_add_op.new_chunk([add_chunk],
                                                 shape=(100, 100),
                                                 dtype=np.dtype('int64')).data
            graph_add.add_node(new_add_chunk)
            graph_add.add_edge(add_chunk, new_add_chunk)

            add_chunk = new_add_chunk

        executor = Executor()
        res = executor.execute_graph(graph_add, [add_chunk.key],
                                     compose=False,
                                     mock=True)[0]
        self.assertEqual(res, (80000, 80000))
        self.assertEqual(executor.mock_max_memory, 160000)

        a = mt.random.rand(10, 10, chunk_size=10)
        b = a[:, mt.newaxis, :] - a
        r = mt.triu(mt.sqrt(b**2).sum(axis=2))

        executor = Executor()
        res = executor.execute_tensor(r, concat=False, mock=True)
        # larger than maximal memory size in calc procedure
        self.assertGreaterEqual(res[0][0], 800)
        self.assertGreaterEqual(executor.mock_max_memory, 8000)
Example #3
async def test_execute_tensor(actor_pool):
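    # execute a two-input TensorTreeAdd subtask, then verify the result, the quota request and the chunk meta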
    pool, session_id, meta_api, storage_api, execution_ref = actor_pool

    data1 = np.random.rand(10, 10)
    data2 = np.random.rand(10, 10)

    input1 = TensorFetch(key='input1',
                         source_key='input1',
                         dtype=np.dtype(int)).new_chunk([])
    input2 = TensorFetch(key='input2',
                         source_key='input2',
                         dtype=np.dtype(int)).new_chunk([])
    result_chunk = TensorTreeAdd(args=[input1, input2]) \
        .new_chunk([input1, input2], shape=data1.shape, dtype=data1.dtype)

    await meta_api.set_chunk_meta(input1,
                                  memory_size=data1.nbytes,
                                  store_size=data1.nbytes,
                                  bands=[(pool.external_address, 'numa-0')])
    await meta_api.set_chunk_meta(input2,
                                  memory_size=data2.nbytes,
                                  store_size=data2.nbytes,
                                  bands=[(pool.external_address, 'numa-0')])
    # todo use different storage level when storage ready
    await storage_api.put(input1.key, data1)
    await storage_api.put(input2.key, data2)

    chunk_graph = ChunkGraph([result_chunk])
    chunk_graph.add_node(input1)
    chunk_graph.add_node(input2)
    chunk_graph.add_node(result_chunk)
    chunk_graph.add_edge(input1, result_chunk)
    chunk_graph.add_edge(input2, result_chunk)

    subtask = Subtask('test_task',
                      session_id=session_id,
                      chunk_graph=chunk_graph)
    await execution_ref.run_subtask(subtask, 'numa-0', pool.external_address)

    # check if results are correct
    result = await storage_api.get(result_chunk.key)
    np.testing.assert_array_equal(data1 + data2, result)

    # check if quota computations are correct
    quota_ref = await mo.actor_ref(QuotaActor.gen_uid('numa-0'),
                                   address=pool.external_address)
    [quota] = await quota_ref.get_batch_quota_reqs()
    assert quota[(subtask.session_id, subtask.subtask_id)] == data1.nbytes

    # check if metas are correct
    result_meta = await meta_api.get_chunk_meta(result_chunk.key)
    assert result_meta['object_id'] == result_chunk.key
    assert result_meta['shape'] == result.shape
Example #4
    def testPrepareSpilled(self):
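        # execution first fails with DependencyMissing, then succeeds after meta is registered and the input is spilled to disk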
        from mars.worker.spill import write_spill_file

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])

        options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_worker_prep_spilled-')

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(SpillActor)
            pool.create_actor(CpuCalcActor)
            cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            pool.actor_ref(ChunkHolderActor.default_uid())

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # test meta missing
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                             shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
            write_spill_file(modified_chunk.key, mock_data)

            # test read from spilled file
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #5
    def testPrepareQuota(self, *_):
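        # chunk pinning fails until a background thread clears the flag after 1 s, so execution finishes only after that delay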
        pinned = [True]

        def _mock_pin(_graph_key, chunk_keys):
            from mars.errors import PinChunkFailed
            if pinned[0]:
                raise PinChunkFailed
            return chunk_keys

        ChunkHolderActor.pin_chunks.side_effect = _mock_pin

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender')
            pool.create_actor(CpuCalcActor)
            cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)
            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                             shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                start_time = time.time()

                execution_ref.execute_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(time.time())) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                def _delay_fun():
                    time.sleep(1)
                    pinned[0] = False

                threading.Thread(target=_delay_fun).start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 1)
Example #6
    def testCopyAdd(self):
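        # copy the add op, replace rhs with a fetch chunk, and check that the new chunk keeps its key and input identity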
        t1 = ones((3, 4), chunk_size=2)
        t2 = ones(4, chunk_size=2)
        t3 = t1 + t2
        t3.tiles()

        c = t3.chunks[0]
        inputs = c.op.lhs, TensorFetch().new_chunk(
            c.op.rhs.inputs, shape=c.op.rhs.shape, index=c.op.rhs.index, _key=c.op.rhs.key)
        new_c = c.op.copy().reset_key().new_chunk(inputs, shape=c.shape, _key='new_key')
        self.assertEqual(new_c.key, 'new_key')
        self.assertIs(new_c.inputs[1], new_c.op.rhs)
        self.assertIsInstance(new_c.inputs[1].op, TensorFetch)
Example #7
async def test_assigner(actor_pool):
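    # inputs live on three bands; the assigner should pick one of the bands holding the larger (400-byte) inputs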
    pool, session_id, assigner_ref, meta_api = actor_pool

    input1 = TensorFetch(key='a', source_key='a',
                         dtype=np.dtype(int)).new_chunk([])
    input2 = TensorFetch(key='b', source_key='b',
                         dtype=np.dtype(int)).new_chunk([])
    input3 = TensorFetch(key='c', source_key='c',
                         dtype=np.dtype(int)).new_chunk([])
    result_chunk = TensorTreeAdd(args=[input1, input2, input3]) \
        .new_chunk([input1, input2, input3])

    chunk_graph = ChunkGraph([result_chunk])
    chunk_graph.add_node(input1)
    chunk_graph.add_node(input2)
    chunk_graph.add_node(input3)
    chunk_graph.add_node(result_chunk)
    chunk_graph.add_edge(input1, result_chunk)
    chunk_graph.add_edge(input2, result_chunk)
    chunk_graph.add_edge(input3, result_chunk)

    await meta_api.set_chunk_meta(input1,
                                  memory_size=200,
                                  store_size=200,
                                  bands=[('address0', 'numa-0')])
    await meta_api.set_chunk_meta(input2,
                                  memory_size=400,
                                  store_size=400,
                                  bands=[('address1', 'numa-0')])
    await meta_api.set_chunk_meta(input3,
                                  memory_size=400,
                                  store_size=400,
                                  bands=[('address2', 'numa-0')])

    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    [result] = await assigner_ref.assign_subtasks([subtask])
    assert result in (('address1', 'numa-0'), ('address2', 'numa-0'))
Example #8
def test_copy_add():
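    # pytest-style variant of testCopyAdd above, using tile() instead of the .tiles() method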
    t1 = ones((3, 4), chunk_size=2)
    t2 = ones(4, chunk_size=2)
    t3 = t1 + t2
    t3 = tile(t3)

    c = t3.chunks[0]
    inputs = c.op.lhs, TensorFetch().new_chunk(c.op.rhs.inputs,
                                               shape=c.op.rhs.shape,
                                               index=c.op.rhs.index,
                                               _key=c.op.rhs.key)
    new_c = c.op.copy().reset_key().new_chunk(inputs,
                                              shape=c.shape,
                                              _key='new_key')
    assert new_c.key == 'new_key'
    assert new_c.inputs[1] is new_c.op.rhs
    assert isinstance(new_c.inputs[1].op, TensorFetch)
Example #9
    def _build_test_graph(data_list):
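        # build a minimal DAG: one fetch chunk per input feeding a single TensorTreeAdd chunk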
        from mars.tensor.fetch import TensorFetch
        from mars.tensor.arithmetic import TensorTreeAdd

        inputs = []
        for idx, d in enumerate(data_list):
            chunk_key = 'chunk-%d' % idx
            fetch_chunk = TensorFetch(to_fetch_key=chunk_key, dtype=d.dtype) \
                .new_chunk([], shape=d.shape, _key=chunk_key)
            inputs.append(fetch_chunk)
        add_chunk = TensorTreeAdd(dtype=data_list[0].dtype).new_chunk(inputs, shape=data_list[0].shape)

        exec_graph = DAG()
        exec_graph.add_node(add_chunk)
        for input_chunk in inputs:
            exec_graph.add_node(input_chunk)
            exec_graph.add_edge(input_chunk, add_chunk)
        return exec_graph, inputs, add_chunk
Example #10
    def _build_test_graph(data_list):
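        # same helper built on ChunkGraph, with randomized chunk keys and explicit op args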
        from mars.tensor.fetch import TensorFetch
        from mars.tensor.arithmetic import TensorTreeAdd

        inputs = []
        for idx, d in enumerate(data_list):
            chunk_key = f'chunk-{random.randint(0, 999)}-{idx}'
            fetch_chunk = TensorFetch(to_fetch_key=chunk_key, dtype=d.dtype) \
                .new_chunk([], shape=d.shape, _key=chunk_key)
            inputs.append(fetch_chunk)
        add_chunk = TensorTreeAdd(args=inputs, dtype=data_list[0].dtype) \
            .new_chunk(inputs, shape=data_list[0].shape)

        exec_graph = ChunkGraph([add_chunk.data])
        exec_graph.add_node(add_chunk.data)
        for input_chunk in inputs:
            exec_graph.add_node(input_chunk.data)
            exec_graph.add_edge(input_chunk.data, add_chunk.data)
        return exec_graph, inputs, add_chunk
Example #11
    def testPrepareSpilled(self):
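        # execution first fails with DependencyMissing, then reads the input back from DISK after it is dropped from process memory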
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])

        options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_worker_prep_spilled-')

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(IORunnerActor)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # test meta missing
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            metas = {modified_chunk.key: WorkerMeta(
                mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234', pool_address))}

            # test read from spilled file
            with self.run_actor_test(pool) as test_actor:
                self.waitp(
                    test_actor.storage_client.put_objects(
                            session_id, [modified_chunk.key], [mock_data], [DataStorageDevice.PROC_MEMORY])
                        .then(lambda *_: test_actor.storage_client.copy_to(
                            session_id, [modified_chunk.key], [DataStorageDevice.DISK]))
                )
                test_actor.storage_client.delete(session_id, [modified_chunk.key],
                                                 [DataStorageDevice.PROC_MEMORY])

                def _validate(_):
                    data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #12
    def testFetchRemoteData(self):
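        # execution fails while chunk metas point to unreachable workers and succeeds once a reachable address is supplied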
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False,
                                        with_resource=True)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)
            pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            metas = {modified_chunk.key: WorkerMeta(mock_data.nbytes, mock_data.shape, ('0.0.0.0:1234',))}
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            metas[modified_chunk.key] = WorkerMeta(
                mock_data.nbytes, mock_data.shape,
                ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost')))
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #13
    def testPrepareQuota(self, *_):
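        # pin_data_keys is patched to fail until a background thread clears the flag after 0.5 s, delaying execution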
        pinned = True

        orig_pin = SharedHolderActor.pin_data_keys

        def _mock_pin(self, session_id, chunk_keys, token):
            from mars.errors import PinDataKeyFailed
            if pinned:
                raise PinDataKeyFailed
            return orig_pin(self, session_id, chunk_keys, token)

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with patch_method(SharedHolderActor.pin_data_keys, new=_mock_pin), \
                create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(MockSenderActor, [mock_data], 'in', uid='w:mock_sender')
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)
            pool.actor_ref(WorkerClusterInfoActor.default_uid())

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)
            metas = {modified_chunk.key: WorkerMeta(
                mock_data.nbytes, mock_data.shape,
                ('0.0.0.0:1234', pool_address.replace('127.0.0.1', 'localhost')))}
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                start_time = time.time()

                execution_ref.execute_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), metas, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(time.time())) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                def _delay_fun():
                    nonlocal pinned
                    time.sleep(0.5)
                    pinned = False

                threading.Thread(target=_delay_fun).start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 0.5)
Example #14
    def testSimpleExecution(self):
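        # TensorOnes chunks are rewritten into fetches, the add graph is executed, and the cached result is validated twice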
        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False)
            pool.create_actor(CpuCalcActor, uid='w:1:calc-a')
            pool.create_actor(InProcHolderActor)

            import mars.tensor as mt
            from mars.tensor.datasource import TensorOnes
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((10, 8), chunk_size=10)
            arr_add = mt.ones((10, 8), chunk_size=10)
            arr2 = arr + arr_add
            graph = arr2.build_graph(compose=False, tiled=True)

            arr = get_tiled(arr)
            arr2 = get_tiled(arr2)

            metas = dict()
            for chunk in graph:
                if isinstance(chunk.op, TensorOnes):
                    chunk._op = TensorFetch(
                        dtype=chunk.dtype, _outputs=[weakref.ref(o) for o in chunk.op.outputs],
                        _key=chunk.op.key)
                    metas[chunk.key] = WorkerMeta(chunk.nbytes, chunk.shape, (pool_address,))

            with self.run_actor_test(pool) as test_actor:
                session_id = str(uuid.uuid4())

                storage_client = test_actor.storage_client
                self.waitp(
                    storage_client.put_objects(session_id, [arr.chunks[0].key], [np.ones((10, 8), dtype=np.int16)],
                                               [DataStorageDevice.SHARED_MEMORY]),
                )

                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                def _validate(_):
                    data = test_actor.shared_store.get(session_id, arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                graph_key = str(uuid.uuid4())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[arr2.chunks[0].key]), metas, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                def _validate(_):
                    data = test_actor.shared_store.get(session_id, arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #15
async def test_execute_with_cancel(actor_pool, cancel_phase):
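    # delay the selected phase, cancel the running subtask, then confirm the band slot still accepts new subtasks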
    pool, session_id, meta_api, storage_api, execution_ref = actor_pool

    # config for different phases
    ref_to_delay = None
    if cancel_phase == 'prepare':
        ref_to_delay = await mo.actor_ref(StorageManagerActor.default_uid(),
                                          address=pool.external_address)
    elif cancel_phase == 'quota':
        ref_to_delay = await mo.actor_ref(QuotaActor.gen_uid('numa-0'),
                                          address=pool.external_address)
    elif cancel_phase == 'slot':
        ref_to_delay = await mo.actor_ref(
            BandSlotManagerActor.gen_uid('numa-0'),
            address=pool.external_address)
    if ref_to_delay:
        await ref_to_delay.set_delay_fetch_time(100)

    def delay_fun(delay, _inp1):
        time.sleep(delay)
        return delay

    input1 = TensorFetch(key='input1',
                         source_key='input1',
                         dtype=np.dtype(int)).new_chunk([])
    remote_result = RemoteFunction(function=delay_fun, function_args=[100, input1],
                                   function_kwargs={}, n_output=1) \
        .new_chunk([input1])

    data1 = np.random.rand(10, 10)
    await meta_api.set_chunk_meta(input1,
                                  memory_size=data1.nbytes,
                                  store_size=data1.nbytes,
                                  bands=[(pool.external_address, 'numa-0')])
    await storage_api.put(input1.key, data1)

    chunk_graph = ChunkGraph([remote_result])
    chunk_graph.add_node(input1)
    chunk_graph.add_node(remote_result)
    chunk_graph.add_edge(input1, remote_result)

    subtask = Subtask(f'test_task_{uuid.uuid4()}',
                      session_id=session_id,
                      chunk_graph=chunk_graph)
    aiotask = asyncio.create_task(
        execution_ref.run_subtask(subtask, 'numa-0', pool.external_address))
    await asyncio.sleep(1)

    with Timer() as timer:
        await execution_ref.cancel_subtask(subtask.subtask_id, kill_timeout=1)
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(aiotask, timeout=30)
    assert timer.duration < 6

    # check for different phases
    if ref_to_delay is not None:
        assert await ref_to_delay.get_is_cancelled()
        await ref_to_delay.set_delay_fetch_time(0)

    # test if slot is restored
    remote_tileable = mr.spawn(delay_fun, args=(0.5, None))
    graph = TileableGraph([remote_tileable.data])
    next(TileableGraphBuilder(graph).build())

    chunk_graph = next(ChunkGraphBuilder(graph, fuse_enabled=False).build())

    subtask = Subtask(f'test_task2_{uuid.uuid4()}',
                      session_id=session_id,
                      chunk_graph=chunk_graph)
    await asyncio.wait_for(execution_ref.run_subtask(subtask, 'numa-0',
                                                     pool.external_address),
                           timeout=30)
Example #16
async def test_assign_cpu_tasks(actor_pool):
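    # assignment should respect stopped nodes and expect_bands, and raise NoMatchingSlots for a GPU op with no GPU bands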
    pool, session_id, assigner_ref, cluster_api, meta_api = actor_pool

    input1 = TensorFetch(key='a', source_key='a',
                         dtype=np.dtype(int)).new_chunk([])
    input2 = TensorFetch(key='b', source_key='b',
                         dtype=np.dtype(int)).new_chunk([])
    input3 = TensorFetch(key='c', source_key='c',
                         dtype=np.dtype(int)).new_chunk([])
    result_chunk = TensorTreeAdd(args=[input1, input2, input3]) \
        .new_chunk([input1, input2, input3])

    chunk_graph = ChunkGraph([result_chunk])
    chunk_graph.add_node(input1)
    chunk_graph.add_node(input2)
    chunk_graph.add_node(input3)
    chunk_graph.add_node(result_chunk)
    chunk_graph.add_edge(input1, result_chunk)
    chunk_graph.add_edge(input2, result_chunk)
    chunk_graph.add_edge(input3, result_chunk)

    await meta_api.set_chunk_meta(input1,
                                  memory_size=200,
                                  store_size=200,
                                  bands=[('address0', 'numa-0')])
    await meta_api.set_chunk_meta(input2,
                                  memory_size=400,
                                  store_size=400,
                                  bands=[('address1', 'numa-0')])
    await meta_api.set_chunk_meta(input3,
                                  memory_size=400,
                                  store_size=400,
                                  bands=[('address2', 'numa-0')])

    await cluster_api.set_node_status(node='address1',
                                      role=NodeRole.WORKER,
                                      status=NodeStatus.STOPPING)
    await cluster_api.set_node_status(node='address3',
                                      role=NodeRole.WORKER,
                                      status=NodeStatus.STOPPING)

    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    [result] = await assigner_ref.assign_subtasks([subtask])
    assert result in (('address0', 'numa-0'), ('address2', 'numa-0'))

    subtask.expect_bands = [('address0', 'numa-0')]
    [result] = await assigner_ref.assign_subtasks([subtask])
    assert result == ('address0', 'numa-0')

    subtask.expect_bands = [('address0', 'numa-0'), ('address1', 'numa-0')]
    [result] = await assigner_ref.assign_subtasks([subtask])
    assert result == ('address0', 'numa-0')

    subtask.expect_bands = [('address1', 'numa-0')]
    [result] = await assigner_ref.assign_subtasks([subtask])
    assert result in (('address0', 'numa-0'), ('address2', 'numa-0'))

    result_chunk.op.gpu = True
    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    with pytest.raises(NoMatchingSlots) as err:
        await assigner_ref.assign_subtasks([subtask])
    assert 'gpu' in str(err.value)
Example #17
    def testSimpleExecution(self):
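        # same execution-and-callback flow as the test above, using ChunkHolderActor and the test actor's chunk store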
        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False)
            pool.create_actor(CpuCalcActor, uid='w:1:calc-a')

            import mars.tensor as mt
            from mars.tensor.datasource import TensorOnes
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((10, 8), chunk_size=10)
            arr_add = mt.ones((10, 8), chunk_size=10)
            arr2 = arr + arr_add
            graph = arr2.build_graph(compose=False, tiled=True)

            for chunk in graph:
                if isinstance(chunk.op, TensorOnes):
                    chunk._op = TensorFetch(
                        dtype=chunk.dtype, _outputs=[weakref.ref(o) for o in chunk.op.outputs],
                        _key=chunk.op.key)

            with self.run_actor_test(pool) as test_actor:

                session_id = str(uuid.uuid4())
                chunk_holder_ref = test_actor.promise_ref(ChunkHolderActor.default_uid())

                refs = test_actor._chunk_store.put(session_id, arr.chunks[0].key,
                                                   np.ones((10, 8), dtype=np.int16))
                chunk_holder_ref.register_chunk(session_id, arr.chunks[0].key)
                del refs

                refs = test_actor._chunk_store.put(session_id, arr_add.chunks[0].key,
                                                   np.ones((10, 8), dtype=np.int16))
                chunk_holder_ref.register_chunk(session_id, arr_add.chunks[0].key)
                del refs

                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                def _validate(_):
                    data = test_actor._chunk_store.get(session_id, arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                graph_key = str(uuid.uuid4())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[arr2.chunks[0].key]), None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                def _validate(_):
                    data = test_actor._chunk_store.get(session_id, arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()