Exemplo n.º 1
0
async def test_mem_quota_allocation(actor_pool, enable_kill_slot):
    """Check that a memory quota request waits until mocked memory frees up.

    The mocked memory stat starts with 50 available while a request for 100
    is issued in a background task. After 0.2 s the request must not appear
    in the quota actor's allocations. Available/free memory is then raised
    to 150 and the background task is expected to finish within 1 s.
    Finally, the truthiness of the mock slot manager's restart record must
    equal ``enable_kill_slot``.
    """
    from mars.utils import AttributeDict

    pool_address = actor_pool.external_address
    band_name = 'numa-0'

    # Mutable stand-in returned by the patched ``mars.resource.virtual_memory``.
    fake_memory = AttributeDict(
        {'total': 300, 'available': 50, 'used': 0, 'free': 50})

    slot_manager_ref = await mo.create_actor(
        MockBandSlotManagerActor,
        uid=BandSlotManagerActor.gen_uid(band_name),
        address=pool_address)
    quota_ref = await mo.create_actor(
        MemQuotaActor,
        (pool_address, band_name),
        300,
        hard_limit=300,
        refresh_time=0.1,
        enable_kill_slot=enable_kill_slot,
        uid=MemQuotaActor.gen_uid('cpu-0'),
        address=pool_address)  # type: Union[QuotaActorRef, mo.ActorRef]

    with mock.patch('mars.resource.virtual_memory', new=lambda: fake_memory):
        # First entry is the start time; the background task appends the
        # time at which its request/release round trip completed.
        timestamps = [time.time()]

        async def request_then_release():
            await quota_ref.request_batch_quota({'req': 100})
            await quota_ref.release_quotas(['req'])
            timestamps.append(time.time())

        pending_request = asyncio.create_task(request_then_release())
        await asyncio.sleep(0.2)

        # Only 50 is reported as available, so the request must not be
        # listed among the allocations yet.
        quota_dump = await quota_ref.dump_data()
        assert 'req' not in quota_dump.allocations

        # Report more memory and expect the pending request to go through.
        fake_memory['available'] = 150
        fake_memory['free'] = 150
        await asyncio.wait_for(pending_request, timeout=1)

        elapsed = abs(timestamps[0] - timestamps[1])
        assert 0.15 < elapsed < 1

        restart_record = await slot_manager_ref.get_restart_record()
        assert bool(restart_record) == enable_kill_slot
Exemplo n.º 2
0
async def actor_pool(request):
    """Set up an actor pool with mock services for subtask execution tests.

    ``request.param`` gives the number of slots; the pool is created with
    that many sub processes, each labelled 'numa-0'. Inside the pool the
    mock cluster, session, meta, lifecycle, subtask and storage APIs are
    created, followed by the execution, quota, band slot manager and task
    manager actors.

    Yields a tuple ``(pool, session_id, meta_api, storage_api,
    execution_ref)``.
    """
    band_name = 'numa-0'
    n_slots = request.param
    pool = await mo.create_actor_pool(
        '127.0.0.1',
        labels=[None] + [band_name] * n_slots,
        n_process=n_slots)

    async with pool:
        address = pool.external_address
        session_id = 'test_session'

        # Mock service APIs, created in the same order as before.
        await MockClusterAPI.create(address, band_to_slots={band_name: n_slots})
        await MockSessionAPI.create(address, session_id=session_id)
        meta_api = await MockMetaAPI.create(session_id, address)
        await MockLifecycleAPI.create(session_id, address)
        await MockSubtaskAPI.create(address)
        storage_api = await MockStorageAPI.create(
            session_id, address, storage_manger_cls=MockStorageManagerActor)

        # Subtask execution actor: the one the tests drive directly.
        execution_ref = await mo.create_actor(
            SubtaskExecutionActor,
            uid=SubtaskExecutionActor.default_uid(),
            address=address)

        # Mock quota actor for the band, constructed with 102400.
        await mo.create_actor(
            MockQuotaActor,
            102400,
            uid=QuotaActor.gen_uid(band_name),
            address=address)

        # Mock band slot manager, registered under the real actor's uid.
        await mo.create_actor(
            MockBandSlotManagerActor,
            band_name,
            n_slots,
            uid=BandSlotManagerActor.gen_uid(band_name),
            address=address)

        # Mock task manager for the test session.
        await mo.create_actor(
            MockTaskManager,
            uid=TaskManagerActor.gen_uid(session_id),
            address=address)

        yield pool, session_id, meta_api, storage_api, execution_ref
Exemplo n.º 3
0
async def actor_pool(request):
    """Set up an actor pool holding a band slot manager for 'numa-0'.

    ``request.param`` gives the number of slots / sub processes. On
    platforms other than win32 the subprocess start method is read from the
    ``POOL_START_METHOD`` environment variable (default 'forkserver'); on
    win32 it is ``None``.

    Yields a tuple ``(pool, slot_manager_ref)``.
    """
    if sys.platform == 'win32':
        start_method = None
    else:
        start_method = os.environ.get('POOL_START_METHOD', 'forkserver')

    band_name = 'numa-0'
    n_slots = request.param
    pool = await mo.create_actor_pool(
        f'127.0.0.1:{get_next_port()}',
        n_process=n_slots,
        labels=[None] + [band_name] * n_slots,
        subprocess_start_method=start_method)

    async with pool:
        address = pool.external_address

        # The mock global slot manager is handed to the band slot manager
        # as its third positional argument.
        global_slots_ref = await mo.create_actor(
            MockGlobalSlotManagerActor,
            uid=GlobalSlotManagerActor.default_uid(),
            address=address)

        slot_manager_ref = await mo.create_actor(
            BandSlotManagerActor,
            band_name,
            n_slots,
            global_slots_ref,
            uid=BandSlotManagerActor.gen_uid(band_name),
            address=address)

        yield pool, slot_manager_ref
Exemplo n.º 4
0
 async def __post_create__(self):
     """Resolve the 'numa-0' band slot manager and report this actor to it.

     Stores the slot manager's reference in ``self._dispatch_ref`` and then
     sends it a ``release_free_slot`` message (via ``tell``) carrying this
     actor's own reference.
     """
     slot_manager_uid = BandSlotManagerActor.gen_uid('numa-0')
     # The slot manager is looked up at this actor's own address.
     self._dispatch_ref = await mo.actor_ref(
         slot_manager_uid, address=self.address)
     await self._dispatch_ref.release_free_slot.tell(self.ref())
Exemplo n.º 5
0
async def test_execute_with_cancel(actor_pool, cancel_phase):
    """Cancel a running subtask, then check that another subtask still runs.

    ``cancel_phase`` selects which actor ('prepare' -> storage manager,
    'quota' -> quota actor, 'slot' -> band slot manager) is told to delay
    via ``set_delay_fetch_time(100)``; any other value delays nothing.

    A subtask wrapping ``delay_fun(100, input1)`` is started, cancelled
    after one second with ``kill_timeout=1``, and must raise
    ``asyncio.CancelledError`` with ``Timer.duration`` below 6. When an
    actor was delayed, it must report ``get_is_cancelled()`` as true. A
    second, short subtask is then run to completion on the same band.
    """
    pool, session_id, meta_api, storage_api, execution_ref = actor_pool

    # Lazily computed uid of the actor to delay for each cancel phase.
    delayed_uid_getters = {
        'prepare': lambda: StorageManagerActor.default_uid(),
        'quota': lambda: QuotaActor.gen_uid('numa-0'),
        'slot': lambda: BandSlotManagerActor.gen_uid('numa-0'),
    }
    delayed_ref = None
    get_delayed_uid = delayed_uid_getters.get(cancel_phase)
    if get_delayed_uid is not None:
        delayed_ref = await mo.actor_ref(
            get_delayed_uid(), address=pool.external_address)
    if delayed_ref:
        await delayed_ref.set_delay_fetch_time(100)

    # Payload executed by the subtasks; kept exactly as written because it
    # is passed to RemoteFunction / mr.spawn below.
    def delay_fun(delay, _inp1):
        time.sleep(delay)
        return delay

    # --- build a chunk graph: input1 -> remote_result ---------------------
    fetch_op = TensorFetch(
        key='input1', source_key='input1', dtype=np.dtype(int))
    input_chunk = fetch_op.new_chunk([])

    remote_op = RemoteFunction(
        function=delay_fun,
        function_args=[100, input_chunk],
        function_kwargs={},
        n_output=1)
    result_chunk = remote_op.new_chunk([input_chunk])

    # Register meta and store data for the input chunk.
    input_data = np.random.rand(10, 10)
    await meta_api.set_chunk_meta(
        input_chunk,
        memory_size=input_data.nbytes,
        store_size=input_data.nbytes,
        bands=[(pool.external_address, 'numa-0')])
    await storage_api.put(input_chunk.key, input_data)

    long_graph = ChunkGraph([result_chunk])
    long_graph.add_node(input_chunk)
    long_graph.add_node(result_chunk)
    long_graph.add_edge(input_chunk, result_chunk)

    # --- start the long-running subtask, then cancel it -------------------
    long_subtask = Subtask(
        f'test_task_{uuid.uuid4()}',
        session_id=session_id,
        chunk_graph=long_graph)
    running = asyncio.create_task(
        execution_ref.run_subtask(long_subtask, 'numa-0', pool.external_address))
    await asyncio.sleep(1)

    with Timer() as cancel_timer:
        await execution_ref.cancel_subtask(
            long_subtask.subtask_id, kill_timeout=1)
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(running, timeout=30)
    assert cancel_timer.duration < 6

    # The delayed actor must have observed the cancellation; reset its delay
    # so the follow-up subtask is not held back.
    if delayed_ref is not None:
        assert await delayed_ref.get_is_cancelled()
        await delayed_ref.set_delay_fetch_time(0)

    # --- a fresh subtask must still be able to run on the band ------------
    short_tileable = mr.spawn(delay_fun, args=(0.5, None))
    tileable_graph = TileableGraph([short_tileable.data])
    next(TileableGraphBuilder(tileable_graph).build())

    chunk_builder = ChunkGraphBuilder(tileable_graph, fuse_enabled=False)
    short_graph = next(chunk_builder.build())

    short_subtask = Subtask(
        f'test_task2_{uuid.uuid4()}',
        session_id=session_id,
        chunk_graph=short_graph)
    await asyncio.wait_for(
        execution_ref.run_subtask(
            short_subtask, 'numa-0', pool.external_address),
        timeout=30)