Пример #1
0
async def test_auto_recover(ray_start_regular, auto_recover):
    """Check sub-pool recovery after ``kill_actor`` on a Ray-backed actor pool.

    The main pool is created at process index 0 of a single-bundle placement
    group. An actor allocated with ``ProcessIndex(1)`` is then killed, which
    is expected to take down the process hosting it. Depending on
    ``auto_recover``:

    * truthy -- after recovery the sub pool must report
      ``PoolStatus.HEALTHY``, and the actor must exist again only when
      ``auto_recover`` is ``'actor'`` or ``True``;
    * falsy -- querying the actor must raise ``ServerClosed`` or
      ``ConnectionError``.
    """
    group_name = 'ray_cluster'
    process_count = 1
    placement = ray.util.placement_group(
        name=group_name,
        bundles=[{'CPU': process_count}],
    )
    assert placement.wait(timeout_seconds=20)

    main_address = process_placement_to_address(
        group_name, 0, process_index=0)
    pool_handle = await mo.create_actor_pool(
        main_address, n_process=process_count, auto_recover=auto_recover)
    await pool_handle.actor_pool.remote('start')

    context = get_context()

    # Waiting for the main pool itself is expected to return immediately.
    await context.wait_actor_pool_recovered(main_address, main_address)

    # An actor placed on the main pool cannot be killed.
    ref = await context.create_actor(
        TestActor, address=main_address, allocate_strategy=MainPool())
    with pytest.raises(ValueError):
        await mo.kill_actor(ref)

    # An actor placed on process index 1; killing it kills that process.
    ref = await context.create_actor(
        TestActor, address=main_address, allocate_strategy=ProcessIndex(1))
    await context.kill_actor(ref)

    if not auto_recover:
        # No recovery: the sub pool stays unreachable.
        with pytest.raises((ServerClosed, ConnectionError)):
            await context.has_actor(ref)
    else:
        await context.wait_actor_pool_recovered(ref.address, main_address)
        recovered_address = process_placement_to_address(
            group_name, 0, process_index=1)
        recovered_handle = ray.get_actor(recovered_address)
        status = await recovered_handle.actor_pool.remote('health_check')
        assert status == PoolStatus.HEALTHY

        # The actor is expected back only for the 'actor' / True settings.
        actor_should_exist = auto_recover in ['actor', True]
        assert await context.has_actor(ref) is actor_should_exist

    if 'COV_CORE_SOURCE' in os.environ:
        pool_addresses = [
            process_placement_to_address(group_name, 0, process_index=index)
            for index in range(2)
        ]
        for pool_address in pool_addresses:
            # must save the local reference until this is fixed:
            # https://github.com/ray-project/ray/issues/7815
            ray_actor = ray.get_actor(pool_address)
            ray.get(ray_actor.cleanup.remote())
Пример #2
0
async def test_auto_recover(auto_recover):
    """Check sub-process recovery after ``kill_actor`` on a local actor pool.

    A ``MainActorPool`` with two sub processes is created on ``127.0.0.1``
    with an ``on_process_recover`` callback that sets an event. An actor
    allocated with ``ProcessIndex(1)`` is then killed, which is expected to
    take down its process. Depending on ``auto_recover``:

    * truthy -- the callback must have fired once recovery completes, and
      the actor must exist again only when ``auto_recover`` is ``'actor'``
      or ``True``;
    * falsy -- querying the actor must raise ``ServerClosed`` or
      ``ConnectionError``.
    """
    # No explicit start method on Windows; elsewhere it can be overridden
    # through the environment.
    if sys.platform == 'win32':
        start_method = None
    else:
        start_method = os.environ.get('POOL_START_METHOD', 'forkserver')

    recovered = asyncio.Event()

    def _mark_recovered(*_):
        recovered.set()

    pool = await create_actor_pool(
        '127.0.0.1',
        pool_cls=MainActorPool,
        n_process=2,
        subprocess_start_method=start_method,
        auto_recover=auto_recover,
        on_process_recover=_mark_recovered,
    )

    async with pool:
        context = get_context()
        main_address = pool.external_address

        # Waiting for the main pool itself is expected to return immediately.
        await context.wait_actor_pool_recovered(main_address, main_address)

        # An actor placed on the main pool cannot be killed.
        ref = await context.create_actor(
            TestActor, address=main_address, allocate_strategy=MainPool())
        with pytest.raises(ValueError):
            await kill_actor(ref)

        # An actor placed on process index 1; killing it kills that process.
        ref = await context.create_actor(
            TestActor,
            address=main_address,
            allocate_strategy=ProcessIndex(1),
        )
        await context.kill_actor(ref)

        if not auto_recover:
            # No recovery: the sub process stays unreachable.
            with pytest.raises((ServerClosed, ConnectionError)):
                await context.has_actor(ref)
        else:
            # The process must have been killed and brought back.
            await context.wait_actor_pool_recovered(ref.address, main_address)
            assert recovered.is_set()

            # The actor is expected back only for the 'actor' / True settings.
            actor_should_exist = auto_recover in ['actor', True]
            assert await context.has_actor(ref) is actor_should_exist
Пример #3
0
async def test_server_closed(ray_start_regular):
    """Check that calls into a dead Ray sub pool surface ``ServerClosed``.

    Two ``TestActor`` instances are created, one at process index 0 and one
    at process index 1. The error is expected twice: once when the driver
    calls ``crash()`` on the sub-pool actor, and once more, after the sub
    pool has recovered, when the main-pool actor is asked to ``kill`` the
    sub-pool actor.
    """
    group_name = 'ray_cluster'
    process_count = 1
    placement = ray.util.placement_group(
        name=group_name,
        bundles=[{'CPU': process_count}],
    )
    ray.get(placement.ready())

    main_address = process_placement_to_address(
        group_name, 0, process_index=0)

    # Create and start the actor pool.
    pool_handle = await mo.create_actor_pool(
        main_address, n_process=process_count)
    await pool_handle.actor_pool.remote('start')

    context = get_context()
    main_actor = await context.create_actor(
        TestActor,
        address=main_address,
        uid='Test-main',
        allocate_strategy=ProcessIndex(0),
    )
    sub_actor = await context.create_actor(
        TestActor,
        address=main_address,
        uid='Test-sub',
        allocate_strategy=ProcessIndex(1),
    )

    # Ray driver -> Ray actor: the hosting process dies during the call,
    # so ServerClosed is expected.
    crash_call = asyncio.create_task(sub_actor.crash())
    with pytest.raises(ServerClosed):
        await crash_call

    # Let the sub pool recover before the second scenario.
    await context.wait_actor_pool_recovered(sub_actor.address, main_address)

    # Ray actor -> Ray actor.
    kill_call = asyncio.create_task(
        main_actor.kill(sub_actor.address, 'Test-sub'))
    with pytest.raises(ServerClosed):
        await kill_call
Пример #4
0
async def test_server_closed():
    """Check error reporting when a local sub process is killed.

    With ``auto_recover`` disabled, an in-flight call to an actor on a
    killed sub process must raise ``ServerClosed``. Once the pool context
    has exited, starting the pool again must raise ``RuntimeError`` and
    querying the actor must raise ``ConnectionError``.
    """
    # No explicit start method on Windows; elsewhere it can be overridden
    # through the environment.
    if sys.platform == 'win32':
        start_method = None
    else:
        start_method = os.environ.get('POOL_START_METHOD', 'forkserver')

    pool = await create_actor_pool(
        '127.0.0.1',
        pool_cls=MainActorPool,
        n_process=2,
        subprocess_start_method=start_method,
        auto_recover=False,
    )

    context = get_context()

    async with pool:
        ref = await context.create_actor(
            TestActor,
            address=pool.external_address,
            allocate_strategy=ProcessIndex(1),
        )

        # Start a long call, then yield once so the task gets scheduled
        # before the sub process is killed.
        pending_call = asyncio.create_task(ref.sleep(10))
        await asyncio.sleep(0)

        # Kill the first registered sub process and wait for it to exit.
        sub_processes = list(pool._sub_processes.values())
        victim = sub_processes[0]
        victim.kill()
        victim.join()

        # The process is gone, so the pending call must fail.
        with pytest.raises(ServerClosed):
            await pending_call

        assert not victim.is_alive()

    # Starting the pool again after the context has exited is an error.
    with pytest.raises(RuntimeError):
        await pool.start()

    # The server is no longer reachable.
    with pytest.raises(ConnectionError):
        await context.has_actor(ref)