Пример #1
0
def _tasks(config, params):
    """Resolve the requested tasks and register them on ``config``.

    Task names are read from ``params['tasks']`` (default
    ``['cheetah_run']``). The special value ``'all'`` expands to a fixed
    list of six task names. Every name is looked up as an attribute of
    ``tasks_lib`` and called with ``(config, params)``.

    When more than one task is requested, every task is replaced by a new
    ``tasks_lib.Task`` whose environment constructor restricts
    observations to ``'image'`` and pads actions across all tasks' action
    spaces; ``['reward']`` is passed as its fourth argument.

    Args:
        config: Configuration object; mutated in place.
        params: Mapping of user parameters (only ``.get`` is used).

    Returns:
        The same ``config`` object, with ``isolate_envs``, ``heads``,
        ``zero_step_losses`` and ``tasks`` updated.
    """
    requested = params.get('tasks', ['cheetah_run'])
    if requested == 'all':
        requested = [
            'cartpole_balance', 'cartpole_swingup', 'finger_spin',
            'cheetah_run', 'cup_catch', 'walker_walk',
        ]

    tasks = []
    for task_name in requested:
        make_task = getattr(tasks_lib, task_name)
        tasks.append(make_task(config, params))
    config.isolate_envs = params.get('isolate_envs', 'thread')

    def shared_spaces_env(task, action_spaces):
        # Build the task's own environment, keep only the image
        # observation, then pad actions using every task's action space.
        image_only = control.wrappers.SelectObservations(
            task.env_ctor(), ['image'])
        return control.wrappers.PadActions(image_only, action_spaces)

    if len(tasks) > 1:
        # One environment per task is constructed here solely to read its
        # action space.
        action_spaces = [task.env_ctor().action_space for task in tasks]
        tasks = [
            tasks_lib.Task(
                task.name,
                functools.partial(shared_spaces_env, task, action_spaces),
                task.max_length,
                ['reward'])
            for task in tasks
        ]

    # Heads and losses are derived from the first task's components only.
    for component in tasks[0].state_components:
        config.heads[component] = networks.feed_forward
        config.zero_step_losses[component] = 1.0

    config.tasks = tasks
    return config
Пример #2
0
def _tasks(config, params):
    """Resolve the requested tasks and register them on ``config``.

    Task names come from ``params['tasks']`` (default ``['cheetah_run']``;
    the original author's note gives ``['breakout']`` as an example
    value). The special value ``'all'`` expands to a fixed list of nine
    task names. Every name is looked up as an attribute of ``tasks_lib``
    (per the original note, the ``tasks.py`` module) and called with
    ``(config, params)``.

    NOTE(review): the original comment states that ``config`` is not
    actually used when creating tasks - confirm against ``tasks_lib``.

    When more than one task is requested, every task is replaced by a new
    ``tasks_lib.Task`` whose environment constructor restricts
    observations to ``'image'`` and pads actions across all tasks' action
    spaces; ``['reward']`` is passed as its fourth argument.

    Args:
        config: Configuration object; mutated in place.
        params: Mapping of user parameters (only ``.get`` is used).

    Returns:
        The same ``config`` object, with ``heads``, ``zero_step_losses``
        and ``tasks`` updated.
    """
    requested = params.get('tasks', ['cheetah_run'])
    if requested == 'all':
        requested = [
            'cartpole_balance', 'cartpole_swingup', 'finger_spin',
            'cheetah_run', 'cup_catch', 'walker_walk', 'pendulum', 'carla',
            'breakout',
        ]

    tasks = []
    for task_name in requested:
        make_task = getattr(tasks_lib, task_name)
        tasks.append(make_task(config, params))

    def shared_spaces_env(task, action_spaces):
        # Build the task's own environment, keep only the image
        # observation, then pad actions using every task's action space.
        image_only = control.wrappers.SelectObservations(
            task.env_ctor(), ['image'])
        return control.wrappers.PadActions(image_only, action_spaces)

    if len(tasks) > 1:
        # One environment per task is constructed here solely to read its
        # action space.
        action_spaces = [task.env_ctor().action_space for task in tasks]
        tasks = [
            tasks_lib.Task(
                task.name,
                functools.partial(shared_spaces_env, task, action_spaces),
                task.max_length,
                ['reward'])
            for task in tasks
        ]

    # State components of the first task, e.g. (per the original note)
    # ['reward', 'position', 'velocity'], each get a head and a loss.
    for component in tasks[0].state_components:
        config.heads[component] = networks.feed_forward
        config.zero_step_losses[component] = 1.0

    config.tasks = tasks
    return config
Пример #3
0
def _tasks(config, params):
    """Resolve the requested tasks and register them on ``config``.

    Task names are read from ``params['tasks']`` (default
    ``['cheetah_run']``). Every name is looked up as an attribute of
    ``tasks_lib`` and called with ``(config, params)``.

    When more than one task is requested, every task is replaced by a new
    ``tasks_lib.Task`` whose environment constructor restricts
    observations to ``'image'`` and pads actions across all tasks' action
    spaces; ``['reward']`` is passed as its fourth argument.

    A head is registered for a state component of the first task only if
    the component is ``'reward'`` or ``params['state_diagnostics']`` is
    truthy. The head is ``config.head_network`` bound with
    ``stop_gradient`` set when the component is not listed in
    ``config.gradient_heads``.

    Args:
        config: Configuration object; mutated in place.
        params: Mapping of user parameters (only ``.get`` is used).

    Returns:
        The same ``config`` object, with ``isolate_envs``, ``heads``,
        ``loss_scales`` and ``tasks`` updated.
    """
    requested = params.get('tasks', ['cheetah_run'])
    tasks = []
    for task_name in requested:
        make_task = getattr(tasks_lib, task_name)
        tasks.append(make_task(config, params))
    config.isolate_envs = params.get('isolate_envs', 'thread')

    def common_spaces_ctor(task, action_spaces):
        # Build the task's own environment, keep only the image
        # observation, then pad actions using every task's action space.
        image_only = control.wrappers.SelectObservations(
            task.env_ctor(), ['image'])
        return control.wrappers.PadActions(image_only, action_spaces)

    if len(tasks) > 1:
        # One environment per task is constructed here solely to read its
        # action space.
        action_spaces = [task.env_ctor().action_space for task in tasks]
        tasks = [
            tasks_lib.Task(
                task.name,
                tools.bind(common_spaces_ctor, task, action_spaces),
                task.max_length,
                ['reward'])
            for task in tasks
        ]

    for component in tasks[0].state_components:
        # Skip everything except the reward unless diagnostics are on.
        if component != 'reward' and not params.get(
                'state_diagnostics', False):
            continue
        detach = component not in config.gradient_heads
        config.heads[component] = tools.bind(
            config.head_network, stop_gradient=detach)
        config.loss_scales[component] = 1.0

    config.tasks = tasks
    return config
Пример #4
0
def vizdoom_takecover_tasks(config, params):
    """Resolve the requested tasks and register them on ``config``.

    Task names are read from ``params['tasks']`` (default
    ``['cheetah_run']``). Every name is looked up as an attribute of
    ``tasks_lib`` and called with ``(config, params)``.

    When more than one task is requested, every task is replaced by a new
    ``tasks_lib.Task`` whose environment constructor restricts
    observations to ``'image'`` and pads actions across all tasks' action
    spaces; ``['reward']`` is passed as its fourth argument.

    The same task list is used for training (``config.tasks``), testing
    (``config.test_tasks``) and random collection
    (``config.random_collect_tasks``).

    Args:
        config: Configuration object; mutated in place.
        params: Mapping of user parameters (only ``.get`` is used).

    Returns:
        The same ``config`` object, with ``isolate_envs``, ``heads``,
        ``zero_step_losses`` and the three task lists updated.

    Raises:
        IndexError: If the resolved task list is empty.
    """
    task_names = params.get('tasks', ['cheetah_run'])
    tasks = [getattr(tasks_lib, name)(config, params) for name in task_names]
    config.isolate_envs = params.get('isolate_envs', 'thread')

    def common_spaces_ctor(task, action_spaces):
        # Build the task's own environment, keep only the image
        # observation, then pad actions using every task's action space.
        env = task.env_ctor()
        env = control.wrappers.SelectObservations(env, ['image'])
        env = control.wrappers.PadActions(env, action_spaces)
        return env

    if len(tasks) > 1:
        # One environment per task is constructed here solely to read its
        # action space.
        action_spaces = [task.env_ctor().action_space for task in tasks]
        for index, task in enumerate(tasks):
            # functools.partial binds the current task eagerly, so each
            # constructor keeps its own task rather than the loop's last.
            env_ctor = functools.partial(common_spaces_ctor, task,
                                         action_spaces)
            tasks[index] = tasks_lib.Task(task.name, env_ctor, task.max_length,
                                          ['reward'])

    # Heads and losses are derived from the first task's components only.
    for name in tasks[0].state_components:
        config.heads[name] = networks.feed_forward
        config.zero_step_losses[name] = 1.0

    config.tasks = tasks
    config.test_tasks = tasks
    config.random_collect_tasks = tasks

    return config
Пример #5
0
def _tasks(config, params):
    """Resolve the requested tasks and pick train/test/collect tasks.

    Task names are read from ``params['tasks']`` (default
    ``['cheetah_run']``). Two values are expanded:

    * ``'all'`` becomes a fixed list of twelve task names.
    * ``['gym_vizdoom_cig']`` becomes the ten ``gym_vizdoom_cig_<i>_1``
      tasks (``i`` in 0..9) followed by
      ``'gym_vizdoom_cig_singleplayer_test'`` and
      ``'gym_vizdoom_cig_singleplayer'``.

    Every name is looked up as an attribute of ``tasks_lib`` and called
    with ``(config, params)``. When more than one task is resolved, every
    task is replaced by a new ``tasks_lib.Task`` whose environment
    constructor restricts observations to ``'image'`` and pads actions
    across all tasks' action spaces; ``['reward']`` is passed as its
    fourth argument.

    Only two of the resolved tasks end up on ``config``: the
    second-to-last is used for ``config.tasks`` and ``config.test_tasks``
    and the last for ``config.random_collect_tasks``. If only a single
    task was resolved, that task is used for all three (previously this
    case raised ``IndexError``, including for the default parameters).

    Args:
        config: Configuration object; mutated in place.
        params: Mapping of user parameters (only ``.get`` is used).

    Returns:
        The same ``config`` object, with ``isolate_envs``, ``heads``,
        ``zero_step_losses`` and the three single-element task lists set.

    Raises:
        IndexError: If the resolved task list is empty.
    """
    task_names = params.get('tasks', ['cheetah_run'])

    if task_names == 'all':
        task_names = [
            'cartpole_balance', 'cartpole_swingup', 'finger_spin',
            'cheetah_run', 'cup_catch', 'walker_walk', 'vizdoom_basic',
            'gym_cheetah', 'gym_breakout', 'gym_seaquest', 'gym_pong',
            'gym_vizdoom_cig_takecover'.replace('cig_', ''),
        ]

    if task_names == ['gym_vizdoom_cig']:
        # Multi-planet train tasks: gym_vizdoom_cig_0_1 .. _9_1. The
        # order matters because the last two entries are selected below.
        task_names = [
            'gym_vizdoom_cig_{}_1'.format(i) for i in range(10)
        ]
        task_names.append('gym_vizdoom_cig_singleplayer_test')
        task_names.append('gym_vizdoom_cig_singleplayer')

    tasks = [getattr(tasks_lib, name)(config, params) for name in task_names]
    config.isolate_envs = params.get('isolate_envs', 'thread')

    def common_spaces_ctor(task, action_spaces):
        # Build the task's own environment, keep only the image
        # observation, then pad actions using every task's action space.
        env = task.env_ctor()
        env = control.wrappers.SelectObservations(env, ['image'])
        env = control.wrappers.PadActions(env, action_spaces)
        return env

    if len(tasks) > 1:
        # One environment per task is constructed here solely to read its
        # action space.
        action_spaces = [task.env_ctor().action_space for task in tasks]
        for index, task in enumerate(tasks):
            # functools.partial binds the current task eagerly, so each
            # constructor keeps its own task rather than the loop's last.
            env_ctor = functools.partial(common_spaces_ctor, task,
                                         action_spaces)
            tasks[index] = tasks_lib.Task(task.name, env_ctor, task.max_length,
                                          ['reward'])

    # Heads and losses are derived from the first task's components only.
    for name in tasks[0].state_components:
        config.heads[name] = networks.feed_forward
        config.zero_step_losses[name] = 1.0

    # Second-to-last task trains and tests; the last one collects random
    # data. With a single task there is no second-to-last entry, so fall
    # back to that one task instead of failing on tasks[-2].
    collect_task = tasks[-1]
    train_task = tasks[-2] if len(tasks) > 1 else collect_task

    config.tasks = [train_task]
    config.test_tasks = [train_task]
    config.random_collect_tasks = [collect_task]

    return config