def _tasks(config, params):
    """Resolve the requested tasks and register them on ``config``.

    ``params['tasks']`` (default ``['cheetah_run']``) lists task names; the
    string ``'all'`` expands to a fixed set of six names. Every name is looked
    up as an attribute of ``tasks_lib`` and called with ``(config, params)``.

    When more than one task is requested, each task is replaced by a new
    ``tasks_lib.Task`` that keeps the name and ``max_length``, uses
    ``['reward']`` as its fourth argument, and whose env constructor wraps the
    original env with ``SelectObservations(..., ['image'])`` and
    ``PadActions(..., action_spaces)``.

    Side effects on ``config``: sets ``isolate_envs`` and ``tasks``, and adds a
    ``heads`` / ``zero_step_losses`` entry for every state component of the
    first task.

    Returns:
        The same ``config`` object.

    Raises:
        IndexError: if the task list is empty (``tasks[0]`` is read).
    """
    requested = params.get('tasks', ['cheetah_run'])
    if requested == 'all':
        requested = [
            'cartpole_balance',
            'cartpole_swingup',
            'finger_spin',
            'cheetah_run',
            'cup_catch',
            'walker_walk',
        ]

    tasks = []
    for task_name in requested:
        factory = getattr(tasks_lib, task_name)
        tasks.append(factory(config, params))

    config.isolate_envs = params.get('isolate_envs', 'thread')

    def _shared_spaces_env(task, action_spaces):
        # Build the task's env, keep only the 'image' observation, then apply
        # PadActions with the action spaces of all tasks.
        base_env = task.env_ctor()
        image_only = control.wrappers.SelectObservations(base_env, ['image'])
        return control.wrappers.PadActions(image_only, action_spaces)

    if len(tasks) > 1:
        # One env per task is constructed here just to read its action space.
        action_spaces = [task.env_ctor().action_space for task in tasks]
        wrapped = []
        for task in tasks:
            # partial() binds this iteration's task immediately.
            env_ctor = functools.partial(
                _shared_spaces_env, task, action_spaces)
            wrapped.append(tasks_lib.Task(
                task.name, env_ctor, task.max_length, ['reward']))
        tasks = wrapped

    # Heads are derived from the first task only.
    for component in tasks[0].state_components:
        config.heads[component] = networks.feed_forward
        config.zero_step_losses[component] = 1.0

    config.tasks = tasks
    return config
def _tasks(config, params):
    """Resolve the requested tasks and register them on ``config``.

    ``params['tasks']`` (default ``['cheetah_run']``) lists task names; the
    string ``'all'`` expands to a fixed set of nine names. Every name is
    looked up as an attribute of ``tasks_lib`` and called with
    ``(config, params)``.

    When more than one task is requested, each task is replaced by a new
    ``tasks_lib.Task`` that keeps the name and ``max_length``, uses
    ``['reward']`` as its fourth argument, and whose env constructor wraps the
    original env with ``SelectObservations(..., ['image'])`` and
    ``PadActions(..., action_spaces)``.

    Side effects on ``config``: sets ``tasks`` and adds a ``heads`` /
    ``zero_step_losses`` entry for every state component of the first task.

    Returns:
        The same ``config`` object.

    Raises:
        IndexError: if the task list is empty (``tasks[0]`` is read).
    """
    requested = params.get('tasks', ['cheetah_run'])
    if requested == 'all':
        requested = [
            'cartpole_balance',
            'cartpole_swingup',
            'finger_spin',
            'cheetah_run',
            'cup_catch',
            'walker_walk',
            'pendulum',
            'carla',
            'breakout',
        ]

    # Original author's notes: tasks_lib is tasks.py, and config is not used
    # for creating tasks (not verifiable from this function alone).
    tasks = []
    for task_name in requested:
        factory = getattr(tasks_lib, task_name)
        tasks.append(factory(config, params))

    def _shared_spaces_env(task, action_spaces):
        # Build the task's env, keep only the 'image' observation, then apply
        # PadActions with the action spaces of all tasks.
        base_env = task.env_ctor()
        image_only = control.wrappers.SelectObservations(base_env, ['image'])
        return control.wrappers.PadActions(image_only, action_spaces)

    if len(tasks) > 1:
        # One env per task is constructed here just to read its action space.
        action_spaces = [task.env_ctor().action_space for task in tasks]
        wrapped = []
        for task in tasks:
            # partial() binds this iteration's task immediately.
            env_ctor = functools.partial(
                _shared_spaces_env, task, action_spaces)
            wrapped.append(tasks_lib.Task(
                task.name, env_ctor, task.max_length, ['reward']))
        tasks = wrapped

    # state_components of the first task, e.g. ['reward', 'position',
    # 'velocity'] per the original author's note.
    for component in tasks[0].state_components:
        config.heads[component] = networks.feed_forward
        config.zero_step_losses[component] = 1.0

    config.tasks = tasks
    return config
def _tasks(config, params):
    """Resolve the requested tasks and register them on ``config``.

    Every name in ``params['tasks']`` (default ``['cheetah_run']``) is looked
    up as an attribute of ``tasks_lib`` and called with ``(config, params)``.

    When more than one task is requested, each task is replaced by a new
    ``tasks_lib.Task`` that keeps the name and ``max_length``, uses
    ``['reward']`` as its fourth argument, and whose env constructor wraps the
    original env with ``SelectObservations(..., ['image'])`` and
    ``PadActions(..., action_spaces)``.

    A head is registered for the ``'reward'`` component of the first task, and
    for every other component only when ``params['state_diagnostics']`` is
    truthy. Each head is ``config.head_network`` bound with ``stop_gradient``
    set for names missing from ``config.gradient_heads``.

    Side effects on ``config``: sets ``isolate_envs`` and ``tasks``, and fills
    ``heads`` / ``loss_scales``.

    Returns:
        The same ``config`` object.

    Raises:
        IndexError: if the task list is empty (``tasks[0]`` is read).
    """
    requested = params.get('tasks', ['cheetah_run'])
    tasks = []
    for task_name in requested:
        factory = getattr(tasks_lib, task_name)
        tasks.append(factory(config, params))

    config.isolate_envs = params.get('isolate_envs', 'thread')

    def _shared_spaces_env(task, action_spaces):
        # Build the task's env, keep only the 'image' observation, then apply
        # PadActions with the action spaces of all tasks.
        base_env = task.env_ctor()
        image_only = control.wrappers.SelectObservations(base_env, ['image'])
        return control.wrappers.PadActions(image_only, action_spaces)

    if len(tasks) > 1:
        # One env per task is constructed here just to read its action space.
        action_spaces = [task.env_ctor().action_space for task in tasks]
        wrapped = []
        for task in tasks:
            env_ctor = tools.bind(_shared_spaces_env, task, action_spaces)
            wrapped.append(tasks_lib.Task(
                task.name, env_ctor, task.max_length, ['reward']))
        tasks = wrapped

    # Heads are derived from the first task only.
    for component in tasks[0].state_components:
        if component != 'reward' and not params.get('state_diagnostics', False):
            continue
        head_network = config.head_network
        detach = component not in config.gradient_heads
        config.heads[component] = tools.bind(head_network, stop_gradient=detach)
        config.loss_scales[component] = 1.0

    config.tasks = tasks
    return config
def vizdoom_takecover_tasks(config, params):
    """Resolve the requested tasks and register them on ``config``.

    Every name in ``params['tasks']`` (default ``['cheetah_run']``) is looked
    up as an attribute of ``tasks_lib`` and called with ``(config, params)``.

    When more than one task is requested, each task is replaced by a new
    ``tasks_lib.Task`` that keeps the name and ``max_length``, uses
    ``['reward']`` as its fourth argument, and whose env constructor wraps the
    original env with ``SelectObservations(..., ['image'])`` and
    ``PadActions(..., action_spaces)``.

    Side effects on ``config``: sets ``isolate_envs``; adds a ``heads`` /
    ``zero_step_losses`` entry for every state component of the first task;
    and points ``tasks``, ``test_tasks`` and ``random_collect_tasks`` at the
    same list object.

    Returns:
        The same ``config`` object.

    Raises:
        IndexError: if the task list is empty (``tasks[0]`` is read).
    """
    tasks = params.get('tasks', ['cheetah_run'])
    tasks = [getattr(tasks_lib, name)(config, params) for name in tasks]
    config.isolate_envs = params.get('isolate_envs', 'thread')

    def common_spaces_ctor(task, action_spaces, index=None):
        # `index` is not used here; it is kept so that any caller passing it
        # keeps working. NOTE(review): confirm whether a caller relies on it.
        env = task.env_ctor()
        env = control.wrappers.SelectObservations(env, ['image'])
        env = control.wrappers.PadActions(env, action_spaces)
        return env

    if len(tasks) > 1:
        # One env per task is constructed here just to read its action space.
        action_spaces = [task.env_ctor().action_space for task in tasks]
        for index, task in enumerate(tasks):
            # functools.partial binds this iteration's `task` immediately; a
            # lambda closing over the loop variable would see only the last
            # task by the time it is called.
            env_ctor = functools.partial(
                common_spaces_ctor, task, action_spaces)
            tasks[index] = tasks_lib.Task(
                task.name, env_ctor, task.max_length, ['reward'])

    # Heads are derived from the first task only.
    for name in tasks[0].state_components:
        config.heads[name] = networks.feed_forward
        config.zero_step_losses[name] = 1.0

    # All three attributes alias the same list.
    config.tasks = tasks
    config.test_tasks = tasks
    config.random_collect_tasks = tasks
    return config
def _tasks(config, params): tasks = params.get('tasks', ['cheetah_run']) if tasks == 'all': tasks = [ 'cartpole_balance', 'cartpole_swingup', 'finger_spin', 'cheetah_run', 'cup_catch', 'walker_walk', 'vizdoom_basic', 'gym_cheetah', 'gym_breakout', 'gym_seaquest', 'gym_pong', 'gym_vizdoom_takecover' ] if tasks == ['gym_vizdoom_cig']: tasks = [] # Multi Planet Train Tasks tasks.append('gym_vizdoom_cig_0_1') tasks.append('gym_vizdoom_cig_1_1') tasks.append('gym_vizdoom_cig_2_1') tasks.append('gym_vizdoom_cig_3_1') tasks.append('gym_vizdoom_cig_4_1') tasks.append('gym_vizdoom_cig_5_1') tasks.append('gym_vizdoom_cig_6_1') tasks.append('gym_vizdoom_cig_7_1') tasks.append('gym_vizdoom_cig_8_1') tasks.append('gym_vizdoom_cig_9_1') # Multi Planet Test Tasks # tasks.append('gym_vizdoom_cig_0_2') # tasks.append('gym_vizdoom_cig_1_2') # tasks.append('gym_vizdoom_cig_2_2') # tasks.append('gym_vizdoom_cig_3_2') # tasks.append('gym_vizdoom_cig_4_2') # tasks.append('gym_vizdoom_cig_5_2') # tasks.append('gym_vizdoom_cig_6_2') # tasks.append('gym_vizdoom_cig_7_2') # tasks.append('gym_vizdoom_cig_8_2') tasks.append('gym_vizdoom_cig_singleplayer_test') tasks.append('gym_vizdoom_cig_singleplayer') # tasks = ['gym_vizdoom_cig_0', 'gym_vizdoom_cig_1', 'gym_vizdoom_cig_2', 'gym_vizdoom_cig_singleplayer_test', 'gym_vizdoom_cig_singleplayer'] # tasks = ['gym_vizdoom_cig_multiplayer', 'gym_vizdoom_cig_singleplayer'] tasks = [getattr(tasks_lib, name)(config, params) for name in tasks] # tasks = [getattr(tasks_lib, name)(config, params) for idx, name in enumerate(tasks)] config.isolate_envs = params.get('isolate_envs', 'thread') env_ctor_called = 0 def common_spaces_ctor(task, action_spaces, index=None): env = task.env_ctor() env = control.wrappers.SelectObservations(env, ['image']) env = control.wrappers.PadActions(env, action_spaces) return env if len(tasks) > 1: action_spaces = [task.env_ctor().action_space for task in tasks] for index, task in enumerate(tasks): env_ctor = 
functools.partial(common_spaces_ctor, task, action_spaces) # env_ctor = lambda: common_spaces_ctor(task, action_spaces) tasks[index] = tasks_lib.Task(task.name, env_ctor, task.max_length, ['reward']) for name in tasks[0].state_components: config.heads[name] = networks.feed_forward config.zero_step_losses[name] = 1.0 config.tasks = [tasks[-2]] config.test_tasks = [tasks[-2]] config.random_collect_tasks = [tasks[-1]] assert len(config.tasks) == 1 assert len(config.test_tasks) == 1 assert len(config.random_collect_tasks) == 1 return config