def test_contextual_reward(self):
    """Validate the contextual_reward method of the Ant environments.

    Every environment listed below is built with an all-zero context and
    queried with three fixed vectors (all zeros, all ones, all twos). The
    returned value is compared against a pre-computed number multiplied by
    REWARD_SCALE.

    The environments covered are:

    1. AntMaze
    2. AntPush
    3. AntFall
    4. AntFourRooms
    """
    from hbaselines.envs.efficient_hrl.envs import REWARD_SCALE

    # (environment class, context dimensionality, expected unscaled value)
    scenarios = [
        (AntMaze, 2, -1.4142135624084504),
        (AntPush, 2, -1.4142135624084504),
        (AntFall, 3, -1.7320508075977448),
        (AntFourRooms, 2, -1.4142135624084504),
    ]

    for env_cls, dim, unscaled in scenarios:
        env = env_cls(use_contexts=True, context_range=[0] * dim)
        reward = env.contextual_reward(
            np.array([0] * dim),
            np.array([1] * dim),
            np.array([2] * dim),
        )
        self.assertAlmostEqual(reward, unscaled * REWARD_SCALE)
low=np.array([ -10, -10, -0.5, -1, -1, -1, -1, -0.5, -0.3, -0.5, -0.3, -0.5, -0.3, -0.5, -0.3 ]), high=np.array([ 10, 10, 0.5, 1, 1, 1, 1, 0.5, 0.3, 0.5, 0.3, 0.5, 0.3, 0.5, 0.3 ]), dtype=np.float32, ), "state_indices": lambda multiagent: [i for i in range(15)], "env": lambda evaluate, render, n_levels, multiagent, shared, maddpg: [ AntFourRooms( use_contexts=True, context_range=[20, 0], evaluate=True, num_levels=n_levels, ), AntFourRooms( use_contexts=True, context_range=[0, 20], evaluate=True, num_levels=n_levels, ), AntFourRooms( use_contexts=True, context_range=[20, 20], evaluate=True, num_levels=n_levels, ) ] if evaluate else AntFourRooms(
"AntFourRooms": { "meta_ac_space": lambda relative_goals: Box( low=np.array([ -10, -10, -0.5, -1, -1, -1, -1, -0.5, -0.3, -0.5, -0.3, -0.5, -0.3, -0.5, -0.3 ]), high=np.array([ 10, 10, 0.5, 1, 1, 1, 1, 0.5, 0.3, 0.5, 0.3, 0.5, 0.3, 0.5, 0.3 ]), dtype=np.float32, ), "state_indices": [i for i in range(15)], "env": lambda evaluate, render, multiagent, shared, maddpg: [ AntFourRooms(use_contexts=True, context_range=[30, 0]), AntFourRooms(use_contexts=True, context_range=[0, 30]), AntFourRooms(use_contexts=True, context_range=[30, 30]) ] if evaluate else AntFourRooms(use_contexts=True, random_contexts=False, context_range=[[30, 0], [0, 30], [30, 30]]), }, # ======================================================================= # # UR5 and Pendulum environments. # # ======================================================================= # "UR5": { "meta_ac_space": lambda relative_goals: Box( low=np.array([-2 * np.pi, -2 * np.pi, -2 * np.pi, -4, -4, -4]),
def create_env(env, render=False, evaluate=False):
    """Build (when given a name) and reset an environment.

    Parameters
    ----------
    env : str or gym.Env
        either the name of a supported or gym-registered environment, or an
        already-instantiated environment (which is only reset and returned)
    render : bool
        rendering flag, forwarded to the environments that accept one
    evaluate : bool
        True to build the evaluation variant of the environment; for some
        names this yields a list of environments, one per fixed context

    Returns
    -------
    gym.Env or list of gym.Env
        the environment(s), after ``reset()`` has been called on each
    """
    if env == "AntGather":
        env = AntGatherEnv()

    elif env == "AntMaze":
        if not evaluate:
            env = AntMaze(use_contexts=True,
                          random_contexts=True,
                          context_range=[(-4, 20), (-4, 20)])
        else:
            # One evaluation environment per fixed goal.
            env = [
                AntMaze(use_contexts=True, context_range=goal)
                for goal in ([16, 0], [16, 16], [0, 16])
            ]

    elif env == "AntPush":
        # Training and evaluation share the same fixed context.
        env = AntPush(use_contexts=True, context_range=[0, 19])

    elif env == "AntFall":
        # Training and evaluation share the same fixed context.
        env = AntFall(use_contexts=True, context_range=[0, 27, 4.5])

    elif env == "AntFourRooms":
        if not evaluate:
            env = AntFourRooms(use_contexts=True,
                               random_contexts=False,
                               context_range=[[30, 0], [0, 30], [30, 30]])
        else:
            # One evaluation environment per fixed goal.
            env = [
                AntFourRooms(use_contexts=True, context_range=goal)
                for goal in ([30, 0], [0, 30], [30, 30])
            ]

    elif env == "UR5":
        # Training and evaluation use the same randomized contexts.
        env = UR5(use_contexts=True,
                  random_contexts=True,
                  context_range=[(-np.pi, np.pi),
                                 (-np.pi / 4, 0),
                                 (-np.pi / 4, np.pi / 4)],
                  show=render)

    elif env == "Pendulum":
        if not evaluate:
            env = Pendulum(use_contexts=True,
                           random_contexts=True,
                           context_range=[
                               (np.deg2rad(-16), np.deg2rad(16)),
                               (-0.6, 0.6),
                           ],
                           show=render)
        else:
            env = Pendulum(use_contexts=True,
                           context_range=[0, 0],
                           show=render)

    elif env in ("bottleneck0", "bottleneck1", "bottleneck2",
                 "grid0", "grid1"):
        # Load the benchmark module by name and pull out its flow_params.
        flow_params = __import__(
            "flow.benchmarks.{}".format(env),
            fromlist=["flow_params"],
        ).flow_params

        # Obtain a factory for the environment, then instantiate it.
        env_creator, _ = make_create_env(
            flow_params, version=0, render=render)
        env = env_creator()

    elif env in ("ring0", "multi-ring0"):
        env = FlowEnv("ring", render=render)  # FIXME

    elif env in ("merge0", "merge1", "merge2",
                 "multi-merge0", "multi-merge1", "multi-merge2"):
        # The trailing digit of the name selects the experiment number.
        merge_params = {
            "exp_num": int(env[-1]),
            "horizon": 6000,
            "simulator": "traci",
            "multiagent": env.startswith("multi"),
        }
        env = FlowEnv("merge", env_params=merge_params, render=render)

    elif env in ("figureeight0", "figureeight1", "figureeight02",
                 "multi-figureeight0", "multi-figureeight1",
                 "multi-figureeight02"):
        # The trailing digit of the name indexes the automated-vehicle count.
        figure_eight_params = {
            "num_automated": (1, 7, 14)[int(env[-1])],
            "horizon": 750,
            "simulator": "traci",
            "multiagent": env.startswith("multi"),
        }
        env = FlowEnv("figure_eight",
                      env_params=figure_eight_params,
                      render=render)

    elif env == "BipedalSoccer":
        env = BipedalSoccer(render=render)

    elif isinstance(env, str):
        # Any other name is assumed to be registered with OpenAI gym.
        env = gym.make(env)

    # Reset whatever was built or passed in; a list is reset item by item.
    if env is None:
        return env

    for instance in (env if isinstance(env, list) else [env]):
        instance.reset()

    return env