def test_set_state_np(self):
    """Setting ``env.state`` from a numpy array must round-trip through ``step``.

    After forcing the state and stepping once, the returned observation has
    to match the environment's internal state exactly.
    """
    env = GymEnvironment("VPendulum-v0")
    env.reset()
    state, action = self.state_action
    rounded_action = np.round(action)
    # Force the internal state, then take one step with the rounded action.
    env.state = state
    observation, _, _, _ = env.step(rounded_action)
    np.testing.assert_allclose(observation, env.state)
def create_er_from_episodes(discrete, max_len, num_steps, num_episodes, episode_length):
    """Rollout an environment and return an Experience Replay Buffer.

    Parameters
    ----------
    discrete: bool
        If True, roll out the discrete "NChain-v0" env with no transformations;
        otherwise roll out "Pendulum-v0" with the standard transformation stack.
    max_len: int
        Capacity of the replay buffer.
    num_steps: int
        Number of steps stored per transition in the buffer.
    num_episodes: int
        Number of episodes to roll out.
    episode_length: int
        Number of steps per episode.
    """
    if discrete:
        environment = GymEnvironment("NChain-v0")
        transforms = []
    else:
        environment = GymEnvironment("Pendulum-v0")
        transforms = [
            MeanFunction(lambda state_, action_: state_),
            StateNormalizer(),
            ActionNormalizer(),
            RewardClipper(),
        ]
    buffer_ = ExperienceReplay(max_len, transformations=transforms, num_steps=num_steps)

    for _ in range(num_episodes):
        state = environment.reset()
        for _ in range(episode_length):
            # Explore with uniformly sampled random actions.
            action = environment.action_space.sample()
            observation, state, done, info = step_env(
                environment, state, action, action_scale=1.0
            )
            buffer_.append(observation)
        buffer_.end_episode()

    return buffer_
def test_reward(environment, action_cost, action_type):
    """Check that the reward model reproduces the environment's rewards.

    For 50 steps, compare the environment reward against the reward model on
    single transitions and batched (tiled/repeated) transitions, first with
    numpy arrays and then with torch tensors.
    """
    env_name, reward_model_ = environment
    # Forward ``action_cost`` only when it is given, via a kwargs dict.
    extra = {} if action_cost is None else {"action_cost": action_cost}
    env = GymEnvironment(env_name, **extra)
    state = env.reset()
    reward_model = reward_model_(**extra)
    reward_model.set_goal(env.goal)

    for _ in range(50):
        if action_type == "random":
            action = env.action_space.sample()
        elif action_type == "zero":
            action = np.zeros(env.dim_action)
        else:
            raise NotImplementedError
        next_state, reward, done, info = env.step(action)

        # Goal-conditioned models see the goal appended to the state.
        if env.goal is not None:
            state = np.concatenate((state, env.goal))

        # numpy: single transition, then a batch of 5 identical transitions.
        np.testing.assert_allclose(
            reward, reward_model(state, action, next_state)[0], rtol=1e-3, atol=1e-6
        )
        np.testing.assert_allclose(
            np.tile(reward, (5,)),
            reward_model(
                np.tile(state, (5, 1)),
                np.tile(action, (5, 1)),
                np.tile(next_state, (5, 1)),
            )[0],
            rtol=1e-3,
            atol=1e-6,
        )

        # torch: same checks on tensors, single and batched.
        state = torch.tensor(state, dtype=torch.get_default_dtype())
        action = torch.tensor(action, dtype=torch.get_default_dtype())
        next_state = torch.tensor(next_state, dtype=torch.get_default_dtype())
        np.testing.assert_allclose(
            reward, reward_model(state, action, next_state)[0], rtol=1e-3, atol=1e-6
        )
        np.testing.assert_allclose(
            np.tile(reward, (5, 1)),
            reward_model(
                state.repeat(5, 1), action.repeat(5, 1), next_state.repeat(5, 1)
            )[0],
            rtol=1e-3,
            atol=1e-6,
        )

        state = next_state.numpy()
def test_tolerance(action_cost):
    """Check the sparse (tolerance-based) reward of the Reacher3D environment.

    For 50 random-action steps, compare the environment reward against the
    sparse ``ReacherReward`` model on single and batched transitions, in both
    numpy and torch.
    """
    env_name, reward_model_ = ("MBRLReacher3D-v0", ReacherReward)
    # Forward ``action_cost`` only when it is given, via a kwargs dict.
    extra = {} if action_cost is None else {"action_cost": action_cost}
    env = GymEnvironment(env_name, sparse=True, **extra)
    state = env.reset()
    reward_model = reward_model_(sparse=True, **extra)
    reward_model.set_goal(env.goal)

    for _ in range(50):
        action = env.action_space.sample()
        next_state, reward, done, info = env.step(action)

        # Goal-conditioned models see the goal appended to the state.
        if env.goal is not None:
            state = np.concatenate((state, env.goal))

        # numpy: single transition, then a batch of 5 identical transitions.
        np.testing.assert_allclose(
            reward, reward_model(state, action, next_state)[0], rtol=1e-3, atol=1e-6
        )
        np.testing.assert_allclose(
            np.tile(reward, (5,)),
            reward_model(
                np.tile(state, (5, 1)),
                np.tile(action, (5, 1)),
                np.tile(next_state, (5, 1)),
            )[0],
            rtol=1e-3,
            atol=1e-6,
        )

        # torch: same checks on tensors, single and batched.
        state = torch.tensor(state, dtype=torch.get_default_dtype())
        action = torch.tensor(action, dtype=torch.get_default_dtype())
        next_state = torch.tensor(next_state, dtype=torch.get_default_dtype())
        np.testing.assert_allclose(
            reward, reward_model(state, action, next_state)[0], rtol=1e-3, atol=1e-6
        )
        np.testing.assert_allclose(
            np.tile(reward, (5, 1)),
            reward_model(
                state.repeat(5, 1), action.repeat(5, 1), next_state.repeat(5, 1)
            )[0],
            rtol=1e-3,
            atol=1e-6,
        )

        state = next_state.numpy()
"probability": 0.5, "reward": reward }) for j in range(8): for a in range(2): transitions[(3 + j, a)].append({ "next_state": 0, "probability": 1.0, "reward": 0 }) return transitions if __name__ == "__main__": from rllib.environment import GymEnvironment from rllib.environment.utilities import transitions2kernelreward import qreps # noqa: F401 env = GymEnvironment("WideTree-v0", reward=1) kernel, reward = transitions2kernelreward(env.env.transitions, env.num_states, env.num_actions) print(kernel, reward) state = env.reset() print(state) for i in range(10): action = env.action_space.sample() next_state, r, done, f = env.step(action) print(state, action, next_state, r, done) state = next_state