def test_propagates_exceptions_to_main_thread():
    class FailingPolicy(DummyGreedyPolicy):
        """Mock policy, throws an error every time."""

        def compute_action(self, observation: ObservationType, maze_state: Optional[MazeStateType] = None,
                           env: Optional[BaseEnv] = None, actor_id: ActorID = None,
                           deterministic: bool = False) -> ActionType:
            """Throw an error."""
            raise RuntimeError("Test error.")

        def compute_top_action_candidates(self, observation: ObservationType, num_candidates: Optional[int],
                                          maze_state: Optional[MazeStateType], env: Optional[BaseEnv],
                                          actor_id: ActorID = None) \
                -> Tuple[Sequence[ActionType], Sequence[float]]:
            """Not used"""

    agent_deployment = AgentDeployment(
        policy=FailingPolicy(),
        env=build_dummy_maze_env()
    )

    test_core_env = build_dummy_maze_env().core_env
    s = test_core_env.reset()  # Just get a valid state, the content is not really important
    with pytest.raises(RuntimeError) as e_info:
        agent_deployment.act(s, 0, False, {})


def test_steps_env_with_single_policy():
    agent_deployment = AgentDeployment(
        policy=DummyGreedyPolicy(),
        env=build_dummy_maze_env()
    )

    # Step the environment manually here and query the agent integration wrapper for maze_actions
    test_policy = DummyGreedyPolicy()
    test_env = build_dummy_maze_env()
    maze_state = test_env.reset()
    reward, done, info = None, None, None

    for i in range(10):
        maze_action = agent_deployment.act(maze_state, reward, done, info)

        # Compare with the expected maze_action on top of the env that we are stepping
        raw_expected_action = test_policy.compute_action(
            observation=test_env.observation_conversion.maze_to_space(maze_state),
            maze_state=maze_state, deterministic=True)
        expected_action = test_env.action_conversion.space_to_maze(raw_expected_action, maze_state)

        assert expected_action.keys() == maze_action.keys()
        # Compare each key explicitly: np.all over a bare generator is always truthy
        assert all(np.all(expected_action[key] == maze_action[key]) for key in maze_action.keys())

        maze_state, reward, done, info = test_env.step(expected_action)


def test_deployment_records_stats():
    env = LogStatsWrapper.wrap(build_dummy_maze_env())
    agent_deployment = AgentDeployment(
        policy=DummyGreedyPolicy(),
        env=env
    )

    # Step the environment manually here and query the agent integration wrapper for maze_actions
    test_core_env = build_dummy_maze_env().core_env
    maze_state = test_core_env.reset()
    reward, done, info = 0, False, {}
    for i in range(5):
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = test_core_env.step(maze_action)
    agent_deployment.close(maze_state, reward, done, info)

    assert env.get_stats_value(
        RewardEvents.reward_original,
        LogStatsLevel.EPOCH,
        name="total_step_count"
    ) == 5
    assert env.get_stats_value(
        BaseEnvEvents.reward,
        LogStatsLevel.EPOCH,
        name="total_step_count"
    ) == 5


def test_gets_maze_action_candidates():
    class StaticPolicy(DummyGreedyPolicy):
        """Mock policy, returns static action candidates (careful, always three of them)."""

        def compute_top_action_candidates(self, observation: ObservationType, num_candidates: Optional[int],
                                          maze_state: Optional[MazeStateType], env: Optional[BaseEnv],
                                          actor_id: ActorID = None) \
                -> Tuple[Sequence[ActionType], Sequence[float]]:
            """Return static action candidates"""
            return (
                [{"action_0_0": j, "action_1_0": j, "action_1_1": [j % 2] * 5} for j in range(3)],
                [0.95, 0.04, 0.01]
            )

    agent_deployment = AgentDeployment(
        policy=StaticPolicy(),
        env=build_dummy_maze_env(),
        num_candidates=3
    )

    test_core_env = build_dummy_maze_env().core_env
    maze_state = test_core_env.reset()  # Just get a valid state, the content is not really important
    for i in range(10):
        maze_action = agent_deployment.act(maze_state, 0, False, {})
        assert isinstance(maze_action, MazeActionCandidates)
        assert maze_action.candidates[0]["action_0_0"] == 0
        assert maze_action.candidates[1]["action_0_0"] == 1
        assert maze_action.candidates[2]["action_0_0"] == 2
        assert maze_action.probabilities == [0.95, 0.04, 0.01]


def test_action_monitoring():
    """Action logging unit test."""
    # instantiate env
    env = build_dummy_maze_env()
    env = MazeEnvMonitoringWrapper.wrap(env, observation_logging=False, action_logging=True,
                                        reward_logging=False)
    env = LogStatsWrapper.wrap(env)  # for accessing events from previous steps
    env.reset()

    # test application of wrapper
    for ii in range(2):
        env.step(env.action_space.sample())

        action_events = env.get_last_step_events(query=[ActionEvents.discrete_action,
                                                        ActionEvents.continuous_action,
                                                        ActionEvents.multi_binary_action])
        assert len(action_events) == 7
        for event in action_events:
            if event.attributes['name'] in ['action_0_0', 'action_0_1_0', 'action_0_1_1', 'action_1_0']:
                assert event.interface_method == ActionEvents.discrete_action
            elif event.attributes['name'] in ['action_0_2', 'action_2_0']:
                assert event.interface_method == ActionEvents.continuous_action
            elif event.attributes['name'] in ['action_1_1']:
                assert event.interface_method == ActionEvents.multi_binary_action
            else:
                raise ValueError(f"Unexpected action event name: {event.attributes['name']}")


def _test_dummy_env_for_split_config(split_config: Dict[str, Dict]) -> None:
    """Test the split action wrapper on the dummy env for a given split_config.

    :param split_config: The split action config to apply and test.
    """
    base_env = build_dummy_maze_env()
    env = SplitActionsWrapper.wrap(base_env, split_config=split_config)

    _ = env.action_space
    for key, sub_actions in split_config.items():
        for sub_key in sub_actions.keys():
            assert sub_key in env.action_space.spaces.keys()
        assert key not in env.action_space.spaces.keys()
    assert env.action_spaces_dict[0] == env.action_space

    base_env_action = base_env.action_space.sample()
    split_action = env.reverse_action(base_env_action)
    for key, sub_actions in split_config.items():
        for sub_key in sub_actions.keys():
            assert sub_key in split_action
        assert key not in split_action

    for key in base_env.action_space.spaces.keys():
        assert np.all(base_env_action[key] == env.action(split_action)[key])


def test_rollout_evaluator():
    env = SequentialVectorEnv([lambda: TimeLimitWrapper.wrap(build_dummy_maze_env(), max_episode_steps=2)] * 2)
    policy = flatten_concat_probabilistic_policy_for_env(build_dummy_maze_env())
    model_selection = _MockModelSelection()

    evaluator = RolloutEvaluator(eval_env=env, n_episodes=3, model_selection=model_selection)
    for i in range(2):
        evaluator.evaluate(policy)
        increment_log_step()

    assert model_selection.update_count == 2
    assert evaluator.eval_env.get_stats_value(
        BaseEnvEvents.reward,
        LogStatsLevel.EPOCH,
        name="total_episode_count"
    ) >= 2 * 3


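# NOTE: ``_MockModelSelection`` (used above) is not defined in this excerpt. What follows is a
# minimal sketch, assuming the ``ModelSelectionBase`` interface with an ``update(value)`` hook
# that the rollout evaluator calls once per evaluation -- not necessarily the actual implementation.
class _MockModelSelection(ModelSelectionBase):
    """Mock model selection that only counts how often the evaluator reports a new value."""

    def __init__(self):
        self.update_count = 0

    def update(self, value: float) -> None:
        """Count update calls dispatched by the rollout evaluator."""
        self.update_count += 1

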
def test_actor_id_is_optional_for_single_network_policies():
    env = build_dummy_maze_env()
    policy = flatten_concat_probabilistic_policy_for_env(env)

    obs = env.reset()
    action = policy.compute_action(obs)  # No actor ID provided
    assert all(key in env.action_space.spaces for key in action.keys())


def test_observation_monitoring():
    """Observation logging unit test."""
    # instantiate env
    env = build_dummy_maze_env()
    env = MazeEnvMonitoringWrapper.wrap(env, observation_logging=True, action_logging=False,
                                        reward_logging=False)
    env = LogStatsWrapper.wrap(env)  # for accessing events from previous steps
    env.reset()

    # test application of wrapper
    for ii in range(3):
        # Observation will get reported in the next step (when the agent is actually acting on it)
        obs = env.step(env.action_space.sample())[0]

        observation_events = env.get_last_step_events(query=[ObservationEvents.observation_original,
                                                             ObservationEvents.observation_processed])
        assert len(observation_events) == 4
        for event in observation_events:
            assert issubclass(event.interface_class, ObservationEvents)
            obs_name = event.attributes['name']
            assert obs_name in ['observation_0', 'observation_1']
            if ii > 0:
                assert np.allclose(np.asarray(obs[obs_name]), np.asarray(event.attributes['value']))


def test_reward_monitoring():
    """Reward logging unit test."""
    # instantiate env
    env = build_dummy_maze_env()
    env = MazeEnvMonitoringWrapper.wrap(env, observation_logging=False, action_logging=False,
                                        reward_logging=True)
    env = LogStatsWrapper.wrap(env)  # for accessing events from previous steps
    env.reset()
    env.step(env.action_space.sample())

    # test application of wrapper
    for ii in range(2):
        env.step(env.action_space.sample())

        reward_events = env.get_last_step_events(query=[RewardEvents.reward_original,
                                                        RewardEvents.reward_processed])
        assert len(reward_events) == 2
        for event in reward_events:
            assert issubclass(event.interface_class, RewardEvents)
            assert event.attributes['value'] == 10
            assert event.interface_method in [RewardEvents.reward_original, RewardEvents.reward_processed]


def test_does_not_carry_over_stats_from_unfinished_episodes():
    policy = flatten_concat_probabilistic_policy_for_env(build_dummy_maze_env())

    # Wrap envs in a time-limit wrapper
    env = SequentialVectorEnv([lambda: TimeLimitWrapper.wrap(build_dummy_maze_env())] * 2)

    # Make one env slower than the other
    env.envs[0].set_max_episode_steps(2)
    env.envs[1].set_max_episode_steps(10)

    evaluator = RolloutEvaluator(eval_env=env, n_episodes=1, model_selection=None)
    for i in range(2):
        evaluator.evaluate(policy)
        increment_log_step()

    # We should get just one episode counted in stats
    assert evaluator.eval_env.get_stats_value(
        BaseEnvEvents.reward,
        LogStatsLevel.EPOCH,
        name="episode_count"
    ) == 1


def test_records_events_in_reset():
    env = build_dummy_maze_env()
    env = _EventsInResetWrapper.wrap(env)
    env = LogStatsWrapper.wrap(env)

    env.reset()
    for i in range(5):
        env.step(env.action_space.sample())

    env.write_epoch_stats()
    assert env.get_stats_value(
        BaseEnvEvents.test_event,
        LogStatsLevel.EPOCH
    ) == 1  # only from the single event fired during env reset


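# NOTE: ``_EventsInResetWrapper`` (used above) is not defined in this excerpt. A minimal sketch,
# assuming the standard maze ``Wrapper`` interface: it fires a single ``BaseEnvEvents.test_event``
# as part of env reset, which is exactly what the assertion above counts. The actual
# implementation may differ.
class _EventsInResetWrapper(Wrapper[MazeEnv]):
    """Test wrapper that fires one test event during env reset."""

    def reset(self):
        observation = self.env.reset()
        # Fire the single test event as part of the reset procedure
        base_events = self.env.core_env.context.event_service.create_event_topic(BaseEnvEvents)
        base_events.test_event(1)
        return observation

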
def test_supports_trajectory_recording_wrapper():
    """Tests whether agent integration supports trajectory recording wrappers."""

    class TestWriter(TrajectoryWriter):
        """Mock writer for checking that trajectory recording goes through."""

        def __init__(self):
            self.step_count = 0

        def write(self, episode_record: StateTrajectoryRecord):
            """Count recorded steps"""
            self.step_count += len(episode_record.step_records)
            assert episode_record.renderer is not None

    step_count = 10
    writer = TestWriter()
    TrajectoryWriterRegistry.writers = []  # Ensure there is no other writer
    TrajectoryWriterRegistry.register_writer(writer)

    agent_deployment = AgentDeployment(
        policy=DummyGreedyPolicy(),
        env=TrajectoryRecordingWrapper.wrap(build_dummy_maze_env()),
    )

    # Step the environment manually here and query the agent integration wrapper for maze_actions
    test_core_env = build_dummy_maze_env().core_env
    maze_state = test_core_env.reset()
    reward, done, info = None, None, None
    for i in range(step_count):
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = test_core_env.step(maze_action)

    # Rollout needs to be finished to notify the wrappers
    agent_deployment.close(maze_state, reward, done, info)
    assert writer.step_count == step_count + 1  # count terminal state as well


def test_time_limit_wrapper():
    """Time limit wrapper unit tests."""
    env = build_dummy_maze_env()
    env = TimeLimitWrapper.wrap(env, max_episode_steps=5)
    env.set_max_episode_steps(max_episode_steps=5)
    env.seed(1234)
    env.reset()
    for i in range(5):
        obs, rew, done, info = env.step(env.action_space.sample())
        if i >= 4:
            assert done
    env.close()


def test_terminates_on_done():
    """Resetting the env or terminating rollout early when the env is done."""
    env = build_dummy_maze_env()
    env = TimeLimitWrapper.wrap(env, max_episode_steps=5)
    policy = RandomPolicy(env.action_spaces_dict)

    # Normal operation (should reset the env automatically and continue rollout)
    rollout_generator = RolloutGenerator(env=env)
    trajectory = rollout_generator.rollout(policy, n_steps=10)
    assert len(trajectory) == 10

    # Terminate on done
    rollout_generator = RolloutGenerator(env=env, terminate_on_done=True)
    trajectory = rollout_generator.rollout(policy, n_steps=10)
    assert len(trajectory) == 5


def test_dummy_maze_env():
    """Unit test for the DummyEnvironment."""
    env = build_dummy_maze_env()
    _ = env.reset()
    for _ in range(10):
        action = env.action_space.sample()
        observation, _, _, _ = env.step(action)
        # check if actor is done
        assert not env.is_actor_done()
    env.close()


def test_dummy_struct_env():
    """Unit test for the DummyStructuredEnvironment."""
    maze_env = build_dummy_maze_env()
    env = DummyStructuredEnvironment(maze_env)
    env.reset()

    # check observation space
    assert isinstance(env.observation_space, spaces.Dict)

    for i in range(10):
        action = env.action_spaces_dict[env.actor_id()[0]].sample()
        observation, _, _, _ = env.step(action)
    env.close()


def test_records_policy_events():
    env = build_dummy_maze_env()
    env = LogStatsWrapper.wrap(env)
    base_events = env.core_env.context.event_service.create_event_topic(BaseEnvEvents)

    env.reset()
    for i in range(5):
        base_events.test_event(1)  # Simulate firing event from policy (= outside of env.step)
        env.step(env.action_space.sample())

    env.write_epoch_stats()
    assert env.get_stats_value(BaseEnvEvents.test_event, LogStatsLevel.EPOCH) == 5  # value of 1 x 5 steps


def test_dummy_model_with_dummy_network():
    """Unit test for the dummy policy and value networks on the dummy env."""
    maze_env = build_dummy_maze_env()

    # init the distribution_mapper with the flat action space
    distribution_mapper_config = [{
        "action_space": spaces.Box,
        "distribution": "maze.distributions.squashed_gaussian.SquashedGaussianProbabilityDistribution"}]
    distribution_mapper = DistributionMapper(action_space=maze_env.action_space,
                                             distribution_mapper_config=distribution_mapper_config)

    obs_shapes = observation_spaces_to_in_shapes(maze_env.observation_spaces_dict)

    dummy_actor = DummyPolicyNet(obs_shapes=obs_shapes[0],
                                 action_logits_shapes={key: distribution_mapper.required_logits_shape(key)
                                                       for key in maze_env.action_space.spaces.keys()},
                                 non_lin=nn.Tanh)
    dummy_critic = DummyValueNet(obs_shapes=obs_shapes[0], non_lin=nn.Tanh)

    obs_np = maze_env.reset()
    obs = {k: torch.from_numpy(v) for k, v in obs_np.items()}
    for i in range(100):
        logits_dict = dummy_actor(obs)
        prob_dist = distribution_mapper.logits_dict_to_distribution(logits_dict=logits_dict, temperature=1.0)
        sampled_actions = prob_dist.sample()

        obs_np, _, _, _ = maze_env.step(sampled_actions)
        obs = {k: torch.from_numpy(v) for k, v in obs_np.items()}
        _ = dummy_critic(obs)

    maze_env.close()


def test_time_limit_wrapper_with_spec():
    """Time limit wrapper unit tests (max steps taken from the env spec)."""

    class Spec:
        def __init__(self):
            self.max_episode_steps = 5

    spec = Spec()
    env = build_dummy_maze_env()
    env.__setattr__("spec", spec)
    env = TimeLimitWrapper.wrap(env, max_episode_steps=None)
    env.seed(1234)
    env.reset()
    for i in range(5):
        obs, rew, done, info = env.step(env.action_space.sample())
        if i >= 4:
            assert done
    env.close()


def _test_dummy_env_for_discretization_config(discretization_config: Dict[str, Dict]) -> None:
    """Test the discretize actions wrapper on the dummy env for a given discretization_config.

    :param discretization_config: The discretize actions config to apply and test.
    """
    base_env = build_dummy_maze_env()
    env = DiscretizeActionsWrapper.wrap(base_env, discretization_config=discretization_config)

    assert isinstance(env.action_space, spaces.Dict)
    for action_name, new_action_config in discretization_config.items():
        assert action_name in env.action_space.spaces.keys()
        if base_env.action_space[action_name].shape[-1] == 1:
            assert isinstance(env.action_space[action_name], spaces.Discrete)
            assert env.action_space[action_name].n == new_action_config['num_bins']
        else:
            assert isinstance(env.action_space[action_name], spaces.MultiDiscrete)
            assert np.all(env.action_space[action_name].nvec ==
                          np.array([new_action_config['num_bins']] * base_env.action_space[action_name].shape[-1]))

    env.reset()
    for i in range(1):
        action = env.action_space.sample()
        env.step(action)

        reverse_action = env.action(action)
        restored_action = env.reverse_action(reverse_action)

        assert isinstance(action, dict)
        for action_name, action_config in discretization_config.items():
            assert action_name in action
            if base_env.action_space[action_name].shape[-1] == 1:
                assert action[action_name] in spaces.Discrete(action_config['num_bins'])
            else:
                nvec = np.array([action_config['num_bins']] * base_env.action_space[action_name].shape[-1])
                assert action[action_name] in spaces.MultiDiscrete(nvec)
            assert reverse_action[action_name] in base_env.action_space[action_name]
            assert np.all(restored_action[action_name] == action[action_name])


def test_records_stats():
    # the default simple setup: flat, single-step env, no step skipping etc.
    env = build_dummy_maze_env()
    env = LogStatsWrapper.wrap(env)
    env.reset()
    for i in range(5):
        env.step(env.action_space.sample())

    # both step counts, seen from outside and seen from the core env, should correspond to 5
    env.write_epoch_stats()
    assert env.get_stats_value(RewardEvents.reward_original, LogStatsLevel.EPOCH, name="total_step_count") == 5
    assert env.get_stats_value(BaseEnvEvents.reward, LogStatsLevel.EPOCH, name="total_step_count") == 5


def test_step_increment_in_single_step_core_env():
    """In single sub-step envs, events should be cleared out and env time incremented automatically."""
    env = build_dummy_maze_env()
    env = LogStatsWrapper.wrap(env)
    env.reset()
    assert env.get_env_time() == 0

    # 10 steps
    for _ in range(10):
        env.step(env.action_space.sample())
    assert env.get_env_time() == 10

    env.reset()
    increment_log_step()
    assert env.get_stats_value(BaseEnvEvents.reward, LogStatsLevel.EPOCH, name="total_step_count") == 10


def test_records_multiple_episodes():
    env = build_dummy_maze_env()
    env = SpacesRecordingWrapper.wrap(env, output_dir="space_records")

    env.reset()
    for _ in range(5):
        for _ in range(10):
            action = env.action_space.sample()
            env.step(action)
        env.reset()

    dumped_files = os.listdir("space_records")
    assert len(dumped_files) == 5
    for file_path in dumped_files:
        with open("space_records/" + file_path, "rb") as in_f:
            episode_record = pickle.load(in_f)
        assert isinstance(episode_record, SpacesTrajectoryRecord)
        assert len(episode_record.step_records) == 10


def test_compatibility_with_dataset():
    env = build_dummy_maze_env()
    env = SpacesRecordingWrapper.wrap(env, output_dir="space_records")

    # Generate 5 episodes, 10 steps each
    env.reset()
    for _ in range(5):
        for _ in range(10):
            action = env.action_space.sample()
            env.step(action)
        env.reset()

    dataset = InMemoryDataset(
        n_workers=2,
        conversion_env_factory=None,
        input_data="space_records",
        trajectory_processor=IdentityTrajectoryProcessor(),
        deserialize_in_main_thread=False)
    assert len(dataset) == 5 * 10


def test_handles_step_skipping_in_reset():
    env = build_dummy_maze_env()
    env = _StepInResetWrapper.wrap(env)
    env = LogStatsWrapper.wrap(env)
    env.reset()

    # Step the env once (should be the third step -- first two were done in the reset)
    env.step(env.action_space.sample())

    # Events should be collected for 3 steps in total -- two from the env reset done by the wrapper + one done above
    assert len(env.episode_event_log.step_event_logs) == 3

    # The same goes for "original reward" stats
    env.write_epoch_stats()
    assert env.get_stats_value(RewardEvents.reward_original, LogStatsLevel.EPOCH, name="total_step_count") == 3

    # The step count from outside is still one (as normal reward events should not be fired for "skipped" steps)
    assert env.get_stats_value(BaseEnvEvents.reward, LogStatsLevel.EPOCH, name="total_step_count") == 1


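# NOTE: ``_StepInResetWrapper`` (used above) is not defined in this excerpt. A minimal sketch,
# assuming the standard maze ``Wrapper`` interface: it performs two core-env steps as part of
# reset, producing the two "skipped" steps the assertions above count. The actual implementation
# may differ.
class _StepInResetWrapper(Wrapper[MazeEnv]):
    """Test wrapper that steps the env twice during reset (simulating step skipping in reset)."""

    def reset(self):
        observation = self.env.reset()
        # Perform two core-env steps before handing control back to the caller
        for _ in range(2):
            observation, _, _, _ = self.env.step(self.env.action_space.sample())
        return observation

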
def test_handles_step_skipping_in_step():
    env = build_dummy_maze_env()
    env = _StepInStepWrapper.wrap(env)
    env = LogStatsWrapper.wrap(env)

    # Step the env twice (should correspond to four core-env steps)
    env.reset()
    for i in range(2):
        env.step(env.action_space.sample())

    # => events should be collected for 4 steps in total
    assert len(env.episode_event_log.step_event_logs) == 4

    # The same goes for "original reward" stats
    env.write_epoch_stats()
    assert env.get_stats_value(RewardEvents.reward_original, LogStatsLevel.EPOCH, name="total_step_count") == 4

    # The step count from outside is still just two (as normal reward events should not be fired for "skipped" steps)
    assert env.get_stats_value(BaseEnvEvents.reward, LogStatsLevel.EPOCH, name="total_step_count") == 2


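# NOTE: ``_StepInStepWrapper`` (used above) is not defined in this excerpt. A minimal sketch,
# assuming the standard maze ``Wrapper`` interface: each outer step triggers two core-env steps,
# so the two outer steps above correspond to the four recorded steps asserted. The actual
# implementation may differ.
class _StepInStepWrapper(Wrapper[MazeEnv]):
    """Test wrapper that steps the env twice per outer step (simulating step skipping in step)."""

    def step(self, action):
        self.env.step(action)  # first ("skipped") core-env step
        return self.env.step(self.env.action_space.sample())  # second core-env step, returned outward

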
def test_records_episode_with_correct_data():
    env = build_dummy_maze_env()
    env = SpacesRecordingWrapper.wrap(env, output_dir="space_records")

    actions = []
    observations = []

    observation = env.reset()
    for _ in range(5):
        observations.append(observation)
        action = env.action_space.sample()
        actions.append(action)
        observation, _, _, _ = env.step(action)

    episode_id = env.get_episode_id()
    expected_file_path = str(episode_id) + ".pkl"
    assert expected_file_path not in os.listdir("space_records")

    # Now dump and load the data
    env.reset()
    assert expected_file_path in os.listdir("space_records")
    with open("space_records/" + expected_file_path, "rb") as in_f:
        episode_record = pickle.load(in_f)

    # Check the contents
    assert isinstance(episode_record, SpacesTrajectoryRecord)
    assert len(episode_record.step_records) == len(actions)
    for record, observation, action in zip(episode_record.step_records, observations, actions):
        assert len(record.actions) == 1 and len(record.observations) == 1  # single-step env
        for obs_key in observation:
            assert np.allclose(record.observations[0][obs_key], observation[obs_key])
        for act_key in action:
            assert np.allclose(record.actions[0][act_key], action[act_key])


def test_log_action_events_dict_discrete():
    """Action logging unit tests."""
    env = build_dummy_maze_env()
    train(env)


def test_default_action_space_sampling():
    env = build_dummy_maze_env()
    policy = RandomPolicy(env.action_spaces_dict)
    action = policy.compute_action(observation=env.observation_space.sample(), maze_state=None)
    assert action in env.action_space