def test_default_action_discrete_adapter(self):
    """Check the default discrete action adapter.

    Every action produced by the agent must be one of the known lane-control
    strings, and the adapter's declared action space must agree with the
    runtime actions.
    """
    ADAPTER_TYPE = adapters.AdapterType.DefaultActionDiscrete
    adapter = adapters.adapter_from_type(ADAPTER_TYPE)
    interface = adapters.required_interface_from_types(ADAPTER_TYPE)
    space = adapters.space_from_type(ADAPTER_TYPE)

    # The closed set of actions the discrete adapter may emit.
    valid_actions = (
        "keep_lane",
        "slow_down",
        "change_lane_left",
        "change_lane_right",
    )

    agent, environment = prepare_test_agent_and_environment(
        required_interface=interface,
        action_adapter=adapter,
    )
    action_sequence, _, _, _ = run_experiment(agent, environment)

    for action in action_sequence:
        # Each emitted action is a plain string drawn from the fixed set.
        self.assertIsInstance(action, str)
        self.assertIn(action, valid_actions)
        # The declared space must describe exactly these scalar string actions.
        self.assertEqual(space.dtype, type(action))
        self.assertEqual(space.shape, ())
        self.assertTrue(space.contains(action))
def test_default_action_continuous_adapter(self):
    """Check the default continuous action adapter.

    Actions must be float32 arrays of shape (3,) whose components stay inside
    the adapter's per-component bounds, and the declared space must match.
    """
    ADAPTER_TYPE = adapters.AdapterType.DefaultActionContinuous
    adapter = adapters.adapter_from_type(ADAPTER_TYPE)
    interface = adapters.required_interface_from_types(ADAPTER_TYPE)
    space = adapters.space_from_type(ADAPTER_TYPE)

    agent, environment = prepare_test_agent_and_environment(
        required_interface=interface,
        action_adapter=adapter,
    )
    action_sequence, _, _, _ = run_experiment(agent, environment)

    # Inclusive (low, high) bounds for each of the three action components.
    component_bounds = ((0.0, 1.0), (0.0, 1.0), (-1.0, 1.0))

    for action in action_sequence:
        self.assertIsInstance(action, np.ndarray)
        self.assertEqual(action.dtype, "float32")
        self.assertEqual(action.shape, (3,))
        # Validate every component against its bounds, in order.
        for component, (low, high) in zip(action, component_bounds):
            self.assertGreaterEqual(component, low)
            self.assertLessEqual(component, high)
        # The declared space must agree with the runtime actions.
        self.assertEqual(space.dtype, action.dtype)
        self.assertEqual(space.shape, action.shape)
        self.assertTrue(space.contains(action))
def test_default_info_adapter(self):
    """Check the default info adapter's per-agent info structure.

    A single environment step must yield, for our agent, a dict containing a
    float score, the raw environment observation, and a "logs" dict whose
    entries carry the expected types.
    """
    ADAPTER_TYPE = adapters.AdapterType.DefaultInfo
    adapter = adapters.adapter_from_type(ADAPTER_TYPE)
    interface = adapters.required_interface_from_types(ADAPTER_TYPE)

    agent, environment = prepare_test_agent_and_environment(
        required_interface=interface,
        info_adapter=adapter,
    )
    _, infos_sequence, _, _ = run_experiment(agent, environment, max_steps=1)

    infos = infos_sequence[0]
    self.assertIsInstance(infos, dict)
    self.assertIn(AGENT_ID, infos)

    agent_info = infos[AGENT_ID]
    self.assertIsInstance(agent_info, dict)

    # Top-level entries of the per-agent info dict.
    self.assertIn("score", agent_info)
    self.assertIsInstance(agent_info["score"], float)
    self.assertIn("env_obs", agent_info)
    self.assertIsInstance(agent_info["env_obs"], Observation)
    self.assertIn("logs", agent_info)
    self.assertIsInstance(agent_info["logs"], dict)

    logs = agent_info["logs"]

    # "position" is the only log entry with a constrained shape, so it is
    # checked explicitly before the table-driven type checks below.
    self.assertIn("position", logs)
    self.assertIsInstance(logs["position"], np.ndarray)
    self.assertEqual(logs["position"].shape, (3,))

    # Remaining log entries and their expected types, in the order checked.
    expected_log_types = (
        ("speed", float),
        ("steering", float),
        ("heading", Heading),
        ("dist_center", float),
        ("start", Start),
        ("goal", PositionalGoal),
        ("closest_wp", Waypoint),
        ("events", Events),
        ("ego_num_violations", int),
        ("social_num_violations", int),
        ("goal_dist", float),
        ("linear_jerk", float),
        ("angular_jerk", float),
        ("env_score", float),
    )
    for key, expected_type in expected_log_types:
        self.assertIn(key, logs)
        self.assertIsInstance(logs[key], expected_type)
def test_default_observation_vector_adapter(self):
    """Check the default vector observation adapter.

    The agent's observation must be a dict with float32 "low_dim_states" of
    shape (47,) and "social_vehicles" of shape (10, 4); the declared dict
    space's components must match while the dict space itself exposes no
    dtype or shape.
    """
    ADAPTER_TYPE = adapters.AdapterType.DefaultObservationVector
    adapter = adapters.adapter_from_type(ADAPTER_TYPE)
    interface = adapters.required_interface_from_types(ADAPTER_TYPE)
    space = adapters.space_from_type(ADAPTER_TYPE)

    agent, environment = prepare_test_agent_and_environment(
        required_interface=interface,
        observation_adapter=adapter,
    )
    _, _, observations_sequence, _ = run_experiment(agent, environment, max_steps=1)

    observations = observations_sequence[0]
    self.assertIsInstance(observations, dict)
    self.assertIn(AGENT_ID, observations)
    observation = observations[AGENT_ID]

    # Expected keys and their array shapes; insertion order drives the
    # order of the checks below.
    expected_shapes = {
        "low_dim_states": (47,),
        "social_vehicles": (10, 4),
    }

    for key in expected_shapes:
        self.assertIn(key, observation)
    for key in expected_shapes:
        self.assertIsInstance(observation[key], np.ndarray)
    for key in expected_shapes:
        self.assertEqual(observation[key].dtype, "float32")
    for key, shape in expected_shapes.items():
        self.assertEqual(observation[key].shape, shape)

    # The composite space exposes no dtype of its own; only its components do.
    self.assertEqual(space.dtype, None)
    for key in expected_shapes:
        self.assertEqual(space[key].dtype, observation[key].dtype)
    # Likewise the composite space exposes no shape of its own.
    self.assertEqual(space.shape, None)
    for key in expected_shapes:
        self.assertEqual(space[key].shape, observation[key].shape)
    self.assertTrue(space.contains(observation))
def test_default_reward_adapter(self):
    """Check the default reward adapter.

    After one environment step, the reward structure must be a dict mapping
    the agent ID to a plain float.
    """
    ADAPTER_TYPE = adapters.AdapterType.DefaultReward
    adapter = adapters.adapter_from_type(ADAPTER_TYPE)
    interface = adapters.required_interface_from_types(ADAPTER_TYPE)

    agent, environment = prepare_test_agent_and_environment(
        required_interface=interface,
        reward_adapter=adapter,
    )
    _, _, _, rewards_sequence = run_experiment(agent, environment, max_steps=1)

    first_step_rewards = rewards_sequence[0]
    self.assertIsInstance(first_step_rewards, dict)
    self.assertIsInstance(first_step_rewards[AGENT_ID], float)
def test_default_observation_image_adapter(self):
    """Check the default image observation adapter.

    The agent's observation must be a float32 array of shape (4, 64, 64)
    (four stacked 64x64 frames), and the declared space must match it.
    """
    ADAPTER_TYPE = adapters.AdapterType.DefaultObservationImage
    adapter = adapters.adapter_from_type(ADAPTER_TYPE)
    interface = adapters.required_interface_from_types(ADAPTER_TYPE)
    space = adapters.space_from_type(ADAPTER_TYPE)

    agent, environment = prepare_test_agent_and_environment(
        required_interface=interface,
        observation_adapter=adapter,
    )
    _, _, observations_sequence, _ = run_experiment(agent, environment, max_steps=1)

    observations = observations_sequence[0]
    self.assertIsInstance(observations, dict)
    self.assertIn(AGENT_ID, observations)

    observation = observations[AGENT_ID]
    self.assertIsInstance(observation, np.ndarray)
    self.assertEqual(observation.dtype, "float32")
    self.assertEqual(observation.shape, (4, 64, 64))

    # The declared space must agree with the runtime observation.
    self.assertEqual(space.dtype, observation.dtype)
    self.assertEqual(space.shape, observation.shape)
    self.assertTrue(space.contains(observation))
def __new__(
    # NOTE(review): PEP 8 would name this first parameter `cls` for __new__;
    # it is implicit at call sites, so the name is internal only.
    self,
    policy_class,
    # action_type,
    policy_params=None,
    checkpoint_dir=None,
    # task=None,
    max_episode_steps=1200,
    experiment_dir=None,
    agent_id="",
):
    """Factory: build and return an `AgentSpec` (not an instance of this class).

    Two construction paths:
    - If `experiment_dir` is given, a previously saved spec is loaded from
      `<experiment_dir>/agent_metadata.pkl` and rebuilt with the supplied
      `checkpoint_dir` substituted into its agent params.
    - Otherwise a fresh spec is assembled from `policy_params` (loaded from a
      `params.yaml` next to `policy_class`'s module if not provided), with
      adapters resolved from the params' declared action/observation/reward
      types.

    Args:
        policy_class: Agent class used as the spec's `agent_builder`.
        policy_params: Mapping of policy configuration; must contain
            "action_type", "observation_type", and "reward_type" keys when the
            fresh-spec path is taken. Defaults to the policy's params.yaml.
        checkpoint_dir: Directory passed through to the agent's params so it
            can restore weights. May be None.
        max_episode_steps: Episode step cap for the fresh interface
            (unused on the `experiment_dir` path, which reuses the saved
            interface).
        experiment_dir: If set, load the saved spec from this directory
            instead of building a new one.
        agent_id: Key selecting which saved agent spec to load from the
            experiment metadata.

    Returns:
        The constructed `AgentSpec`.
    """
    if experiment_dir:
        print(
            f"Loading spec for {agent_id} from {experiment_dir}/agent_metadata.pkl"
        )
        # NOTE(review): dill.load executes pickle bytecode — only load
        # metadata from trusted experiment directories.
        with open(f"{experiment_dir}/agent_metadata.pkl", "rb") as metadata_file:
            agent_metadata = dill.load(metadata_file)
            spec = agent_metadata["agent_specs"][agent_id]
            # Rebuild the saved spec so the caller's checkpoint_dir replaces
            # whatever was recorded at save time; everything else is reused.
            new_spec = AgentSpec(
                interface=spec.interface,
                agent_params=dict(
                    policy_params=spec.agent_params["policy_params"],
                    checkpoint_dir=checkpoint_dir,
                ),
                agent_builder=spec.agent_builder,
                observation_adapter=spec.observation_adapter,
                reward_adapter=spec.reward_adapter,
                info_adapter=spec.info_adapter,
            )
            spec = new_spec
    else:
        # If policy_params is None, then there must be a params.yaml file in the
        # same directory as the policy_class module.
        if not policy_params:
            policy_class_module_file = inspect.getfile(policy_class)
            policy_class_module_directory = os.path.dirname(
                policy_class_module_file)
            policy_params = load_yaml(
                os.path.join(policy_class_module_directory, "params.yaml"))

        # Resolve the adapter types declared in the params; the info adapter
        # is always the default one.
        action_type = adapters.type_from_string(
            string_type=policy_params["action_type"])
        observation_type = adapters.type_from_string(
            string_type=policy_params["observation_type"])
        reward_type = adapters.type_from_string(
            string_type=policy_params["reward_type"])
        info_type = adapters.AdapterType.DefaultInfo

        # Interface requirements are the union of what all four adapters need.
        adapter_interface_requirements = adapters.required_interface_from_types(
            action_type, observation_type, reward_type, info_type)
        action_adapter = adapters.adapter_from_type(
            adapter_type=action_type)
        observation_adapter = adapters.adapter_from_type(
            adapter_type=observation_type)
        reward_adapter = adapters.adapter_from_type(
            adapter_type=reward_type)
        info_adapter = adapters.adapter_from_type(adapter_type=info_type)

        spec = AgentSpec(
            interface=AgentInterface(
                **adapter_interface_requirements,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params=dict(policy_params=policy_params,
                              checkpoint_dir=checkpoint_dir),
            agent_builder=policy_class,
            action_adapter=action_adapter,
            observation_adapter=observation_adapter,
            reward_adapter=reward_adapter,
            info_adapter=info_adapter,
        )

    return spec