Example #1
    def test_default_action_discrete_adapter(self):
        """Check that the default discrete action adapter produces string actions
        contained in the adapter's Gym space."""
        ADAPTER_TYPE = adapters.AdapterType.DefaultActionDiscrete
        adapter = adapters.adapter_from_type(ADAPTER_TYPE)
        interface = adapters.required_interface_from_types(ADAPTER_TYPE)
        space = adapters.space_from_type(ADAPTER_TYPE)

        AVAILABLE_ACTIONS = [
            "keep_lane",
            "slow_down",
            "change_lane_left",
            "change_lane_right",
        ]

        agent, environment = prepare_test_agent_and_environment(
            required_interface=interface,
            action_adapter=adapter,
        )
        action_sequence, _, _, _ = run_experiment(agent, environment)

        for action in action_sequence:
            self.assertIsInstance(action, str)
            self.assertIn(action, AVAILABLE_ACTIONS)
            self.assertEqual(space.dtype, type(action))
            self.assertEqual(space.shape, ())
            self.assertTrue(space.contains(action))
Example #2
    def test_default_action_continuous_adapter(self):
        """Check that the default continuous action adapter produces float32 vectors
        of shape (3,) that lie within the bounds of the adapter's Gym space."""
        ADAPTER_TYPE = adapters.AdapterType.DefaultActionContinuous
        adapter = adapters.adapter_from_type(ADAPTER_TYPE)
        interface = adapters.required_interface_from_types(ADAPTER_TYPE)
        space = adapters.space_from_type(ADAPTER_TYPE)

        agent, environment = prepare_test_agent_and_environment(
            required_interface=interface,
            action_adapter=adapter,
        )
        action_sequence, _, _, _ = run_experiment(agent, environment)

        for action in action_sequence:
            self.assertIsInstance(action, np.ndarray)
            self.assertEqual(action.dtype, "float32")
            self.assertEqual(action.shape, (3, ))
            self.assertGreaterEqual(action[0], 0.0)
            self.assertLessEqual(action[0], 1.0)
            self.assertGreaterEqual(action[1], 0.0)
            self.assertLessEqual(action[1], 1.0)
            self.assertGreaterEqual(action[2], -1.0)
            self.assertLessEqual(action[2], 1.0)
            self.assertEqual(space.dtype, action.dtype)
            self.assertEqual(space.shape, action.shape)
            self.assertTrue(space.contains(action))
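The bounds checked above match SMARTS' continuous-control convention of throttle and brake in [0, 1] and steering in [-1, 1]. As a standalone sanity check, a valid action can also be sampled directly from the adapter's Gym space; the sketch below assumes only that the adapters module used above is importable as ultra.adapters:

import numpy as np

import ultra.adapters as adapters  # assumed import path for the adapters module used above

# Sample a valid continuous action straight from the adapter's space.
space = adapters.space_from_type(adapters.AdapterType.DefaultActionContinuous)
sample = space.sample()        # np.ndarray of shape (3,), dtype float32
assert isinstance(sample, np.ndarray)
assert space.contains(sample)  # first two components in [0, 1], third in [-1, 1]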
Example #3
    def test_default_info_adapter(self):
        """Check the structure and field types of the info dictionary produced by
        the default info adapter."""
        ADAPTER_TYPE = adapters.AdapterType.DefaultInfo
        adapter = adapters.adapter_from_type(ADAPTER_TYPE)
        interface = adapters.required_interface_from_types(ADAPTER_TYPE)

        agent, environment = prepare_test_agent_and_environment(
            required_interface=interface,
            info_adapter=adapter,
        )
        _, infos_sequence, _, _ = run_experiment(agent,
                                                 environment,
                                                 max_steps=1)

        infos = infos_sequence[0]
        self.assertIsInstance(infos, dict)
        self.assertIn(AGENT_ID, infos)
        self.assertIsInstance(infos[AGENT_ID], dict)
        self.assertIn("score", infos[AGENT_ID])
        self.assertIsInstance(infos[AGENT_ID]["score"], float)
        self.assertIn("env_obs", infos[AGENT_ID])
        self.assertIsInstance(infos[AGENT_ID]["env_obs"], Observation)
        self.assertIn("logs", infos[AGENT_ID])
        self.assertIsInstance(infos[AGENT_ID]["logs"], dict)
        self.assertIn("position", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["position"], np.ndarray)
        self.assertEqual(infos[AGENT_ID]["logs"]["position"].shape, (3, ))
        self.assertIn("speed", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["speed"], float)
        self.assertIn("steering", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["steering"], float)
        self.assertIn("heading", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["heading"], Heading)
        self.assertIn("dist_center", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["dist_center"], float)
        self.assertIn("start", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["start"], Start)
        self.assertIn("goal", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["goal"], PositionalGoal)
        self.assertIn("closest_wp", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["closest_wp"], Waypoint)
        self.assertIn("events", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["events"], Events)
        self.assertIn("ego_num_violations", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["ego_num_violations"],
                              int)
        self.assertIn("social_num_violations", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["social_num_violations"],
                              int)
        self.assertIn("goal_dist", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["goal_dist"], float)
        self.assertIn("linear_jerk", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["linear_jerk"], float)
        self.assertIn("angular_jerk", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["angular_jerk"], float)
        self.assertIn("env_score", infos[AGENT_ID]["logs"])
        self.assertIsInstance(infos[AGENT_ID]["logs"]["env_score"], float)
Example #4
    def test_default_observation_vector_adapter(self):
        """Check the dtypes and shapes of the vector observations produced by the
        default vector observation adapter against the adapter's Gym space."""
        ADAPTER_TYPE = adapters.AdapterType.DefaultObservationVector
        adapter = adapters.adapter_from_type(ADAPTER_TYPE)
        interface = adapters.required_interface_from_types(ADAPTER_TYPE)
        space = adapters.space_from_type(ADAPTER_TYPE)

        agent, environment = prepare_test_agent_and_environment(
            required_interface=interface,
            observation_adapter=adapter,
        )
        _, _, observations_sequence, _ = run_experiment(agent,
                                                        environment,
                                                        max_steps=1)

        observations = observations_sequence[0]
        self.assertIsInstance(observations, dict)
        self.assertIn(AGENT_ID, observations)
        self.assertIn("low_dim_states", observations[AGENT_ID])
        self.assertIn("social_vehicles", observations[AGENT_ID])
        self.assertIsInstance(observations[AGENT_ID]["low_dim_states"],
                              np.ndarray)
        self.assertIsInstance(observations[AGENT_ID]["social_vehicles"],
                              np.ndarray)
        self.assertEqual(observations[AGENT_ID]["low_dim_states"].dtype,
                         "float32")
        self.assertEqual(observations[AGENT_ID]["social_vehicles"].dtype,
                         "float32")
        self.assertEqual(observations[AGENT_ID]["low_dim_states"].shape,
                         (47, ))
        self.assertEqual(observations[AGENT_ID]["social_vehicles"].shape,
                         (10, 4))
        self.assertEqual(space.dtype, None)
        self.assertEqual(
            space["low_dim_states"].dtype,
            observations[AGENT_ID]["low_dim_states"].dtype,
        )
        self.assertEqual(
            space["social_vehicles"].dtype,
            observations[AGENT_ID]["social_vehicles"].dtype,
        )
        self.assertEqual(space.shape, None)
        self.assertEqual(
            space["low_dim_states"].shape,
            observations[AGENT_ID]["low_dim_states"].shape,
        )
        self.assertEqual(
            space["social_vehicles"].shape,
            observations[AGENT_ID]["social_vehicles"].shape,
        )
        self.assertTrue(space.contains(observations[AGENT_ID]))
Example #5
    def test_default_reward_adapter(self):
        """Check that the default reward adapter produces a float reward for the agent."""
        ADAPTER_TYPE = adapters.AdapterType.DefaultReward
        adapter = adapters.adapter_from_type(ADAPTER_TYPE)
        interface = adapters.required_interface_from_types(ADAPTER_TYPE)

        agent, environment = prepare_test_agent_and_environment(
            required_interface=interface,
            reward_adapter=adapter,
        )
        _, _, _, rewards_sequence = run_experiment(agent,
                                                   environment,
                                                   max_steps=1)

        rewards = rewards_sequence[0]
        self.assertIsInstance(rewards, dict)
        self.assertIsInstance(rewards[AGENT_ID], float)
Example #6
    def test_default_observation_image_adapter(self):
        """Check the dtype and shape of the image observations produced by the
        default image observation adapter against the adapter's Gym space."""
        ADAPTER_TYPE = adapters.AdapterType.DefaultObservationImage
        adapter = adapters.adapter_from_type(ADAPTER_TYPE)
        interface = adapters.required_interface_from_types(ADAPTER_TYPE)
        space = adapters.space_from_type(ADAPTER_TYPE)

        agent, environment = prepare_test_agent_and_environment(
            required_interface=interface,
            observation_adapter=adapter,
        )
        _, _, observations_sequence, _ = run_experiment(agent,
                                                        environment,
                                                        max_steps=1)

        observations = observations_sequence[0]
        self.assertIsInstance(observations, dict)
        self.assertIn(AGENT_ID, observations)
        self.assertIsInstance(observations[AGENT_ID], np.ndarray)
        self.assertEqual(observations[AGENT_ID].dtype, "float32")
        self.assertEqual(observations[AGENT_ID].shape, (4, 64, 64))
        self.assertEqual(space.dtype, observations[AGENT_ID].dtype)
        self.assertEqual(space.shape, observations[AGENT_ID].shape)
        self.assertTrue(space.contains(observations[AGENT_ID]))
Example #7
def train(
    task,
    num_episodes,
    max_episode_steps,
    rollout_fragment_length,
    policy,
    eval_info,
    timestep_sec,
    headless,
    seed,
    train_batch_size,
    sgd_minibatch_size,
    log_dir,
):
    """Train an RLlib baseline on the given task using ULTRA's default continuous
    action and default vector observation adapters."""
    agent_name = policy
    policy_params = load_yaml(
        f"ultra/baselines/{agent_name}/{agent_name}/params.yaml")

    action_type = adapters.type_from_string(policy_params["action_type"])
    observation_type = adapters.type_from_string(
        policy_params["observation_type"])
    reward_type = adapters.type_from_string(policy_params["reward_type"])

    if action_type != adapters.AdapterType.DefaultActionContinuous:
        raise Exception(
            f"RLlib training only supports the "
            f"{adapters.AdapterType.DefaultActionContinuous} action type.")
    if observation_type != adapters.AdapterType.DefaultObservationVector:
        # NOTE: The SMARTS observations adaptation that is done in ULTRA's Gym
        #       environment is not done in ULTRA's RLlib environment. If other
        #       observation adapters are used, they may raise an Exception.
        raise Exception(
            f"RLlib training only supports the "
            f"{adapters.AdapterType.DefaultObservationVector} observation type."
        )

    action_space = adapters.space_from_type(adapter_type=action_type)
    observation_space = adapters.space_from_type(adapter_type=observation_type)

    action_adapter = adapters.adapter_from_type(adapter_type=action_type)
    info_adapter = adapters.adapter_from_type(
        adapter_type=adapters.AdapterType.DefaultInfo)
    observation_adapter = adapters.adapter_from_type(
        adapter_type=observation_type)
    reward_adapter = adapters.adapter_from_type(adapter_type=reward_type)

    params_seed = policy_params["seed"]
    encoder_key = policy_params["social_vehicles"]["encoder_key"]
    num_social_features = observation_space["social_vehicles"].shape[1]
    social_capacity = observation_space["social_vehicles"].shape[0]
    social_policy_hidden_units = int(policy_params["social_vehicles"].get(
        "social_policy_hidden_units", 0))
    social_policy_init_std = int(policy_params["social_vehicles"].get(
        "social_policy_init_std", 0))
    social_vehicle_config = get_social_vehicle_configs(
        encoder_key=encoder_key,
        num_social_features=num_social_features,
        social_capacity=social_capacity,
        seed=params_seed,
        social_policy_hidden_units=social_policy_hidden_units,
        social_policy_init_std=social_policy_init_std,
    )

    ModelCatalog.register_custom_model("fc_model", CustomFCModel)
    config = RllibAgent.rllib_default_config(agent_name)

    rllib_policies = {
        "default_policy": (
            None,
            observation_space,
            action_space,
            {
                "model": {
                    "custom_model": "fc_model",
                    "custom_model_config": {
                        "social_vehicle_config": social_vehicle_config
                    },
                }
            },
        )
    }
    agent_specs = {
        "AGENT-007":
        AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=ActionSpaceType.Continuous,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params={},
            agent_builder=None,
            action_adapter=action_adapter,
            info_adapter=info_adapter,
            observation_adapter=observation_adapter,
            reward_adapter=reward_adapter,
        )
    }

    tune_config = {
        "env": RLlibUltraEnv,
        "log_level": "WARN",
        "callbacks": Callbacks,
        "framework": "torch",
        "num_workers": 1,
        "train_batch_size": train_batch_size,
        "sgd_minibatch_size": sgd_minibatch_size,
        "rollout_fragment_length": rollout_fragment_length,
        "in_evaluation": True,
        "evaluation_num_episodes": eval_info["eval_episodes"],
        "evaluation_interval": eval_info[
            "eval_rate"],  # Evaluation occurs after # of eval-intervals (episodes)
        "evaluation_config": {
            "env_config": {
                "seed": seed,
                "scenario_info": task,
                "headless": headless,
                "eval_mode": True,
                "ordered_scenarios": False,
                "agent_specs": agent_specs,
                "timestep_sec": timestep_sec,
            },
            "explore": False,
        },
        "env_config": {
            "seed": seed,
            "scenario_info": task,
            "headless": headless,
            "eval_mode": False,
            "ordered_scenarios": False,
            "agent_specs": agent_specs,
            "timestep_sec": timestep_sec,
        },
        "multiagent": {
            "policies": rllib_policies
        },
    }

    config.update(tune_config)
    agent = RllibAgent(
        agent_name=agent_name,
        env=RLlibUltraEnv,
        config=tune_config,
        logger_creator=log_creator(log_dir),
    )

    # The iteration value in trainer.py (self._iterations) is technically the number of episodes.
    for i in range(num_episodes):
        results = agent.train()
        # Evaluation metrics will now be displayed on TensorBoard.
        agent.log_evaluation_metrics(results)
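For orientation, the sketch below shows one way train could be called directly; every concrete value (the task tuple, policy name, batch sizes, and so on) is an illustrative assumption rather than a recommended configuration:

# Hypothetical direct invocation of train() with illustrative arguments only.
if __name__ == "__main__":
    train(
        task=("1", "easy"),  # assumed (task, level) pair passed through as scenario_info
        num_episodes=100,
        max_episode_steps=1200,
        rollout_fragment_length=200,
        policy="ppo",  # assumed baseline name with a matching params.yaml
        eval_info={"eval_episodes": 10, "eval_rate": 5},
        timestep_sec=0.1,
        headless=True,
        seed=2,
        train_batch_size=4000,
        sgd_minibatch_size=128,
        log_dir="logs",
    )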
Example #8
    def __new__(
        self,
        policy_class,
        # action_type,
        policy_params=None,
        checkpoint_dir=None,
        # task=None,
        max_episode_steps=1200,
        experiment_dir=None,
        agent_id="",
    ):
        """Build an AgentSpec, either restored from a saved experiment directory or
        assembled from the policy's params.yaml and ULTRA's default adapters."""
        if experiment_dir:
            print(
                f"Loading spec for {agent_id} from {experiment_dir}/agent_metadata.pkl"
            )
            with open(f"{experiment_dir}/agent_metadata.pkl",
                      "rb") as metadata_file:
                agent_metadata = dill.load(metadata_file)
                spec = agent_metadata["agent_specs"][agent_id]

                new_spec = AgentSpec(
                    interface=spec.interface,
                    agent_params=dict(
                        policy_params=spec.agent_params["policy_params"],
                        checkpoint_dir=checkpoint_dir,
                    ),
                    agent_builder=spec.agent_builder,
                    observation_adapter=spec.observation_adapter,
                    reward_adapter=spec.reward_adapter,
                    info_adapter=spec.info_adapter,
                )

                spec = new_spec
        else:
            # If policy_params is None, then there must be a params.yaml file in the
            # same directory as the policy_class module.
            if not policy_params:
                policy_class_module_file = inspect.getfile(policy_class)
                policy_class_module_directory = os.path.dirname(
                    policy_class_module_file)
                policy_params = load_yaml(
                    os.path.join(policy_class_module_directory, "params.yaml"))

            action_type = adapters.type_from_string(
                string_type=policy_params["action_type"])
            observation_type = adapters.type_from_string(
                string_type=policy_params["observation_type"])
            reward_type = adapters.type_from_string(
                string_type=policy_params["reward_type"])
            info_type = adapters.AdapterType.DefaultInfo

            adapter_interface_requirements = adapters.required_interface_from_types(
                action_type, observation_type, reward_type, info_type)
            action_adapter = adapters.adapter_from_type(
                adapter_type=action_type)
            observation_adapter = adapters.adapter_from_type(
                adapter_type=observation_type)
            reward_adapter = adapters.adapter_from_type(
                adapter_type=reward_type)
            info_adapter = adapters.adapter_from_type(adapter_type=info_type)

            spec = AgentSpec(
                interface=AgentInterface(
                    **adapter_interface_requirements,
                    max_episode_steps=max_episode_steps,
                    debug=True,
                ),
                agent_params=dict(policy_params=policy_params,
                                  checkpoint_dir=checkpoint_dir),
                agent_builder=policy_class,
                action_adapter=action_adapter,
                observation_adapter=observation_adapter,
                reward_adapter=reward_adapter,
                info_adapter=info_adapter,
            )

        return spec
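Used as a factory, the __new__ above returns a plain AgentSpec rather than an instance of the defining class. A minimal usage sketch follows; the class name BaselineAgentSpec, the SACPolicy stand-in, and the import paths are assumptions for illustration:

# Minimal usage sketch; the import paths and class names below are assumed.
from smarts.core.agent import AgentSpec
from ultra.baselines.agent_spec import BaselineAgentSpec  # assumed home of the __new__ above
from ultra.baselines.sac.sac.policy import SACPolicy  # assumed stand-in baseline policy

spec = BaselineAgentSpec(
    policy_class=SACPolicy,  # a params.yaml is expected next to this class's module
    max_episode_steps=1200,
    checkpoint_dir=None,  # nothing to restore
)
assert isinstance(spec, AgentSpec)  # __new__ hands back the spec itself

agent = spec.build_agent()  # build the trainable agent from the spec as usual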