示例#1
0
    def _build_configuration(self):
        """Builds the training configuration around two SAC agents.

        Wires scenario generation, observation, dynamics and evaluation
        into a RuntimeRL, wraps it as a TF-Agents environment, and creates
        two SACAgents plus a SACRunner operating on that environment.
        """
        # Uniformly distributed vehicle scenarios; fixed seed for
        # reproducibility.
        self._scenario_generator = \
            UniformVehicleDistribution(num_scenarios=20,
                                      random_seed=0,
                                      params=self._params)

        self._observer = CustomObserver(params=self._params)
        self._behavior_model = DynamicModel(params=self._params)
        self._evaluator = CustomEvaluator(params=self._params)

        # Viewer follows the controlled agent inside the given x/y window.
        self._viewer = MPViewer(params=self._params,
                                x_range=[-30, 30],
                                y_range=[-60, 20],
                                follow_agent_id=True)
        # self._viewer = VideoRenderer(renderer=viewer, world_step_time=0.2)
        self._runtime = RuntimeRL(action_wrapper=self._behavior_model,
                                  observer=self._observer,
                                  evaluator=self._evaluator,
                                  step_time=0.2,
                                  viewer=self._viewer,
                                  scenario_generator=self._scenario_generator)
        # Lift the gym-style runtime into a TF-Agents TF environment.
        tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(self._runtime))
        # NOTE(review): both agents share the same environment instance —
        # presumably intended for this two-agent setup; confirm.
        self._agent_0 = SACAgent(tfa_env, params=self._params)
        self._agent_1 = SACAgent(tfa_env, params=self._params)
        self._runner = SACRunner(tfa_env, [self._agent_0, self._agent_1],
                                 params=self._params,
                                 unwrapped_runtime=self._runtime)
示例#2
0
    def test_tfa_runtime():
        """Validates the TFAWrapper around RuntimeRL as a TF-Agents
        py-environment, then wraps it into a TFPyEnvironment.

        NOTE(review): defined at method indentation but without ``self`` —
        presumably module-level or decorated in the full file; confirm.
        """
        params = ParameterServer(
            filename="tests/data/deterministic_scenario_test.json")
        # Three deterministic scenarios; fixed seed for reproducibility.
        scenario_generation = DeterministicScenarioGeneration(num_scenarios=3,
                                                              random_seed=0,
                                                              params=params)
        state_observer = ClosestAgentsObserver(params=params)
        action_wrapper = DynamicModel(params=params)
        evaluator = GoalReached(params=params)
        viewer = MPViewer(params=params,
                          x_range=[-30, 30],
                          y_range=[-20, 40],
                          follow_agent_id=True)  # use_world_bounds=True

        runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                              observer=state_observer,
                              evaluator=evaluator,
                              step_time=0.05,
                              viewer=viewer,
                              scenario_generator=scenario_generation)

        tfa_env = TFAWrapper(runtimerl)
        _ = tfa_env.reset()

        # Spec-conformance check over 5 episodes, then TF wrapping.
        utils.validate_py_environment(tfa_env, episodes=5)
        _ = tf_py_environment.TFPyEnvironment(tfa_env)
示例#3
0
    def test_runner():
        """Smoke-tests TFARunner: builds env + SAC agent, then exercises
        episode collection, training, visualization and evaluation.

        NOTE(review): defined at method indentation but without ``self`` —
        presumably module-level or decorated in the full file; confirm.
        """
        params = ParameterServer(
            filename="tests/data/deterministic_scenario_test.json")
        # Resolve the repository base dir relative to this test file.
        base_dir = os.path.dirname(os.path.dirname(__file__))
        params["BaseDir"] = base_dir
        scenario_generation = DeterministicScenarioGeneration(num_scenarios=3,
                                                              random_seed=0,
                                                              params=params)
        state_observer = ClosestAgentsObserver(params=params)
        action_wrapper = DynamicModel(params=params)
        evaluator = GoalReached(params=params)
        viewer = MPViewer(params=params,
                          x_range=[-30, 30],
                          y_range=[-20, 40],
                          follow_agent_id=True)
        runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                              observer=state_observer,
                              evaluator=evaluator,
                              step_time=0.2,
                              viewer=viewer,
                              scenario_generator=scenario_generation,
                              render=False)
        # Lift the runtime into a TF-Agents TF environment.
        tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(runtimerl))
        sac_agent = SACAgent(tfa_env, params=params)
        tfa_runner = TFARunner(tfa_env,
                               sac_agent,
                               params=params,
                               unwrapped_runtime=runtimerl)
        # Seed the replay buffer before training.
        tfa_runner.collect_initial_episodes()

        # main functionalities
        tfa_runner.train()
        tfa_runner.visualize()
        tfa_runner.evaluate()
示例#4
0
    def test_agent(self):
        """Creates a SACAgent on a TF-Agents environment and checks its
        internal agent name, reset, and checkpoint-load paths.
        """
        params = ParameterServer(
            filename="tests/data/deterministic_scenario_test.json")
        # Resolve the repository base dir relative to this test file.
        base_dir = os.path.dirname(os.path.dirname(__file__))
        params["BaseDir"] = base_dir
        scenario_generation = DeterministicScenarioGeneration(num_scenarios=2,
                                                              random_seed=0,
                                                              params=params)
        state_observer = ClosestAgentsObserver(params=params)
        action_wrapper = DynamicModel(params=params)
        evaluator = GoalReached(params=params)
        viewer = MPViewer(params=params,
                          x_range=[-30, 30],
                          y_range=[-20, 40],
                          follow_agent_id=True)  # use_world_bounds=True

        runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                              observer=state_observer,
                              evaluator=evaluator,
                              step_time=0.05,
                              viewer=viewer,
                              scenario_generator=scenario_generation)

        tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(runtimerl))
        sac_agent = SACAgent(tfa_env, params=params)
        # The wrapped tf_agents agent carries a fixed name.
        self.assertEqual(sac_agent._agent.name, "sac_agent")
        sac_agent.reset()

        # TODO(@hart): does not work because of read-only file-system
        # sac_agent.save()
        sac_agent.load()
    def test_triple_int(self):
        """Runs the triple-integrator (drone) model end-to-end.

        Builds deterministic drone scenarios, steps the rendered runtime
        with randomly sampled actions, and prints the terminal transition
        once an episode finishes.
        """
        params = ParameterServer(
            filename="tests/data/deterministic_scenario_drone_test.json")
        scenario_generation = DeterministicScenarioGeneration(num_scenarios=3,
                                                              random_seed=0,
                                                              params=params)
        state_observer = SimpleObserver(params=params)
        action_wrapper = DynamicModel(model_name="TripleIntegratorModel",
                                      params=params)
        evaluator = GoalReached(params=params)
        viewer = MPViewer(params=params,
                          x_range=[-30, 30],
                          y_range=[-40, 40],
                          use_world_bounds=True)

        runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                              observer=state_observer,
                              evaluator=evaluator,
                              step_time=0.2,
                              viewer=viewer,
                              scenario_generator=scenario_generation,
                              render=True)

        for _ in range(0, 1):  # single episode; raise to cover more scenarios
            runtimerl.reset()
            for _ in range(0, 10):  # run each scenario for 10 steps
                action = action_wrapper.action_space.sample()
                next_observed_state, reward, done, info = \
                  runtimerl.step(action)
                if done:
                    # BUGFIX: the original format string embedded a literal
                    # backslash-newline and a ~120-space run into the printed
                    # text; use adjacent string literals instead.
                    print("State: {}\nReward: {}\nDone: {}, Info: {}\n"
                          "================================================="
                          .format(next_observed_state, reward, done, info))
                    break
示例#6
0
  def _build_configuration(self):
    """Builds the drone-challenge configuration with a single SAC agent.

    Uses a triple-integrator dynamic model and parallel TF-Agents
    environments (count read from ML/Agent/num_parallel_environments).
    """
    self._scenario_generator = \
      DeterministicDroneChallengeGeneration(num_scenarios=3,
                                            random_seed=0,
                                            params=self._params)
    self._observer = CustomObserver(params=self._params)
    self._behavior_model = DynamicModel(model_name="TripleIntegratorModel",
                                        params=self._params)
    self._evaluator = CustomEvaluator(params=self._params)

    # Viewer follows the controlled agent inside the given x/y window.
    viewer = MPViewer(params=self._params,
                      x_range=[-20, 20],
                      y_range=[-20, 20],
                      follow_agent_id=True)
    self._viewer = viewer
    # self._viewer = VideoRenderer(renderer=viewer, world_step_time=0.2)
    self._runtime = RuntimeRL(action_wrapper=self._behavior_model,
                              observer=self._observer,
                              evaluator=self._evaluator,
                              step_time=0.2,
                              viewer=self._viewer,
                              scenario_generator=self._scenario_generator)
    # tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(self._runtime))
    # NOTE(review): every parallel worker's lambda closes over the SAME
    # self._runtime instance — presumably ParallelPyEnvironment forks or
    # copies per process; confirm, otherwise workers share state.
    tfa_env = tf_py_environment.TFPyEnvironment(
      parallel_py_environment.ParallelPyEnvironment(
        [lambda: TFAWrapper(self._runtime)] * self._params["ML"]["Agent"]["num_parallel_environments"]))
    self._agent = SACAgent(tfa_env, params=self._params)
    self._runner = SACRunner(tfa_env,
                             self._agent,
                             params=self._params,
                             unwrapped_runtime=self._runtime)
示例#7
0
    def test_runtime_rl(self):
        """Steps RuntimeRL through 100 deterministic episodes and checks
        observations, termination and reward.

        Every episode must reach the goal (reward 1.) within 50 steps, and
        the observation vector must match the normalized states of agents
        100 and 101.
        """
        params = ParameterServer(
            filename="tests/data/deterministic_scenario_test.json")
        scenario_generation = DeterministicScenarioGeneration(num_scenarios=2,
                                                              random_seed=0,
                                                              params=params)
        state_observer = SimpleObserver(params=params)
        action_wrapper = DynamicModel(params=params)
        evaluator = GoalReached(params=params)
        viewer = MPViewer(params=params,
                          x_range=[-30, 30],
                          y_range=[-40, 40],
                          use_world_bounds=True)

        runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                              observer=state_observer,
                              evaluator=evaluator,
                              step_time=0.2,
                              viewer=viewer,
                              scenario_generator=scenario_generation,
                              render=False)

        start_time = time.time()
        for _ in range(0, 100):
            runtimerl.reset()
            done = False
            reward = 0.
            # Fixed stale comment: the loop runs up to 50 steps, not 10.
            for _ in range(0, 50):  # run each scenario for up to 50 steps
                # Scale the sampled action down so the agent goes
                # (roughly) straight.
                action = action_wrapper.action_space.sample() / 100
                next_observed_state, reward, done, info = \
                  runtimerl.step(action)
                # Observation layout: entries 0-3 and 4-7 are the
                # normalized states of agents 100 and 101.
                self.assertEqual(len(next_observed_state), 16)
                np.testing.assert_array_equal(
                    next_observed_state[0:4],
                    state_observer._normalize(
                        runtimerl._world.agents[100].state)[1:5])
                np.testing.assert_array_equal(
                    next_observed_state[4:8],
                    state_observer._normalize(
                        runtimerl._world.agents[101].state)[1:5])
                if done:
                    # BUGFIX: the original format string embedded a literal
                    # backslash-newline and a ~120-space run into the printed
                    # text; use adjacent string literals instead.
                    print("State: {}\nReward: {}\nDone: {}, Info: {}\n"
                          "================================================="
                          .format(next_observed_state, reward, done, info))
                    break
            # must assert to equal as the agent reaches the goal in the
            # specified number of steps
            self.assertEqual(done, True)
            # goal must have been reached which returns a reward of 1.
            self.assertEqual(reward, 1.)
            self.assertEqual(runtimerl._world.agents[100].id, 100)
            self.assertEqual(runtimerl._world.agents[101].id, 101)
        end_time = time.time()
        print("100 runs took {}s.".format(str(end_time - start_time)))
示例#8
0
    def _build_configuration(self):
        """Builds the configuration with a single SAC agent.

        Combines configurable scenario generation, closest-agents
        observation, a dynamic model and custom evaluation into a
        RuntimeRL rendered into a fixed-resolution matplotlib figure,
        then sets up the SAC agent/runner pair on the TF-Agents wrapped
        environment.
        """
        # TODO(@hart): increase scenario number
        self._scenario_generator = \
          ConfigurableScenarioGeneration(num_scenarios=100,
                                         params=self._params)

        # USE THIS FOR DETERMINISTIC SCENARIO GEN.
        # self._scenario_generator = \
        #   DeterministicScenarioGeneration(num_scenarios=100,
        #                                   params=self._params)
        # self._observer = NearestObserver(self._params)
        self._observer = ClosestAgentsObserver(self._params)

        self._behavior_model = DynamicModel(params=self._params)
        self._evaluator = CustomEvaluator(params=self._params)
        # BUGFIX: sim_step_time was defined but never used — step_time was
        # hard-coded to the same 0.2 below; it is now passed through. The
        # unused real_time_factor local was removed.
        sim_step_time = 0.2

        # Render target: a 1920x1080 figure at 300 dpi with a single axis.
        resolution = (1920, 1080)
        dpi = 300
        fig_env = plt.figure(figsize=(resolution[0] / dpi,
                                      resolution[1] / dpi),
                             dpi=dpi)
        gs = gridspec.GridSpec(1, 1, left=0.0, right=1, bottom=0, top=0.9)
        axis = plt.subplot(gs[0])
        viewer = MPViewer(params=self._params,
                          y_length=80,
                          enforce_y_length=True,
                          enforce_x_length=False,
                          follow_agent_id=True,
                          axis=axis)
        # self._viewer = VideoRenderer(renderer=viewer, world_step_time=0.2)
        self._viewer = viewer
        self._runtime = RuntimeRL(action_wrapper=self._behavior_model,
                                  observer=self._observer,
                                  evaluator=self._evaluator,
                                  step_time=sim_step_time,
                                  viewer=self._viewer,
                                  scenario_generator=self._scenario_generator)
        tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(self._runtime))
        # eval_tf_env is only consumed by the (commented) PPO runner below.
        eval_tf_env = tf_py_environment.TFPyEnvironment(
            TFAWrapper(self._runtime))
        # self._agent = PPOAgent(tfa_env, params=self._params)
        # self._runner = PPORunner(tfa_env,
        #                          eval_tf_env,
        #                          self._agent,
        #                          params=self._params,
        #                          unwrapped_runtime=self._runtime)
        self._agent = SACAgent(tfa_env, params=self._params)
        self._runner = SACRunner(tfa_env,
                                 self._agent,
                                 params=self._params,
                                 unwrapped_runtime=self._runtime)