def _build_configuration(self): """Builds a configuration using an SAC agent """ self._scenario_generator = \ UniformVehicleDistribution(num_scenarios=20, random_seed=0, params=self._params) self._observer = CustomObserver(params=self._params) self._behavior_model = DynamicModel(params=self._params) self._evaluator = CustomEvaluator(params=self._params) self._viewer = MPViewer(params=self._params, x_range=[-30, 30], y_range=[-60, 20], follow_agent_id=True) #self._viewer = VideoRenderer(renderer=viewer, world_step_time=0.2) self._runtime = RuntimeRL(action_wrapper=self._behavior_model, observer=self._observer, evaluator=self._evaluator, step_time=0.2, viewer=self._viewer, scenario_generator=self._scenario_generator) tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(self._runtime)) self._agent_0 = SACAgent(tfa_env, params=self._params) self._agent_1 = SACAgent(tfa_env, params=self._params) self._runner = SACRunner(tfa_env, [self._agent_0, self._agent_1], params=self._params, unwrapped_runtime=self._runtime)
def test_tfa_runtime():
  params = ParameterServer(
    filename="tests/data/deterministic_scenario_test.json")
  scenario_generation = DeterministicScenarioGeneration(num_scenarios=3,
                                                        random_seed=0,
                                                        params=params)
  state_observer = ClosestAgentsObserver(params=params)
  action_wrapper = DynamicModel(params=params)
  evaluator = GoalReached(params=params)
  viewer = MPViewer(params=params,
                    x_range=[-30, 30],
                    y_range=[-20, 40],
                    follow_agent_id=True)  # use_world_bounds=True
  runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                        observer=state_observer,
                        evaluator=evaluator,
                        step_time=0.05,
                        viewer=viewer,
                        scenario_generator=scenario_generation)
  tfa_env = TFAWrapper(runtimerl)
  _ = tfa_env.reset()
  utils.validate_py_environment(tfa_env, episodes=5)
  _ = tf_py_environment.TFPyEnvironment(tfa_env)
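Internally, validate_py_environment amounts to a reset/step loop over the wrapper with spec checks. A minimal rollout sketch, assuming the `runtimerl` instance from the test above; the random-action sampling stands in for a trained policy:

import numpy as np
from tf_agents.specs import array_spec

env = TFAWrapper(runtimerl)
rng = np.random.RandomState(0)
time_step = env.reset()
while not time_step.is_last():
  # random actions sampled from the action spec stand in for a policy
  action = array_spec.sample_spec_nest(env.action_spec(), rng)
  time_step = env.step(action)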
def test_runner():
  params = ParameterServer(
    filename="tests/data/deterministic_scenario_test.json")
  base_dir = os.path.dirname(os.path.dirname(__file__))
  params["BaseDir"] = base_dir
  scenario_generation = DeterministicScenarioGeneration(num_scenarios=3,
                                                        random_seed=0,
                                                        params=params)
  state_observer = ClosestAgentsObserver(params=params)
  action_wrapper = DynamicModel(params=params)
  evaluator = GoalReached(params=params)
  viewer = MPViewer(params=params,
                    x_range=[-30, 30],
                    y_range=[-20, 40],
                    follow_agent_id=True)
  runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                        observer=state_observer,
                        evaluator=evaluator,
                        step_time=0.2,
                        viewer=viewer,
                        scenario_generator=scenario_generation,
                        render=False)
  tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(runtimerl))
  sac_agent = SACAgent(tfa_env, params=params)
  tfa_runner = TFARunner(tfa_env,
                         sac_agent,
                         params=params,
                         unwrapped_runtime=runtimerl)
  tfa_runner.collect_initial_episodes()
  # main functionalities
  tfa_runner.train()
  tfa_runner.visualize()
  tfa_runner.evaluate()
def test_agent(self):
  params = ParameterServer(
    filename="tests/data/deterministic_scenario_test.json")
  base_dir = os.path.dirname(os.path.dirname(__file__))
  params["BaseDir"] = base_dir
  scenario_generation = DeterministicScenarioGeneration(num_scenarios=2,
                                                        random_seed=0,
                                                        params=params)
  state_observer = ClosestAgentsObserver(params=params)
  action_wrapper = DynamicModel(params=params)
  evaluator = GoalReached(params=params)
  viewer = MPViewer(params=params,
                    x_range=[-30, 30],
                    y_range=[-20, 40],
                    follow_agent_id=True)  # use_world_bounds=True
  runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                        observer=state_observer,
                        evaluator=evaluator,
                        step_time=0.05,
                        viewer=viewer,
                        scenario_generator=scenario_generation)
  tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(runtimerl))
  sac_agent = SACAgent(tfa_env, params=params)
  self.assertEqual(sac_agent._agent.name, "sac_agent")
  sac_agent.reset()
  # TODO(@hart): does not work because of read-only file-system
  # sac_agent.save()
  sac_agent.load()
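The save()/load() pair and the read-only file-system TODO concern agent checkpointing. A hedged sketch of how such checkpointing is typically done with tf-agents' common.Checkpointer; the checkpoint directory is a hypothetical writable path and tracking `sac_agent._agent` is an assumption about SACAgent internals:

from tf_agents.utils import common

# Hypothetical writable path; the TODO above notes the test file-system
# is read-only, which is why sac_agent.save() is commented out.
checkpointer = common.Checkpointer(ckpt_dir="/tmp/sac_checkpoint",
                                   max_to_keep=1,
                                   agent=sac_agent._agent)
checkpointer.save(global_step=0)
checkpointer.initialize_or_restore()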
def test_triple_int(self):
  params = ParameterServer(
    filename="tests/data/deterministic_scenario_drone_test.json")
  scenario_generation = DeterministicScenarioGeneration(num_scenarios=3,
                                                        random_seed=0,
                                                        params=params)
  state_observer = SimpleObserver(params=params)
  action_wrapper = DynamicModel(model_name="TripleIntegratorModel",
                                params=params)
  evaluator = GoalReached(params=params)
  viewer = MPViewer(params=params,
                    x_range=[-30, 30],
                    y_range=[-40, 40],
                    use_world_bounds=True)
  runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                        observer=state_observer,
                        evaluator=evaluator,
                        step_time=0.2,
                        viewer=viewer,
                        scenario_generator=scenario_generation,
                        render=True)
  for _ in range(0, 1):
    runtimerl.reset()
    for _ in range(0, 10):  # run each scenario for up to 10 steps
      action = action_wrapper.action_space.sample()
      next_observed_state, reward, done, info = runtimerl.step(action)
      if done:
        print("State: {}\nReward: {}\nDone: {}\nInfo: {}\n"
              "=================================================".format(
                next_observed_state, reward, done, info))
        break
def _build_configuration(self):
  """Builds a configuration using an SAC agent."""
  self._scenario_generator = \
    DeterministicDroneChallengeGeneration(num_scenarios=3,
                                          random_seed=0,
                                          params=self._params)
  self._observer = CustomObserver(params=self._params)
  self._behavior_model = DynamicModel(model_name="TripleIntegratorModel",
                                      params=self._params)
  self._evaluator = CustomEvaluator(params=self._params)
  viewer = MPViewer(params=self._params,
                    x_range=[-20, 20],
                    y_range=[-20, 20],
                    follow_agent_id=True)
  self._viewer = viewer
  # self._viewer = VideoRenderer(renderer=viewer, world_step_time=0.2)
  self._runtime = RuntimeRL(action_wrapper=self._behavior_model,
                            observer=self._observer,
                            evaluator=self._evaluator,
                            step_time=0.2,
                            viewer=self._viewer,
                            scenario_generator=self._scenario_generator)
  # tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(self._runtime))
  tfa_env = tf_py_environment.TFPyEnvironment(
    parallel_py_environment.ParallelPyEnvironment(
      [lambda: TFAWrapper(self._runtime)] *
      self._params["ML"]["Agent"]["num_parallel_environments"]))
  self._agent = SACAgent(tfa_env, params=self._params)
  self._runner = SACRunner(tfa_env,
                           self._agent,
                           params=self._params,
                           unwrapped_runtime=self._runtime)
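The parallel wrapping above reads its fan-out from the parameter server. A short sketch of setting that key programmatically; the key path is taken from the lookup in the code, while the value 4 is illustrative:

params = ParameterServer()
# key path as read in _build_configuration; the value is an assumption
params["ML"]["Agent"]["num_parallel_environments"] = 4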
def test_runtime_rl(self):
  params = ParameterServer(
    filename="tests/data/deterministic_scenario_test.json")
  scenario_generation = DeterministicScenarioGeneration(num_scenarios=2,
                                                        random_seed=0,
                                                        params=params)
  state_observer = SimpleObserver(params=params)
  action_wrapper = DynamicModel(params=params)
  evaluator = GoalReached(params=params)
  viewer = MPViewer(params=params,
                    x_range=[-30, 30],
                    y_range=[-40, 40],
                    use_world_bounds=True)
  runtimerl = RuntimeRL(action_wrapper=action_wrapper,
                        observer=state_observer,
                        evaluator=evaluator,
                        step_time=0.2,
                        viewer=viewer,
                        scenario_generator=scenario_generation,
                        render=False)
  start_time = time.time()
  for _ in range(0, 100):
    runtimerl.reset()
    done = False
    reward = 0.
    for _ in range(0, 50):  # run each scenario for up to 50 steps
      # scale the sampled action down so the agent roughly goes straight
      action = action_wrapper.action_space.sample() / 100
      next_observed_state, reward, done, info = runtimerl.step(action)
      # the observation concatenates the normalized states of both agents
      self.assertEqual(len(next_observed_state), 16)
      np.testing.assert_array_equal(
        next_observed_state[0:4],
        state_observer._normalize(runtimerl._world.agents[100].state)[1:5])
      np.testing.assert_array_equal(
        next_observed_state[4:8],
        state_observer._normalize(runtimerl._world.agents[101].state)[1:5])
      if done:
        print("State: {}\nReward: {}\nDone: {}\nInfo: {}\n"
              "=================================================".format(
                next_observed_state, reward, done, info))
        break
    # the agent must reach the goal within the specified number of steps
    self.assertEqual(done, True)
    # reaching the goal returns a reward of 1.
    self.assertEqual(reward, 1.)
    self.assertEqual(runtimerl._world.agents[100].id, 100)
    self.assertEqual(runtimerl._world.agents[101].id, 101)
  end_time = time.time()
  print("100 runs took {}s.".format(str(end_time - start_time)))
def _build_configuration(self):
  """Builds a configuration using an SAC agent."""
  # TODO(@hart): increase scenario number
  self._scenario_generator = \
    ConfigurableScenarioGeneration(num_scenarios=100, params=self._params)
  # USE THIS FOR DETERMINISTIC SCENARIO GEN.
  # self._scenario_generator = \
  #   DeterministicScenarioGeneration(num_scenarios=100,
  #                                   params=self._params)
  # self._observer = NearestObserver(self._params)
  self._observer = ClosestAgentsObserver(self._params)
  self._behavior_model = DynamicModel(params=self._params)
  self._evaluator = CustomEvaluator(params=self._params)
  sim_step_time = 0.2
  real_time_factor = 5
  resolution = (1920, 1080)
  dpi = 300
  fig_env = plt.figure(figsize=(resolution[0] / dpi, resolution[1] / dpi),
                       dpi=dpi)
  gs = gridspec.GridSpec(1, 1, left=0.0, right=1, bottom=0, top=0.9)
  axis = plt.subplot(gs[0])
  viewer = MPViewer(params=self._params,
                    y_length=80,
                    enforce_y_length=True,
                    enforce_x_length=False,
                    follow_agent_id=True,
                    axis=axis)
  # self._viewer = VideoRenderer(renderer=viewer, world_step_time=0.2)
  self._viewer = viewer
  self._runtime = RuntimeRL(action_wrapper=self._behavior_model,
                            observer=self._observer,
                            evaluator=self._evaluator,
                            step_time=0.2,
                            viewer=self._viewer,
                            scenario_generator=self._scenario_generator)
  tfa_env = tf_py_environment.TFPyEnvironment(TFAWrapper(self._runtime))
  eval_tf_env = tf_py_environment.TFPyEnvironment(TFAWrapper(self._runtime))
  # self._agent = PPOAgent(tfa_env, params=self._params)
  # self._runner = PPORunner(tfa_env,
  #                          eval_tf_env,
  #                          self._agent,
  #                          params=self._params,
  #                          unwrapped_runtime=self._runtime)
  self._agent = SACAgent(tfa_env, params=self._params)
  self._runner = SACRunner(tfa_env,
                           self._agent,
                           params=self._params,
                           unwrapped_runtime=self._runtime)
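The figure, resolution, and dpi setup above only pays off when frames are written out. A sketch of switching to the commented-out VideoRenderer path, reusing the constructor call from the comment and the otherwise unused sim_step_time variable:

# Switch from live MPViewer rendering to video rendering; the constructor
# call mirrors the commented-out line above (sim_step_time is 0.2).
self._viewer = VideoRenderer(renderer=viewer, world_step_time=sim_step_time)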