Example #1
 def test_general_evaluator(self):
   params = ParameterServer()
   bp = ContinuousSingleLaneBlueprint(params)
   env = SingleAgentRuntime(blueprint=bp, render=True)
   evaluator = GeneralEvaluator(params)
   env._evaluator = evaluator
   env.reset()
   for _ in range(0, 4):
     state, reward, terminal, info = env.step(np.array([0., 0.]))
     print(terminal, reward)
Example #2
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = DiscreteHighwayBlueprint(params,
                                      number_of_senarios=10,
                                      random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        #env = gym.make("highway-v1", params=params)
        ml_behaviors = []
        # ml_behaviors.append(IQNAgent(env=env, test_env=env, params=params))
        ml_behaviors.append(FQFAgent(env=env, params=params))
        # ml_behaviors.append(QRDQNAgent(env=env, test_env=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            action = np.random.randint(low=0, high=env.action_space.n)
            observed_next_state, reward, done, info = env.step(action)
            print(
                f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
            )

            # action is set externally
            ml_behavior._set_action_externally = True
            agent_id = list(env._world.agents.keys())[0]
            observed_world = env._world.Observe([agent_id])[0]

            # do a random action and plan trajectory
            action = np.random.randint(low=1, high=env.action_space.n)
            ml_behavior.ActionToBehavior(action)
            a = ml_behavior.Plan(0.2, observed_world)

            # sample a different random action
            another_action = action
            while another_action == action:
                another_action = np.random.randint(low=1,
                                                   high=env.action_space.n)

            # plan the trajectory for the other action
            ml_behavior.ActionToBehavior(another_action)
            b = ml_behavior.Plan(0.2, observed_world)

            # the trajectories generated by two different actions should be different
            self.assertEqual(np.any(np.not_equal(a, b)), True)

            # action will be calculated within the Plan(..) fct.
            ml_behavior._set_action_externally = False
            a = ml_behavior.Plan(0.2, observed_world)
            b = ml_behavior.Plan(0.2, observed_world)
            last_action = ml_behavior.GetLastAction()
            self.assertTrue(isinstance(last_action, float))

            # same trajectory for same state
            np.testing.assert_array_equal(a, b)
Example #3
  def test_envs_discrete_rl(self):
    params = ParameterServer()
    discrete_blueprints = []
    discrete_blueprints.append(DiscreteHighwayBlueprint(params))
    discrete_blueprints.append(DiscreteMergingBlueprint(params))

    for bp in discrete_blueprints:
      env = SingleAgentRuntime(blueprint=bp, render=False)
      env.reset()
      for _ in range(0, 10):
        action = np.random.randint(low=0, high=3)
        observed_next_state, reward, done, info = env.step(action)
Example #4
  def test_envs_cont_rl(self):
    params = ParameterServer()
    cont_blueprints = []
    cont_blueprints.append(ContinuousHighwayBlueprint(params))
    cont_blueprints.append(ContinuousMergingBlueprint(params))

    for bp in cont_blueprints:
      env = SingleAgentRuntime(blueprint=bp, render=False)
      env.reset()
      for _ in range(0, 10):
        action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
        observed_next_state, reward, done, info = env.step(action)
Example #5
    def test_goal_reached_cpp_evaluator(self):
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params)
        env = SingleAgentRuntime(blueprint=bp, render=True)
        env.reset()
        world = env._world

        eval_id = env._scenario._eval_agent_ids[0]
        observed_world = world.Observe([eval_id])[0]
        evaluator = GoalReachedEvaluator(params)
        action = np.array([0., 0.], dtype=np.float32)
        start_time = time.time()
        print(evaluator.Evaluate(observed_world, action))
        end_time = time.time()
        print(f"It took {end_time-start_time} seconds.")
Example #6
 def test_sac_graph_agent(self):
     params = ParameterServer()
     bp = ContinuousMergingBlueprint(params,
                                     number_of_senarios=2500,
                                     random_seed=0)
     observer = GraphObserver(params=params)
     env = SingleAgentRuntime(blueprint=bp, observer=observer, render=False)
     sac_agent = BehaviorGraphSACAgent(environment=env,
                                       observer=observer,
                                       params=params)
     env.ml_behavior = sac_agent
     env.reset()
     eval_id = env._scenario._eval_agent_ids[0]
     self.assertEqual(env._world.agents[eval_id].behavior_model, sac_agent)
     for _ in range(0, 5):
         env._world.Step(0.2)
Example #7
 def test_configurable_blueprint(self):
     params = ParameterServer(
         filename="bark_ml/tests/data/highway_merge_configurable.json")
     # continuous model
     ml_behavior = BehaviorContinuousML(params=params)
     bp = ConfigurableScenarioBlueprint(params=params,
                                        ml_behavior=ml_behavior)
     env = SingleAgentRuntime(blueprint=bp, render=False)
     # agent
     sac_agent = BehaviorSACAgent(environment=env, params=params)
     env.ml_behavior = sac_agent
     # test run
     env.reset()
     for _ in range(0, 5):
         action = np.random.randint(low=0, high=3)
         observed_next_state, reward, done, info = env.step(action)
Example #8
  def test_nearest_observer(self):
    params = ParameterServer()
    bp = ContinuousHighwayBlueprint(params)
    env = SingleAgentRuntime(blueprint=bp, render=True)
    env.reset()
    world = env._world

    # under test
    observer = NearestAgentsObserver(params)

    eval_id = env._scenario._eval_agent_ids[0]
    observed_world = world.Observe([eval_id])[0]
    start_time = time.time()
    observed_state = observer.Observe(observed_world)
    end_time = time.time()
    print(f"It took {end_time-start_time} seconds.")
    print(observed_state, observer.observation_space.shape)
Example #9
 def test_tracing_bark_world(self):
     params = ParameterServer()
     bp = ContinuousHighwayBlueprint(params)
     tracer = Tracer()
     env = SingleAgentRuntime(blueprint=bp, render=False)
     sac_agent = BehaviorSACAgent(environment=env, params=params)
     env.ml_behavior = sac_agent
     # NOTE: this also tests if a BARK agent is self-contained
     env.ml_behavior.set_actions_externally = False
     env.reset()
     bark_world = env._world
     for j in range(0, 2):
         for i in range(0, 5):
             bark_world.Step(0.2)
             eval_dict = bark_world.Evaluate()
             tracer.Trace(eval_dict, num_episode=j)
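      # 2 episodes x 5 steps each were traced above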
     self.assertEqual(len(tracer._states), 10)
Example #10
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        num_scenarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=False)
        ml_behaviors = []
        ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
        ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            done = False
            while done is False:
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                observed_next_state, reward, done, info = env.step(action)
                print(
                    f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
                )

            # action is set externally
            ml_behavior._set_action_externally = True
            agent_id = list(env._world.agents.keys())[0]
            observed_world = env._world.Observe([agent_id])[0]
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            ml_behavior.ActionToBehavior(action)
            a = ml_behavior.Plan(0.2, observed_world)
            action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
            ml_behavior.ActionToBehavior(action)
            b = ml_behavior.Plan(0.2, observed_world)
            self.assertEqual(np.any(np.not_equal(a, b)), True)

            # action will be calculated within the Plan(..) fct.
            a = ml_behavior.Plan(0.2, observed_world)
            b = ml_behavior.Plan(0.2, observed_world)
            np.testing.assert_array_equal(a, b)
Example #11
    def test_tracer(self):
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params)
        tracer = Tracer()
        env = SingleAgentRuntime(blueprint=bp, render=False)
        for i in range(0, 2):
            env.reset()
            for _ in range(0, 10):
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                data = (observed_next_state, reward, done,
                        info) = env.step(action)
                tracer.Trace(data, num_episode=i)

        # NOTE: test basic tracing
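        # 2 episodes x 10 steps each were traced above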
        self.assertEqual(len(tracer._states), 20)
        for i in range(0, 20):
            self.assertEqual("is_terminal" in tracer._states[i].keys(), True)
            self.assertEqual("reward" in tracer._states[i].keys(), True)
            self.assertEqual("collision" in tracer._states[i].keys(), True)
            self.assertEqual("drivable_area" in tracer._states[i].keys(), True)
            self.assertEqual("goal_reached" in tracer._states[i].keys(), True)
            self.assertEqual("step_count" in tracer._states[i].keys(), True)

        # NOTE: test pandas magic
        tracer.ConvertToDf()
        # average collisions
        print(
            tracer.Query(key="collision",
                         group_by="num_episode",
                         agg_type="MEAN").mean())
        # average reward
        print(
            tracer.Query(key="reward", group_by="num_episode",
                         agg_type="SUM").mean())

        # NOTE: test reset
        tracer.Reset()
        self.assertEqual(len(tracer._states), 0)
        self.assertEqual(tracer._df, None)
Example #12
    def test_behavior_wrapping(self):
        # create scenario
        params = ParameterServer()
        bp = ContinuousHighwayBlueprint(params,
                                        number_of_senarios=10,
                                        random_seed=0)
        env = SingleAgentRuntime(blueprint=bp, render=True)
        ml_behaviors = []
        ml_behaviors.append(BehaviorPPOAgent(environment=env, params=params))
        ml_behaviors.append(BehaviorSACAgent(environment=env, params=params))

        for ml_behavior in ml_behaviors:
            # set agent
            env.ml_behavior = ml_behavior
            env.reset()
            done = False
            while done is False:
                action = np.random.uniform(low=-0.1, high=0.1, size=(2, ))
                observed_next_state, reward, done, info = env.step(action)
                print(
                    f"Observed state: {observed_next_state}, Reward: {reward}, Done: {done}"
                )
Example #13
# bp = DiscreteHighwayBlueprint(params,
#                               number_of_senarios=10,
#                               random_seed=0)


# arguments that are additionally set in the runtime
# override the ones set by the blueprint,
# e.g. we can change the observer to the C++ observer
observer = NearestObserver(params)
# viewer = MPViewer(params=params,
#                   x_range=[-35, 35],
#                   y_range=[-35, 35],
#                   follow_agent_id=True)
# viewer = VideoRenderer(renderer=viewer,
#                        world_step_time=0.2,
#                        fig_path="/Users/hart/2020/bark-ml/video/")
env = SingleAgentRuntime(blueprint=bp,
                         observer=observer,
                         render=True)

# gym interface
env.reset()
done = False
while done is False:
  action = np.random.uniform(
    low=np.array([-0.5, -0.02]), high=np.array([0.5, 0.02]), size=(2, ))
  observed_next_state, reward, done, info = env.step(action)
  print(f"Observed state: {observed_next_state}, Action: {action}, Reward: {reward}, Done: {done}")

# viewer.export_video(
#   filename="/Users/hart/2020/bark-ml/video/video", remove_image_dir=False)
class PyGraphObserverTests(unittest.TestCase):
  """Observer tests"""

  def _get_observation(self, observer, world, eval_id):
    observed_world = world.Observe([eval_id])[0]
    observation = observer.Observe(observed_world)
    return observation, observed_world

  def setUp(self):
    """Setting up the test-case."""
    params = ParameterServer()
    bp = ContinuousHighwayBlueprint(params, random_seed=0)
    self.env = SingleAgentRuntime(blueprint=bp, render=False)
    self.env.reset()
    self.world = self.env._world
    self.observer = GraphObserver(params)
    self.eval_id = self.env._scenario._eval_agent_ids[0]


  def test_parameter_server_usage(self):
    expected_num_agents = 15
    expected_visibility_radius = 100

    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = expected_num_agents
    params["ML"]["GraphObserver"]["VisibilityRadius"] = expected_visibility_radius
    params["ML"]["GraphObserver"]["NormalizationEnabled"] = True
    observer = GraphObserver(params=params)

    self.assertEqual(observer._num_agents, expected_num_agents)
    self.assertEqual(observer._visibility_radius, expected_visibility_radius)
    # self.assertTrue(observer._add_self_loops)
    self.assertTrue(observer._normalize_observations)

  def test_request_subset_of_available_node_features(self):
    params = ParameterServer()

    requested_features = GraphObserver.available_node_attributes()[0:5]
    params["ML"]["GraphObserver"]["EnabledNodeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    self.assertEqual(
      observer._enabled_node_attribute_keys,
      requested_features)

  def test_request_subset_of_available_edge_features(self):
    params = ParameterServer()

    requested_features = GraphObserver.available_edge_attributes()[0:2]
    params["ML"]["GraphObserver"]["EnabledEdgeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    self.assertEqual(
      observer._enabled_edge_attribute_keys,
      requested_features)

  def test_request_partially_invalid_node_features(self):
    params = ParameterServer()

    requested_features =\
      GraphObserver.available_node_attributes()[0:5] + ['invalid']
    params["ML"]["GraphObserver"]["EnabledNodeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    # remove invalid feature from expected list
    requested_features.pop(-1)

    self.assertEqual(
      observer._enabled_node_attribute_keys,
      requested_features)

  def test_request_partially_invalid_edge_features(self):
    params = ParameterServer()

    requested_features =\
      GraphObserver.available_edge_attributes()[0:2] + ['invalid']
    params["ML"]["GraphObserver"]["EnabledEdgeFeatures"] = requested_features
    observer = GraphObserver(params=params)

    # remove invalid feature from expected list
    requested_features.pop(-1)

    self.assertEqual(
      observer._enabled_edge_attribute_keys,
      requested_features)

  def test_observe_with_self_loops(self):
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    params["ML"]["GraphObserver"]["SelfLoops"] = True
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0) # add a batch dimension

    _, adjacency, _ = GraphObserver.graph(obs, graph_dims=observer.graph_dimensions)
    adjacency_list_diagonal = (tf.linalg.tensor_diag_part(adjacency[0]))

    # assert ones on the diagonal of the adjacency matrix
    tf.assert_equal(adjacency_list_diagonal, tf.ones(num_agents))

  def test_observe_without_self_loops(self):
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    params["ML"]["GraphObserver"]["SelfLoops"] = False
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0) # add a batch dimension

    _, adjacency, _ = GraphObserver.graph(obs, graph_dims=observer.graph_dimensions)
    adjacency_list_diagonal = (tf.linalg.tensor_diag_part(adjacency[0]))

    # assert zeros on the diagonal of the adjacency matrix
    tf.assert_equal(adjacency_list_diagonal, tf.zeros(num_agents))

  def test_observation_conforms_to_spec(self):
    """
    Verify that the observation returned by the observer
    is valid with respect to its defined observation space.
    """
    num_agents = 4
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    observer = GraphObserver(params=params)
    obs, _ = self._get_observation(observer, self.world, self.eval_id)

    self.assertTrue(observer.observation_space.contains(obs))

    # additionally check that the adjacency list is binary, since
    # this can't be enforced by the observation space currently
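    # (the flat observation is laid out as node features, then the flattened
    #  adjacency matrix, then the edge features; hence the adjacency block
    #  starts at num_agents * feature_len)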
    adj_start_idx = num_agents * observer.feature_len
    adj_end_idx = adj_start_idx + num_agents ** 2
    adj_list = obs[adj_start_idx : adj_end_idx]

    for element in adj_list: self.assertIn(element, [0, 1])

  def test_observed_agents_selection(self):
    agent_limit = 10
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = agent_limit
    observer = GraphObserver(params=params)

    obs, obs_world = self._get_observation(
      observer=observer,
      world=self.world,
      eval_id=self.eval_id)

    obs = tf.expand_dims(obs, 0) # add a batch dimension

    nodes, _, _ = GraphObserver.graph(obs, graph_dims=observer.graph_dimensions)
    nodes = nodes[0] # remove batch dim

    ego_node = nodes[0]
    ego_node_pos = Point2d(
      ego_node[0].numpy(), # x coordinate
      ego_node[1].numpy()) # y coordinate

    # verify that the nodes are ordered by
    # ascending distance to the ego node
    max_distance_to_ego = 0
    for node in nodes:
      pos = Point2d(
        node[0].numpy(), # x coordinate
        node[1].numpy()) # y coordinate
      distance_to_ego = Distance(pos, ego_node_pos)

      self.assertGreaterEqual(distance_to_ego, max_distance_to_ego,
        msg='Nodes are not sorted by distance relative to '\
          + 'the ego node in ascending order.')

      max_distance_to_ego = distance_to_ego

  def test_observation_to_graph_conversion(self):
    params = ParameterServer()
    params["ML"]["GraphObserver"]["SelfLoops"] = False
    graph_observer = GraphObserver(params=params)

    num_nodes = 5
    num_features = 5
    num_edge_features = 4

    node_features = np.random.random_sample((num_nodes, num_features))
    edge_features = np.random.random_sample((num_nodes, num_nodes, num_edge_features))

    # note that edges are bidirectional, so the matrix is symmetric
    adjacency_list = [
      [0, 1, 1, 1, 0], # 1 connects with 2, 3, 4
      [1, 0, 1, 1, 0], # 2 connects with 1, 3, 4
      [1, 1, 0, 1, 0], # 3 connects with 1, 2, 4
      [1, 1, 1, 0, 0], # 4 connects with 1, 2, 3
      [0, 0, 0, 0, 0]  # empty slot -> all zeros
    ]

    observation = np.array(node_features)
    observation = np.append(observation, adjacency_list)
    observation = np.append(observation, edge_features)
    observation = observation.reshape(-1)
    observations = np.array([observation, observation])
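    # each flattened observation holds 5*5 node features + 5*5 adjacency
    # entries + 5*5*4 edge features = 150 values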

    self.assertEqual(observations.shape, (2, 150))

    expected_nodes = tf.constant([node_features, node_features])
    expected_edge_features = tf.constant([edge_features, edge_features])

    graph_dims = (num_nodes, num_features, num_edge_features)
    nodes, edges, edge_features = graph_observer.graph(observations, graph_dims)

    self.assertTrue(tf.reduce_all(tf.equal(nodes, expected_nodes)))
    self.assertTrue(tf.reduce_all(tf.equal(edge_features, expected_edge_features)))

    observations = np.array([observation, observation, observation])

    # in dense mode, the nodes of all graphs are in a single list
    expected_nodes = tf.constant([node_features, node_features, node_features])
    expected_nodes = tf.reshape(expected_nodes, [-1, num_features])

    # the edges encoded in the adjacency list above
    expected_dense_edges = tf.constant([
      # graph 1
      [0, 1], [0, 2], [0, 3],
      [1, 0], [1, 2], [1, 3],
      [2, 0], [2, 1], [2, 3],
      [3, 0], [3, 1], [3, 2],
      # graph 2
      [5, 6], [5, 7], [5, 8],
      [6, 5], [6, 7], [6, 8],
      [7, 5], [7, 6], [7, 8],
      [8, 5], [8, 6], [8, 7],
      # graph 3
      [10, 11], [10, 12], [10, 13],
      [11, 10], [11, 12], [11, 13],
      [12, 10], [12, 11], [12, 13],
      [13, 10], [13, 11], [13, 12]
    ], dtype=tf.int32)

    expected_node_to_graph_map = tf.constant([
      0, 0, 0, 0, 0,
      1, 1, 1, 1, 1,
      2, 2, 2, 2, 2
    ])

    observations = tf.convert_to_tensor(observations)
    print(observations)
    nodes, edges, node_to_graph_map, E =\
      GraphObserver.graph(observations, graph_dims, dense=True)

    self.assertTrue(tf.reduce_all(tf.equal(nodes, expected_nodes)))
    self.assertTrue(tf.reduce_all(tf.equal(edges, expected_dense_edges)))
    # self.assertTrue(tf.reduce_all(
    #   tf.equal(node_to_graph_map, expected_node_to_graph_map)))

  def test_agent_pruning(self):
    """
    Verify that the observer correctly handles the case where
    there are fewer agents in the world than the configured limit.
    tl;dr: check that all entries of the node features,
    adjacency matrix, and edge features not corresponding to
    actually existing agents are zeros.
    """
    num_agents = 25
    params = ParameterServer()
    params["ML"]["GraphObserver"]["AgentLimit"] = num_agents
    observer = GraphObserver(params=params)
    obs, world = self._get_observation(observer, self.world, self.eval_id)
    obs = tf.expand_dims(obs, 0) # add a batch dimension

    nodes, adjacency_matrix, edge_features = GraphObserver.graph(
      observations=obs,
      graph_dims=observer.graph_dimensions)

    self.assertEqual(nodes.shape, [1, num_agents, observer.feature_len])

    expected_num_agents = len(world.agents)

    # nodes that do not represent agents but are only included
    # to fill up the required observation space.
    expected_n_fill_up_nodes = num_agents - expected_num_agents
    fill_up_nodes = nodes[0, expected_num_agents:]

    self.assertEqual(
      fill_up_nodes.shape,
      [expected_n_fill_up_nodes, observer.feature_len])

    # verify that entries for non-existing agents are all zeros
    self.assertEqual(tf.reduce_sum(fill_up_nodes), 0)

    # the equivalent for edges: verify that for each zero entry
    # in the adjacency matrix, the corresponding edge feature
    # vector is a zero vector of correct length.
    zero_indices = tf.where(tf.equal(adjacency_matrix, 0))
    fill_up_edge_features = tf.gather_nd(edge_features, zero_indices)
    edge_feature_len = observer.graph_dimensions[2]
    zero_edge_feature_vectors = tf.zeros(
      [zero_indices.shape[0], edge_feature_len])

    self.assertTrue(tf.reduce_all(tf.equal(
      fill_up_edge_features,
      zero_edge_feature_vectors)))
Example #15
class DataGenerator():
    """DataGenerator generates a dataset for a supervised learning setting.

  Args:
      num_scenarios: An integer specifying the number of scenarios to
                     generate data for.
      dump_dir: A path specifying the location where to save the data
                [default: None -> data will not be saved].
      render: A boolean value indicating whether the scenario runs
              should be rendered while generating data (rendering
              slows the data generation process down a lot).
      params: A `ParameterServer` instance that enables further configuration
              of the observer. Defaults to a new instance.
  """
    def __init__(self,
                 num_scenarios=3,
                 dump_dir=None,
                 render=False,
                 params=ParameterServer()):
        """Inits DataGenerator with the parameters (see class definition)."""
        self._dump_dir = dump_dir
        self._num_scenarios = num_scenarios
        self._params = params
        self._bp = ContinuousHighwayBlueprint(self._params,\
          number_of_senarios=self._num_scenarios, random_seed=0)
        self._observer = GraphObserver(params=self._params)
        self._env = SingleAgentRuntime(blueprint=self._bp,
                                       observer=self._observer,
                                       render=render)

    def run_scenarios(self):
        """Run all scenarios sequentially
    
    Generates a dataset by running a scenario at a time.
    
    Returns:
      data_all_scenarios: list sceanrio_data (with scenario_data being
                          a list of datapoints (see definition of datapoint
                          in _run_scenario))"""
        data_all_scenarios = list()
        for _ in range(0, self._num_scenarios):
            # Generate Scenario
            scenario, idx = self._bp._scenario_generation.get_next_scenario()

            # Log progress of scenario generation in 20% steps
            part = max(1, int(self._num_scenarios / 5))
            if idx % part == 0:
                msg = "Running data_generation on scenario "+ str(idx+1) + \
                  "/"+ str(self._num_scenarios)
                logging.info(msg)

            data_scenario = self._run_scenario(scenario)
            # Save the data for this run in a separate file and append it to the dataset
            self._save_data(data_scenario)
            data_all_scenarios.append(data_scenario)

        return data_all_scenarios

    def _run_scenario(self, scenario):
        """Runs a specific scenario for a predefined number of steps.

    Args:
      scenario: bark-scenario
        
    Returns:
      scenario_data: list containing all datapoints of the scenario run
                     (one datapoint is a dict with entries "observation"
                     and "label". The observation is a Tensor, the label is an
                     OrderedDict with entries "steering" and "acceleration" for
                     the ego node)"""

        scenario_data = list()
        # Set boundaries for random actions
        low = np.array([-0.5, -0.02])
        high = np.array([0.5, 0.02])

        self._env.reset(scenario=scenario)
        done = False

        while done is False:
            action = np.random.uniform(low=low, high=high, size=(2, ))
            observation, reward, done, info = self._env.step(action)

            # Calculate the labels for the supervised setting
            action_labels = self._calc_labels(observation)

            # Save the datum in scenario_data
            datum = dict()
            datum["observation"] = observation
            datum["label"] = action_labels
            scenario_data.append(datum)

        return scenario_data

    def _calc_labels(self, observation):
        """Calculates the perfect action labels for one observation

    Args:
      observation: An observation coming from the GraphObserver
    
    Returns:
      action_labels: OrderedDict containing the perfect action values
                     (steering, acceleration) for the ego agent
    
    Raises:
      KeyError: A required node attribute is not present in the
                observation
    """
        # Get relevant meta data from the observer
        attribute_keys = self._observer._enabled_node_attribute_keys
        norm_data = self._observer.normalization_data
        normalize = self._observer._normalize_observations
        feature_len = self._observer.feature_len

        # Extract features of ego node
        node_features = observation[:feature_len]

        # Extract relevant features for further calculation
        goal_theta = node_features[attribute_keys.index("goal_theta")].numpy()
        theta = node_features[attribute_keys.index("theta")].numpy()
        vel = node_features[attribute_keys.index("vel")].numpy()
        goal_vel = node_features[attribute_keys.index("goal_vel")].numpy()
        goal_d = node_features[attribute_keys.index("goal_d")].numpy()

        # Denormalize features if they were normalized in the first place
        if normalize:
            goal_theta = self._denormalize_value(goal_theta,
                                                 norm_data["theta"])
            theta = self._denormalize_value(theta, norm_data["theta"])
            vel = self._denormalize_value(vel, norm_data["vel"])
            goal_vel = self._denormalize_value(goal_vel, norm_data["vel"])
            goal_d = self._denormalize_value(goal_d, norm_data["distance"])

        # Calculate "perfect" steering
        steering = goal_theta - theta

        # Calculate "perfect" acceleration
        d_vel = goal_vel - vel
        # Equation derived from:
        #   d_goal = 0.5*acc*t² + v_0*t
        #   d_vel = acc*t
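        # Substituting t = d_vel/acc into the first equation (with
        # d_goal = goal_d and v_0 = vel) and solving for acc yields:
        #   acc = (d_vel / goal_d) * (d_vel / 2 + vel)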
        acc = (1. / goal_d) * d_vel * (d_vel / 2 + vel)

        # Normalize labels if the node features were normalized at the beginning
        if normalize:
            # Manually define the ranges used to normalize steering and acc
            range_steering = [-0.1, 0.1]
            range_acc = [-0.6, 0.6]
            steering = self._normalize_value(steering, range_steering)
            acc = self._normalize_value(acc, range_acc)

        action_labels = OrderedDict()
        action_labels["steering"] = steering
        action_labels["acceleration"] = acc

        return action_labels

    def _normalize_value(self, value, range):
        """Normalization of value to range

    If the `value` is outside the given range, it's clamped 
    to the bound of [-1, 1]
    """
        normed = 2 * (value - range[0]) / (range[1] - range[0]) - 1
        normed = max(-1, normed)  # values lower -1 clipped
        normed = min(1, normed)  # values bigger 1 clipped
        return normed

    def _denormalize_value(self, normed, range):
        """Inverse function of a normalization

    Reconstructs the original value from a given normed value and a range.
    The reconstruction is with errors, if the value was clipped during
    normalization. The expected range of the original normalization is [-1,1].
    
    Args:
      normed: Double or float
      range: List containing lower and upper bound

    Returns:
      value: Double or float"""

        value = 0.5 * (normed + 1) * (range[1] - range[0]) + range[0]
        return value

    def _save_data(self, data):
        """Save the data in a prespecified directory

    Args:
      data: the data that needs to be saved
    """
        if self._dump_dir is not None:
            # Create the dump directory if it does not exist
            if not os.path.exists(self._dump_dir):
                os.makedirs(self._dump_dir)
            path = self._dump_dir + '/dataset_' + str(int(
                time.time() * 10000)) + '.pickle'
            with open(path, 'wb') as handle:
                pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
 def reset(self, scenario=None):
   """Resets the runtime and its objects."""
   self._count = 0
   return SingleAgentRuntime.reset(self, scenario=scenario)
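
A minimal usage sketch for the DataGenerator class from Example #15 (illustrative only; the argument values are assumptions based on the defaults shown in the class definition, and imports are omitted as in the other examples):

generator = DataGenerator(num_scenarios=2, dump_dir=None, render=False)
data_all_scenarios = generator.run_scenarios()
# each entry is one scenario's list of {"observation": ..., "label": ...} dicts
print(len(data_all_scenarios), len(data_all_scenarios[0]))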