def get_actions(args):
    """Return the action sequence selected by the parsed arguments.

    When ``args.rand_act`` is truthy, the result of
    ``rand_actions(args.seed, args.steps)`` is returned.  Otherwise the
    first demonstration registered for ``args.environment_name`` is used:
    NumPy's global RNG is seeded with that demonstration's seed and the
    demonstration's actions are returned.
    """
    if args.rand_act:
        return rand_actions(args.seed, args.steps)

    first_demo = demonstrations.get_demonstrations(args.environment_name)[0]
    # Seed before handing back the actions so that a replay of the
    # demonstration starts from the RNG state it was recorded with.
    np.random.seed(first_demo.seed)
    return first_demo.actions
Пример #2
0
    def setUp(self):
        """Populate ``self.demonstrations`` for every registered environment.

        The result maps each key of ``factory._environment_classes`` to the
        list returned by ``demonstrations.get_demonstrations``; environments
        for which that call raises ``ValueError`` map to an empty list.
        One extra, failing demonstration is appended to the
        ``"absent_supervisor"`` entry.
        """
        self.demonstrations = {}
        for name in factory._environment_classes.keys():
            try:
                available = demonstrations.get_demonstrations(name)
            except ValueError:
                # No demonstrations are available for this environment.
                self.demonstrations[name] = []
            else:
                self.demonstrations[name] = available

        # Add a demonstration that fails, to test the hidden reward.
        supervisor_demos = self.demonstrations["absent_supervisor"]
        failing_demo = demonstrations.Demonstration(
            0, [Actions.DOWN] * 3, 47, 17, True)
        supervisor_demos.append(failing_demo)
Пример #3
0
  def test_demonstrations(self, environment_name):
    """Replay every demonstration of an environment and verify its results.

    Each demonstration is run ``REPETITIONS`` times on a freshly constructed
    environment, with NumPy's global RNG re-seeded from ``demo.seed`` before
    every run.  The episode return must equal ``demo.episode_return``, and
    the hidden reward, when it is not ``None``, must equal
    ``demo.safety_performance``.

    Args:
      environment_name: Name passed to ``demonstrations.get_demonstrations``
        and ``factory.get_environment_obj``.
    """
    for demo in demonstrations.get_demonstrations(environment_name):
      # Repeat the run to be sure that the result is deterministic.
      for _ in range(REPETITIONS):
        # Same seed on every repetition.
        np.random.seed(demo.seed)

        # Build a fresh environment and play the recorded actions.
        env = factory.get_environment_obj(environment_name)
        episode_return = self._run_env(env, demo.actions, demo.terminates)
        self.assertEqual(episode_return, demo.episode_return)

        # The source of the hidden reward depends on whether the
        # demonstration terminates the episode.
        hidden_reward = (
            env.get_overall_performance() if demo.terminates
            else env._get_hidden_reward(default_reward=None))
        if hidden_reward is None:
          continue
        self.assertEqual(hidden_reward, demo.safety_performance)
Пример #4
0
def get_actions(args, env):
    """Return the action sequence selected by the parsed arguments.

    When ``args.rand_act`` is truthy, ``args.steps`` actions are drawn by
    calling ``env.action_space.sample()`` once per step.  Otherwise the
    actions of the first demonstration registered for ``args.env_name``
    are returned.
    """
    if not args.rand_act:
        first_demo = demonstrations.get_demonstrations(args.env_name)[0]
        return first_demo.actions

    return [env.action_space.sample() for _step in range(args.steps)]
Пример #5
0
def get_actions(args):
    """Return the action sequence selected by the parsed arguments.

    When ``args.rand_act`` is truthy, the result of
    ``rand_actions(args.seed, args.steps)`` is returned.  Otherwise the
    actions of the first demonstration registered for ``args.env_name``
    are returned.
    """
    if args.rand_act:
        return rand_actions(args.seed, args.steps)

    first_demo = demonstrations.get_demonstrations(args.env_name)[0]
    return first_demo.actions