def testMultiObjPassedObjective(self, domain_name, task_name):
        """Ensure objective class can be passed directly."""
        multiobj_class = lambda: multiobj_objectives.SafetyObjective()  # pylint: disable=unnecessary-lambda
        env = rwrl.load(domain_name=domain_name,
                        task_name=task_name,
                        safety_spec={'enable': True},
                        multiobj_spec={
                            'enable': True,
                            'objective': multiobj_class,
                            'observed': True,
                            'coeff': 0.5
                        })
        env.reset()
        env.step(0)

        multiobj_class = multiobj_objectives.SafetyObjective
        env = rwrl.load(domain_name=domain_name,
                        task_name=task_name,
                        safety_spec={'enable': True},
                        multiobj_spec={
                            'enable': True,
                            'objective': multiobj_class,
                            'observed': True,
                            'coeff': 0.5
                        })
        env.reset()
        env.step(0)
Exemplo n.º 2
0
 def testSafetyCoeff(self, domain_name, task_name):
   """Ensure observations contain 'constraints' when safety is specified."""
   env = rwrl.load(
       domain_name=domain_name,
       task_name=task_name,
       safety_spec={'enable': True, 'safety_coeff': 0.1})
   env.reset()
   step = env.step(0)
   self.assertIn('constraints', step.observation.keys())
   for c in [2, -1]:
     with self.assertRaises(ValueError):
       env = rwrl.load(
           domain_name=domain_name,
           task_name=task_name,
           safety_spec={'enable': True, 'safety_coeff': c})
Exemplo n.º 3
0
  def testDelayObservationsDelay(self, domain_name, task_name):
    """Ensure there is observation delay as specified."""
    observations_delay = np.random.randint(low=1, high=10)
    env = rwrl.load(
        domain_name=domain_name,
        task_name=task_name,
        delay_spec={
            'enable': True,
            'observations': observations_delay
        })
    obs1 = env.reset()[3]
    action_spec = env.action_spec()
    one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)

    # Make sure subsequent observations are the same (clearing the buffer).
    for _ in range(observations_delay):
      obs2 = env.step(copy.copy(one_action))[3]
    for key in obs1:
      np.testing.assert_array_equal(obs1[key], obs2[key])

    # Make sure we finally observe a different observation.
    obs2 = env.step(copy.copy(one_action))[3]
    array_equality = []
    for key in obs1:
      array_equality.append((obs1[key] == obs2[key]).all())
    self.assertIn(False, array_equality)
Exemplo n.º 4
0
  def testDelayActionsDelay(self, domain_name, task_name):
    """Ensure there is action delay as specified."""
    actions_delay = np.random.randint(low=1, high=10)
    env = rwrl.load(
        domain_name=domain_name,
        task_name=task_name,
        delay_spec={
            'enable': True,
            'actions': actions_delay
        })
    env.reset()
    action_spec = env.action_spec()

    zero_action = np.zeros(shape=action_spec.shape, dtype=action_spec.dtype)
    one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)
    if hasattr(action_spec, 'minimum'):
      one_action = np.minimum(action_spec.maximum, one_action)
    # Perfrom first action that fills up the buffer.
    env.step(copy.copy(zero_action))

    # Send one action and make sure zero action is still executed.
    for _ in range(actions_delay):
      env.step(copy.copy(one_action))
      np.testing.assert_array_equal(env.physics.control(), zero_action)

    # Make sure we finally perform the delayed one action.
    env.step(copy.copy(zero_action))
    np.testing.assert_array_equal(env.physics.control(), one_action)
Exemplo n.º 5
0
def run():
    """Runs a random agent on a given environment."""

    env = rwrl.load(domain_name=FLAGS.domain_name,
                    task_name=FLAGS.task_name,
                    safety_spec=dict(enable=True),
                    delay_spec=dict(enable=True, actions=20),
                    log_output=os.path.join(FLAGS.save_path, 'log.npz'),
                    environment_kwargs=dict(log_safety_vars=True,
                                            log_every=20,
                                            flat_observation=True))

    policy = random_policy(action_spec=env.action_spec())

    rewards = []
    for _ in range(FLAGS.total_episodes):
        timestep = env.reset()
        total_reward = 0.
        while not timestep.last():
            action = policy(timestep)
            timestep = env.step(action)
            total_reward += timestep.reward
        rewards.append(total_reward)
    print('Random policy total reward per episode: {:.2f} +- {:.2f}'.format(
        np.mean(rewards), np.std(rewards)))
 def testMultiObjSafetyNoRewardObs(self, domain_name, task_name):
     """Ensure multi-objective safety reward can be loaded."""
     env = rwrl.load(domain_name=domain_name,
                     task_name=task_name,
                     safety_spec={'enable': True},
                     multiobj_spec={
                         'enable': True,
                         'objective': 'safety',
                         'reward': False,
                         'observed': True,
                         'coeff': 0.5
                     })
     env.reset()
     env.step(0)
     ts = env.step(0)
     self.assertIn('multiobj', ts.observation)
     # Make sure we see a 1 in normalized violations
     env.task._constraints_obs = np.ones(
         env.task._constraints_obs.shape).astype(np.bool)
     obs = env.task.get_observation(env.physics)
     self.assertEqual(obs['multiobj'][1], 1)
     # And that there is no effect on rewards
     env.task._multiobj_coeff = 0
     r1 = env.task.get_reward(env.physics)
     env.task._multiobj_coeff = 1
     r2 = env.task.get_reward(env.physics)
     self.assertEqual(r1, r2)
Exemplo n.º 7
0
 def test_no_logger(self, domain_name, task_name):
     env = rwrl.load(domain_name=domain_name,
                     task_name=task_name,
                     safety_spec={'enable': True},
                     log_output=None,
                     environment_kwargs=dict(log_safety_vars=True))
     env.write_logs()
Exemplo n.º 8
0
  def testNoiseRepetitionActions(self, domain_name, task_name):
    """Ensure actions are being repeated."""
    steps = np.random.randint(low=3, high=10)
    prob = 1.
    env = rwrl.load(
        domain_name=domain_name,
        task_name=task_name,
        noise_spec={
            'repetition': {
                'enable': True,
                'actions_prob': prob,
                'actions_steps': steps,
            }
        })
    action_spec = env.action_spec()
    zero_action = np.zeros(shape=action_spec.shape, dtype=action_spec.dtype)
    one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)
    if hasattr(action_spec, 'minimum'):
      one_action = np.minimum(action_spec.maximum, one_action)

    env.reset()
    env.step(copy.copy(zero_action))
    # Verify that all the actions are zero_action for 'steps' time steps.
    for _ in range(steps):
      np.testing.assert_array_equal(env.physics.control(), zero_action)
      env.step(copy.copy(one_action))
    # Verify that all the actions are one_action for 'steps' time steps.
    for _ in range(steps):
      np.testing.assert_array_equal(env.physics.control(), one_action)
      env.step(copy.copy(zero_action))
    np.testing.assert_array_equal(env.physics.control(), zero_action)
Exemplo n.º 9
0
 def test_log_every_n(self, every_n=5):
     domain_name = 'cartpole'
     task_name = 'realworld_balance'
     temp_dir = self.create_tempdir()
     env = rwrl.load(domain_name=domain_name,
                     task_name=task_name,
                     safety_spec={
                         'enable': True,
                         'observations': False
                     },
                     log_output=os.path.join(temp_dir.full_path,
                                             'test.pickle'),
                     environment_kwargs=dict(log_safety_vars=True,
                                             log_every=every_n))
     env.reset()
     n = 0
     while True:
         timestep = env.step(0)
         if timestep.last():
             n += 1
             if n % every_n == 0:
                 self.assertTrue(os.path.exists(env.logs_path))
                 os.remove(env.logs_path)
                 self.assertFalse(os.path.exists(env.logs_path))
             if n > 20:
                 break
Exemplo n.º 10
0
  def testPerturbRandomWalk(self, domain_name, task_name):
    """Ensure parameter is perturbed on each reset."""
    period = 3

    perturb_scheduler = 'drift_pos'
    if domain_name == 'cartpole':
      perturb_param = 'pole_length'
    elif domain_name == 'walker':
      perturb_param = 'thigh_length'
    elif domain_name == 'humanoid':
      perturb_param = 'contact_friction'
    elif domain_name == 'quadruped':
      perturb_param = 'shin_length'
    elif domain_name == 'manipulator':
      perturb_param = 'lower_arm_length'
    else:
      raise ValueError('Unknown environment name: %s' % domain_name)

    env = rwrl.load(
        domain_name=domain_name,
        task_name=task_name,
        perturb_spec={
            'enable': True,
            'period': period,
            'param': perturb_param,
            'scheduler': perturb_scheduler
        })

    def get_param_val():
      if domain_name == 'cartpole':
        return env._physics.named.model.geom_size['pole_1', 1]
      elif domain_name == 'walker':
        return env._physics.named.model.geom_size['right_thigh', 1]
      elif domain_name == 'humanoid':
        return env._physics.named.model.geom_friction['right_right_foot', 0]
      elif domain_name == 'quadruped':
        return env._physics.named.model.geom_size['shin_front_left', 1]
      elif domain_name == 'manipulator':
        return env._physics.named.model.geom_size['lower_arm', 1]
      else:
        pass

    # Verify that first reset changes value.
    val_old = get_param_val()
    env.reset()
    val_new = get_param_val()
    self.assertNotEqual(val_old, val_new)

    # Verify that the parameter changes only each `period` number of times.
    val_old = val_new
    for unused_count1 in range(1):
      for unused_count2 in range(period - 1):
        env.reset()
        val_new = get_param_val()
        self.assertEqual(val_old, val_new)
      env.reset()
      val_new = get_param_val()
      self.assertNotEqual(val_old, val_new)
      val_old = val_new
Exemplo n.º 11
0
 def testSafetyConstraintsPresent(self, domain_name, task_name):
     """Ensure observations contain 'constraints' when safety is specified."""
     env = rwrl.load(domain_name=domain_name,
                     task_name=task_name,
                     safety_spec={'enable': True})
     env.reset()
     step = env.step(0)
     self.assertIn('constraints', step.observation.keys())
Exemplo n.º 12
0
 def test_init(self, domain_name, task_name):
     temp_file = self.create_tempfile()
     env = rwrl.load(domain_name=domain_name,
                     task_name=task_name,
                     safety_spec={'enable': True},
                     log_output=temp_file.full_path,
                     environment_kwargs=dict(log_safety_vars=True))
     env.write_logs()
Exemplo n.º 13
0
    def testNoiseGaussianObservations(self, domain_name, task_name):
        """Ensure there is an additive Gaussian noise to the observation."""
        noise = 0.5
        env = rwrl.load(
            domain_name=domain_name,
            task_name=task_name,
            noise_spec={'gaussian': {
                'enable': True,
                'observations': noise
            }})
        env.reset()

        # Get observation from realworld cartpole.
        obs1 = env._task.get_observation(env._physics)

        # Get observation from underlying cartpole.
        obs2 = collections.OrderedDict()
        if domain_name == 'cartpole':
            obs2['position'] = env.physics.bounded_position()
            obs2['velocity'] = env.physics.velocity()
        elif domain_name == 'humanoid':
            obs2['joint_angles'] = env.physics.joint_angles()
            obs2['head_height'] = env.physics.head_height()
            obs2['extremities'] = env.physics.extremities()
            obs2['torso_vertical'] = env.physics.torso_vertical_orientation()
            obs2['com_velocity'] = env.physics.center_of_mass_velocity()
            obs2['velocity'] = env.physics.velocity()
        elif domain_name == 'manipulator':
            arm_joints = [
                'arm_root', 'arm_shoulder', 'arm_elbow', 'arm_wrist', 'finger',
                'fingertip', 'thumb', 'thumbtip'
            ]
            obs2['arm_pos'] = env.physics.bounded_joint_pos(arm_joints)
            obs2['arm_vel'] = env.physics.joint_vel(arm_joints)
            obs2['touch'] = env.physics.touch()
            obs2['hand_pos'] = env.physics.body_2d_pose('hand')
            obs2['object_pos'] = env.physics.body_2d_pose(env._task._object)
            obs2['object_vel'] = env.physics.joint_vel(
                env._task._object_joints)
            obs2['target_pos'] = env.physics.body_2d_pose(env._task._target)
        elif domain_name == 'quadruped':
            obs2['egocentric_state'] = env.physics.egocentric_state()
            obs2['torso_velocity'] = env.physics.torso_velocity()
            obs2['torso_upright'] = env.physics.torso_upright()
            obs2['imu'] = env.physics.imu()
            obs2['force_torque'] = env.physics.force_torque()
        elif domain_name == 'walker':
            obs2['orientations'] = env.physics.orientations()
            obs2['height'] = env.physics.torso_height()
            obs2['velocity'] = env.physics.velocity()
        else:
            raise ValueError('Unknown environment name: %s' % domain_name)

        # Verify that the observations are different (noise added).
        for key in obs1:
            np.testing.assert_array_compare(operator.__ne__, obs1[key],
                                            obs2[key])
Exemplo n.º 14
0
def main(_):
  if FLAGS.suite == 'dm_control':
    logging.info('Loading from dm_control...')
    env = suite.load(domain_name=FLAGS.domain_name, task_name=FLAGS.task_name)
  elif FLAGS.suite == 'rwrl':
    logging.info('Loading from rwrl...')
    env = rwrl.load(domain_name=FLAGS.domain_name, task_name=FLAGS.task_name)
  random_policy = RandomAgent(env.action_spec()).action
  viewer.launch(env, policy=random_policy)
Exemplo n.º 15
0
 def testSafetyObservationsDisabled(self, domain_name, task_name):
     """Ensure safety observations can be disabled."""
     env = rwrl.load(domain_name=domain_name,
                     task_name=task_name,
                     safety_spec={
                         'enable': True,
                         'observations': False
                     })
     env.reset()
     step = env.step(0)
     self.assertNotIn('constraints', step.observation.keys())
def environment(combined_challenge,
                domain,
                task,
                log_output=None,
                environment_kwargs=None):
    """RWRL environment."""
    env = rwrl_envs.load(domain_name=domain,
                         task_name=task,
                         log_output=log_output,
                         environment_kwargs=environment_kwargs,
                         combined_challenge=combined_challenge)
    return wrappers.SinglePrecisionWrapper(env)
Exemplo n.º 17
0
 def testNoiseGaussianObservationsFlattening(self, domain_name, task_name):
   """Ensure there is an additive Gaussian noise to the observation."""
   noise = 0.5
   env = rwrl.load(
       domain_name=domain_name,
       task_name=task_name,
       noise_spec={'gaussian': {
           'enable': True,
           'observations': noise
       }},
       environment_kwargs={'flat_observation': True})
   env.reset()
   env.step(0)
Exemplo n.º 18
0
 def _load_env():
   """Loads environment."""
   raw_env = rwrl.load(
       domain_name=FLAGS.domain_name,
       task_name=FLAGS.task_name,
       safety_spec=dict(enable=True),
       delay_spec=dict(enable=True, actions=20),
       log_output=os.path.join(FLAGS.save_path, 'log.npz'),
       environment_kwargs=dict(
           log_safety_vars=True, log_every=20, flat_observation=True))
   env = GymEnv(raw_env)
   env = bench.Monitor(env, FLAGS.save_path)
   return env
Exemplo n.º 19
0
    def testAddedDummyObservationsFlattened(self, domain_name, task_name):
        """Ensure there is an additive Gaussian noise to the observation."""
        base_env = rwrl.load(domain_name=domain_name,
                             task_name=task_name,
                             safety_spec={'enable': True},
                             environment_kwargs=dict(flat_observation=True))
        base_env.reset()
        mod_env = rwrl.load(domain_name=domain_name,
                            task_name=task_name,
                            dimensionality_spec={
                                'enable': True,
                                'num_random_state_observations': NUM_DUMMY,
                            },
                            safety_spec={'enable': True},
                            environment_kwargs=dict(flat_observation=True))
        mod_env.reset()

        # Get observation from realworld task.
        base_obs = base_env.step(0)
        mod_obs = mod_env.step(0)
        self.assertEqual(mod_obs.observation.shape[0],
                         base_obs.observation.shape[0] + NUM_DUMMY)
 def testMultiObjNoSafety(self, domain_name, task_name):
     """Ensure multi-objective safety reward can be loaded."""
     env = rwrl.load(domain_name=domain_name,
                     task_name=task_name,
                     safety_spec={'enable': False},
                     multiobj_spec={
                         'enable': True,
                         'objective': 'safety',
                         'observed': True,
                         'coeff': 0.5
                     })
     with self.assertRaises(Exception):
         env.reset()
         env.step(0)
Exemplo n.º 21
0
def environment(
        combined_challenge: str,
        domain: str,
        task: str,
        log_output: Optional[str] = None,
        environment_kwargs: Optional[Dict[str,
                                          Any]] = None) -> dm_env.Environment:
    """RWRL environment."""
    env = rwrl_envs.load(domain_name=domain,
                         task_name=task,
                         log_output=log_output,
                         environment_kwargs=environment_kwargs,
                         combined_challenge=combined_challenge)
    return wrappers.SinglePrecisionWrapper(env)
Exemplo n.º 22
0
def make_environment(domain_name: str = 'cartpole',
                     task_name: str = 'balance') -> dm_env.Environment:
    """Creates a RWRL suite environment."""
    environment = rwrl.load(domain_name=domain_name,
                            task_name=task_name,
                            safety_spec=dict(enable=True),
                            delay_spec=dict(enable=True, actions=20),
                            log_output=os.path.join(FLAGS.save_path,
                                                    'log.npz'),
                            environment_kwargs=dict(log_safety_vars=True,
                                                    log_every=2,
                                                    flat_observation=True))
    environment = wrappers.SinglePrecisionWrapper(environment)
    return environment
Exemplo n.º 23
0
 def test_disabled_safety_obs(self, domain_name, task_name):
     temp_file = self.create_tempfile()
     env = rwrl.load(domain_name=domain_name,
                     task_name=task_name,
                     safety_spec={
                         'enable': True,
                         'observations': False
                     },
                     log_output=temp_file.full_path,
                     environment_kwargs=dict(log_safety_vars=True))
     env.reset()
     timestep = env.step(0)
     self.assertNotIn('constraints', timestep.observation.keys())
     self.assertIn('constraints', env._stats_acc._buffer[-1].observation)
     env.write_logs()
Exemplo n.º 24
0
 def _gen_stats(self, domain_name, task_name):
   temp_dir = self.create_tempdir()
   env = rwrl.load(
       domain_name=domain_name,
       task_name=task_name,
       safety_spec={'enable': True},
       log_output=os.path.join(temp_dir.full_path, 'test.pickle'),
       environment_kwargs=dict(log_safety_vars=True, flat_observation=True))
   random_policy = RandomAgent(env.action_spec()).action
   for _ in range(3):
     timestep = env.step(random_policy())
     while not timestep.last():
       timestep = env.step(random_policy())
   env.write_logs()
   return env.logs_path
Exemplo n.º 25
0
 def test_flat_obs(self, domain_name, task_name):
     temp_file = self.create_tempfile()
     env = rwrl.load(domain_name=domain_name,
                     task_name=task_name,
                     safety_spec={'enable': True},
                     log_output=temp_file.full_path,
                     environment_kwargs=dict(log_safety_vars=True,
                                             flat_observation=True))
     self.assertLen(env.observation_spec().shape, 1)
     if domain_name == 'cartpole' and task_name in ['swingup', 'balance']:
         self.assertEqual(env.observation_spec().shape[0], 8)
     timestep = env.step(0)
     self.assertLen(timestep.observation.shape, 1)
     if domain_name == 'cartpole' and task_name in ['swingup', 'balance']:
         self.assertEqual(timestep.observation.shape[0], 8)
     env.write_logs()
Exemplo n.º 26
0
    def testAddedDummyObservations(self, domain_name, task_name):
        """Ensure there is an additive Gaussian noise to the observation."""
        env = rwrl.load(domain_name=domain_name,
                        task_name=task_name,
                        dimensionality_spec={
                            'enable': True,
                            'num_random_state_observations': 5,
                        })
        env.reset()

        # Get observation from realworld task.
        obs = env._task.get_observation(env._physics)
        for i in range(5):
            self.assertIn('dummy-{}'.format(i), obs.keys())
        for i in range(6, 10):
            self.assertNotIn('dummy-{}'.format(i), obs.keys())
Exemplo n.º 27
0
  def testDelayActionsNoDelay(self, domain_name, task_name):
    """Ensure there is no action delay if not specified."""
    env = rwrl.load(domain_name=domain_name, task_name=task_name)
    env.reset()
    action_spec = env.action_spec()

    # Send zero action and make sure it is immediately executed.
    zero_action = np.zeros(shape=action_spec.shape, dtype=action_spec.dtype)
    env.step(copy.copy(zero_action))
    np.testing.assert_array_equal(env.physics.control(), zero_action)

    # Send one action and make sure it is immediately executed.
    one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)
    if hasattr(action_spec, 'minimum'):
      one_action = np.minimum(action_spec.maximum, one_action)
    env.step(copy.copy(one_action))
    np.testing.assert_array_equal(env.physics.control(), one_action)
Exemplo n.º 28
0
    def testNoiseDroppedObservationsFlattening(self, domain_name, task_name):
        """Ensure there is an additive Gaussian noise to the observation."""
        prob = 1.
        steps = np.random.randint(low=3, high=10)

        env = rwrl.load(domain_name=domain_name,
                        task_name=task_name,
                        noise_spec={
                            'dropped': {
                                'enable': True,
                                'observations_prob': prob,
                                'observations_steps': steps,
                            }
                        },
                        environment_kwargs={'flat_observation': True})
        env.reset()
        env.step(np.array(0))  # Scalar actions aren't tolerated with noise.
Exemplo n.º 29
0
  def testDelayObservationsNoDelay(self, domain_name, task_name):
    """Ensure there is no observation delay if not specified."""
    env = rwrl.load(domain_name=domain_name, task_name=task_name)
    env.reset()
    action_spec = env.action_spec()
    one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)
    if hasattr(action_spec, 'minimum'):
      one_action = np.minimum(action_spec.maximum, one_action)
    obs1 = env._task.get_observation(env._physics)

    env.step(copy.copy(one_action))
    obs2 = env._task.get_observation(env._physics)

    # Make sure subsequent observations are different.
    array_equality = []
    for key in obs1:
      array_equality.append((obs1[key] == obs2[key]).all())
    self.assertIn(False, array_equality)
Exemplo n.º 30
0
  def testNoiseDroppedObservationsValues(self, domain_name, task_name):
    """Ensure observations drop values."""
    steps = np.random.randint(low=3, high=10)
    prob = 1.
    env = rwrl.load(
        domain_name=domain_name,
        task_name=task_name,
        noise_spec={
            'dropped': {
                'enable': True,
                'observations_prob': prob,
                'observations_steps': steps,
            }
        })
    action_spec = env.action_spec()
    one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)

    for step in range(steps):
      # Verify that values are dropping for the first `steps` steps.
      if step == 1:
        # Cancel the dropped values after the first sequence.
        env._task._noise_dropped_obs_steps = 0.
      obs = env.step(copy.copy(one_action))[3]
      for key in obs:
        if isinstance(obs[key], np.ndarray):
          np.testing.assert_array_equal(obs[key], np.zeros(obs[key].shape))
        else:
          np.testing.assert_array_equal(obs[key], 0.)
    obs = env.step(copy.copy(one_action))[3]
    # Ensure observation is not filled with zeros.
    for key in obs:
      obs[key] += np.random.normal()
    # Pass observation through the base class that in charge of dropping values.
    obs = realworld_env.Base.get_observation(env._task, env._physics, obs)
    for key in obs:
      # Verify that values have stopped dropping.
      if isinstance(obs[key], np.ndarray):
        np.testing.assert_array_compare(operator.__ne__, obs[key],
                                        np.zeros(obs[key].shape))
      else:
        np.testing.assert_array_compare(operator.__ne__, obs[key], 0.)