def testMultiObjPassedObjective(self, domain_name, task_name):
  """Ensure the objective can be passed directly, as a factory or a class."""
  # Pass an objective instance via a zero-argument factory.
  multiobj_class = lambda: multiobj_objectives.SafetyObjective()  # pylint: disable=unnecessary-lambda
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': True},
      multiobj_spec={
          'enable': True,
          'objective': multiobj_class,
          'observed': True,
          'coeff': 0.5
      })
  env.reset()
  env.step(0)

  # Pass the objective class itself.
  multiobj_class = multiobj_objectives.SafetyObjective
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': True},
      multiobj_spec={
          'enable': True,
          'objective': multiobj_class,
          'observed': True,
          'coeff': 0.5
      })
  env.reset()
  env.step(0)
def testSafetyCoeff(self, domain_name, task_name):
  """Ensure a valid safety_coeff is accepted and invalid values raise."""
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': True, 'safety_coeff': 0.1})
  env.reset()
  step = env.step(0)
  self.assertIn('constraints', step.observation.keys())
  # safety_coeff must lie in [0, 1]; values outside this range should raise.
  for c in [2, -1]:
    with self.assertRaises(ValueError):
      env = rwrl.load(
          domain_name=domain_name,
          task_name=task_name,
          safety_spec={'enable': True, 'safety_coeff': c})
def testDelayObservationsDelay(self, domain_name, task_name):
  """Ensure there is observation delay as specified."""
  observations_delay = np.random.randint(low=1, high=10)
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      delay_spec={
          'enable': True,
          'observations': observations_delay
      })
  obs1 = env.reset().observation
  action_spec = env.action_spec()
  one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)
  # Make sure subsequent observations are the same (clearing the buffer).
  for _ in range(observations_delay):
    obs2 = env.step(copy.copy(one_action)).observation
    for key in obs1:
      np.testing.assert_array_equal(obs1[key], obs2[key])
  # Make sure we finally observe a different observation.
  obs2 = env.step(copy.copy(one_action)).observation
  array_equality = []
  for key in obs1:
    array_equality.append((obs1[key] == obs2[key]).all())
  self.assertIn(False, array_equality)
def testDelayActionsDelay(self, domain_name, task_name):
  """Ensure there is action delay as specified."""
  actions_delay = np.random.randint(low=1, high=10)
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      delay_spec={
          'enable': True,
          'actions': actions_delay
      })
  env.reset()
  action_spec = env.action_spec()
  zero_action = np.zeros(shape=action_spec.shape, dtype=action_spec.dtype)
  one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)
  if hasattr(action_spec, 'minimum'):
    one_action = np.minimum(action_spec.maximum, one_action)
  # Perform the first action, which fills up the buffer.
  env.step(copy.copy(zero_action))
  # Send one_action and make sure zero_action is still being executed.
  for _ in range(actions_delay):
    env.step(copy.copy(one_action))
    np.testing.assert_array_equal(env.physics.control(), zero_action)
  # Make sure the delayed one_action is finally executed.
  env.step(copy.copy(zero_action))
  np.testing.assert_array_equal(env.physics.control(), one_action)
def run():
  """Runs a random agent on a given environment."""
  env = rwrl.load(
      domain_name=FLAGS.domain_name,
      task_name=FLAGS.task_name,
      safety_spec=dict(enable=True),
      delay_spec=dict(enable=True, actions=20),
      log_output=os.path.join(FLAGS.save_path, 'log.npz'),
      environment_kwargs=dict(
          log_safety_vars=True, log_every=20, flat_observation=True))

  policy = random_policy(action_spec=env.action_spec())

  rewards = []
  for _ in range(FLAGS.total_episodes):
    timestep = env.reset()
    total_reward = 0.
    while not timestep.last():
      action = policy(timestep)
      timestep = env.step(action)
      total_reward += timestep.reward
    rewards.append(total_reward)

  print('Random policy total reward per episode: {:.2f} +- {:.2f}'.format(
      np.mean(rewards), np.std(rewards)))
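# `run()` above calls a `random_policy` helper that is not shown in this
# section. A minimal sketch of such a helper, assuming it samples actions
# uniformly from a bounded action spec (illustrative only; the suite's actual
# helper may differ):
def random_policy(action_spec):
  """Returns a policy callable that samples uniformly from `action_spec`."""

  def policy(timestep):
    del timestep  # A random policy ignores the observation.
    return np.random.uniform(
        low=action_spec.minimum,
        high=action_spec.maximum,
        size=action_spec.shape).astype(action_spec.dtype)

  return policy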
def testMultiObjSafetyNoRewardObs(self, domain_name, task_name):
  """Ensure multi-objective safety reward can be loaded."""
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': True},
      multiobj_spec={
          'enable': True,
          'objective': 'safety',
          'reward': False,
          'observed': True,
          'coeff': 0.5
      })
  env.reset()
  env.step(0)
  ts = env.step(0)
  self.assertIn('multiobj', ts.observation)
  # Make sure we see a 1 in normalized violations.
  env.task._constraints_obs = np.ones(
      env.task._constraints_obs.shape).astype(bool)
  obs = env.task.get_observation(env.physics)
  self.assertEqual(obs['multiobj'][1], 1)
  # And that the multi-objective coefficient has no effect on the reward.
  env.task._multiobj_coeff = 0
  r1 = env.task.get_reward(env.physics)
  env.task._multiobj_coeff = 1
  r2 = env.task.get_reward(env.physics)
  self.assertEqual(r1, r2)
def test_no_logger(self, domain_name, task_name):
  """Ensure write_logs does not fail when no log output is specified."""
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': True},
      log_output=None,
      environment_kwargs=dict(log_safety_vars=True))
  env.write_logs()
def testNoiseRepetitionActions(self, domain_name, task_name):
  """Ensure actions are being repeated."""
  steps = np.random.randint(low=3, high=10)
  prob = 1.
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      noise_spec={
          'repetition': {
              'enable': True,
              'actions_prob': prob,
              'actions_steps': steps,
          }
      })
  action_spec = env.action_spec()
  zero_action = np.zeros(shape=action_spec.shape, dtype=action_spec.dtype)
  one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)
  if hasattr(action_spec, 'minimum'):
    one_action = np.minimum(action_spec.maximum, one_action)
  env.reset()
  env.step(copy.copy(zero_action))
  # Verify that all the actions are zero_action for `steps` time steps.
  for _ in range(steps):
    np.testing.assert_array_equal(env.physics.control(), zero_action)
    env.step(copy.copy(one_action))
  # Verify that all the actions are one_action for `steps` time steps.
  for _ in range(steps):
    np.testing.assert_array_equal(env.physics.control(), one_action)
    env.step(copy.copy(zero_action))
  np.testing.assert_array_equal(env.physics.control(), zero_action)
def test_log_every_n(self, every_n=5):
  """Ensure logs are written to disk every `every_n` episodes."""
  domain_name = 'cartpole'
  task_name = 'realworld_balance'
  temp_dir = self.create_tempdir()
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={
          'enable': True,
          'observations': False
      },
      log_output=os.path.join(temp_dir.full_path, 'test.pickle'),
      environment_kwargs=dict(log_safety_vars=True, log_every=every_n))
  env.reset()
  n = 0
  while True:
    timestep = env.step(0)
    if timestep.last():
      n += 1
      if n % every_n == 0:
        self.assertTrue(os.path.exists(env.logs_path))
        os.remove(env.logs_path)
        self.assertFalse(os.path.exists(env.logs_path))
      if n > 20:
        break
def testPerturbRandomWalk(self, domain_name, task_name):
  """Ensure parameter is perturbed on each reset."""
  period = 3
  perturb_scheduler = 'drift_pos'
  if domain_name == 'cartpole':
    perturb_param = 'pole_length'
  elif domain_name == 'walker':
    perturb_param = 'thigh_length'
  elif domain_name == 'humanoid':
    perturb_param = 'contact_friction'
  elif domain_name == 'quadruped':
    perturb_param = 'shin_length'
  elif domain_name == 'manipulator':
    perturb_param = 'lower_arm_length'
  else:
    raise ValueError('Unknown environment name: %s' % domain_name)
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      perturb_spec={
          'enable': True,
          'period': period,
          'param': perturb_param,
          'scheduler': perturb_scheduler
      })

  def get_param_val():
    if domain_name == 'cartpole':
      return env._physics.named.model.geom_size['pole_1', 1]
    elif domain_name == 'walker':
      return env._physics.named.model.geom_size['right_thigh', 1]
    elif domain_name == 'humanoid':
      return env._physics.named.model.geom_friction['right_right_foot', 0]
    elif domain_name == 'quadruped':
      return env._physics.named.model.geom_size['shin_front_left', 1]
    elif domain_name == 'manipulator':
      return env._physics.named.model.geom_size['lower_arm', 1]
    else:
      raise ValueError('Unknown environment name: %s' % domain_name)

  # Verify that the first reset changes the value.
  val_old = get_param_val()
  env.reset()
  val_new = get_param_val()
  self.assertNotEqual(val_old, val_new)
  # Verify that the parameter changes only once every `period` resets.
  val_old = val_new
  for unused_count in range(period - 1):
    env.reset()
    val_new = get_param_val()
    self.assertEqual(val_old, val_new)
  env.reset()
  val_new = get_param_val()
  self.assertNotEqual(val_old, val_new)
def testSafetyConstraintsPresent(self, domain_name, task_name):
  """Ensure observations contain 'constraints' when safety is specified."""
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': True})
  env.reset()
  step = env.step(0)
  self.assertIn('constraints', step.observation.keys())
def test_init(self, domain_name, task_name):
  """Ensure a logging environment initializes and writes logs."""
  temp_file = self.create_tempfile()
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': True},
      log_output=temp_file.full_path,
      environment_kwargs=dict(log_safety_vars=True))
  env.write_logs()
def testNoiseGaussianObservations(self, domain_name, task_name):
  """Ensure additive Gaussian noise is applied to the observation."""
  noise = 0.5
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      noise_spec={'gaussian': {
          'enable': True,
          'observations': noise
      }})
  env.reset()
  # Get the (noisy) observation from the realworld task.
  obs1 = env._task.get_observation(env._physics)
  # Get the (clean) observation from the underlying task.
  obs2 = collections.OrderedDict()
  if domain_name == 'cartpole':
    obs2['position'] = env.physics.bounded_position()
    obs2['velocity'] = env.physics.velocity()
  elif domain_name == 'humanoid':
    obs2['joint_angles'] = env.physics.joint_angles()
    obs2['head_height'] = env.physics.head_height()
    obs2['extremities'] = env.physics.extremities()
    obs2['torso_vertical'] = env.physics.torso_vertical_orientation()
    obs2['com_velocity'] = env.physics.center_of_mass_velocity()
    obs2['velocity'] = env.physics.velocity()
  elif domain_name == 'manipulator':
    arm_joints = [
        'arm_root', 'arm_shoulder', 'arm_elbow', 'arm_wrist', 'finger',
        'fingertip', 'thumb', 'thumbtip'
    ]
    obs2['arm_pos'] = env.physics.bounded_joint_pos(arm_joints)
    obs2['arm_vel'] = env.physics.joint_vel(arm_joints)
    obs2['touch'] = env.physics.touch()
    obs2['hand_pos'] = env.physics.body_2d_pose('hand')
    obs2['object_pos'] = env.physics.body_2d_pose(env._task._object)
    obs2['object_vel'] = env.physics.joint_vel(env._task._object_joints)
    obs2['target_pos'] = env.physics.body_2d_pose(env._task._target)
  elif domain_name == 'quadruped':
    obs2['egocentric_state'] = env.physics.egocentric_state()
    obs2['torso_velocity'] = env.physics.torso_velocity()
    obs2['torso_upright'] = env.physics.torso_upright()
    obs2['imu'] = env.physics.imu()
    obs2['force_torque'] = env.physics.force_torque()
  elif domain_name == 'walker':
    obs2['orientations'] = env.physics.orientations()
    obs2['height'] = env.physics.torso_height()
    obs2['velocity'] = env.physics.velocity()
  else:
    raise ValueError('Unknown environment name: %s' % domain_name)
  # Verify that the observations differ (noise was added).
  for key in obs1:
    np.testing.assert_array_compare(operator.__ne__, obs1[key], obs2[key])
def main(_):
  if FLAGS.suite == 'dm_control':
    logging.info('Loading from dm_control...')
    env = suite.load(domain_name=FLAGS.domain_name, task_name=FLAGS.task_name)
  elif FLAGS.suite == 'rwrl':
    logging.info('Loading from rwrl...')
    env = rwrl.load(domain_name=FLAGS.domain_name, task_name=FLAGS.task_name)
  else:
    # Fail fast rather than referencing an unbound `env` below.
    raise ValueError('Unknown suite: %s' % FLAGS.suite)
  random_policy = RandomAgent(env.action_spec()).action
  viewer.launch(env, policy=random_policy)
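# `main` above and `_gen_stats` below rely on a `RandomAgent` class that is
# not defined in this section. A minimal sketch, assuming it samples uniformly
# from a bounded action spec; `action` accepts an optional timestep because
# `viewer.launch` calls the policy with one while `_gen_stats` calls it with
# no arguments (illustrative only, not the suite's actual implementation):
class RandomAgent(object):
  """Illustrative agent that selects actions uniformly at random."""

  def __init__(self, action_spec):
    self._action_spec = action_spec

  def action(self, timestep=None):
    del timestep  # A random agent ignores the observation.
    return np.random.uniform(
        low=self._action_spec.minimum,
        high=self._action_spec.maximum,
        size=self._action_spec.shape).astype(self._action_spec.dtype)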
def testSafetyObservationsDisabled(self, domain_name, task_name):
  """Ensure safety observations can be disabled."""
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={
          'enable': True,
          'observations': False
      })
  env.reset()
  step = env.step(0)
  self.assertNotIn('constraints', step.observation.keys())
def environment(combined_challenge,
                domain,
                task,
                log_output=None,
                environment_kwargs=None):
  """RWRL environment."""
  env = rwrl_envs.load(
      domain_name=domain,
      task_name=task,
      log_output=log_output,
      environment_kwargs=environment_kwargs,
      combined_challenge=combined_challenge)
  return wrappers.SinglePrecisionWrapper(env)
def testNoiseGaussianObservationsFlattening(self, domain_name, task_name):
  """Ensure Gaussian observation noise works with flattened observations."""
  noise = 0.5
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      noise_spec={'gaussian': {
          'enable': True,
          'observations': noise
      }},
      environment_kwargs={'flat_observation': True})
  env.reset()
  env.step(0)
def _load_env():
  """Loads environment."""
  raw_env = rwrl.load(
      domain_name=FLAGS.domain_name,
      task_name=FLAGS.task_name,
      safety_spec=dict(enable=True),
      delay_spec=dict(enable=True, actions=20),
      log_output=os.path.join(FLAGS.save_path, 'log.npz'),
      environment_kwargs=dict(
          log_safety_vars=True, log_every=20, flat_observation=True))
  env = GymEnv(raw_env)
  env = bench.Monitor(env, FLAGS.save_path)
  return env
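# `_load_env` above wraps the dm_env environment in a `GymEnv` adapter that is
# not shown here. A rough sketch of such an adapter, assuming
# `flat_observation=True` (so the observation spec has a flat shape) and the
# pre-0.26 Gym API expected by baselines' `bench.Monitor`; this is
# illustrative, not the actual wrapper:
import gym


class GymEnv(gym.Env):
  """Illustrative dm_env-to-Gym adapter for flat observations."""

  def __init__(self, dm_environment):
    self._env = dm_environment
    action_spec = self._env.action_spec()
    observation_spec = self._env.observation_spec()
    self.action_space = gym.spaces.Box(
        low=action_spec.minimum,
        high=action_spec.maximum,
        shape=action_spec.shape,
        dtype=np.float32)
    self.observation_space = gym.spaces.Box(
        low=-np.inf, high=np.inf,
        shape=observation_spec.shape,
        dtype=np.float32)

  def reset(self):
    return self._env.reset().observation

  def step(self, action):
    timestep = self._env.step(action)
    reward = timestep.reward if timestep.reward is not None else 0.
    return timestep.observation, reward, timestep.last(), {}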
def testAddedDummyObservationsFlattened(self, domain_name, task_name):
  """Ensure dummy observations are included in the flattened observation."""
  base_env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': True},
      environment_kwargs=dict(flat_observation=True))
  base_env.reset()
  mod_env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      dimensionality_spec={
          'enable': True,
          'num_random_state_observations': NUM_DUMMY,
      },
      safety_spec={'enable': True},
      environment_kwargs=dict(flat_observation=True))
  mod_env.reset()
  # Compare flattened observation sizes with and without dummy observations.
  base_obs = base_env.step(0)
  mod_obs = mod_env.step(0)
  self.assertEqual(mod_obs.observation.shape[0],
                   base_obs.observation.shape[0] + NUM_DUMMY)
def testMultiObjNoSafety(self, domain_name, task_name):
  """Ensure the safety objective fails when the safety challenge is off."""
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': False},
      multiobj_spec={
          'enable': True,
          'objective': 'safety',
          'observed': True,
          'coeff': 0.5
      })
  with self.assertRaises(Exception):
    env.reset()
    env.step(0)
def environment(
    combined_challenge: str,
    domain: str,
    task: str,
    log_output: Optional[str] = None,
    environment_kwargs: Optional[Dict[str, Any]] = None
) -> dm_env.Environment:
  """RWRL environment."""
  env = rwrl_envs.load(
      domain_name=domain,
      task_name=task,
      log_output=log_output,
      environment_kwargs=environment_kwargs,
      combined_challenge=combined_challenge)
  return wrappers.SinglePrecisionWrapper(env)
def make_environment(domain_name: str = 'cartpole',
                     task_name: str = 'balance') -> dm_env.Environment:
  """Creates a RWRL suite environment."""
  environment = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec=dict(enable=True),
      delay_spec=dict(enable=True, actions=20),
      log_output=os.path.join(FLAGS.save_path, 'log.npz'),
      environment_kwargs=dict(
          log_safety_vars=True, log_every=2, flat_observation=True))
  environment = wrappers.SinglePrecisionWrapper(environment)
  return environment
def test_disabled_safety_obs(self, domain_name, task_name):
  """Ensure safety vars are logged even when their observation is disabled."""
  temp_file = self.create_tempfile()
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={
          'enable': True,
          'observations': False
      },
      log_output=temp_file.full_path,
      environment_kwargs=dict(log_safety_vars=True))
  env.reset()
  timestep = env.step(0)
  self.assertNotIn('constraints', timestep.observation.keys())
  self.assertIn('constraints', env._stats_acc._buffer[-1].observation)
  env.write_logs()
def _gen_stats(self, domain_name, task_name):
  """Runs a random policy for a few episodes and returns the logs path."""
  temp_dir = self.create_tempdir()
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': True},
      log_output=os.path.join(temp_dir.full_path, 'test.pickle'),
      environment_kwargs=dict(log_safety_vars=True, flat_observation=True))
  random_policy = RandomAgent(env.action_spec()).action
  for _ in range(3):
    timestep = env.step(random_policy())
    while not timestep.last():
      timestep = env.step(random_policy())
  env.write_logs()
  return env.logs_path
def test_flat_obs(self, domain_name, task_name):
  """Ensure flattened observations and specs are one-dimensional."""
  temp_file = self.create_tempfile()
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      safety_spec={'enable': True},
      log_output=temp_file.full_path,
      environment_kwargs=dict(log_safety_vars=True, flat_observation=True))
  self.assertLen(env.observation_spec().shape, 1)
  if domain_name == 'cartpole' and task_name in ['swingup', 'balance']:
    self.assertEqual(env.observation_spec().shape[0], 8)
  timestep = env.step(0)
  self.assertLen(timestep.observation.shape, 1)
  if domain_name == 'cartpole' and task_name in ['swingup', 'balance']:
    self.assertEqual(timestep.observation.shape[0], 8)
  env.write_logs()
def testAddedDummyObservations(self, domain_name, task_name):
  """Ensure the requested number of dummy observations is added."""
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      dimensionality_spec={
          'enable': True,
          'num_random_state_observations': 5,
      })
  env.reset()
  # Get observation from the realworld task.
  obs = env._task.get_observation(env._physics)
  for i in range(5):
    self.assertIn('dummy-{}'.format(i), obs.keys())
  # Only dummy-0 through dummy-4 should exist.
  for i in range(5, 10):
    self.assertNotIn('dummy-{}'.format(i), obs.keys())
def testDelayActionsNoDelay(self, domain_name, task_name):
  """Ensure there is no action delay if not specified."""
  env = rwrl.load(domain_name=domain_name, task_name=task_name)
  env.reset()
  action_spec = env.action_spec()
  # Send zero action and make sure it is immediately executed.
  zero_action = np.zeros(shape=action_spec.shape, dtype=action_spec.dtype)
  env.step(copy.copy(zero_action))
  np.testing.assert_array_equal(env.physics.control(), zero_action)
  # Send one action and make sure it is immediately executed.
  one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)
  if hasattr(action_spec, 'minimum'):
    one_action = np.minimum(action_spec.maximum, one_action)
  env.step(copy.copy(one_action))
  np.testing.assert_array_equal(env.physics.control(), one_action)
def testNoiseDroppedObservationsFlattening(self, domain_name, task_name):
  """Ensure dropped observation noise works with flattened observations."""
  prob = 1.
  steps = np.random.randint(low=3, high=10)
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      noise_spec={
          'dropped': {
              'enable': True,
              'observations_prob': prob,
              'observations_steps': steps,
          }
      },
      environment_kwargs={'flat_observation': True})
  env.reset()
  env.step(np.array(0))  # Scalar actions aren't tolerated with noise.
def testDelayObservationsNoDelay(self, domain_name, task_name):
  """Ensure there is no observation delay if not specified."""
  env = rwrl.load(domain_name=domain_name, task_name=task_name)
  env.reset()
  action_spec = env.action_spec()
  one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)
  if hasattr(action_spec, 'minimum'):
    one_action = np.minimum(action_spec.maximum, one_action)
  obs1 = env._task.get_observation(env._physics)
  env.step(copy.copy(one_action))
  obs2 = env._task.get_observation(env._physics)
  # Make sure subsequent observations are different.
  array_equality = []
  for key in obs1:
    array_equality.append((obs1[key] == obs2[key]).all())
  self.assertIn(False, array_equality)
def testNoiseDroppedObservationsValues(self, domain_name, task_name):
  """Ensure observations drop values."""
  steps = np.random.randint(low=3, high=10)
  prob = 1.
  env = rwrl.load(
      domain_name=domain_name,
      task_name=task_name,
      noise_spec={
          'dropped': {
              'enable': True,
              'observations_prob': prob,
              'observations_steps': steps,
          }
      })
  action_spec = env.action_spec()
  one_action = np.ones(shape=action_spec.shape, dtype=action_spec.dtype)
  # Verify that values are dropped (zeroed) for the first `steps` steps.
  for step in range(steps):
    if step == 1:
      # Cancel the dropped values after the first sequence.
      env._task._noise_dropped_obs_steps = 0.
    obs = env.step(copy.copy(one_action)).observation
    for key in obs:
      if isinstance(obs[key], np.ndarray):
        np.testing.assert_array_equal(obs[key], np.zeros(obs[key].shape))
      else:
        np.testing.assert_array_equal(obs[key], 0.)
  obs = env.step(copy.copy(one_action)).observation
  # Ensure the observation is not filled with zeros.
  for key in obs:
    obs[key] += np.random.normal()
  # Pass the observation through the base class that is in charge of
  # dropping values.
  obs = realworld_env.Base.get_observation(env._task, env._physics, obs)
  # Verify that values have stopped dropping.
  for key in obs:
    if isinstance(obs[key], np.ndarray):
      np.testing.assert_array_compare(operator.__ne__, obs[key],
                                      np.zeros(obs[key].shape))
    else:
      np.testing.assert_array_compare(operator.__ne__, obs[key], 0.)