def test_timestamps_no_camera_delay():
    """Verify robot/camera timestamp consistency when the camera has no delay."""
    platform = TriFingerPlatform(
        visualization=False, enable_cameras=True, camera_delay_steps=0
    )
    zero_action = platform.Action()

    # the step counts used below rely on a camera frame rate of 10 Hz
    assert platform.camera_rate_fps == 10

    # number of robot steps between two camera updates
    steps_per_camera_update = (
        1 / platform.camera_rate_fps
    ) / platform._time_step
    # duration of one robot step in milliseconds
    step_duration_ms = platform._time_step * 1000

    # --- first time step ---
    t = platform.append_desired_action(zero_action)
    stamp1_ms = platform.get_timestamp_ms(t)
    stamp1_s = stamp1_ms / 1000

    for cam in platform.get_camera_observation(t).cameras:
        assert stamp1_s == cam.timestamp

    # The observation for t+1 should be exactly the same as the one for t
    # (with the current implementation).
    for cam in platform.get_camera_observation(t + 1).cameras:
        assert stamp1_s == cam.timestamp

    # --- second time step ---
    t = platform.append_desired_action(zero_action)
    stamp2_ms = platform.get_timestamp_ms(t)
    assert stamp2_ms == stamp1_ms + step_duration_ms

    # no new camera observation is expected yet
    for cam in platform.get_camera_observation(t).cameras:
        assert stamp1_s == cam.timestamp

    # Step until the next camera/object update is expected (-1 because one
    # action was already appended for the "second time step" above).
    for _ in range(int(steps_per_camera_update - 1)):
        t = platform.append_desired_action(zero_action)
    stampn_ms = platform.get_timestamp_ms(t)
    stampn_s = stampn_ms / 1000
    assert stampn_ms > stamp2_ms

    for cam in platform.get_camera_observation(t).cameras:
        assert stampn_s == cam.timestamp
def test_camera_timestamps_with_camera_delay_less_than_rate():
    """Camera delay (10 steps) smaller than the update interval (100 steps).

    A bit more complex example (probably redundant with the simpler delay
    test, but kept since it was already implemented).
    """
    platform = TriFingerPlatform(
        visualization=False, enable_cameras=True, camera_delay_steps=10
    )
    action = platform.Action()

    # the step counts used below rely on a camera frame rate of 10 Hz
    assert platform.camera_rate_fps == 10
    assert platform._compute_camera_update_step_interval() == 100

    stamp0_s = platform.get_timestamp_ms(0) / 1000

    # The start is a bit tricky because of the initial observation, which has
    # timestamp 0, while the next observation is triggered in the first step,
    # resulting in the same timestamp.  So step 100 times until the next
    # camera update is triggered.
    for i in range(100):
        t = platform.append_desired_action(action)
        # in each step we should still see the camera observation from step 0
        for cam in platform.get_camera_observation(t).cameras:
            assert stamp0_s == cam.timestamp, f"i={i}, t={t}"
    assert t == 99

    # one more step to trigger the next camera update
    t = platform.append_desired_action(action)
    stamp_trigger_s = platform.get_timestamp_ms(t) / 1000

    # The next observation is triggered now, but due to the delay the old one
    # should still be visible for the next 9 steps.
    for i in range(9):
        t = platform.append_desired_action(action)
        for cam in platform.get_camera_observation(t).cameras:
            assert stamp0_s == cam.timestamp, f"i={i}, t={t}"

    # After the next step the updated camera observation should be visible,
    # again persisting for 100 steps.
    for i in range(100):
        t = platform.append_desired_action(action)
        for cam in platform.get_camera_observation(t).cameras:
            assert stamp_trigger_s == cam.timestamp, f"i={i}, t={t}"

    # and now the next update should be there
    t = platform.append_desired_action(action)
    latest_obs = platform.get_camera_observation(t)
    assert stamp_trigger_s < latest_obs.cameras[0].timestamp
def test_timestamps(self):
    """Verify consistency of robot and camera observation timestamps."""
    platform = TriFingerPlatform(visualization=False, enable_cameras=True)
    action = platform.Action()

    # ensure the camera frame rate is set to 10 Hz
    self.assertEqual(platform.camera_rate_fps, 10)

    # compute camera update step interval based on configured rates
    camera_update_step_interval = (
        1 / platform.camera_rate_fps) / platform._time_step
    # robot time step in milliseconds
    time_step_ms = platform._time_step * 1000

    # First time step
    t = platform.append_desired_action(action)
    first_stamp_ms = platform.get_timestamp_ms(t)
    first_stamp_s = first_stamp_ms / 1000

    camera_obs = platform.get_camera_observation(t)
    # NOTE(review): here the camera timestamps are compared against the value
    # in *milliseconds*, while further below (and in the other timestamp
    # tests) the value in *seconds* is used — confirm which unit the camera
    # timestamps actually carry; one of the two comparisons looks suspect.
    self.assertEqual(first_stamp_ms, camera_obs.cameras[0].timestamp)
    self.assertEqual(first_stamp_ms, camera_obs.cameras[1].timestamp)
    self.assertEqual(first_stamp_ms, camera_obs.cameras[2].timestamp)

    # Test time stamps of observations t+1
    camera_obs_next = platform.get_camera_observation(t + 1)
    next_stamp_ms = first_stamp_ms + time_step_ms
    self.assertEqual(next_stamp_ms, camera_obs_next.cameras[0].timestamp)
    self.assertEqual(next_stamp_ms, camera_obs_next.cameras[1].timestamp)
    self.assertEqual(next_stamp_ms, camera_obs_next.cameras[2].timestamp)

    # Second time step
    t = platform.append_desired_action(action)
    second_stamp_ms = platform.get_timestamp_ms(t)
    self.assertEqual(second_stamp_ms, first_stamp_ms + time_step_ms)

    # there should not be a new camera observation yet
    camera_obs = platform.get_camera_observation(t)
    self.assertEqual(first_stamp_s, camera_obs.cameras[0].timestamp)
    self.assertEqual(first_stamp_s, camera_obs.cameras[1].timestamp)
    self.assertEqual(first_stamp_s, camera_obs.cameras[2].timestamp)

    # do several steps until a new camera/object update is expected
    # (-1 because there is already one action appended above for the
    # "second time step")
    for _ in range(int(camera_update_step_interval - 1)):
        t = platform.append_desired_action(action)
    nth_stamp_ms = platform.get_timestamp_ms(t)
    nth_stamp_s = nth_stamp_ms / 1000
    self.assertGreater(nth_stamp_ms, second_stamp_ms)

    camera_obs = platform.get_camera_observation(t)
    self.assertEqual(nth_stamp_s, camera_obs.cameras[0].timestamp)
    self.assertEqual(nth_stamp_s, camera_obs.cameras[1].timestamp)
    self.assertEqual(nth_stamp_s, camera_obs.cameras[2].timestamp)
def test_object_pose_observation(self):
    """The object pose passed at construction must be reported back."""
    Pose = namedtuple("Pose", ["position", "orientation"])
    initial_pose = Pose([0.1, -0.5, 0], [0, 0, 0.2084599, 0.97803091])

    platform = TriFingerPlatform(initial_object_pose=initial_pose)
    t = platform.append_desired_action(platform.Action())
    observed_pose = platform.get_camera_observation(t).object_pose

    np.testing.assert_array_equal(
        observed_pose.position, initial_pose.position
    )
    np.testing.assert_array_almost_equal(
        observed_pose.orientation, initial_pose.orientation
    )
def test_get_camera_observation_timeindex(self):
    """Check which time indices are accepted by get_camera_observation."""
    platform = TriFingerPlatform(enable_cameras=True)

    # a negative time index needs to be rejected
    with self.assertRaises(ValueError):
        platform.get_camera_observation(-1)

    t = platform.append_desired_action(platform.Action())

    # both t and t + 1 are valid indices and must not raise
    for valid_index in (t, t + 1):
        try:
            platform.get_camera_observation(valid_index)
        except Exception:
            self.fail()

    # anything beyond t + 1 needs to be rejected
    with self.assertRaises(ValueError):
        platform.get_camera_observation(t + 2)
def test_get_object_pose_timeindex(self):
    """Check which time indices are accepted by get_object_pose."""
    platform = TriFingerPlatform()

    # a negative time index needs to be rejected
    with self.assertRaises(ValueError):
        platform.get_object_pose(-1)

    t = platform.append_desired_action(platform.Action())

    # both t and t + 1 are valid indices and must not raise
    for valid_index in (t, t + 1):
        try:
            platform.get_object_pose(valid_index)
        except Exception:
            self.fail()

    # anything beyond t + 1 needs to be rejected
    with self.assertRaises(ValueError):
        platform.get_object_pose(t + 2)
class ExamplePushingTrainingEnv(gym.Env):
    """Gym environment for moving cubes with simulated TriFingerPro."""

    def __init__(
        self,
        initializer=None,
        action_type=cube_env.ActionType.POSITION,
        frameskip=1,
        visualization=False,
    ):
        """Initialize.

        Args:
            initializer: Initializer class for providing initial cube pose and
                goal pose.  If no initializer is provided, we will initialize
                in a way which is helpful for learning.
            action_type (ActionType): Specify which type of actions to use.
                See :class:`ActionType` for details.
            frameskip (int): Number of actual control steps to be performed in
                one call of step().
            visualization (bool): If true, the pyBullet GUI is run for
                visualization.
        """
        # Basic initialization
        # ====================
        self.initializer = initializer
        self.action_type = action_type
        self.visualization = visualization

        if frameskip < 1:
            raise ValueError("frameskip cannot be less than 1.")
        self.frameskip = frameskip

        # will be initialized in reset()
        self.platform = None

        # Create the action and observation spaces
        # ========================================
        spaces = TriFingerPlatform.spaces

        if self.action_type == cube_env.ActionType.TORQUE:
            self.action_space = spaces.robot_torque.gym
        elif self.action_type == cube_env.ActionType.POSITION:
            self.action_space = spaces.robot_position.gym
        elif self.action_type == cube_env.ActionType.TORQUE_AND_POSITION:
            self.action_space = gym.spaces.Dict({
                "torque": spaces.robot_torque.gym,
                "position": spaces.robot_position.gym,
            })
        else:
            raise ValueError("Invalid action_type")

        self.observation_names = [
            "robot_position",
            "robot_velocity",
            "robot_tip_positions",
            "object_position",
            "object_orientation",
            "goal_object_position",
        ]

        self.observation_space = gym.spaces.Dict({
            "robot_position": spaces.robot_position.gym,
            "robot_velocity": spaces.robot_velocity.gym,
            # tip positions use the same bounds as the object position,
            # replicated for the three fingers
            "robot_tip_positions": gym.spaces.Box(
                low=np.array([spaces.object_position.low] * 3),
                high=np.array([spaces.object_position.high] * 3),
            ),
            "object_position": spaces.object_position.gym,
            "object_orientation": spaces.object_orientation.gym,
            "goal_object_position": spaces.object_position.gym,
        })

    def step(self, action):
        """Run ``frameskip`` control steps and accumulate the reward.

        Args:
            action: An action matching the configured action space.

        Returns:
            tuple: (observation, reward, is_done, info) as usual for gym
            environments.

        Raises:
            RuntimeError: If called before reset() or if the episode length is
                exceeded.
            ValueError: If the action is not contained in the action space.
        """
        if self.platform is None:
            raise RuntimeError("Call `reset()` before starting to step.")

        if not self.action_space.contains(action):
            raise ValueError(
                "Given action is not contained in the action space.")

        num_steps = self.frameskip

        # ensure episode length is not exceeded due to frameskip
        step_count_after = self.step_count + num_steps
        if step_count_after > move_cube.episode_length:
            excess = step_count_after - move_cube.episode_length
            num_steps = max(1, num_steps - excess)

        reward = 0.0
        for _ in range(num_steps):
            self.step_count += 1
            if self.step_count > move_cube.episode_length:
                raise RuntimeError("Exceeded number of steps for one episode.")

            # send action to robot
            robot_action = self._gym_action_to_robot_action(action)
            t = self.platform.append_desired_action(robot_action)

            # reward is computed from the change between the observation
            # before and after applying the action
            previous_observation = self._create_observation(t)
            observation = self._create_observation(t + 1)

            reward += self._compute_reward(
                previous_observation=previous_observation,
                observation=observation,
            )

        is_done = self.step_count == move_cube.episode_length

        return observation, reward, is_done, self.info

    def reset(self):
        """Reset the environment and return the initial observation."""
        # reset simulation
        del self.platform

        # initialize simulation
        if self.initializer is None:
            # if no initializer is given (which will be the case during
            # training), we can initialize in any way desired.  here, we
            # initialize the cube always in the center of the arena, instead
            # of randomly, as this appears to help training
            initial_robot_position = (
                TriFingerPlatform.spaces.robot_position.default)
            default_object_position = (
                TriFingerPlatform.spaces.object_position.default)
            default_object_orientation = (
                TriFingerPlatform.spaces.object_orientation.default)
            initial_object_pose = move_cube.Pose(
                position=default_object_position,
                orientation=default_object_orientation,
            )
            goal_object_pose = move_cube.sample_goal(difficulty=1)
        else:
            # if an initializer is given, i.e. during evaluation, we need to
            # initialize according to it, to make sure we remain coherent with
            # the standard CubeEnv.  otherwise the trajectories produced
            # during evaluation will be invalid.
            initial_robot_position = (
                TriFingerPlatform.spaces.robot_position.default)
            initial_object_pose = self.initializer.get_initial_state()
            goal_object_pose = self.initializer.get_goal()

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        self.goal = {
            "position": goal_object_pose.position,
            "orientation": goal_object_pose.orientation,
        }
        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal_object_pose.position,
                orientation=goal_object_pose.orientation,
                physicsClientId=self.platform.simfinger._pybullet_client_id,
            )

        self.info = dict()
        self.step_count = 0

        return self._create_observation(0)

    def seed(self, seed=None):
        """Set the seed for this env's random number generator."""
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        # move_cube uses this module-level RNG for goal sampling
        move_cube.random = self.np_random
        return [seed]

    def _create_observation(self, t):
        # Build the flat observation dict for time index t.
        robot_observation = self.platform.get_robot_observation(t)
        object_observation = self.platform.get_object_pose(t)
        robot_tip_positions = self.platform.forward_kinematics(
            robot_observation.position)
        robot_tip_positions = np.array(robot_tip_positions)

        observation = {
            "robot_position": robot_observation.position,
            "robot_velocity": robot_observation.velocity,
            "robot_tip_positions": robot_tip_positions,
            "object_position": object_observation.position,
            "object_orientation": object_observation.orientation,
            "goal_object_position": self.goal["position"],
        }
        return observation

    @staticmethod
    def _compute_reward(previous_observation, observation):
        # Reward combines two shaping terms: moving the finger tips towards
        # the object and moving the object towards the goal.

        # calculate first reward term
        current_distance_from_block = np.linalg.norm(
            observation["robot_tip_positions"] -
            observation["object_position"])
        previous_distance_from_block = np.linalg.norm(
            previous_observation["robot_tip_positions"] -
            previous_observation["object_position"])
        reward_term_1 = (previous_distance_from_block -
                         current_distance_from_block)

        # calculate second reward term
        current_dist_to_goal = np.linalg.norm(
            observation["goal_object_position"] -
            observation["object_position"])
        previous_dist_to_goal = np.linalg.norm(
            previous_observation["goal_object_position"] -
            previous_observation["object_position"])
        reward_term_2 = previous_dist_to_goal - current_dist_to_goal

        reward = 750 * reward_term_1 + 250 * reward_term_2
        return reward

    def _gym_action_to_robot_action(self, gym_action):
        # construct robot action depending on action type
        if self.action_type == cube_env.ActionType.TORQUE:
            robot_action = self.platform.Action(torque=gym_action)
        elif self.action_type == cube_env.ActionType.POSITION:
            robot_action = self.platform.Action(position=gym_action)
        elif self.action_type == cube_env.ActionType.TORQUE_AND_POSITION:
            robot_action = self.platform.Action(
                torque=gym_action["torque"], position=gym_action["position"])
        else:
            raise ValueError("Invalid action_type")
        return robot_action
class CubeEnv(gym.GoalEnv):
    """Gym environment for moving cubes with simulated TriFingerPro."""

    def __init__(
        self,
        initializer,
        action_type=ActionType.POSITION,
        frameskip=1,
        visualization=False,
    ):
        """Initialize.

        Args:
            initializer: Initializer class for providing initial cube pose and
                goal pose.  See :class:`RandomInitializer` and
                :class:`FixedInitializer`.
            action_type (ActionType): Specify which type of actions to use.
                See :class:`ActionType` for details.
            frameskip (int): Number of actual control steps to be performed in
                one call of step().
            visualization (bool): If true, the pyBullet GUI is run for
                visualization.
        """
        # Basic initialization
        # ====================
        self.initializer = initializer
        self.action_type = action_type
        self.visualization = visualization

        # TODO: The name "frameskip" makes sense for an atari environment but
        # not really for our scenario.  The name is also misleading as
        # "frameskip = 1" suggests that one frame is skipped while it actually
        # means "do one step per step" (i.e. no skip).
        if frameskip < 1:
            raise ValueError("frameskip cannot be less than 1.")
        self.frameskip = frameskip

        # will be initialized in reset()
        self.platform = None

        # Create the action and observation spaces
        # ========================================
        spaces = TriFingerPlatform.spaces

        object_state_space = gym.spaces.Dict({
            "position": spaces.object_position.gym,
            "orientation": spaces.object_orientation.gym,
        })

        if self.action_type == ActionType.TORQUE:
            self.action_space = spaces.robot_torque.gym
        elif self.action_type == ActionType.POSITION:
            self.action_space = spaces.robot_position.gym
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            self.action_space = gym.spaces.Dict({
                "torque": spaces.robot_torque.gym,
                "position": spaces.robot_position.gym,
            })
        else:
            raise ValueError("Invalid action_type")

        # goal-env style observation space (observation/desired/achieved)
        self.observation_space = gym.spaces.Dict({
            "observation": gym.spaces.Dict({
                "position": spaces.robot_position.gym,
                "velocity": spaces.robot_velocity.gym,
                "torque": spaces.robot_torque.gym,
            }),
            "desired_goal": object_state_space,
            "achieved_goal": object_state_space,
        })

    def compute_reward(self, achieved_goal, desired_goal, info):
        """Compute the reward for the given achieved and desired goal.

        Args:
            achieved_goal (dict): Current pose of the object.
            desired_goal (dict): Goal pose of the object.
            info (dict): An info dictionary containing a field "difficulty"
                which specifies the difficulty level.

        Returns:
            float: The reward that corresponds to the provided achieved goal
            w.r.t. to the desired goal.  Note that the following should always
            hold true::

                ob, reward, done, info = env.step()
                assert reward == env.compute_reward(
                    ob['achieved_goal'],
                    ob['desired_goal'],
                    info,
                )
        """
        # reward is the negated evaluation error of the achieved pose
        return -move_cube.evaluate_state(
            move_cube.Pose.from_dict(desired_goal),
            move_cube.Pose.from_dict(achieved_goal),
            info["difficulty"],
        )

    def step(self, action):
        """Run one timestep of the environment's dynamics.

        When end of episode is reached, you are responsible for calling
        ``reset()`` to reset this environment's state.

        Args:
            action: An action provided by the agent (depends on the selected
                :class:`ActionType`).

        Returns:
            tuple:

            - observation (dict): agent's observation of the current
              environment.
            - reward (float) : amount of reward returned after previous
              action.
            - done (bool): whether the episode has ended, in which case
              further step() calls will return undefined results.
            - info (dict): info dictionary containing the difficulty level of
              the goal.
        """
        if self.platform is None:
            raise RuntimeError("Call `reset()` before starting to step.")

        if not self.action_space.contains(action):
            raise ValueError(
                "Given action is not contained in the action space.")

        num_steps = self.frameskip

        # ensure episode length is not exceeded due to frameskip
        step_count_after = self.step_count + num_steps
        if step_count_after > move_cube.episode_length:
            excess = step_count_after - move_cube.episode_length
            num_steps = max(1, num_steps - excess)

        reward = 0.0
        for _ in range(num_steps):
            self.step_count += 1
            if self.step_count > move_cube.episode_length:
                raise RuntimeError("Exceeded number of steps for one episode.")

            # send action to robot
            robot_action = self._gym_action_to_robot_action(action)
            t = self.platform.append_desired_action(robot_action)

            # Use observations of step t + 1 to follow what would be expected
            # in a typical gym environment.  Note that on the real robot, this
            # will not be possible
            observation = self._create_observation(t + 1)

            reward += self.compute_reward(
                observation["achieved_goal"],
                observation["desired_goal"],
                self.info,
            )

        is_done = self.step_count == move_cube.episode_length

        return observation, reward, is_done, self.info

    def reset(self):
        """Reset the environment and return the initial observation."""
        # reset simulation
        del self.platform

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=TriFingerPlatform.spaces.robot_position.
            default,
            initial_object_pose=self.initializer.get_initial_state(),
        )
        goal = self.initializer.get_goal()
        self.goal = {
            "position": goal.position,
            "orientation": goal.orientation,
        }
        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=0.065,
                position=goal.position,
                orientation=goal.orientation,
            )

        self.info = {"difficulty": self.initializer.difficulty}

        self.step_count = 0

        return self._create_observation(0)

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator.

        .. note::

           Spaces need to be seeded separately.  E.g. if you want to sample
           actions directly from the action space using
           ``env.action_space.sample()`` you can set a seed there using
           ``env.action_space.seed()``.

        Returns:
            List of seeds used by this environment.  This environment only
            uses a single seed, so the list contains only one element.
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        # move_cube uses this module-level RNG for goal sampling
        move_cube.random = self.np_random
        return [seed]

    def _create_observation(self, t):
        # Build the goal-env style observation dict for time index t.
        robot_observation = self.platform.get_robot_observation(t)
        object_observation = self.platform.get_object_pose(t)

        observation = {
            "observation": {
                "position": robot_observation.position,
                "velocity": robot_observation.velocity,
                "torque": robot_observation.torque,
            },
            "desired_goal": self.goal,
            "achieved_goal": {
                "position": object_observation.position,
                "orientation": object_observation.orientation,
            },
        }
        return observation

    def _gym_action_to_robot_action(self, gym_action):
        # construct robot action depending on action type
        if self.action_type == ActionType.TORQUE:
            robot_action = self.platform.Action(torque=gym_action)
        elif self.action_type == ActionType.POSITION:
            robot_action = self.platform.Action(position=gym_action)
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            robot_action = self.platform.Action(
                torque=gym_action["torque"], position=gym_action["position"])
        else:
            raise ValueError("Invalid action_type")
        return robot_action
class CubeTrajectoryEnv(gym.Env):
    """Gym environment for moving cubes with simulated TriFingerPro."""

    def __init__(
        self,
        initializer: Initializer,
        action_type: ActionType = ActionType.POSITION,
        step_size: int = 1,
        visualization: bool = False,
    ):
        """Initialize.

        Args:
            initializer: Initializer class for providing goal trajectories.
                See :class:`RandomInitializer` and :class:`FixedInitializer`.
            action_type (ActionType): Specify which type of actions to use.
                See :class:`ActionType` for details.
            step_size (int): Number of actual control steps to be performed in
                one call of step().
            visualization (bool): If true, the pyBullet GUI is run for
                visualization.
        """
        # Basic initialization
        # ====================
        self.initializer = initializer
        self.action_type = action_type
        self.visualization = visualization

        if step_size < 1:
            raise ValueError("step_size cannot be less than 1.")
        self.step_size = step_size

        # will be initialized in reset()
        self.platform = None

        # Create the action and observation spaces
        # ========================================
        spaces = TriFingerPlatform.spaces

        if self.action_type == ActionType.TORQUE:
            self.action_space = spaces.robot_torque.gym
        elif self.action_type == ActionType.POSITION:
            self.action_space = spaces.robot_position.gym
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            self.action_space = gym.spaces.Dict({
                "torque": spaces.robot_torque.gym,
                "position": spaces.robot_position.gym,
            })
        else:
            raise ValueError("Invalid action_type")

        # goals are object positions only (no orientation)
        self.observation_space = gym.spaces.Dict({
            "observation": gym.spaces.Dict({
                "position": spaces.robot_position.gym,
                "velocity": spaces.robot_velocity.gym,
                "torque": spaces.robot_torque.gym,
            }),
            "desired_goal": spaces.object_position.gym,
            "achieved_goal": spaces.object_position.gym,
        })

    def compute_reward(
        self,
        achieved_goal: mct.Position,
        desired_goal: mct.Position,
        info: dict,
    ) -> float:
        """Compute the reward for the given achieved and desired goal.

        Args:
            achieved_goal: Current position of the object.
            desired_goal: Goal trajectory of the object.
            info: An info dictionary containing a field "time_index" which
                contains the time index of the achieved_goal.

        Returns:
            The reward that corresponds to the provided achieved goal w.r.t.
            to the desired goal.  Note that the following should always hold
            true::

                ob, reward, done, info = env.step()
                assert reward == env.compute_reward(
                    ob['achieved_goal'],
                    ob['desired_goal'],
                    info,
                )
        """
        # This is just some sanity check to verify that the given desired_goal
        # actually matches with the active goal in the trajectory.
        active_goal = np.asarray(
            mct.get_active_goal(self.info["trajectory"],
                                self.info["time_index"]))
        assert np.all(active_goal == desired_goal), "{}: {} != {}".format(
            info["time_index"], active_goal, desired_goal)

        return -mct.evaluate_state(info["trajectory"], info["time_index"],
                                   achieved_goal)

    def step(self, action):
        """Run one timestep of the environment's dynamics.

        When end of episode is reached, you are responsible for calling
        ``reset()`` to reset this environment's state.

        Args:
            action: An action provided by the agent (depends on the selected
                :class:`ActionType`).

        Returns:
            tuple:

            - observation (dict): agent's observation of the current
              environment.
            - reward (float): amount of reward returned after previous action.
            - done (bool): whether the episode has ended, in which case
              further step() calls will return undefined results.
            - info (dict): info dictionary containing the current time index.
        """
        if self.platform is None:
            raise RuntimeError("Call `reset()` before starting to step.")

        if not self.action_space.contains(action):
            raise ValueError(
                "Given action is not contained in the action space.")

        num_steps = self.step_size

        # ensure episode length is not exceeded due to step_size
        step_count_after = self.step_count + num_steps
        if step_count_after > mct.EPISODE_LENGTH:
            excess = step_count_after - mct.EPISODE_LENGTH
            num_steps = max(1, num_steps - excess)

        reward = 0.0
        for _ in range(num_steps):
            self.step_count += 1
            if self.step_count > mct.EPISODE_LENGTH:
                raise RuntimeError("Exceeded number of steps for one episode.")

            # send action to robot
            robot_action = self._gym_action_to_robot_action(action)
            t = self.platform.append_desired_action(robot_action)

            # update goal visualization
            if self.visualization:
                goal_position = mct.get_active_goal(self.info["trajectory"],
                                                    t)
                self.goal_marker.set_state(goal_position, (0, 0, 0, 1))

            # Use observations of step t + 1 to follow what would be expected
            # in a typical gym environment.  Note that on the real robot, this
            # will not be possible
            self.info["time_index"] = t + 1

            # Alternatively use the observation of step t.  This is the
            # observation from the moment before action_t is applied, i.e. the
            # result of that action is not yet visible in this observation.
            #
            # When using this observation, the resulting cumulative reward
            # should match exactly the one computed during replay (with the
            # above it will differ slightly).
            #
            # self.info["time_index"] = t

            observation = self._create_observation(self.info["time_index"])

            reward += self.compute_reward(
                observation["achieved_goal"],
                observation["desired_goal"],
                self.info,
            )

        is_done = self.step_count >= mct.EPISODE_LENGTH

        return observation, reward, is_done, self.info

    def reset(self):
        """Reset the environment."""
        # hard-reset simulation
        del self.platform

        # initialize simulation
        initial_robot_position = (
            TriFingerPlatform.spaces.robot_position.default)
        # initialize cube at the centre
        initial_object_pose = mct.move_cube.Pose(
            position=mct.INITIAL_CUBE_POSITION)

        self.platform = TriFingerPlatform(
            visualization=self.visualization,
            initial_robot_position=initial_robot_position,
            initial_object_pose=initial_object_pose,
        )

        # get goal trajectory from the initializer
        trajectory = self.initializer.get_trajectory()

        # visualize the goal
        if self.visualization:
            self.goal_marker = visual_objects.CubeMarker(
                width=mct.move_cube._CUBE_WIDTH,
                position=trajectory[0][1],
                orientation=(0, 0, 0, 1),
                pybullet_client_id=self.platform.simfinger._pybullet_client_id,
            )

        self.info = {"time_index": -1, "trajectory": trajectory}

        self.step_count = 0

        return self._create_observation(0)

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator.

        .. note::

           Spaces need to be seeded separately.  E.g. if you want to sample
           actions directly from the action space using
           ``env.action_space.sample()`` you can set a seed there using
           ``env.action_space.seed()``.

        Returns:
            List of seeds used by this environment.  This environment only
            uses a single seed, so the list contains only one element.
        """
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        # move_cube uses this module-level RNG for goal sampling
        mct.move_cube.random = self.np_random
        return [seed]

    def _create_observation(self, t):
        # Build the goal-env style observation dict for time index t, using
        # the filtered object pose from the camera observation.
        robot_observation = self.platform.get_robot_observation(t)
        camera_observation = self.platform.get_camera_observation(t)
        object_observation = camera_observation.filtered_object_pose
        active_goal = np.asarray(
            mct.get_active_goal(self.info["trajectory"], t))

        observation = {
            "observation": {
                "position": robot_observation.position,
                "velocity": robot_observation.velocity,
                "torque": robot_observation.torque,
            },
            "desired_goal": active_goal,
            "achieved_goal": object_observation.position,
        }
        return observation

    def _gym_action_to_robot_action(self, gym_action):
        # construct robot action depending on action type
        if self.action_type == ActionType.TORQUE:
            robot_action = self.platform.Action(torque=gym_action)
        elif self.action_type == ActionType.POSITION:
            robot_action = self.platform.Action(position=gym_action)
        elif self.action_type == ActionType.TORQUE_AND_POSITION:
            robot_action = self.platform.Action(
                torque=gym_action["torque"], position=gym_action["position"])
        else:
            raise ValueError("Invalid action_type")
        return robot_action
def test_camera_timestamps_with_camera_delay_simple():
    """Basic delay test: camera update every 2 steps, delay of 1 step."""
    platform = TriFingerPlatform(
        visualization=False, enable_cameras=True, camera_delay_steps=1
    )
    action = platform.Action()

    # raise the camera rate to 500 fps so fewer steps are needed in this test
    # (original comment said 100 fps, but the value set here is 500)
    platform.camera_rate_fps = 500
    assert platform._compute_camera_update_step_interval() == 2

    def verify_all_cameras(expected_stamp, step_t):
        # all three cameras must carry the expected timestamp
        for cam in platform.get_camera_observation(step_t).cameras:
            assert expected_stamp == cam.timestamp, f"t={step_t}"

    stamp_init_s = platform.get_timestamp_ms(0) / 1000

    # first step triggers the camera (the initial observation is returned
    # at this point)
    t = platform.append_desired_action(action)
    assert t == 0
    verify_all_cameras(stamp_init_s, t)
    stamp_a_s = platform.get_timestamp_ms(t) / 1000

    # in the second step the observation is ready but still has stamp zero
    t = platform.append_desired_action(action)
    verify_all_cameras(stamp_a_s, t)

    # in the third step a new observation is triggered, but due to the delay
    # we still see the old one
    t = platform.append_desired_action(action)
    verify_all_cameras(stamp_a_s, t)
    stamp_b_s = platform.get_timestamp_ms(t) / 1000
    assert stamp_b_s > stamp_a_s

    # in the fourth step the new observation is ready
    t = platform.append_desired_action(action)
    verify_all_cameras(stamp_b_s, t)

    # again a trigger, but due to the delay we still get the previous one
    t = platform.append_desired_action(action)
    verify_all_cameras(stamp_b_s, t)
    stamp_c_s = platform.get_timestamp_ms(t) / 1000
    assert stamp_c_s > stamp_b_s

    # and there should be an update again
    t = platform.append_desired_action(action)
    verify_all_cameras(stamp_c_s, t)
def test_camera_timestamps_with_camera_delay_more_than_rate():
    """Verify camera timestamps when the delay exceeds the update interval.

    The delay (3 steps) is larger than the camera update interval
    (2 steps), which effectively reduces the rate at which new
    observations become visible.  Small numbers are used to keep the
    test manageable.
    """
    platform = TriFingerPlatform(
        visualization=False, enable_cameras=True, camera_delay_steps=3
    )
    action = platform.Action()

    # a high camera rate keeps the amount of stepping in this test small
    platform.camera_rate_fps = 500
    assert platform._compute_camera_update_step_interval() == 2

    def check_all_cameras(t, expected_stamp_s):
        # The observation at step t must report the expected timestamp
        # (seconds) on every one of the three cameras.
        camera_obs = platform.get_camera_observation(t)
        for camera in camera_obs.cameras:
            assert expected_stamp_s == camera.timestamp, f"t={t}"

    initial_stamp_s = platform.get_timestamp_ms(0) / 1000

    # the very first step triggers the camera; the initial observation is
    # what we see at this point
    t = platform.append_desired_action(action)
    assert t == 0
    check_all_cameras(t, initial_stamp_s)
    trigger1_stamp_s = platform.get_timestamp_ms(t) / 1000

    # due to the delay of 3 steps, the triggered observation only becomes
    # visible three steps later
    t = platform.append_desired_action(action)
    t = platform.append_desired_action(action)
    check_all_cameras(t, initial_stamp_s)

    t = platform.append_desired_action(action)
    check_all_cameras(t, trigger1_stamp_s)

    # only now, one step later, the next update is triggered
    t = platform.append_desired_action(action)
    check_all_cameras(t, trigger1_stamp_s)
    trigger2_stamp_s = platform.get_timestamp_ms(t) / 1000
    assert trigger2_stamp_s > trigger1_stamp_s

    # and again it takes three steps until the new observation shows up
    t = platform.append_desired_action(action)
    t = platform.append_desired_action(action)
    check_all_cameras(t, trigger1_stamp_s)

    t = platform.append_desired_action(action)
    check_all_cameras(t, trigger2_stamp_s)
def test_timestamps(self):
    """Verify timestamps of robot, object and camera observations.

    Checks that at step t the object pose and all three camera
    observations carry the platform timestamp of t, and that querying
    t+1 yields the timestamp advanced by one robot time step.

    NOTE(review): unlike the sibling tests this is written unittest-style
    (takes ``self``, uses ``assertEqual``) and reads the private
    ``platform._camera_rate_fps`` instead of ``platform.camera_rate_fps``
    — confirm whether this file mixes test frameworks intentionally.
    """
    platform = TriFingerPlatform(visualization=False, enable_cameras=True)
    action = platform.Action()

    # compute camera update step interval based on configured rates
    camera_update_step_interval = (
        1 / platform._camera_rate_fps) / platform._time_step
    # robot time step in milliseconds
    time_step_ms = platform._time_step * 1000

    # First time step
    t = platform.append_desired_action(action)
    first_stamp_ms = platform.get_timestamp_ms(t)
    first_stamp_s = first_stamp_ms / 1000

    # object pose and all camera images should be stamped with the time
    # of step t (in milliseconds here)
    object_pose = platform.get_object_pose(t)
    camera_obs = platform.get_camera_observation(t)
    self.assertEqual(first_stamp_ms, object_pose.timestamp)
    self.assertEqual(first_stamp_ms, camera_obs.cameras[0].timestamp)
    self.assertEqual(first_stamp_ms, camera_obs.cameras[1].timestamp)
    self.assertEqual(first_stamp_ms, camera_obs.cameras[2].timestamp)

    # Test time stamps of observations t+1
    object_pose_next = platform.get_object_pose(t + 1)
    camera_obs_next = platform.get_camera_observation(t + 1)
    next_stamp_ms = first_stamp_ms + time_step_ms
    self.assertEqual(next_stamp_ms, object_pose_next.timestamp)
    self.assertEqual(next_stamp_ms, camera_obs_next.cameras[0].timestamp)
    self.assertEqual(next_stamp_ms, camera_obs_next.cameras[1].timestamp)
    self.assertEqual(next_stamp_ms, camera_obs_next.cameras[2].timestamp)

    # XXX ===============================================================
    # The following part of the test is disabled as currently everything is
    # updated in each step (i.e. no lower update rate for camera and
    # object).
    return
    # NOTE(review): everything below is dead code kept for when lower
    # camera/object update rates are reintroduced.  It also compares
    # timestamps in seconds (``first_stamp_s``/``nth_stamp_s``) while the
    # active part above compares milliseconds — re-check the units before
    # re-enabling.

    # Second time step
    t = platform.append_desired_action(action)
    second_stamp_ms = platform.get_timestamp_ms(t)
    self.assertEqual(second_stamp_ms, first_stamp_ms + time_step_ms)

    # there should not be a new camera observation yet
    object_pose = platform.get_object_pose(t)
    camera_obs = platform.get_camera_observation(t)
    self.assertEqual(first_stamp_s, object_pose.timestamp)
    self.assertEqual(first_stamp_s, camera_obs.cameras[0].timestamp)
    self.assertEqual(first_stamp_s, camera_obs.cameras[1].timestamp)
    self.assertEqual(first_stamp_s, camera_obs.cameras[2].timestamp)

    # do several steps until a new camera/object update is expected
    for _ in range(int(camera_update_step_interval)):
        t = platform.append_desired_action(action)
    nth_stamp_ms = platform.get_timestamp_ms(t)
    nth_stamp_s = nth_stamp_ms / 1000
    self.assertGreater(nth_stamp_ms, second_stamp_ms)

    object_pose = platform.get_object_pose(t)
    camera_obs = platform.get_camera_observation(t)
    self.assertEqual(nth_stamp_s, object_pose.timestamp)
    self.assertEqual(nth_stamp_s, camera_obs.cameras[0].timestamp)
    self.assertEqual(nth_stamp_s, camera_obs.cameras[1].timestamp)
    self.assertEqual(nth_stamp_s, camera_obs.cameras[2].timestamp)