示例#1
0
    def test_constant_torque(self):
        """Compare two runs sending a constant torque.

        In each run the finger is reset to a fixed position and a constant
        torque is applied for a number of steps.  The final observation of each
        run is compared.  If the simulation behaves deterministically, the
        observations should be equal.
        """
        finger = SimFinger(finger_type="fingerone", )

        start_position = [0.5, -0.7, -1.5]
        action = finger.Action(torque=[0.3, 0.3, 0.3])

        def run():
            finger.reset_finger_positions_and_velocities(start_position)
            for i in range(30):
                finger._set_desired_action(action)
                finger._step_simulation()
            return finger._get_latest_observation()

        first_run = run()
        second_run = run()

        np.testing.assert_array_equal(first_run.torque, second_run.torque)
        np.testing.assert_array_equal(first_run.position, second_run.position)
        np.testing.assert_array_equal(first_run.velocity, second_run.velocity)
示例#2
0
    def setUp(self):
        self.finger = SimFinger(
            finger_type="fingerone",
            time_step=0.001,
            enable_visualization=False,
        )

        self.start_position = [0, -0.7, -1.5]
        self.finger.reset_finger_positions_and_velocities(self.start_position)
def execute_random_motion(*, finger_name, nb_timesteps, enable_visualization):
    finger = SimFinger(
        finger_type=finger_name,
        enable_visualization=enable_visualization,
    )
    cube = collision_objects.Block()

    for t in range(nb_timesteps):
        if t % 50 == 0:
            torque = np.random.uniform(low=-0.36, high=0.36, size=(9))
        finger.append_desired_action(finger.Action(torque=torque))
    del finger
    del cube
 def test_loading_urdfs_locally(self):
     """
     Get the keys corresponding to the valid finger types
     from BaseFinger and try importing their corresponding
     URDFs.
     """
     finger_data = finger_types_data.finger_types_data
     for key in finger_data.keys():
         try:
             os.environ["ROS_PACKAGE_PATH"] = " "
             SimFinger(finger_type=key, )
         except pybullet.error as e:
             self.fail("Failed to import the local copies of the urdf for"
                       "SimFinger(finger_type={}): {}".format(key, e))
示例#5
0
    def test_reproduce_reset_state(self):
        """
        Send hundred states (positions + velocities) to all the 1DOF joints
        of the fingers and assert they exactly reach these states.
        """
        finger = SimFinger(finger_type="fingerone", )

        for _ in range(100):
            state_positions = sample.random_joint_positions(
                finger.number_of_fingers)
            state_velocities = [pos * 10 for pos in state_positions]

            reset_state = finger.reset_finger_positions_and_velocities(
                state_positions, state_velocities)

            reset_positions = reset_state.position
            reset_velocities = reset_state.velocity

            np.testing.assert_array_equal(reset_positions,
                                          state_positions,
                                          verbose=True)
            np.testing.assert_array_equal(reset_velocities,
                                          state_velocities,
                                          verbose=True)
    def test_loading_urdfs(self):
        """
        Get the keys corresponding to the valid finger types
        from BaseFinger and try importing their corresponding
        URDFs.
        """
        finger_data = finger_types_data.finger_types_data
        for key in finger_data.keys():
            try:
                SimFinger(finger_type=key, )

            except pybullet.error as e:
                self.fail(
                    "Failed to create SimFinger(finger_type={}): {}".format(
                        key, e))
def test_SimFinger_sanitise_torqes():
    max_torque = 0.5

    # without velocity damping
    t = SimFinger._sanitise_torques(
        [-0.2, 0.4], max_torque, 0, np.array([0, 0])
    )
    np.testing.assert_array_equal(t, [-0.2, 0.4])

    t = SimFinger._sanitise_torques(
        [-1.2, 0.4], max_torque, 0, np.array([0, 0])
    )
    np.testing.assert_array_equal(t, [-0.5, 0.4])

    t = SimFinger._sanitise_torques(
        [-0.2, 0.6], max_torque, 0, np.array([0, 0])
    )
    np.testing.assert_array_equal(t, [-0.2, 0.5])

    t = SimFinger._sanitise_torques(
        [-0.8, 0.6], max_torque, 0, np.array([0, 0])
    )
    np.testing.assert_array_equal(t, [-0.5, 0.5])

    # with velocity damping
    t = SimFinger._sanitise_torques(
        [0, 0], max_torque, 0.2, np.array([1, -0.5])
    )
    np.testing.assert_array_almost_equal(t, [-0.2, 0.1])

    t = SimFinger._sanitise_torques([0, 0], max_torque, 1, np.array([1, -0.6]))
    np.testing.assert_array_almost_equal(t, [-0.5, 0.5])

    t = SimFinger._sanitise_torques(
        [0.1, 0.2], max_torque, 0.2, np.array([1, -0.5])
    )
    np.testing.assert_array_almost_equal(t, [-0.1, 0.3])

    t = SimFinger._sanitise_torques(
        [1, -1], max_torque, 0.2, np.array([1, -0.5])
    )
    np.testing.assert_array_almost_equal(t, [0.3, -0.4])
def main_finger(finger_type, data_file):
    finger = SimFinger(enable_visualization=True, finger_type=finger_type)
    goal_marker = visual_objects.Marker(number_of_goals=1)

    def reset_finger(joint_position):
        for i, joint_id in enumerate(finger.pybullet_joint_indices):
            pybullet.resetJointState(finger.finger_id, joint_id,
                                     joint_position[i])

    with open(data_file, "rb") as file_handle:
        episodes = pickle.load(file_handle)

    input("Press enter to start playback")

    start_time_data = None
    start_time_playback = datetime.datetime.now()

    for episode_num, data in enumerate(episodes[1:]):
        print("episode {}".format(episode_num))

        tip_goal = data["tip_goal"][0]
        trajectory = np.vstack(data["joint_positions"])
        timestamps = data["timestamps"]

        if not start_time_data:
            start_time_data = timestamps[0]

        timediff_data = timestamps[0] - start_time_data - 1
        utils.sleep_until(start_time_playback +
                          datetime.timedelta(seconds=timediff_data))
        goal_marker.set_state(tip_goal)
        reset_finger(trajectory[0])

        for position, timestamp in zip(trajectory, timestamps):
            timediff_data = timestamp - start_time_data
            utils.sleep_until(start_time_playback +
                              datetime.timedelta(seconds=timediff_data))

            reset_finger(position)
示例#9
0
    def __init__(
        self,
        control_rate_s,
        finger_type,
        enable_visualization,
    ):
        """Intializes the constituents of the pushing environment.

        Constructor sets up the finger robot depending on the finger type,
        sets up the observation and action spaces, smoothing for
        reducing jitter on the robot, and provides for a way to synchronize
        robots being trained independently.


        Args:
            control_rate_s (float): the rate (in seconds) at which step method of the env
                will run. The actual robot controller may run at a higher rate,
                so this is used to compute the number of robot control updates
                per environment step.
            finger_type (string): Name of the finger type.  In order to get
                a list of the valid finger types, call
                :meth:`.finger_types_data.get_valid_finger_types`
            enable_visualization (bool): if the simulation env is to be
                visualized
        """

        #: an instance of the simulated robot depending on the desired
        #: robot type
        self.finger = SimFinger(
            finger_type=finger_type,
            enable_visualization=enable_visualization,
        )

        self.num_fingers = finger_types_data.get_number_of_fingers(finger_type)

        #: the number of times the same action is to be applied to
        #: the robot in one step.
        self.steps_per_control = int(
            round(control_rate_s / self.finger.time_step_s))
        assert (abs(control_rate_s -
                    self.steps_per_control * self.finger.time_step_s) <=
                0.000001)

        #: the types of observations that should be a part of the environment's
        #: observed state
        self.observations_keys = [
            "joint_positions",
            "joint_velocities",
            "action_joint_positions",
            "goal_position",
            "object_position",
        ]
        #: the size of each of the observation type that is part of the
        #: observation keys (in the same order)
        self.observations_sizes = [
            3 * self.num_fingers,
            3 * self.num_fingers,
            3 * self.num_fingers,
            3,
            3,
        ]

        # sets up the observation and action spaces for the environment,
        # unscaled spaces have the custom bounds set up for each observation
        # or action type, whereas all the values in the observation and action
        # spaces lie between 1 and -1
        self.spaces = FingerSpaces(
            num_fingers=self.num_fingers,
            observations_keys=self.observations_keys,
            observations_sizes=self.observations_sizes,
            separate_goals=False,
        )

        self.unscaled_observation_space = (
            self.spaces.get_unscaled_observation_space())
        self.unscaled_action_space = self.spaces.get_unscaled_action_space()

        self.observation_space = self.spaces.get_scaled_observation_space()
        self.action_space = self.spaces.get_scaled_action_space()

        #: a logger to enable logging of observations if desired
        self.logger = DataLogger()

        #: the object that has to be pushed
        self.block = collision_objects.Block()

        #: a marker to visualize where the target goal position for the episode
        #: is
        self.goal_marker = visual_objects.Marker(
            number_of_goals=1,
            goal_size=0.0325,
            initial_position=[0.19, 0.08, 0.0425],
        )

        self.reset()
示例#10
0
    def __init__(
        self,
        control_rate_s,
        finger_type,
        enable_visualization,
        smoothing_params,
        use_real_robot=False,
        finger_config_suffix="0",
        synchronize=False,
    ):
        """Intializes the constituents of the reaching environment.

        Constructor sets up the finger robot depending on the finger type, and
        also whether an instance of the simulated or the real robot is to be
        created. Also sets up the observation and action spaces, smoothing for
        reducing jitter on the robot, and provides for a way to synchronize
        robots being trained independently.

        Args:
            control_rate_s (float): the rate (in seconds) at which step method of the env
                will run. The actual robot controller may run at a higher rate,
                so this is used to compute the number of robot control updates
                per environment step.
            finger_type (string): Name of the finger type.  In order to get
                a dictionary of the valid finger types, call
                :meth:`.finger_types_data.get_valid_finger_types`
            enable_visualization (bool): if the simulation env is to be
                visualized
            smoothing_params (dict):
                num_episodes (int): the total number of episodes for which the
                    training is performed
                start_after (float): the fraction of episodes after which the
                    smoothing of applied actions to the motors should start
                final_alpha (float): smoothing coeff that will be reached at
                    the end of the smoothing
                stop_after (float): the fraction of total episodes by which
                    final alpha is to be reached, after which the same final
                    alpha will be used for smoothing in the remainder of
                    the episodes
                is_test (bool, optional): Include this for testing
            use_real_robot (bool): if training is to be performed on
                the real robot ([default] False)
            finger_config_suffix: pass this if only one of
                the three fingers is to be trained. Valid choices include
                [0, 120, 240] ([default] 0)
            synchronize (bool): Set this to True if you want to train
                independently on three fingers in separate processes, but
                have them synchronized. ([default] False)
        """
        #: an instance of a simulated, or a real robot depending on
        #: what is desired.
        if use_real_robot:
            from pybullet_fingers.real_finger import RealFinger

            self.finger = RealFinger(
                finger_type=finger_type,
                finger_config_suffix=finger_config_suffix,
                enable_visualization=enable_visualization,
            )

        else:
            self.finger = SimFinger(
                finger_type=finger_type,
                enable_visualization=enable_visualization,
            )

        self.num_fingers = finger_types_data.get_number_of_fingers(finger_type)

        #: the number of times the same action is to be applied to
        #: the robot.
        self.steps_per_control = int(
            round(control_rate_s / self.finger.time_step_s))
        assert (abs(control_rate_s -
                    self.steps_per_control * self.finger.time_step_s) <=
                0.000001)

        #: the types of observations that should be a part of the environment's
        #: observed state
        self.observations_keys = [
            "joint_positions",
            "joint_velocities",
            "goal_position",
            "action_joint_positions",
        ]

        self.observations_sizes = [
            3 * self.num_fingers,
            3 * self.num_fingers,
            3 * self.num_fingers,
            3 * self.num_fingers,
        ]

        # sets up the observation and action spaces for the environment,
        # unscaled spaces have the custom bounds set up for each observation
        # or action type, whereas all the values in the observation and action
        # spaces lie between 1 and -1
        self.spaces = FingerSpaces(
            num_fingers=self.num_fingers,
            observations_keys=self.observations_keys,
            observations_sizes=self.observations_sizes,
            separate_goals=True,
        )

        self.unscaled_observation_space = (
            self.spaces.get_unscaled_observation_space())
        self.unscaled_action_space = self.spaces.get_unscaled_action_space()

        self.observation_space = self.spaces.get_scaled_observation_space()
        self.action_space = self.spaces.get_scaled_action_space()

        #: a logger to enable logging of observations if desired
        self.logger = DataLogger()

        # sets up smooothing
        if "is_test" in smoothing_params:
            self.smoothing_start_episode = 0
            self.smoothing_alpha = smoothing_params["final_alpha"]
            self.smoothing_increase_step = 0
            self.smoothing_stop_episode = math.inf
        else:
            self.smoothing_stop_episode = int(
                smoothing_params["num_episodes"] *
                smoothing_params["stop_after"])

            self.smoothing_start_episode = int(
                smoothing_params["num_episodes"] *
                smoothing_params["start_after"])
            num_smoothing_increase_steps = (self.smoothing_stop_episode -
                                            self.smoothing_start_episode)
            self.smoothing_alpha = 0
            self.smoothing_increase_step = (smoothing_params["final_alpha"] /
                                            num_smoothing_increase_steps)

        self.smoothed_action = None
        self.episode_count = 0

        #: a marker to visualize where the target goal position for the episode
        #: is to which the tip link(s) of the robot should reach
        self.enable_visualization = enable_visualization
        if self.enable_visualization:
            self.goal_marker = visual_objects.Marker(
                number_of_goals=self.num_fingers)

        # set up synchronization if it's set to true
        self.synchronize = synchronize
        if synchronize:
            now = datetime.datetime.now()
            self.next_start_time = datetime.datetime(now.year, now.month,
                                                     now.day, now.hour,
                                                     now.minute + 1)
        else:
            self.next_start_time = None

        self.seed()
        self.reset()
    def __init__(
        self,
        visualization=False,
        initial_robot_position=None,
        initial_object_pose=None,
        enable_cameras=False,
    ):
        """Initialize.

        Args:
            visualization (bool):  Set to true to run visualization.
            initial_robot_position: Initial robot joint angles
            initial_object_pose:  Initial pose for the manipulation object.
                Can be any object with attributes ``position`` (x, y, z) and
                ``orientation`` (x, y, z, w).  This is optional, if not set, a
                random pose will be sampled.
            enable_cameras (bool):  Set to true to enable camera observations.
                By default this is disabled as rendering of images takes a lot
                of computational power.  Therefore the cameras should only be
                enabled if the images are actually used.
        """
        #: Camera rate in frames per second.  Observations of camera and
        #: object pose will only be updated with this rate.
        #: NOTE: This is currently not used!
        self._camera_rate_fps = 30

        #: Set to true to render camera observations
        self.enable_cameras = enable_cameras

        #: Simulation time step
        self._time_step = 0.004

        # first camera update in the first step
        self._next_camera_update_step = 0

        # Initialize robot, object and cameras
        # ====================================

        self.simfinger = SimFinger(
            finger_type="trifingerpro",
            time_step=self._time_step,
            enable_visualization=visualization,
        )

        if initial_robot_position is None:
            initial_robot_position = self.spaces.robot_position.default

        self.simfinger.reset_finger_positions_and_velocities(
            initial_robot_position)

        if initial_object_pose is None:
            initial_object_pose = move_cube.Pose(
                position=self.spaces.object_position.default,
                orientation=self.spaces.object_orientation.default,
            )
        self.cube = collision_objects.Block(
            initial_object_pose.position,
            initial_object_pose.orientation,
            mass=0.020,
        )

        self.tricamera = camera.TriFingerCameras()

        # Forward some methods for convenience
        # ====================================
        # forward "RobotFrontend" methods directly to simfinger
        self.Action = self.simfinger.Action
        self.get_desired_action = self.simfinger.get_desired_action
        self.get_applied_action = self.simfinger.get_applied_action
        self.get_timestamp_ms = self.simfinger.get_timestamp_ms
        self.get_current_timeindex = self.simfinger.get_current_timeindex
        self.get_robot_observation = self.simfinger.get_observation

        # forward kinematics directly to simfinger
        self.forward_kinematics = (
            self.simfinger.pinocchio_utils.forward_kinematics)

        # Initialize log
        # ==============
        self._action_log = {
            "initial_robot_position": initial_robot_position.tolist(),
            "initial_object_pose": {
                "position": initial_object_pose.position.tolist(),
                "orientation": initial_object_pose.orientation.tolist(),
            },
            "actions": [],
        }
def main_trifinger(finger_type, data_file_0, data_file_120, data_file_240):

    finger = SimFinger(enable_visualization=True, finger_type=finger_type)
    goal_marker = visual_objects.Marker(number_of_goals=3)

    def reset_finger(joint_position):
        for i, joint_id in enumerate(finger.pybullet_joint_indices):
            pybullet.resetJointState(finger.finger_id, joint_id,
                                     joint_position[i])

    data = []
    for filename in (data_file_0, data_file_120, data_file_240):
        with open(filename, "rb") as file_handle:
            data.append(pickle.load(file_handle))

    assert len(data[0]) == len(data[1]) == len(data[2])

    episodes = []
    for i in range(1, len(data[0])):
        goals = trifinger_goal_space_transforms(finger_type, data)

        initial_position = np.concatenate([
            data[0][i]["joint_positions"][0],
            data[1][i]["joint_positions"][0],
            data[2][i]["joint_positions"][0],
        ])

        joint_positions = []
        for f in range(3):
            for stamp, pos in zip(data[f][i]["timestamps"],
                                  data[f][i]["joint_positions"]):
                joint_positions.append((f, stamp, pos))
        joint_positions = sorted(joint_positions, key=lambda x: x[1])

        episodes.append({
            "tip_goals": goals,
            "joint_positions": joint_positions,
            "initial_position": initial_position,
        })

    input("Press enter to start playback")

    start_time_data = None
    start_time_playback = datetime.datetime.now()

    for episode_num, data in enumerate(episodes):
        print("episode {}".format(episode_num))
        tip_goals = data["tip_goals"]
        trajectory = data["joint_positions"]

        episode_start_time = trajectory[0][1]

        if not start_time_data:
            start_time_data = episode_start_time

        timediff_data = episode_start_time - start_time_data - 1
        utils.sleep_until(start_time_playback +
                          datetime.timedelta(seconds=timediff_data))
        goal_marker.set_state(tip_goals)
        reset_finger(data["initial_position"])

        current_pos = copy.copy(data["initial_position"])

        for finger_idx, timestamp, position in trajectory:
            timediff_data = timestamp - start_time_data
            utils.sleep_until(start_time_playback +
                              datetime.timedelta(seconds=timediff_data))

            slice_start = finger_idx * 3
            current_pos[slice_start:slice_start + 3] = position

            reset_finger(current_pos)
示例#13
0
    def __init__(
        self,
        finger_type,
        finger_config_suffix,
        enable_visualization=False,
    ):
        """
        Constructor, initializes the physical world we will work in.

        Args:
            finger_type (string): Name of the finger type.  In order to get
                a list of the valid finger types, call
                :meth:`.finger_types_data.get_valid_finger_types`.
            finger_config_suffix (int): ID of the finger that is used. Has to
                be one of [0, 120, 240]. This is only if a single finger is to
                be used on any of the robots, and is ignored otherwise.
            enable_visualization (bool, optional): Set to 'True' to run a GUI
                for visualization.  This uses pyBullet but only for
                visualization, i.e. the state of the simulation is constantly
                set to match the one of the real robot.
        """
        # Simulation is only used for visualization, so only run it when needed
        self.simulator = None
        if enable_visualization:
            self.simulator = SimFinger(
                finger_type=finger_type,
                time_step=0.001,  # todo: not sure if this is correct
                enable_visualization=True,
            )

        number_of_fingers = finger_types_data.get_number_of_fingers(
            finger_type)

        if number_of_fingers == 1:
            if finger_type == "fingerone":
                config_file_path = os.path.join(
                    rospkg.RosPack().get_path("robot_fingers"),
                    "config",
                    "finger_%s.yml" % finger_config_suffix,
                )
            elif finger_type == "fingeredu":
                config_file_path = os.path.join(
                    rospkg.RosPack().get_path("robot_fingers"),
                    "config",
                    "fingeredu_%s.yml" % finger_config_suffix,
                )
            finger_data = robot_interfaces.finger.SingleProcessData()
            self.real_finger_backend = (
                robot_fingers.create_real_finger_backend(
                    finger_data, config_file_path))
            self.robot = robot_interfaces.finger.Frontend(finger_data)
            self.Action = robot_interfaces.finger.Action
        elif number_of_fingers == 3:
            if finger_type == "trifingerone":
                config_file_path = os.path.join(
                    rospkg.RosPack().get_path("robot_fingers"),
                    "config",
                    "trifinger.yml",
                )
            elif finger_type == "trifingeredu":
                config_file_path = os.path.join(
                    rospkg.RosPack().get_path("robot_fingers"),
                    "config",
                    "trifingeredu.yml",
                )
            finger_data = robot_interfaces.trifinger.SingleProcessData()
            self.real_finger_backend = robot_fingers.create_trifinger_backend(
                finger_data, config_file_path)
            self.robot = robot_interfaces.trifinger.Frontend(finger_data)
            self.Action = robot_interfaces.trifinger.Action

        self.real_finger_backend.initialize()
class TriFingerPlatform:
    """
    Wrapper around the simulation providing the same interface as
    ``robot_interfaces::TriFingerPlatformFrontend``.

    The following methods of the robot_interfaces counterpart are not
    supported:

    - get_robot_status()
    - wait_until_timeindex()

    """

    # Create the action and observation spaces
    # ========================================

    _n_joints = 9
    _n_fingers = 3
    _max_torque_Nm = 0.36
    _max_velocity_radps = 10

    spaces = SimpleNamespace()

    spaces.robot_torque = SimpleNamespace(
        low=np.full(_n_joints, -_max_torque_Nm, dtype=np.float32),
        high=np.full(_n_joints, _max_torque_Nm, dtype=np.float32),
        default=np.zeros(_n_joints, dtype=np.float32),
    )
    spaces.robot_position = SimpleNamespace(
        low=np.array([-0.9, -1.57, -2.7] * _n_fingers, dtype=np.float32),
        high=np.array([1.4, 1.57, 0.0] * _n_fingers, dtype=np.float32),
        default=np.array(
            [0.0, np.deg2rad(70), np.deg2rad(-130)] * _n_fingers,
            dtype=np.float32,
        ),
    )
    spaces.robot_velocity = SimpleNamespace(
        low=np.full(_n_joints, -_max_velocity_radps, dtype=np.float32),
        high=np.full(_n_joints, _max_velocity_radps, dtype=np.float32),
        default=np.zeros(_n_joints, dtype=np.float32),
    )
    spaces.object_position = SimpleNamespace(
        low=np.array([-0.3, -0.3, 0], dtype=np.float32),
        high=np.array([0.3, 0.3, 0.3], dtype=np.float32),
        default=np.array([0, 0, move_cube._min_height], dtype=np.float32),
    )

    spaces.object_orientation = SimpleNamespace(
        low=-np.ones(4, dtype=np.float32),
        high=np.ones(4, dtype=np.float32),
        default=move_cube.Pose().orientation,
    )

    # for convenience, we also create the respective gym spaces
    spaces.robot_torque.gym = gym.spaces.Box(low=spaces.robot_torque.low,
                                             high=spaces.robot_torque.high)
    spaces.robot_position.gym = gym.spaces.Box(low=spaces.robot_position.low,
                                               high=spaces.robot_position.high)
    spaces.robot_velocity.gym = gym.spaces.Box(low=spaces.robot_velocity.low,
                                               high=spaces.robot_velocity.high)
    spaces.object_position.gym = gym.spaces.Box(
        low=spaces.object_position.low, high=spaces.object_position.high)
    spaces.object_orientation.gym = gym.spaces.Box(
        low=spaces.object_orientation.low, high=spaces.object_orientation.high)

    def __init__(
        self,
        visualization=False,
        initial_robot_position=None,
        initial_object_pose=None,
        enable_cameras=False,
    ):
        """Initialize.

        Args:
            visualization (bool):  Set to true to run visualization.
            initial_robot_position: Initial robot joint angles
            initial_object_pose:  Initial pose for the manipulation object.
                Can be any object with attributes ``position`` (x, y, z) and
                ``orientation`` (x, y, z, w).  This is optional, if not set, a
                random pose will be sampled.
            enable_cameras (bool):  Set to true to enable camera observations.
                By default this is disabled as rendering of images takes a lot
                of computational power.  Therefore the cameras should only be
                enabled if the images are actually used.
        """
        #: Camera rate in frames per second.  Observations of camera and
        #: object pose will only be updated with this rate.
        #: NOTE: This is currently not used!
        self._camera_rate_fps = 30

        #: Set to true to render camera observations
        self.enable_cameras = enable_cameras

        #: Simulation time step
        self._time_step = 0.004

        # first camera update in the first step
        self._next_camera_update_step = 0

        # Initialize robot, object and cameras
        # ====================================

        self.simfinger = SimFinger(
            finger_type="trifingerpro",
            time_step=self._time_step,
            enable_visualization=visualization,
        )

        if initial_robot_position is None:
            initial_robot_position = self.spaces.robot_position.default

        self.simfinger.reset_finger_positions_and_velocities(
            initial_robot_position)

        if initial_object_pose is None:
            initial_object_pose = move_cube.Pose(
                position=self.spaces.object_position.default,
                orientation=self.spaces.object_orientation.default,
            )
        self.cube = collision_objects.Block(
            initial_object_pose.position,
            initial_object_pose.orientation,
            mass=0.020,
        )

        self.tricamera = camera.TriFingerCameras()

        # Forward some methods for convenience
        # ====================================
        # forward "RobotFrontend" methods directly to simfinger
        self.Action = self.simfinger.Action
        self.get_desired_action = self.simfinger.get_desired_action
        self.get_applied_action = self.simfinger.get_applied_action
        self.get_timestamp_ms = self.simfinger.get_timestamp_ms
        self.get_current_timeindex = self.simfinger.get_current_timeindex
        self.get_robot_observation = self.simfinger.get_observation

        # forward kinematics directly to simfinger
        self.forward_kinematics = (
            self.simfinger.pinocchio_utils.forward_kinematics)

        # Initialize log
        # ==============
        self._action_log = {
            "initial_robot_position": initial_robot_position.tolist(),
            "initial_object_pose": {
                "position": initial_object_pose.position.tolist(),
                "orientation": initial_object_pose.orientation.tolist(),
            },
            "actions": [],
        }

    def get_time_step(self):
        """Get simulation time step in seconds."""
        return self._time_step

    def _compute_camera_update_step_interval(self):
        return (1.0 / self._camera_rate_fps) / self._time_step

    def append_desired_action(self, action):
        """
        Call :meth:`pybullet.SimFinger.append_desired_action` and add the
        action to the action log.

        Arguments/return value are the same as for
        :meth:`pybullet.SimFinger.append_desired_action`.
        """
        # update camera and object observations only with the rate of the
        # cameras
        # next_t = self.get_current_timeindex() + 1
        # has_camera_update = next_t >= self._next_camera_update_step
        # if has_camera_update:
        #     self._next_camera_update_step += (
        #         self._compute_camera_update_step_interval()
        #     )

        #     self._object_pose_t = self._get_current_object_pose()
        #     if self.enable_cameras:
        #         self._camera_observation_t = (
        #             self._get_current_camera_observation()
        #         )

        has_camera_update = True
        self._object_pose_t = self._get_current_object_pose()
        if self.enable_cameras:
            self._camera_observation_t = self._get_current_camera_observation()

        t = self.simfinger.append_desired_action(action)

        # The correct timestamp can only be acquired now that t is given.
        # Update it accordingly in the object and camera observations
        if has_camera_update:
            camera_timestamp_s = self.get_timestamp_ms(t) / 1000
            self._object_pose_t.timestamp = camera_timestamp_s
            if self.enable_cameras:
                for i in range(len(self._camera_observation_t.cameras)):
                    self._camera_observation_t.cameras[
                        i].timestamp = camera_timestamp_s

        # write the desired action to the log
        self._action_log["actions"].append({
            "t":
            t,
            "torque":
            action.torque.tolist(),
            "position":
            action.position.tolist(),
            "position_kp":
            action.position_kp.tolist(),
            "position_kd":
            action.position_kd.tolist(),
        })

        return t

    def _get_current_object_pose(self, t=None):
        cube_state = self.cube.get_state()
        pose = ObjectPose()
        pose.position = np.asarray(cube_state[0])
        pose.orientation = np.asarray(cube_state[1])
        pose.confidence = 1.0
        # NOTE: The timestamp can only be set correctly after time step t is
        # actually reached.  Therefore, this is set to None here and filled
        # with the proper value later.
        if t is None:
            pose.timestamp = None
        else:
            pose.timestamp = self.get_timestamp_ms(t)

        return pose

    def get_object_pose(self, t):
        """Get object pose at time step t.

        Args:
            t:  The time index of the step for which the object pose is
                requested.  Only the value returned by the last call of
                :meth:`~append_desired_action` is valid.

        Returns:
            ObjectPose:  Pose of the object.  Values come directly from the
            simulation without adding noise, so the confidence is 1.0.

        Raises:
            ValueError: If invalid time index ``t`` is passed.
        """
        current_t = self.simfinger._t

        if t < 0:
            raise ValueError("Cannot access time index less than zero.")
        elif t == current_t:
            return self._object_pose_t
        elif t == current_t + 1:
            return self._get_current_object_pose(t)
        else:
            raise ValueError(
                "Given time index t has to match with index of the current"
                " step or the next one.")

    def _get_current_camera_observation(self, t=None):
        images = self.tricamera.get_images()
        observation = TriCameraObservation()
        # NOTE: The timestamp can only be set correctly after time step t
        # is actually reached.  Therefore, this is set to None here and
        # filled with the proper value later.
        if t is None:
            timestamp = None
        else:
            timestamp = self.get_timestamp_ms(t)

        for i, image in enumerate(images):
            observation.cameras[i].image = image
            observation.cameras[i].timestamp = timestamp

        return observation

    def get_camera_observation(self, t):
        """Get camera observation at time step t.

        Args:
            t:  The time index of the step for which the observation is
                requested.  Only the value returned by the last call of
                :meth:`~append_desired_action` is valid.

        Returns:
            TriCameraObservation:  Observations of the three cameras.  Images
            are rendered in the simulation.  Note that they are not optimized
            to look realistically.

        Raises:
            ValueError: If invalid time index ``t`` is passed.
        """
        if not self.enable_cameras:
            raise RuntimeError(
                "Cameras are not enabled.  Create `TriFingerPlatform` with"
                " `enable_cameras=True` if you want to use camera"
                " observations.")

        current_t = self.simfinger._t

        if t < 0:
            raise ValueError("Cannot access time index less than zero.")
        elif t == current_t:
            return self._camera_observation_t
        elif t == current_t + 1:
            return self._get_current_camera_observation(t)
        else:
            raise ValueError(
                "Given time index t has to match with index of the current"
                " step or the next one.")

    def store_action_log(self, filename):
        """Store the action log to a JSON file.

        Args:
            filename (str):  Path to the JSON file to which the log shall be
                written.  If the file exists already, it will be overwritten.
        """

        # TODO should the log also contain intermediate observations (object
        # and finger) for verification?

        t = self.simfinger.get_current_timeindex()
        object_pose = self.get_object_pose(t)
        self._action_log["final_object_pose"] = {
            "position": object_pose.position.tolist(),
            "orientation": object_pose.orientation.tolist(),
        }

        with open(filename, "w") as fh:
            json.dump(self._action_log, fh)
示例#15
0
class RealFinger:
    """
    The RealFinger class provides an interface to the real robot. Any script
    that creates an instance of the :class:`.SimFinger` can create an instance
    of this class and use it in the same way.
    """
    def __init__(
        self,
        finger_type,
        finger_config_suffix,
        enable_visualization=False,
    ):
        """
        Constructor, initializes the physical world we will work in.

        Args:
            finger_type (string): Name of the finger type.  In order to get
                a list of the valid finger types, call
                :meth:`.finger_types_data.get_valid_finger_types`.
            finger_config_suffix (int): ID of the finger that is used. Has to
                be one of [0, 120, 240]. This is only if a single finger is to
                be used on any of the robots, and is ignored otherwise.
            enable_visualization (bool, optional): Set to 'True' to run a GUI
                for visualization.  This uses pyBullet but only for
                visualization, i.e. the state of the simulation is constantly
                set to match the one of the real robot.
        """
        # Simulation is only used for visualization, so only run it when needed
        self.simulator = None
        if enable_visualization:
            self.simulator = SimFinger(
                finger_type=finger_type,
                time_step=0.001,  # todo: not sure if this is correct
                enable_visualization=True,
            )

        number_of_fingers = finger_types_data.get_number_of_fingers(
            finger_type)

        if number_of_fingers == 1:
            if finger_type == "fingerone":
                config_file_path = os.path.join(
                    rospkg.RosPack().get_path("robot_fingers"),
                    "config",
                    "finger_%s.yml" % finger_config_suffix,
                )
            elif finger_type == "fingeredu":
                config_file_path = os.path.join(
                    rospkg.RosPack().get_path("robot_fingers"),
                    "config",
                    "fingeredu_%s.yml" % finger_config_suffix,
                )
            finger_data = robot_interfaces.finger.SingleProcessData()
            self.real_finger_backend = (
                robot_fingers.create_real_finger_backend(
                    finger_data, config_file_path))
            self.robot = robot_interfaces.finger.Frontend(finger_data)
            self.Action = robot_interfaces.finger.Action
        elif number_of_fingers == 3:
            if finger_type == "trifingerone":
                config_file_path = os.path.join(
                    rospkg.RosPack().get_path("robot_fingers"),
                    "config",
                    "trifinger.yml",
                )
            elif finger_type == "trifingeredu":
                config_file_path = os.path.join(
                    rospkg.RosPack().get_path("robot_fingers"),
                    "config",
                    "trifingeredu.yml",
                )
            finger_data = robot_interfaces.trifinger.SingleProcessData()
            self.real_finger_backend = robot_fingers.create_trifinger_backend(
                finger_data, config_file_path)
            self.robot = robot_interfaces.trifinger.Frontend(finger_data)
            self.Action = robot_interfaces.trifinger.Action

        self.real_finger_backend.initialize()

    def append_desired_action(self, action):
        """
        Append an action to the action timeseries, that should be
        applied to the robot.

        Args:
            action (self.Action): Joint positions or torques or both

        Returns:
            self.action_index (int): The current time-index at which the action
                was applied.
        """
        return self.robot.append_desired_action(action)

    def get_observation(self, time_index):
        """
        Get the observation from the robot at a specified time_index.

        Args:
            time_index (int): the time_index at which the observation is
                needed
        Returns:
            observation (robot.Observation): the corresponding observation
        """
        observation = self.robot.get_observation(time_index)

        if self.simulator is not None:
            self.simulator.reset_finger_positions_and_velocities(
                joint_positions=observation.position)

        return observation

    def reset_finger(self, joint_positions):
        """
        Move the finger(s) to a random position (sampled in the joint space).
        The sampled random position is set as target and the robot is stepped
        for one second to give it time to reach there.
        """
        action = self.Action(position=joint_positions)
        for i in range(1000):
            t = self.append_desired_action(action)
            observation = self.get_observation(t)
        return observation
示例#16
0
    def test_step_both(self):
        # Create two instances and send different actions to them.
        # Verify that both go towards their target
        robot1 = SimFinger(finger_type="trifingerpro")
        robot2 = SimFinger(finger_type="trifingerpro")

        start_position = np.array([0.0, 0.7, -1.5] * 3)

        robot1.reset_finger_positions_and_velocities(start_position)
        robot2.reset_finger_positions_and_velocities(start_position)

        action1 = robot1.Action(position=[0.5, 0.7, -1.5] * 3)
        action2 = robot2.Action(position=[-0.1, 0.7, -1.5] * 3)

        for i in range(1000):
            t1 = robot1.append_desired_action(action1)
            t2 = robot2.append_desired_action(action2)
            obs1 = robot1.get_observation(t1)
            obs2 = robot2.get_observation(t2)

            if i > 1:
                self.assertTrue((obs2.position != obs1.position).any())

        self.assertLess(np.linalg.norm(action1.position - obs1.position), 0.1)
        self.assertLess(np.linalg.norm(action2.position - obs2.position), 0.1)
示例#17
0
class TriFingerReach(gym.Env):
    """
    A gym environment to enable training on either the single or
    the tri-fingers robots for the task of reaching
    """
    def __init__(
        self,
        control_rate_s,
        finger_type,
        enable_visualization,
        smoothing_params,
        use_real_robot=False,
        finger_config_suffix="0",
        synchronize=False,
    ):
        """Intializes the constituents of the reaching environment.

        Constructor sets up the finger robot depending on the finger type, and
        also whether an instance of the simulated or the real robot is to be
        created. Also sets up the observation and action spaces, smoothing for
        reducing jitter on the robot, and provides for a way to synchronize
        robots being trained independently.

        Args:
            control_rate_s (float): the rate (in seconds) at which step method of the env
                will run. The actual robot controller may run at a higher rate,
                so this is used to compute the number of robot control updates
                per environment step.
            finger_type (string): Name of the finger type.  In order to get
                a dictionary of the valid finger types, call
                :meth:`.finger_types_data.get_valid_finger_types`
            enable_visualization (bool): if the simulation env is to be
                visualized
            smoothing_params (dict):
                num_episodes (int): the total number of episodes for which the
                    training is performed
                start_after (float): the fraction of episodes after which the
                    smoothing of applied actions to the motors should start
                final_alpha (float): smoothing coeff that will be reached at
                    the end of the smoothing
                stop_after (float): the fraction of total episodes by which
                    final alpha is to be reached, after which the same final
                    alpha will be used for smoothing in the remainder of
                    the episodes
                is_test (bool, optional): Include this for testing
            use_real_robot (bool): if training is to be performed on
                the real robot ([default] False)
            finger_config_suffix: pass this if only one of
                the three fingers is to be trained. Valid choices include
                [0, 120, 240] ([default] 0)
            synchronize (bool): Set this to True if you want to train
                independently on three fingers in separate processes, but
                have them synchronized. ([default] False)
        """
        #: an instance of a simulated, or a real robot depending on
        #: what is desired.
        if use_real_robot:
            from pybullet_fingers.real_finger import RealFinger

            self.finger = RealFinger(
                finger_type=finger_type,
                finger_config_suffix=finger_config_suffix,
                enable_visualization=enable_visualization,
            )

        else:
            self.finger = SimFinger(
                finger_type=finger_type,
                enable_visualization=enable_visualization,
            )

        self.num_fingers = finger_types_data.get_number_of_fingers(finger_type)

        #: the number of times the same action is to be applied to
        #: the robot.
        self.steps_per_control = int(
            round(control_rate_s / self.finger.time_step_s))
        assert (abs(control_rate_s -
                    self.steps_per_control * self.finger.time_step_s) <=
                0.000001)

        #: the types of observations that should be a part of the environment's
        #: observed state
        self.observations_keys = [
            "joint_positions",
            "joint_velocities",
            "goal_position",
            "action_joint_positions",
        ]

        self.observations_sizes = [
            3 * self.num_fingers,
            3 * self.num_fingers,
            3 * self.num_fingers,
            3 * self.num_fingers,
        ]

        # sets up the observation and action spaces for the environment,
        # unscaled spaces have the custom bounds set up for each observation
        # or action type, whereas all the values in the observation and action
        # spaces lie between 1 and -1
        self.spaces = FingerSpaces(
            num_fingers=self.num_fingers,
            observations_keys=self.observations_keys,
            observations_sizes=self.observations_sizes,
            separate_goals=True,
        )

        self.unscaled_observation_space = (
            self.spaces.get_unscaled_observation_space())
        self.unscaled_action_space = self.spaces.get_unscaled_action_space()

        self.observation_space = self.spaces.get_scaled_observation_space()
        self.action_space = self.spaces.get_scaled_action_space()

        #: a logger to enable logging of observations if desired
        self.logger = DataLogger()

        # sets up smooothing
        if "is_test" in smoothing_params:
            self.smoothing_start_episode = 0
            self.smoothing_alpha = smoothing_params["final_alpha"]
            self.smoothing_increase_step = 0
            self.smoothing_stop_episode = math.inf
        else:
            self.smoothing_stop_episode = int(
                smoothing_params["num_episodes"] *
                smoothing_params["stop_after"])

            self.smoothing_start_episode = int(
                smoothing_params["num_episodes"] *
                smoothing_params["start_after"])
            num_smoothing_increase_steps = (self.smoothing_stop_episode -
                                            self.smoothing_start_episode)
            self.smoothing_alpha = 0
            self.smoothing_increase_step = (smoothing_params["final_alpha"] /
                                            num_smoothing_increase_steps)

        self.smoothed_action = None
        self.episode_count = 0

        #: a marker to visualize where the target goal position for the episode
        #: is to which the tip link(s) of the robot should reach
        self.enable_visualization = enable_visualization
        if self.enable_visualization:
            self.goal_marker = visual_objects.Marker(
                number_of_goals=self.num_fingers)

        # set up synchronization if it's set to true
        self.synchronize = synchronize
        if synchronize:
            now = datetime.datetime.now()
            self.next_start_time = datetime.datetime(now.year, now.month,
                                                     now.day, now.hour,
                                                     now.minute + 1)
        else:
            self.next_start_time = None

        self.seed()
        self.reset()

    def _compute_reward(self, observation, goal):
        """
        The reward function of the environment

        Args:
            observation (list): the observation at the
                current step
            goal (list): the desired goal for the episode

        Returns:
            the reward, and the done signal
        """
        joint_positions = observation[
            self.spaces.key_to_index["joint_positions"]]

        end_effector_positions = self.finger.pinocchio_utils.forward_kinematics(
            np.array(joint_positions))

        # TODO is matrix norm really always same as vector norm on flattend
        # matrices?
        distance_to_goal = utils.compute_distance(end_effector_positions, goal)

        reward = -distance_to_goal
        done = False

        return reward * self.steps_per_control, done

    def _get_state(self, observation, action, log_observation=False):
        """
        Get the current observation from the env for the agent

        Args:
            log_observation (bool): specify whether you want to
                log the observation

        Returns:
            observation (list): comprising of the observations corresponding
                to the key values in the observation_keys
        """
        tip_positions = self.finger.pinocchio_utils.forward_kinematics(
            observation.position)
        end_effector_position = np.concatenate(tip_positions)
        joint_positions = observation.position
        joint_velocities = observation.velocity
        flat_goals = np.concatenate(self.goal)
        end_effector_to_goal = list(
            np.subtract(flat_goals, end_effector_position))

        # populate this observation dict from which you can select which
        # observation types to finally choose depending on the keys
        # used for constructing the observation space of the environment
        observation_dict = {}
        observation_dict["end_effector_position"] = end_effector_position
        observation_dict["joint_positions"] = joint_positions
        observation_dict["joint_velocities"] = joint_velocities
        observation_dict["end_effector_to_goal"] = end_effector_to_goal
        observation_dict["goal_position"] = flat_goals
        observation_dict["action_joint_positions"] = action

        if log_observation:
            self.logger.append(joint_positions, end_effector_position,
                               time.time())

        # returns only the observations corresponding to the keys that were
        # used for constructing the observation space
        observation = [
            v for key in self.observations_keys for v in observation_dict[key]
        ]

        return observation

    def step(self, action):
        """
        The env step method

        Args:
            action (list): the joint positions that have to be achieved

        Returns:
            the observation scaled to lie between [-1;1], the reward,
            the done signal, and info on if the agent was successful at
            the current step
        """
        # Unscale the action to the ranges of the action space of the
        # environment, explicitly (as the prediction from the network
        # lies in the range [-1;1])
        unscaled_action = utils.unscale(action, self.unscaled_action_space)

        # smooth the action by taking a weighted average with the previous
        # action, where the weight, ie, the smoothing_alpha is gradually
        # increased at every episode reset (see the reset method for details)
        if self.smoothed_action is None:
            # start with current position
            # self.smoothed_action = self.finger.observation.position
            self.smoothed_action = unscaled_action

        self.smoothed_action = (self.smoothing_alpha * self.smoothed_action +
                                (1 - self.smoothing_alpha) * unscaled_action)

        # this is the control loop to send the actions for a few timesteps
        # which depends on the actual control rate
        finger_action = self.finger.Action(position=self.smoothed_action)
        state = None
        for _ in range(self.steps_per_control):
            t = self.finger.append_desired_action(finger_action)
            observation = self.finger.get_observation(t)
            # get observation from first iteration (when action is applied the
            # first time)
            if state is None:
                state = self._get_state(observation, self.smoothed_action,
                                        True)
            if self.synchronize:
                self.observation = observation
        reward, done = self._compute_reward(state, self.goal)
        info = {"is_success": np.float32(done)}
        scaled_observation = utils.scale(state,
                                         self.unscaled_observation_space)
        return scaled_observation, reward, done, info

    def reset(self):
        """
        Episode reset

        Returns:
            the scaled to [-1;1] observation from the env after the reset
        """
        # synchronize episode starts with wall time
        # (freeze the robot at the current position before starting the sleep)
        if self.next_start_time:
            try:
                t = self.finger.append_desired_action(
                    self.finger.Action(position=self.observation.position))
                self.finger.get_observation(t)
            except Exception:
                pass

            utils.sleep_until(self.next_start_time)
            self.next_start_time += datetime.timedelta(seconds=4)

        # updates smoothing parameters
        self.update_smoothing()
        self.episode_count += 1
        self.smoothed_action = None

        # resets the finger to a random position
        action = sample.feasible_random_joint_positions_for_reaching(
            self.finger, self.spaces.action_bounds)
        observation = self.finger.reset_finger_positions_and_velocities(action)

        # generates a random goal for the next episode
        target_joint_config = np.asarray(
            sample.feasible_random_joint_positions_for_reaching(
                self.finger, self.spaces.action_bounds))
        self.goal = self.finger.pinocchio_utils.forward_kinematics(
            target_joint_config)

        if self.enable_visualization:
            self.goal_marker.set_state(self.goal)

        # logs relevant information for replayability
        self.logger.new_episode(target_joint_config, self.goal)

        return utils.scale(
            self._get_state(observation, action=action),
            self.unscaled_observation_space,
        )

    def update_smoothing(self):
        """
        Update the smoothing coefficient with which the action to be
        applied is smoothed
        """
        if (self.smoothing_start_episode <= self.episode_count <
                self.smoothing_stop_episode):
            self.smoothing_alpha += self.smoothing_increase_step
        print("episode: {}, smoothing: {}".format(self.episode_count,
                                                  self.smoothing_alpha))
示例#18
0
class TriFingerPush(gym.Env):
    """A gym environment to enable training on any of the valid robots,
    real or simulated, for the task of pushing.
    """
    def __init__(
        self,
        control_rate_s,
        finger_type,
        enable_visualization,
    ):
        """Intializes the constituents of the pushing environment.

        Constructor sets up the finger robot depending on the finger type,
        sets up the observation and action spaces, smoothing for
        reducing jitter on the robot, and provides for a way to synchronize
        robots being trained independently.


        Args:
            control_rate_s (float): the rate (in seconds) at which step method of the env
                will run. The actual robot controller may run at a higher rate,
                so this is used to compute the number of robot control updates
                per environment step.
            finger_type (string): Name of the finger type.  In order to get
                a list of the valid finger types, call
                :meth:`.finger_types_data.get_valid_finger_types`
            enable_visualization (bool): if the simulation env is to be
                visualized
        """

        #: an instance of the simulated robot depending on the desired
        #: robot type
        self.finger = SimFinger(
            finger_type=finger_type,
            enable_visualization=enable_visualization,
        )

        self.num_fingers = finger_types_data.get_number_of_fingers(finger_type)

        #: the number of times the same action is to be applied to
        #: the robot in one step.
        self.steps_per_control = int(
            round(control_rate_s / self.finger.time_step_s))
        assert (abs(control_rate_s -
                    self.steps_per_control * self.finger.time_step_s) <=
                0.000001)

        #: the types of observations that should be a part of the environment's
        #: observed state
        self.observations_keys = [
            "joint_positions",
            "joint_velocities",
            "action_joint_positions",
            "goal_position",
            "object_position",
        ]
        #: the size of each of the observation type that is part of the
        #: observation keys (in the same order)
        self.observations_sizes = [
            3 * self.num_fingers,
            3 * self.num_fingers,
            3 * self.num_fingers,
            3,
            3,
        ]

        # sets up the observation and action spaces for the environment,
        # unscaled spaces have the custom bounds set up for each observation
        # or action type, whereas all the values in the observation and action
        # spaces lie between 1 and -1
        self.spaces = FingerSpaces(
            num_fingers=self.num_fingers,
            observations_keys=self.observations_keys,
            observations_sizes=self.observations_sizes,
            separate_goals=False,
        )

        self.unscaled_observation_space = (
            self.spaces.get_unscaled_observation_space())
        self.unscaled_action_space = self.spaces.get_unscaled_action_space()

        self.observation_space = self.spaces.get_scaled_observation_space()
        self.action_space = self.spaces.get_scaled_action_space()

        #: a logger to enable logging of observations if desired
        self.logger = DataLogger()

        #: the object that has to be pushed
        self.block = collision_objects.Block()

        #: a marker to visualize where the target goal position for the episode
        #: is
        self.goal_marker = visual_objects.Marker(
            number_of_goals=1,
            goal_size=0.0325,
            initial_position=[0.19, 0.08, 0.0425],
        )

        self.reset()

    def _compute_reward(self, object_position, goal):
        """
        The reward function of the environment

        Args:
            observation (list): the observation at the
                current step
            goal (list): the desired goal for the episode

        Returns:
            the reward, and the done signal
        """
        done = False
        dist = utils.compute_distance(object_position, goal)
        reward = -dist
        done = False
        return reward, done

    def _get_state(self, observation, action, log_observation=False):
        """
        Get the current observation from the env for the agent

        Args:
            log_observation (bool): specify whether you want to
                log the observation

        Returns:
            observation (list): comprising of the observations corresponding
                to the key values in the observation_keys
        """
        joint_positions = observation.position
        joint_velocities = observation.velocity
        tip_positions = self.finger.pinocchio_utils.forward_kinematics(
            joint_positions)
        end_effector_position = np.concatenate(tip_positions)
        flat_goals = np.concatenate([self.goal] * self.num_fingers)

        if self.num_fingers == 1:
            flat_goals = self.goal

        end_effector_to_goal = list(
            np.subtract(flat_goals, end_effector_position))

        # populate this observation dict from which you can select which
        # observation types to finally choose depending on the keys
        # used for constructing the observation space of the environment
        observation_dict = {}
        observation_dict["joint_positions"] = joint_positions
        observation_dict["joint_velocities"] = joint_velocities
        observation_dict["end_effector_position"] = end_effector_position
        observation_dict["end_effector_to_goal"] = end_effector_to_goal
        observation_dict["goal_position"] = self.goal
        observation_dict["object_position"], _ = self.block.get_state()
        observation_dict["action_joint_positions"] = action

        # returns only the observations corresponding to the keys that were
        # used for constructing the observation space
        if log_observation:
            self.logger.append(
                observation_dict["joint_positions"],
                observation_dict["end_effector_position"],
                time.time(),
            )

        observation = [
            v for key in self.spaces.observations_keys
            for v in observation_dict[key]
        ]

        return observation

    def step(self, action):
        """
        The env step method

        Args:
            action (list): the joint positions that have to be achieved

        Returns:
            the observation scaled to lie between [-1;1], the reward,
            the done signal, and info on if the agent was successful at
            the current step
        """
        # unscales the action to the ranges of the action space of the
        # environment explicitly (as the prediction from the network
        # lies in the range [-1;1])
        unscaled_action = utils.unscale(action, self.unscaled_action_space)

        # this is the control loop to send the actions for a few timesteps
        # which depends on the actual control rate
        finger_action = self.finger.Action(position=unscaled_action)
        state = None
        for _ in range(self.steps_per_control):
            t = self.finger.append_desired_action(finger_action)
            observation = self.finger.get_observation(t)
            if state is None:
                state = self._get_state(observation, unscaled_action, True)

        key_observation = state[self.spaces.key_to_index["object_position"]]

        reward, done = self._compute_reward(key_observation, self.goal)
        info = {"is_success": np.float32(done)}

        scaled_observation = utils.scale(state,
                                         self.unscaled_observation_space)
        print("reward", reward)

        return scaled_observation, reward, done, info

    def reset(self):
        """
        Episode reset

        Returns:
            the scaled to [-1;1] observation from the env after the reset
        """
        # resets the finger to a random position
        action = sample.feasible_random_joint_positions_for_reaching(
            self.finger, self.spaces.action_bounds)
        observation = self.finger.reset_finger_positions_and_velocities(action)

        #: the episode target for the agent which is sampled randomly
        #: for each episode
        self.goal = sample.random_position_in_arena(height_limits=0.0425)

        #: the position from which the object is initialized at the
        #: beginning of each episode
        self.block_position = sample.random_position_in_arena(
            height_limits=0.0425)

        self.goal_marker.set_state([self.goal])
        self.block.set_state(self.block_position, [0, 0, 0, 1])

        # logs relevant information for replayability
        self.logger.new_episode(self.block_position, self.goal)

        return utils.scale(
            self._get_state(observation, action, True),
            self.unscaled_observation_space,
        )
示例#19
0
class TestRobotEquivalentInterface(unittest.TestCase):
    """
    Test the methods of SimFinger that provide an interface equivalent to the
    RobotFrontend of robot_interfaces.
    """
    def setUp(self):
        self.finger = SimFinger(
            finger_type="fingerone",
            time_step=0.001,
            enable_visualization=False,
        )

        self.start_position = [0, -0.7, -1.5]
        self.finger.reset_finger_positions_and_velocities(self.start_position)

    def tearDown(self):
        # destroy the simulation to ensure that the next test starts with a
        # clean state
        del self.finger

    def test_timing_action_t_vs_observation_t(self):
        """Verify that observation_t is really not affected by action_t."""
        # Apply a max torque action for one step
        action = self.finger.Action(torque=[
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
        ])
        t = self.finger.append_desired_action(action)
        obs = self.finger.get_observation(t)

        # as obs_t is from just before action_t is applied, the position should
        # not yet have changed
        np.testing.assert_array_equal(self.start_position, obs.position)

        # after applying another action (even with zero torque), we should see
        # the effect
        t = self.finger.append_desired_action(self.finger.Action())
        obs = self.finger.get_observation(t)
        # new position should be less, as negative torque is applied
        np.testing.assert_array_less(obs.position, self.start_position)

    def test_timing_action_t_vs_observation_tplus1(self):
        """Verify that observation_{t+1} is affected by action_t."""
        # Apply a max torque action for one step
        action = self.finger.Action(torque=[
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
        ])
        t = self.finger.append_desired_action(action)
        obs = self.finger.get_observation(t + 1)

        # new position should be less, as negative torque is applied
        np.testing.assert_array_less(obs.position, self.start_position)

    def test_timing_observation_t_vs_tplus1(self):
        # Apply a max torque action for one step
        action = self.finger.Action(torque=[
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
        ])
        t = self.finger.append_desired_action(action)
        obs_t = self.finger.get_observation(t)
        obs_tplus1 = self.finger.get_observation(t + 1)

        # newer position should be lesser, as negative torque is applied
        np.testing.assert_array_less(obs_tplus1.position, obs_t.position)

    def test_timing_observation_t_multiple_times(self):
        """
        Verify that calling get_observation(t) multiple times always gives same
        result.
        """
        # Apply a max torque action for one step
        action = self.finger.Action(torque=[
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
        ])
        t = self.finger.append_desired_action(action)
        obs_t1 = self.finger.get_observation(t)

        # observation should not change when calling multiple times with same t
        for i in range(10):
            obs_ti = self.finger.get_observation(t)
            np.testing.assert_array_equal(obs_t1.position, obs_ti.position)
            np.testing.assert_array_equal(obs_t1.velocity, obs_ti.velocity)
            np.testing.assert_array_equal(obs_t1.torque, obs_ti.torque)
            np.testing.assert_array_equal(obs_t1.tip_force, obs_ti.tip_force)

    def test_timing_observation_tplus1_multiple_times(self):
        """
        Verify that calling get_observation(t + 1) multiple times always gives
        same result.
        """
        # Apply a max torque action for one step
        action = self.finger.Action(torque=[
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
        ])
        t = self.finger.append_desired_action(action)
        obs_t1 = self.finger.get_observation(t + 1)

        # observation should not change when calling multiple times with same t
        for i in range(10):
            obs_ti = self.finger.get_observation(t + 1)
            np.testing.assert_array_equal(obs_t1.position, obs_ti.position)
            np.testing.assert_array_equal(obs_t1.velocity, obs_ti.velocity)
            np.testing.assert_array_equal(obs_t1.torque, obs_ti.torque)
            np.testing.assert_array_equal(obs_t1.tip_force, obs_ti.tip_force)

    def test_exception_on_old_t(self):
        """Verify that calling get_observation with invalid t raises error."""
        # verify that t < 0 is not accepted
        with self.assertRaises(ValueError):
            self.finger.get_observation(-1)

        # append two actions
        t1 = self.finger.append_desired_action(self.finger.Action())
        t2 = self.finger.append_desired_action(self.finger.Action())

        # it should be possible to get observation for t2 and t2 + 1 but not t1
        # or t2 + 2
        self.finger.get_observation(t2)
        self.finger.get_observation(t2 + 1)

        with self.assertRaises(ValueError):
            self.finger.get_observation(t1)

        with self.assertRaises(ValueError):
            self.finger.get_observation(t2 + 2)

    def test_observation_types(self):
        """Verify that all fields of observation are np.ndarrays."""
        # Apply a max torque action for one step
        action = self.finger.Action(torque=[
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
            -self.finger.max_motor_torque,
        ])
        t = self.finger.append_desired_action(action)
        obs = self.finger.get_observation(t)

        # verify types
        self.assertIsInstance(obs.torque, np.ndarray)
        self.assertIsInstance(obs.position, np.ndarray)
        self.assertIsInstance(obs.velocity, np.ndarray)
        self.assertIsInstance(obs.tip_force, np.ndarray)

    def test_get_desired_action(self):
        # verify that t < 0 is not accepted
        with self.assertRaises(ValueError):
            self.finger.get_desired_action(-1)

        orig_action = self.finger.Action(torque=[1.0, 2.0, 3.0],
                                         position=[0.0, -1.0, -2.0])
        t = self.finger.append_desired_action(orig_action)
        action = self.finger.get_desired_action(t)

        np.testing.assert_array_equal(orig_action.torque, action.torque)
        np.testing.assert_array_equal(orig_action.position, action.position)

        # verify types
        self.assertIsInstance(action.torque, np.ndarray)
        self.assertIsInstance(action.position, np.ndarray)
        self.assertIsInstance(action.position_kd, np.ndarray)
        self.assertIsInstance(action.position_kp, np.ndarray)

    def test_get_applied_action(self):
        # verify that t < 0 is not accepted
        with self.assertRaises(ValueError):
            self.finger.get_applied_action(-1)

        desired_action = self.finger.Action(torque=[5, 5, 5])
        t = self.finger.append_desired_action(desired_action)
        applied_action = self.finger.get_applied_action(t)

        np.testing.assert_array_almost_equal(
            applied_action.torque,
            [
                self.finger.max_motor_torque,
                self.finger.max_motor_torque,
                self.finger.max_motor_torque,
            ],
        )

        # verify types
        self.assertIsInstance(applied_action.torque, np.ndarray)
        self.assertIsInstance(applied_action.position, np.ndarray)
        self.assertIsInstance(applied_action.position_kd, np.ndarray)
        self.assertIsInstance(applied_action.position_kp, np.ndarray)

    def test_get_timestamp_ms_001(self):
        t = self.finger.append_desired_action(self.finger.Action())
        first_stamp = self.finger.get_timestamp_ms(t)
        t = self.finger.append_desired_action(self.finger.Action())
        second_stamp = self.finger.get_timestamp_ms(t)

        # time step is set to 0.001, so the difference between two steps should
        # be 1 ms.
        self.assertEqual(second_stamp - first_stamp, 1)

    def test_get_timestamp_ms_tplus1_001(self):
        t = self.finger.append_desired_action(self.finger.Action())
        stamp_t = self.finger.get_timestamp_ms(t)
        stamp_tp1 = self.finger.get_timestamp_ms(t + 1)

        # time step is set to 0.001, so the difference between two steps should
        # be 1 ms.
        self.assertEqual(stamp_tp1 - stamp_t, 1)

    def test_get_timestamp_ms_invalid_t(self):
        # verify that t < 0 is not accepted
        with self.assertRaises(ValueError):
            self.finger.get_timestamp_ms(-1)

        t = self.finger.append_desired_action(self.finger.Action())

        with self.assertRaises(ValueError):
            self.finger.get_timestamp_ms(t - 1)

        with self.assertRaises(ValueError):
            self.finger.get_timestamp_ms(t + 2)

    def test_get_current_timeindex(self):
        # no time index before first action
        with self.assertRaises(ValueError):
            self.finger.get_current_timeindex()

        t = self.finger.append_desired_action(self.finger.Action())
        self.assertEqual(self.finger.get_current_timeindex(), t)
示例#20
0
    def test_step_one(self):
        # Create two instances, send actions and step only one.  Verify that
        # the other one does not move.
        robot1 = SimFinger(finger_type="trifingerpro")
        robot2 = SimFinger(finger_type="trifingerpro")

        start_position = np.array([0.0, 0.7, -1.5] * 3)

        robot1.reset_finger_positions_and_velocities(start_position)
        robot2.reset_finger_positions_and_velocities(start_position)

        action = robot1.Action(torque=[0.3, 0.3, 0.3] * 3)

        for i in range(30):
            robot1._set_desired_action(action)
            robot1._step_simulation()
            obs1 = robot1._get_latest_observation()
            obs2 = robot2._get_latest_observation()

            self.assertTrue((start_position != obs1.position).any())
            np.testing.assert_array_equal(start_position, obs2.position)