def main(): argparser = argparse.ArgumentParser(description=__doc__) argparser.add_argument( "--control-mode", default="position", choices=["position", "torque"], help="Specify position or torque as the control mode.", ) argparser.add_argument( "--finger-type", default="trifingerone", choices=finger_types_data.get_valid_finger_types(), help="Specify a valid finger type", ) args = argparser.parse_args() time_step = 0.004 finger = sim_finger.SimFinger( finger_type=args.finger_type, time_step=time_step, enable_visualization=True, ) num_fingers = finger.number_of_fingers if args.control_mode == "position": position_goals = visual_objects.Marker(number_of_goals=num_fingers) while True: if args.control_mode == "position": desired_joint_positions = np.array( sample.random_joint_positions(number_of_fingers=num_fingers)) finger_action = finger.Action(position=desired_joint_positions) # visualize the goal position of the finger tip position_goals.set_state( finger.pinocchio_utils.forward_kinematics( desired_joint_positions)) if args.control_mode == "torque": desired_joint_torques = [random.random()] * 3 * num_fingers finger_action = finger.Action(torque=desired_joint_torques) # pursue this goal for one second for _ in range(int(1 / time_step)): t = finger.append_desired_action(finger_action) finger.get_observation(t) time.sleep(time_step)
def main_finger(finger_type, data_file): finger = SimFinger(enable_visualization=True, finger_type=finger_type) goal_marker = visual_objects.Marker(number_of_goals=1) def reset_finger(joint_position): for i, joint_id in enumerate(finger.pybullet_joint_indices): pybullet.resetJointState(finger.finger_id, joint_id, joint_position[i]) with open(data_file, "rb") as file_handle: episodes = pickle.load(file_handle) input("Press enter to start playback") start_time_data = None start_time_playback = datetime.datetime.now() for episode_num, data in enumerate(episodes[1:]): print("episode {}".format(episode_num)) tip_goal = data["tip_goal"][0] trajectory = np.vstack(data["joint_positions"]) timestamps = data["timestamps"] if not start_time_data: start_time_data = timestamps[0] timediff_data = timestamps[0] - start_time_data - 1 utils.sleep_until(start_time_playback + datetime.timedelta(seconds=timediff_data)) goal_marker.set_state(tip_goal) reset_finger(trajectory[0]) for position, timestamp in zip(trajectory, timestamps): timediff_data = timestamp - start_time_data utils.sleep_until(start_time_playback + datetime.timedelta(seconds=timediff_data)) reset_finger(position)
def main_trifinger(finger_type, data_file_0, data_file_120, data_file_240): finger = SimFinger(enable_visualization=True, finger_type=finger_type) goal_marker = visual_objects.Marker(number_of_goals=3) def reset_finger(joint_position): for i, joint_id in enumerate(finger.pybullet_joint_indices): pybullet.resetJointState(finger.finger_id, joint_id, joint_position[i]) data = [] for filename in (data_file_0, data_file_120, data_file_240): with open(filename, "rb") as file_handle: data.append(pickle.load(file_handle)) assert len(data[0]) == len(data[1]) == len(data[2]) episodes = [] for i in range(1, len(data[0])): goals = trifinger_goal_space_transforms(finger_type, data) initial_position = np.concatenate([ data[0][i]["joint_positions"][0], data[1][i]["joint_positions"][0], data[2][i]["joint_positions"][0], ]) joint_positions = [] for f in range(3): for stamp, pos in zip(data[f][i]["timestamps"], data[f][i]["joint_positions"]): joint_positions.append((f, stamp, pos)) joint_positions = sorted(joint_positions, key=lambda x: x[1]) episodes.append({ "tip_goals": goals, "joint_positions": joint_positions, "initial_position": initial_position, }) input("Press enter to start playback") start_time_data = None start_time_playback = datetime.datetime.now() for episode_num, data in enumerate(episodes): print("episode {}".format(episode_num)) tip_goals = data["tip_goals"] trajectory = data["joint_positions"] episode_start_time = trajectory[0][1] if not start_time_data: start_time_data = episode_start_time timediff_data = episode_start_time - start_time_data - 1 utils.sleep_until(start_time_playback + datetime.timedelta(seconds=timediff_data)) goal_marker.set_state(tip_goals) reset_finger(data["initial_position"]) current_pos = copy.copy(data["initial_position"]) for finger_idx, timestamp, position in trajectory: timediff_data = timestamp - start_time_data utils.sleep_until(start_time_playback + datetime.timedelta(seconds=timediff_data)) slice_start = finger_idx * 3 current_pos[slice_start:slice_start + 3] = position reset_finger(current_pos)
def __init__( self, control_rate_s, finger_type, enable_visualization, ): """Intializes the constituents of the pushing environment. Constructor sets up the finger robot depending on the finger type, sets up the observation and action spaces, smoothing for reducing jitter on the robot, and provides for a way to synchronize robots being trained independently. Args: control_rate_s (float): the rate (in seconds) at which step method of the env will run. The actual robot controller may run at a higher rate, so this is used to compute the number of robot control updates per environment step. finger_type (string): Name of the finger type. In order to get a list of the valid finger types, call :meth:`.finger_types_data.get_valid_finger_types` enable_visualization (bool): if the simulation env is to be visualized """ #: an instance of the simulated robot depending on the desired #: robot type self.finger = SimFinger( finger_type=finger_type, enable_visualization=enable_visualization, ) self.num_fingers = finger_types_data.get_number_of_fingers(finger_type) #: the number of times the same action is to be applied to #: the robot in one step. self.steps_per_control = int( round(control_rate_s / self.finger.time_step_s)) assert (abs(control_rate_s - self.steps_per_control * self.finger.time_step_s) <= 0.000001) #: the types of observations that should be a part of the environment's #: observed state self.observations_keys = [ "joint_positions", "joint_velocities", "action_joint_positions", "goal_position", "object_position", ] #: the size of each of the observation type that is part of the #: observation keys (in the same order) self.observations_sizes = [ 3 * self.num_fingers, 3 * self.num_fingers, 3 * self.num_fingers, 3, 3, ] # sets up the observation and action spaces for the environment, # unscaled spaces have the custom bounds set up for each observation # or action type, whereas all the values in the observation and action # spaces lie between 1 and -1 self.spaces = FingerSpaces( num_fingers=self.num_fingers, observations_keys=self.observations_keys, observations_sizes=self.observations_sizes, separate_goals=False, ) self.unscaled_observation_space = ( self.spaces.get_unscaled_observation_space()) self.unscaled_action_space = self.spaces.get_unscaled_action_space() self.observation_space = self.spaces.get_scaled_observation_space() self.action_space = self.spaces.get_scaled_action_space() #: a logger to enable logging of observations if desired self.logger = DataLogger() #: the object that has to be pushed self.block = collision_objects.Block() #: a marker to visualize where the target goal position for the episode #: is self.goal_marker = visual_objects.Marker( number_of_goals=1, goal_size=0.0325, initial_position=[0.19, 0.08, 0.0425], ) self.reset()
def __init__( self, control_rate_s, finger_type, enable_visualization, smoothing_params, use_real_robot=False, finger_config_suffix="0", synchronize=False, ): """Intializes the constituents of the reaching environment. Constructor sets up the finger robot depending on the finger type, and also whether an instance of the simulated or the real robot is to be created. Also sets up the observation and action spaces, smoothing for reducing jitter on the robot, and provides for a way to synchronize robots being trained independently. Args: control_rate_s (float): the rate (in seconds) at which step method of the env will run. The actual robot controller may run at a higher rate, so this is used to compute the number of robot control updates per environment step. finger_type (string): Name of the finger type. In order to get a dictionary of the valid finger types, call :meth:`.finger_types_data.get_valid_finger_types` enable_visualization (bool): if the simulation env is to be visualized smoothing_params (dict): num_episodes (int): the total number of episodes for which the training is performed start_after (float): the fraction of episodes after which the smoothing of applied actions to the motors should start final_alpha (float): smoothing coeff that will be reached at the end of the smoothing stop_after (float): the fraction of total episodes by which final alpha is to be reached, after which the same final alpha will be used for smoothing in the remainder of the episodes is_test (bool, optional): Include this for testing use_real_robot (bool): if training is to be performed on the real robot ([default] False) finger_config_suffix: pass this if only one of the three fingers is to be trained. Valid choices include [0, 120, 240] ([default] 0) synchronize (bool): Set this to True if you want to train independently on three fingers in separate processes, but have them synchronized. ([default] False) """ #: an instance of a simulated, or a real robot depending on #: what is desired. if use_real_robot: from pybullet_fingers.real_finger import RealFinger self.finger = RealFinger( finger_type=finger_type, finger_config_suffix=finger_config_suffix, enable_visualization=enable_visualization, ) else: self.finger = SimFinger( finger_type=finger_type, enable_visualization=enable_visualization, ) self.num_fingers = finger_types_data.get_number_of_fingers(finger_type) #: the number of times the same action is to be applied to #: the robot. self.steps_per_control = int( round(control_rate_s / self.finger.time_step_s)) assert (abs(control_rate_s - self.steps_per_control * self.finger.time_step_s) <= 0.000001) #: the types of observations that should be a part of the environment's #: observed state self.observations_keys = [ "joint_positions", "joint_velocities", "goal_position", "action_joint_positions", ] self.observations_sizes = [ 3 * self.num_fingers, 3 * self.num_fingers, 3 * self.num_fingers, 3 * self.num_fingers, ] # sets up the observation and action spaces for the environment, # unscaled spaces have the custom bounds set up for each observation # or action type, whereas all the values in the observation and action # spaces lie between 1 and -1 self.spaces = FingerSpaces( num_fingers=self.num_fingers, observations_keys=self.observations_keys, observations_sizes=self.observations_sizes, separate_goals=True, ) self.unscaled_observation_space = ( self.spaces.get_unscaled_observation_space()) self.unscaled_action_space = self.spaces.get_unscaled_action_space() self.observation_space = self.spaces.get_scaled_observation_space() self.action_space = self.spaces.get_scaled_action_space() #: a logger to enable logging of observations if desired self.logger = DataLogger() # sets up smooothing if "is_test" in smoothing_params: self.smoothing_start_episode = 0 self.smoothing_alpha = smoothing_params["final_alpha"] self.smoothing_increase_step = 0 self.smoothing_stop_episode = math.inf else: self.smoothing_stop_episode = int( smoothing_params["num_episodes"] * smoothing_params["stop_after"]) self.smoothing_start_episode = int( smoothing_params["num_episodes"] * smoothing_params["start_after"]) num_smoothing_increase_steps = (self.smoothing_stop_episode - self.smoothing_start_episode) self.smoothing_alpha = 0 self.smoothing_increase_step = (smoothing_params["final_alpha"] / num_smoothing_increase_steps) self.smoothed_action = None self.episode_count = 0 #: a marker to visualize where the target goal position for the episode #: is to which the tip link(s) of the robot should reach self.enable_visualization = enable_visualization if self.enable_visualization: self.goal_marker = visual_objects.Marker( number_of_goals=self.num_fingers) # set up synchronization if it's set to true self.synchronize = synchronize if synchronize: now = datetime.datetime.now() self.next_start_time = datetime.datetime(now.year, now.month, now.day, now.hour, now.minute + 1) else: self.next_start_time = None self.seed() self.reset()