def __init__(self): # specify the dimension of observation space and action space self.observation_space = 44 self.action_space = 4 # joint efforts # We assume that a ROS node has already been created before initialising the environment self.running_step = rospy.get_param("/running_step") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # self.inr_r3 = rospy.get_param("/inr_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) # list for recording the minimal value from the laser self.mini_laser = [] self.moco = model_control() self.pipe_angle = 0.0
def __init__(self): # specify the dimension of observation space and action space self.observation_space = 22 self.action_space = 4 # We assume that a ROS node has already been created before initialising the environment self.running_step = rospy.get_param("/running_step") self.done_reward = rospy.get_param("/collision_reward") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="") self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1)
def __init__(self): # specify the dimension of observation space and action space self.observation_space = 44 self.action_space = 4 # joint efforts # We assume that a ROS node has already been created before initialising the environment # gets training parameters from param server self.desired_target = Point() self.desired_target.x = rospy.get_param("/desired_target_point/x") self.desired_target.y = rospy.get_param("/desired_target_point/y") self.desired_target.z = rospy.get_param("/desired_target_point/z") self.running_step = rospy.get_param("/running_step") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.inr_r3 = rospy.get_param("/inr_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_state_object.set_desired_target_point(self.desired_target.x, self.desired_target.y, self.desired_target.z) self.mrm_joint_pubisher_object = JointPub() self._seed() self.passed_episode = 0 self.total_episode = rospy.get_param("/total_episode") # list for recording the minimal value from the laser self.mini_laser = [] self.obstacle_penalty = 0.0
def move_path(self, positions, speed): joints = [] for position in positions: joint_lf = self.my_Kinematic.inverse_kinematics_lf( position[0], position[1], position[2]) joint_lh = self.my_Kinematic.inverse_kinematics_lh( position[3], position[4], position[5]) joint_rf = self.my_Kinematic.inverse_kinematics_rf( position[6], position[7], position[8]) joint_rh = self.my_Kinematic.inverse_kinematics_rh( position[9], position[10], position[11]) joint = [] for item in joint_lf: joint.append(item) for item in joint_lh: joint.append(item) for item in joint_rf: joint.append(item) for item in joint_rh: joint.append(item) joints.append(joint) joint_publisher = JointPub() for joint in joints: joint_publisher.move_joint([ joint[0], joint[1], joint[2], joint[3], joint[4], joint[5], joint[6], joint[7], joint[8], joint[9], joint[10], joint[11], ], speed) return joints
class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 22 self.action_space = 4 # We assume that a ROS node has already been created before initialising the environment # gets training parameters from param server self.desired_target = Point() self.running_step = rospy.get_param("/running_step") self.done_reward = rospy.get_param("/collision_reward") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="") self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) self.moco = model_control() self.pipe_angle = 0.0 # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the controller self.pause_sim_pub.publish(Bool(True)) rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetWorld() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # 4th: probably add the function to change the target position is the target is reached self.reset_controller_pub.publish(Bool(True)) # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready() # delete the old target self.moco.delete_model('target') # spawn the new target target_pos = self.change_target_pos(self.moco, self.pipe_angle) # set the target point for the gym env self.set_target_position([target_pos[0], target_pos[1], 0.0]) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() rospy.logdebug("get_observations...") observation = self.mrm_state_object.get_observations() self.reset_controller_pub.publish(Bool(False)) return observation def _step(self, joint_targets): # We move it to that pos self.gazebo.unpauseSim() self.pause_sim_pub.publish(Bool(False)) self.mrm_joint_pubisher_object.move_joints(joint_targets) start = time.time() # Then we send the command to the robot and let it go for running_step seconds rospy.sleep(self.running_step) end = time.time() self.pause_sim_pub.publish(Bool(True)) self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() if end - start > 1.6: # if the real time spent on one step is too long, it means the robot is heavily stuck in the pipe, we should discard this episode stuck = True else: stuck = False return observation, reward, done, reach_target, stuck, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2]) def change_target_pos(self, moco, angle): # how far will be the target away from the turning center approximately dis_target_left = [0.22, 0.23] b = 0.07 + random.uniform(0.0, 0.018) dis_target = np.asarray( random.uniform(dis_target_left[0], dis_target_left[1])) dis_target += (-0.018 + (angle - 60) / 10 * 0.006) target_pos = [ 0.07 + dis_target * cos((angle - 90.0) / 180 * pi) - b * sin( (angle - 90.0) / 180 * pi), -0.31 - dis_target * sin( (angle - 90.0) / 180 * pi) - b * cos( (angle - 90.0) / 180 * pi) ] target_name = 'target' # the model name and namespace targetdir = '/home/joshua/high_fidelity_exp/src/mrm_training/urdf/target.xacro' # visulize the target in Gazebo moco.spawn_model(targetdir, target_name, target_name, [target_pos[0], target_pos[1], 0.0], [0, 0, 0]) return target_pos def change_joints_init(self, diameter): joints_inits = { 80: [0.28, -0.78, -0.7, 0], 90: [0.3, -0.8, -0.75, 0], 100: [0.4, -1.05, -0.9, 0], 110: [0.51, -1.27, -0.95, 0], 120: [0.51, -1.27, -0.95, 0] } self.mrm_joint_pubisher_object.init_pos = joints_inits[diameter]
class URSimDoorOpening(robot_gazebo_env_goal.RobotGazeboEnv): def __init__(self): rospy.logdebug("Starting URSimDoorOpening Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy('/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/ft_sensor_topic", WrenchStamped, self.wrench_stamped_callback) rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/gazebo/link_states", LinkStates, self.link_state_callback) # Gets training parameters from param server self.desired_pose = Pose() self.running_step = rospy.get_param("/running_step") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = {"shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value0 = rospy.get_param("/init_joint_pose0/shp") shl_init_value0 = rospy.get_param("/init_joint_pose0/shl") elb_init_value0 = rospy.get_param("/init_joint_pose0/elb") wr1_init_value0 = rospy.get_param("/init_joint_pose0/wr1") wr2_init_value0 = rospy.get_param("/init_joint_pose0/wr2") wr3_init_value0 = rospy.get_param("/init_joint_pose0/wr3") self.init_joint_pose0 = [shp_init_value0, shl_init_value0, elb_init_value0, wr1_init_value0, wr2_init_value0, wr3_init_value0] shp_init_value1 = rospy.get_param("/init_joint_pose1/shp") shl_init_value1 = rospy.get_param("/init_joint_pose1/shl") elb_init_value1 = rospy.get_param("/init_joint_pose1/elb") wr1_init_value1 = rospy.get_param("/init_joint_pose1/wr1") wr2_init_value1 = rospy.get_param("/init_joint_pose1/wr2") wr3_init_value1 = rospy.get_param("/init_joint_pose1/wr3") self.init_joint_pose1 = [shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1] # print("[init_joint_pose1]: ", [shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1]) shp_init_value2 = rospy.get_param("/init_joint_pose2/shp") shl_init_value2 = rospy.get_param("/init_joint_pose2/shl") elb_init_value2 = rospy.get_param("/init_joint_pose2/elb") wr1_init_value2 = rospy.get_param("/init_joint_pose2/wr1") wr2_init_value2 = rospy.get_param("/init_joint_pose2/wr2") wr3_init_value2 = rospy.get_param("/init_joint_pose2/wr3") self.init_joint_pose2 = [shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2] # print("[init_joint_pose2]: ", [shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2]) shp_after_rotate = rospy.get_param("/eelink_pose_after_rotate/shp") shl_after_rotate = rospy.get_param("/eelink_pose_after_rotate/shl") elb_after_rotate = rospy.get_param("/eelink_pose_after_rotate/elb") wr1_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr1") wr2_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr2") wr3_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr3") self.after_rotate = [shp_after_rotate, shl_after_rotate, elb_after_rotate, wr1_after_rotate, wr2_after_rotate, wr3_after_rotate] shp_after_pull = rospy.get_param("/eelink_pose_after_pull/shp") shl_after_pull = rospy.get_param("/eelink_pose_after_pull/shl") elb_after_pull = rospy.get_param("/eelink_pose_after_pull/elb") wr1_after_pull = rospy.get_param("/eelink_pose_after_pull/wr1") wr2_after_pull = rospy.get_param("/eelink_pose_after_pull/wr2") wr3_after_pull = rospy.get_param("/eelink_pose_after_pull/wr3") self.after_pull = [shp_after_pull, shl_after_pull, elb_after_pull, wr1_after_pull, wr2_after_pull, wr3_after_pull] r_drv_value1 = rospy.get_param("/init_grp_pose1/r_drive") l_drv_value1 = rospy.get_param("/init_grp_pose1/l_drive") r_flw_value1 = rospy.get_param("/init_grp_pose1/r_follower") l_flw_value1 = rospy.get_param("/init_grp_pose1/l_follower") r_spr_value1 = rospy.get_param("/init_grp_pose1/r_spring") l_spr_value1 = rospy.get_param("/init_grp_pose1/l_spring") r_drv_value2 = rospy.get_param("/init_grp_pose2/r_drive") l_drv_value2 = rospy.get_param("/init_grp_pose2/l_drive") r_flw_value2 = rospy.get_param("/init_grp_pose2/r_follower") l_flw_value2 = rospy.get_param("/init_grp_pose2/l_follower") r_spr_value2 = rospy.get_param("/init_grp_pose2/r_spring") l_spr_value2 = rospy.get_param("/init_grp_pose2/l_spring") self.init_grp_pose1 = [r_drv_value1, l_drv_value1, r_flw_value1, l_flw_value1, r_spr_value1, l_spr_value1] self.init_grp_pose2 = [r_drv_value2, l_drv_value2, r_flw_value2, l_flw_value2, r_spr_value2, l_spr_value2] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = rospy.get_param("/control_type") self.pre_ctrl_type = self._ctrl_type # Get the force and troque limit self.force_limit = rospy.get_param("/force_limit") self.torque_limit = rospy.get_param("/torque_limit") # We init the observations self.base_orientation = Quaternion() self.imu_link = Quaternion() self.door = Quaternion() self.quat = Quaternion() self.imu_link_rpy = Vector3() self.door_rpy = Vector3() self.link_state = LinkStates() self.wrench_stamped = WrenchStamped() self.joints_state = JointState() self.end_effector = Point() self.previous_action =[] self.counter = 0 self.max_rewards = 1 # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(6) self.observation_space = 21 #np.arange(self.get_observations().shape[0]) ## self.observation_space = 15 #np.arange(self.get_observations().shape[0]) self.reward_range = (-np.inf, np.inf) self._seed() # Change the controller type set_joint_pos_server = rospy.Service('/set_position_controller', SetBool, self._set_pos_ctrl) set_joint_traj_pos_server = rospy.Service('/set_trajectory_position_controller', SetBool, self._set_traj_pos_ctrl) set_joint_vel_server = rospy.Service('/set_velocity_controller', SetBool, self._set_vel_ctrl) set_joint_traj_vel_server = rospy.Service('/set_trajectory_velocity_controller', SetBool, self._set_traj_vel_ctrl) # set_gripper_server = rospy.Service('/set_gripper_controller', SetBool, self._set_grp_ctrl) self.pos_traj_controller = ['joint_state_controller', 'gripper_controller', 'pos_traj_controller'] self.pos_controller = ["joint_state_controller", "gripper_controller", "ur_shoulder_pan_pos_controller", "ur_shoulder_lift_pos_controller", "ur_elbow_pos_controller", "ur_wrist_1_pos_controller", "ur_wrist_2_pos_controller", "ur_wrist_3_pos_controller"] self.vel_traj_controller = ['joint_state_controller', 'gripper_controller', 'vel_traj_controller'] self.vel_controller = ["joint_state_controller", "gripper_controller", "ur_shoulder_pan_vel_controller", "ur_shoulder_lift_vel_controller", "ur_elbow_vel_controller", "ur_wrist_1_vel_controller", "ur_wrist_2_vel_controller", "ur_wrist_3_vel_controller"] # Helpful False self.stop_flag = False stop_trainning_server = rospy.Service('/stop_training', SetBool, self._stop_trainnig) start_trainning_server = rospy.Service('/start_training', SetBool, self._start_trainnig) def check_stop_flg(self): if self.stop_flag is False: return False else: return True def _start_trainnig(self, req): rospy.logdebug("_start_trainnig!!!!") self.stop_flag = False return SetBoolResponse(True, "_start_trainnig") def _stop_trainnig(self, req): rospy.logdebug("_stop_trainnig!!!!") self.stop_flag = True return SetBoolResponse(True, "_stop_trainnig") def _set_pos_ctrl(self, req): rospy.wait_for_service('set_position_controller') self._ctrl_conn.stop_controllers(self.pos_traj_controller) self._ctrl_conn.start_controllers(self.pos_controller) self._ctrl_type = 'pos' return SetBoolResponse(True, "_set_pos_ctrl") def _set_traj_pos_ctrl(self, req): rospy.wait_for_service('set_trajectory_position_controller') self._ctrl_conn.stop_controllers(self.pos_controller) self._ctrl_conn.start_controllers(self.pos_traj_controller) self._ctrl_type = 'traj_pos' return SetBoolResponse(True, "_set_traj_pos_ctrl") def _set_vel_ctrl(self, req): rospy.wait_for_service('set_velocity_controller') self._ctrl_conn.stop_controllers(self.vel_traj_controller) self._ctrl_conn.start_controllers(self.vel_controller) self._ctrl_type = 'vel' return SetBoolResponse(True, "_set_vel_ctrl") def _set_traj_vel_ctrl(self, req): rospy.wait_for_service('set_trajectory_velocity_controller') self._ctrl_conn.stop_controllers(self.vel_controller) self._ctrl_conn.start_controllers(self.vel_traj_controller) self._ctrl_type = 'traj_vel' return SetBoolResponse(True, "_set_traj_vel_ctrl") # def _set_grp_ctrl(self, req): # rospy.wait_for_service('set_gripper_controller') # self._ctrl_conn.start_controllers(self.gripper_controller) # return SetBoolResponse(True, "_set_grp_ctrl") # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def link_state_callback(self, msg): self.link_state = msg self.end_effector = self.link_state.pose[12] self.imu_link = self.link_state.pose[5] self.door = self.link_state.pose[2] # def target_point_callback(self, msg): # self.target_point = msg def check_all_systems_ready(self): """ We check that all systems are ready :return: """ joint_states_msg = None while joint_states_msg is None and not rospy.is_shutdown(): try: joint_states_msg = rospy.wait_for_message("/joint_states", JointState, timeout=0.1) self.joints_state = joint_states_msg rospy.logdebug("Current joint_states READY") except Exception as e: self._ctrl_conn.start_controllers(controllers_on="joint_state_controller") rospy.logdebug("Current joint_states not ready yet, retrying==>"+str(e)) link_states_msg = None while link_states_msg is None and not rospy.is_shutdown(): try: link_states_msg = rospy.wait_for_message("/gazebo/link_states", LinkStates, timeout=0.1) self.link_states = link_states_msg rospy.logdebug("Reading link_states READY") except Exception as e: rospy.logdebug("Reading link_states not ready yet, retrying==>"+str(e)) rospy.logdebug("ALL SYSTEMS READY") def get_xyz(self, q): """Get x,y,z coordinates Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] return xyz def get_current_xyz(self): """Get x,y,z coordinates according to currrent joint angles Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ joint_states = self.joints_state shp_joint_ang = joint_states.position[0] shl_joint_ang = joint_states.position[1] elb_joint_ang = joint_states.position[2] wr1_joint_ang = joint_states.position[3] wr2_joint_ang = joint_states.position[4] wr3_joint_ang = joint_states.position[5] q = [shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang] mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] return xyz def get_orientation(self, q): """Get Euler angles Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) orientation = mat[0:3, 0:3] roll = -orientation[1, 2] pitch = orientation[0, 2] yaw = -orientation[0, 1] return Vector3(roll, pitch, yaw) def cvt_quat_to_euler(self, quat): euler_rpy = Vector3() euler = euler_from_quaternion([self.quat.x, self.quat.y, self.quat.z, self.quat.w]) euler_rpy.x = euler[0] euler_rpy.y = euler[1] euler_rpy.z = euler[2] return euler_rpy def init_joints_pose(self, init_pos): """ We initialise the Position variable that saves the desired position where we want our joints to be :param init_pos: :return: """ self.current_joint_pose =[] self.current_joint_pose = copy.deepcopy(init_pos) # print("[current_joint_pose]:", self.current_joint_pose, type(self.current_joint_pose)) return self.current_joint_pose def get_euclidean_dist(self, p_in, p_pout): """ Given a Vector3 Object, get distance from current position :param p_end: :return: """ a = numpy.array((p_in.x, p_in.y, p_in.z)) b = numpy.array((p_pout.x, p_pout.y, p_pout.z)) distance = numpy.linalg.norm(a - b) return distance def joints_state_callback(self,msg): self.joints_state = msg def wrench_stamped_callback(self,msg): self.wrench_stamped = msg def get_observations(self): """ Returns the state of the robot needed for OpenAI QLearn Algorithm The state will be defined by an array :return: observation """ joint_states = self.joints_state self.force = self.wrench_stamped.wrench.force self.torque = self.wrench_stamped.wrench.torque # print("[force]", self.force.x, self.force.y, self.force.z) # print("[torque]", self.torque.x, self.torque.y, self.torque.z) shp_joint_ang = joint_states.position[2] shl_joint_ang = joint_states.position[1] elb_joint_ang = joint_states.position[0] wr1_joint_ang = joint_states.position[9] wr2_joint_ang = joint_states.position[10] wr3_joint_ang = joint_states.position[11] shp_joint_vel = joint_states.velocity[2] shl_joint_vel = joint_states.velocity[1] elb_joint_vel = joint_states.velocity[0] wr1_joint_vel = joint_states.velocity[9] wr2_joint_vel = joint_states.velocity[10] wr3_joint_vel = joint_states.velocity[11] q = [shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang] eef_x, eef_y, eef_z = self.get_xyz(q) observation = [] # rospy.logdebug("List of Observations==>"+str(self.observations)) for obs_name in self.observations: if obs_name == "shp_joint_ang": observation.append(shp_joint_ang) elif obs_name == "shl_joint_ang": observation.append(shl_joint_ang) elif obs_name == "elb_joint_ang": observation.append(elb_joint_ang) elif obs_name == "wr1_joint_ang": observation.append(wr1_joint_ang) elif obs_name == "wr2_joint_ang": observation.append(wr2_joint_ang) elif obs_name == "wr3_joint_ang": observation.append(wr3_joint_ang) elif obs_name == "shp_joint_vel": observation.append(shp_joint_vel) elif obs_name == "shl_joint_vel": observation.append(shl_joint_vel) elif obs_name == "elb_joint_vel": observation.append(elb_joint_vel) elif obs_name == "wr1_joint_vel": observation.append(wr1_joint_vel) elif obs_name == "wr2_joint_vel": observation.append(wr2_joint_vel) elif obs_name == "wr3_joint_vel": observation.append(wr3_joint_vel) elif obs_name == "eef_x": observation.append(eef_x) elif obs_name == "eef_y": observation.append(eef_y) elif obs_name == "eef_z": observation.append(eef_z) elif obs_name == "force_x": observation.append(self.force.x) elif obs_name == "force_y": observation.append(self.force.y) elif obs_name == "force_z": observation.append(self.force.z) elif obs_name == "torque_x": observation.append(self.torque.x) elif obs_name == "torque_y": observation.append(self.torque.y) elif obs_name == "torque_z": observation.append(self.torque.z) else: raise NameError('Observation Asked does not exist=='+str(obs_name)) return observation def clamp_to_joint_limits(self): """ clamps self.current_joint_pose based on the joint limits self._joint_limits { "shp_max": shp_max, "shp_min": shp_min, ... } :return: """ rospy.logdebug("Clamping current_joint_pose>>>" + str(self.current_joint_pose)) shp_joint_value = self.current_joint_pose[0] shl_joint_value = self.current_joint_pose[1] elb_joint_value = self.current_joint_pose[2] wr1_joint_value = self.current_joint_pose[3] wr2_joint_value = self.current_joint_pose[4] wr3_joint_value = self.current_joint_pose[5] self.current_joint_pose[0] = max(min(shp_joint_value, self._joint_limits["shp_max"]), self._joint_limits["shp_min"]) self.current_joint_pose[1] = max(min(shl_joint_value, self._joint_limits["shl_max"]), self._joint_limits["shl_min"]) self.current_joint_pose[2] = max(min(elb_joint_value, self._joint_limits["elb_max"]), self._joint_limits["elb_min"]) self.current_joint_pose[3] = max(min(wr1_joint_value, self._joint_limits["wr1_max"]), self._joint_limits["wr1_min"]) self.current_joint_pose[4] = max(min(wr2_joint_value, self._joint_limits["wr2_max"]), self._joint_limits["wr2_min"]) self.current_joint_pose[5] = max(min(wr3_joint_value, self._joint_limits["wr3_max"]), self._joint_limits["wr3_min"]) rospy.logdebug("DONE Clamping current_joint_pose>>>" + str(self.current_joint_pose)) # Resets the state of the environment and returns an initial observation. def reset(self): # Go to initial position self._gz_conn.unpauseSim() rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose0)) init_pos0 = self.init_joints_pose(self.init_joint_pose0) self.arr_init_pos0 = np.array(init_pos0, dtype='float32') init_pos1 = self.init_joints_pose(self.init_joint_pose1) self.arr_init_pos1 = np.array(init_pos1, dtype='float32') init_g_pos1 = self.init_joints_pose(self.init_grp_pose1) arr_init_g_pos1 = np.array(init_g_pos1, dtype='float32') jointtrajpub = JointTrajPub() for update in range(500): jointtrajpub.GrpCommand(arr_init_g_pos1) time.sleep(2) for update in range(1000): jointtrajpub.jointTrajectoryCommand(self.arr_init_pos1) time.sleep(0.3) for update in range(1000): jointtrajpub.jointTrajectoryCommand(self.arr_init_pos0) time.sleep(0.3) # 0st: We pause the Simulator rospy.logdebug("Pausing SIM...") self._gz_conn.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self._gz_conn.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self._gz_conn.change_gravity_zero() # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems rospy.logdebug("reset_ur_joint_controllers...") self._ctrl_conn.reset_ur_joint_controllers(self._ctrl_type) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose1)) rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose2)) # We save that position as the current joint desired position # print("[init_joint_pose1]:", self.init_joint_pose1, type(self.init_joint_pose1)) init_pos2 = self.init_joints_pose(self.init_joint_pose2) self.arr_init_pos2 = np.array(init_pos2, dtype='float32') after_rotate = self.init_joints_pose(self.after_rotate) self.arr_after_rotate = np.array(after_rotate, dtype='float32') after_pull = self.init_joints_pose(self.after_pull) self.arr_after_pull = np.array(after_pull, dtype='float32') init_g_pos2 = self.init_joints_pose(self.init_grp_pose2) arr_init_g_pos2 = np.array(init_g_pos2, dtype='float32') # 4th: We Set the init pose to the jump topic so that the jump control can update # We check the jump publisher has connection if self._ctrl_type == 'traj_pos': self._joint_traj_pubisher.check_publishers_connection() elif self._ctrl_type == 'pos': self._joint_pubisher.check_publishers_connection() elif self._ctrl_type == 'traj_vel': self._joint_traj_pubisher.check_publishers_connection() elif self._ctrl_type == 'vel': self._joint_pubisher.check_publishers_connection() else: rospy.logwarn("Controller type is wrong!!!!") # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints rospy.logdebug("check_all_systems_ready...") self.check_all_systems_ready() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self._gz_conn.adjust_gravity() for update in range(1000): jointtrajpub.jointTrajectoryCommand(self.arr_init_pos1) time.sleep(0.3) for update in range(1000): jointtrajpub.jointTrajectoryCommand(self.arr_init_pos2) time.sleep(0.3) for update in range(50): jointtrajpub.GrpCommand(arr_init_g_pos2) time.sleep(2) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self._gz_conn.pauseSim() # 8th: Get the State Discrete Stringuified version of the observations rospy.logdebug("get_observations...") observation = self.get_observations() # print("[observations]", observation) return observation def _act(self, action): if self._ctrl_type == 'traj_pos': self.pre_ctrl_type = 'traj_pos' self._joint_traj_pubisher.jointTrajectoryCommand(action) elif self._ctrl_type == 'pos': self.pre_ctrl_type = 'pos' self._joint_pubisher.move_joints(action) elif self._ctrl_type == 'traj_vel': self.pre_ctrl_type = 'traj_vel' self._joint_traj_pubisher.jointTrajectoryCommand(action) elif self._ctrl_type == 'vel': self.pre_ctrl_type = 'vel' self._joint_pubisher.move_joints(action) else: self._joint_pubisher.move_joints(action) def training_ok(self): rate = rospy.Rate(1) while self.check_stop_flg() is True: rospy.logdebug("stop_flag is ON!!!!") self._gz_conn.unpauseSim() if self.check_stop_flg() is False: break rate.sleep() def step(self, action): ''' ('action: ', array([ 0., 0. , -0., -0., -0. , 0. ], dtype=float32)) ''' rospy.logdebug("UR step func") # define the logger self.training_ok() # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # Act self._gz_conn.unpauseSim() action = action + self.arr_init_pos2 self._act(action) # print("[action]", action) self.wrench_stamped self.force = self.wrench_stamped.wrench.force self.torque = self.wrench_stamped.wrench.torque # print("wrench", self.wrench_stamped, type(self.wrench_stamped)) #<class 'geometry_msgs.msg._WrenchStamped.WrenchStamped'> # print("[force]", self.force.x, self.force.y, self.force.z) # print("[torque]", self.torque.x, self.torque.y, self.torque.z) ''' if self.force_limit < self.force.x or self.force.x < -self.force_limit: self._act(self.previous_action) # print("force.x over the limit") # print("[previous_action]", self.previous_action) elif self.force_limit < self.force.y or self.force.y < -self.force_limit: self._act(self.previous_action) # print("force.y over the limit") # print("[previous_action]", self.previous_action) elif self.force_limit < self.force.z or self.force.z < -self.force_limit: self._act(self.previous_action) # print("force.z over the limit") # print("[previous_action]", self.previous_action) elif self.torque_limit < self.torque.x or self.torque.x < -self.torque_limit: self._act(self.previous_action) # print("torque.x over the limit") # print("[previous_action]", self.previous_action) elif self.torque_limit < self.torque.y or self.torque.y < -self.torque_limit: self._act(self.previous_action) # print("torque.y over the limit") # print("[previous_action]", self.previous_action) elif self.torque_limit < self.torque.z or self.torque.z < -self.torque_limit: self._act(self.previous_action) # print("torque.z over the limit") # print("[previous_action]", self.previous_action) else: self.previous_action = copy.deepcopy(action) # print("[action]", action) ''' # Then we send the command to the robot and let it go for running_step seconds time.sleep(self.running_step) self._gz_conn.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.get_observations() # print("[observations]", observation) # finally we get an evaluation based on what happened in the sim reward = self.compute_dist_rewards() done = self.check_done() return observation, reward, done, {} def compute_dist_rewards(self): self.quat = self.door.orientation self.door_rpy = self.cvt_quat_to_euler(self.quat) self.quat = self.imu_link.orientation self.imu_link_rpy = self.cvt_quat_to_euler(self.quat) compute_rewards = 0 # self.rpy = self.cvt_quat_to_euler(Quaternion(0.0, 0.0, 0.7071, 0.7071)) #print ("[self.target_point]: ", [self.target_point.x, self.target_point.y, self.target_point.z]) #print ("(self.get_current_xyz(): ", self.get_current_xyz()) #end_effector_pos = np.array([self.end_effector.position.x, self.end_effector.position.y, self.end_effector.position.z]) #self.distance = np.linalg.norm(end_effector_pos - [self.target_point.x, self.target_point.y, self.target_point.z], axis=0) # print ("[door]: ", [self.door.orientation.x, self.door.orientation.y, self.door.orientation.z, self.door.orientation.w]) # print ("[imu_link]: ", [self.imu_link.orientation.x, self.imu_link.orientation.y, self.imu_link.orientation.z, self.imu_link.orientation.w]) # print ("[door_rpy]: ", [self.door_rpy.x, self.door_rpy.y, self.door_rpy.z]) # [-3.141590232638843, 4.64637166410168e-06, 3.1407993185850303] # => [-3.141587417428544, 6.811796590263218e-05, 2.8971100347923704] # print ("[self.imu_link_rpy]: ", [self.imu_link_rpy.x, self.imu_link_rpy.y, self.imu_link_rpy.z]) # [-5.017238272290064e-06, 2.560885641286913e-07, 1.5707993173228965] # => [1.2205817198134408, -4.341035340318689e-06, 1.3270298472237638] # print ("[type]: ", type(self.door_rpy), type(self.end_effector.position)) # print ("[rpy]: ", [self.rpy.x, self.rpy.y, self.rpy.z]) # self.counter = self.counter + 1 # return self.imu_link_rpy.x + 1.5708061 - self.imu_link_rpy.z # for door opening compute_rewards = self.imu_link_rpy.x + 1.5708061 - self.imu_link_rpy.z return compute_rewards # for door opening # close #('[door_rpy]: ', [-3.141589494723927, 5.371950050444065e-06, 3.140803800525111]) #('[self.imu_link_rpy]: ', [-5.406752832019292e-06, 6.896590419417709e-06, 1.5708061011106662]) # open #('[door_rpy]: ', [3.1374584007550737, -0.0018131747911100536, 2.764050391990123(delta=0.3768)]) #('[self.imu_link_rpy]: ', [1.0934212049101757(delta=1.0934), -0.004085577221111396, 1.1941435185763472(delta=0.3768)]) def check_done(self): # if 3.1408 - self.door_rpy.z > 3.14 / 180 * 10: # for door opening if self.imu_link_rpy.x + 1.5708061 - self.imu_link_rpy.z > 3000: # for door opening print("done") return True else : return False
def __init__(self): rospy.logdebug("Starting URSimReaching Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy( '/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/target_blocks_pose", Point, self.target_point_cb) # Gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = { "shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value = rospy.get_param("/init_joint_pose/shp") shl_init_value = rospy.get_param("/init_joint_pose/shl") elb_init_value = rospy.get_param("/init_joint_pose/elb") wr1_init_value = rospy.get_param("/init_joint_pose/wr1") wr2_init_value = rospy.get_param("/init_joint_pose/wr2") wr3_init_value = rospy.get_param("/init_joint_pose/wr3") self.init_joint_pose = [ shp_init_value, shl_init_value, elb_init_value, wr1_init_value, wr2_init_value, wr3_init_value ] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = sys.argv[1] self.pre_ctrl_type = self._ctrl_type # We init the observations self.base_orientation = Quaternion() self.target_point = Point() self.joints_state = JointState() # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed()
agent = PPO(args=args, observation_size=env.observation_space, action_size=env.action_space) # load the trained model rospack = rospkg.RosPack() pkg_path = rospack.get_path('mrm_training') outdir = pkg_path + '/training_results' string_coffs = ['15'] # pipe coefficient angles = range(55, 126, 5) D = [120, 110, 100, 90] # joint publisher mrm_jointpub = JointPub() rospy.loginfo("Start Validation") sm = SpawnModel() sm.urdf_format = True sm.initial_xyz = [0, 0, 0] sm.initial_rpy = [0, 0, 0] sm.model_name = 'mrm' for string_coff in string_coffs: modeldir = outdir + '/PPO_model_new/high_fidelity_%s_1.ckpt' % string_coff # modeldir = outdir + '/PPO_model/pirate_pos_control3.ckpt' agent.load_model(modeldir) # start training from turning left for d in D:
def __init__(self): # We assume that a ROS node has already been created # before initialising the environment # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="biped") self.biped_state_object = BipedState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5 ) self.biped_state_object.set_desired_world_point(self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.biped_joint_pubisher_object = JointPub() """ For this version, we consider 12 actions 1-2) Increment/Decrement L_haa_joint 3-4) Increment/Decrement L_hfe_joint 5-6) Increment/Decrement L_kfe_joint 7-8) Increment/Decrement R_haa_joint 9-10) Increment/Decrement R_hfe_joint 11-12) Increment/Decrement R_kfe_joint """ self.action_space = spaces.Discrete(12) self.reward_range = (-np.inf, np.inf) self._seed()
class URSimReaching(robot_gazebo_env_goal.RobotGazeboEnv): def __init__(self): rospy.logdebug("Starting URSimReaching Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy( '/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/target_blocks_pose", Point, self.target_point_cb) # Gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = { "shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value = rospy.get_param("/init_joint_pose/shp") shl_init_value = rospy.get_param("/init_joint_pose/shl") elb_init_value = rospy.get_param("/init_joint_pose/elb") wr1_init_value = rospy.get_param("/init_joint_pose/wr1") wr2_init_value = rospy.get_param("/init_joint_pose/wr2") wr3_init_value = rospy.get_param("/init_joint_pose/wr3") self.init_joint_pose = [ shp_init_value, shl_init_value, elb_init_value, wr1_init_value, wr2_init_value, wr3_init_value ] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = sys.argv[1] self.pre_ctrl_type = self._ctrl_type # We init the observations self.base_orientation = Quaternion() self.target_point = Point() self.joints_state = JointState() # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed() # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def target_point_cb(self, msg): self.target_point = msg def check_all_systems_ready(self): """ We check that all systems are ready :return: """ joint_states_msg = None while joint_states_msg is None and not rospy.is_shutdown(): try: joint_states_msg = rospy.wait_for_message("/joint_states", JointState, timeout=0.1) self.joints_state = joint_states_msg rospy.logdebug("Current joint_states READY") except Exception as e: rospy.logdebug( "Current joint_states not ready yet, retrying==>" + str(e)) target_pose_msg = None while target_pose_msg is None and not rospy.is_shutdown(): try: target_pose_msg = rospy.wait_for_message("/target_blocks_pose", Point, timeout=0.1) self.target_point = target_pose_msg rospy.logdebug("Reading target pose READY") except Exception as e: rospy.logdebug( "Reading target pose not ready yet, retrying==>" + str(e)) rospy.logdebug("ALL SYSTEMS READY") def get_xyz(self, q): """Get x,y,z coordinates Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] return xyz def get_orientation(self, q): """Get Euler angles Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) orientation = mat[0:3, 0:3] roll = -orientation[1, 2] pitch = orientation[0, 2] yaw = -orientation[0, 1] return Vector3(roll, pitch, yaw) def cvt_quat_to_euler(self, quat): euler_rpy = Vector3() euler = tf.transformations.euler_from_quaternion( [self.quat.x, self.quat.y, self.quat.z, self.quat.w]) euler_rpy.x = euler[0] euler_rpy.y = euler[1] euler_rpy.z = euler[2] return euler_rpy def init_joints_pose(self, init_pos): """ We initialise the Position variable that saves the desired position where we want our joints to be :param init_pos: :return: """ self.current_joint_pose = [] self.current_joint_pose = copy.deepcopy(init_pos) return self.current_joint_pose def get_euclidean_dist(self, p_in, p_pout): """ Given a Vector3 Object, get distance from current position :param p_end: :return: """ a = numpy.array((p_in.x, p_in.y, p_in.z)) b = numpy.array((p_pout.x, p_pout.y, p_pout.z)) distance = numpy.linalg.norm(a - b) return distance def joints_state_callback(self, msg): self.joints_state = msg def get_observations(self): """ Returns the state of the robot needed for OpenAI QLearn Algorithm The state will be defined by an array :return: observation """ joint_states = self.joints_state shp_joint_ang = joint_states.position[0] shl_joint_ang = joint_states.position[1] elb_joint_ang = joint_states.position[2] wr1_joint_ang = joint_states.position[3] wr2_joint_ang = joint_states.position[4] wr3_joint_ang = joint_states.position[5] shp_joint_vel = joint_states.velocity[0] shl_joint_vel = joint_states.velocity[1] elb_joint_vel = joint_states.velocity[2] wr1_joint_vel = joint_states.velocity[3] wr2_joint_vel = joint_states.velocity[4] wr3_joint_vel = joint_states.velocity[5] q = [ shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang ] eef_x, eef_y, eef_z = self.get_xyz(q) observation = [] rospy.logdebug("List of Observations==>" + str(self.observations)) for obs_name in self.observations: if obs_name == "shp_joint_ang": observation.append(shp_joint_ang) elif obs_name == "shl_joint_ang": observation.append(shl_joint_ang) elif obs_name == "elb_joint_ang": observation.append(elb_joint_ang) elif obs_name == "wr1_joint_ang": observation.append(wr1_joint_ang) elif obs_name == "wr2_joint_ang": observation.append(wr2_joint_ang) elif obs_name == "wr3_joint_ang": observation.append(wr3_joint_ang) elif obs_name == "shp_joint_vel": observation.append(shp_joint_vel) elif obs_name == "shl_joint_vel": observation.append(shl_joint_vel) elif obs_name == "elb_joint_vel": observation.append(elb_joint_vel) elif obs_name == "wr1_joint_vel": observation.append(wr1_joint_vel) elif obs_name == "wr2_joint_vel": observation.append(wr2_joint_vel) elif obs_name == "wr3_joint_vel": observation.append(wr3_joint_vel) elif obs_name == "eef_x": observation.append(eef_x) elif obs_name == "eef_y": observation.append(eef_y) elif obs_name == "eef_z": observation.append(eef_z) else: raise NameError('Observation Asked does not exist==' + str(obs_name)) return observation def clamp_to_joint_limits(self): """ clamps self.current_joint_pose based on the joint limits self._joint_limits { "shp_max": shp_max, "shp_min": shp_min, ... } :return: """ rospy.logdebug("Clamping current_joint_pose>>>" + str(self.current_joint_pose)) shp_joint_value = self.current_joint_pose[0] shl_joint_value = self.current_joint_pose[1] elb_joint_value = self.current_joint_pose[2] wr1_joint_value = self.current_joint_pose[3] wr2_joint_value = self.current_joint_pose[4] wr3_joint_value = self.current_joint_pose[5] self.current_joint_pose[0] = max( min(shp_joint_value, self._joint_limits["shp_max"]), self._joint_limits["shp_min"]) self.current_joint_pose[1] = max( min(shl_joint_value, self._joint_limits["shl_max"]), self._joint_limits["shl_min"]) self.current_joint_pose[2] = max( min(elb_joint_value, self._joint_limits["elb_max"]), self._joint_limits["elb_min"]) self.current_joint_pose[3] = max( min(wr1_joint_value, self._joint_limits["wr1_max"]), self._joint_limits["wr1_min"]) self.current_joint_pose[4] = max( min(wr2_joint_value, self._joint_limits["wr2_max"]), self._joint_limits["wr2_min"]) self.current_joint_pose[5] = max( min(wr3_joint_value, self._joint_limits["wr3_max"]), self._joint_limits["wr3_min"]) rospy.logdebug("DONE Clamping current_joint_pose>>>" + str(self.current_joint_pose)) # Resets the state of the environment and returns an initial observation. def reset(self): # 0st: We pause the Simulator rospy.logdebug("Pausing SIM...") self._gz_conn.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self._gz_conn.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self._gz_conn.change_gravity(0.0, 0.0, 0.0) # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems rospy.logdebug("reset_ur_joint_controllers...") self._ctrl_conn.reset_ur_joint_controllers(self._ctrl_type) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose)) # We save that position as the current joint desired position init_pos = self.init_joints_pose(self.init_joint_pose) print("init_pos") # 4th: We Set the init pose to the jump topic so that the jump control can update # We check the jump publisher has connection if self._ctrl_type == 'traj_vel': self._joint_traj_pubisher.check_publishers_connection() elif self._ctrl_type == 'vel': self._joint_pubisher.check_publishers_connection() else: rospy.logwarn("Controller type is wrong!!!!") print("check_publishers_connection ") # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints rospy.logdebug("check_all_systems_ready...") self.check_all_systems_ready() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self._gz_conn.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self._gz_conn.pauseSim() # self._init_obj_pose() # 8th: Get the State Discrete Stringuified version of the observations rospy.logdebug("get_observations...") observation = self.get_observations() print('Reset final') return observation def _act(self, action): if self._ctrl_type == 'traj_vel': self.pre_ctrl_type = 'traj_vel' self._joint_traj_pubisher.move_joints(action) elif self._ctrl_type == 'vel': self.pre_ctrl_type = 'vel' self._joint_pubisher.move_joints(action) else: self._joint_pubisher.move_joints(action) def step(self, action): rospy.logdebug("UR step func") # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # Act self._gz_conn.unpauseSim() self._act(action) # Then we send the command to the robot and let it go # for running_step seconds time.sleep(self.running_step) self._gz_conn.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.get_observations() # finally we get an evaluation based on what happened in the sim reward = self.compute_dist_rewards() done = self.check_done() return observation, reward, done, {} def compute_dist_rewards(self): return 0 def check_done(self): return True
class URSimDoorOpening(robot_gazebo_env_goal.RobotGazeboEnv): def __init__(self): # rospy.logdebug("Starting URSimDoorOpening Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy( '/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/ft_sensor_topic", WrenchStamped, self.wrench_stamped_callback) rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/gazebo/link_states", LinkStates, self.link_state_callback) rospy.Subscriber("/robotiq/rightcam/image_raw_right", Image, self.r_image_callback) rospy.Subscriber("/robotiq/leftcam/image_raw_left", Image, self.l_image_callback) # Gets training parameters from param server self.running_step = rospy.get_param("/running_step") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = { "shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value0 = rospy.get_param("/init_joint_pose0/shp") shl_init_value0 = rospy.get_param("/init_joint_pose0/shl") elb_init_value0 = rospy.get_param("/init_joint_pose0/elb") wr1_init_value0 = rospy.get_param("/init_joint_pose0/wr1") wr2_init_value0 = rospy.get_param("/init_joint_pose0/wr2") wr3_init_value0 = rospy.get_param("/init_joint_pose0/wr3") self.init_joint_pose0 = [ shp_init_value0, shl_init_value0, elb_init_value0, wr1_init_value0, wr2_init_value0, wr3_init_value0 ] shp_init_value1 = rospy.get_param("/init_joint_pose1/shp") shl_init_value1 = rospy.get_param("/init_joint_pose1/shl") elb_init_value1 = rospy.get_param("/init_joint_pose1/elb") wr1_init_value1 = rospy.get_param("/init_joint_pose1/wr1") wr2_init_value1 = rospy.get_param("/init_joint_pose1/wr2") wr3_init_value1 = rospy.get_param("/init_joint_pose1/wr3") self.init_joint_pose1 = [ shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1 ] shp_init_value2 = rospy.get_param("/init_joint_pose2/shp") shl_init_value2 = rospy.get_param("/init_joint_pose2/shl") elb_init_value2 = rospy.get_param("/init_joint_pose2/elb") wr1_init_value2 = rospy.get_param("/init_joint_pose2/wr1") wr2_init_value2 = rospy.get_param("/init_joint_pose2/wr2") wr3_init_value2 = rospy.get_param("/init_joint_pose2/wr3") self.init_joint_pose2 = [ shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2 ] r_drv_value1 = rospy.get_param("/init_grp_pose1/r_drive") l_drv_value1 = rospy.get_param("/init_grp_pose1/l_drive") r_flw_value1 = rospy.get_param("/init_grp_pose1/r_follower") l_flw_value1 = rospy.get_param("/init_grp_pose1/l_follower") r_spr_value1 = rospy.get_param("/init_grp_pose1/r_spring") l_spr_value1 = rospy.get_param("/init_grp_pose1/l_spring") r_drv_value2 = rospy.get_param("/init_grp_pose2/r_drive") l_drv_value2 = rospy.get_param("/init_grp_pose2/l_drive") r_flw_value2 = rospy.get_param("/init_grp_pose2/r_follower") l_flw_value2 = rospy.get_param("/init_grp_pose2/l_follower") r_spr_value2 = rospy.get_param("/init_grp_pose2/r_spring") l_spr_value2 = rospy.get_param("/init_grp_pose2/l_spring") init_pos0 = self.init_joints_pose(self.init_joint_pose0) self.arr_init_pos0 = np.array(init_pos0, dtype='float32') init_pos1 = self.init_joints_pose(self.init_joint_pose1) self.arr_init_pos1 = np.array(init_pos1, dtype='float32') init_pos2 = self.init_joints_pose(self.init_joint_pose2) self.arr_init_pos2 = np.array(init_pos2, dtype='float32') self.init_grp_pose1 = [ r_drv_value1, l_drv_value1, r_flw_value1, l_flw_value1, r_spr_value1, l_spr_value1 ] self.init_grp_pose2 = [ r_drv_value2, l_drv_value2, r_flw_value2, l_flw_value2, r_spr_value2, l_spr_value2 ] init_g_pos1 = self.init_joints_pose(self.init_grp_pose1) self.arr_init_g_pos1 = np.array(init_g_pos1, dtype='float32') init_g_pos2 = self.init_joints_pose(self.init_grp_pose2) self.arr_init_g_pos2 = np.array(init_g_pos2, dtype='float32') # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = rospy.get_param("/control_type") self.pre_ctrl_type = self._ctrl_type # Get the force and troque limit self.force_limit = rospy.get_param("/force_limit") self.torque_limit = rospy.get_param("/torque_limit") # Get tolerances of door_frame self.tolerances = rospy.get_param("/door_frame_tolerances") # Get observation parameters self.joint_n = rospy.get_param("/obs_params/joint_n") self.eef_n = rospy.get_param("/obs_params/eef_n") self.eef_rpy_n = rospy.get_param("/obs_params/eef_rpy_n") self.force_n = rospy.get_param("/obs_params/force_n") self.torque_n = rospy.get_param("/obs_params/torque_n") self.image_n = rospy.get_param("/obs_params/image_n") self.min_static_limit = rospy.get_param("/min_static_limit") self.max_static_limit = rospy.get_param("/max_static_limit") # We init the observations self.base_orientation = Quaternion() self.imu_link = Quaternion() self.door = Quaternion() self.door_frame = Point() self.quat = Quaternion() self.imu_link_rpy = Vector3() self.door_rpy = Vector3() self.link_state = LinkStates() self.wrench_stamped = WrenchStamped() self.joints_state = JointState() self.right_image = Image() self.right_image_ini = [] self.left_image = Image() self.lift_image_ini = [] self.end_effector = Point() self.previous_action = copy.deepcopy(self.arr_init_pos2) self.counter = 0 self.max_rewards = 1 # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(n_act) self.observation_space = obs_dim #np.arange(self.get_observations().shape[0]) self.reward_range = (-np.inf, np.inf) self._seed() # Change the controller type set_joint_pos_server = rospy.Service('/set_position_controller', SetBool, self._set_pos_ctrl) set_joint_traj_pos_server = rospy.Service( '/set_trajectory_position_controller', SetBool, self._set_traj_pos_ctrl) set_joint_vel_server = rospy.Service('/set_velocity_controller', SetBool, self._set_vel_ctrl) set_joint_traj_vel_server = rospy.Service( '/set_trajectory_velocity_controller', SetBool, self._set_traj_vel_ctrl) self.pos_traj_controller = [ 'joint_state_controller', 'gripper_controller', 'pos_traj_controller' ] self.pos_controller = [ "joint_state_controller", "gripper_controller", "ur_shoulder_pan_pos_controller", "ur_shoulder_lift_pos_controller", "ur_elbow_pos_controller", "ur_wrist_1_pos_controller", "ur_wrist_2_pos_controller", "ur_wrist_3_pos_controller" ] self.vel_traj_controller = [ 'joint_state_controller', 'gripper_controller', 'vel_traj_controller' ] self.vel_controller = [ "joint_state_controller", "gripper_controller", "ur_shoulder_pan_vel_controller", "ur_shoulder_lift_vel_controller", "ur_elbow_vel_controller", "ur_wrist_1_vel_controller", "ur_wrist_2_vel_controller", "ur_wrist_3_vel_controller" ] # Helpful False self.stop_flag = False stop_trainning_server = rospy.Service('/stop_training', SetBool, self._stop_trainnig) start_trainning_server = rospy.Service('/start_training', SetBool, self._start_trainnig) def check_stop_flg(self): if self.stop_flag is False: return False else: return True def _start_trainnig(self, req): rospy.logdebug("_start_trainnig!!!!") self.stop_flag = False return SetBoolResponse(True, "_start_trainnig") def _stop_trainnig(self, req): rospy.logdebug("_stop_trainnig!!!!") self.stop_flag = True return SetBoolResponse(True, "_stop_trainnig") def _set_pos_ctrl(self, req): rospy.wait_for_service('set_position_controller') self._ctrl_conn.stop_controllers(self.pos_traj_controller) self._ctrl_conn.start_controllers(self.pos_controller) self._ctrl_type = 'pos' return SetBoolResponse(True, "_set_pos_ctrl") def _set_traj_pos_ctrl(self, req): rospy.wait_for_service('set_trajectory_position_controller') self._ctrl_conn.stop_controllers(self.pos_controller) self._ctrl_conn.start_controllers(self.pos_traj_controller) self._ctrl_type = 'traj_pos' return SetBoolResponse(True, "_set_traj_pos_ctrl") def _set_vel_ctrl(self, req): rospy.wait_for_service('set_velocity_controller') self._ctrl_conn.stop_controllers(self.vel_traj_controller) self._ctrl_conn.start_controllers(self.vel_controller) self._ctrl_type = 'vel' return SetBoolResponse(True, "_set_vel_ctrl") def _set_traj_vel_ctrl(self, req): rospy.wait_for_service('set_trajectory_velocity_controller') self._ctrl_conn.stop_controllers(self.vel_controller) self._ctrl_conn.start_controllers(self.vel_traj_controller) self._ctrl_type = 'traj_vel' return SetBoolResponse(True, "_set_traj_vel_ctrl") # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def check_all_systems_ready(self): """ We check that all systems are ready :return: """ joint_states_msg = None while joint_states_msg is None and not rospy.is_shutdown(): try: joint_states_msg = rospy.wait_for_message("/joint_states", JointState, timeout=0.1) self.joints_state = joint_states_msg rospy.logdebug("Current joint_states READY") except Exception as e: self._ctrl_conn.start_controllers( controllers_on="joint_state_controller") rospy.logdebug( "Current joint_states not ready yet, retrying==>" + str(e)) link_states_msg = None while link_states_msg is None and not rospy.is_shutdown(): try: link_states_msg = rospy.wait_for_message("/gazebo/link_states", LinkStates, timeout=0.1) self.link_states = link_states_msg rospy.logdebug("Reading link_states READY") except Exception as e: rospy.logdebug( "Reading link_states not ready yet, retrying==>" + str(e)) rospy.logdebug("ALL SYSTEMS READY") def get_xyz(self, q): """Get x,y,z coordinates Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] return xyz def get_current_xyz(self): """Get x,y,z coordinates according to currrent joint angles Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ joint_states = self.joints_state shp_joint_ang = joint_states.position[0] shl_joint_ang = joint_states.position[1] elb_joint_ang = joint_states.position[2] wr1_joint_ang = joint_states.position[3] wr2_joint_ang = joint_states.position[4] wr3_joint_ang = joint_states.position[5] q = [ shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang ] mat = ur_utils.forward(q, self._ik_params) xyz = mat[:3, 3] return xyz def get_orientation(self, q): """Get Euler angles Args: q: a numpy array of joints angle positions. Returns: xyz are the x,y,z coordinates of an end-effector in a Cartesian space. """ mat = ur_utils.forward(q, self._ik_params) orientation = mat[0:3, 0:3] roll = -orientation[1, 2] pitch = orientation[0, 2] yaw = -orientation[0, 1] return Vector3(roll, pitch, yaw) def cvt_quat_to_euler(self, quat): euler_rpy = Vector3() euler = euler_from_quaternion( [self.quat.x, self.quat.y, self.quat.z, self.quat.w]) euler_rpy.x = euler[0] euler_rpy.y = euler[1] euler_rpy.z = euler[2] return euler_rpy def init_joints_pose(self, init_pos): """ We initialise the Position variable that saves the desired position where we want our joints to be :param init_pos: :return: """ self.current_joint_pose = [] self.current_joint_pose = copy.deepcopy(init_pos) return self.current_joint_pose def get_euclidean_dist(self, p_in, p_pout): """ Given a Vector3 Object, get distance from current position :param p_end: :return: """ a = numpy.array((p_in.x, p_in.y, p_in.z)) b = numpy.array((p_pout.x, p_pout.y, p_pout.z)) distance = numpy.linalg.norm(a - b) return distance def joints_state_callback(self, msg): self.joints_state = msg def wrench_stamped_callback(self, msg): self.wrench_stamped = msg def link_state_callback(self, msg): self.link_state = msg self.end_effector = self.link_state.pose[12] self.imu_link = self.link_state.pose[5] self.door_frame = self.link_state.pose[1] self.door = self.link_state.pose[2] def r_image_callback(self, msg): self.right_image = msg def l_image_callback(self, msg): self.left_image = msg def get_observations(self): """ Returns the state of the robot needed for OpenAI QLearn Algorithm The state will be defined by an array :return: observation """ joint_states = self.joints_state eef_rpy = Vector3() self.force = self.wrench_stamped.wrench.force self.torque = self.wrench_stamped.wrench.torque # print("[force]", self.force.x, self.force.y, self.force.z) # print("[torque]", self.torque.x, self.torque.y, self.torque.z) shp_joint_ang = joint_states.position[2] shl_joint_ang = joint_states.position[1] elb_joint_ang = joint_states.position[0] wr1_joint_ang = joint_states.position[9] wr2_joint_ang = joint_states.position[10] wr3_joint_ang = joint_states.position[11] shp_joint_vel = joint_states.velocity[2] shl_joint_vel = joint_states.velocity[1] elb_joint_vel = joint_states.velocity[0] wr1_joint_vel = joint_states.velocity[9] wr2_joint_vel = joint_states.velocity[10] wr3_joint_vel = joint_states.velocity[11] q = [ shp_joint_ang, shl_joint_ang, elb_joint_ang, wr1_joint_ang, wr2_joint_ang, wr3_joint_ang ] eef_x, eef_y, eef_z = self.get_xyz(q) eef_x_ini, eef_y_ini, eef_z_ini = self.get_xyz(self.init_joint_pose2) eef_rpy = self.get_orientation(q) eef_rpy_ini = self.get_orientation(self.init_joint_pose2) r_image = self.right_image l_image = self.left_image observation = [] # rospy.logdebug("List of Observations==>"+str(self.observations)) for obs_name in self.observations: if obs_name == "shp_joint_ang": observation.append( (shp_joint_ang - self.init_joint_pose2[0]) * self.joint_n) elif obs_name == "shl_joint_ang": observation.append( (shl_joint_ang - self.init_joint_pose2[1]) * self.joint_n) elif obs_name == "elb_joint_ang": observation.append( (elb_joint_ang - self.init_joint_pose2[2]) * self.joint_n) elif obs_name == "wr1_joint_ang": observation.append( (wr1_joint_ang - self.init_joint_pose2[3]) * self.joint_n) elif obs_name == "wr2_joint_ang": observation.append( (wr2_joint_ang - self.init_joint_pose2[4]) * self.joint_n) elif obs_name == "wr3_joint_ang": observation.append( (wr3_joint_ang - self.init_joint_pose2[5]) * self.joint_n) elif obs_name == "shp_joint_vel": observation.append(shp_joint_vel) elif obs_name == "shl_joint_vel": observation.append(shl_joint_vel) elif obs_name == "elb_joint_vel": observation.append(elb_joint_vel) elif obs_name == "wr1_joint_vel": observation.append(wr1_joint_vel) elif obs_name == "wr2_joint_vel": observation.append(wr2_joint_vel) elif obs_name == "wr3_joint_vel": observation.append(wr3_joint_vel) elif obs_name == "eef_x": observation.append((eef_x - eef_x_ini) * self.eef_n) elif obs_name == "eef_y": observation.append((eef_y - eef_y_ini) * self.eef_n) elif obs_name == "eef_z": observation.append((eef_z - eef_z_ini) * self.eef_n) elif obs_name == "eef_rpy_x": observation.append( (eef_rpy.x - eef_rpy_ini.x) * self.eef_rpy_n) elif obs_name == "eef_rpy_y": observation.append( (eef_rpy.y - eef_rpy_ini.y) * self.eef_rpy_n) elif obs_name == "eef_rpy_z": observation.append( (eef_rpy.z - eef_rpy_ini.z) * self.eef_rpy_n) elif obs_name == "force_x": observation.append((self.force.x - self.force_ini.x) / self.force_limit * self.force_n) elif obs_name == "force_y": observation.append((self.force.y - self.force_ini.y) / self.force_limit * self.force_n) elif obs_name == "force_z": observation.append((self.force.z - self.force_ini.z) / self.force_limit * self.force_n) elif obs_name == "torque_x": observation.append((self.torque.x - self.torque_ini.x) / self.torque_limit * self.torque_n) elif obs_name == "torque_y": observation.append((self.torque.y - self.torque_ini.y) / self.torque_limit * self.torque_n) elif obs_name == "torque_z": observation.append((self.torque.z - self.torque_ini.z) / self.torque_limit * self.torque_n) elif obs_name == "image_data": for x in range(0, 28): observation.append( (ord(r_image.data[x]) - ord(self.right_image_ini.data[x])) * self.image_n) for x in range(0, 28): observation.append( (ord(l_image.data[x]) - ord(self.left_image_ini.data[x])) * self.image_n) else: raise NameError('Observation Asked does not exist==' + str(obs_name)) # print("observation", list(map(round, observation, [3]*len(observation)))) return observation def clamp_to_joint_limits(self): """ clamps self.current_joint_pose based on the joint limits self._joint_limits { "shp_max": shp_max, "shp_min": shp_min, ... } :return: """ rospy.logdebug("Clamping current_joint_pose>>>" + str(self.current_joint_pose)) shp_joint_value = self.current_joint_pose[0] shl_joint_value = self.current_joint_pose[1] elb_joint_value = self.current_joint_pose[2] wr1_joint_value = self.current_joint_pose[3] wr2_joint_value = self.current_joint_pose[4] wr3_joint_value = self.current_joint_pose[5] self.current_joint_pose[0] = max( min(shp_joint_value, self._joint_limits["shp_max"]), self._joint_limits["shp_min"]) self.current_joint_pose[1] = max( min(shl_joint_value, self._joint_limits["shl_max"]), self._joint_limits["shl_min"]) self.current_joint_pose[2] = max( min(elb_joint_value, self._joint_limits["elb_max"]), self._joint_limits["elb_min"]) self.current_joint_pose[3] = max( min(wr1_joint_value, self._joint_limits["wr1_max"]), self._joint_limits["wr1_min"]) self.current_joint_pose[4] = max( min(wr2_joint_value, self._joint_limits["wr2_max"]), self._joint_limits["wr2_min"]) self.current_joint_pose[5] = max( min(wr3_joint_value, self._joint_limits["wr3_max"]), self._joint_limits["wr3_min"]) rospy.logdebug("DONE Clamping current_joint_pose>>>" + str(self.current_joint_pose)) def first_reset(self): # print("first reset") jointtrajpub = JointTrajPub() for update in range(500): jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos0) time.sleep(0.5) for update in range(300): jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos1) time.sleep(0.5) # Resets the state of the environment and returns an initial observation. def reset(self): self.max_knob_rotation = 0 self.max_door_rotation = 0 self.max_wirst3 = 0 self.min_wirst3 = 0 self.max_wirst2 = 0 self.min_wirst2 = 0 self.max_wirst1 = 0 self.min_wirst1 = 0 self.max_elb = 0 self.min_elb = 0 self.max_shl = 0 self.min_shl = 0 self.max_shp = 0 self.min_shp = 0 self.max_force_x = 0 self.min_force_x = 0 self.max_force_y = 0 self.min_force_y = 0 self.max_force_z = 0 self.min_force_z = 0 self.max_torque_x = 0 self.min_torque_x = 0 self.max_torque_y = 0 self.min_torque_y = 0 self.max_torque_z = 0 self.min_torque_z = 0 # Go to initial position self._gz_conn.unpauseSim() # rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose0)) jointtrajpub = JointTrajPub() for update in range(200): jointtrajpub.GrpCommand(self.arr_init_g_pos1) # time.sleep(1) for update in range(300): jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos2) time.sleep(1) for update in range(300): jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos1) time.sleep(1) # 0st: We pause the Simulator # rospy.logdebug("Pausing SIM...") self._gz_conn.pauseSim() # 1st: resets the simulation to initial values # rospy.logdebug("Reset SIM...") self._gz_conn.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation # rospy.logdebug("Remove Gravity...") self._gz_conn.change_gravity_zero() # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems # rospy.logdebug("reset_ur_joint_controllers...") self._ctrl_conn.reset_ur_joint_controllers(self._ctrl_type) # 3rd: resets the robot to initial conditions # rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose1)) # rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose2)) self.force = self.wrench_stamped.wrench.force self.torque = self.wrench_stamped.wrench.torque # print("self.force", self.force) # print("self.torque", self.torque) self.force_ini = copy.deepcopy(self.force) self.torque_ini = copy.deepcopy(self.torque) # We save that position as the current joint desired position # 4th: We Set the init pose to the jump topic so that the jump control can update # We check the jump publisher has connection if self._ctrl_type == 'traj_pos': self._joint_traj_pubisher.check_publishers_connection() elif self._ctrl_type == 'pos': self._joint_pubisher.check_publishers_connection() elif self._ctrl_type == 'traj_vel': self._joint_traj_pubisher.check_publishers_connection() elif self._ctrl_type == 'vel': self._joint_pubisher.check_publishers_connection() else: rospy.logwarn("Controller type is wrong!!!!") # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints # rospy.logdebug("check_all_systems_ready...") self.check_all_systems_ready() # 6th: We restore the gravity to original # rospy.logdebug("Restore Gravity...") self._gz_conn.adjust_gravity() for update in range(300): jointtrajpub.jointTrajectoryCommand_reset(self.arr_init_pos2) time.sleep(1) for update in range(200): jointtrajpub.GrpCommand(self.arr_init_g_pos2) time.sleep(1) # 7th: pauses simulation # rospy.logdebug("Pause SIM...") self._gz_conn.pauseSim() self.right_image_ini = copy.deepcopy(self.right_image) self.left_image_ini = copy.deepcopy(self.left_image) # 8th: Get the State Discrete Stringuified version of the observations # rospy.logdebug("get_observations...") observation = self.get_observations() # print("[observations]", observation) return observation def _act(self, action): if self._ctrl_type == 'traj_pos': self.pre_ctrl_type = 'traj_pos' self._joint_traj_pubisher.jointTrajectoryCommand(action) elif self._ctrl_type == 'pos': self.pre_ctrl_type = 'pos' self._joint_pubisher.move_joints(action) elif self._ctrl_type == 'traj_vel': self.pre_ctrl_type = 'traj_vel' self._joint_traj_pubisher.jointTrajectoryCommand(action) elif self._ctrl_type == 'vel': self.pre_ctrl_type = 'vel' self._joint_pubisher.move_joints(action) else: self._joint_pubisher.move_joints(action) def training_ok(self): rate = rospy.Rate(1) while self.check_stop_flg() is True: rospy.logdebug("stop_flag is ON!!!!") self._gz_conn.unpauseSim() if self.check_stop_flg() is False: break rate.sleep() def step(self, action, update): ''' ('action: ', array([ 0., 0. , -0., -0., -0. , 0. ], dtype=float32)) ''' # rospy.logdebug("UR step func") # define the logger self.training_ok() # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # Act self._gz_conn.unpauseSim() if self.max_wirst3 < action[5]: self.max_wirst3 = action[5] if self.min_wirst3 > action[5]: self.min_wirst3 = action[5] if self.max_wirst2 < action[4]: self.max_wirst2 = action[4] if self.min_wirst2 > action[4]: self.min_wirst2 = action[4] if self.max_wirst1 < action[3]: self.max_wirst1 = action[3] if self.min_wirst1 > action[3]: self.min_wirst1 = action[3] if self.max_elb < action[2]: self.max_elb = action[2] if self.min_elb > action[2]: self.min_elb = action[2] if self.max_shl < action[1]: self.max_shl = action[1] if self.min_shl > action[1]: self.min_shl = action[1] if self.max_shp < action[0]: self.max_shp = action[0] if self.min_shp > action[0]: self.min_shp = action[0] # print("action", action) action = action + self.arr_init_pos2 # action = [1.488122534496775, -1.4496597816566892, 2.4377209990850974, 2.168370898415174, -1.4670589583209175, 1.4] self._act(action) self.wrench_stamped self.force = self.wrench_stamped.wrench.force self.torque = self.wrench_stamped.wrench.torque # print("force", self.force) # print("torque", self.torque) if self.max_force_x < self.force.x: self.max_force_x = self.force.x if self.min_force_x > self.force.x: self.min_force_x = self.force.x if self.max_force_y < self.force.y: self.max_force_y = self.force.y if self.min_force_y > self.force.y: self.min_force_y = self.force.y if self.max_force_z < self.force.z: self.max_force_z = self.force.z if self.min_force_z > self.force.z: self.min_force_z = self.force.z if self.max_torque_x < self.torque.x: self.max_torque_x = self.torque.x if self.min_torque_x > self.torque.x: self.min_torque_x = self.torque.x if self.max_torque_y < self.torque.y: self.max_torque_y = self.torque.y if self.min_torque_y > self.torque.y: self.min_torque_y = self.torque.y if self.max_torque_z < self.torque.z: self.max_torque_z = self.torque.z if self.min_torque_z > self.torque.z: self.min_torque_z = self.torque.z if self.force_limit < self.force.x or self.force.x < -self.force_limit: self._act(self.previous_action) # print("force.x over the limit") elif self.force_limit < self.force.y or self.force.y < -self.force_limit: self._act(self.previous_action) # print("force.y over the limit") elif self.force_limit < self.force.z or self.force.z < -self.force_limit: self._act(self.previous_action) # print("force.z over the limit") elif self.torque_limit < self.torque.x or self.torque.x < -self.torque_limit: self._act(self.previous_action) # print("torque.x over the limit") elif self.torque_limit < self.torque.y or self.torque.y < -self.torque_limit: self._act(self.previous_action) # print("torque.y over the limit") elif self.torque_limit < self.torque.z or self.torque.z < -self.torque_limit: self._act(self.previous_action) # print("torque.z over the limit") else: self.previous_action = copy.deepcopy(action) # print("True") self.min_static_taxel0 = 0 self.min_static_taxel1 = 0 self.max_static_taxel0 = 0 self.max_static_taxel1 = 0 r_image = self.right_image l_image = self.left_image for x in range(0, 28): if self.min_static_taxel0 > (ord(r_image.data[x]) - ord( self.right_image_ini.data[x])) * self.image_n: self.min_static_taxel0 = (ord(r_image.data[x]) - ord( self.right_image_ini.data[x])) * self.image_n if self.min_static_taxel1 > (ord(l_image.data[x]) - ord( self.left_image_ini.data[x])) * self.image_n: self.min_static_taxel1 = (ord(l_image.data[x]) - ord( self.left_image_ini.data[x])) * self.image_n if self.max_static_taxel0 < (ord(r_image.data[x]) - ord( self.right_image_ini.data[x])) * self.image_n: self.max_static_taxel0 = (ord(r_image.data[x]) - ord( self.right_image_ini.data[x])) * self.image_n if self.max_static_taxel1 < (ord(l_image.data[x]) - ord( self.left_image_ini.data[x])) * self.image_n: self.max_static_taxel1 = (ord(l_image.data[x]) - ord( self.left_image_ini.data[x])) * self.image_n # print("min, max taxel", self.min_static_taxel0, self.max_static_taxel0, self.min_static_taxel1, self.max_static_taxel1) # Then we send the command to the robot and let it go for running_step seconds time.sleep(self.running_step) self._gz_conn.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.get_observations() # finally we get an evaluation based on what happened in the sim reward = self.compute_dist_rewards(action, update) done = self.check_done(update) return observation, reward, done, {} def compute_dist_rewards(self, action, update): self.quat = self.door.orientation self.door_rpy = self.cvt_quat_to_euler(self.quat) self.quat = self.imu_link.orientation self.imu_link_rpy = self.cvt_quat_to_euler(self.quat) compute_rewards = 0 knob_c = 100 #1 rotation of knob(+) knob_bonus_c = 10 #2 bonus of knob rotation(+) panel_c = 50 #3 door panel open(+) panel_b_c = 50 #4 door panel before open(-) tolerances_c = 50 #5 movement of door frame(-) force_c = 1 #6 over force limit1(-) taxel_c = 100 #7 release the knob(-) act_0_n = 10 #8 action[0] (-) act_1_n = 10 # action[1] (-) act_2_n = 10 # action[2] (-) act_3_n = 10 # action[3] (-) act_4_n = 10 # action[4] (-) act_5_n = 10 # action[5] (-) #1 rotation of knob(+), #2 bonus of knob rotation(+), #3 door panel open(+), if self.imu_link_rpy.x < 0.8: compute_rewards = self.imu_link_rpy.x * knob_c print("reward_knob_rotation", compute_rewards) if 0.4 > self.imu_link_rpy.x > 0.2: compute_rewards = self.imu_link_rpy.x * knob_c + knob_bonus_c elif 0.6 > self.imu_link_rpy.x > 0.4: compute_rewards = self.imu_link_rpy.x * knob_c + knob_bonus_c * 2 elif 0.8 > self.imu_link_rpy.x > 0.6: compute_rewards = self.imu_link_rpy.x * knob_c + knob_bonus_c * 3 else: compute_rewards = 0.8 * knob_c + knob_bonus_c * 3 + ( 1.5708061 - self.imu_link_rpy.z) * panel_c #5 movement of door frame(-) if abs(self.door_frame.position.x + 0.0659) > self.tolerances or abs( self.door_frame.position.y - 0.5649) > self.tolerances or abs(self.door_frame.position.z - 0.0999) > self.tolerances: compute_rewards -= tolerances_c * (n_step - update) / n_step + tolerances_c print("door_frame limit", compute_rewards) #6 over force limit1(-) if self.force_limit < self.force.x or self.force.x < -self.force_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("force_x limit", compute_rewards) if self.force_limit < self.force.y or self.force.y < -self.force_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("force_y limit", compute_rewards) if self.force_limit < self.force.z or self.force.z < -self.force_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("force_z limit", compute_rewards) if self.torque_limit < self.torque.x or self.torque.x < -self.torque_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("torque_x limit", compute_rewards) if self.torque_limit < self.torque.y or self.torque.y < -self.torque_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("torque_y limit", compute_rewards) if self.torque_limit < self.torque.z or self.torque.z < -self.torque_limit: compute_rewards -= force_c * (n_step - update) / n_step + force_c print("torque_z limit", compute_rewards) #7 release the knob(-) if self.min_static_taxel0 < self.min_static_limit and self.min_static_taxel1 < self.min_static_limit: compute_rewards -= taxel_c * (n_step - update) / n_step + taxel_c print("min_static limit", compute_rewards) if self.max_static_taxel0 > self.max_static_limit and self.max_static_taxel1 > self.max_static_limit: compute_rewards -= taxel_c * (n_step - update) / n_step + taxel_c print("max_static limit", compute_rewards) #8 joint(+, -) action = action - self.arr_init_pos2 if action[5] < -0.005: compute_rewards -= (-0.005 - action[5]) * act_5_n print("action5 limit", compute_rewards) elif 1 < action[5]: compute_rewards -= (action[5] - 1) * act_5_n print("action5 limit", compute_rewards) if action[4] < -0.005: compute_rewards -= (-0.005 - action[4]) * act_4_n print("action4 limit", compute_rewards) elif 0.03 < action[4]: compute_rewards -= (action[4] - 0.03) * act_4_n print("action4 limit", compute_rewards) if action[3] < -0.023: compute_rewards -= (-0.023 - action[3]) * act_3_n print("action3 limit", compute_rewards) elif 0.005 < action[3]: compute_rewards -= (action[3] - 0.005) * act_3_n print("action3 limit", compute_rewards) if action[2] < -0.005: compute_rewards -= (-0.005 - action[2]) * act_2_n print("action2 limit", compute_rewards) elif 0.14 < action[2]: compute_rewards -= (action[2] - 0.14) * act_2_n print("action2 limit", compute_rewards) if action[1] < -0.11: compute_rewards -= (-0.11 - action[1]) * act_1_n print("action1 limit", compute_rewards) elif 0.005 < action[1]: compute_rewards -= (action[1] - 0.005) * act_1_n print("action1 limit", compute_rewards) if action[0] < -0.015: compute_rewards -= (-0.015 - action[0]) * act_0_n print("action0 limit", compute_rewards) if 0.005 < action[0]: compute_rewards -= (action[0] - 0.005) * act_0_n print("action0 limit", compute_rewards) if self.max_knob_rotation < self.imu_link_rpy.x: self.max_knob_rotation = self.imu_link_rpy.x if self.max_door_rotation < 1.5708061 - self.imu_link_rpy.z: self.max_door_rotation = 1.5708061 - self.imu_link_rpy.z # print("imu_link_rpy", self.imu_link_rpy) # print("door_frame", self.door_frame.position.x + 0.0659, self.door_frame.position.y - 0.5649, self.door_frame.position.z - 0.0999) return compute_rewards def check_done(self, update): if update > 1: if abs(self.door_frame.position.x + 0.0659 ) > self.tolerances or abs(self.door_frame.position.y - 0.5649) > self.tolerances or abs( self.door_frame.position.z - 0.0999) > self.tolerances: print( "########## door frame position over the limit ##########", update) return True elif self.min_static_taxel0 < self.min_static_limit and self.min_static_taxel1 < self.min_static_limit: print("########## static_taxles over the min_limit ##########", update) return True elif self.max_static_taxel0 > self.max_static_limit and self.max_static_taxel1 > self.max_static_limit: print("########## static_taxles over the max_limit ##########", update) return True else: return False else: return False
class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 44 self.action_space = 4 # joint efforts # We assume that a ROS node has already been created before initialising the environment # gets training parameters from param server self.desired_target = Point() self.desired_target.x = rospy.get_param("/desired_target_point/x") self.desired_target.y = rospy.get_param("/desired_target_point/y") self.desired_target.z = rospy.get_param("/desired_target_point/z") self.running_step = rospy.get_param("/running_step") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.inr_r3 = rospy.get_param("/inr_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.mrm_state_object = MrmState( weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_state_object.set_desired_target_point(self.desired_target.x, self.desired_target.y, self.desired_target.z) self.mrm_joint_pubisher_object = JointPub() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) self._seed() # list for recording the minimal value from the laser self.mini_laser = [] # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the controller self.pause_sim_pub.publish(Bool(True)) rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetWorld() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # reset the controllers self.reset_controller_pub.publish(Bool(True)) # 6th: We restore the gravity to original and unpause the sim rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # Get the state of the Robot defined by the XY distances from the # desired point, jointState of the four joints and rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready() # this step is very important!!!! the system will be easily failed if it is removed # We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() self.mrm_state_object.history = [] # clear the history observation = self.mrm_state_object.get_observations() observation.extend([0.0,0.0,0.0,0.0]) # assume the previous action is 0 for the first step self.reset_controller_pub.publish(Bool(False)) return observation def _step(self, action): # We move it to that pos self.gazebo.unpauseSim() self.pause_sim_pub.publish(Bool(False)) self.mrm_joint_pubisher_object.move_joints(action) # Then we send the command to the robot and let it go for running_step seconds start = time.time() rospy.sleep(self.running_step) end = time.time() self.pause_sim_pub.publish(Bool(True)) self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() observation.extend(action) # add previous action to the state if end - start > 1.0: stuck = True else: stuck = False return observation, reward, done, reach_target, stuck, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2])
def __init__(self): rospy.logdebug("Starting URSimReaching Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy( '/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/target_blocks_pose", Point, self.target_point_callback) rospy.Subscriber("/gazebo/link_states", LinkStates, self.link_state_callback) # Gets training parameters from param server self.desired_pose = Pose() self.running_step = rospy.get_param("/running_step") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = { "shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value = rospy.get_param("/init_joint_pose/shp") shl_init_value = rospy.get_param("/init_joint_pose/shl") elb_init_value = rospy.get_param("/init_joint_pose/elb") wr1_init_value = rospy.get_param("/init_joint_pose/wr1") wr2_init_value = rospy.get_param("/init_joint_pose/wr2") wr3_init_value = rospy.get_param("/init_joint_pose/wr3") self.init_joint_pose = [ shp_init_value, shl_init_value, elb_init_value, wr1_init_value, wr2_init_value, wr3_init_value ] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = rospy.get_param("/control_type") self.pre_ctrl_type = self._ctrl_type # We init the observations self.base_orientation = Quaternion() self.target_point = Point() self.link_state = LinkStates() self.joints_state = JointState() self.end_effector = Point() self.distance = 0. # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(6) self.observation_space = 15 #np.arange(self.get_observations().shape[0]) self.reward_range = (-np.inf, np.inf) self._seed() # Change the controller type set_joint_vel_server = rospy.Service('/set_velocity_controller', SetBool, self._set_vel_ctrl) set_joint_traj_vel_server = rospy.Service( '/set_trajectory_velocity_controller', SetBool, self._set_traj_vel_ctrl) self.vel_traj_controller = [ 'joint_state_controller', 'gripper_controller', 'vel_traj_controller' ] self.vel_controller = [ "joint_state_controller", "gripper_controller", "ur_shoulder_pan_vel_controller", "ur_shoulder_lift_vel_controller", "ur_elbow_vel_controller", "ur_wrist_1_vel_controller", "ur_wrist_2_vel_controller", "ur_wrist_3_vel_controller" ] # Helpful False self.stop_flag = False stop_trainning_server = rospy.Service('/stop_training', SetBool, self._stop_trainnig) start_trainning_server = rospy.Service('/start_training', SetBool, self._start_trainnig)
class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 22 self.action_space = 4 # We assume that a ROS node has already been created before initialising the environment self.running_step = rospy.get_param("/running_step") self.done_reward = rospy.get_param("/collision_reward") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="") self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the controller self.pause_sim_pub.publish(Bool(True)) rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetWorld() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # close the controller # self.controllers_object.shut_down_controller() # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # 4th: probably add the function to change the target position is the target is reached # reset the controllers self.reset_controller_pub.publish(Bool(True)) # self.controllers_object.reset_mrm_joint_controllers() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready() # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() rospy.logdebug("get_observations...") observation = self.mrm_state_object.get_observations() self.reset_controller_pub.publish(Bool(False)) return observation def _step(self, joint_targets): # We move it to that pos self.gazebo.unpauseSim() self.pause_sim_pub.publish(Bool(False)) self.mrm_joint_pubisher_object.move_joints(joint_targets) # Then we send the command to the robot and let it go for running_step seconds start = time.time() rospy.sleep(self.running_step) end = time.time() self.pause_sim_pub.publish(Bool(True)) self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() return observation, reward, done, reach_target, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2]) def change_joints_init(self, diameter): joints_inits = { 80: [0.28, -0.78, -0.7, 0], 90: [0.3, -0.8, -0.75, 0], 100: [0.4, -1.05, -0.9, 0], 110: [0.51, -1.27, -0.95, 0], 120: [0.53, -1.29, -0.98, 0] } self.mrm_joint_pubisher_object.init_pos = joints_inits[diameter] def get_joints_init(self): return self.mrm_joint_pubisher_object.init_pos
#!/usr/bin/env python import rospy from joint_publisher import JointPub if __name__ == "__main__": rospy.init_node('jump_control_node', log_level=rospy.INFO) joint_publisher = JointPub() rate_value = 8.0 joint_publisher.move_joints_jump(rate_value)
def __init__(self): # We assume that a ROS node has already been created before initialising the environment # Gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.list_of_observations = rospy.get_param("/list_of_observations") haa_max = rospy.get_param("/joint_limits_array/haa_max") haa_min = rospy.get_param("/joint_limits_array/haa_min") hfe_max = rospy.get_param("/joint_limits_array/hfe_max") hfe_min = rospy.get_param("/joint_limits_array/hfe_min") kfe_max = rospy.get_param("/joint_limits_array/kfe_max") kfe_min = rospy.get_param("/joint_limits_array/kfe_min") self.joint_limits = { "haa_max": haa_max, "haa_min": haa_min, "hfe_max": hfe_max, "hfe_min": hfe_min, "kfe_max": kfe_max, "kfe_min": kfe_min } self.discrete_division = rospy.get_param("/discrete_division") self.maximum_base_linear_acceleration = rospy.get_param( "/maximum_base_linear_acceleration") self.maximum_base_angular_velocity = rospy.get_param( "/maximum_base_angular_velocity") self.maximum_joint_effort = rospy.get_param("/maximum_joint_effort") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") haa_init_value = rospy.get_param("/init_joint_pose/haa") hfe_init_value = rospy.get_param("/init_joint_pose/hfe") kfe_init_value = rospy.get_param("/init_joint_pose/kfe") self.init_joint_pose = [haa_init_value, hfe_init_value, kfe_init_value] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # Jump Increment Value in Radians self.jump_increment = rospy.get_param("/jump_increment") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="ur") self.ur_state_object = URState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, list_of_observations=self.list_of_observations, joint_limits=self.joint_limits, episode_done_criteria=self.episode_done_criteria, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5, discrete_division=self.discrete_division, maximum_base_linear_acceleration=self. maximum_base_linear_acceleration, maximum_base_angular_velocity=self.maximum_base_angular_velocity, maximum_joint_effort=self.maximum_joint_effort, jump_increment=self.jump_increment) self.ur_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.ur_joint_pubisher_object = JointPub() """ For this version, we consider 5 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5) Dont Move 6) Perform One Jump """ self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed()
class URSimEnv(robot_gazebo_env_goal.RobotGazeboEnv): def __init__(self): # We assume that a ROS node has already been created before initialising the environment # Gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.list_of_observations = rospy.get_param("/list_of_observations") haa_max = rospy.get_param("/joint_limits_array/haa_max") haa_min = rospy.get_param("/joint_limits_array/haa_min") hfe_max = rospy.get_param("/joint_limits_array/hfe_max") hfe_min = rospy.get_param("/joint_limits_array/hfe_min") kfe_max = rospy.get_param("/joint_limits_array/kfe_max") kfe_min = rospy.get_param("/joint_limits_array/kfe_min") self.joint_limits = { "haa_max": haa_max, "haa_min": haa_min, "hfe_max": hfe_max, "hfe_min": hfe_min, "kfe_max": kfe_max, "kfe_min": kfe_min } self.discrete_division = rospy.get_param("/discrete_division") self.maximum_base_linear_acceleration = rospy.get_param( "/maximum_base_linear_acceleration") self.maximum_base_angular_velocity = rospy.get_param( "/maximum_base_angular_velocity") self.maximum_joint_effort = rospy.get_param("/maximum_joint_effort") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") haa_init_value = rospy.get_param("/init_joint_pose/haa") hfe_init_value = rospy.get_param("/init_joint_pose/hfe") kfe_init_value = rospy.get_param("/init_joint_pose/kfe") self.init_joint_pose = [haa_init_value, hfe_init_value, kfe_init_value] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # Jump Increment Value in Radians self.jump_increment = rospy.get_param("/jump_increment") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="ur") self.ur_state_object = URState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, list_of_observations=self.list_of_observations, joint_limits=self.joint_limits, episode_done_criteria=self.episode_done_criteria, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5, discrete_division=self.discrete_division, maximum_base_linear_acceleration=self. maximum_base_linear_acceleration, maximum_base_angular_velocity=self.maximum_base_angular_velocity, maximum_joint_effort=self.maximum_joint_effort, jump_increment=self.jump_increment) self.ur_state_object.set_desired_world_point( self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.ur_joint_pubisher_object = JointPub() """ For this version, we consider 5 actions 1-2) Increment/Decrement haa_joint 3-4) Increment/Decrement hfe_joint 5) Dont Move 6) Perform One Jump """ self.action_space = spaces.Discrete(6) self.reward_range = (-np.inf, np.inf) self._seed() # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems rospy.logdebug("reset_ur_joint_controllers...") self.controllers_object.reset_ur_joint_controllers() # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_pose init variable...>>>" + str(self.init_joint_pose)) # We save that position as the current joint desired position init_pos = self.ur_state_object.init_joints_pose(self.init_joint_pose) # 4th: We Set the init pose to the jump topic so that the jump control can update rospy.logdebug("Publish init_pose for Jump Control...>>>" + str(init_pos)) # We check the jump publisher has connection self.ur_joint_pubisher_object.check_publishers_connection() # We move the joints to position, no jump do_jump = False self.ur_joint_pubisher_object.move_joints_jump(init_pos, do_jump) # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints rospy.logdebug("check_all_systems_ready...") self.ur_state_object.check_all_systems_ready() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() # 8th: Get the State Discrete Stringuified version of the observations rospy.logdebug("get_observations...") observation = self.ur_state_object.get_observations() state = self.get_state(observation) return state def _step(self, action): # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # 1st, decide which action corresponds to which joint is incremented next_action_position, do_jump = self.ur_state_object.get_action_to_position( action) # We move it to that pos self.gazebo.unpauseSim() self.ur_joint_pubisher_object.move_joints_jump(next_action_position, do_jump) # Then we send the command to the robot and let it go # for running_step seconds time.sleep(self.running_step) self.gazebo.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.ur_state_object.get_observations() # finally we get an evaluation based on what happened in the sim reward, done = self.ur_state_object.process_data() # Get the State Discrete Stringuified version of the observations state = self.get_state(observation) return state, reward, done, {} def get_state(self, observation): """ We retrieve the Stringuified-Discrete version of the given observation :return: state """ return self.ur_state_object.get_state_as_string(observation)
def __init__(self): rospy.logdebug("Starting URSimDoorOpening Class object...") # Init GAZEBO Objects self.set_obj_state = rospy.ServiceProxy('/gazebo/set_model_state', SetModelState) self.get_world_state = rospy.ServiceProxy('/gazebo/get_world_properties', GetWorldProperties) # Subscribe joint state and target pose rospy.Subscriber("/ft_sensor_topic", WrenchStamped, self.wrench_stamped_callback) rospy.Subscriber("/joint_states", JointState, self.joints_state_callback) rospy.Subscriber("/gazebo/link_states", LinkStates, self.link_state_callback) # Gets training parameters from param server self.desired_pose = Pose() self.running_step = rospy.get_param("/running_step") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.observations = rospy.get_param("/observations") # Joint limitation shp_max = rospy.get_param("/joint_limits_array/shp_max") shp_min = rospy.get_param("/joint_limits_array/shp_min") shl_max = rospy.get_param("/joint_limits_array/shl_max") shl_min = rospy.get_param("/joint_limits_array/shl_min") elb_max = rospy.get_param("/joint_limits_array/elb_max") elb_min = rospy.get_param("/joint_limits_array/elb_min") wr1_max = rospy.get_param("/joint_limits_array/wr1_max") wr1_min = rospy.get_param("/joint_limits_array/wr1_min") wr2_max = rospy.get_param("/joint_limits_array/wr2_max") wr2_min = rospy.get_param("/joint_limits_array/wr2_min") wr3_max = rospy.get_param("/joint_limits_array/wr3_max") wr3_min = rospy.get_param("/joint_limits_array/wr3_min") self.joint_limits = {"shp_max": shp_max, "shp_min": shp_min, "shl_max": shl_max, "shl_min": shl_min, "elb_max": elb_max, "elb_min": elb_min, "wr1_max": wr1_max, "wr1_min": wr1_min, "wr2_max": wr2_max, "wr2_min": wr2_min, "wr3_max": wr3_max, "wr3_min": wr3_min } shp_init_value0 = rospy.get_param("/init_joint_pose0/shp") shl_init_value0 = rospy.get_param("/init_joint_pose0/shl") elb_init_value0 = rospy.get_param("/init_joint_pose0/elb") wr1_init_value0 = rospy.get_param("/init_joint_pose0/wr1") wr2_init_value0 = rospy.get_param("/init_joint_pose0/wr2") wr3_init_value0 = rospy.get_param("/init_joint_pose0/wr3") self.init_joint_pose0 = [shp_init_value0, shl_init_value0, elb_init_value0, wr1_init_value0, wr2_init_value0, wr3_init_value0] shp_init_value1 = rospy.get_param("/init_joint_pose1/shp") shl_init_value1 = rospy.get_param("/init_joint_pose1/shl") elb_init_value1 = rospy.get_param("/init_joint_pose1/elb") wr1_init_value1 = rospy.get_param("/init_joint_pose1/wr1") wr2_init_value1 = rospy.get_param("/init_joint_pose1/wr2") wr3_init_value1 = rospy.get_param("/init_joint_pose1/wr3") self.init_joint_pose1 = [shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1] # print("[init_joint_pose1]: ", [shp_init_value1, shl_init_value1, elb_init_value1, wr1_init_value1, wr2_init_value1, wr3_init_value1]) shp_init_value2 = rospy.get_param("/init_joint_pose2/shp") shl_init_value2 = rospy.get_param("/init_joint_pose2/shl") elb_init_value2 = rospy.get_param("/init_joint_pose2/elb") wr1_init_value2 = rospy.get_param("/init_joint_pose2/wr1") wr2_init_value2 = rospy.get_param("/init_joint_pose2/wr2") wr3_init_value2 = rospy.get_param("/init_joint_pose2/wr3") self.init_joint_pose2 = [shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2] # print("[init_joint_pose2]: ", [shp_init_value2, shl_init_value2, elb_init_value2, wr1_init_value2, wr2_init_value2, wr3_init_value2]) shp_after_rotate = rospy.get_param("/eelink_pose_after_rotate/shp") shl_after_rotate = rospy.get_param("/eelink_pose_after_rotate/shl") elb_after_rotate = rospy.get_param("/eelink_pose_after_rotate/elb") wr1_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr1") wr2_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr2") wr3_after_rotate = rospy.get_param("/eelink_pose_after_rotate/wr3") self.after_rotate = [shp_after_rotate, shl_after_rotate, elb_after_rotate, wr1_after_rotate, wr2_after_rotate, wr3_after_rotate] shp_after_pull = rospy.get_param("/eelink_pose_after_pull/shp") shl_after_pull = rospy.get_param("/eelink_pose_after_pull/shl") elb_after_pull = rospy.get_param("/eelink_pose_after_pull/elb") wr1_after_pull = rospy.get_param("/eelink_pose_after_pull/wr1") wr2_after_pull = rospy.get_param("/eelink_pose_after_pull/wr2") wr3_after_pull = rospy.get_param("/eelink_pose_after_pull/wr3") self.after_pull = [shp_after_pull, shl_after_pull, elb_after_pull, wr1_after_pull, wr2_after_pull, wr3_after_pull] r_drv_value1 = rospy.get_param("/init_grp_pose1/r_drive") l_drv_value1 = rospy.get_param("/init_grp_pose1/l_drive") r_flw_value1 = rospy.get_param("/init_grp_pose1/r_follower") l_flw_value1 = rospy.get_param("/init_grp_pose1/l_follower") r_spr_value1 = rospy.get_param("/init_grp_pose1/r_spring") l_spr_value1 = rospy.get_param("/init_grp_pose1/l_spring") r_drv_value2 = rospy.get_param("/init_grp_pose2/r_drive") l_drv_value2 = rospy.get_param("/init_grp_pose2/l_drive") r_flw_value2 = rospy.get_param("/init_grp_pose2/r_follower") l_flw_value2 = rospy.get_param("/init_grp_pose2/l_follower") r_spr_value2 = rospy.get_param("/init_grp_pose2/r_spring") l_spr_value2 = rospy.get_param("/init_grp_pose2/l_spring") self.init_grp_pose1 = [r_drv_value1, l_drv_value1, r_flw_value1, l_flw_value1, r_spr_value1, l_spr_value1] self.init_grp_pose2 = [r_drv_value2, l_drv_value2, r_flw_value2, l_flw_value2, r_spr_value2, l_spr_value2] # Fill in the Done Episode Criteria list self.episode_done_criteria = rospy.get_param("/episode_done_criteria") # stablishes connection with simulator self._gz_conn = GazeboConnection() self._ctrl_conn = ControllersConnection(namespace="") # Controller type for ros_control self._ctrl_type = rospy.get_param("/control_type") self.pre_ctrl_type = self._ctrl_type # Get the force and troque limit self.force_limit = rospy.get_param("/force_limit") self.torque_limit = rospy.get_param("/torque_limit") # We init the observations self.base_orientation = Quaternion() self.imu_link = Quaternion() self.door = Quaternion() self.quat = Quaternion() self.imu_link_rpy = Vector3() self.door_rpy = Vector3() self.link_state = LinkStates() self.wrench_stamped = WrenchStamped() self.joints_state = JointState() self.end_effector = Point() self.previous_action =[] self.counter = 0 self.max_rewards = 1 # Arm/Control parameters self._ik_params = setups['UR5_6dof']['ik_params'] # ROS msg type self._joint_pubisher = JointPub() self._joint_traj_pubisher = JointTrajPub() # Gym interface and action self.action_space = spaces.Discrete(6) self.observation_space = 21 #np.arange(self.get_observations().shape[0]) ## self.observation_space = 15 #np.arange(self.get_observations().shape[0]) self.reward_range = (-np.inf, np.inf) self._seed() # Change the controller type set_joint_pos_server = rospy.Service('/set_position_controller', SetBool, self._set_pos_ctrl) set_joint_traj_pos_server = rospy.Service('/set_trajectory_position_controller', SetBool, self._set_traj_pos_ctrl) set_joint_vel_server = rospy.Service('/set_velocity_controller', SetBool, self._set_vel_ctrl) set_joint_traj_vel_server = rospy.Service('/set_trajectory_velocity_controller', SetBool, self._set_traj_vel_ctrl) # set_gripper_server = rospy.Service('/set_gripper_controller', SetBool, self._set_grp_ctrl) self.pos_traj_controller = ['joint_state_controller', 'gripper_controller', 'pos_traj_controller'] self.pos_controller = ["joint_state_controller", "gripper_controller", "ur_shoulder_pan_pos_controller", "ur_shoulder_lift_pos_controller", "ur_elbow_pos_controller", "ur_wrist_1_pos_controller", "ur_wrist_2_pos_controller", "ur_wrist_3_pos_controller"] self.vel_traj_controller = ['joint_state_controller', 'gripper_controller', 'vel_traj_controller'] self.vel_controller = ["joint_state_controller", "gripper_controller", "ur_shoulder_pan_vel_controller", "ur_shoulder_lift_vel_controller", "ur_elbow_vel_controller", "ur_wrist_1_vel_controller", "ur_wrist_2_vel_controller", "ur_wrist_3_vel_controller"] # Helpful False self.stop_flag = False stop_trainning_server = rospy.Service('/stop_training', SetBool, self._stop_trainnig) start_trainning_server = rospy.Service('/start_training', SetBool, self._start_trainnig)
class BipedEnv(gym.Env): def __init__(self): # We assume that a ROS node has already been created # before initialising the environment # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="biped") self.biped_state_object = BipedState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5 ) self.biped_state_object.set_desired_world_point(self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.biped_joint_pubisher_object = JointPub() """ For this version, we consider 12 actions 1-2) Increment/Decrement L_haa_joint 3-4) Increment/Decrement L_hfe_joint 5-6) Increment/Decrement L_kfe_joint 7-8) Increment/Decrement R_haa_joint 9-10) Increment/Decrement R_hfe_joint 11-12) Increment/Decrement R_kfe_joint """ self.action_space = spaces.Discrete(12) self.reward_range = (-np.inf, np.inf) self._seed() # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetSim() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # EXTRA: Reset JoinStateControlers because sim reset doesnt reset TFs, generating time problems rospy.logdebug("reset_biped_joint_controllers...") self.controllers_object.reset_biped_joint_controllers() # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_pose...") self.biped_joint_pubisher_object.set_init_pose() # 5th: Check all subscribers work. # Get the state of the Robot defined by its RPY orientation, distance from # desired point, contact force and JointState of the three joints rospy.logdebug("check_all_systems_ready...") self.biped_state_object.check_all_systems_ready() rospy.logdebug("get_observations...") observation = self.biped_state_object.get_observations() # 6th: We restore the gravity to original rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # 7th: pauses simulation rospy.logdebug("Pause SIM...") self.gazebo.pauseSim() # Get the State Discrete Stringuified version of the observations state = self.get_state(observation) return state def _step(self, action): # Given the action selected by the learning algorithm, # we perform the corresponding movement of the robot # 1st, decide which action corresponsd to which joint is incremented next_action_position = self.biped_state_object.get_action_to_position(action) # We move it to that pos self.gazebo.unpauseSim() self.biped_joint_pubisher_object.move_joints(next_action_position) # Then we send the command to the robot and let it go # for running_step seconds time.sleep(self.running_step) self.gazebo.pauseSim() # We now process the latest data saved in the class state to calculate # the state and the rewards. This way we guarantee that they work # with the same exact data. # Generate State based on observations observation = self.biped_state_object.get_observations() # finally we get an evaluation based on what happened in the sim reward,done = self.biped_state_object.process_data() # Get the State Discrete Stringuified version of the observations state = self.get_state(observation) return state, reward, done, {} def get_state(self, observation): """ We retrieve the Stringuified-Discrete version of the given observation :return: state """ return self.biped_state_object.get_state_as_string(observation)
def __init__(self): # We assume that a ROS node has already been created # before initialising the environment # gets training parameters from param server self.desired_pose = Pose() self.desired_pose.position.x = rospy.get_param("/desired_pose/x") self.desired_pose.position.y = rospy.get_param("/desired_pose/y") self.desired_pose.position.z = rospy.get_param("/desired_pose/z") self.running_step = rospy.get_param("/running_step") self.max_incl = rospy.get_param("/max_incl") self.max_height = rospy.get_param("/max_height") self.min_height = rospy.get_param("/min_height") self.joint_increment_value = rospy.get_param("/joint_increment_value") self.done_reward = rospy.get_param("/done_reward") self.alive_reward = rospy.get_param("/alive_reward") self.desired_force = rospy.get_param("/desired_force") self.desired_yaw = rospy.get_param("/desired_yaw") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.weight_r4 = rospy.get_param("/weight_r4") self.weight_r5 = rospy.get_param("/weight_r5") # stablishes connection with simulator self.gazebo = GazeboConnection() self.controllers_object = ControllersConnection(namespace="simplicity") self.simplicity_state_object = SimplicityState( max_height=self.max_height, min_height=self.min_height, abs_max_roll=self.max_incl, abs_max_pitch=self.max_incl, joint_increment_value=self.joint_increment_value, done_reward=self.done_reward, alive_reward=self.alive_reward, desired_force=self.desired_force, desired_yaw=self.desired_yaw, weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3, weight_r4=self.weight_r4, weight_r5=self.weight_r5 ) self.simplicity_state_object.set_desired_world_point(self.desired_pose.position.x, self.desired_pose.position.y, self.desired_pose.position.z) self.simplicity_joint_pubisher_object = JointPub() """ For this version, we consider 6 actions 1-2: Increment/Decrement front_left_shoulder 3-4: Increment/Decrement front_left_thigh 5-6: Increment/Decrement front_left_shank 7-8: Increment/Decrement front_right_shoulder 9-10: Increment/Decrement front_right_thigh 11-12: Increment/Decrement front_right_shank 13-14: Increment/Decrement back_left_shoulder 15-16: Increment/Decrement back_left_thigh 17-18: Increment/Decrement back_left_shank 19-20: Increment/Decrement back_right_shoulder 21-22: Increment/Decrement back_right_thigh 23-24: Increment/Decrement back_right_shank """ self.action_space = spaces.Discrete(24) self.reward_range = (-np.inf, np.inf) self.seed()
class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 44 self.action_space = 4 # joint efforts # We assume that a ROS node has already been created before initialising the environment self.running_step = rospy.get_param("/running_step") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") # self.inr_r3 = rospy.get_param("/inr_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_joint_pubisher_object = JointPub() self._seed() self.reset_controller_pub = rospy.Publisher('/reset_controller', Bool, queue_size=1) self.pause_sim_pub = rospy.Publisher('/pause_sim', Bool, queue_size=1) # list for recording the minimal value from the laser self.mini_laser = [] self.moco = model_control() self.pipe_angle = 0.0 # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): # 0st: We pause the controller self.pause_sim_pub.publish(Bool(True)) rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() # 1st: resets the simulation to initial values rospy.logdebug("Reset SIM...") self.gazebo.resetWorld() # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # reset the controllers self.reset_controller_pub.publish(Bool(True)) # 6th: We restore the gravity to original and unpause the sim rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # Get the state of the Robot defined by the XY distances from the # desired point, jointState of the four joints and rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready( ) # this step is very important!!!! the system will be easily failed if it is removed # delete the old target self.moco.delete_model('target') # spawn the new target target_pos = self.change_target_pos(self.moco, self.pipe_angle) # set the target point for the gym env self.set_target_position([target_pos[0], target_pos[1], 0.0]) # We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() self.mrm_state_object.history = [] # clear the history observation = self.mrm_state_object.get_observations() observation.extend( [0.0, 0.0, 0.0, 0.0]) # assume the previous action is 0 for the first step self.reset_controller_pub.publish(Bool(False)) return observation def _step(self, action): # We move it to that pos self.gazebo.unpauseSim() self.pause_sim_pub.publish(Bool(False)) self.mrm_joint_pubisher_object.move_joints(action) start = time.time() rospy.sleep(self.running_step) end = time.time() self.pause_sim_pub.publish(Bool(True)) self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() observation.extend(action) # add previous action to the state # record the minimal value of laser data every time step # laser_data = observation[8:18] # self.mini_laser.append(min(laser_data)) if end - start > 1.6: stuck = True else: stuck = False return observation, reward, done, reach_target, stuck, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2]) def change_target_pos(self, moco, angle): # how far will be the target away from the turning center approximately dis_target_left = [0.22, 0.23] b = 0.07 + random.uniform(0.0, 0.018) dis_target = np.asarray( random.uniform(dis_target_left[0], dis_target_left[1])) dis_target += (-0.018 + (angle - 60) / 10 * 0.006) target_pos = [ 0.07 + dis_target * cos((angle - 90.0) / 180 * pi) - b * sin( (angle - 90.0) / 180 * pi), -0.31 - dis_target * sin( (angle - 90.0) / 180 * pi) - b * cos( (angle - 90.0) / 180 * pi) ] # add some noise to vertical position # target_pos[1] -= random.uniform(0.04, 0.07) target_name = 'target' # the model name and namespace targetdir = '/home/joshua/robot_arm_ws/src/mrm_training_force/urdf/target.xacro' # visulize the target in Gazebo moco.spawn_model(targetdir, target_name, target_name, [target_pos[0], target_pos[1], 0.0], [0, 0, 0]) return target_pos
#!/usr/bin/env python import rospy from joint_publisher import JointPub if __name__ == "__main__": rospy.init_node('jump_control_node', log_level=rospy.INFO) joint_publisher = JointPub() rate_value = 8.0 joint_publisher.start_jump_loop(rate_value)
class MrmEnv(gym.Env): def __init__(self): # specify the dimension of observation space and action space self.observation_space = 44 self.action_space = 4 # joint efforts # We assume that a ROS node has already been created before initialising the environment # gets training parameters from param server self.desired_target = Point() self.desired_target.x = rospy.get_param("/desired_target_point/x") self.desired_target.y = rospy.get_param("/desired_target_point/y") self.desired_target.z = rospy.get_param("/desired_target_point/z") self.running_step = rospy.get_param("/running_step") self.weight_r1 = rospy.get_param("/weight_r1") self.weight_r2 = rospy.get_param("/weight_r2") self.weight_r3 = rospy.get_param("/weight_r3") self.inr_r3 = rospy.get_param("/inr_r3") # stablishes connection with simulator self.gazebo = GazeboConnection() self.mrm_state_object = MrmState(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) self.mrm_state_object.set_desired_target_point(self.desired_target.x, self.desired_target.y, self.desired_target.z) self.mrm_joint_pubisher_object = JointPub() self._seed() self.passed_episode = 0 self.total_episode = rospy.get_param("/total_episode") # list for recording the minimal value from the laser self.mini_laser = [] self.obstacle_penalty = 0.0 # A function to initialize the random generator def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # Resets the state of the environment and returns an initial observation. def _reset(self): self.passed_episode += 1 # 2nd: We Set the gravity to 0.0 so that we dont fall when reseting joints # It also UNPAUSES the simulation rospy.logdebug("Remove Gravity...") self.gazebo.change_gravity(0.0, 0.0, 0.0) # 3rd: resets the robot to initial conditions rospy.logdebug("set_init_condition...") self.mrm_joint_pubisher_object.set_init_condition() # change the weights of rewards to enable curriculum learning self.weight_r3 **= self.inr_r3 #self.weight_r3 = 1 - np.exp(-90 / (self.total_episode - self.passed_episode)) self.mrm_state_object.change_weights(weight_r1=self.weight_r1, weight_r2=self.weight_r2, weight_r3=self.weight_r3) # Get the state of the Robot defined by the XY distances from the # desired point, jointState of the four joints and rospy.logdebug("check_all_systems_ready...") self.mrm_state_object.check_all_systems_ready( ) # this step is very important!!!! the system will be easily failed if it is removed # 6th: We restore the gravity to original and unpause the sim rospy.logdebug("Restore Gravity...") self.gazebo.change_gravity(0.0, 0.0, -9.81) # We pause the Simulator rospy.logdebug("Pausing SIM...") self.gazebo.pauseSim() self.mrm_state_object.history = [] # clear the history observation = self.mrm_state_object.get_observations() observation.extend( [0.0, 0.0, 0.0, 0.0]) # assume the previous action is 0 for the first step return observation def _step(self, action): # We apply torques to each joint for a duration of time; in order to apply torques to each joint at the same time, we call the service while # gazebo is paused self.mrm_joint_pubisher_object.apply_joints_effort( action, self.running_step) # unpasue the simulation and start applying the effort self.gazebo.unpauseSim() rospy.sleep(self.running_step) # pause the simelation again to process data self.gazebo.pauseSim() # finally we get an evaluation based on what happened in the sim reward, reach_target, done = self.mrm_state_object.process_data() # Generate State based on observations observation = self.mrm_state_object.get_observations() observation.extend(action) # add previous action to the state # record the minimal value of laser data every time step laser_data = observation[8:18] self.mini_laser.append(min(laser_data)) return observation, reward, done, reach_target, {} def set_target_position(self, target_pos): self.mrm_state_object.set_desired_target_point(target_pos[0], target_pos[1], target_pos[2])