def _reward(self): #print(__reward) self.__reward = 1000 robotId = self._robot.id() # if (robotId == RC.CJACO_ARM_HAND or robotId == RC.CKUKA_ARM_BARRETT_HAND or robotId == RC.CKUKA_ARM_SUCTION_PAD or robotId == RC.CUR5_ARM_BARRETT_HAND or robotId == RC.CHEXAPOD): # ------------------------------------------------------------------------------------------------ # BALANCE TASK ----------------------------------------------------------------------------------- # ballPos = RC.getObjectWorldPosition(RC.CBALL_OBJ_NAME) basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME) d = math.sqrt((ballPos[0] - basePlatePos[0])**2 + (ballPos[1] - basePlatePos[1])**2 + (ballPos[2] - basePlatePos[2])**2) #print('DDDD:', d) self.__reward -= d if (RC.GB_TRACE): print('self.__reward:', self.__reward) return self.__reward
def getObservation(self, action): self._observation = self._robot.getObservation() #if(RC.isTaskObjSuctionBalancePlate()): # Vel-trained joint vel, action[1] --> Refer to the V-Rep Server Robot script to confirm this! #self._observation.append(np.array(action[1], dtype=np.float32)) #robotId = self._robot.id() if (RC.isTaskObjSuctionObjectSupport()): # Cuboid Pos ------------------------------------------------------------------------------------------ cuboidPos = RC.getObjectWorldPosition(RC.CCUBOID_OBJ_NAME) self._observation.append(np.array(cuboidPos[0], dtype=np.float32)) self._observation.append(np.array(cuboidPos[1], dtype=np.float32)) self._observation.append(np.array(cuboidPos[2], dtype=np.float32)) # Distance from base plate pos and cuboid center ------------------------------------------------------ basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME) d = math.sqrt((cuboidPos[0] - basePlatePos[0])**2 + (cuboidPos[1] - basePlatePos[1])**2) self._observation.append(np.array(d, dtype=np.float32)) # Orientation of the base plate ----------------------------------------------------------------------- basePlateOrient = RC.getObjectOrientation( RC.CBASE_PLATE_OBJ_NAME) # Must be the same name set in V-Rep alpha1 = basePlateOrient[0] # Around x beta1 = basePlateOrient[1] # Around y gamma1 = basePlateOrient[2] # Around z #print('Plate Orient', plateOrient) self._observation.append(np.array(alpha1, dtype=np.float32)) self._observation.append(np.array(beta1, dtype=np.float32)) return self._observation
def isObjGroundHit(self): objName = '' if (RC.isTaskObjHandBalance() or RC.isTaskObjSuctionBalance() or RC.isTaskObjHexapodBalance()): objName = RC.CPLATE_OBJ_NAME elif (RC.isTaskObjHold()): objName = RC.CTUBE_OBJ_NAME elif (RC.isTaskObjCatch()): objName = RC.CBALL_OBJ_NAME elif (RC.isTaskObjTimelyCatch()): objName = RC.CCUBOID_OBJ_NAME pos = RC.getObjectWorldPosition(objName) # V-REP API Ground Hit Check #res = self.detectObjectsReachGround() #if(res): #self._objGroundHit = True ##RC.stopSimulation(self._clientID) ##self._robot.commandJointVibration(0) #return True if (RC.isTaskObjTimelyCatch()): res = (pos[2] <= 0.1) else: res = (pos[2] < 0.5) return res
def getObservation(self, action): self._observation = self._robot.getObservation() #if(RC.isTaskObjSuctionBalancePlate()): # Vel-trained joint vel, action[1] --> Refer to the V-Rep Server Robot script to confirm this! #self._observation.append(np.array(action[1], dtype=np.float32)) #robotId = self._robot.id() if (RC.isTaskObjSuctionObjectSupport()): ballPos = RC.getObjectWorldPosition(RC.CBALL_OBJ_NAME) self._observation.append(np.array(ballPos[0], dtype=np.float32)) self._observation.append(np.array(ballPos[1], dtype=np.float32)) self._observation.append(np.array(ballPos[2], dtype=np.float32)) basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME) d = math.sqrt((ballPos[0] - basePlatePos[0])**2 + (ballPos[1] - basePlatePos[1])**2 + (ballPos[2] - basePlatePos[2])**2) self._observation.append(np.array(d, dtype=np.float32)) return self._observation
def _reward(self): #print(__reward) self.__reward = 1000 robotId = self._robot.id() # if (robotId == RC.CJACO_ARM_HAND or robotId == RC.CKUKA_ARM_BARRETT_HAND or robotId == RC.CKUKA_ARM_SUCTION_PAD or robotId == RC.CUR5_ARM_BARRETT_HAND or robotId == RC.CHEXAPOD): # ---------------------------------------------------------------------------------------------------------- # BALANCE TASK --------------------------------------------------------------------------------------------- # #if(not self.isObjAwayFromBasePlate()): cuboidPos = RC.getObjectWorldPosition(RC.CCUBOID_OBJ_NAME) basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME) d = math.sqrt((cuboidPos[0] - basePlatePos[0])**2 + (cuboidPos[1] - basePlatePos[1])**2) #print('DDDD:', d) self.__reward -= d # Orientation of the base plate ----------------------------------------------------------------------- basePlateOrient = RC.getObjectOrientation( RC.CBASE_PLATE_OBJ_NAME) # Must be the same name set in V-Rep alpha1 = basePlateOrient[0] # Around x beta1 = basePlateOrient[1] # Around y gamma1 = basePlateOrient[2] # Around z #print('Plate Orient', plateOrient) self.__reward -= (abs(alpha1) + abs(beta1)) #else: #self.__reward -= 100 if (RC.GB_TRACE): print('self.__reward:', self.__reward) return self.__reward
def objectAvoidanceTraining(self): # self._objs # # 3.1 Objects Pos & Velocity # Compose Environment Info (State input) baseCurPos = self._robot.getCurrentBaseJointPos() print('BASE CUR POS:', baseCurPos) envInfo = [baseCurPos] objsPos = [] for i in range(self._CNUM_OBJS): objInfo = [] #print("Obj Ids: ", i, self._objs[i].id()) objPos = RC.getObjectWorldPosition(RC.CFALL_OBJS_NAMES[i]) objsPos.append(objPos) # This returns a list of two vector3 values (3 floats in a list) representing the linear velocity [x,y,z] # and angular velocity [wx,wy,wz] in Cartesian worldspace coordinates. objLinearVel = RC.getObjectVelocity(RC.CFALL_OBJS_NAMES[i]) #print("OBJPS:",i, objPos) objInfo += objPos #objInfo.append(objLinearVel[2]) # zVel only envInfo.append(objInfo) #print("OBJINFO:", objInfo) # 5. ACTION ------------------------------------------------------------------------- # actionId = self._robotBot.act(envInfo) print("Robot Act: ", actionId) self._robot.actRobot(actionId) # 6. UPDATE OBJECTS ROBOT-HIT & GROUND-HIT STATUS for i in range(self._CNUM_OBJS): #print("CHECK KUKA COLLISION WITH BALL", RC.CFALL_OBJS_NAMES[i]) if(self._robot.detectCollisionWith(RC.CFALL_OBJS_NAMES[i]) or \ self.detectObjectsReachGround()): self.setTerminated(1) print('Terminate') break
def doInverseKinematicsCalculation(self): count = 0 target_index = 0 change_target_time = dt * 1000 vmax = 0.5 kp = 200.0 kv = np.sqrt(kp) # define variables to share with nengo q = np.zeros(len(joint_handles)) dq = np.zeros(len(joint_handles)) # NOTE: main loop starts here ----------------------------------------- target_xyz = RC.getObjectWorldPosition("obstacle") track_target.append(np.copy(target_xyz)) # store for plotting target_xyz = np.asarray(target_xyz) for ii, joint_handle in enumerate(joint_handles): old_q = np.copy(q) # get the joint angles _, q[ii] = vrep.simxGetJointPosition(clientID, joint_handle, vrep.simx_opmode_blocking) if _ != 0: raise Exception() # get the joint velocity _, dq[ii] = vrep.simxGetObjectFloatParameter( clientID, joint_handle, 2012, # parameter ID for angular velocity of the joint vrep.simx_opmode_blocking) if _ != 0: raise Exception() # calculate position of the end-effector # derived in the ur5 calc_TnJ class xyz = robot_config.Tx('EE', q) # calculate the Jacobian for the end effector JEE = robot_config.J('EE', q) # calculate the inertia matrix in joint space Mq = robot_config.Mq(q) # calculate the effect of gravity in joint space Mq_g = robot_config.Mq_g(q) # convert the mass compensation into end effector space Mx_inv = np.dot(JEE, np.dot(np.linalg.inv(Mq), JEE.T)) svd_u, svd_s, svd_v = np.linalg.svd(Mx_inv) # cut off any singular values that could cause control problems singularity_thresh = .00025 for i in range(len(svd_s)): svd_s[i] = 0 if svd_s[i] < singularity_thresh else \ 1./float(svd_s[i]) # numpy returns U,S,V.T, so have to transpose both here Mx = np.dot(svd_v.T, np.dot(np.diag(svd_s), svd_u.T)) # calculate desired force in (x,y,z) space dx = np.dot(JEE, dq) # implement velocity limiting lamb = kp / kv x_tilde = xyz - target_xyz sat = vmax / (lamb * np.abs(x_tilde)) scale = np.ones(3) if np.any(sat < 1): index = np.argmin(sat) unclipped = kp * x_tilde[index] clipped = kv * vmax * np.sign(x_tilde[index]) scale = np.ones(3) * clipped / unclipped scale[index] = 1 u_xyz = -kv * (dx - np.clip(sat / scale, 0, 1) * -lamb * scale * x_tilde) u_xyz = np.dot(Mx, u_xyz) # transform into joint space, add vel and gravity compensation u = np.dot(JEE.T, u_xyz) - Mq_g # calculate the null space filter Jdyn_inv = np.dot(Mx, np.dot(JEE, np.linalg.inv(Mq))) null_filter = (np.eye(robot_config.num_joints) - np.dot(JEE.T, Jdyn_inv)) # calculate our secondary control signal q_des = np.zeros(robot_config.num_joints) dq_des = np.zeros(robot_config.num_joints) # calculated desired joint angle acceleration using rest angles for ii in range(1, robot_config.num_joints): if robot_config.rest_angles[ii] is not None: q_des[ii] = (((robot_config.rest_angles[ii] - q[ii]) + np.pi) % (np.pi * 2) - np.pi) dq_des[ii] = dq[ii] # only compensate for velocity for joints with a control signal nkp = kp * .1 nkv = np.sqrt(nkp) u_null = np.dot(Mq, (nkp * q_des - nkv * dq_des)) u += np.dot(null_filter, u_null) # get the (x,y,z) position of the center of the obstacle v = RC.getObjectWorldPosition('obstacle') v = np.asarray(v) # find the closest point of each link to the obstacle for ii in range(robot_config.num_joints): # get the start and end-points of the arm segment p1 = robot_config.Tx('joint%i' % ii, q=q) if ii == robot_config.num_joints - 1: p2 = robot_config.Tx('EE', q=q) else: p2 = robot_config.Tx('joint%i' % (ii + 1), q=q) # calculate minimum distance from arm segment to obstacle # the vector of our line vec_line = p2 - p1 # the vector from the obstacle to the first line point vec_ob_line = v - p1 # calculate the projection normalized by length of arm segment projection = np.dot(vec_ob_line, vec_line) / np.sum((vec_line)**2) if projection < 0: # then closest point is the start of the segment closest = p1 elif projection > 1: # then closest point is the end of the segment closest = p2 else: closest = p1 + projection * vec_line # calculate distance from obstacle vertex to the closest point dist = np.sqrt(np.sum((v - closest)**2)) # account for size of obstacle rho = dist - obstacle_radius if rho < threshold: eta = .02 # feel like i saw 4 somewhere in the paper drhodx = (v - closest) / rho Fpsp = (eta * (1.0 / rho - 1.0 / threshold) * 1.0 / rho**2 * drhodx) # get offset of closest point from link's reference frame T_inv = robot_config.T_inv('link%i' % ii, q=q) m = np.dot(T_inv, np.hstack([closest, [1]]))[:-1] # calculate the Jacobian for this point Jpsp = robot_config.J('link%i' % ii, x=m, q=q)[:3] # calculate the inertia matrix for the # point subjected to the potential space Mxpsp_inv = np.dot(Jpsp, np.dot(np.linalg.pinv(Mq), Jpsp.T)) svd_u, svd_s, svd_v = np.linalg.svd(Mxpsp_inv) # cut off singular values that could cause problems singularity_thresh = .00025 for ii in range(len(svd_s)): svd_s[ii] = 0 if svd_s[ii] < singularity_thresh else \ 1./float(svd_s[ii]) # numpy returns U,S,V.T, so have to transpose both here Mxpsp = np.dot(svd_v.T, np.dot(np.diag(svd_s), svd_u.T)) u_psp = -np.dot(Jpsp.T, np.dot(Mxpsp, Fpsp)) if rho < .01: u = u_psp else: u += u_psp # multiply by -1 because torque is opposite of expected u *= -1 print('u: ', u) for ii, joint_handle in enumerate(joint_handles): # the way we're going to do force control is by setting # the target velocities of each joint super high and then # controlling the max torque allowed (yeah, i know) # get the current joint torque _, torque = \ vrep.simxGetJointForce( clientID, joint_handle, vrep.simx_opmode_blocking) if _ != 0: raise Exception() # if force has changed signs, # we need to change the target velocity sign if np.sign(torque) * np.sign(u[ii]) <= 0: joint_target_velocities[ii] = \ joint_target_velocities[ii] * -1 _ = self.setJointVelocity( ii, joint_target_velocities[ii]) # target velocity if _ != 0: raise Exception() # and now modulate the force _ = self.setJointForce(ii, abs(u[ii])) # force to apply if _ != 0: raise Exception()
def getHandWorldPosition(self): return RC.getObjectWorldPosition(RC.CBARRETT_HAND_NAME)
def getEndTipWorldPosition(self): return RC.getObjectWorldPosition(GB_CEND_TIP_NAME)
def reward(self): #print("reward") #print(__reward) self.__reward = 1000 robotId = self._robot.id() # if (robotId == RC.CJACO_ARM_HAND or robotId == RC.CKUKA_ARM_BARRETT_HAND or robotId == RC.CKUKA_ARM_SUCTION_PAD or robotId == RC.CUR5_ARM_BARRETT_HAND or robotId == RC.CHEXAPOD): # ------------------------------------------------------------------------------------------------ # BALANCE TASK ----------------------------------------------------------------------------------- # if (RC.isTaskObjHandBalance() or RC.isTaskObjSuctionBalancePlate() or RC.isTaskObjHexapodBalance()): if (not self.isObjGroundHit()): # Distance of plate away from hand palm center ----------------------------------------------- platePos = RC.getObjectWorldPosition(RC.CPLATE_OBJ_NAME) endTipPos = self._robot.getEndTipWorldPosition() #print('Plate Pos:', platePos) #print('Endtip Pos:', endTipPos) d = math.sqrt((platePos[0] - endTipPos[0])**2 + (platePos[1] - endTipPos[1])**2) #print('DDDD:', d) self.__reward -= d # Orientation of the plate ------------------------------------------------------------------- #plateOrient = RC.getObjectOrientation(RC.CPLATE_OBJ_NAME) # Must be the same name set in V-Rep #alpha1 = plateOrient[0] # Around x #beta1 = plateOrient[1] # Around y #gamma1 = plateOrient[2] # Around z #print('Plate Orient', plateOrient) #self.__reward -= (abs(alpha1) + abs(beta1)) #endTipOrient = self._robot.getEndTipOrientation() #endTipVelocity = self._robot.getEndTipVelocity() #print('Endtip Vel', endTipVelocity) else: self.__reward -= 100 if (RC.isTaskObjSuctionBalancePlate()): # Suction Pad suctionPadOrient = RC.getObjectOrientation( RC.CSUCTION_PAD_NAME) alpha2 = suctionPadOrient[0] # Around x beta2 = suctionPadOrient[1] # Around y gamma2 = suctionPadOrient[2] # Around z delta_alpha2 = abs(math.pi - abs(alpha2)) self.__reward -= (delta_alpha2 + abs(beta2)) # Joint Poses #joint4Pos = RC.getJointPosition(self._robot._jointHandles[3]) #joint6Pos = RC.getJointPosition(self._robot._jointHandles[5]) #print(joint4Pos, '==', joint6Pos) #self.__reward -= abs((-math.pi/2 - joint4Pos) - joint6Pos) #!!! <--- #handPos = self._robot.getHandWorldPosition() #handOrient = self._robot.getHandOrientation() ##print('Hand Orient', handOrient) #handVelocity = self._robot.getHandVelocity() ##print('Hand Vel', handVelocity) # ------------------------------------------------------------------------------------------------ # CATCH TASK ------------------------------------------------------------------------------------- # elif (RC.isTaskObjCatch()): # Distance of base joint pos to the ball ballPos = RC.getObjectWorldPosition(RC.CBALL_OBJ_NAME) basePos = self._robot.getCurrentBaseJointPos() angle = math.atan2(ballPos[1], ballPos[0]) self.__reward -= abs(angle - basePos) # Z Distance from ball to the suction pad tip suctionPadPosZ = 9.5846e-01 + 1.0000e-01 dist = ballPos[2] - suctionPadPosZ if (dist > 0): self.__reward -= dist else: self.__reward -= 100 # Wrist joint orientation directed to the ball #suctionPadOrient = RC.getObjectOrientation(RC.CSUCTION_PAD_NAME) #print('Suction Pad Orient', suctionPadOrient) # 3D Distance from ball to the suction pad tip padTip = self._robot.getSuctionPadTipInfo() endTip = RC.getObjectWorldPosition('RobotTip') self.__reward -= RC.getDistanceFromPointToLine( ballPos, padTip, endTip) ##suctionPadLine = padTip - endTip # #if(robotId == RC.CUR5_ARM_GRIPPER): if (RC.GB_TRACE): print('self.__reward:', self.__reward) return self.__reward
def step(self, action): ## ---------------------------------------------------------------------------------------- if (RC.GB_TRACE): print('ACTION:', action) ## ---------------------------------------------------------------------------------------- ## APPLY ACTION START ## self._objGroundHit = False self._robotOperationTime = 0 ## WAIT FOR V-REP SERVER SIMULATION STATE TO BE READY ## if (self._robot.getOperationState() == RC.CROBOT_STATE_READY): self._robot.applyAction(action) ## OBSERVE & REWARD reward = 0 ######################################################################################### ### TIMELY PICK ------------------------------------------------------------------------- ### if (RC.isTaskObjTimelyPick()): objName = RC.CCUBOID_OBJ_NAME pos = RC.getObjectWorldPosition(objName) self._robotOperationTime = self.getRobotOperationTime() while (pos[1] >= RC.CTASK_OBJ_TIMELY_PICK_POSITION and self._robot.getOperationState() != RC.CROBOT_STATE_MOVING_ENDED): #print('Obj Pos:', pos) pos = RC.getObjectWorldPosition(objName) #print('END-Obj Pos:', pos) # EITHER MOVING_ENDED or not, get the operation time self._robotOperationTime = self.getRobotOperationTime() #print('OPER TIME:', self._robotOperationTime) # PUNISHMENT POLICY # # P1 -- if (self._robot.getOperationState() == RC.CROBOT_STATE_MOVING_ENDED and pos[1] >= RC.CTASK_OBJ_TIMELY_PICK_POSITION): d = self.getTimelyPickObjectDistance( ) # pos[1] - RC.CTASK_OBJ_TIMELY_PICK_POSITION t = 5000 * d + 400 print('TOO SOON', d) reward -= t # P2 -- elif (pos[1] < RC.CTASK_OBJ_TIMELY_PICK_POSITION): d = self.getCurrentRobotPosDistanceToPrePickPose() d = d * 500 #print('DISTANCE: ', d) # !NOTE: # d value will decide not yet reaching pre-pick pose -- # Reached pre-pick pose -- while (self._robot.getOperationState() != RC.CROBOT_STATE_MOVING_ENDED): time = self.getRobotOperationTime( ) - self._robotOperationTime #print('RUN TIME:', time) if (time > 7000): self._observation = self.getObservation(action) self._observation.append(np.array(0, dtype=np.float32)) self._observation.append(np.array(0, dtype=np.float32)) reward = -time return self._observation, reward, True, { } ######## ducta ti = (self.getRobotOperationTime() - self._robotOperationTime) / 100 d += ti print('Late:', d, ':', ti) reward -= d # P3 -- approachingTime = self.getTimelyPickApproachingTime() pickingTime = self.getTimelyPickGrippingTime() print('OPER TIME', approachingTime, ' --- ', pickingTime) # OBSERVATION # self._observation = self.getObservation(action) self._observation.append( np.array(approachingTime, dtype=np.float32)) self._observation.append(np.array(pickingTime, dtype=np.float32)) reward += self.reward() isSlowApproaching = (action[0] < 0.1) or (approachingTime > 7000) isSlowGripping = (action[1] < 0.1) or (pickingTime > 1000) if (isSlowApproaching): reward -= approachingTime if (isSlowGripping): reward -= pickingTime done = not (isSlowApproaching or isSlowGripping) print('Env observed!', reward, ' - ', done) # self._robot.getOperationState() return self._observation, reward, done, {} ######## ducta ######################################################################################### ### TIMELY CATCH ------------------------------------------------------------------------ ### elif (RC.isTaskObjTimelyCatch()): objName = RC.CCUBOID_OBJ_NAME #pos = RC.getObjectWorldPosition(objName) self._robotOperationTime = self.getRobotOperationTime() APPROACHING_TIME_LIMIT = 7000 PICKING_TIME_LIMIT = 1000 # WAIT FOR ACTION ENDED # reward += self.reward() # while (self._robot.getOperationState() != RC.CROBOT_STATE_MOVING_ENDED): #print('Obj Pos:', pos) self._objGroundHit = self.isObjGroundHit() if (self._objGroundHit): reward -= 100 # ------------------------------------------------------------ time = self.getRobotOperationTime() - self._robotOperationTime #print('RUN TIME:', time) if (time > (APPROACHING_TIME_LIMIT + PICKING_TIME_LIMIT)): self._observation = self.getObservation(action) self._observation.append(np.array(0, dtype=np.float32)) self._observation.append(np.array(0, dtype=np.float32)) reward = -time print('MOVING TOO LONG', time) print('Env observed!', reward, ' - ', False) # self._robot.getOperationState() return self._observation, reward, False, {} ######## ducta #print('END-Obj Pos:', pos) self._robotOperationTime = self.getRobotOperationTime() #print('OPER TIME:', self._robotOperationTime) # PUNISHMENT POLICY # # P1 ------------------------------------------------------------------------------------------------- # #!NOTE: THESE VALUES ARE ONLY VALID AFTER self._robot.getOperationState() == RC.CROBOT_STATE_MOVING_ENDED, # ELSE RETURN 0 approachingTime = self.getTimelyPickApproachingTime() pickingTime = self.getTimelyPickGrippingTime() print('OPER TIME', approachingTime, ' --- ', pickingTime) # OBSERVATION # self._observation = self.getObservation(action) self._observation.append( np.array(approachingTime, dtype=np.float32)) self._observation.append(np.array(pickingTime, dtype=np.float32)) isSlowApproaching = (action[0] < 0.1) or (approachingTime > APPROACHING_TIME_LIMIT) isSlowGripping = (action[1] < 0.1) or (pickingTime > PICKING_TIME_LIMIT) if (isSlowApproaching and isSlowGripping): reward -= (approachingTime + pickingTime) elif (isSlowApproaching): reward -= (approachingTime + PICKING_TIME_LIMIT - action[1]) elif (isSlowGripping): reward -= (pickingTime + APPROACHING_TIME_LIMIT - action[0]) done = not ( isSlowApproaching or isSlowGripping ) # Here, we intentionally reverse the meaning for cancelling off all < 0.1 values if (not done): print('TOO SLOW ACT') print('Env observed!', reward, ' - ', done) # self._robot.getOperationState() return self._observation, reward, done, {} ######## ducta # ---------------------------------------------------------------------------------------------------- self._objGroundHit = self.isObjGroundHit() pos = RC.getObjectWorldPosition(objName) #print('OBJ Z POS:', pos[2]) # P2 ------------------------------------------------------------------------------------------------- # OBJ ON GROUND NOW # ALREADY HANDLED ABOVE by -100 reward if () during waiting for moving ended if (pos[2] >= 0.3 and pos[2] <= 0.5): reward += 5000 - abs(pos[2] - 0.36) print('Obj CAUGHT! APPARE!') # P3 ------------------------------------------------------------------------------------------------- # OBJ ON GROUND NOW # ALREADY HANDLED ABOVE by -100 reward if () during waiting for moving ended elif (self._objGroundHit): reward -= (approachingTime + pickingTime) / 10 print('Obj Already On ground..Late?') # # P4 ------------------------------------------------------------------------------------------------- # OBJ STILL FALLING elif (pos[2] > 0.5): d = self.getTimelyCatchObjectDistance() t = 5000 * d print('TOO SOON', d) reward -= t print('Env observed!', reward, ' - ', done) # self._robot.getOperationState() return self._observation, reward, done, {} ######## ducta ### OTHERS ############################################################################## else: #time.sleep(self._timeStep) #!!!Wait for some time since starting the action then observe: i = 0 while (self._robot.getOperationState() == RC.CROBOT_STATE_MOVING): time.sleep(1) self._objGroundHit = self.isObjGroundHit() if (self._objGroundHit): #print('GROUND HIT',i+1) reward -= 100 if (i == 2): # 3s has passed! reward += self.reward() i = i + 1 #print('Still moving',i) self._observation = self.getObservation(action) reward += self.reward() # Reward Addition after observation 2nd! done = self.termination() print('Env observed 2nd!', reward) # self._robot.getOperationState() #print("len=%r" % len(self._observation)) return self._observation, reward, False, {}
def getObservation(self, action): self._observation = self._robot.getObservation() #if(RC.isTaskObjSuctionBalancePlate()): # Vel-trained joint vel, action[1] --> Refer to the V-Rep Server Robot script to confirm this! #self._observation.append(np.array(action[1], dtype=np.float32)) #robotId = self._robot.id() if (RC.isTaskObjHandBalance() or RC.isTaskObjSuctionBalance() or RC.isTaskObjHexapodBalance()): ############################################################################################## # PLATE INFO # # Plate Object # Angle rotation away from horizontal plane plateOrient = RC.getObjectOrientation( RC.CPLATE_OBJ_NAME) # Must be the same name set in V-Rep self._observation.append( np.array( abs(plateOrient[0]), dtype=np.float32)) # Either alpha(X) or beta(Y) is enough! #self._observation.append(np.array(abs(plateOrient[1]), dtype=np.float32)) # Distance from plate to the center of end-tip platePos = RC.getObjectWorldPosition(RC.CPLATE_OBJ_NAME) endTipPos = self._robot.getEndTipWorldPosition() #basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME) d = math.sqrt((platePos[0] - endTipPos[0])**2 + (platePos[1] - endTipPos[1])**2) # Base Plate Normal Vector ----------------------------------------------------------------------------- normalVector = self.getBasePlateNormalVector() if (len(normalVector) == 3): slantingDegree = abs( RC.angle_between(np.array([0, 0, 1]), np.array(normalVector))) else: slantingDegree = 0 #print('SLANT', slantingDegree) # Velocity Factor of Joint 4 ----------------------------------------------------------------------------- if (RC.isTaskObjSuctionBalancePosVel()): self._observation.append( np.array([action[1]], dtype=np.float32)) self._observation.append(np.array([d], dtype=np.float32)) self._observation.append( np.array([slantingDegree], dtype=np.float32)) elif (RC.isTaskObjHold()): ############################################################################################## # TUBE INFO # # Tube Object # Angle rotation away from horizontal plane tubeOrient = RC.getObjectOrientation( RC.CTUBE_OBJ_NAME) # Must be the same name set in V-Rep self._observation.append( np.array(abs(tubeOrient[1]), dtype=np.float32)) # Distance from plate to the center of end-tip tubePos = RC.getObjectWorldPosition(RC.CTUBE_OBJ_NAME) palmPos = self._robot.getEndTipWorldPosition() d = math.sqrt((tubePos[0] - palmPos[0])**2 + (tubePos[1] - palmPos[1])**2) self._observation.append(np.array(d, dtype=np.float32)) elif (RC.isTaskObjCatch()): ############################################################################################## # BALL INFO # # Ball Object # Ball z position only ballPos = RC.getObjectWorldPosition(RC.CBALL_OBJ_NAME) ballLinVel = RC.getObjectVelocity(RC.CBALL_OBJ_NAME) ##self._observation.append(np.array(ballPos[0], dtype=np.float32)) ##self._observation.append(np.array(ballPos[1], dtype=np.float32)) self._observation.append(np.array(ballPos[2], dtype=np.float32)) self._observation.append(np.array(ballLinVel[2], dtype=np.float32)) elif (RC.isTaskObjTimelyPick()): ############################################################################################## # OBJ INFO # # Object position on conveyor belt #objPos = RC.getObjectWorldPosition(RC.CCUBOID_OBJ_NAME) #endTipPos = RC.getObjectWorldPosition('RG2') #d = math.sqrt((objPos[0] - endTipPos[0])**2 + # (objPos[1] - endTipPos[1])**2 + # (objPos[1] - endTipPos[1])**2) #self._observation.append(np.array(d, dtype=np.float32)) d = self.getTimelyPickObjectDistance() self._observation.append(np.array(d, dtype=np.float32)) #vel = self.getBeltVelocity() #self._observation.append(np.array(vel, dtype=np.float32)) #print('OBJECT TO PICK: ', d) elif (RC.isTaskObjTimelyCatch()): ############################################################################################## # OBJ INFO # # Object position on conveyor belt objPos = RC.getObjectWorldPosition(RC.CCUBOID_OBJ_NAME) endTipPos = RC.getObjectWorldPosition('RG2') zDelta = objPos[2] - endTipPos[2] #d = math.sqrt((objPos[0] - endTipPos[0])**2 + # (objPos[1] - endTipPos[1])**2 + # (objPos[1] - endTipPos[1])**2) #self._observation.append(np.array(d, dtype=np.float32)) d = self.getTimelyCatchObjectDistance() self._observation.append(np.array(zDelta, dtype=np.float32)) self._observation.append(np.array(d, dtype=np.float32)) return self._observation