def _reward(self):
        #print(__reward)
        self.__reward = 1000

        robotId = self._robot.id()
        #
        if (robotId == RC.CJACO_ARM_HAND
                or robotId == RC.CKUKA_ARM_BARRETT_HAND
                or robotId == RC.CKUKA_ARM_SUCTION_PAD
                or robotId == RC.CUR5_ARM_BARRETT_HAND
                or robotId == RC.CHEXAPOD):
            # ------------------------------------------------------------------------------------------------
            # BALANCE TASK -----------------------------------------------------------------------------------
            #
            ballPos = RC.getObjectWorldPosition(RC.CBALL_OBJ_NAME)
            basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME)
            d = math.sqrt((ballPos[0] - basePlatePos[0])**2 +
                          (ballPos[1] - basePlatePos[1])**2 +
                          (ballPos[2] - basePlatePos[2])**2)
            #print('DDDD:', d)
            self.__reward -= d

        if (RC.GB_TRACE):
            print('self.__reward:', self.__reward)
        return self.__reward
示例#2
0
    def getObservation(self, action):
        self._observation = self._robot.getObservation()
        #if(RC.isTaskObjSuctionBalancePlate()):
        # Vel-trained joint vel, action[1] --> Refer to the V-Rep Server Robot script to confirm this!
        #self._observation.append(np.array(action[1], dtype=np.float32))

        #robotId = self._robot.id()
        if (RC.isTaskObjSuctionObjectSupport()):
            # Cuboid Pos ------------------------------------------------------------------------------------------
            cuboidPos = RC.getObjectWorldPosition(RC.CCUBOID_OBJ_NAME)
            self._observation.append(np.array(cuboidPos[0], dtype=np.float32))
            self._observation.append(np.array(cuboidPos[1], dtype=np.float32))
            self._observation.append(np.array(cuboidPos[2], dtype=np.float32))

            # Distance from base plate pos and cuboid center ------------------------------------------------------
            basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME)
            d = math.sqrt((cuboidPos[0] - basePlatePos[0])**2 +
                          (cuboidPos[1] - basePlatePos[1])**2)
            self._observation.append(np.array(d, dtype=np.float32))

            # Orientation of the base plate -----------------------------------------------------------------------
            basePlateOrient = RC.getObjectOrientation(
                RC.CBASE_PLATE_OBJ_NAME)  # Must be the same name set in V-Rep
            alpha1 = basePlateOrient[0]  # Around x
            beta1 = basePlateOrient[1]  # Around y
            gamma1 = basePlateOrient[2]  # Around z
            #print('Plate Orient', plateOrient)
            self._observation.append(np.array(alpha1, dtype=np.float32))
            self._observation.append(np.array(beta1, dtype=np.float32))

        return self._observation
示例#3
0
    def isObjGroundHit(self):
        objName = ''
        if (RC.isTaskObjHandBalance() or RC.isTaskObjSuctionBalance()
                or RC.isTaskObjHexapodBalance()):
            objName = RC.CPLATE_OBJ_NAME
        elif (RC.isTaskObjHold()):
            objName = RC.CTUBE_OBJ_NAME
        elif (RC.isTaskObjCatch()):
            objName = RC.CBALL_OBJ_NAME
        elif (RC.isTaskObjTimelyCatch()):
            objName = RC.CCUBOID_OBJ_NAME

        pos = RC.getObjectWorldPosition(objName)

        # V-REP API Ground Hit Check
        #res = self.detectObjectsReachGround()
        #if(res):
        #self._objGroundHit = True
        ##RC.stopSimulation(self._clientID)
        ##self._robot.commandJointVibration(0)
        #return True

        if (RC.isTaskObjTimelyCatch()):
            res = (pos[2] <= 0.1)
        else:
            res = (pos[2] < 0.5)
        return res
    def getObservation(self, action):
        self._observation = self._robot.getObservation()
        #if(RC.isTaskObjSuctionBalancePlate()):
        # Vel-trained joint vel, action[1] --> Refer to the V-Rep Server Robot script to confirm this!
        #self._observation.append(np.array(action[1], dtype=np.float32))

        #robotId = self._robot.id()
        if (RC.isTaskObjSuctionObjectSupport()):
            ballPos = RC.getObjectWorldPosition(RC.CBALL_OBJ_NAME)
            self._observation.append(np.array(ballPos[0], dtype=np.float32))
            self._observation.append(np.array(ballPos[1], dtype=np.float32))
            self._observation.append(np.array(ballPos[2], dtype=np.float32))
            basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME)
            d = math.sqrt((ballPos[0] - basePlatePos[0])**2 +
                          (ballPos[1] - basePlatePos[1])**2 +
                          (ballPos[2] - basePlatePos[2])**2)
            self._observation.append(np.array(d, dtype=np.float32))

        return self._observation
示例#5
0
    def _reward(self):
        #print(__reward)
        self.__reward = 1000

        robotId = self._robot.id()
        #
        if (robotId == RC.CJACO_ARM_HAND
                or robotId == RC.CKUKA_ARM_BARRETT_HAND
                or robotId == RC.CKUKA_ARM_SUCTION_PAD
                or robotId == RC.CUR5_ARM_BARRETT_HAND
                or robotId == RC.CHEXAPOD):
            # ----------------------------------------------------------------------------------------------------------
            # BALANCE TASK ---------------------------------------------------------------------------------------------
            #
            #if(not self.isObjAwayFromBasePlate()):
            cuboidPos = RC.getObjectWorldPosition(RC.CCUBOID_OBJ_NAME)
            basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME)
            d = math.sqrt((cuboidPos[0] - basePlatePos[0])**2 +
                          (cuboidPos[1] - basePlatePos[1])**2)
            #print('DDDD:', d)
            self.__reward -= d

            # Orientation of the base plate -----------------------------------------------------------------------
            basePlateOrient = RC.getObjectOrientation(
                RC.CBASE_PLATE_OBJ_NAME)  # Must be the same name set in V-Rep
            alpha1 = basePlateOrient[0]  # Around x
            beta1 = basePlateOrient[1]  # Around y
            gamma1 = basePlateOrient[2]  # Around z
            #print('Plate Orient', plateOrient)
            self.__reward -= (abs(alpha1) + abs(beta1))
            #else:
            #self.__reward -= 100

        if (RC.GB_TRACE):
            print('self.__reward:', self.__reward)
        return self.__reward
示例#6
0
    def objectAvoidanceTraining(self):
        # self._objs
        #
        # 3.1 Objects Pos & Velocity
        # Compose Environment Info (State input)
        baseCurPos = self._robot.getCurrentBaseJointPos()
        print('BASE CUR POS:', baseCurPos)
        envInfo = [baseCurPos]
        objsPos = []
        for i in range(self._CNUM_OBJS):
            objInfo = []
            #print("Obj Ids: ", i, self._objs[i].id())
            objPos = RC.getObjectWorldPosition(RC.CFALL_OBJS_NAMES[i])
            objsPos.append(objPos)
            # This returns a list of two vector3 values (3 floats in a list) representing the linear velocity [x,y,z]
            # and angular velocity [wx,wy,wz] in Cartesian worldspace coordinates.
            objLinearVel = RC.getObjectVelocity(RC.CFALL_OBJS_NAMES[i])
            #print("OBJPS:",i, objPos)
            objInfo += objPos
            #objInfo.append(objLinearVel[2]) # zVel only
            envInfo.append(objInfo)
            #print("OBJINFO:", objInfo)

        # 5. ACTION -------------------------------------------------------------------------
        #
        actionId = self._robotBot.act(envInfo)
        print("Robot Act: ", actionId)
        self._robot.actRobot(actionId)

        # 6. UPDATE OBJECTS ROBOT-HIT & GROUND-HIT STATUS
        for i in range(self._CNUM_OBJS):
            #print("CHECK KUKA COLLISION WITH BALL", RC.CFALL_OBJS_NAMES[i])
            if(self._robot.detectCollisionWith(RC.CFALL_OBJS_NAMES[i]) or \
               self.detectObjectsReachGround()):
                self.setTerminated(1)
                print('Terminate')
                break
示例#7
0
    def doInverseKinematicsCalculation(self):
        count = 0
        target_index = 0
        change_target_time = dt * 1000
        vmax = 0.5
        kp = 200.0
        kv = np.sqrt(kp)

        # define variables to share with nengo
        q = np.zeros(len(joint_handles))
        dq = np.zeros(len(joint_handles))

        # NOTE: main loop starts here -----------------------------------------
        target_xyz = RC.getObjectWorldPosition("obstacle")

        track_target.append(np.copy(target_xyz))  # store for plotting
        target_xyz = np.asarray(target_xyz)

        for ii, joint_handle in enumerate(joint_handles):
            old_q = np.copy(q)
            # get the joint angles
            _, q[ii] = vrep.simxGetJointPosition(clientID, joint_handle,
                                                 vrep.simx_opmode_blocking)
            if _ != 0:
                raise Exception()

            # get the joint velocity
            _, dq[ii] = vrep.simxGetObjectFloatParameter(
                clientID,
                joint_handle,
                2012,  # parameter ID for angular velocity of the joint
                vrep.simx_opmode_blocking)
            if _ != 0:
                raise Exception()

        # calculate position of the end-effector
        # derived in the ur5 calc_TnJ class
        xyz = robot_config.Tx('EE', q)

        # calculate the Jacobian for the end effector
        JEE = robot_config.J('EE', q)

        # calculate the inertia matrix in joint space
        Mq = robot_config.Mq(q)

        # calculate the effect of gravity in joint space
        Mq_g = robot_config.Mq_g(q)

        # convert the mass compensation into end effector space
        Mx_inv = np.dot(JEE, np.dot(np.linalg.inv(Mq), JEE.T))
        svd_u, svd_s, svd_v = np.linalg.svd(Mx_inv)
        # cut off any singular values that could cause control problems
        singularity_thresh = .00025
        for i in range(len(svd_s)):
            svd_s[i] = 0 if svd_s[i] < singularity_thresh else \
                1./float(svd_s[i])
        # numpy returns U,S,V.T, so have to transpose both here
        Mx = np.dot(svd_v.T, np.dot(np.diag(svd_s), svd_u.T))

        # calculate desired force in (x,y,z) space
        dx = np.dot(JEE, dq)
        # implement velocity limiting
        lamb = kp / kv
        x_tilde = xyz - target_xyz
        sat = vmax / (lamb * np.abs(x_tilde))
        scale = np.ones(3)
        if np.any(sat < 1):
            index = np.argmin(sat)
            unclipped = kp * x_tilde[index]
            clipped = kv * vmax * np.sign(x_tilde[index])
            scale = np.ones(3) * clipped / unclipped
            scale[index] = 1
        u_xyz = -kv * (dx -
                       np.clip(sat / scale, 0, 1) * -lamb * scale * x_tilde)
        u_xyz = np.dot(Mx, u_xyz)

        # transform into joint space, add vel and gravity compensation
        u = np.dot(JEE.T, u_xyz) - Mq_g

        # calculate the null space filter
        Jdyn_inv = np.dot(Mx, np.dot(JEE, np.linalg.inv(Mq)))
        null_filter = (np.eye(robot_config.num_joints) -
                       np.dot(JEE.T, Jdyn_inv))
        # calculate our secondary control signal
        q_des = np.zeros(robot_config.num_joints)
        dq_des = np.zeros(robot_config.num_joints)
        # calculated desired joint angle acceleration using rest angles
        for ii in range(1, robot_config.num_joints):
            if robot_config.rest_angles[ii] is not None:
                q_des[ii] = (((robot_config.rest_angles[ii] - q[ii]) + np.pi) %
                             (np.pi * 2) - np.pi)
                dq_des[ii] = dq[ii]
        # only compensate for velocity for joints with a control signal
        nkp = kp * .1
        nkv = np.sqrt(nkp)
        u_null = np.dot(Mq, (nkp * q_des - nkv * dq_des))

        u += np.dot(null_filter, u_null)

        # get the (x,y,z) position of the center of the obstacle
        v = RC.getObjectWorldPosition('obstacle')
        v = np.asarray(v)

        # find the closest point of each link to the obstacle
        for ii in range(robot_config.num_joints):
            # get the start and end-points of the arm segment
            p1 = robot_config.Tx('joint%i' % ii, q=q)
            if ii == robot_config.num_joints - 1:
                p2 = robot_config.Tx('EE', q=q)
            else:
                p2 = robot_config.Tx('joint%i' % (ii + 1), q=q)

            # calculate minimum distance from arm segment to obstacle
            # the vector of our line
            vec_line = p2 - p1
            # the vector from the obstacle to the first line point
            vec_ob_line = v - p1
            # calculate the projection normalized by length of arm segment
            projection = np.dot(vec_ob_line, vec_line) / np.sum((vec_line)**2)
            if projection < 0:
                # then closest point is the start of the segment
                closest = p1
            elif projection > 1:
                # then closest point is the end of the segment
                closest = p2
            else:
                closest = p1 + projection * vec_line
            # calculate distance from obstacle vertex to the closest point
            dist = np.sqrt(np.sum((v - closest)**2))
            # account for size of obstacle
            rho = dist - obstacle_radius

            if rho < threshold:

                eta = .02  # feel like i saw 4 somewhere in the paper
                drhodx = (v - closest) / rho
                Fpsp = (eta * (1.0 / rho - 1.0 / threshold) * 1.0 / rho**2 *
                        drhodx)

                # get offset of closest point from link's reference frame
                T_inv = robot_config.T_inv('link%i' % ii, q=q)
                m = np.dot(T_inv, np.hstack([closest, [1]]))[:-1]
                # calculate the Jacobian for this point
                Jpsp = robot_config.J('link%i' % ii, x=m, q=q)[:3]

                # calculate the inertia matrix for the
                # point subjected to the potential space
                Mxpsp_inv = np.dot(Jpsp, np.dot(np.linalg.pinv(Mq), Jpsp.T))
                svd_u, svd_s, svd_v = np.linalg.svd(Mxpsp_inv)
                # cut off singular values that could cause problems
                singularity_thresh = .00025
                for ii in range(len(svd_s)):
                    svd_s[ii] = 0 if svd_s[ii] < singularity_thresh else \
                        1./float(svd_s[ii])
                # numpy returns U,S,V.T, so have to transpose both here
                Mxpsp = np.dot(svd_v.T, np.dot(np.diag(svd_s), svd_u.T))

                u_psp = -np.dot(Jpsp.T, np.dot(Mxpsp, Fpsp))
                if rho < .01:
                    u = u_psp
                else:
                    u += u_psp

        # multiply by -1 because torque is opposite of expected
        u *= -1
        print('u: ', u)

        for ii, joint_handle in enumerate(joint_handles):
            # the way we're going to do force control is by setting
            # the target velocities of each joint super high and then
            # controlling the max torque allowed (yeah, i know)

            # get the current joint torque
            _, torque = \
                vrep.simxGetJointForce(
                    clientID,
                    joint_handle,
                    vrep.simx_opmode_blocking)
            if _ != 0:
                raise Exception()

            # if force has changed signs,
            # we need to change the target velocity sign
            if np.sign(torque) * np.sign(u[ii]) <= 0:
                joint_target_velocities[ii] = \
                    joint_target_velocities[ii] * -1
                _ = self.setJointVelocity(
                    ii, joint_target_velocities[ii])  # target velocity
            if _ != 0:
                raise Exception()

            # and now modulate the force
            _ = self.setJointForce(ii, abs(u[ii]))  # force to apply
            if _ != 0:
                raise Exception()
示例#8
0
 def getHandWorldPosition(self):
     return RC.getObjectWorldPosition(RC.CBARRETT_HAND_NAME)
示例#9
0
 def getEndTipWorldPosition(self):
     return RC.getObjectWorldPosition(GB_CEND_TIP_NAME)
示例#10
0
    def reward(self):
        #print("reward")
        #print(__reward)
        self.__reward = 1000

        robotId = self._robot.id()
        #
        if (robotId == RC.CJACO_ARM_HAND
                or robotId == RC.CKUKA_ARM_BARRETT_HAND
                or robotId == RC.CKUKA_ARM_SUCTION_PAD
                or robotId == RC.CUR5_ARM_BARRETT_HAND
                or robotId == RC.CHEXAPOD):
            # ------------------------------------------------------------------------------------------------
            # BALANCE TASK -----------------------------------------------------------------------------------
            #
            if (RC.isTaskObjHandBalance() or RC.isTaskObjSuctionBalancePlate()
                    or RC.isTaskObjHexapodBalance()):
                if (not self.isObjGroundHit()):
                    # Distance of plate away from hand palm center -----------------------------------------------
                    platePos = RC.getObjectWorldPosition(RC.CPLATE_OBJ_NAME)
                    endTipPos = self._robot.getEndTipWorldPosition()
                    #print('Plate Pos:', platePos)
                    #print('Endtip Pos:', endTipPos)
                    d = math.sqrt((platePos[0] - endTipPos[0])**2 +
                                  (platePos[1] - endTipPos[1])**2)
                    #print('DDDD:', d)
                    self.__reward -= d

                    # Orientation of the plate -------------------------------------------------------------------
                    #plateOrient = RC.getObjectOrientation(RC.CPLATE_OBJ_NAME) # Must be the same name set in V-Rep
                    #alpha1 = plateOrient[0] # Around x
                    #beta1  = plateOrient[1] # Around y
                    #gamma1 = plateOrient[2] # Around z
                    #print('Plate Orient', plateOrient)
                    #self.__reward -= (abs(alpha1) + abs(beta1))

                    #endTipOrient = self._robot.getEndTipOrientation()
                    #endTipVelocity = self._robot.getEndTipVelocity()
                    #print('Endtip Vel', endTipVelocity)
                else:
                    self.__reward -= 100

                if (RC.isTaskObjSuctionBalancePlate()):
                    # Suction Pad
                    suctionPadOrient = RC.getObjectOrientation(
                        RC.CSUCTION_PAD_NAME)
                    alpha2 = suctionPadOrient[0]  # Around x
                    beta2 = suctionPadOrient[1]  # Around y
                    gamma2 = suctionPadOrient[2]  # Around z
                    delta_alpha2 = abs(math.pi - abs(alpha2))
                    self.__reward -= (delta_alpha2 + abs(beta2))

                    # Joint Poses
                    #joint4Pos = RC.getJointPosition(self._robot._jointHandles[3])
                    #joint6Pos = RC.getJointPosition(self._robot._jointHandles[5])
                    #print(joint4Pos, '==', joint6Pos)
                    #self.__reward -= abs((-math.pi/2 - joint4Pos) - joint6Pos) #!!! <---

                #handPos = self._robot.getHandWorldPosition()
                #handOrient = self._robot.getHandOrientation()
                ##print('Hand Orient', handOrient)
                #handVelocity = self._robot.getHandVelocity()
                ##print('Hand Vel', handVelocity)

            # ------------------------------------------------------------------------------------------------
            # CATCH TASK -------------------------------------------------------------------------------------
            #
            elif (RC.isTaskObjCatch()):
                # Distance of base joint pos to the ball
                ballPos = RC.getObjectWorldPosition(RC.CBALL_OBJ_NAME)
                basePos = self._robot.getCurrentBaseJointPos()
                angle = math.atan2(ballPos[1], ballPos[0])
                self.__reward -= abs(angle - basePos)

                # Z Distance from ball to the suction pad tip
                suctionPadPosZ = 9.5846e-01 + 1.0000e-01
                dist = ballPos[2] - suctionPadPosZ
                if (dist > 0):
                    self.__reward -= dist
                else:
                    self.__reward -= 100

                # Wrist joint orientation directed to the ball
                #suctionPadOrient = RC.getObjectOrientation(RC.CSUCTION_PAD_NAME)
                #print('Suction Pad Orient', suctionPadOrient)

                # 3D Distance from ball to the suction pad tip
                padTip = self._robot.getSuctionPadTipInfo()
                endTip = RC.getObjectWorldPosition('RobotTip')
                self.__reward -= RC.getDistanceFromPointToLine(
                    ballPos, padTip, endTip)
                ##suctionPadLine = padTip - endTip

        #
        #if(robotId == RC.CUR5_ARM_GRIPPER):

        if (RC.GB_TRACE):
            print('self.__reward:', self.__reward)
        return self.__reward
示例#11
0
    def step(self, action):
        ## ----------------------------------------------------------------------------------------
        if (RC.GB_TRACE):
            print('ACTION:', action)
        ## ----------------------------------------------------------------------------------------
        ## APPLY ACTION START
        ##
        self._objGroundHit = False
        self._robotOperationTime = 0

        ## WAIT FOR V-REP SERVER SIMULATION STATE TO BE READY
        ##
        if (self._robot.getOperationState() == RC.CROBOT_STATE_READY):
            self._robot.applyAction(action)

        ## OBSERVE & REWARD
        reward = 0

        #########################################################################################
        ### TIMELY PICK -------------------------------------------------------------------------
        ###
        if (RC.isTaskObjTimelyPick()):
            objName = RC.CCUBOID_OBJ_NAME
            pos = RC.getObjectWorldPosition(objName)

            self._robotOperationTime = self.getRobotOperationTime()
            while (pos[1] >= RC.CTASK_OBJ_TIMELY_PICK_POSITION
                   and self._robot.getOperationState() !=
                   RC.CROBOT_STATE_MOVING_ENDED):
                #print('Obj Pos:', pos)
                pos = RC.getObjectWorldPosition(objName)

            #print('END-Obj Pos:', pos)

            # EITHER MOVING_ENDED or not, get the operation time
            self._robotOperationTime = self.getRobotOperationTime()
            #print('OPER TIME:', self._robotOperationTime)

            # PUNISHMENT POLICY
            #
            # P1 --
            if (self._robot.getOperationState() == RC.CROBOT_STATE_MOVING_ENDED
                    and pos[1] >= RC.CTASK_OBJ_TIMELY_PICK_POSITION):
                d = self.getTimelyPickObjectDistance(
                )  # pos[1] - RC.CTASK_OBJ_TIMELY_PICK_POSITION
                t = 5000 * d + 400
                print('TOO SOON', d)
                reward -= t

            # P2 --
            elif (pos[1] < RC.CTASK_OBJ_TIMELY_PICK_POSITION):
                d = self.getCurrentRobotPosDistanceToPrePickPose()
                d = d * 500
                #print('DISTANCE: ', d)
                # !NOTE:
                # d value will decide not yet reaching pre-pick pose --
                # Reached pre-pick pose --

                while (self._robot.getOperationState() !=
                       RC.CROBOT_STATE_MOVING_ENDED):
                    time = self.getRobotOperationTime(
                    ) - self._robotOperationTime
                    #print('RUN TIME:', time)
                    if (time > 7000):
                        self._observation = self.getObservation(action)
                        self._observation.append(np.array(0, dtype=np.float32))
                        self._observation.append(np.array(0, dtype=np.float32))
                        reward = -time
                        return self._observation, reward, True, {
                        }  ######## ducta

                ti = (self.getRobotOperationTime() -
                      self._robotOperationTime) / 100
                d += ti
                print('Late:', d, ':', ti)
                reward -= d

            # P3 --
            approachingTime = self.getTimelyPickApproachingTime()
            pickingTime = self.getTimelyPickGrippingTime()
            print('OPER TIME', approachingTime, ' --- ', pickingTime)

            # OBSERVATION
            #
            self._observation = self.getObservation(action)
            self._observation.append(
                np.array(approachingTime, dtype=np.float32))
            self._observation.append(np.array(pickingTime, dtype=np.float32))

            reward += self.reward()
            isSlowApproaching = (action[0] < 0.1) or (approachingTime > 7000)
            isSlowGripping = (action[1] < 0.1) or (pickingTime > 1000)
            if (isSlowApproaching):
                reward -= approachingTime
            if (isSlowGripping):
                reward -= pickingTime
            done = not (isSlowApproaching or isSlowGripping)

            print('Env observed!', reward, ' - ',
                  done)  # self._robot.getOperationState()

            return self._observation, reward, done, {}  ######## ducta

        #########################################################################################
        ### TIMELY CATCH ------------------------------------------------------------------------
        ###
        elif (RC.isTaskObjTimelyCatch()):
            objName = RC.CCUBOID_OBJ_NAME
            #pos = RC.getObjectWorldPosition(objName)
            self._robotOperationTime = self.getRobotOperationTime()

            APPROACHING_TIME_LIMIT = 7000
            PICKING_TIME_LIMIT = 1000

            # WAIT FOR ACTION ENDED
            #
            reward += self.reward()
            #
            while (self._robot.getOperationState() !=
                   RC.CROBOT_STATE_MOVING_ENDED):
                #print('Obj Pos:', pos)
                self._objGroundHit = self.isObjGroundHit()
                if (self._objGroundHit):
                    reward -= 100
                # ------------------------------------------------------------
                time = self.getRobotOperationTime() - self._robotOperationTime
                #print('RUN TIME:', time)
                if (time > (APPROACHING_TIME_LIMIT + PICKING_TIME_LIMIT)):
                    self._observation = self.getObservation(action)
                    self._observation.append(np.array(0, dtype=np.float32))
                    self._observation.append(np.array(0, dtype=np.float32))
                    reward = -time
                    print('MOVING TOO LONG', time)
                    print('Env observed!', reward, ' - ',
                          False)  # self._robot.getOperationState()
                    return self._observation, reward, False, {}  ######## ducta

            #print('END-Obj Pos:', pos)
            self._robotOperationTime = self.getRobotOperationTime()
            #print('OPER TIME:', self._robotOperationTime)

            # PUNISHMENT POLICY
            #
            # P1 -------------------------------------------------------------------------------------------------
            #
            #!NOTE: THESE VALUES ARE ONLY VALID AFTER self._robot.getOperationState() == RC.CROBOT_STATE_MOVING_ENDED,
            # ELSE RETURN 0
            approachingTime = self.getTimelyPickApproachingTime()
            pickingTime = self.getTimelyPickGrippingTime()
            print('OPER TIME', approachingTime, ' --- ', pickingTime)

            # OBSERVATION
            #
            self._observation = self.getObservation(action)
            self._observation.append(
                np.array(approachingTime, dtype=np.float32))
            self._observation.append(np.array(pickingTime, dtype=np.float32))

            isSlowApproaching = (action[0] < 0.1) or (approachingTime >
                                                      APPROACHING_TIME_LIMIT)
            isSlowGripping = (action[1] < 0.1) or (pickingTime >
                                                   PICKING_TIME_LIMIT)

            if (isSlowApproaching and isSlowGripping):
                reward -= (approachingTime + pickingTime)
            elif (isSlowApproaching):
                reward -= (approachingTime + PICKING_TIME_LIMIT - action[1])
            elif (isSlowGripping):
                reward -= (pickingTime + APPROACHING_TIME_LIMIT - action[0])

            done = not (
                isSlowApproaching or isSlowGripping
            )  # Here, we intentionally reverse the meaning for cancelling off all < 0.1 values

            if (not done):
                print('TOO SLOW ACT')
                print('Env observed!', reward, ' - ',
                      done)  # self._robot.getOperationState()
                return self._observation, reward, done, {}  ######## ducta

            # ----------------------------------------------------------------------------------------------------
            self._objGroundHit = self.isObjGroundHit()
            pos = RC.getObjectWorldPosition(objName)
            #print('OBJ Z POS:', pos[2])

            # P2 -------------------------------------------------------------------------------------------------
            # OBJ ON GROUND NOW
            # ALREADY HANDLED ABOVE by -100 reward if () during waiting for moving ended
            if (pos[2] >= 0.3 and pos[2] <= 0.5):
                reward += 5000 - abs(pos[2] - 0.36)
                print('Obj CAUGHT! APPARE!')

            # P3 -------------------------------------------------------------------------------------------------
            # OBJ ON GROUND NOW
            # ALREADY HANDLED ABOVE by -100 reward if () during waiting for moving ended
            elif (self._objGroundHit):
                reward -= (approachingTime + pickingTime) / 10
                print('Obj Already On ground..Late?')
            #
            # P4 -------------------------------------------------------------------------------------------------
            # OBJ STILL FALLING
            elif (pos[2] > 0.5):
                d = self.getTimelyCatchObjectDistance()
                t = 5000 * d
                print('TOO SOON', d)
                reward -= t

            print('Env observed!', reward, ' - ',
                  done)  # self._robot.getOperationState()
            return self._observation, reward, done, {}  ######## ducta

        ### OTHERS ##############################################################################
        else:
            #time.sleep(self._timeStep)
            #!!!Wait for some time since starting the action then observe:
            i = 0
            while (self._robot.getOperationState() == RC.CROBOT_STATE_MOVING):
                time.sleep(1)
                self._objGroundHit = self.isObjGroundHit()
                if (self._objGroundHit):
                    #print('GROUND HIT',i+1)
                    reward -= 100
                if (i == 2):  # 3s has passed!
                    reward += self.reward()
                i = i + 1
                #print('Still moving',i)

            self._observation = self.getObservation(action)
            reward += self.reward()  # Reward Addition after observation 2nd!
            done = self.termination()
            print('Env observed 2nd!',
                  reward)  # self._robot.getOperationState()
            #print("len=%r" % len(self._observation))

            return self._observation, reward, False, {}
示例#12
0
    def getObservation(self, action):
        self._observation = self._robot.getObservation()
        #if(RC.isTaskObjSuctionBalancePlate()):
        # Vel-trained joint vel, action[1] --> Refer to the V-Rep Server Robot script to confirm this!
        #self._observation.append(np.array(action[1], dtype=np.float32))

        #robotId = self._robot.id()
        if (RC.isTaskObjHandBalance() or RC.isTaskObjSuctionBalance()
                or RC.isTaskObjHexapodBalance()):
            ##############################################################################################
            # PLATE INFO
            #
            # Plate Object
            # Angle rotation away from horizontal plane
            plateOrient = RC.getObjectOrientation(
                RC.CPLATE_OBJ_NAME)  # Must be the same name set in V-Rep
            self._observation.append(
                np.array(
                    abs(plateOrient[0]),
                    dtype=np.float32))  # Either alpha(X) or beta(Y) is enough!
            #self._observation.append(np.array(abs(plateOrient[1]), dtype=np.float32))

            # Distance from plate to the center of end-tip
            platePos = RC.getObjectWorldPosition(RC.CPLATE_OBJ_NAME)
            endTipPos = self._robot.getEndTipWorldPosition()
            #basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME)
            d = math.sqrt((platePos[0] - endTipPos[0])**2 +
                          (platePos[1] - endTipPos[1])**2)

            # Base Plate Normal Vector -----------------------------------------------------------------------------
            normalVector = self.getBasePlateNormalVector()
            if (len(normalVector) == 3):
                slantingDegree = abs(
                    RC.angle_between(np.array([0, 0, 1]),
                                     np.array(normalVector)))
            else:
                slantingDegree = 0
            #print('SLANT', slantingDegree)

            # Velocity Factor of Joint 4  -----------------------------------------------------------------------------
            if (RC.isTaskObjSuctionBalancePosVel()):
                self._observation.append(
                    np.array([action[1]], dtype=np.float32))

            self._observation.append(np.array([d], dtype=np.float32))
            self._observation.append(
                np.array([slantingDegree], dtype=np.float32))
        elif (RC.isTaskObjHold()):
            ##############################################################################################
            # TUBE INFO
            #
            # Tube Object
            # Angle rotation away from horizontal plane
            tubeOrient = RC.getObjectOrientation(
                RC.CTUBE_OBJ_NAME)  # Must be the same name set in V-Rep
            self._observation.append(
                np.array(abs(tubeOrient[1]), dtype=np.float32))

            # Distance from plate to the center of end-tip
            tubePos = RC.getObjectWorldPosition(RC.CTUBE_OBJ_NAME)
            palmPos = self._robot.getEndTipWorldPosition()
            d = math.sqrt((tubePos[0] - palmPos[0])**2 +
                          (tubePos[1] - palmPos[1])**2)
            self._observation.append(np.array(d, dtype=np.float32))

        elif (RC.isTaskObjCatch()):
            ##############################################################################################
            # BALL INFO
            #
            # Ball Object
            # Ball z position only
            ballPos = RC.getObjectWorldPosition(RC.CBALL_OBJ_NAME)
            ballLinVel = RC.getObjectVelocity(RC.CBALL_OBJ_NAME)
            ##self._observation.append(np.array(ballPos[0], dtype=np.float32))
            ##self._observation.append(np.array(ballPos[1], dtype=np.float32))
            self._observation.append(np.array(ballPos[2], dtype=np.float32))
            self._observation.append(np.array(ballLinVel[2], dtype=np.float32))

        elif (RC.isTaskObjTimelyPick()):
            ##############################################################################################
            # OBJ INFO
            #
            # Object position on conveyor belt
            #objPos    = RC.getObjectWorldPosition(RC.CCUBOID_OBJ_NAME)
            #endTipPos = RC.getObjectWorldPosition('RG2')
            #d = math.sqrt((objPos[0] - endTipPos[0])**2 +
            #              (objPos[1] - endTipPos[1])**2 +
            #              (objPos[1] - endTipPos[1])**2)
            #self._observation.append(np.array(d, dtype=np.float32))
            d = self.getTimelyPickObjectDistance()
            self._observation.append(np.array(d, dtype=np.float32))

            #vel = self.getBeltVelocity()
            #self._observation.append(np.array(vel, dtype=np.float32))
            #print('OBJECT TO PICK: ', d)

        elif (RC.isTaskObjTimelyCatch()):
            ##############################################################################################
            # OBJ INFO
            #
            # Object position on conveyor belt
            objPos = RC.getObjectWorldPosition(RC.CCUBOID_OBJ_NAME)
            endTipPos = RC.getObjectWorldPosition('RG2')
            zDelta = objPos[2] - endTipPos[2]
            #d = math.sqrt((objPos[0] - endTipPos[0])**2 +
            #              (objPos[1] - endTipPos[1])**2 +
            #              (objPos[1] - endTipPos[1])**2)
            #self._observation.append(np.array(d, dtype=np.float32))
            d = self.getTimelyCatchObjectDistance()
            self._observation.append(np.array(zDelta, dtype=np.float32))
            self._observation.append(np.array(d, dtype=np.float32))

        return self._observation