Exemplo n.º 1
0
    def termination(self):
        if (self._terminated):
            ##RC.stopSimulation(self._clientID)
            ##self._robot.commandJointVibration(0)
            return True

        # Hit the object or object hit the ground
        #self._robot.detectCollisionWith(RC.CFALL_OBJS_NAMES[0])

        if (RC.isTaskObjCatch()):
            if (self._robot.checkObjectCaught()):
                return True

        if (RC.isTaskObjTimelyPick()):
            return False
        # Simple Ground Hit Check
        else:
            self._objGroundHit = self.isObjGroundHit()
            if (self._objGroundHit):
                return True

        if (not RC.isTaskObjCatch()):
            res = self.detectHandOnGround()
            ##if(res):
            ##RC.stopSimulation(self._clientID)
            ##self._robot.commandJointVibration(0)

        if (res):
            return True
        else:
            if (RC.isTaskObjSuctionBalancePlate() or RC.isTaskObjHandBalance()
                    or RC.isTaskObjCatch()):
                while (self._robotOperationTime <= 7000):
                    self._robotOperationTime = self.getRobotOperationTime()
                    #print('Operation Time:', robotOperTime)
                    time.sleep(1)

        return True
Exemplo n.º 2
0
    def getObservation(self):
        observation = []
        #endTipPos = self.getEndTipWorldPosition()
        isTaskObjHandBalance = RC.isTaskObjHandBalance()
        isTaskObjSuctionBalance = RC.isTaskObjSuctionBalance()
        isTaskObjHexapodBalance = RC.isTaskObjHexapodBalance()
        isTaskObjHold = RC.isTaskObjHold()
        isTaskObjCatch = RC.isTaskObjCatch()
        isTaskObjTimelyPick = RC.isTaskObjTimelyPick()
        isTaskObjTimelyCatch = RC.isTaskObjTimelyCatch()

        if (False):  #isTaskObjTimelyPick
            inputInts = [self._robotHandle]  #[objHandles[16]]
            inputFloats = []
            inputStrings = ''
            inputBuffer = bytearray()
            ##inputBuffer.append(78)
            ##inputBuffer.append(42)
            res, retInts, randomInitJointPose, retStrings, retBuffer = vrep.simxCallScriptFunction(self._clientID, RC.CUR5_ARM_NAME,     \
                                                                                                   vrep.sim_scripttype_childscript,      \
                                                                                                   'gbGetRandomInitJointPoseFromClient', \
                                                                                                   inputInts, inputFloats, inputStrings, inputBuffer, \
                                                                                                   vrep.simx_opmode_oneshot_wait)

            observation.append(
                np.array(randomInitJointPose[0], dtype=np.float32))
            #self._jointHandles
            #for i in range(len(randomInitJointPose)):
            #observation.append(np.array(randomInitJointPose[i], dtype=np.float32))

        for i in range(len(self._jointHandles)):
            if (isTaskObjHexapodBalance):
                pos = RC.getJointPosition(self._jointHandles[i])  # Pos
                #
                observation.append(np.array(pos, dtype=np.float32))
            else:
                if (i == 0 or i == 3 or i == 4 or i == 5):
                    pos = RC.getJointPosition(self._jointHandles[i])
                    #vel = RC.getJointVelocity(self._jointHandles[i])
                    #
                    if (isTaskObjHandBalance or isTaskObjSuctionBalance):
                        observation.append(np.array(pos,
                                                    dtype=np.float32))  # Pos
                        #observation.append(np.array(vel, dtype=np.float32)) # Vel
                    elif (isTaskObjHold):
                        force = RC.getJointForce(self._jointHandles[i])
                        observation.append(np.array(force,
                                                    dtype=np.float32))  # Force

        if (isTaskObjHandBalance):
            # HAND INFO (!Hand is also one factor that makes up the object condition (pos & orient), therefore should
            # also be added into the environment)
            #
            # Hand Joint Pos
            handJointPoses = self.getCurrentHandJointPoses()
            observation.append(np.array(handJointPoses[2], dtype=np.float32))
            observation.append(np.array(handJointPoses[7], dtype=np.float32))

        elif (isTaskObjHold):
            # HAND INFO (!Hand is also one factor that makes up the object condition (pos & orient), therefore should
            # also be added into the environment)
            #
            # Hand Joint Pos
            handJointPoses = self.getCurrentHandJointPoses()
            observation.append(np.array(handJointPoses[2], dtype=np.float32))
            observation.append(np.array(handJointPoses[7], dtype=np.float32))

            handJointForces = [
                RC.getJointForce(self._handJointHandles[0]),
                RC.getJointForce(self._handJointHandles[1])
            ]
            observation.append(np.array(handJointForces[0], dtype=np.float32))
            observation.append(np.array(handJointForces[1], dtype=np.float32))

        elif (isTaskObjCatch):
            pos0 = RC.getJointPosition(self._jointHandles[0])
            vel0 = RC.getJointVelocity(self._jointHandles[0])
            observation.append(np.array(pos0, dtype=np.float32))
            observation.append(np.array(vel0, dtype=np.float32))

        return observation
Exemplo n.º 3
0
def gb_observation_2_state(ob):
    if (RC.GB_CSERVER_ROBOT_ID == RC.CKUKA_ARM_BARRETT_HAND):
        if (RC.isTaskObjHandBalance()):
            return np.hstack((
                ob[0],
                ob[1],
                ob[2],
                ob[3],
                ob[4],
                ob[5],  # Joint pos and vel
                ob[6],
                ob[7],
                ob[8],
                ob[9]))
        elif (RC.isTaskObjSuctionBalancePosOnly()):
            return np.hstack((
                ob[0],
                ob[1],
                ob[2],
                ob[3],  # Joint pos
                ob[4],  # Plate slanting degree
                ob[5]  # Plate distance to base plate
            ))
        elif (RC.isTaskObjSuctionBalancePosVel()):
            return np.hstack((
                ob[0],
                ob[1],
                ob[2],
                ob[3],  # Joint pos
                ob[4],  # Vel-Trained joint vel
                ob[5],  # Plate slanting degree
                ob[6]  # Plate distance to base plate
            ))
        elif (RC.isTaskObjHold()):
            return np.hstack((
                ob[0],
                ob[1],
                ob[2],
                ob[3],
                ob[4],
                ob[5],
                ob[6],  # Joint i (pos)
                ob[7],
                ob[8],
                ob[9],
                ob[10],
                ob[11],
                ob[12]))
        elif (RC.isTaskObjCatch()):
            return np.hstack((ob[0], ob[1], ob[2], ob[3]))

    elif (RC.GB_CSERVER_ROBOT_ID == RC.CUR5_ARM_GRIPPER):
        if (RC.isTaskObjTimelyPick()):
            return np.hstack((
                ob[0],
                ob[1],
                ob[2]  #, ob[4], ob[5], ob[6], ob[7] # Joint i (init pos)
            ))

    elif (RC.GB_CSERVER_ROBOT_ID == RC.CHEXAPOD):
        if (RC.isTaskObjHexapodBalance()):
            return np.hstack((
                ob[0],
                ob[1],
                ob[2],
                ob[3],
                ob[4],
                ob[5],  # Joint pos and vel
                ob[6],
                ob[7]))

    else:  #if(GB_CSERVER_ROBOT_ID == CUR5_ARM_BARRETT_HAND):
        return np.hstack((
            ob[0],
            ob[1],
            ob[2],
            ob[3],
            ob[4],
            ob[5],  # Joint i (pos)
            ob[6],
            ob[7],
            ob[8],  # Endtip pos X,Y,Z
            ob[9],
            ob[10],
            ob[11]  # Endtip orient X,Y,Z
        ))
Exemplo n.º 4
0
    def step(self, action):
        ## ----------------------------------------------------------------------------------------
        if (RC.GB_TRACE):
            print('ACTION:', action)
        ## ----------------------------------------------------------------------------------------
        ## APPLY ACTION START
        ##
        self._objGroundHit = False
        self._robotOperationTime = 0

        ## WAIT FOR V-REP SERVER SIMULATION STATE TO BE READY
        ##
        if (self._robot.getOperationState() == RC.CROBOT_STATE_READY):
            self._robot.applyAction(action)

        ## OBSERVE & REWARD
        reward = 0

        #########################################################################################
        ### TIMELY PICK -------------------------------------------------------------------------
        ###
        if (RC.isTaskObjTimelyPick()):
            objName = RC.CCUBOID_OBJ_NAME
            pos = RC.getObjectWorldPosition(objName)

            self._robotOperationTime = self.getRobotOperationTime()
            while (pos[1] >= RC.CTASK_OBJ_TIMELY_PICK_POSITION
                   and self._robot.getOperationState() !=
                   RC.CROBOT_STATE_MOVING_ENDED):
                #print('Obj Pos:', pos)
                pos = RC.getObjectWorldPosition(objName)

            #print('END-Obj Pos:', pos)

            # EITHER MOVING_ENDED or not, get the operation time
            self._robotOperationTime = self.getRobotOperationTime()
            #print('OPER TIME:', self._robotOperationTime)

            # PUNISHMENT POLICY
            #
            # P1 --
            if (self._robot.getOperationState() == RC.CROBOT_STATE_MOVING_ENDED
                    and pos[1] >= RC.CTASK_OBJ_TIMELY_PICK_POSITION):
                d = self.getTimelyPickObjectDistance(
                )  # pos[1] - RC.CTASK_OBJ_TIMELY_PICK_POSITION
                t = 5000 * d + 400
                print('TOO SOON', d)
                reward -= t

            # P2 --
            elif (pos[1] < RC.CTASK_OBJ_TIMELY_PICK_POSITION):
                d = self.getCurrentRobotPosDistanceToPrePickPose()
                d = d * 500
                #print('DISTANCE: ', d)
                # !NOTE:
                # d value will decide not yet reaching pre-pick pose --
                # Reached pre-pick pose --

                while (self._robot.getOperationState() !=
                       RC.CROBOT_STATE_MOVING_ENDED):
                    time = self.getRobotOperationTime(
                    ) - self._robotOperationTime
                    #print('RUN TIME:', time)
                    if (time > 7000):
                        self._observation = self.getObservation(action)
                        self._observation.append(np.array(0, dtype=np.float32))
                        self._observation.append(np.array(0, dtype=np.float32))
                        reward = -time
                        return self._observation, reward, True, {
                        }  ######## ducta

                ti = (self.getRobotOperationTime() -
                      self._robotOperationTime) / 100
                d += ti
                print('Late:', d, ':', ti)
                reward -= d

            # P3 --
            approachingTime = self.getTimelyPickApproachingTime()
            pickingTime = self.getTimelyPickGrippingTime()
            print('OPER TIME', approachingTime, ' --- ', pickingTime)

            # OBSERVATION
            #
            self._observation = self.getObservation(action)
            self._observation.append(
                np.array(approachingTime, dtype=np.float32))
            self._observation.append(np.array(pickingTime, dtype=np.float32))

            reward += self.reward()
            isSlowApproaching = (action[0] < 0.1) or (approachingTime > 7000)
            isSlowGripping = (action[1] < 0.1) or (pickingTime > 1000)
            if (isSlowApproaching):
                reward -= approachingTime
            if (isSlowGripping):
                reward -= pickingTime
            done = not (isSlowApproaching or isSlowGripping)

            print('Env observed!', reward, ' - ',
                  done)  # self._robot.getOperationState()

            return self._observation, reward, done, {}  ######## ducta

        #########################################################################################
        ### TIMELY CATCH ------------------------------------------------------------------------
        ###
        elif (RC.isTaskObjTimelyCatch()):
            objName = RC.CCUBOID_OBJ_NAME
            #pos = RC.getObjectWorldPosition(objName)
            self._robotOperationTime = self.getRobotOperationTime()

            APPROACHING_TIME_LIMIT = 7000
            PICKING_TIME_LIMIT = 1000

            # WAIT FOR ACTION ENDED
            #
            reward += self.reward()
            #
            while (self._robot.getOperationState() !=
                   RC.CROBOT_STATE_MOVING_ENDED):
                #print('Obj Pos:', pos)
                self._objGroundHit = self.isObjGroundHit()
                if (self._objGroundHit):
                    reward -= 100
                # ------------------------------------------------------------
                time = self.getRobotOperationTime() - self._robotOperationTime
                #print('RUN TIME:', time)
                if (time > (APPROACHING_TIME_LIMIT + PICKING_TIME_LIMIT)):
                    self._observation = self.getObservation(action)
                    self._observation.append(np.array(0, dtype=np.float32))
                    self._observation.append(np.array(0, dtype=np.float32))
                    reward = -time
                    print('MOVING TOO LONG', time)
                    print('Env observed!', reward, ' - ',
                          False)  # self._robot.getOperationState()
                    return self._observation, reward, False, {}  ######## ducta

            #print('END-Obj Pos:', pos)
            self._robotOperationTime = self.getRobotOperationTime()
            #print('OPER TIME:', self._robotOperationTime)

            # PUNISHMENT POLICY
            #
            # P1 -------------------------------------------------------------------------------------------------
            #
            #!NOTE: THESE VALUES ARE ONLY VALID AFTER self._robot.getOperationState() == RC.CROBOT_STATE_MOVING_ENDED,
            # ELSE RETURN 0
            approachingTime = self.getTimelyPickApproachingTime()
            pickingTime = self.getTimelyPickGrippingTime()
            print('OPER TIME', approachingTime, ' --- ', pickingTime)

            # OBSERVATION
            #
            self._observation = self.getObservation(action)
            self._observation.append(
                np.array(approachingTime, dtype=np.float32))
            self._observation.append(np.array(pickingTime, dtype=np.float32))

            isSlowApproaching = (action[0] < 0.1) or (approachingTime >
                                                      APPROACHING_TIME_LIMIT)
            isSlowGripping = (action[1] < 0.1) or (pickingTime >
                                                   PICKING_TIME_LIMIT)

            if (isSlowApproaching and isSlowGripping):
                reward -= (approachingTime + pickingTime)
            elif (isSlowApproaching):
                reward -= (approachingTime + PICKING_TIME_LIMIT - action[1])
            elif (isSlowGripping):
                reward -= (pickingTime + APPROACHING_TIME_LIMIT - action[0])

            done = not (
                isSlowApproaching or isSlowGripping
            )  # Here, we intentionally reverse the meaning for cancelling off all < 0.1 values

            if (not done):
                print('TOO SLOW ACT')
                print('Env observed!', reward, ' - ',
                      done)  # self._robot.getOperationState()
                return self._observation, reward, done, {}  ######## ducta

            # ----------------------------------------------------------------------------------------------------
            self._objGroundHit = self.isObjGroundHit()
            pos = RC.getObjectWorldPosition(objName)
            #print('OBJ Z POS:', pos[2])

            # P2 -------------------------------------------------------------------------------------------------
            # OBJ ON GROUND NOW
            # ALREADY HANDLED ABOVE by -100 reward if () during waiting for moving ended
            if (pos[2] >= 0.3 and pos[2] <= 0.5):
                reward += 5000 - abs(pos[2] - 0.36)
                print('Obj CAUGHT! APPARE!')

            # P3 -------------------------------------------------------------------------------------------------
            # OBJ ON GROUND NOW
            # ALREADY HANDLED ABOVE by -100 reward if () during waiting for moving ended
            elif (self._objGroundHit):
                reward -= (approachingTime + pickingTime) / 10
                print('Obj Already On ground..Late?')
            #
            # P4 -------------------------------------------------------------------------------------------------
            # OBJ STILL FALLING
            elif (pos[2] > 0.5):
                d = self.getTimelyCatchObjectDistance()
                t = 5000 * d
                print('TOO SOON', d)
                reward -= t

            print('Env observed!', reward, ' - ',
                  done)  # self._robot.getOperationState()
            return self._observation, reward, done, {}  ######## ducta

        ### OTHERS ##############################################################################
        else:
            #time.sleep(self._timeStep)
            #!!!Wait for some time since starting the action then observe:
            i = 0
            while (self._robot.getOperationState() == RC.CROBOT_STATE_MOVING):
                time.sleep(1)
                self._objGroundHit = self.isObjGroundHit()
                if (self._objGroundHit):
                    #print('GROUND HIT',i+1)
                    reward -= 100
                if (i == 2):  # 3s has passed!
                    reward += self.reward()
                i = i + 1
                #print('Still moving',i)

            self._observation = self.getObservation(action)
            reward += self.reward()  # Reward Addition after observation 2nd!
            done = self.termination()
            print('Env observed 2nd!',
                  reward)  # self._robot.getOperationState()
            #print("len=%r" % len(self._observation))

            return self._observation, reward, False, {}
Exemplo n.º 5
0
    def getObservation(self, action):
        self._observation = self._robot.getObservation()
        #if(RC.isTaskObjSuctionBalancePlate()):
        # Vel-trained joint vel, action[1] --> Refer to the V-Rep Server Robot script to confirm this!
        #self._observation.append(np.array(action[1], dtype=np.float32))

        #robotId = self._robot.id()
        if (RC.isTaskObjHandBalance() or RC.isTaskObjSuctionBalance()
                or RC.isTaskObjHexapodBalance()):
            ##############################################################################################
            # PLATE INFO
            #
            # Plate Object
            # Angle rotation away from horizontal plane
            plateOrient = RC.getObjectOrientation(
                RC.CPLATE_OBJ_NAME)  # Must be the same name set in V-Rep
            self._observation.append(
                np.array(
                    abs(plateOrient[0]),
                    dtype=np.float32))  # Either alpha(X) or beta(Y) is enough!
            #self._observation.append(np.array(abs(plateOrient[1]), dtype=np.float32))

            # Distance from plate to the center of end-tip
            platePos = RC.getObjectWorldPosition(RC.CPLATE_OBJ_NAME)
            endTipPos = self._robot.getEndTipWorldPosition()
            #basePlatePos = RC.getObjectWorldPosition(RC.CBASE_PLATE_OBJ_NAME)
            d = math.sqrt((platePos[0] - endTipPos[0])**2 +
                          (platePos[1] - endTipPos[1])**2)

            # Base Plate Normal Vector -----------------------------------------------------------------------------
            normalVector = self.getBasePlateNormalVector()
            if (len(normalVector) == 3):
                slantingDegree = abs(
                    RC.angle_between(np.array([0, 0, 1]),
                                     np.array(normalVector)))
            else:
                slantingDegree = 0
            #print('SLANT', slantingDegree)

            # Velocity Factor of Joint 4  -----------------------------------------------------------------------------
            if (RC.isTaskObjSuctionBalancePosVel()):
                self._observation.append(
                    np.array([action[1]], dtype=np.float32))

            self._observation.append(np.array([d], dtype=np.float32))
            self._observation.append(
                np.array([slantingDegree], dtype=np.float32))
        elif (RC.isTaskObjHold()):
            ##############################################################################################
            # TUBE INFO
            #
            # Tube Object
            # Angle rotation away from horizontal plane
            tubeOrient = RC.getObjectOrientation(
                RC.CTUBE_OBJ_NAME)  # Must be the same name set in V-Rep
            self._observation.append(
                np.array(abs(tubeOrient[1]), dtype=np.float32))

            # Distance from plate to the center of end-tip
            tubePos = RC.getObjectWorldPosition(RC.CTUBE_OBJ_NAME)
            palmPos = self._robot.getEndTipWorldPosition()
            d = math.sqrt((tubePos[0] - palmPos[0])**2 +
                          (tubePos[1] - palmPos[1])**2)
            self._observation.append(np.array(d, dtype=np.float32))

        elif (RC.isTaskObjCatch()):
            ##############################################################################################
            # BALL INFO
            #
            # Ball Object
            # Ball z position only
            ballPos = RC.getObjectWorldPosition(RC.CBALL_OBJ_NAME)
            ballLinVel = RC.getObjectVelocity(RC.CBALL_OBJ_NAME)
            ##self._observation.append(np.array(ballPos[0], dtype=np.float32))
            ##self._observation.append(np.array(ballPos[1], dtype=np.float32))
            self._observation.append(np.array(ballPos[2], dtype=np.float32))
            self._observation.append(np.array(ballLinVel[2], dtype=np.float32))

        elif (RC.isTaskObjTimelyPick()):
            ##############################################################################################
            # OBJ INFO
            #
            # Object position on conveyor belt
            #objPos    = RC.getObjectWorldPosition(RC.CCUBOID_OBJ_NAME)
            #endTipPos = RC.getObjectWorldPosition('RG2')
            #d = math.sqrt((objPos[0] - endTipPos[0])**2 +
            #              (objPos[1] - endTipPos[1])**2 +
            #              (objPos[1] - endTipPos[1])**2)
            #self._observation.append(np.array(d, dtype=np.float32))
            d = self.getTimelyPickObjectDistance()
            self._observation.append(np.array(d, dtype=np.float32))

            #vel = self.getBeltVelocity()
            #self._observation.append(np.array(vel, dtype=np.float32))
            #print('OBJECT TO PICK: ', d)

        elif (RC.isTaskObjTimelyCatch()):
            ##############################################################################################
            # OBJ INFO
            #
            # Object position on conveyor belt
            objPos = RC.getObjectWorldPosition(RC.CCUBOID_OBJ_NAME)
            endTipPos = RC.getObjectWorldPosition('RG2')
            zDelta = objPos[2] - endTipPos[2]
            #d = math.sqrt((objPos[0] - endTipPos[0])**2 +
            #              (objPos[1] - endTipPos[1])**2 +
            #              (objPos[1] - endTipPos[1])**2)
            #self._observation.append(np.array(d, dtype=np.float32))
            d = self.getTimelyCatchObjectDistance()
            self._observation.append(np.array(zDelta, dtype=np.float32))
            self._observation.append(np.array(d, dtype=np.float32))

        return self._observation