Example #1
0
    def step(self, action):
        """Advance the environment by one control step.

        Steps the supervisor, observes the new state, converts the raw
        policy action into an executable motion, applies it, and then
        evaluates safety, reward, and termination.

        Returns a 5-tuple: (state, uncode_state, reward, done, safe).
        """
        # advance the physics simulation by one control interval
        self.supervisor.step(self.timeStep)
        self.timer += 1

        # observe the environment before acting
        uncode_state, self.state = self.__get_state()

        # map the raw policy output into a usable motion command
        # (third argument self.pd — presumably a PD-control flag; confirm)
        motion = cal.actions(self.state, action, self.pd)
        self.__execute_action(motion)

        # post-action evaluation: the safety flag feeds into the reward
        safe = cal.safetycheck(self.state)
        reward, done = cal.reward_step(self.state, safe, self.timer)
        return self.state, uncode_state, reward, done, safe
Example #2
0
    def trajectory(self, context, w):
        """Roll out an artificial trajectory using the learned models.

        The first six entries of row 0 of *w* are used as kp gains and
        the remaining six as kd gains for the environment's PD control.
        The rollout then alternates noisy policy action selection with
        the learned state-transition and contact-force models instead of
        the real simulator.

        Returns the accumulated episode reward.
        """
        # configure PD gains from the flat parameter vector
        kp = w[0][:6]
        kd = w[0][6:]
        self.env.pd_control(kd, kp)

        # fixed initial observation for the artificial rollout
        observation = np.array([
            0., -0.327, -53.77, 0., 0., 0., -0.001, 0., -0.604, 0., 0.001, 0.
        ])

        # per-dimension abort thresholds on the force/torque entries
        wrench_limits = (50., 50., 200., 3., 3., 3.)

        ep_reward = 0
        for step_idx in range(self.MAX_EP_STEPS):
            # exploration noise around the deterministic policy action
            raw_action = self.ddpg.select_action(observation)
            noisy = np.clip(np.random.normal(raw_action, self.var),
                            -self.action_bound, self.action_bound)
            act = cal.actions(observation, noisy, True)

            # predict the next pose delta and the resulting contact wrench
            ds = self.state_transfer_model.predict(np.array([act]),
                                                   return_std=0,
                                                   return_cov=0)[0]
            new_pos = ds + observation[:6]
            new_force = self.contact_model.predict(np.array([new_pos]),
                                                   return_std=0,
                                                   return_cov=0)[0]
            observation_ = np.hstack((new_pos, new_force))

            # NOTE(review): `observation` is never advanced to
            # `observation_` inside the loop (so the policy always sees
            # the initial observation) — confirm this is intentional.

            # small time penalty every step
            ep_reward += -0.01

            # abort on excessive force/torque with a depth-based penalty
            if any(f >= lim
                   for f, lim in zip(observation_[6:], wrench_limits)):
                ep_reward += -1 + (observation_[2] + 52.7) / 40
                break
            # success: insertion depth reached — finishing earlier pays more
            if observation_[2] > -12:
                ep_reward += 1 - step_idx / self.MAX_EP_STEPS
                break
        return ep_reward
Example #3
0
    def step(self, action):
        """Apply *action* for one simulation step and return the outcome.

        Depending on ``self.movementMode`` the action is interpreted as a
        Cartesian pose increment (IK mode) or joint-angle increments
        (FK mode).  The resulting targets are streamed to the V-REP
        simulation as float signals; the new state (position plus
        force/torque sensor readings) is then assembled and scored.

        Returns a 5-tuple: (coded_state, state, reward, done, safe).
        """
        # tell the simulation which control mode the incoming signals use
        vrep.simxSetIntegerSignal(self.clientID, "movementMode",
                                  self.movementMode, vrep.simx_opmode_oneshot)

        # read the buffered force/torque sensor and sensor position
        # relative to the target handle
        self.errorCode, self.forceState, self.forceVector, self.torqueVector = \
            vrep.simxReadForceSensor(self.clientID, self.force_sensor_handle,
                                     vrep.simx_opmode_buffer)
        self.errorCode, self.position = \
            vrep.simxGetObjectPosition(self.clientID, self.force_sensor_handle,
                                       self.target_handle,
                                       vrep.simx_opmode_buffer)

        # convert the raw policy output into usable motion increments
        action = cal.actions(action, self.movementMode)

        # take the action in the active control mode
        if self.movementMode:
            self._step_ik(action)
        else:
            self._step_fk(action)

        # assemble the state: position + force vector + torque vector
        self.state = np.concatenate(
            (self.position, self.forceVector, self.torqueVector))

        # reward/termination and safety are computed by the shared helpers
        r, done = cal.reword(self.state)
        safe = cal.safetycheck(self.state)

        return self.code_state(self.state), self.state, r, done, safe

    def _step_ik(self, action):
        """Accumulate a Cartesian pose increment and stream it (IK mode)."""
        # NOTE(review): 'Alpha' takes action[4] and 'Beta' action[3] — this
        # swap is preserved from the original code; confirm it is intended.
        for key, idx in (('Pos_x', 0), ('Pos_y', 1), ('Pos_z', 2),
                         ('Alpha', 4), ('Beta', 3), ('Gamma', 5)):
            self.IK[key] += action[idx]

        # signal names differ from the dict keys only for the position axes
        for key, signal in (('Pos_x', 'pos_X'), ('Pos_y', 'pos_Y'),
                            ('Pos_z', 'pos_Z'), ('Alpha', 'Alpha'),
                            ('Beta', 'Beta'), ('Gamma', 'Gamma')):
            vrep.simxSetFloatSignal(self.clientID, signal, self.IK[key],
                                    vrep.simx_opmode_oneshot)
        time.sleep(0.1)  # wait for action to finish

    def _step_fk(self, action):
        """Accumulate joint increments, clip to limits, stream them (FK)."""
        for i in range(6):
            key = 'Joint%d' % (i + 1)
            # add the increment, then clamp to this joint's allowed range
            self.FK[key] = np.clip(self.FK[key] + action[i],
                                   *self.Joint_boundary[i])

        for i in range(6):
            key = 'Joint%d' % (i + 1)
            # degrees -> radians, then shift/scale by self.Joints[i]
            # (presumably an (offset, scale) pair — confirm) and multiply
            # by 1000 for the signal encoding
            vrep.simxSetFloatSignal(
                self.clientID, key,
                (self.FK[key] * np.pi / 180 - self.Joints[i][0]) /
                self.Joints[i][1] * 1000, vrep.simx_opmode_oneshot)