Пример #1
0
    def step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - action
            - observation
            - reward
            - done (status)
        """
        self.iterator+=1

        # Execute "action"
        self._pub.publish(ut_mara.getTrajectoryMessage(
            action[:self.numJoints],
            self.environment['jointOrder'],
            self.velocity))

        self.ros_clock = rclpy.clock.Clock().now().nanoseconds

        # Take an observation
        obs = self.take_observation()

        # Fetch the positions of the end-effector which are nr_dof:nr_dof+3
        rewardDist = ut_math.rmseFunc( obs[self.numJoints:(self.numJoints+3)] )

        collided = self.collision()

        reward = ut_math.computeReward(rewardDist)

        # Calculate if the env has been solved
        done = bool(self.iterator == self.max_episode_steps)

        self.buffer_dist_rewards.append(rewardDist)
        self.buffer_tot_rewards.append(reward)
        info = {}
        if self.iterator % self.max_episode_steps == 0:

            max_dist_tgt = max(self.buffer_dist_rewards)
            mean_dist_tgt = np.mean(self.buffer_dist_rewards)
            std_dist_tgt = np.std(self.buffer_dist_rewards)
            min_dist_tgt = min(self.buffer_dist_rewards)
            skew_dist_tgt = skew(self.buffer_dist_rewards)

            max_tot_rew = max(self.buffer_tot_rewards)
            mean_tot_rew = np.mean(self.buffer_tot_rewards)
            std_tot_rew = np.std(self.buffer_tot_rewards)
            min_tot_rew = min(self.buffer_tot_rewards)
            skew_tot_rew = skew(self.buffer_tot_rewards)

            num_coll = self.collided

            info = {"infos":{"ep_dist_max": max_dist_tgt,"ep_dist_mean": mean_dist_tgt,"ep_dist_min": min_dist_tgt,\
                "ep_rew_max": max_tot_rew,"ep_rew_mean": mean_tot_rew,"ep_rew_min": min_tot_rew,"num_coll": num_coll,\
                "ep_dist_skew": skew_dist_tgt,"ep_dist_std": std_dist_tgt, "ep_rew_std": std_tot_rew, "ep_rew_skew":skew_tot_rew,\
                "target_x": self.targetPosition[0],"target_y": self.targetPosition[1],"target_z": self.targetPosition[2]}}
            self.buffer_dist_rewards = []
            self.buffer_tot_rewards = []
            self.collided = 0

        # Return the corresponding observations, rewards, etc.
        return obs, reward, done, info
Пример #2
0
    def step(self, action):
        """
        Implement the environment step abstraction. Execute action and returns:
            - action
            - observation
            - reward
            - done (status)
        """
        self.iterator += 1

        # Execute "action"
        self._pub.publish(
            ut_mara.getTrajectoryMessage(action[:self.numJoints],
                                         self.environment['jointOrder'],
                                         self.velocity))

        # Take an observation
        obs = self.take_observation()

        # Fetch the positions of the end-effector which are nr_dof:nr_dof+3
        rewardDist = ut_math.rmseFunc(obs[self.numJoints:(self.numJoints + 3)])
        rewardOrientation = 2 * np.arccos(abs(obs[self.numJoints + 3]))

        reward = ut_math.computeReward(rewardDist, rewardOrientation, False)

        # Calculate if the env has been solved
        done = bool(self.iterator == self.max_episode_steps)

        # Return the corresponding observations, rewards, etc.
        return obs, reward, done, {}
    def step(self, action):

        done = False
        self.iterator += 1
        # Execute "action"
        self._pub.publish(
            ut_mara.getTrajectoryMessage(action[:self.numJoints],
                                         self.environment['jointOrder'],
                                         self.velocity))

        self.ros_clock = rclpy.clock.Clock().now().nanoseconds

        obs = self.take_observation(action)
        done = True

        return obs, done