Пример #1
0
    def test_save_path(self):
        """Write several episode rollouts to disk, then check that
        restore_latest_n_traj reloads the expected number of transitions,
        both without and with a max-steps cap."""
        num_episodes = 10
        base_obs = np.ones(shape=self.env.observation_space.shape,
                           dtype=np.float32)
        buffer_size = self.replay_buffer.get_buffer_size()
        for episode in range(num_episodes):
            # Fill the buffer with deterministic transitions, then dump it.
            for step in range(buffer_size):
                self.replay_buffer.add(obs=base_obs * step,
                                       act=step,
                                       rew=0.,
                                       next_obs=base_obs * (step + 1),
                                       done=False)
            filename = os.path.join(
                self.output_dir,
                "step_0_epi_{}_return_0.0.pkl").format(episode)
            save_path(self.replay_buffer.sample(buffer_size), filename)

        # All saved episodes should be restored when no limit is given.
        data = restore_latest_n_traj(self.output_dir)
        self.assertEqual(data["obses"].shape[0], buffer_size * num_episodes)

        # With a step cap, every array is truncated to max_steps.
        max_steps = 10
        data = restore_latest_n_traj(self.output_dir, 1, max_steps)
        for key in ("obses", "acts", "next_obses"):
            self.assertEqual(data[key].shape[0], max_steps)
Пример #2
0
    def evaluate_policy(self, total_steps):
        """Run the current policy on the test environment.

        Args:
            total_steps (int): Current total steps of training; embedded in
                the filename prefix of any saved rollouts/movies.

        Returns:
            tuple: (average episode return, average episode length) over
            ``self._test_episodes`` episodes.
        """
        sum_returns = 0.
        sum_steps = 0
        if self._save_test_path:
            # One-episode-sized buffer, cleared after each dump to disk.
            episode_buffer = get_replay_buffer(self._policy,
                                               self._test_env,
                                               size=self._episode_max_steps)
        for epi_idx in range(self._test_episodes):
            episode_return = 0.
            frames = []
            obs = self._test_env.reset()
            sum_steps += 1
            for _ in range(self._episode_max_steps):
                if self._normalize_obs:
                    obs = self._obs_normalizer(obs, update=False)
                act, _ = self._policy.get_action(obs, test=True)
                # NOTE(review): bounds come from self._env, not
                # self._test_env — presumably identical action spaces; verify.
                if not is_discrete(self._env.action_space):
                    act = np.clip(act,
                                  self._env.action_space.low,
                                  self._env.action_space.high)
                next_obs, reward, done, _ = self._test_env.step(act)
                sum_steps += 1
                if self._save_test_path:
                    episode_buffer.add(obs=obs,
                                       act=act,
                                       next_obs=next_obs,
                                       rew=reward,
                                       done=done)
                if self._save_test_movie:
                    frames.append(self._test_env.render(mode='rgb_array'))
                elif self._show_test_progress:
                    self._test_env.render()
                episode_return += reward
                obs = next_obs
                if done:
                    break
            prefix = "step_{0:08d}_epi_{1:02d}_return_{2:010.4f}".format(
                total_steps, epi_idx, episode_return)
            if self._save_test_path:
                save_path(episode_buffer.sample(self._episode_max_steps),
                          os.path.join(self._output_dir, prefix + ".pkl"))
                episode_buffer.clear()
            if self._save_test_movie:
                frames_to_gif(frames, prefix, self._output_dir)
            sum_returns += episode_return
        if self._show_test_images:
            # Log the last observation as an image (assumes HWC layout).
            images = tf.cast(
                tf.expand_dims(np.array(obs).transpose(2, 0, 1), axis=3),
                tf.uint8)
            tf.summary.image('train/input_img', images)
        return sum_returns / self._test_episodes, sum_steps / self._test_episodes
Пример #3
0
    def evaluate_policy(self, total_steps):
        """Evaluate the current policy on the test environment.

        Args:
            total_steps (int): Current total steps of training; used for the
                TF summary step and saved-file prefixes.

        Returns:
            tuple: (average episode return, average episode length) over
            ``self._test_episodes`` episodes.
        """
        tf.summary.experimental.set_step(total_steps)
        if self._normalize_obs:
            # Mirror the training normalizer's statistics onto the test env.
            self._test_env.normalizer.set_params(
                *self._env.normalizer.get_params())
        sum_returns = 0.
        sum_steps = 0
        if self._save_test_path:
            # One-episode-sized buffer, cleared after each dump to disk.
            episode_buffer = get_replay_buffer(self._policy,
                                               self._test_env,
                                               size=self._episode_max_steps)
        for epi_idx in range(self._test_episodes):
            episode_return = 0.
            frames = []
            obs = self._test_env.reset()
            sum_steps += 1
            for _ in range(self._episode_max_steps):
                action = self._policy.get_action(obs, test=True)
                next_obs, reward, done, _ = self._test_env.step(action)
                sum_steps += 1
                if self._save_test_path:
                    episode_buffer.add(obs=obs,
                                       act=action,
                                       next_obs=next_obs,
                                       rew=reward,
                                       done=done)
                if self._save_test_movie:
                    frames.append(self._test_env.render(mode='rgb_array'))
                elif self._show_test_progress:
                    self._test_env.render()
                episode_return += reward
                obs = next_obs
                if done:
                    break
            prefix = "step_{0:08d}_epi_{1:02d}_return_{2:010.4f}".format(
                total_steps, epi_idx, episode_return)
            if self._save_test_path:
                # Dump the transitions in insertion order, then reset.
                save_path(
                    episode_buffer._encode_sample(
                        np.arange(self._episode_max_steps)),
                    os.path.join(self._output_dir, prefix + ".pkl"))
                episode_buffer.clear()
            if self._save_test_movie:
                frames_to_gif(frames, prefix, self._output_dir)
            sum_returns += episode_return
        if self._show_test_images:
            # Log the last observation as an image (assumes HWC layout).
            images = tf.cast(
                tf.expand_dims(np.array(obs).transpose(2, 0, 1), axis=3),
                tf.uint8)
            tf.summary.image('train/input_img', images)
        return sum_returns / self._test_episodes, sum_steps / self._test_episodes
Пример #4
0
    def evaluate_policy(self, total_steps):
        """Evaluate the current policy on the test environment, tracking
        both reward and safety cost.

        Args:
            total_steps (int): Current total steps of training; used for the
                TF summary step and saved-file prefixes.

        Returns:
            tuple: (average episode return, average episode cost) over
            ``self._test_episodes`` episodes.
        """
        tf.summary.experimental.set_step(total_steps)
        if self._normalize_obs:
            # Mirror the training normalizer's statistics onto the test env.
            self._test_env.normalizer.set_params(
                *self._env.normalizer.get_params())
        avg_test_return = 0.
        avg_test_cost = 0.
        if self._save_test_path:
            replay_buffer = get_replay_buffer(self._policy,
                                              self._test_env,
                                              size=self._episode_max_steps)
        for i in range(self._test_episodes):
            episode_return = 0.
            episode_cost = 0.
            frames = []
            obs = self._test_env.reset()
            for _ in range(self._episode_max_steps):
                action = self._policy.get_action(obs, test=True)
                next_obs, reward, done, info = self._test_env.step(action)
                # Safety-gym style envs report the step cost in info;
                # default to 0 when the env provides no cost signal.
                try:
                    cost = info['cost']
                except (TypeError, KeyError, IndexError):
                    cost = 0
                if self._save_test_path:
                    replay_buffer.add(obs=obs,
                                      act=action,
                                      next_obs=next_obs,
                                      rew=reward,
                                      done=done)

                if self._save_test_movie:
                    frames.append(self._test_env.render(mode='rgb_array'))
                elif self._show_test_progress:
                    self._test_env.render()
                episode_return += reward
                episode_cost += cost
                obs = next_obs
                if done:
                    break
            # BUG FIX: the cost field previously reused positional index {2}
            # (the return), so episode_cost never appeared in the filename.
            prefix = "step_{0:08d}_epi_{1:02d}_return_{2:010.4f}_cost_{3:010.4f}".format(
                total_steps, i, episode_return, episode_cost)
            if self._save_test_path:
                # Dump the transitions in insertion order, then reset.
                save_path(
                    replay_buffer._encode_sample(
                        np.arange(self._episode_max_steps)),
                    os.path.join(self._output_dir, prefix + ".pkl"))
                replay_buffer.clear()
            if self._save_test_movie:
                frames_to_mp4(frames, prefix, self._output_dir)
            avg_test_return += episode_return
            avg_test_cost += episode_cost
        if self._show_test_images:
            # Log the last observation as an image (assumes HWC layout).
            images = tf.cast(
                tf.expand_dims(np.array(obs).transpose(2, 0, 1), axis=3),
                tf.uint8)
            tf.summary.image(
                'train/input_img',
                images,
            )
        return avg_test_return / self._test_episodes, avg_test_cost / self._test_episodes
Пример #5
0
    def evaluate_policy(self, total_steps):
        """Evaluate the policy on workspace-navigation test episodes.

        Each episode observation is augmented with the goal and a CAE-encoded
        workspace before being fed to the policy. Tracks overall success rate
        plus separate rates for episodes where a straight start-to-goal line
        is / is not feasible.

        Args:
            total_steps (int): Current total steps of training; used for the
                TF summary step and saved-file prefixes.

        Returns:
            tuple: (avg_test_return, success_rate, ratio_straight_lines,
            success_rate_straight_line, success_rate_no_straight_line)
        """
        tf.summary.experimental.set_step(total_steps)

        total_test_return = 0.
        success_traj = 0
        if self._save_test_path:
            replay_buffer = get_replay_buffer(
                self._policy, self._test_env, size=self._episode_max_steps)

        # Per-category episode / success counters.
        straight_line_episode = 0
        no_straight_line_episode = 0
        success_traj_straight_line = 0
        success_traj_no_straight_line = 0

        for i in range(self._test_episodes):
            episode_return = 0.
            frames = []
            workspace, goal, obs = self._test_env.reset()
            start = obs
            # Encode the workspace once per episode; it is static within one.
            reduced_workspace = self._CAE.evaluate(workspace)
            # Full observation = position + goal + encoded workspace.
            obs_full = np.concatenate((obs, goal, reduced_workspace))

            for _ in range(self._episode_max_steps):
                action = self._policy.get_action(obs_full)
                next_obs, reward, done, _ = self._test_env.step(action)
                # BUG FIX: was built from the stale `obs`, so the policy acted
                # on the previous position every step; use `next_obs`.
                next_obs_full = np.concatenate((next_obs, goal, reduced_workspace))

                # Add observation to the trajectory storage.
                self.trajectory.append({'workspace': workspace, 'position': obs,
                    'next_position': next_obs, 'goal': goal, 'action': action, 'reward': reward, 'done': done})

                if self._save_test_path:
                    replay_buffer.add(obs=obs_full, act=action,
                                next_obs=next_obs_full, rew=reward, done=done)

                if self._save_test_movie:
                    frames.append(self._test_env.render(mode='plot'))

                elif self._show_test_progress:
                    self._test_env.render()

                episode_return += reward
                obs = next_obs
                obs_full = next_obs_full

                if done:
                    break

            prefix = "step_{0:08d}_epi_{1:02d}_return_{2:010.4f}".format(
                total_steps, i, episode_return)

            if self._save_test_path:
                save_path(replay_buffer._encode_sample(np.arange(self._episode_max_steps)),
                          os.path.join(self._output_dir, prefix + ".pkl"))
                replay_buffer.clear()

            if self._save_test_movie:
                frames_to_gif(frames, prefix, self._output_dir)

            if self._save_test_path_sep:
                self._save_traj_separately(prefix)

            total_test_return += episode_return

            # Success is judged by the LAST step's reward equalling the goal
            # reward; count it per straight-line-feasibility category.
            if straight_line_feasible(workspace, start, goal, self._test_env):
                straight_line_episode += 1
                if reward == self._test_env.goal_reward:
                    success_traj_straight_line += 1
            else:
                no_straight_line_episode += 1
                if reward == self._test_env.goal_reward:
                    success_traj_no_straight_line += 1

            if reward == self._test_env.goal_reward:
                success_traj += 1

            # Empty the trajectory storage for the next episode.
            self.trajectory = []

        if self._show_test_images:
            # NOTE(review): axis=2 here vs axis=3 in sibling implementations —
            # confirm the intended image layout.
            images = tf.cast(
                tf.expand_dims(np.array(obs).transpose(2, 0, 1), axis=2),
                tf.uint8)
            tf.summary.image('train/input_img', images,)

        avg_test_return = total_test_return / self._test_episodes
        success_rate = success_traj / self._test_episodes
        # Guard against division by zero when a category has no episodes.
        if straight_line_episode > 0:
            success_rate_straight_line = success_traj_straight_line / straight_line_episode
        else:
            success_rate_straight_line = 0
        if no_straight_line_episode > 0:
            success_rate_no_straight_line = success_traj_no_straight_line / no_straight_line_episode
        else:
            success_rate_no_straight_line = 0
        ratio_straight_lines = straight_line_episode / self._test_episodes

        return avg_test_return, success_rate, ratio_straight_lines, success_rate_straight_line, success_rate_no_straight_line