def test_vec_monitor_load_results(tmp_path):
    """
    Test ``load_results`` on log files produced by the ``VecMonitor`` wrapper.

    Two monitored CartPole environments are created one after the other in
    the same directory. Each is stepped 1000 times with random actions, and
    after each run the number of rows returned by ``load_results`` must
    account for every episode that finished so far.

    :param tmp_path: directory in which the monitor files are written
    """
    tmp_path = str(tmp_path)
    # Everything registered here is cleaned up in the ``finally`` clause,
    # whether or not an assertion fails part-way through the test.
    opened_envs = []
    created_files = []

    def make_monitored_env():
        # Seeded single-env CartPole VecEnv logging to a uniquely named file.
        venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
        venv.seed(0)
        monitor_file = os.path.join(
            tmp_path, f"stable_baselines-test-{uuid.uuid4()}.monitor.csv")
        monitor_env = VecMonitor(venv, monitor_file)
        opened_envs.append(monitor_env)
        created_files.append(monitor_file)
        return monitor_env, monitor_file

    def run_random_steps(monitor_env, n_steps=1000):
        # Step with random actions; return how many episodes finished.
        monitor_env.reset()
        finished = 0
        for _ in range(n_steps):
            _, _, dones, _ = monitor_env.step(
                [monitor_env.action_space.sample()])
            if dones[0]:
                finished += 1
                monitor_env.reset()
        return finished

    try:
        monitor_env1, monitor_file1 = make_monitored_env()

        monitor_files = get_monitor_files(tmp_path)
        assert len(monitor_files) == 1
        assert monitor_file1 in monitor_files

        episode_count1 = run_random_steps(monitor_env1)

        results_size1 = len(load_results(tmp_path).index)
        assert results_size1 == episode_count1

        monitor_env2, monitor_file2 = make_monitored_env()

        monitor_files = get_monitor_files(tmp_path)
        assert len(monitor_files) == 2
        assert monitor_file1 in monitor_files
        assert monitor_file2 in monitor_files

        episode_count2 = run_random_steps(monitor_env2)

        # The second load sees both monitor files, so the row count must be
        # the first run's rows plus the episodes finished in the second run.
        results_size2 = len(load_results(tmp_path).index)
        assert results_size2 == (results_size1 + episode_count2)
    finally:
        # Close the wrappers before deleting, so that any file handle they
        # still hold is released first (removing an open file fails on
        # some platforms).
        for monitor_env in opened_envs:
            monitor_env.close()
        for monitor_file in created_files:
            if os.path.exists(monitor_file):
                os.remove(monitor_file)
Exemplo n.º 2
0
 def _on_step(self) -> bool:
     """
     Periodically report the recent training performance.

     Every ``check_freq`` calls, the results stored under
     ``constants.OUT_DIR`` are loaded and the mean reward and the mean of
     the ``l`` column over the last 100 entries are computed. Both values
     are optionally printed, logged to ``self.experiment`` and the best
     mean reward seen so far is updated.

     :return: always ``True``
     """
     # Only do the (file-reading) check on every ``check_freq``-th call.
     if self.n_calls % self.check_freq != 0:
         return True

     results = load_results(constants.OUT_DIR)
     timesteps: np.ndarray[Any]
     rewards: np.ndarray[Any]
     timesteps, rewards = ts2xy(results, 'timesteps')  # type: ignore
     n_points = len(timesteps)
     # Nothing has been logged yet, so there is nothing to report.
     if n_points <= 0:
         return True

     # Averages over (at most) the 100 most recent entries.
     recent_reward = np.mean(rewards[-100:])
     recent_steps = np.mean(results.l.values[-100:])  # type: ignore

     if self.verbose > 0:
         print(f"Num timesteps: {self.num_timesteps}")
         print(
             f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {recent_reward:.2f}"
         )
         print("Last mean time steps: {}".format(recent_steps))

     # The number of logged points is used as the metric step.
     self.experiment.log_metric('rewards', recent_reward, n_points)
     self.experiment.log_metric('steps', recent_steps, n_points)

     # Track the best mean reward; the model itself is not saved here.
     if recent_reward > self.best_mean_reward:  # type: ignore
         self.best_mean_reward = recent_reward  # type: ignore
     return True
Exemplo n.º 3
0
def get_mean_reward_last_n_steps(n, log_dir):
    """
    Return the mean reward over the last ``n`` entries logged in ``log_dir``.

    :param n: number of most recent logged entries to average over
    :param log_dir: directory handed to ``load_results``
    :return: mean of the last ``n`` reward values, or ``None`` (after
        logging a warning) when nothing has been logged yet
    """
    results = load_results(log_dir)
    timesteps, rewards = ts2xy(results, 'timesteps')

    # Without any logged data there is nothing to average: warn and bail out.
    if len(timesteps) == 0:
        logging.warning(
            f'{get_mean_reward_last_n_steps.__name__} called when the number of logged timesteps was 0'
        )
        return None

    return np.mean(rewards[-n:])
def plot_results(dirs: List[str], num_timesteps: Optional[int],
                 x_axis: str, task_name: str, figsize: Tuple[int, int] = (8, 2)) -> None:
    """
    Plot the results stored as csv files by the ``Monitor`` wrapper.

    :param dirs: ([str]) folders containing the results to plot
    :param num_timesteps: (int or None) if given, keep only the rows whose
        cumulative ``l`` value does not exceed this number
    :param x_axis: (str) the axis for the x and y output
        (can be X_TIMESTEPS='timesteps', X_EPISODES='episodes' or X_WALLTIME='walltime_hrs')
    :param task_name: (str) the title of the task to plot
    :param figsize: (Tuple[int, int]) Size of the figure (width, height)
    """

    def _load_truncated(folder):
        # Load one folder and, when requested, cut it at ``num_timesteps``.
        frame = load_results(folder)
        if num_timesteps is None:
            return frame
        within_budget = frame.l.cumsum() <= num_timesteps
        return frame[within_budget]

    # Load every folder first, then convert each frame to an (x, y) pair.
    frames = [_load_truncated(folder) for folder in dirs]
    xy_list = []
    for frame in frames:
        xy_list.append(ts2xy(frame, x_axis))
    plot_curves(xy_list, x_axis, task_name, figsize)
Exemplo n.º 5
0
    def _on_step(self) -> bool:
        """
        Periodically check the training reward and save the best model.

        Every ``check_freq`` calls, the results stored in ``self.log_dir``
        are loaded and the mean reward over the last 100 entries is
        computed. When it exceeds ``self.best_mean_reward`` the new value is
        recorded and the model is saved to ``self.save_path``.

        :return: always ``True``
        """
        # Only do the (file-reading) check on every ``check_freq``-th call.
        if self.n_calls % self.check_freq != 0:
            return True

        results = load_results(self.log_dir)
        timesteps, rewards = ts2xy(results, 'timesteps')
        # Nothing has been logged yet, so there is nothing to compare.
        if len(timesteps) <= 0:
            return True

        # Average over (at most) the 100 most recent entries.
        recent_reward = np.mean(rewards[-100:])
        verbose = self.verbose > 0
        if verbose:
            print(f"Num timesteps: {self.num_timesteps}")
            print(
                f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {recent_reward:.2f}"
            )

        # Not an improvement: keep the previously saved model.
        if not recent_reward > self.best_mean_reward:
            return True

        self.best_mean_reward = recent_reward
        if self.verbose > 0:
            print(f"Saving new best model to {self.save_path}.zip")
        self.model.save(self.save_path)
        return True
Exemplo n.º 6
0
# Human-readable x-axis label for the axis type chosen on the command line.
axis_labels = {
    'steps': 'Timesteps',
    'episodes': 'Episodes',
    'time': 'Walltime (in hours)'
}
x_label = axis_labels[args.x_axis]

# Collect every sub-directory of ``log_path`` whose name contains ``env``.
dirs = []
for entry in os.listdir(log_path):
    candidate = os.path.join(log_path, entry)
    if env in entry and os.path.isdir(candidate):
        dirs.append(candidate)

figure_title = 'Training Success Rate'
plt.figure(figure_title, figsize=args.figsize)
plt.title(figure_title, fontsize=args.fontsize)
plt.xlabel(x_label, fontsize=args.fontsize)
plt.ylabel('Success Rate', fontsize=args.fontsize)

# One smoothed success-rate curve per run directory.
for folder in dirs:
    data_frame = load_results(folder)
    if args.max_timesteps is not None:
        within_budget = data_frame.l.cumsum() <= args.max_timesteps
        data_frame = data_frame[within_budget]
    success = np.array(data_frame['is_success'])
    x, _ = ts2xy(data_frame, x_axis)

    # Skip runs shorter than the smoothing window: no curve is drawn for them.
    if x.shape[0] < args.episode_window:
        continue

    # Rolling mean of the success flag over ``args.episode_window`` entries.
    x, y_mean = window_func(x, success, args.episode_window, np.mean)
    plt.plot(x, y_mean, linewidth=2)

plt.tight_layout()
plt.show()