def plot_results2(log_folder1, log_folder2, title='Learning Curve'):
    """
    Plot the results of two runs on the same axes.

    :param log_folder1: (str) the save location of the PPO results to plot
    :param log_folder2: (str) the save location of the A2C results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder1), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x to match the smoothed y
    x = x[len(x) - len(y):]
    x2, y2 = ts2xy(load_results(log_folder2), 'timesteps')
    y2 = moving_average(y2, window=50)
    # Truncate x2 to match the smoothed y2
    x2 = x2[len(x2) - len(y2):]
    fig = plt.figure(title)
    plt.plot(x, y, label='PPO')
    plt.plot(x2, y2, label='A2C')
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    plt.legend()
    plt.show()
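# Several of these snippets call a moving_average() helper that is never defined here.
# A minimal sketch of what it presumably looks like (a rolling mean via np.convolve,
# matching the Stable-Baselines results-plotting example):
import numpy as np

def moving_average(values, window):
    """Smooth values with a rolling mean of the given window size."""
    weights = np.ones(window) / window
    return np.convolve(values, weights, 'valid')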
def main():
    # Create log dir
    args = parse_arguments()
    save_dir = args.log_dir
    logger = Logger(output_dir=save_dir)
    title = 'Learning Curve'
    x, y = logger.load_results(["EpLen", "EpRet"])
    x = cumulative_sum(x)
    y = moving_average(y, window=50)
    # Truncate x to match the smoothed y
    x = x[len(x) - len(y):]
    fig = plt.figure(title)
    plt.plot(x, y, label="Own implementation")
    if args.compare:
        log_dir = os.path.join(
            "Stable_Baselines", "logs",
            os.path.sep.join(args.log_dir.split(os.path.sep)[1:]))
        from stable_baselines3.common.results_plotter import load_results, ts2xy
        x2, y2 = ts2xy(load_results(log_dir), 'timesteps')
        y2 = moving_average(y2, window=50)
        # Truncate x2 to match the smoothed y2
        x2 = x2[len(x2) - len(y2):]
        x2, y2 = standardise_graph(x, y, x2, y2)
        plt.plot(x2, y2, label="Stable_Baselines3 implementation")
    plt.legend()
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    if args.save:
        fname = "comparison.png" if args.compare else "learning_curve.png"
        plt.savefig(os.path.join(save_dir, fname))
    plt.show()
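# cumulative_sum() above turns per-episode lengths ("EpLen") into a running timestep
# count so that episode returns can be plotted against timesteps. It is not defined in
# these snippets; a plausible one-liner, assuming NumPy (standardise_graph is left
# undefined since its semantics are not recoverable from the snippet):
import numpy as np

def cumulative_sum(values):
    """Running total, e.g. episode lengths -> cumulative timesteps."""
    return np.cumsum(values)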
def _on_step(self):
    if self.n_calls % self.check_freq == 0:
        # Retrieve training reward
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        print(y)
        if len(x) > 0:
            # Mean training reward over the last 50 episodes
            mean_reward = np.mean(y[-50:])
            if self.verbose > 0:
                print("Num timesteps: {}".format(self.num_timesteps))
                print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                      .format(self.best_mean_reward, mean_reward))
            # New best model, you could save the agent here
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                # Example for saving best model
                if self.verbose > 0:
                    print("Saving new best model to {}".format(self.save_path))
                self.model.save(self.save_path)
    return True
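# The _on_step overrides in this section all assume a surrounding callback class in the
# style of the SaveOnBestTrainingRewardCallback from the Stable-Baselines3 docs. A
# minimal sketch of that skeleton (attribute names match what the overrides use; the
# Monitor wrapper must write to the same log_dir):
import os
import numpy as np
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.results_plotter import load_results, ts2xy

class SaveOnBestTrainingRewardCallback(BaseCallback):
    def __init__(self, check_freq, log_dir, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, 'best_model')
        self.best_mean_reward = -np.inf

    def _init_callback(self) -> None:
        # Create folder for the saved model if needed
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)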
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        # Retrieve the training scores
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        self.logger.record('timesteps', self.num_timesteps)
        self.logger.record('mean_reward', self.best_mean_reward)
        if self.usewandb:
            wandb.log({'mean_reward': self.best_mean_reward})
            wandb.log({'timesteps': self.num_timesteps})
        if len(x) > 0:
            # Mean reward over the last 2 * check_freq episodes
            mean_reward = np.mean(y[-2 * self.check_freq:])
            if self.verbose > 0:
                print(f"Num timesteps: {self.num_timesteps} : ", end='')
                print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward/ep: {mean_reward:.2f}")
            # Save the model whenever the recent mean reward sets a new best
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                if self.verbose > 0:
                    print(f"Saving new best model to {self.save_path}.zip")
                self.model.save(self.save_path)
    return True
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        # Retrieve training reward
        data_frame = load_results(constants.OUT_DIR)
        x: np.ndarray[Any]
        y: np.ndarray[Any]
        x, y = ts2xy(data_frame, 'timesteps')  # type: ignore
        len_x = len(x)
        if len_x > 0:
            # Mean training reward and episode length over the last 100 episodes
            mean_reward = np.mean(y[-100:])
            mean_steps = np.mean(data_frame.l.values[-100:])  # type: ignore
            if self.verbose > 0:
                print(f"Num timesteps: {self.num_timesteps}")
                print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")
                print(f"Last mean episode length: {mean_steps}")
            self.experiment.log_metric('rewards', mean_reward, len_x)
            self.experiment.log_metric('steps', mean_steps, len_x)
            # New best model, you could save the agent here
            if mean_reward > self.best_mean_reward:  # type: ignore
                self.best_mean_reward = mean_reward  # type: ignore
                # Example for saving best model
                # if self.verbose > 0:
                #     print(f"Saving new best model to {self.save_path}.zip")
                # self.model.save(self.save_path)
    return True
def plot_results(log_folder, title='Learning Curve', save_fig=False):
    """
    Plot the results.

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    :param save_fig: (bool) whether to also save the figure to disk
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x to match the smoothed y
    x = x[len(x) - len(y):]
    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    if save_fig:
        # Save before show(), which may clear the current figure
        plt.savefig(os.path.join(log_folder, "learning_curve.png"))
    plt.show()
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        x, y = ts2xy(load_results(self.locals['tb_log_name']), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last 100 episodes
            mean_reward = np.mean(y[-100:])
            self.logger.record('rollout/mean reward of last 100 episodes', mean_reward)
    return True
def get_mean_reward_last_n_steps(n, log_dir):
    x, y = ts2xy(load_results(log_dir), 'timesteps')
    if len(x) > 0:
        # Mean training reward over the last n episodes
        return np.mean(y[-n:])
    logging.warning(
        f'{get_mean_reward_last_n_steps.__name__} called when the number of logged timesteps was 0'
    )
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        # Retrieve training reward (x represents timesteps)
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last 50 episodes
            mean_reward = np.mean(y[-50:])
            if self.verbose > 0:
                print("Num timesteps: {}".format(self.num_timesteps))
                print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(
                    self.best_mean_reward, mean_reward))
            writer.add_scalar("agent_reward/train/mean_episode_reward",
                              mean_reward, self.n_calls)
    return True
def plot_results2(log_folder, title='Learning Curve'):
    """
    Plot the results.

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=100)
    # Truncate x to match the smoothed y
    x = x[len(x) - len(y):]
    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
def _on_step(self) -> bool:
    x, y = ts2xy(load_results(self.log_dir), 'timesteps')
    if len(x) > 0:
        # Mean training reward over the last 100 episodes
        mean_reward = np.mean(y[-100:])
        if self.verbose > 0:
            print("Num timesteps: {}".format(self.num_timesteps))
            print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(
                self.best_mean_reward, mean_reward))
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            if self.verbose > 0:
                print("Saving new best model to {}".format(self.save_path))
            self.model.save(self.save_path)
    return True
def _on_step(self) -> bool:
    # Get the monitor's data
    x, y = ts2xy(load_results(self.log_dir), 'timesteps')
    if self._plot is None:
        # Make the plot
        plt.ion()
        fig = plt.figure(figsize=(6, 3))
        ax = fig.add_subplot(111)
        line, = ax.plot(x, y)
        self._plot = (line, ax, fig)
        plt.show()
    else:
        # Update and rescale the plot
        line, ax, fig = self._plot
        line.set_data(x, y)
        ax.relim()
        ax.set_xlim([self.locals["total_timesteps"] * -0.02,
                     self.locals["total_timesteps"] * 1.02])
        ax.autoscale_view(True, True, True)
        fig.canvas.draw()
    return True
def _on_rollout_end(self) -> None:
    """
    This event is triggered before updating the policy.
    """
    log_dict = get_log_dict()
    actor_loss = log_dict.get("train/actor_loss")
    critic_loss = log_dict.get("train/critic_loss")
    x, y = ts2xy(load_results(self.log_dir), 'timesteps')
    if len(x) > 0 and actor_loss and critic_loss:
        mean_reward = np.mean(y[-min(100, len(y)):])
        wandb.log({
            "episode_reward": y[-1],
            "mean_episode_reward": mean_reward,
            "actor_loss": actor_loss,
            "critic_loss": critic_loss,
        })
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last 100 episodes
            mean_reward = np.mean(y[-100:])
            if self.verbose > 0:
                print(f"Num timesteps: {self.num_timesteps}")
                print(f"Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}")
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                if self.verbose > 0:
                    print(f"Saving new best model to {self.save_path}.zip")
                self.model.save(self.save_path)
    return True
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        # Retrieve training reward
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            global episodes
            global mean_reward
            global best_reward
            episodes = len(y)
            # Mean training reward over the last 50 episodes
            mean_reward = np.mean(y[-50:])
            mean_reward = round(mean_reward, 0)
            if self.verbose > 0:
                print(f"Episodes: {episodes}")
                print(f"Num steps: {self.num_timesteps}")
                print(f"Mean reward: {mean_reward:.2f}")
                print("=================================")
            # Report intermediate objective value to Optuna and handle pruning
            trial.report(mean_reward, self.num_timesteps)
            if trial.should_prune():
                raise optuna.TrialPruned()
            # New best model, you could save the agent here
            if mean_reward > best_reward:
                best_reward = mean_reward
                if mean_reward > reward_threshold:
                    print("REWARD ACHIEVED")
                    model.save(f"{self.save_path}/reward_achieved_{mean_reward}")
                    return False
                else:
                    model.save(f"{self.save_path}/best_model")
            # New best model, you could save the agent here
            # if episodes > episodes_threshold:
            #     print("REWARD ACHIEVED")
            #     model.save(self.save_path)
            #     return False
    return True
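# A hypothetical sketch of how the Optuna-pruning callback above gets its globals
# (`trial`, `model`, `best_reward`, `reward_threshold`): the objective sets them, trains,
# and returns the tracked best reward. `PruningCallback` stands in for whatever class
# owns the _on_step above; the env id and hyperparameter range are illustrative only.
import os

import gymnasium as gym
import optuna
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor

log_dir = "./optuna_logs/"
os.makedirs(log_dir, exist_ok=True)
reward_threshold = 475  # e.g. the CartPole-v1 solve threshold

def objective(t):
    global trial, model, best_reward
    trial = t                       # the callback reports/prunes through this global
    best_reward = -float("inf")
    lr = t.suggest_float("learning_rate", 1e-5, 1e-3, log=True)
    env = Monitor(gym.make("CartPole-v1"), log_dir)
    model = PPO("MlpPolicy", env, learning_rate=lr, verbose=0)
    model.learn(total_timesteps=50_000,
                callback=PruningCallback(check_freq=1000, log_dir=log_dir))
    return best_reward

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)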
def save_episode_rewards_as_csv(self, data_directory="data/", log_dir=None):
    if log_dir is None:
        log_dir = self.log_dir
    # ts2xy with 'timesteps' returns (timesteps, episode rewards)
    timesteps, rewards = ts2xy(load_results(log_dir), 'timesteps')
    filename = self.model_name + "_episode_rewards"
    location = os.path.join(data_directory, f'{filename}.csv')
    with open(location, mode='w') as results_file:
        results_writer = csv.writer(results_file, delimiter=',',
                                    quotechar='"', quoting=csv.QUOTE_MINIMAL)
        results_writer.writerow(timesteps)
        results_writer.writerow(rewards)
def _on_step(self) -> bool:
    if self.n_calls % self.check_freq == 0:
        # Retrieve training reward
        x, y = ts2xy(load_results(self.log_dir), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last 100 episodes
            mean_reward = np.mean(y[-100:])
            if self.verbose > 0:
                print(f'Num timesteps: {self.num_timesteps}')
                print(f'Best mean reward: {self.best_mean_reward:.2f} - Last mean reward per episode: {mean_reward:.2f}')
            # New best model, you could save the agent here
            if mean_reward > self.best_mean_reward:
                self.best_mean_reward = mean_reward
                # Example for saving best model
                if self.verbose > 0:
                    print(f'Saving new best model to {self.save_path}.zip')
                self.model.save(self.save_path)
    return True
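# For context, a minimal sketch of how such a callback is wired into training, assuming
# SB3 v2 with Gymnasium. The key detail is that the environment must be wrapped in
# Monitor writing to the same log_dir that load_results() reads; the env id and
# hyperparameters here are placeholders.
import os

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor

log_dir = "./logs/"
os.makedirs(log_dir, exist_ok=True)
env = Monitor(gym.make("CartPole-v1"), log_dir)  # writes the monitor.csv that load_results reads
model = PPO("MlpPolicy", env, verbose=0)
callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir)
model.learn(total_timesteps=100_000, callback=callback)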
def _on_step(self) -> bool:
    if self.n_calls % self.eval_freq == 0:
        # Retrieve training reward
        x, y = ts2xy(load_results(self.data_dir), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last 100 episodes
            mean_reward = np.mean(y[-100:])
        else:
            mean_reward = -np.inf
        if self.verbose > 0:
            print("Num timesteps: {}".format(self.num_timesteps))
            print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}".format(
                self.best_mean_reward, mean_reward))
        # New best model, you could save the agent here
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            # Example for saving best model
            if self.verbose > 0:
                print("Saving new best model.")
            self.model.save(self.data_dir + '/best_model')
    return True
def plot_training_results(self, title="Learning Curve", xlabel="episode",
                          ylabel="cumulative reward", filename="reward",
                          log_dir=None, show=False):
    plt.clf()
    if log_dir is None:
        log_dir = self.log_dir
    # Note: x from ts2xy is timesteps, even though the default xlabel says "episode"
    x, y = ts2xy(load_results(log_dir), 'timesteps')
    plt.figure(figsize=(20, 10))
    plt.title(title)
    plt.plot(x, y, "b", label="Cumulative Reward")
    plt.legend()
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.axhline(y=0, color='r', linestyle='-')
    plt.savefig("img/" + filename)
    if show:
        plt.show()
def plot_results(log_folder, title='Learning Curve'):
    """
    Plot the results.

    :param log_folder: (str) the save location of the results to plot
    :param title: (str) the title of the task to plot
    """
    x, y = ts2xy(load_results(log_folder), 'timesteps')
    y = moving_average(y, window=50)
    # Truncate x to match the smoothed y
    x = x[len(x) - len(y):]
    fig = plt.figure(title)
    plt.plot(x, y)
    plt.xlabel('Number of Timesteps')
    plt.ylabel('Rewards')
    plt.title(title + " Smoothed")
    # Built-in plotter
    results_plotter.plot_results([log_folder], 3e5, results_plotter.X_TIMESTEPS, "TD3 LunarLander")
    plt.show()
        str(args.num_threads),
        "--seed",
        str(args.seed),
        "--verbose",
        "0",
        "--reward-log",
        reward_log,
    ]
    if args.verbose >= 1:
        print(f"{idx + 1}/{n_experiments}")
        print(f"Evaluating {algo} on {env_id}...")

    skip_eval = False
    if os.path.isdir(reward_log):
        try:
            x, y = ts2xy(load_results(reward_log), "timesteps")
            skip_eval = len(x) > 0
        except (json.JSONDecodeError, pd.errors.EmptyDataError, TypeError):
            pass

    if skip_eval:
        print("Skipping eval...")
    else:
        return_code = subprocess.call(["python", "enjoy.py"] + arguments)
        if return_code != 0:
            print("Error during evaluation, skipping...")
            continue

    x, y = ts2xy(load_results(reward_log), "timesteps")
    if len(x) > 0:
        # Retrieve training timesteps from config
x_label = {
    'steps': 'Timesteps',
    'episodes': 'Episodes',
    'time': 'Walltime (in hours)',
}[args.x_axis]

dirs = [
    os.path.join(log_path, folder)
    for folder in os.listdir(log_path)
    if env in folder and os.path.isdir(os.path.join(log_path, folder))
]

plt.figure('Training Success Rate', figsize=args.figsize)
plt.title('Training Success Rate', fontsize=args.fontsize)
plt.xlabel(f'{x_label}', fontsize=args.fontsize)
plt.ylabel('Success Rate', fontsize=args.fontsize)
for folder in dirs:
    data_frame = load_results(folder)
    if args.max_timesteps is not None:
        data_frame = data_frame[data_frame.l.cumsum() <= args.max_timesteps]
    success = np.array(data_frame['is_success'])
    x, _ = ts2xy(data_frame, x_axis)
    # Do not plot the smoothed curve at all if the timeseries is shorter than the window size
    if x.shape[0] >= args.episode_window:
        # Compute and plot rolling mean with window of size args.episode_window
        x, y_mean = window_func(x, success, args.episode_window, np.mean)
        plt.plot(x, y_mean, linewidth=2)

plt.tight_layout()
plt.show()
        '-f', args.log_dir,
        '--algo', algo,
        '--env', env_id,
        '--no-render',
        '--seed', str(args.seed),
        '--verbose', '0',
        '--reward-log', reward_log,
    ]
    if args.verbose >= 1:
        print(f"{idx + 1}/{n_experiments}")
        print(f"Evaluating {algo} on {env_id}...")

    skip_eval = False
    if os.path.isdir(reward_log):
        try:
            x, y = ts2xy(load_results(reward_log), 'timesteps')
            skip_eval = len(x) > 0
        except (json.JSONDecodeError, pd.errors.EmptyDataError, TypeError):
            # Malformed or empty monitor files: fall through and re-run the eval
            pass

    if skip_eval:
        print("Skipping eval...")
    else:
        return_code = subprocess.call(['python', 'enjoy.py'] + arguments)

    x, y = ts2xy(load_results(reward_log), 'timesteps')
    if len(x) > 0:
        mean_reward = np.mean(y)
        std_reward = np.std(y)
        results['algo'].append(algo)
        morphology=morphology, path=base_path, verbose=1)
    eval_env = get_env(params, morphology=morphology)
    if args.eval_ep > 0:
        fitness = eval_policy(model, eval_env, num_ep=args.eval_ep,
                              deterministic=True, gif=False, render=False,
                              verbose=0)[0]
    else:
        # Determine fitness from training history
        x, y = ts2xy(load_results(model_path), 'timesteps')
        if len(x) > 0:
            # Mean training reward over the last |eval_ep| episodes
            # (eval_ep is non-positive here, so y[args.eval_ep:] is the tail)
            ys = y[args.eval_ep:]
            fitness = np.mean(ys)
        else:
            fitness = -np.inf
    # except Exception as e:
    #     print("Encountered Error", e)
    #     print("Assigning zero fitness")
    #     model_path, tb_path = get_paths(params, path=base_path)
    #     os.makedirs(model_path, exist_ok=True)
    #     with open(os.path.join(model_path, 'fitness.tmp'), 'w+') as f:
    #         f.write(str(-1 * float('inf')))
    #     exit()  # Exit if we fail to train the model.