def main():
    """Plot the averaged PPO return curve for one environment and save a PDF."""
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='Swimmer-v2')
    parser.add_argument('--dir', type=str, default='EXP_ON_fix_norm')
    parser.add_argument('--thesis', type=str, default='Online_V0')
    args = parser.parse_args()

    # Experiment logs live under ~/Desktop/ppo_test/<dir>/<env>.
    dirname = '~/Desktop/ppo_test/' + args.dir + '/' + args.env
    results = pu.load_results(dirname)

    # Only the plain-PPO runs are plotted in this revision.
    curves = {'ppo': filt(results, 'ppo')}
    for name, runs in curves.items():
        pu.plot_results(runs,
                        xy_fn=pu.progress_default_xy_fn,
                        average_group=True,
                        split_fn=lambda _: '',
                        shaded_err=True,
                        shaded_std=False)
        plt.xlabel('Number of Timesteps [M]')
        plt.ylabel('Average Return [-]')
        plt.tight_layout()
        fig = plt.gcf()
        fig.set_size_inches(9, 7.5)
        fig.savefig("/Users/zsbjltwjj/Desktop/ppo_test/" + args.dir + '-' + name + '.pdf',
                    format="pdf")
def main():
    """Launch offline-evaluation training runs for several algorithms over a
    range of seeds, then plot the aggregated results.

    Fixes over the previous revision:
      * ``arg.env`` (a NameError at runtime) corrected to ``args.env``.
      * An unsupported ``--env`` now fails fast with a ValueError instead of
        leaving ``mbl_args`` unbound and crashing later with a NameError.
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='HalfCheetah-v2')
    parser.add_argument('--st_seed', help='start number of seeds', type=int, default=0)
    parser.add_argument('--seeds', help='number of seeds', type=int, default=1)
    parser.add_argument('--num_timesteps', type=str, default="3e4")
    parser.add_argument('--filename', type=str, default='_Offline_Evaluation_nosil.png')
    args = parser.parse_args()

    # MBL sampling hyper-parameters depend on the environment (shorter horizon
    # for Reacher/Ant).
    if args.env == 'Swimmer-v2' or args.env == 'HalfCheetah-v2':
        mbl_args = '--num_samples=1500 --num_elites=10 --horizon=10 --eval_freq=10 --mbl_train_freq=10'
    elif args.env == 'Reacher-v2' or args.env == 'Ant-v2':  # BUGFIX: was `arg.env`
        mbl_args = '--num_samples=1500 --num_elites=10 --horizon=5 --eval_freq=10 --mbl_train_freq=10'
    else:
        raise ValueError('unsupported environment: %s' % args.env)

    algo_names = ["mbl_ppo2", "ppo2_offline",
                  "mbl_copos", "copos_offline",
                  "mbl_trpo", "trpo_offline"]
    legend_names = ["mbl+ppo2", "ppo2",
                    "mbl+copos", "copos",
                    "mbl+trpo", "trpo"]
    argus = [mbl_args for _ in range(len(algo_names))]

    # One training run per (seed, algorithm); log paths are named
    # <legend>-<seed> so pu.load_results can group and average over seeds.
    for i in range(args.st_seed, args.st_seed + args.seeds):
        for j in range(len(algo_names)):
            os.system("python ../algos/" + algo_names[j] + "/run.py --alg=" + algo_names[j] +
                      " --num_timestep=" + args.num_timesteps + " --seed=" + str(i) +
                      " --env=" + args.env +
                      " --log_path=~/Desktop/logs/EXP2_nosil/" + args.env + "/" +
                      legend_names[j] + "-" + str(i) + ' ' + argus[j])

    results = pu.load_results('~/Desktop/logs/EXP2_nosil/' + args.env)
    pu.plot_results(results,
                    xy_fn=pu.progress_itermbl_xy_fn,
                    average_group=True,
                    split_fn=lambda _: '')
    plt.xlabel('Evaluation Epochs [-]')
    plt.ylabel('Average Return [-]')
    fig = plt.gcf()
    fig.set_size_inches(9.5, 7.5)
    fig.savefig(args.env + "_" + args.filename)
def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_REWARD, title='', split_fn=split_by_task, resample=100):
    """Load monitor results from *dirs* and plot one curve per monitor frame.

    Each loaded result may carry several monitor dataframes; they are split
    into one Result per dataframe before plotting so every run is drawn
    individually.
    """
    loaded = plot_util.load_results(dirs)
    # Flatten: a deep-copied Result per monitor dataframe.
    flattened = [copy.deepcopy(res)._replace(monitor=frame)
                 for res in loaded
                 for frame in res.monitor]
    plot_util.plot_results(flattened,
                           xy_fn=lambda r: ts2xy(r.monitor, xaxis, yaxis),
                           group_fn=split_fn,
                           average_group=True,
                           resample=resample)
def main():
    """Plot averaged returns per algorithm for EXP1; the SIL variant also gets
    an entropy-over-time plot."""
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='HalfCheetah-v2')
    parser.add_argument('--extra_dir', type=str, default='')
    args = parser.parse_args()

    dirname = '~/Desktop/carla_sample_efficient/data/bk/bkup_EXP1_FINAL/' + args.extra_dir + args.env
    results = pu.load_results(dirname)

    # One group of runs per algorithm label (insertion order is plot order).
    groups = {
        'copos': filt(results, 'copos'),
        'trpo': filt(results, 'trpo'),
        'ppo': filt(results, 'ppo'),
        'sil_slight': filt(results, 'sil_n2_l0.001'),
    }

    for name, runs in groups.items():
        pu.plot_results(runs,
                        xy_fn=pu.progress_default_xy_fn,
                        average_group=True,
                        split_fn=lambda _: '',
                        shaded_err=True,
                        shaded_std=False)
        plt.xlabel('Number of Timesteps [M]')
        plt.ylabel('Average Return [-]')
        plt.tight_layout()
        fig = plt.gcf()
        fig.set_size_inches(9, 7.5)
        fig.savefig(
            "/Users/zsbjltwjj/Desktop/carla_sample_efficient/plot_f/ONLINE/" +
            args.env + '/' + name + '.pdf',
            format='pdf')
        # The SIL run additionally gets an entropy plot.
        if name == 'sil_slight':
            pu.plot_results(runs,
                            xy_fn=pu.progress_default_entropy_xy_fn,
                            average_group=True,
                            split_fn=lambda _: '',
                            shaded_err=True,
                            shaded_std=False,
                            legend_entropy=1)
            plt.xlabel('Number of Timesteps [M]')
            plt.ylabel('Entropy [-]')
            plt.tight_layout()
            fig = plt.gcf()
            fig.set_size_inches(9, 7.5)
            fig.savefig(
                "/Users/zsbjltwjj/Desktop/carla_sample_efficient/plot_f/ONLINE/" +
                args.env + '/' + name + '_entropy.pdf',
                format="pdf")
def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_REWARD, title='', split_fn=split_by_task):
    """Load every run under *dirs* and plot them, averaged within each group."""
    loaded = plot_util.load_results(dirs)
    plot_util.plot_results(
        loaded,
        split_fn=split_fn,
        average_group=True,
        resample=int(1e6),
    )
def test_plot_util():
    """Smoke-test pu.plot_results tiling and splitting on four identical runs."""
    nruns = 4
    logdirs = []
    for _ in range(nruns):
        logdirs.append(smoketest('--alg=ppo2 --env=CartPole-v0 --num_timesteps=10000'))

    data = pu.load_results(logdirs)
    assert len(data) == 4

    # Default: one axis for a single run.
    _, axes = pu.plot_results(data[:1])
    assert len(axes) == 1
    # Each tiling mode produces the expected axes grid shape.
    _, axes = pu.plot_results(data, tiling='vertical')
    assert axes.shape == (4, 1)
    _, axes = pu.plot_results(data, tiling='horizontal')
    assert axes.shape == (1, 4)
    _, axes = pu.plot_results(data, tiling='symmetric')
    assert axes.shape == (2, 2)
    # Collapsing all runs into one split yields a single axis.
    _, axes = pu.plot_results(data, split_fn=lambda _: '')
    assert len(axes) == 1
def main():
    """Plot averaged curves for the obstacle experiment, then drop into the
    debugger for interactive inspection."""
    results = pu.load_results('data_Test_obstacle_origin/log_data')
    r = results[0]  # kept for poking at inside the debugger
    pu.plot_results(results, average_group=True, split_fn=lambda _: '')
    set_trace()
def plot_accuracy(root_dir):
    """Plot gradient-estimation accuracy (cosine similarity) per task, with
    one curve per evolution-strategy algorithm."""
    all_results = load_results(root_dir, verbose=True)

    # Result names look like "<task>-<algorithm>-...".
    def xy_fn(r):
        return np.cumsum(r.progress['xs']), r.progress['errors']

    def split_fn(r):
        return r.name.split('-')[0]

    legend_by_alg = {
        'SGES': 'SGES',
        'CMA': 'CMA-ES',
        'GES': 'Guided ES',
        'ES': 'Vanilla ES',
        'ASEBO': 'ASEBO',
    }

    def group_fn(r):
        alg_name = r.name.split('-')[1]
        if alg_name not in legend_by_alg:
            raise ValueError('%s not supported' % alg_name)
        return legend_by_alg[alg_name]

    # CMA-ES is excluded on the Sphere task only.
    all_results = [res for res in all_results
                   if not ('Sphere' in res.name and 'CMA' in res.name)]

    plt.figure(dpi=300)
    fig, axarr = pu.plot_results(all_results,
                                 xy_fn=xy_fn,
                                 split_fn=split_fn,
                                 group_fn=group_fn,
                                 shaded_std=True,
                                 shaded_err=False,
                                 average_group=True,
                                 tiling='horizontal',
                                 xlabel='Evaluations',
                                 ylabel='Cosine Similarity')
    plt.subplots_adjust(hspace=0.2, wspace=0.2, bottom=0.2, left=0.08, top=0.95)
    # Relabel the x-axis in "k" units.
    for ax in axarr[0]:
        ax.set_xticks(np.arange(0, 12.5e4, 2.5e4))
        ax.set_xticklabels(['0', '25k', '50k', '75k', '100k'])
    plt.savefig('blackbox_accuracy.pdf', bbox_inches='tight')
def plot_data(exp, savefig, ttype):
    """Plot episode distance-ratio curves for *exp* and write the figure to
    *savefig*.

    Failures are reported but never raised (deliberate best-effort), so one
    bad experiment directory cannot abort a batch of plots.
    """
    savefig = abspath(savefig)
    try:
        xy = ep_distance_ratio_train if ttype == 'train' else ep_distance_ratio_test
        organized = organize_results(pu.load_results(exp))
        pu.plot_results(organized,
                        average_group=True,
                        split_fn=lambda _: '',
                        xy_fn=xy,
                        shaded_std=False,
                        shaded_err=True)
        # Replace any stale figure before writing the new one.
        if os.path.isfile(savefig):
            os.remove(savefig)
        plt.savefig(savefig)
        plt.clf()
    except Exception as e:
        print("Plotting failed for {}".format(savefig))
        print("Reason: {}".format(str(e)))
def plot_save_results(xy_fn, file="logs/time_rewards.png"):
    """Render the (module-level) `results` with *xy_fn* and save the figure
    to *file*."""
    fig, _axes = pu.plot_results(
        results,
        xy_fn=xy_fn,
        split_fn=lambda _: "",
        average_group=True,
        shaded_err=False,
    )
    fig.savefig(file)
def plot_results(self):
    """Plot averaged rewards for this run and save the figure under plot_dir."""
    # Make sure the output directory exists before saving anything into it.
    os.makedirs(self.plot_dir, exist_ok=True)

    # Logs are grouped under the environment's base name (e.g. "Hopper" for
    # "Hopper-v2"); the trailing '' keeps the path directory-terminated.
    log_subdir = os.path.join(self.log_dir, self.env_name.split('-')[0], '')
    curves = pu.load_results(log_subdir)

    pu.plot_results(curves, average_group=True, split_fn=lambda _: '',
                    shaded_std=False)
    plt.xlabel('Timestep')
    plt.ylabel('Reward')

    fig = plt.gcf()
    fig.savefig(os.path.join(self.plot_dir, 'plot_' + self.env_name),
                bbox_inches='tight')
    plt.show()
def main():
    """Plot best-average-return curves for the EXP2 offline (MBL) experiments.

    Loads all runs for one environment, filters them into algorithm groups,
    and saves one PDF per plotted group (plus an entropy plot for the MBL
    groups).
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='HalfCheetah-v2')
    parser.add_argument('--extra_dir', type=str, default='')
    args = parser.parse_args()
    # NOTE: hard-coded override of the CLI value — only the TRPO offline
    # sweep is plotted in this revision.
    args.extra_dir='EXP2_IAS_5M_TRPO_OFF/'
    dirname = '~/Desktop/carla_sample_efficient/data/bk/bkup_EXP2_FINAL/'+args.extra_dir+args.env
    results = pu.load_results(dirname)
    # Split runs by algorithm / SIL / MBL name patterns. Most of these groups
    # are currently unused (see the commented-out dt below) but are kept for
    # quick re-enabling.
    r_copos_nosil,r_trpo_nosil,r_ppo_nosil=filt(results,'copos-'),filt(results,'trpo-'),filt(results,'ppo-')
    r_copos_sil,r_trpo_sil,r_ppo_sil=filt(results,'copos+sil-'),filt(results,'trpo+sil-'),filt(results,'ppo+sil-')
    r_mbl_sil=filt(results,'mbl+','sil-')
    # r_mbl_nosil_tmp=[r for r in results if r not in r_mbl_sil]
    r_mbl_nosil=filt_or_or(results,'mbl+copos-','mbl+trpo-','mbl+ppo-')
    r_copos_comp, r_trpo_comp, r_ppo_comp=filt_or(results,'mbl+copos','copos+sil'),filt_or(results,'mbl+trpo','trpo+sil'),filt_or(results,'mbl+ppo','ppo+sil')
    # dt={'copos_nosil':r_copos_nosil, 'trpo_nosil':r_trpo_nosil, 'ppo_nosil':r_ppo_nosil,
    #     'copos_sil':r_copos_sil, 'trpo_sil':r_trpo_sil, 'ppo_sil':r_ppo_sil,
    #     'mbl_nosil':r_mbl_nosil, 'mbl_sil':r_mbl_sil,
    #     'copos_comp':r_copos_comp, 'trpo_comp':r_trpo_comp, 'ppo_comp':r_ppo_comp}
    r_trpo_all=filt(results,'trpo')
    dt={'trpo_research':r_trpo_all}
    for name in dt:
        # Best-average-return curve for this group, averaged over seeds.
        pu.plot_results(dt[name],xy_fn=pu.progress_mbl_vbest_xy_fn,average_group=True,name=name,split_fn=lambda _: '',shaded_err=True,shaded_std=False)
        plt.xlabel('Number of Timesteps [M]')
        plt.ylabel('Best Average Return [-]')
        plt.tight_layout()
        fig = plt.gcf()
        fig.set_size_inches(9, 7.5)
        fig.savefig("/Users/zsbjltwjj/Desktop/carla_sample_efficient/plot_f/OFFLINE/"+args.env+'/'+name+'.pdf',format="pdf")
        # The MBL groups additionally get an entropy-vs-timesteps plot.
        if name=='mbl_nosil' or name=='mbl_sil':
            pu.plot_results(dt[name],xy_fn=pu.progress_default_entropy_xy_fn,average_group=True,name=name,split_fn=lambda _: '',shaded_err=True,shaded_std=False,legend_entropy=1)
            plt.xlabel('Number of Timesteps [M]')
            plt.ylabel('Entropy [-]')
            plt.tight_layout()
            fig = plt.gcf()
            fig.set_size_inches(9, 7.5)
            fig.savefig("/Users/zsbjltwjj/Desktop/carla_sample_efficient/plot_f/OFFLINE/"+args.env+'/'+name+'_entropy.pdf',format="pdf")
def plot_k(root_dir):
    """Plot loss curves comparing SGES with different guiding-subspace sizes k."""
    all_results = load_results(root_dir, verbose=True)

    # Result names look like "<task>-<algorithm>-...".
    def xy_fn(r):
        return np.cumsum(r.progress['xs']), r.progress['ys']

    def split_fn(r):
        return r.name.split('-')[0]

    def group_fn(r):
        alg_name = r.name.split('-')[1]
        special = {
            'ES': 'Vanilla ES',
            'SGES100': 'SGES(k=100)',
            'SGES1': 'SGES(k=1)',
            'SGES5': 'SGES(k=5)',
        }
        if alg_name in special:
            return special[alg_name]
        # e.g. 'SGES20' -> 'SGES(k=20)': k is the last two characters.
        return 'SGES(k=%s)' % alg_name[-2:]

    plt.figure(dpi=300)
    fig, axarr = pu.plot_results(all_results,
                                 xy_fn=xy_fn,
                                 split_fn=split_fn,
                                 group_fn=group_fn,
                                 shaded_std=True,
                                 shaded_err=False,
                                 average_group=True,
                                 tiling='horizontal',
                                 xlabel='Evaluations',
                                 ylabel='Loss')
    plt.subplots_adjust(hspace=0.2, wspace=0.2, bottom=0.2, left=0.08, top=0.95)
    # Relabel the x-axis in "k" units.
    for ax in axarr[0]:
        ax.set_xticks(np.arange(0, 12.5e4, 2.5e4))
        ax.set_xticklabels(['0', '25k', '50k', '75k', '100k'])
    plt.savefig('blackbox_k.pdf', bbox_inches='tight')
def plot_monitors():
    """Plot averaged monitor curves for the Humanoid experiments.

    NOTE: to average results over seeds, LOG_DIRS must contain subfolders
    named <experiment>-<seed>, e.g. exp0-0, exp0-1, exp1-0, exp1-1 —
    experiment names and seeds separated by a dash.
    """
    LOG_DIRS = '/home/deep3/logs/Humanoid-v2/'
    results = pu.load_results(LOG_DIRS, running_agents=3)
    fig, ax = pu.plot_results(results,
                              average_group=True,
                              split_fn=lambda _: '',
                              shaded_std=False)
# Plot PPO2 training curves for HalfCheetah from the baselines log directory.
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np

dataDir = 'baselinesData'
envName = 'HalfCheetah-v2'
alg = 'ppo2'

results = pu.load_results(dataDir + '/' + envName)
pu.plot_results(results)
plt.show()
def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_REWARD, title='', split_fn=split_by_task):
    """Load runs from *dirs* and plot reward-vs-timestep curves, averaged per
    task group."""
    loaded = plot_util.load_results(dirs)

    def xy(r):
        # NOTE(review): results are indexed dict-style here ('monitor' key),
        # unlike the attribute access used elsewhere — confirm the load_results
        # variant in use actually returns mappings.
        return ts2xy(r['monitor'], xaxis, yaxis)

    plot_util.plot_results(loaded,
                           xy_fn=xy,
                           split_fn=split_fn,
                           average_group=True,
                           resample=int(1e6))
# Plot an experiment's averaged learning curve with error shading.
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt

results = pu.load_results('path/to/your/experiment01')
pu.plot_results(results,
                average_group=True,
                split_fn=lambda _: '',
                shaded_std=False,
                shaded_err=True,
                figsize=(10, 6),
                smooth_step=10.0)
plt.title('Experiment01', fontsize=30)
plt.tight_layout()
plt.show()
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import argparse
import os
"""Create a plot of training data for a given environment using baselines plotting utility

Command line arguments:
--env: environment name (ex: --env=RoboschoolHalfCheetah-v1)
--dir: directory where training data is logged default is ./data/
"""

if __name__ == "__main__":
    # Read the environment name and data directory from the command line.
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='Environment name')
    parser.add_argument('--dir', help='Data directory', default='./data')
    args = parser.parse_args()
    envName = args.env

    # Load and plot the averaged training curve.
    results = pu.load_results('./' + args.dir + '/' + envName)
    pu.plot_results(results, average_group=True, split_fn=lambda _: '',
                    shaded_std=False)

    # Save the plot as a PDF under <dir>/plots, creating the folder on demand.
    if not os.path.exists('./' + args.dir + '/plots'):
        os.mkdir('./' + args.dir + '/plots')
    plt.savefig('./' + args.dir + '/plots/' + envName + '.pdf')
def main():
    """ Plot the plots inside the folder given """
    # Now plot the common things
    args.files = sorted(args.files)
    # Derive the environment name from the first file path; a trailing '/'
    # shifts the component of interest by one.
    splits = args.files[0].split('/')
    if splits[-1] == '':
        splits = splits[-3]
    else:
        splits = splits[-2]
    env = splits
    # Load every requested experiment folder into one flat result list.
    results = []
    for file in args.files:
        print(file)
        results.extend(
            pu.load_results(file, success=args.success, length=args.length))
    # Print details
    if args.print:
        # Collect the reward column of each run's last 10 monitor rows,
        # keyed by experiment name, then report mean/std per group and exit.
        allrecords = dict()
        for i in range(len(results)):
            key = check_last_name(results[i])
            data = np.array(results[i].monitor)[-10:, 1]
            allrecords[key] = allrecords.get(key, []) + [data]
        # Print results
        for k, v in allrecords.items():
            v = np.concatenate(v)
            vm = v.mean()
            vs = v.std()
            print('{} {} {}'.format(k, vm, vs))
        return None
    # One averaged curve per experiment group (grouped by check_last_name).
    fig = pu.plot_results(results,
                          average_group=True,
                          shaded_err=False,
                          shaded_std=True,
                          max_step=args.max_step,
                          smooth_step=args.smooth_step,
                          group_fn=lambda _: check_last_name(_),
                          split_fn=lambda _: '',
                          figsize=(10, 10))
    # Add results for behaviour cloning if present
    '''
    allfiles = []
    for file in args.files:
        for r, dirs, files in os.walk(file):
            txtfiles = list(filter(lambda x: x.endswith('bc.txt'), files))
            allfiles.extend(list(map(lambda x: os.path.join(r, x), txtfiles)))
    if allfiles != []:
        bcreward = []
        for file in allfiles:
            with open(file, 'r') as fi:
                meanrew = float(fi.readlines()[0])
                bcreward.append(meanrew)
        # Get mean and std
        mean = np.mean(bcreward)
        std = np.std(bcreward)
        idxcolor=4
        plt.plot([0, args.max_step], [mean, mean], label='BC',
                 color=COLORS[idxcolor])
        plt.fill_between([0, args.max_step], [mean - std, mean - std],
                         [mean + std, mean + std],
                         alpha=0.2,
                         color=COLORS[idxcolor])
    '''
    plt.xlabel('Number of steps', fontsize=20)
    plt.ylabel('Reward' if not args.length else 'Episode length', fontsize=20)
    #plt.yscale('log')
    plt.title(env, fontsize=24)
    if args.legend != []:
        '''
        if allfiles != []:
            args.legend.append('BC')
        '''
        #plt.legend(args.legend, loc='lower right')
        plt.legend(args.legend)
    #plt.ticklabel_format(useOffset=1)
    # Figure is saved next to the working directory as <env>.png.
    plt.savefig(
        '{}.png'.format(env),
        bbox_inches='tight',
    )
    print("saved ", env)
def main():
    """Plot GAIL/BC learning curves for the experiment folders in args.files,
    overlaying behaviour-cloning and random-policy baselines when their
    reward files are found under args.bcpath, then save <env>.png.

    Fix over the previous revision: ``plt.legend(..., loc='bottom right')``
    used an invalid matplotlib location name; the valid name is
    ``'lower right'``.
    """
    # Derive the environment name from the first path (handles a trailing '/').
    splits = args.files[0].split('/')
    if splits[-1] == '':
        splits = splits[-2]
    else:
        splits = splits[-1]
    env = splits
    results = pu.load_results(args.files, )
    # One averaged curve per experiment group (grouped by check_last_name).
    fig = pu.plot_results(results,
                          average_group=True,
                          shaded_err=False,
                          shaded_std=True,
                          group_fn=lambda _: check_last_name(_),
                          split_fn=lambda _: '',
                          figsize=(10, 10))
    # Add results for behaviour cloning if present
    allbcfiles = [args.bcpath]
    allfiles = []
    allrandomfiles = []  # For random agent behavior
    for file in allbcfiles:
        for r, dirs, files in os.walk(file):
            print(files)
            txtfiles = list(filter(lambda x: 'BC_' in x and '.txt' in x, files))
            rndfiles = list(
                filter(lambda x: 'random_' in x and '.txt' in x, files))
            allfiles.extend(list(map(lambda x: os.path.join(r, x), txtfiles)))
            allrandomfiles.extend(
                list(map(lambda x: os.path.join(r, x), rndfiles)))
    ## Show all files for BC and plot
    print(allfiles)
    if allfiles != []:
        # Gather all BC episode rewards (one float per line in each file).
        bcreward = []
        for file in allfiles:
            with open(file, 'r') as fi:
                rews = fi.read().split('\n')
                rews = filter(lambda x: x != '', rews)
                rews = list(map(lambda x: float(x), rews))
                bcreward.extend(rews)
        # Draw BC mean as a horizontal line with a +-std band.
        mean = np.mean(bcreward)
        std = np.std(bcreward)
        idxcolor = 10
        plt.plot([0, args.max_steps], [mean, mean],
                 label='BC',
                 color=COLORS[idxcolor])
        plt.fill_between([0, args.max_steps], [mean - std, mean - std],
                         [mean + std, mean + std],
                         alpha=0.2,
                         color=COLORS[idxcolor])
    ## Get random policy
    if allrandomfiles != []:
        rndreward = []
        for file in allrandomfiles:
            with open(file, 'r') as fi:
                rews = fi.read().split('\n')
                rews = filter(lambda x: x != '', rews)
                rews = list(map(lambda x: float(x), rews))
                rndreward.extend(rews)
        # Random-policy mean as a dashed gray reference line.
        mean = np.mean(rndreward)
        plt.plot([0, args.max_steps], [mean, mean],
                 label='random',
                 color='gray',
                 linestyle='dashed')
    plt.xlabel('# environment interactions', fontsize=20)
    envnamehere = 'ant'
    if env.lower().startswith(envnamehere):
        # NOTE(review): the ymin/ymax keywords were removed in matplotlib>=3.3
        # (use bottom/top there) — confirm the pinned matplotlib version.
        plt.ylim(ymin=-5000, ymax=5000)
    if env.lower().startswith(''):
        plt.ylabel('Reward', fontsize=30)
    plt.yscale(args.yscale)
    # Strip run-configuration tokens from the title.
    plt.title(env.replace('BC','').replace('GAIL', '').replace('no', '').replace('alph', ''), \
              fontsize=50)
    if env.lower().startswith(envnamehere):
        if args.legend != []:
            if allfiles != []:
                args.legend.append('BC')
            # BUGFIX: was loc='bottom right', which matplotlib rejects.
            plt.legend(args.legend, fontsize=30, loc='lower right')
        else:
            plt.legend().set_visible(False)
    #plt.ticklabel_format(useOffset=1)
    plt.savefig(
        '{}.png'.format(env),
        bbox_inches='tight',
    )
    print("saved ", env)
# Plot smoothed, seed-averaged CoinRun (500-level) training curves.
from baselines.common import plot_util as pu

LOG_DIRS = 'logs/coinrun_500_level/'
results = pu.load_results(LOG_DIRS)

smooth_step = 50.0
fig = pu.plot_results(results,
                      average_group=True,
                      split_fn=lambda _: '',
                      shaded_std=False,
                      smooth_step=smooth_step)
pu.plt.savefig('coinrun_500_level')
# Plot averaged learning curves for the NewHopperCmp experiment.
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np

results = pu.load_results('~/logs/NewHopperCmp/')
print(len(results))
pu.plot_results(results, average_group=True, split_fn=lambda _: '')
def run_cartpole_dqn(num_batches=1000, batch_size=32, log_dir="./logs/dqn", seed=0):
    """Train a DQN agent on CartPole with monitored logging.

    Seeds both the environment and torch, wraps the env in the baselines
    bench Monitor so episode statistics land in *log_dir*, trains, and
    returns (agent, env).
    """
    os.makedirs(log_dir, exist_ok=True)

    env = CartPoleEnv()
    env.seed(seed)
    torch.manual_seed(seed)

    agent = CartPoleAgent(env.observation_space, env.action_space)

    # Imported here so the training path works without pulling in plotting.
    from baselines.bench import Monitor as BenchMonitor
    env = BenchMonitor(env, log_dir, allow_early_resets=True)

    train(agent, env, num_batches=num_batches, batch_size=batch_size)
    return agent, env


if __name__ == "__main__":
    agent, env = run_cartpole_dqn()

    # Plot the recorded training curve and save it beside the logs.
    from baselines.common import plot_util as pu
    from matplotlib import pyplot as plt
    results = pu.load_results("logs")
    f, ax = pu.plot_results(results)
    f.savefig("logs/dqn_cartpole.png")

    # Record videos of the trained agent.
    env = Monitor(env, "./vid", video_callable=lambda episode_id: True, force=True)
    visualize_it(env, agent)