#returns, fcounts = helper.parse_avefcount_array('../../policies/' + args.env_name + '_' + eval + '_fcounts.txt')
#return_dist = np.dot(W, fcounts)
write_directory = args.write_dir
if eval == "map":
    #write_directory = '../../bayesian_dropout/'
    eval_fname = "_scratch_cluster_dsbrown_tflogs_mcmc_" + args.env_name + "_64_all_checkpoints_43000_"
    pred_filename = write_directory + args.env_name + "_" + eval_fname + "pred.txt"
    true_filename = write_directory + args.env_name + "_" + eval_fname + "true.txt"
elif eval == "mean":
    #write_directory = '../../bayesian_dropout/'
    eval_fname = "_scratch_cluster_dsbrown_tflogs_mcmc_" + args.env_name + "_64_all_mean_checkpoints_43000_"
    pred_filename = write_directory + args.env_name + "_" + eval_fname + "pred.txt"
    true_filename = write_directory + args.env_name + "_" + eval_fname + "true.txt"
elif eval == "noop":
    #write_directory = '../../bayesian_dropout/'
    pred_filename = write_directory + args.env_name + "_no_op_pred.txt"
    true_filename = write_directory + args.env_name + "_no_op_true.txt"
else:
    #write_directory = '../../bayesian_dropout/'
    pred_filename = write_directory + args.env_name + "_" + eval + "pred.txt"
    true_filename = write_directory + args.env_name + "_" + eval + "true.txt"

#predicted (posterior) returns and ground-truth returns, one value per line
return_dist = genfromtxt(pred_filename, delimiter='\n')
returns = genfromtxt(true_filename, delimiter='\n')
#print(returns)
print("{} & {:.1f} & {:.1f} & {:.1f} & {:.0f} \\\\".format(
    name_transform[eval], np.mean(return_dist),
    helper.worst_percentile(return_dist, 0.05),
    np.mean(returns), np.min(returns)))
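#For reference: helper.worst_percentile(return_dist, 0.05) above is assumed to return the
#0.05-VaR of the posterior return distribution, i.e. the return below which the worst 5% of
#posterior samples fall. A minimal, hypothetical sketch (not the actual helper implementation):
import numpy as np

def worst_percentile_sketch(return_dist, alpha):
    """Return the alpha-quantile of a 1-d array of return samples (worst-case alpha fraction)."""
    sorted_returns = np.sort(return_dist)
    cutoff = min(int(alpha * len(sorted_returns)), len(sorted_returns) - 1)
    return sorted_returns[cutoff]

#example usage on 1000 made-up posterior return samples
#print(worst_percentile_sketch(np.random.randn(1000), 0.05))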
fcount_list = []
return_dist_list = []
print(" policy & mean & 0.05-VaR & ave length & gt & min gt \\\\ \\hline")
for eval in eval_policies:
    #print("-"*20)
    #print("eval", eval)
    returns, fcounts = helper.parse_avefcount_array('../../policies/' + args.env_name + '_' + eval + '_fcounts.txt')
    #normalize feature counts to unit L1 norm
    fcounts_l1 = fcounts / np.sum(np.abs(fcounts))
    #one predicted return per posterior weight sample
    return_dist = np.dot(W, fcounts_l1)
    print("{} & {:.2E} & {:.2E} & {:.2E} & {:.1f} & {:.0f} \\\\".format(
        name_transform[eval], np.mean(return_dist),
        helper.worst_percentile(return_dist, 0.05), fcounts[-1],
        np.mean(returns), np.min(returns)))

if args.env_name == "breakout":
    #evaluate the no-op policy
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/breakout_noop_fcounts.txt')
    #normalize feature counts to unit L1 norm
    fcounts_l1 = fcounts / np.sum(np.abs(fcounts))
    return_dist = np.dot(W, fcounts_l1)
    print("{} & {:.2E} & {:.2E} & {:.2E} & {:.1f} & {:.0f} \\\\".format(
        "no-op", np.mean(return_dist),
        helper.worst_percentile(return_dist, 0.05), fcounts[-1],
        np.mean(returns), np.min(returns)))
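#For context: W is assumed to be a (num_mcmc_samples x num_features) array of posterior
#reward-weight samples, so np.dot(W, fcounts_l1) above yields one predicted return per
#weight sample. A self-contained sketch with made-up numbers (names are illustrative only):
import numpy as np

num_samples_demo, num_features_demo = 2000, 4
W_demo = np.random.randn(num_samples_demo, num_features_demo)    #stand-in for the MCMC weight chain
fcounts_demo = np.array([10.0, 3.0, 0.5, 200.0])                 #stand-in expected feature counts
fcounts_l1_demo = fcounts_demo / np.sum(np.abs(fcounts_demo))    #same L1 normalization as above
return_dist_demo = np.dot(W_demo, fcounts_l1_demo)               #(num_samples_demo,) posterior returns
print(np.mean(return_dist_demo), np.min(return_dist_demo))       #summary statistics of the distribution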
name_transform = {'00025': 'policy A', '00325': 'policy B', '00800': 'policy C',
                  '01450': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B', '03900': 'policy C',
                      '04875': 'policy D', 'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}

gt_return_list = []
fcount_list = []
return_dist_list = []
print(" policy & mean & " + str(alpha) + "-VaR & ave length & gt & min gt \\\\ \\hline")
for eval in eval_policies:
    #print("-"*20)
    #print("eval", name_transform[eval])
    returns, fcounts = helper.parse_avefcount_array('../../policies/' + args.env_name + '_' + eval + '_fcounts_onehot.txt')
    #print(fcounts)
    return_dist = np.dot(W, fcounts)
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f} \\\\".format(
        name_transform[eval], np.mean(return_dist),
        helper.worst_percentile(return_dist, alpha), np.sum(fcounts),
        np.mean(returns), np.min(returns)))
    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)

if args.env_name == "breakout":
    #NOTE: the no-op feature counts need to be regenerated for the full features;
    #the file keeps getting overwritten.
    #evaluate the no-op policy
    returns, fcounts = helper.parse_avefcount_array('../../policies/breakout_noop_fcounts.txt')
    return_dist = np.dot(W, fcounts)
    return_dist_list.append(return_dist)
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f} \\\\".format(
        "no-op", np.mean(return_dist),
        helper.worst_percentile(return_dist, alpha), np.sum(fcounts),
        np.mean(returns), np.min(returns)))
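#Optional visualization (a sketch, not part of the original script): overlaying histograms of
#the posterior return distributions collected in return_dist_list makes it easy to compare
#policies beyond the mean / alpha-VaR columns printed above. Uses made-up data so it runs standalone.
import matplotlib.pyplot as plt
import numpy as np

return_dist_list_demo = [np.random.normal(mu, 1.0, size=2000) for mu in (0.0, 1.5, 3.0)]
labels_demo = ['policy A', 'policy B', 'policy C']
for dist, label in zip(return_dist_list_demo, labels_demo):
    plt.hist(dist, bins=50, alpha=0.5, density=True, label=label)
plt.xlabel('predicted return')
plt.ylabel('posterior density')
plt.legend()
plt.show()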
for f in eval_files:
    avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval('/scratch/cluster/dsbrown/rl_policies/' + f)
    print(f)
    print(np.mean(returns), np.mean(lengths))
    #input()

#sweep over risk-sensitivity levels alpha; at each level pick the policy whose
#posterior return distribution has the highest alpha-VaR and record its true return
alphas = np.linspace(0, 1.0, 21)
best_returns = []
for alpha in alphas:
    print(alpha)
    alpha_vars = []
    for f in eval_files:
        avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval('/scratch/cluster/dsbrown/rl_policies/' + f)
        return_dist = np.dot(W, avefcounts)
        var = helper.worst_percentile(return_dist, alpha)
        alpha_vars.append(var)
    #find the best under this alpha
    print(alpha_vars)
    print(len(alpha_vars))
    best_indx = np.argmax(alpha_vars)
    print("best index", best_indx)
    print("best file", eval_files[best_indx])
    avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval('/scratch/cluster/dsbrown/rl_policies/' + eval_files[best_indx])
    best_returns.append(np.mean(returns))

import matplotlib.pylab as pylab
params = {'legend.fontsize': 'xx-large',
          # 'figure.figsize': (6, 5),
          'axes.labelsize': 'xx-large',
          'axes.titlesize': 'xx-large',
          'xtick.labelsize': 'xx-large',
print("alpha:", args.alpha) print(args.env_name) #read in the weights as a 2-d array and the feature counts of the policy W, likelihood = helper.get_weightchain_array(args.mcmc_file, burn=5000, skip=10, return_likelihood=True, preburn_until_accept=True) gt_return_list = [] fcount_list = [] return_dist_list = [] #print(np.mean(W, axis=0)) print("policy & map & posterior ave & best-case & worst-case & ground truth & ave length\\\\") #eval_policies = ['beamrider_rl_fcounts.txt', 'beamrider_brex_fcounts.txt', 'beamrider_reward_hack_fcounts.txt'] eval_policies = ['human_good.txt', 'human_everyother.txt', 'human_holdnfire.txt', 'human_shootbarriers.txt', 'human_flee.txt', 'human_hide.txt', 'human_miss.txt', 'human_suicidal.txt'] #eval_policies = ['beamrider_rl_fcounts_30000.txt', 'beamrider_brex_fcounts_30000.txt', 'beamrider_reward_hack_fcount_30000.txt'] for eval in eval_policies: fcounts, returns, lengths, all_fcounts = helper.parse_fcount_policy_eval('/home/dsbrown/Code/deep-bayesian-irl/spaceinvaders_eval_policies/' + eval) return_dist = np.dot(W,fcounts) #print(map_return) #print(return_dist[:200]) #input() #print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.1f} \\\\".format(eval, map_return, np.mean(return_dist), helper.worst_percentile(return_dist, 1-args.alpha), helper.worst_percentile(return_dist, args.alpha), np.mean(returns), np.mean(lengths))) print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} \\\\".format(eval, np.mean(return_dist),helper.worst_percentile(return_dist, args.alpha), np.mean(returns), np.mean(lengths))) gt_return_list.append(returns) fcount_list.append(fcounts) return_dist_list.append(return_dist)
gt_return_list = []
fcount_list = []
return_dist_list = []
#print(np.mean(W, axis=0))
print("policy & posterior ave & worst-case & ground truth & ave length\\\\")
#eval_policies = ['beamrider_rl_fcounts.txt', 'beamrider_brex_fcounts.txt', 'beamrider_reward_hack_fcounts.txt']
eval_policies = ['beamrider_rl_fcounts_2000.txt', 'beamrider_brex_fcounts_2000.txt',
                 'beamrider_reward_hack_fcount_2000.txt']
#eval_policies = ['beamrider_rl_fcounts_30000.txt', 'beamrider_brex_fcounts_30000.txt', 'beamrider_reward_hack_fcount_30000.txt']
for eval in eval_policies:
    fcounts, returns, lengths, all_fcounts = helper.parse_fcount_policy_eval(
        '/home/dsbrown/Code/deep-bayesian-irl/beamrider_eval_policies/' + eval)
    return_dist = np.dot(W, fcounts)
    map_return = np.dot(map_weights, fcounts)[0]
    #print(map_return)
    #print(return_dist[:200])
    #input()
    #print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.1f} \\\\".format(eval, map_return, np.mean(return_dist), helper.worst_percentile(return_dist, 1-args.alpha), helper.worst_percentile(return_dist, args.alpha), np.mean(returns), np.mean(lengths)))
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} \\\\".format(
        eval, np.mean(return_dist),
        helper.worst_percentile(return_dist, args.alpha),
        np.mean(returns), np.mean(lengths)))
    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)
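#map_weights (used for map_return above) is assumed to be the single highest-likelihood weight
#vector from the chain, e.g. picked out of W using the per-sample likelihoods returned by
#get_weightchain_array. A hypothetical sketch with made-up data (not necessarily how the
#original script builds map_weights):
import numpy as np

W_demo = np.random.randn(1500, 5)                                      #stand-in weight chain
likelihood_demo = np.random.randn(1500)                                #stand-in per-sample log-likelihoods
map_weights_demo = W_demo[np.argmax(likelihood_demo)].reshape(1, -1)   #(1, num_features) MAP weights
fcounts_demo = np.random.rand(5)
map_return_demo = np.dot(map_weights_demo, fcounts_demo)[0]            #scalar, mirroring map_return above
print(map_return_demo)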
for f in eval_files:
    avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval(
        '/home/dsbrown/Code/deep-bayesian-irl/rl_policies/' + f)
    print(f)
    print(np.mean(returns), np.mean(lengths))
    #input()

#sweep over risk-sensitivity levels alpha; at each level pick the policy whose
#posterior return distribution has the highest alpha-VaR and record its true return
alphas = np.linspace(0, 1.0, 11)
best_returns = []
for alpha in alphas:
    print(alpha)
    alpha_vars = []
    for f in eval_files:
        avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval(
            '/home/dsbrown/Code/deep-bayesian-irl/rl_policies/' + f)
        return_dist = np.dot(W, avefcounts)
        var = helper.worst_percentile(return_dist, alpha)
        alpha_vars.append(var)
    #find the best under this alpha
    print(alpha_vars)
    print(len(alpha_vars))
    best_indx = np.argmax(alpha_vars)
    print("best index", best_indx)
    print("best file", eval_files[best_indx])
    avefcounts, returns, lengths, fcounts = helper.parse_fcount_policy_eval(
        '/home/dsbrown/Code/deep-bayesian-irl/rl_policies/' + eval_files[best_indx])
    best_returns.append(np.mean(returns))

import matplotlib.pylab as pylab
params = {
    'legend.fontsize': 'xx-large',
    # 'figure.figsize': (6, 5),