Exemplo n.º 1
0
# Read the sampled reward weights for this environment (the original note
# describes them as a 2-d array) and print their column-wise mean.
W = helper.get_weightchain_array("../../mcmc_data/" + args.env_name + "_0.txt")
print(np.mean(W, axis=0))
eval_policies = ['00025', '00325', '00800', '01450']
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875']
gt_return_list = []
fcount_list = []
return_dist_list = []
# Level handed to helper.worst_percentile. Kept in one place so the header
# always names the level that is actually computed (the header used to say
# 0.05 while the rows were computed with 0.01).
var_level = 0.01
# The header lists one label per value printed in each row below.
print(" policy & mean & " + str(var_level) +
      "-VaR & ave length & gt & min gt & stdev")
for policy_id in eval_policies:  # not named `eval`: that shadows the builtin
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/' + args.env_name + '_' + policy_id +
        '_fcounts_100.txt')
    # Dot product of the weight chain with this policy's feature counts;
    # presumably one predicted return per sampled weight vector - confirm
    # against helper.get_weightchain_array.
    return_dist = np.dot(W, fcounts)

    # Row order: policy id, mean predicted return, worst-percentile value,
    # last feature count (reported as "ave length"), then mean / min / std
    # of the returns parsed from the policy file.
    print(
        "{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f} & {:.0f} \\\\".format(
            policy_id, np.mean(return_dist),
            helper.worst_percentile(return_dist, var_level), fcounts[-1],
            np.mean(returns), np.min(returns), np.std(returns)))

    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)
plt.figure(0)
plt.hist(return_dist_list,
         30,
# Read the weight chain (the original note describes it as a 2-d array)
# together with the likelihoods returned alongside it, then print the
# column-wise mean of the weights.
W, log_lik = helper.get_weightchain_array(args.mcmc_file, return_likelihood=True)
print(np.mean(W, axis=0))
eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {'00025': 'policy A', '00325': 'policy B',
                  '00800': 'policy C', '01450': 'policy D',
                  'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B',
                      '03900': 'policy C', '04875': 'policy D',
                      'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
gt_return_list = []
fcount_list = []
return_dist_list = []
# LaTeX table header. "\\hline" is spelled with an escaped backslash: the
# bare "\h" was an invalid escape sequence that only produced the same text
# by accident and triggers a warning on recent Python versions.
print(" policy & mean & " + str(alpha) +
      "-VaR & ave length & gt & min gt \\\\ \\hline")
for policy_id in eval_policies:  # not named `eval`: that shadows the builtin
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/' + args.env_name + '_' + policy_id +
        '_fcounts_onehot_truncated_terminal' + str(args.no_term) + '.txt')
    # Dot product of the weight chain with this policy's feature counts;
    # presumably one predicted return per sampled weight vector - confirm.
    return_dist = np.dot(W, fcounts)

    # Row order: display name, mean predicted return, worst-percentile value
    # at level alpha, sum of the feature counts (reported as "ave length"),
    # then mean and min of the returns parsed from the policy file.
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f}  \\\\".format(
        name_transform[policy_id], np.mean(return_dist),
        helper.worst_percentile(return_dist, alpha), np.sum(fcounts),
        np.mean(returns), np.min(returns)))

    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)

if args.env_name == "breakout":
    #I realized that I need to rerun the noop code for the full features. I keep overwriting it.
    #evaluate the no-op policy
    #returns, fcounts = helper.parse_avefcount_array('../../policies/breakout_noop_fcounts.txt')
    returns = np.array([0,0])
    if args.no_term:
# Matplotlib defaults for the figures: a 5x4 figure with every text element
# (legend, axis labels, titles, tick labels) rendered at 'x-large'.
params = {'legend.fontsize': 'x-large', 'figure.figsize': (5, 4)}
params.update(dict.fromkeys(
    ('axes.labelsize', 'axes.titlesize', 'xtick.labelsize', 'ytick.labelsize'),
    'x-large'))
plt.rcParams.update(params)
# Optional alternative look, currently disabled:
#plt.style.use('seaborn-deep')

# No command-line options are defined; parse_args() still rejects unexpected
# arguments and provides -h/--help.
parser = argparse.ArgumentParser(description=None)

args = parser.parse_args()

eval_games = ['beamrider', 'breakout', 'enduro', 'seaquest', 'spaceinvaders']
gt_return_list = []
fcount_list = []
return_dist_list = []

# For every game, print the minimum and mean of the returns stored for the
# 'mean' and 'map' policies, formatted as two LaTeX table cells.
for game in eval_games:
    print(game)
    # Named policy_kind rather than `eval` so the builtin is not shadowed.
    for policy_kind in ("mean", "map"):
        print(policy_kind)
        returns, fcounts = helper.parse_avefcount_array(
            '../../policies/' + game + '_' + policy_kind + '_fcounts.txt')
        print("{:.0f} & {:.1f}".format(np.min(returns), np.mean(returns)))
# Read the weight chain from the "_gt_chain" file for this environment (the
# original note describes it as a 2-d array) and print its column-wise mean.
W = helper.get_weightchain_array("../../mcmc_data/" + args.env_name + "_gt_chain.txt")
print(np.mean(W, axis=0))
eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {'00025': 'policy A', '00325': 'policy B',
                  '00800': 'policy C', '01450': 'policy D',
                  'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B',
                      '03900': 'policy C', '04875': 'policy D',
                      'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
gt_return_list = []
fcount_list = []
return_dist_list = []
# LaTeX table header. The VaR column is labelled with the same `alpha` the
# rows are computed with (it used to be hard-coded as 0.05), and "\\hline"
# now uses an escaped backslash instead of the invalid "\h" escape.
print(" policy & mean & " + str(alpha) +
      "-VaR & ave length & gt & min gt \\\\ \\hline")
for policy_id in eval_policies:  # not named `eval`: that shadows the builtin
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/' + args.env_name + '_' + policy_id +
        '_fcounts_gt.txt')
    # Dot product of the weight chain with this policy's feature counts;
    # presumably one predicted return per sampled weight vector - confirm.
    return_dist = np.dot(W, fcounts)

    # Row order: display name, mean predicted return, worst-percentile value
    # at level alpha, last feature count (reported as "ave length"), then
    # mean and min of the returns parsed from the policy file.
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f}  \\\\".format(
        name_transform[policy_id], np.mean(return_dist),
        helper.worst_percentile(return_dist, alpha), fcounts[-1],
        np.mean(returns), np.min(returns)))

    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)

if args.env_name == "breakout":
    # Add a table row for the no-op policy (Breakout only).
    returns, fcounts = helper.parse_avefcount_array('../../policies/breakout_noop_fcounts.txt')
    # Dot product of the weight chain with the no-op feature counts; no
    # normalization is applied here.
    return_dist = np.dot(W, fcounts)

    # Use the same `alpha` as the other rows of this table rather than a
    # hard-coded 0.05, so every row reports the same VaR level.
    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f} ".format(
        "no-op", np.mean(return_dist),
        helper.worst_percentile(return_dist, alpha), fcounts[-1],
        np.mean(returns), np.min(returns)))
plt.show()


# Policies to tabulate and the display name used for each in the table.
eval_policies = ['00025', '00325', '00800', '01450', 'mean', 'map']
name_transform = {'00025': 'policy A', '00325': 'policy B',
                  '00800': 'policy C', '01450': 'policy D',
                  'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
if args.env_name == "enduro":
    eval_policies = ['03125', '03425', '03900', '04875', 'mean', 'map']
    name_transform = {'03125': 'policy A', '03425': 'policy B',
                      '03900': 'policy C', '04875': 'policy D',
                      'mean': 'mean', 'map': 'MAP', 'noop': 'no-op'}
gt_return_list = []
fcount_list = []
return_dist_list = []
# LaTeX table header; "\\hline" uses an escaped backslash instead of the
# invalid "\h" escape sequence (the emitted text is unchanged).
print(" policy & mean & " + str(args.alpha) +
      "-VaR & mu+10*Var & ave length & gt & min gt \\\\ \\hline")
for policy_id in eval_policies:  # not named `eval`: that shadows the builtin
    returns, fcounts = helper.parse_avefcount_array(
        '../../policies/' + args.env_name + '_' + policy_id +
        args.identifier + '.params_stripped.params_fcounts_auxiliary.txt')
    # Dot product of the weight chain with this policy's feature counts;
    # presumably one predicted return per sampled weight vector - confirm.
    return_dist = np.dot(W, fcounts)

    # Compute each statistic once; both feed two columns of the row.
    mean_return = np.mean(return_dist)
    var_return = helper.worst_percentile(return_dist, args.alpha)

    print("{} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.1f} & {:.0f}  \\\\".format(
        name_transform[policy_id], mean_return, var_return,
        10 * var_return + mean_return,
        0,  # the "ave length" column is a hard-coded placeholder here
        np.mean(returns), np.min(returns)))

    gt_return_list.append(returns)
    fcount_list.append(fcounts)
    return_dist_list.append(return_dist)

if args.env_name == "breakout" and args.noop:
    #evaluate the no-op policy
    returns, fcounts = helper.parse_avefcount_array('../../policies/breakout_no-op' + args.identifier + '.params_stripped.params_fcounts_auxiliary.txt')
    noop_returns = returns
    #normalize
    return_dist = np.dot(W,fcounts)