Example #1
def run_trial(opt):
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for _ in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 20  # rollouts collected at each evaluation point

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters // opt.misc.num_evaluations) == 0:
            print("\tEvaluating...")
            lnr.train()
            fit_all(ocs, trajs_train)
            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            for j in range(opt.samples):
                states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                    opt.env, opt.sup, opt.t, False, False)
                states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(
                    opt.env, lnr, ocs, opt.t, False, False)
                states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = stats.collect_robust_traj_multiple(
                    opt.env, opt.sim, lnr, ocs, opt.t, opt, False, False)

                trajs_valid.append(states_valid)
                trajs_test.append(states_test)
                trajs_robust.append(states_robust)

                sup_iters_rewards[j] = r_valid
                lnr_iters_rewards[j] = r_test
                rob_iters_rewards[j] = r_robust

                freqs[j] = freq

                if j == 0:
                    utils.plot([np.array([lnr_score]),
                                np.array([rob_score])],
                               ['Learner', 'Robust Learner'],
                               opt,
                               "scores/DecisionScores" + str(i),
                               colors=['blue', 'green'])
                    utils.plot([np.array([mags])], ['Robust Learner'],
                               opt,
                               "mags/RecoveryMagnitudes" + str(i),
                               colors=['green'])

            index = i // (opt.iters // opt.misc.num_evaluations)
            train_err[index, :] = eval_ocs(ocs, trajs_train)
            valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            test_err[index, :] = eval_ocs(ocs, trajs_test)
            robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            sup_reward[index] = np.mean(sup_iters_rewards)
            lnr_reward[index] = np.mean(lnr_iters_rewards)
            rob_reward[index] = np.mean(rob_iters_rewards)

            correction_freq[index] = np.mean(freqs)

            # on the final evaluation pass, build the per-timestep bar-graph
            # errors that main() averages across trials
            if index == (opt.misc.num_evaluations - 1):
                bar_errs = make_bar_graphs(ocs, trajs_train, trajs_valid,
                                           trajs_test, opt)

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq,
        "bar_errs": bar_errs
    }
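run_trial above leans on numpy (np), sklearn's svm, and several project-local modules (knet, learner, statistics, utils) that the excerpt does not show; in particular, fit_all and eval_ocs are never defined here. A minimal sketch of what they presumably do, assuming one OneClassSVM per timestep and full-length trajectories (names and layout are inferred from the call sites, not taken from jon--lee/dfr):

import numpy as np

def fit_all(ocs, trajs):
    # fit the t-th one-class SVM on the states visited at timestep t
    # across all trajectories collected so far
    for t, oc in enumerate(ocs):
        X_t = np.array([traj[t] for traj in trajs])
        oc.fit(X_t)

def eval_ocs(ocs, trajs):
    # per-timestep outlier rate: OneClassSVM.predict returns +1 for
    # inliers and -1 for outliers
    errs = np.zeros(len(ocs))
    for t, oc in enumerate(ocs):
        X_t = np.array([traj[t] for traj in trajs])
        errs[t] = np.mean(oc.predict(X_t) == -1)
    return errs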
Example #2
def main():
    ap = argparse.ArgumentParser()

    ap.add_argument('--arch', required=True, nargs='+', type=int)
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--grads', required=True, type=int)

    opt = Options()
    opt.load_args(ap.parse_args())

    opt.envname = opt.env
    opt.filename = '/Users/JonathanLee/experts/' + opt.envname + '.pkl'
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    opt.pi = load_policy.load_policy(opt.filename)
    opt.sess = tf.Session()
    opt.sup = NetSupervisor(opt.pi, opt.sess)
    opt.misc = Options()
    opt.misc.num_evaluations = 10

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    if not os.path.exists(plot_dir + '/scores'):
        os.makedirs(plot_dir + '/scores')
    if not os.path.exists(plot_dir + '/mags'):
        os.makedirs(plot_dir + '/mags')

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    sup_rewards = np.zeros((opt.trials, opt.misc.num_evaluations))
    lnr_rewards = np.zeros((opt.trials, opt.misc.num_evaluations))
    rob_rewards = np.zeros((opt.trials, opt.misc.num_evaluations))

    train_err = np.zeros((opt.trials, opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.trials, opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.trials, opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.trials, opt.misc.num_evaluations, opt.t))

    freq = np.zeros((opt.trials, opt.misc.num_evaluations))

    train_bar_errs = np.zeros((opt.trials, opt.t))
    valid_bar_errs = np.zeros((opt.trials, opt.t))
    test_bar_errs = np.zeros((opt.trials, opt.t))

    print "Running Trials:\n\n"

    try:
        for t in range(opt.trials):
            start_time = timer.time()
            results = run_trial(opt)
            sup_rewards[t, :] = results['sup_reward']
            lnr_rewards[t, :] = results['lnr_reward']
            rob_rewards[t, :] = results['rob_reward']

            train_err[t, :, :] = results['train_err']
            valid_err[t, :, :] = results['valid_err']
            test_err[t, :, :] = results['test_err']
            robust_err[t, :, :] = results['robust_err']

            freq[t, :] = results['correction_freq']
            train_bar_errs[t], valid_bar_errs[t], test_bar_errs[t] = results[
                'bar_errs']

            sup_rewards_save = sup_rewards[:t + 1, :]
            lnr_rewards_save = lnr_rewards[:t + 1, :]
            rob_rewards_save = rob_rewards[:t + 1, :]

            train_err_save = train_err[:t + 1, :, :]
            valid_err_save = valid_err[:t + 1, :, :]
            test_err_save = test_err[:t + 1, :, :]
            robust_err_save = robust_err[:t + 1, :, :]
            freq_save = freq[:t + 1, :]

            pd.DataFrame(sup_rewards_save).to_csv(
                opt.data_dir + 'sup_rewards.csv', index=False)
            pd.DataFrame(lnr_rewards_save).to_csv(
                opt.data_dir + 'lnr_rewards.csv', index=False)
            pd.DataFrame(rob_rewards_save).to_csv(
                opt.data_dir + 'rob_rewards.csv', index=False)

            for tau in range(opt.t):
                pd.DataFrame(train_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'train_err_t' + str(tau) + '.csv',
                    index=False)
                pd.DataFrame(valid_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'valid_err_t' + str(tau) + '.csv',
                    index=False)
                pd.DataFrame(test_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'test_err_t' + str(tau) + '.csv',
                    index=False)
                pd.DataFrame(robust_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'robust_err_t' + str(tau) + '.csv',
                    index=False)

            pd.DataFrame(freq_save).to_csv(opt.data_dir + 'freq.csv',
                                           index=False)

            train_err_avg = np.mean(train_err_save, axis=2)
            valid_err_avg = np.mean(valid_err_save, axis=2)
            test_err_avg = np.mean(test_err_save, axis=2)
            robust_err_avg = np.mean(robust_err_save, axis=2)

            utils.plot([sup_rewards_save, lnr_rewards_save, rob_rewards_save],
                       ['Supervisor', 'Learner', 'Robust Learner'],
                       opt,
                       "Reward",
                       colors=['red', 'blue', 'green'])
            utils.plot(
                [train_err_avg, valid_err_avg, test_err_avg, robust_err_avg],
                ['Training', 'Validation', 'Learner', 'Robust Learner'],
                opt,
                "Error",
                colors=['red', 'orange', 'blue', 'green'])
            utils.plot([freq_save], ['Frequency'],
                       opt,
                       'Correction Frequency',
                       colors=['green'])

            bar_errs = [
                np.mean(train_bar_errs[:t + 1], axis=0),
                np.mean(valid_bar_errs[:t + 1], axis=0),
                np.mean(test_bar_errs[:t + 1], axis=0)
            ]
            labels = ['train', 'valid', 'test']
            width = .2
            index = np.arange(opt.t)
            for i, (err, label) in enumerate(zip(bar_errs, labels)):
                plt.bar(index + i * width, err, width, label=label)
            plt.legend()
            plt.ylim(0, .75)
            plt.savefig('/Users/JonathanLee/Desktop/bar_new_avg.png')
            utils.clear()

            end_time = timer.time()
            print "Trial time: " + str(end_time - start_time)

    except KeyboardInterrupt:
        pass
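Options here is a project-local container; a minimal sketch of the load_args behavior this main() assumes (hypothetical, not the actual class from jon--lee/dfr):

class Options(object):
    """Attribute bag for experiment settings."""

    def load_args(self, args):
        # copy every parsed argparse attribute onto this object, so the
        # flags become fields like opt.arch, opt.lr, opt.iters, ...
        for key, value in vars(args).items():
            setattr(self, key, value)

# Illustrative invocation (script name and flag values are hypothetical):
#   python main.py --arch 64 64 --iters 50 --trials 5 \
#       --env Hopper-v1 --t 100 --grads 1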
Example #3
File: plot5.py  Project: jon--lee/dfr
rand_scores_aggregate = np.array(rand_scores_aggregate)
ag_scores_aggregate = np.array(ag_scores_aggregate)
fd_scores_aggregate = np.array(fd_scores_aggregate)

# rand_means = np.mean(rand_scores_aggregate, axis=0)
# ag_means = np.mean(ag_scores_aggregate, axis=0)
# fd_means = np.mean(fd_scores_aggregate, axis=0)

# rand_sems = scipy.stats.sem(rand_scores_aggregate, axis=0)
# ag_sems = scipy.stats.sem(ag_scores_aggregate, axis=0)
# fd_sems = scipy.stats.sem(fd_scores_aggregate, axis=0)

datas = [rand_scores_aggregate, ag_scores_aggregate, fd_scores_aggregate]
labels = ['Rand', 'Approx Grad', 'Finite Diff']
colors = ['blue', 'red', 'green']
utils.plot(datas, labels, opt, "Optimize", colors)

# plt.plot(rand_scores_aggregate[:els], color='blue', alpha=.5, label='Rand' )
# plt.plot(ag_scores_aggregate[:els], color='red', alpha=.5, label='Rand' )
# plt.plot(fd_scores_aggregate[:els], color='green', alpha=.5, label='Rand')

# for j, trial in enumerate(trials_data):
#     rand_scores = trial['rand_scores']
#     rand_cutoffs = trial['rand_cutoffs']
#     approx_grad_scores = trial['approx_grad_scores']
#     approx_grad_cutoffs = trial['approx_grad_cutoffs']
#     finite_diff_scores = trial['finite_diff_scores']
#     finite_diff_cutoffs = trial['finite_diff_cutoffs']

#     els = 200
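utils.plot is opaque in this excerpt, but the commented-out mean/SEM code above suggests what it does with a (trials x iterations) array. A hedged sketch of that behavior under those assumptions (plot_mean_sem is a hypothetical stand-in, not the real utils.plot):

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats

def plot_mean_sem(datas, labels, opt, title, colors):
    # one curve per method: per-iteration mean across trials with a
    # standard-error band, saved under the experiment's plot directory
    for data, label, color in zip(datas, labels, colors):
        means = np.mean(data, axis=0)
        sems = scipy.stats.sem(data, axis=0)
        xs = np.arange(len(means))
        plt.plot(xs, means, color=color, label=label)
        plt.fill_between(xs, means - sems, means + sems, color=color, alpha=.3)
    plt.legend()
    plt.title(title)
    plt.savefig(opt.plot_dir + '/' + title + '.png')
    plt.close()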
Example #4
def main():
    ap = argparse.ArgumentParser()

    ap.add_argument('--arch', required=True, nargs='+', type=int)
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)

    opt = Options()
    opt.load_args(ap.parse_args())

    opt.envname = opt.env
    opt.filename = '/Users/JonathanLee/experts/' + opt.envname + '.pkl'
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    opt.pi = load_policy.load_policy(opt.filename)
    opt.sess = tf.Session()
    opt.sup = NetSupervisor(opt.pi, opt.sess)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    sup_rewards = np.zeros((opt.trials, opt.iters))
    lnr_rewards = np.zeros((opt.trials, opt.iters))
    rob_rewards = np.zeros((opt.trials, opt.iters))

    train_err = np.zeros((opt.trials, opt.iters))
    valid_err = np.zeros((opt.trials, opt.iters))
    test_err = np.zeros((opt.trials, opt.iters))
    robust_err = np.zeros((opt.trials, opt.iters))

    freq = np.zeros((opt.trials, opt.iters))

    for t in range(opt.trials):
        results = run_trial(opt)
        sup_rewards[t, :] = results['sup_reward']
        lnr_rewards[t, :] = results['lnr_reward']
        rob_rewards[t, :] = results['rob_reward']

        train_err[t, :] = results['train_err']
        valid_err[t, :] = results['valid_err']
        test_err[t, :] = results['test_err']
        robust_err[t, :] = results['robust_err']

        freq[t, :] = results['correction_freq']

    pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv',
                                     index=False)
    pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv',
                                     index=False)
    pd.DataFrame(rob_rewards).to_csv(opt.data_dir + 'rob_rewards.csv',
                                     index=False)

    pd.DataFrame(train_err).to_csv(opt.data_dir + 'train_err.csv', index=False)
    pd.DataFrame(valid_err).to_csv(opt.data_dir + 'valid_err.csv', index=False)
    pd.DataFrame(test_err).to_csv(opt.data_dir + 'test_err.csv', index=False)
    pd.DataFrame(robust_err).to_csv(opt.data_dir + 'robust_err.csv',
                                    index=False)

    pd.DataFrame(freq).to_csv(opt.data_dir + 'freq.csv', index=False)

    utils.plot([sup_rewards, lnr_rewards, rob_rewards],
               ['Supervisor', 'Learner', 'Robust Learner'],
               opt,
               "Reward",
               colors=['red', 'blue', 'green'])
    utils.plot([train_err, valid_err, test_err, robust_err],
               ['Training', 'Validation', 'Learner', 'Robust Learner'],
               opt,
               "Error",
               colors=['red', 'orange', 'blue', 'green'])
    utils.plot([freq], ['Frequency'],
               opt,
               'Correction Frequency',
               colors=['green'])
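Every example in this set rolls out trajectories through statistics.collect_traj, which is never shown. A sketch of the interface its call sites imply: run the given agent in env for up to T steps and return the visited states, the agent's intended actions, the actions actually taken, and the summed reward. The trailing booleans and the agent's intended_action method are assumptions inferred from usage, not the dfr original:

def collect_traj(env, agent, T, visualize=False, early_stop=False):
    # hypothetical reconstruction from call sites in the examples above
    states, int_actions, taken_actions = [], [], []
    total_reward = 0.0
    s = env.reset()
    for _ in range(T):
        a = agent.intended_action(s)  # assumed supervisor/learner interface
        states.append(s)
        int_actions.append(a)
        taken_actions.append(a)  # this sketch applies no noise or overrides
        s, r, done, _ = env.step(a)  # old gym API: (obs, reward, done, info)
        total_reward += r
        if visualize:
            env.render()
        if done and early_stop:
            break  # examples pass False to keep trajectories at full length
    return states, int_actions, taken_actions, total_reward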
Example #5
File: test5.py  Project: jon--lee/dfr
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 5

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)

    correction_freq = np.zeros(opt.iters)

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)

        lnr.add_data(states, int_actions)
        oc.fit(lnr.X)

        lnr.train()

        X_valid = []
        X_test = []
        X_robust = []
        sup_iter_rewards = np.zeros(opt.samples)
        lnr_iter_rewards = np.zeros(opt.samples)
        rob_iter_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False)
            states_test, int_actions_test, _, r_test, freq, lnr_score = statistics.collect_score_traj(
                opt.env, lnr, oc, opt.t, False)
            states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj(
                opt.env, lnr, oc, opt.t, False)

            X_valid += states_valid
            X_test += states_test
            X_robust += states_robust

            sup_iter_rewards[j] = r_valid
            lnr_iter_rewards[j] = r_test
            rob_iter_rewards[j] = r_robust

            freqs[j] = freq

            if j == 0:
                utils.plot([np.array([lnr_score])], ['Learner'],
                           opt,
                           "DecisionScore" + str(i),
                           colors=['blue'])
                utils.plot([np.array([rob_score])], ['Robust Learner'],
                           opt,
                           "RobustDecisionScore" + str(i),
                           colors=['green'])

        train_err[i] = eval_oc(oc, lnr.X)
        valid_err[i] = eval_oc(oc, X_valid)
        test_err[i] = eval_oc(oc, X_test)
        robust_err[i] = eval_oc(oc, X_robust)

        sup_reward[i] = np.mean(sup_iter_rewards)
        lnr_reward[i] = np.mean(lnr_iter_rewards)
        rob_reward[i] = np.mean(rob_iter_rewards)

        correction_freq[i] = np.mean(freqs)

        print "One class train error: " + str(train_err[i])
        print "One class valid error: " + str(valid_err[i])

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
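eval_oc, used throughout this run_trial, also goes undefined in the excerpt; given the "One class ... error" prints above, it is presumably the one-class SVM's outlier rate on a set of states. A minimal hedged sketch:

import numpy as np

def eval_oc(oc, X):
    # fraction of states the one-class SVM rejects as outliers
    # (OneClassSVM.predict returns -1 for outliers, +1 for inliers)
    return np.mean(oc.predict(np.array(X)) == -1)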