Exemplo n.º 1
0
def learn_Maze_HER(n):
    """Train a DQN+HER model on a random ``n`` x ``n`` goal maze and evaluate it.

    The environment is built by ``make_env_GoalMaze`` with a fixed seed (17),
    discrete observations, sparse rewards and a 100-step limit.  Training
    runs for ``200 * 100 * n`` timesteps; a ``KeyboardInterrupt`` raised
    during training is swallowed so that the partially trained model is
    still passed to ``evaluate``.

    Args:
        n: used as both the maze width and height, and as the multiplier
            of the training budget.
    """
    print("Maze({0}), DQN+HER".format(n))

    maze_settings = dict(
        maze_generator=RandomMazeGenerator,
        width=n,
        height=n,
        complexity=.05,
        density=.1,
        seed=17,
        obs_type='discrete',
        reward_type='sparse',
        step_limit=100,
    )
    env = make_env_GoalMaze(**maze_settings)

    q_policy = partial(DQN_Policy, layers=[1024])
    model = HER(
        policy=q_policy,
        env=env,
        hindsight=3,
        learning_rate=1e-4,
        exploration_fraction=0.1,
        exploration_final_eps=0.05,
    )

    print("Initial distance: {0}".format(env._distance_diameter()))

    timestep_budget = 200 * 100 * n
    try:
        model.learn(
            total_timesteps=timestep_budget,
            callback=maze_callback,
            log_interval=20,
        )
    except KeyboardInterrupt:
        # Interrupting training is allowed: fall through and evaluate
        # whatever has been learned so far.
        pass

    # env._set_live_display(True)
    evaluate(model, env)
Exemplo n.º 2
0
def learn_BitFlipper_HER(n):
    """Train a DQN+HER model on a BitFlipper environment and evaluate it.

    The environment comes from ``make_env_GoalBitFlipper(n=n,
    space_seed=None)``.  Training runs for ``250 * 100 * n`` timesteps; a
    ``KeyboardInterrupt`` raised during training is swallowed so that the
    partially trained model is still passed to ``evaluate``.

    Args:
        n: forwarded to the environment factory and used as the multiplier
            of the training budget.
    """
    print("BitFlipper({0}), DQN+HER".format(n))

    env = make_env_GoalBitFlipper(n=n, space_seed=None)

    q_policy = partial(DQN_Policy, layers=[1024])
    her_settings = dict(
        policy=q_policy,
        env=env,
        hindsight=7,
        learning_rate=1e-4,
        exploration_fraction=0.1,
        exploration_final_eps=0.05,
    )
    model = HER(**her_settings)

    timestep_budget = 250 * 100 * n
    try:
        model.learn(total_timesteps=timestep_budget,
                    callback=bitflipper_callback)
    except KeyboardInterrupt:
        # Interrupting training is allowed: fall through and evaluate
        # whatever has been learned so far.
        pass

    evaluate(model, env)
Exemplo n.º 3
0
def find_optimal_TBS(env):
    """Pick the ``(r, S)`` pair with the highest simulated average reward.

    For every ``r`` in ``range(env.Lambda)``:

    1. a 200 x 200 matrix is filled from ``poisson.pmf`` (row 0 is left at
       zero; column 0 of each other row accumulates pmf values until the
       pmf evaluates to exactly 0);
    2. ``power_iteration`` is applied to that matrix and ``S`` is set to
       the number of leading entries of the result that must be summed
       for the running total to reach ``env.b / (env.b + env.h)``;
    3. the pair is scored with
       ``utility.evaluate(env, 100, 5000, TBS, env, r, S)[0]``.

    The first pair with the strictly largest score wins (ties keep the
    earlier ``r``).

    Args:
        env: object providing ``Lambda`` (used as ``range`` bound and as a
            ``poisson.pmf`` argument), ``b`` and ``h``.

    Returns:
        Tuple ``(opt_r, opt_S)``.

    NOTE(review): if ``poisson`` is ``scipy.stats.poisson``, its signature
    is ``pmf(k, mu)``; the calls below pass ``env.Lambda`` as ``k`` and the
    state offset as ``mu`` -- confirm this order is intended.
    NOTE(review): when ``env.Lambda <= 0`` the loop body never runs and the
    ``return`` raises ``UnboundLocalError``.
    NOTE(review): the scan for ``S`` has no upper bound; presumably it
    relies on the stationary vector summing to at least the target ratio.
    """
    rate = env.Lambda
    n_states = 200
    best_reward = 0
    nothing_scored_yet = True

    for r in range(rate):
        # ---- find optimal S ----
        transition = np.zeros(shape=(n_states, n_states))
        for src in range(1, n_states):
            for dst in range(1, min(src + r, n_states)):
                transition[src, dst] = poisson.pmf(rate, src + r - dst)

            # Accumulate into column 0 until the pmf evaluates to 0.
            offset = src + r
            mass = poisson.pmf(rate, offset)
            while mass > 0:
                transition[src, 0] += mass
                offset += 1
                mass = poisson.pmf(rate, offset)

        stationary = power_iteration(transition)

        target_ratio = env.b / (env.b + env.h)
        running_total = 0
        S = 0
        while running_total < target_ratio:
            running_total += stationary[S]
            S += 1

        # ---- compare reward ----
        average_reward = utility.evaluate(env, 100, 5000, TBS, env, r, S)[0]
        if nothing_scored_yet or best_reward < average_reward:
            nothing_scored_yet = False
            best_reward = average_reward
            opt_r, opt_S = r, S

    return opt_r, opt_S
Exemplo n.º 4
0
from utility import Data, evaluate
import svm_models
import time
if __name__ == "__main__":
    # Script entry point: time the LDA transform plus one SklearnSVC
    # build-and-score per split yielded by data.load(), then report.
    started_at = time.time()

    data = Data()
    data.lda_transform()

    # One score per (train, test) split, in the order data.load() yields them.
    acc = [
        svm_models.SklearnSVC(X_train, y_train).score(X_test, y_test)
        for X_train, X_test, y_train, y_test in data.load()
    ]

    # Stop the clock before reporting so evaluate() is not part of the timing.
    elapsed = time.time() - started_at
    evaluate(acc)
    print('total time: %.4fs' % elapsed)
Exemplo n.º 5
0
    # Restore pretrained weights when requested and available; in every
    # other case start from freshly initialised variables with a zero
    # baseline in cur_best_pre_0.
    if args.pretrain != 1:
        sess.run(tf.global_variables_initializer())
        cur_best_pre_0 = 0.
        print('without pretraining.')
    else:
        # NOTE(review): eval() runs arbitrary code; args.regs is presumably
        # a command-line string -- confirm it is trusted.
        pretrain_path = '%sweights/%s/%s/l%s_r%s' % (
            args.proj_path,
            args.dataset,
            model.model_type,
            str(args.lr),
            '-'.join([str(r) for r in eval(args.regs)]),
        )
        checkpoint_dir = os.path.dirname(pretrain_path + '/checkpoint')
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)

        if not (ckpt and ckpt.model_checkpoint_path):
            # Nothing usable on disk: behave exactly like the no-pretrain case.
            sess.run(tf.global_variables_initializer())
            cur_best_pre_0 = 0.
            print('without pretraining.')
        else:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('load the pretrained model parameters from: ', pretrain_path)

            # Score the restored model; ret[5] becomes the baseline.
            users_to_test = list(dataset.test_set.keys())
            ret = evaluate(sess, model, users_to_test, drop_flag=False)
            cur_best_pre_0 = ret[5]

            # The labels below are what gets printed; which metric each
            # index of ret holds is defined by evaluate(), not visible here.
            report_template = ('pretrained model recall=[%.5f, %.5f],'
                               'map=[%.5f, %.5f], ndcg=[%.5f, %.5f], auc=[%.5f]')
            pretrain_ret = report_template % (
                ret[0], ret[4], ret[5], ret[9], ret[10], ret[14], ret[15])
            print(pretrain_ret)
Exemplo n.º 6
0
    # Restore pretrained weights when args.pretrain == 1 and a checkpoint is
    # found; otherwise initialise variables from scratch.
    if args.pretrain == 1:
        # Checkpoint directory is derived from project path, dataset, model
        # type, layer sizes, learning rate and regularisation settings.
        # NOTE(review): eval() runs arbitrary code; args.layer_size and
        # args.regs are presumably command-line strings -- confirm trusted.
        layer = '-'.join([str(l) for l in eval(args.layer_size)])
        pretrain_path = '%sweights/%s/%s/%s/l%s_r%s' % (
            args.proj_path, args.dataset, model.model_type, layer, str(
                args.lr), '-'.join([str(r) for r in eval(args.regs)]))
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname(pretrain_path + '/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            # Initialise first, then overwrite with the saved values.
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('load the pretrained model parameters from: ', pretrain_path)

            # *********************************************************
            # get the performance from pretrained model.
            users_to_test = list(dataset.test_set.keys())
            ret = evaluate(sess, model, users_to_test, drop_flag=True)
            # First recall entry of the restored model becomes the baseline.
            cur_best_pre_0 = ret['recall'][0]

            # Report the first and last entry of each metric list, plus auc.
            pretrain_ret = 'pretrained model recall=[%.5f, %.5f], precision=[%.5f, %.5f], hit=[%.5f, %.5f],' \
                           'ndcg=[%.5f, %.5f], auc=[%.5f]' % \
                           (ret['recall'][0], ret['recall'][-1],
                            ret['precision'][0], ret['precision'][-1],
                            ret['hit_ratio'][0], ret['hit_ratio'][-1],
                            ret['ndcg'][0], ret['ndcg'][-1], ret['auc'])
            print(pretrain_ret)
        else:
            # No usable checkpoint: start from scratch with a zero baseline.
            sess.run(tf.global_variables_initializer())
            cur_best_pre_0 = 0.
            print('without pretraining.')

    else:
Exemplo n.º 7
0
Reload the pretrained model parameters.
"""
# Either start from freshly initialised variables (no pretraining) or restore
# a checkpoint from weights_save_path; a missing checkpoint is a hard error.
if args.pretrain != 1:
    sess.run(tf.global_variables_initializer())
    cur_best_pre = 0.
else:
    ckpt = tf.train.get_checkpoint_state(weights_save_path)
    if not (ckpt and ckpt.model_checkpoint_path):
        raise RuntimeError("Store variables not found.")

    # Initialise first, then overwrite with the saved values.
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, ckpt.model_checkpoint_path)
    print('load the pretrained model parameters from: ', weights_save_path)

    # Score the restored model unless args.report == 1.
    # NOTE(review): when args.report == 1, cur_best_pre is not assigned
    # anywhere in this statement -- confirm later code does not read it.
    if args.report != 1:
        res = evaluate(
            sess,
            model,
            dataset.test_users,
            dataset,
            args.batch_size,
            args.K,
            drop_flag=True,
            batch_test_flag=False,
        )
        cur_best_pre = res['recall']

        report_template = ('pretrained model recall= %.5f, precision= %.5f, hit= %.5f,'
                           'ndcg= %.5f')
        pretrain_ret = report_template % (
            res['recall'], res['precision'], res['hit_ratio'], res['ndcg'])
        print(pretrain_ret)
Exemplo n.º 8
0
from utility import Data, evaluate, convert_to_ovr
from svm_models import LinearSVM
import time
if __name__ == "__main__":
    # Script entry point: for every split from data.load(), fit one
    # LinearSVM per relabelling yielded by convert_to_ovr, then aggregate
    # the per-classifier results.
    start_time = time.time()
    data = Data()
    acc, correct = [], []

    for X_train, X_test, y_train, y_test in data.load():
        per_clf_correct, per_clf_acc = [], []
        for z_train, z_test in convert_to_ovr(y_train, y_test):
            clf = LinearSVM()
            clf.fit(X_train, z_train)
            correct_cnt, single_acc = clf.score(X_test, z_test)
            per_clf_correct.append(correct_cnt)
            per_clf_acc.append(single_acc)

        # NOTE(review): 10 and 0.1 presumably match the number of pairs
        # convert_to_ovr yields (one per class) -- confirm.
        acc.append(sum(per_clf_acc) / 10)
        correct.append(0.1 * sum(per_clf_correct) / len(y_test))

    # Stop the clock before reporting so the printing is not timed.
    elapsed = time.time() - start_time

    print('Overall Accuracy:')
    evaluate(correct)
    print('Average Accuracy:')
    evaluate(acc)
    print('total time: %.4fs' % elapsed)