Example #1
def main():
    config = tf.ConfigProto()

    # Avoid warning message errors
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # GPU memory growth disabled: allocate the full GPU memory up front
    config.gpu_options.allow_growth = False
    K.clear_session()

    with tf.Session(config=config):
        model.learn(policy=policies.PPOPolicy,
                    env=SubprocVecEnv([
                        env.make_gumball_env(),
                        env.make_gumball_env(),
                        env.make_gumball_env(),
                        env.make_gumball_env(),
                    ]),
                    nsteps=16,  # Steps per environment (e.g. 2048 for a full training run)
                    total_timesteps=10000000,
                    gamma=0.99,
                    lam=0.95,
                    vf_coef=0.5,
                    ent_coef=0.01,
                    lr=lambda _: 2e-4,
                    cliprange=lambda _: 0.1,  # PPO clipping range
                    max_grad_norm=0.5,
                    log_interval=10
                    )
Example #2
def main():

    config = tf.ConfigProto()

    # Avoid warning message errors
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Allowing GPU memory growth
    config.gpu_options.allow_growth = True

    with tf.Session(config=config):
        model.learn(policy=policies.A2CPolicy,
                    env=SubprocVecEnv(
                        [env.make_train_0, env.make_train_1, env.make_train_2, env.make_train_3, env.make_train_4,
                         env.make_train_5, env.make_train_6, env.make_train_7, env.make_train_8, env.make_train_9,
                         env.make_train_10, env.make_train_11, env.make_train_12]),
                    nsteps=2048,  # Steps per environment
                    total_timesteps=10000000,
                    gamma=0.99,     # discount rate
                    lam=0.95,       # lambda used in generalized advantage estimation
                    vf_coef=0.5,    # value function coefficient
                    ent_coef=0.01,  # entropy coefficient
                    lr=2e-4,
                    max_grad_norm=0.5,  # cap the gradient norm to avoid very large update steps
                    log_interval=10     # print to the console every 10 updates
                    )
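
The comments above describe the usual A2C/PPO hyperparameters. The max_grad_norm argument is typically applied inside learn() as global-norm gradient clipping right before the optimizer step; a minimal TF1-style sketch of that step (an illustration of the common baselines pattern, not this repository's confirmed implementation):

import tensorflow as tf

def clipped_train_op(loss, params, lr=2e-4, max_grad_norm=0.5):
    # Compute gradients of the loss w.r.t. the trainable parameters.
    grads = tf.gradients(loss, params)
    # Rescale all gradients together so their global norm is at most max_grad_norm.
    grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=lr, decay=0.99, epsilon=1e-5)
    return optimizer.apply_gradients(list(zip(grads, params)))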
Example #3
def main():

    # data
    data_path = "/home/kzk/datasets/uci_csv/spam.csv"
    data = np.loadtxt(data_path, delimiter=" ")
    y = data[:, 0]
    X = data[:, 1:]
    n = X.shape[0]
    X = np.hstack((X, np.reshape(np.ones(n), (n, 1))))
    X_l = X
    X_u = X

    # learn
    C = 1
    model = LSVMClassifier(multi_class="ovo", C=C)
    model.learn(X_l, y, X_u)

    # predict
    outputs = []
    for i, x in enumerate(X):
        outputs_ = model.predict(x)
        outputs.append(outputs_[0][0])

    # confusion matrix
    cm = confusion_matrix(y, outputs)
    print cm
    print 100.0 * np.sum(cm.diagonal()) / len(y)
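
The last line reports accuracy as the sum of the confusion-matrix diagonal divided by the number of samples. The same figure can be computed directly with scikit-learn (a small equivalent sketch):

from sklearn.metrics import accuracy_score

# 100 * fraction of correctly classified samples, same as the diagonal-sum expression above
print(100.0 * accuracy_score(y, outputs))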
Example #4
def main():

    # data
    data_path = "/home/kzk/datasets/uci_csv/iris.csv"
    data = np.loadtxt(data_path, delimiter=" ")
    y = data[:, 0]
    X = data[:, 1:]
    n = X.shape[0]
    X = np.hstack((X, np.reshape(np.ones(n), (n, 1))))

    # learn
    model = RVMClassifier(
        max_itr=50,
        threshold=1e-4,
        learn_type="batch",
        multi_class="ovo",
        alpha_threshold=1e-24,
    )
    model.learn(X, y)

    # predict
    outputs = []
    for i, x in enumerate(X):
        outputs_ = model.predict(x)
        outputs.append(outputs_[0][0])

    # confusion matrix
    cm = confusion_matrix(y, outputs)
    print cm
    print 100.0 * np.sum(cm.diagonal()) / len(y)
Example #5
def get_score(data):
    target = 'label'
    feature = [x for x in data.columns if x not in [target, 'id']]
    score_list = []
    for i in range(5):
        x_train, x_test, y_train, y_test = train_test_split(data[feature],
                                                            data[target],
                                                            test_size=0.4,
                                                            random_state=1)
        pred_proba_gdt = 0
        pred_proba_xgb = 0
        pred_proba_rf = 0
        # Use a separate index for the inner loop so it does not shadow the
        # outer fold index i, which is printed with the final score below.
        for j in range(5):
            pred_proba_gdt += learn(x_train, y_train, x_test, j, 'GDBT')[1]
            pred_proba_xgb += learn(x_train, y_train, x_test, j, 'XGB')[1]
            pred_proba_rf += learn(x_train, y_train, x_test, j, 'RF')

        pred_proba = pred_proba_gdt + pred_proba_rf * 1.5 + pred_proba_xgb * 2.0
        pred_max = np.max(pred_proba, axis=1)
        zipa = zip(*pred_proba)
        zz = pd.DataFrame()
        zz['max'] = pred_max
        zz['p0'] = (zipa[0] / zz['max']).astype('int') * 0
        zz['p1000'] = (zipa[1] / zz['max']).astype('int') * 1000
        zz['p1500'] = (zipa[2] / zz['max']).astype('int') * 1500
        zz['p2000'] = (zipa[3] / zz['max']).astype('int') * 2000
        zz['label'] = zz['p0'] + zz['p1000'] + zz['p1500'] + zz['p2000']
        pred = zz['label'].values
        score = f1(pred, y_test)
        print 'final_score : ' + str(i), score
        score_list.append(score)
    return score_list
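
The DataFrame gymnastics above recover, for each row, the class whose weighted probability equals the row maximum and map it to the labels 0/1000/1500/2000. Assuming pred_proba is an (n, 4) array with columns in that order and the row maximum is unique, an equivalent and more direct sketch:

import numpy as np

labels = np.array([0, 1000, 1500, 2000])
pred = labels[np.argmax(pred_proba, axis=1)]  # same result as zz['label'] above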
Example #6
def main():
    config = tf.ConfigProto()

    os.environ["CUDA_VISIBLE_DEVICES"]="0"

    #Allow GPU Memory Growth
    config.gpu_options.allow_growth = True


    # Note: SubprocVecEnv places all our environments in a vector, which allows us to run them simultaneously (see the sketch after this example)
    with tf.Session(config=config):
        #Call the learn function with all the required A2C Policy Params
        model.learn(
            policy=policies.A2CPolicy,
            env=SubprocVecEnv([env.make_train_0, env.make_train_1, env.make_train_2, env.make_train_3, 
                env.make_train_4, env.make_train_5, env.make_train_6, env.make_train_7, env.make_train_8, 
                env.make_train_9, env.make_train_10, env.make_train_11, env.make_train_12]),
            nsteps=2048,
            total_timesteps=10000000,
            gamma=0.99,
            lam=0.95,
            vf_coeff=0.5,
            ent_coeff=0.01,
            lr=lambda _: 2e-4,
            max_grad_norm=0.5,  # Avoid big gradient steps
            log_interval=10  # Print to our console every 10 weight updates
        )
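
As the note in Example #6 says, SubprocVecEnv runs several environments in parallel worker processes. It expects a list of argument-less factory functions (thunks), as in the env.make_train_0 ... env.make_train_12 entries above. A minimal sketch of that pattern with a hypothetical make_env helper and a standard gym environment (baselines-style API assumed):

import gym
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

def make_env(rank):
    def _thunk():
        env = gym.make('CartPole-v1')  # hypothetical environment id
        env.seed(rank)                 # give each worker process a different seed
        return env
    return _thunk

if __name__ == '__main__':
    vec_env = SubprocVecEnv([make_env(i) for i in range(4)])
    obs = vec_env.reset()  # stacked observations, one row per worker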
Example #7
def main():
    config = tf.ConfigProto()
    # Avoid warning message errors
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Allowing GPU memory growth: in some cases it is desirable for the process
    # to only allocate a subset of the available memory, or to grow the usage
    # only as it is needed. TensorFlow provides two session options for this;
    # the first, allow_growth, starts by allocating very little GPU memory and
    # extends the allocated region as the sessions run and need more.
    # config.gpu_options.allow_growth = True

    # Alternative environment lists:
    # [env.make_train_0, env.make_train_1, env.make_train_2, env.make_train_3,
    #  env.make_train_4, env.make_train_5, env.make_train_6, env.make_train_7]
    # or env.make_train_0 repeated eight times.
    flag.ON_DESKTOP = True

    if flag.ON_DESKTOP:
        nsteps = 1
    else:
        nsteps = 2048

    with tf.Session(config=config):
        model.learn(policy=policies.A2CPolicy,
                    env=SubprocVecEnv([env.make_train_0]),
                    nsteps=nsteps,
                    total_timesteps=1000000000,
                    gamma=0.99,
                    lam=0.95,
                    vf_coef=0.5,
                    ent_coef=0.001,
                    lr=2e-4,
                    max_grad_norm=0.5,
                    log_interval=5,
                    save_interval=5)
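
The commented-out notes in Example #7 describe TensorFlow 1.x's two session-level GPU memory controls; Example #12 below uses the second one. A small standalone sketch of both (TF1 API):

import tensorflow as tf

# Option 1: grow GPU memory on demand instead of reserving it all up front.
growth_config = tf.ConfigProto()
growth_config.gpu_options.allow_growth = True

# Option 2: cap this process at a fixed fraction of the GPU memory (here 40%).
capped_config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.4))

with tf.Session(config=growth_config):
    pass  # build and run the graph as usual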
Example #8
def main():
    config = tf.ConfigProto()

    # Avoid warning message errors
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # GPU memory growth disabled: allocate the full GPU memory up front
    config.gpu_options.allow_growth = False

    with tf.Session(config=config):
        model.learn(
            policy=policies.PPOPolicy,
            env=SubprocVecEnv([env.make_train_0]),
            # Add env.make_train_1 ... env.make_train_12 to the list above to
            # train on more environments in parallel.
            nsteps=2048,  # Steps per environment
            total_timesteps=100000,
            gamma=0.99,
            lam=0.95,
            vf_coef=0.5,
            ent_coef=0.01,
            lr=lambda _: 2e-2,
            cliprange=lambda _: 0.3,  # PPO clipping range
            max_grad_norm=0.5,
            log_interval=10)
Example #9
def main():
    config = tf.ConfigProto()

    # Avoid warning message errors
    os.environ["CUDA_VISIBLE_DEVICES"]="0"

    # Allowing GPU memory growth
    config.gpu_options.allow_growth = True

    with tf.Session(config=config):
        model.learn(policy=policies.PPOPolicy,
                    env=SubprocVecEnv([env.make_train_0,
                                       env.make_train_1,
                                       env.make_train_2,
                                       env.make_train_3,
                                       env.make_train_4,
                                       env.make_train_5,
                                       env.make_train_6,
                                       env.make_train_7,
                                       env.make_train_8,
                                       env.make_train_9,
                                       env.make_train_10,
                                       env.make_train_11,
                                       env.make_train_12]),
                    nsteps=2048,  # Steps per environment
                    total_timesteps=10000000,
                    gamma=0.99,
                    lam=0.95,
                    vf_coef=0.5,
                    ent_coef=0.01,
                    lr=lambda _: 2e-4,
                    cliprange=lambda _: 0.1,  # PPO clipping range
                    max_grad_norm=0.5,
                    log_interval=10
                    )
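
Several of these examples pass lr and cliprange as lambdas. In baselines-style learners the callable receives the remaining-progress fraction (roughly 1.0 at the start of training, approaching 0.0 at the end), so a hyperparameter can be annealed over training; a constant schedule is just lambda _: value. A sketch assuming that convention:

def linear_schedule(initial_value):
    # Returns a schedule callable: the value decays linearly as the remaining
    # progress fraction goes from 1.0 down to 0.0.
    def schedule(progress_remaining):
        return initial_value * progress_remaining
    return schedule

lr = linear_schedule(2e-4)   # annealed from 2e-4 toward 0
cliprange = lambda _: 0.1    # constant, as in the examples above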
Example #10
def sparse_main():
    from sklearn.datasets import load_svmlight_file
    from sklearn.metrics import confusion_matrix
    import time

    # data
    data_path = "/home/kzk/datasets/news20/news20.dat"
    (X, y) = load_svmlight_file(data_path)
    n = X.shape[0]
    X = sp.sparse.hstack(
        (X, sp.sparse.csr_matrix(np.reshape(np.ones(n), (n, 1)))))
    X_l = sp.sparse.csr_matrix(X)
    X_u = sp.sparse.csr_matrix(X)

    st = time.time()
    # learn
    model = RegularizedHPFSSLClassifier(max_itr=0,
                                        threshold=1e-4,
                                        learn_type="online",
                                        multi_class="ovo")
    model.learn(X_l, y, X_u)
    et = time.time()
    print "Elapsed time: %f [s]" % (et - st)

    # predict
    outputs = []
    for i, x in enumerate(X):
        outputs_ = model.predict(x)
        outputs.append(outputs_[0][0])

    # confusion matrix
    cm = confusion_matrix(y, outputs)
    print cm
    print 100.0 * np.sum(cm.diagonal()) / len(y)
Example #11
def main():
    config = tf.ConfigProto()

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    config.gpu_options.allow_growth = True
    environment_list = []
    for i in range(10):
        environment_list.append(env.make_env)

    env_vector = SubprocVecEnv(environment_list)

    with tf.Session(config=config):
        model.learn(policy=policies.A2CNetwork,
                    env=env_vector,
                    nsteps=2048,
                    total_timesteps=10000000,
                    gamma=0.99,
                    lam=0.95,
                    vf_coef=0.5,
                    ent_coef=0.01,
                    lr=2e-4,
                    max_grad_norm=0.5,
                    log_interval=2,
                    restart=True)
Example #12
def main():
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.5))

    # Avoid warning message errors
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Allowing GPU memory growth
    config.gpu_options.allow_growth = True

    with tf.Session(config=config):
        model.learn(
            policy=policies.A2CPolicy,
            env=SubprocVecEnv([
                env.make_train_0, env.make_train_1, env.make_train_2,
                env.make_train_3, env.make_train_4, env.make_train_5,
                env.make_train_6, env.make_train_7, env.make_train_8,
                env.make_train_9, env.make_train_10, env.make_train_11,
                env.make_train_12
            ]),
            nsteps=2048,  # Steps per environment
            total_timesteps=10000000,
            gamma=0.99,
            lam=0.95,
            vf_coef=0.5,
            ent_coef=0.01,
            lr=2e-4,
            max_grad_norm=0.5,
            log_interval=10)
Example #13
def main():

    # data
    data_path = "/home/kzk/datasets/uci_csv/glass.csv"
    data = np.loadtxt(data_path, delimiter=" ")
    y = data[:, 0]
    X = data[:, 1:]
    n = X.shape[0]
    X = np.hstack((X, np.reshape(np.ones(n), (n, 1))))
    X_l = X
    X_u = X

    # learn
    lam = 100
    gamma_s = .001
    model = LapRLSClassifier(lam=lam,
                             normalized=False,
                             kernel="rbf",
                             gamma_s=gamma_s,
                             multi_class="ovo")
    model.learn(X_l, y, X_u)

    # predict
    outputs = []
    for i, x in enumerate(X):
        outputs_ = model.predict(x)
        outputs.append(outputs_[0][0])

    # confusion matrix
    cm = confusion_matrix(y, outputs)
    print cm
    print 100.0 * np.sum(cm.diagonal()) / len(y)
Example #14
    def POST(self):

        partial = json.loads(web.data())
        rawdata = model.build_raw_data(partial, web.ctx.environ, web.ctx.ip)
        identity = model.create_user(partial["name"])
        whorls = model.create_get_whorls(rawdata)
        model.learn(whorls, identity)
        Session.commit()

        return ""
Example #15
def main(game, representation, experiment, steps, n_cpu, render, logging,
         **kwargs):
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    resume = kwargs.get('resume', False)
    if representation == 'wide':
        policy = FullyConvPolicyBigMap
        if game == "sokoban":
            policy = FullyConvPolicySmallMap
    else:
        policy = CustomPolicyBigMap
        if game == "sokoban":
            policy = CustomPolicySmallMap
    if game == "binary":
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        kwargs['cropped_size'] = 10
    n = max_exp_idx(exp_name)
    global log_dir
    if not resume:
        n = n + 1
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    if not resume:
        os.mkdir(log_dir)
    else:
        model = load_model(log_dir)
    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }
    used_dir = log_dir
    if not logging:
        used_dir = None
    env = make_vec_envs(env_name, representation, log_dir, n_cpu, **kwargs)
    if not resume or model is None:
        model = PPO2(policy, env, verbose=1, tensorboard_log="./runs")
    else:
        model.set_env(env)
    if not logging:
        model.learn(total_timesteps=int(steps), tb_log_name=exp_name)
    else:
        model.learn(total_timesteps=int(steps),
                    tb_log_name=exp_name,
                    callback=callback)
Example #16
def main():
    import time
    from sklearn.metrics import confusion_matrix

    # labeled sample
    l_data_path = "/home/kzk/datasets/uci_csv_ssl_lrate_fixed_1_50_1_98/car/4_l.csv"
    data_l = np.loadtxt(l_data_path, delimiter=" ")
    data_l = np.hstack(
        (data_l, np.reshape(np.ones(data_l.shape[0]), (data_l.shape[0], 1))))
    y_l = data_l[:, 0]
    X_l = data_l[:, 1:]

    # unlabeled sample
    u_data_path = "/home/kzk/datasets/uci_csv_ssl_lrate_fixed_1_50_1_98/car/4_u.csv"
    data_u = np.loadtxt(u_data_path, delimiter=" ")
    data_u = np.hstack(
        (data_u, np.reshape(np.ones(data_u.shape[0]), (data_u.shape[0], 1))))
    X_u = data_u[:, 1:]

    # test sample
    t_data_path = "/home/kzk/datasets/uci_csv_ssl_lrate_fixed_1_50_1_98/car/4_t.csv"
    data_t = np.loadtxt(t_data_path, delimiter=" ")
    data_t = np.hstack(
        (data_t, np.reshape(np.ones(data_t.shape[0]), (data_t.shape[0], 1))))
    y_t = data_t[:, 0]
    X_t = data_t[:, 1:]

    # learn
    st = time.time()
    model = RegularizedHPFSSLClassifier(max_itr=10,
                                        threshold=1e-4,
                                        learn_type="online",
                                        multi_class="ovo")
    model.learn(X_l, y_l, X_u)
    et = time.time()
    print "Elapsed time: %f [s]" % (et - st)

    # predict
    outputs = []
    for i, x in enumerate(X_t):
        outputs_ = model.predict(x)
        outputs.append(outputs_[0][0])

    # confusion matrix
    cm = confusion_matrix(y_t, outputs)
    print cm
    print 100.0 * np.sum(cm.diagonal()) / len(y_t)
Example #17
def main():
    config = tf.ConfigProto()

    # Avoid warning message errors
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Allowing GPU memory growth
    config.gpu_options.allow_growth = True

    with tf.Session(config=config):
        #load_path = "./model/mario/1-1/scratch/action_repeat_4/30/PPO/300000/model.ckpt"
        model.learn(
            policy=policies.PPOPolicy,
            env=SubprocVecEnv([
                env.make_train_0,
                env.make_train_0,
                env.make_train_0,
                env.make_train_0,
                #env.make_train_1,
                #env.make_train_2,
                #env.make_train_3,
                #env.make_train_4,
                #env.make_train_5,
                #env.make_train_6,
                #env.make_train_7,
                #env.make_train_8,
                #env.make_train_9,
                #env.make_train_10,
                #env.make_train_11,
                #env.make_train_12
            ]),
            nsteps=512,  # Steps per environment
            total_timesteps=100000000,
            gamma=0.99,
            lam=0.95,
            vf_coef=0.5,
            ent_coef=0.01,
            lr=lambda _: 2e-4,
            cliprange=lambda _: 0.1,  # PPO clipping range
            max_grad_norm=0.5,
            log_interval=4)
Example #18
def main():
    config = tf.ConfigProto()

    # Allowing GPU memory growth
    config.gpu_options.allow_growth = True

    with tf.Session(config=config):
        model.learn(
            policy=policies.A2CPolicy,
            env=DummyVecEnv([
                env.make_train_0, env.make_train_1, env.make_train_2,
                env.make_train_3
            ]),
            nsteps=2048,  # Steps per environment
            total_timesteps=10000000,
            gamma=0.99,
            lam=0.95,
            vf_coef=0.5,
            ent_coef=0.01,
            lr=2e-4,
            max_grad_norm=0.5,
            log_interval=10)
Example #19
def main():
    config = tf.ConfigProto()
    os.environ["CURA_VISIBLE_DEVICES"] = "0"
    config.gpu_options_allow_growth = True

    with tf.Session(Config=config):
        model.learn(
            policy=policies.A2CPolicy,
            env=SubprocVecEnv([
                env.make_train_0, env.make_train_1, env.make_train_2,
                env.make_train_3, env.make_train_4, env.make_train_5,
                env.make_train_6, env.make_train_7, env.make_train_8,
                env.make_train_9, env.make_train_10, env.make_train_11,
                env.make_train_12
            ]),
            nsteps=2048,  # Steps per environment
            total_timesteps=10000000,
            gamma=0.99,
            lam=0.95,
            vf_coef=0.5,
            ent_coef=0.01,
            lr=2e-4,
            max_grad_norm=0.5,
            log_interval=10)
Example #20
def speak(channel, target='', target2=None):
    if target[:2] == '<#':
        modelkey = target.split('|')[0][2:]
    elif target[:2] == '<@':
        if target2:
            modelkey = (target, target2.split('|')[0][2:])
        else:
            modelkey = (target, channel)
    else:
        modelkey = channel
    if modelkey not in CACHE:
        o = learn(channel, target, target2)
        model = CACHE[modelkey]
        s = model.make_short_sentence(max_sentence_length, tries=100)
        if s is not None:
            return o + '\n\n' + s
        else:
            return o + '\n\n:robot_face: Beep Boop'
    else:
        model = CACHE[modelkey]
        s = model.make_short_sentence(max_sentence_length, tries=100)
        if s:
            return s
        return ':robot_face: Beep Boop'
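
speak() asks a cached text model for a short sentence and falls back to ':robot_face: Beep Boop' when none can be built. The make_short_sentence call matches the markovify API, so the CACHE presumably holds markovify-style models (an assumption here); a minimal sketch of that usage:

import markovify

corpus = "Beep boop. Robots learn from channel history. Channel history makes robots talk."
text_model = markovify.Text(corpus)

# Try up to 100 times to build a sentence of at most 280 characters;
# make_short_sentence returns None when it fails.
s = text_model.make_short_sentence(280, tries=100)
print(s or ':robot_face: Beep Boop')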
Example #21
def train(env_id, num_timesteps, seed, lrschedule, num_cpu):
    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            return wrap_deepmind(env)

        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--vf_coef', help='critic coefficient', default=0.5)
    parser.add_argument('--ent_coef', help='entropy coefficient', default=0.01)
    parser.add_argument('--opt_eps', help='option eps', default=0.01)
    parser.add_argument('--delib_cost',
                        help='deliberation cost',
                        default=0.001)
    parser.add_argument('--max_grad_norm',
                        help='max gradient norm',
                        default=0.5)
    parser.add_argument('--lrschedule',
                        help='learning rate schedule',
                        default='linear')
    parser.add_argument('--epsilon',
                        help='epsilon for exploration',
                        default=1e-5)
    parser.add_argument('--alpha', help='alpha', default=0.99)
    parser.add_argument('--gamma', help='gamma (discounting)', default=0.99)
    parser.add_argument('--log_interval', help='log_interval', default=100)
    parser.add_argument('--lr', help='learning rate', default=0.001)
    parser.add_argument('--nopts', help='number of options', default=4)
    parser.add_argument('--log_dir', help='log directory', default='log')

    args = parser.parse_args()

    model_template = [{
        "model_type": "conv",
        "filter_size": [8, 8],
        "pool": [1, 1],
        "stride": [4, 4],
        "out_size": 32,
        "name": "conv1"
    }, {
        "model_type": "conv",
        "filter_size": [4, 4],
        "pool": [1, 1],
        "stride": [2, 2],
        "out_size": 64,
        "name": "conv2"
    }, {
        "model_type": "conv",
        "filter_size": [3, 3],
        "pool": [1, 1],
        "stride": [1, 1],
        "out_size": 64,
        "name": "conv3"
    }, {
        "model_type": "flatten"
    }, {
        "model_type": "mlp",
        "out_size": 512,
        "activation": "relu",
        "name": "fc1"
    }, {
        "model_type": "option"
    }, {
        "model_type": "value"
    }]

    learn(model_template,
          env,
          seed,
          total_timesteps=int(num_timesteps * 1.1),
          args=args)
    env.close()
Example #22
        if args.non_linear == 'relu':
            non_linear = torch.nn.ReLU()
        elif args.non_linear == 'elu':
            non_linear = torch.nn.ELU()

        # New actor and critic policies
        actor = Actor(use_gpu=use_gpu, non_linear=non_linear, batch_norm=args.batch_norm)
        critic = Critic(use_gpu=use_gpu, non_linear=non_linear, batch_norm=args.batch_norm)

        for i in range(args.num_train_cycles):
            print('Training cycle %s of %s' % (i, args.num_train_cycles))
            act(actor, env, task, B,
                num_trajectories=args.num_trajectories,
                task_period=30, writer=writer)
            learn(actor, critic, task, B,
                  num_learning_iterations=args.num_learning_iterations,
                  episode_batch_size=args.episode_batch_size,
                  lr=0.0002, writer=writer, loss=args.loss)
            run(actor, env, min_rate=0.05, writer=writer)
            # Remove early trajectories when buffer gets too large
            B = B[-args.buffer_size:]

        # Save the model to local directory
        if args.saveas is not None:
            save_path = str(root_dir / 'local' / 'models' / args.saveas)
            print('Saving models to %s' % save_path)
            torch.save(actor, save_path + '_actor.pt')
            torch.save(critic, save_path + '_critic.pt')
            print('...done')

    # Close writer
    try:
Example #23
    episode_reward += reward
    total_reward += reward
    episode_step += 1
    total_step += 1

    # Renders the game to screen
    if (args.render):
        env.render()

    # Add experience to replay buffer
    rp_buffer.append(obs, act_probs, act_taken_v, reward, done)

    # Learn from experience and clear rp buffer
    if (total_step % args.batch_size == 0):
        # Calculates/Applies grads
        pl, cl, tl, dr, ce, ad = model.learn(rp_buffer)
        # Write outputs out for visualization
        tb_writer.add_scalar('Misc/CrossEntropyMean', ce.mean(), total_step)
        tb_writer.add_scalar('Misc/Advantage', ad.mean(), total_step)
        tb_writer.add_scalar('Loss/PolicyLoss', pl, total_step)
        tb_writer.add_scalar('Loss/CriticLoss', cl, total_step)
        tb_writer.add_scalar('Loss/TotalLoss', tl, total_step)
        tb_writer.add_scalar('Rewards/DiscountedReward', dr, total_step)
        tb_writer.add_histogram('Actions/ActionsTaken',
                                rp_buffer.actions_scalar().cpu().numpy(),
                                total_step,
                                bins=np.arange(-1, env.action_space.n + 1,
                                               0.2))

        # Clears the replay buffer
        rp_buffer = ReplayBuffer(args.batch_size, env.observation_space.shape,
Example #24
from data_manager import ClutteredMNIST
from model import STN, learn

dataset_path = "./dataset/mnist_cluttered_60x60_6distortions.npz"
batch_size = 256
num_epochs = 30

data_manager = ClutteredMNIST(dataset_path)
train_data, val_data, test_data = data_manager.load()
x_train, y_train = train_data

print(x_train.shape, y_train.shape)
learn(STN(input_shape=(60, 60, 1), num_classes=10), x_train, y_train,
      val_data[0], val_data[1])
Example #25
#! /usr/bin/env python
"""
@author: dell
"""

if __name__ == "__main__":
    import music
    import model
    train_examples = music.load_examples('data/train.pkl')
    model.learn(train_examples)
    test_examples = music.load_examples('data/test.pkl')
    test_ratings = model.predict(test_examples)
    for i in range(len(test_examples)):
        test_examples[i]['rating'] = test_ratings[i]
    music.write_examples('submissions/zmusic_predictions.csv', test_examples)
Example #26
                           transforms.ToTensor(),
                           MyNormalize()
                       ]))

train_loader, test_loader, validation_loader = divide_dataset(
    imgDataset, 0.2, 16, 16)

model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
criterion = nn.CrossEntropyLoss()

epoch = [1, 3]
train_results = []
test_results = []
for ep in epoch:
    train_result, test_result, epoch = learn(train_loader, test_loader, ep)
    train_results.append([train_result, epoch])
    loss, accuracy = test_result
    print(loss)
    print(accuracy)
    print(test_result)
    print(epoch)
    test_results.append([loss, accuracy, epoch])

print(train_results)

for result in train_results:
    print('epoch: ' + str(result[1]) + ", " + "Average loss: " +
          str(result[0]))

for result in test_results:
Example #27
def main():

    if not (os.path.exists(f"./api_key/key.json")):
        print(
            "\n\n..oops you need a JSON file called 'key.json' inside the path './api_key/'\n(see the README.md to find out how to structure it)\n\n"
        )
        exit()

    print(
        "\n\nQ-Learning for the greater good.. choose how to interact with the API:\n"
    )

    mode = str(
        input(
            "\noption 't' is train (default)\noption 'c' is train-cycle\noption 'e' is exploit\n\nENTER OPTION: "
        ) or "t")

    if mode == "t":

        world = int(
            input(
                "\nwhich World [0-10] would you like to train on? (default is World 0)\nWORLD: "
            ) or "0")

        epochs = int(
            input(
                f"\nhow many epochs would you like to train the agent on World {world} for? (default is 1 epoch)\nEPOCHS: "
            ) or "1")

        print(
            f"\ntraining from scratch for {epochs} epochs on world {world}! \n(visualizations will be saved to './runs/world_{world}/')\n(Q-tables will be saved to './runs/Q-table_world_{world}')"
        )

        verbose = str(
            input(f"\nverbosity? (default is yes)\n([y]/n)? ") or "y")
        if verbose == "y":
            v = True
        else:
            v = False

        epsilon = 0.9

        q_table = model.init_q_table()

        if not (os.path.exists(f"./runs/world_{world}/")):
            os.makedirs(f"./runs/world_{world}/")

        run_num = len([i for i in os.listdir(f"runs/world_{world}")])

        file_path = f"./runs/Q-table_world_{world}"

        good_term_states = []
        bad_term_states = []
        obstacles = []

        for epoch in range(epochs):
            print("EPOCH #" + str(epoch) + ":\n\n")
            q_table, good_term_states, bad_term_states, obstacles = model.learn(
                q_table,
                worldId=world,
                mode='train',
                learning_rate=0.0001,
                gamma=0.9,
                epsilon=epsilon,
                good_term_states=good_term_states,
                bad_term_states=bad_term_states,
                epoch=epoch,
                obstacles=obstacles,
                run_num=run_num,
                verbose=v)

            epsilon = utils.epsilon_decay(epsilon, epoch, epochs)

            np.save(file_path, q_table)
        np.save(f"./runs/obstacles_world_{world}", obstacles)
        np.save(f"./runs/good_term_states_world_{world}", good_term_states)
        np.save(f"./runs/bad_term_states_world_{world}", bad_term_states)

    elif mode == "e":

        world = int(
            input(
                "\nwhich World [0-10] would you like the agent to exploit? (default is World 0)\nWORLD: "
            ) or "0")
        epochs = int(
            input(
                f"\nhow many times would you like the agent to run on World {world}? (default is 1 time)\nEPOCHS: "
            ) or "1")

        verbose = str(
            input(f"\nverbosity? (default is yes)\n([y]/n)? ") or "y")
        if verbose == "y":
            v = True
        else:
            v = False

        print(
            f"\nExploiting world {world} for {epochs} iterations! \n(visualizations will be saved to './runs/world_{world}/')"
        )

        file_path = f"./runs/Q-table_world_{world}"
        q_table = np.load(file_path + ".npy")

        obstacles = np.load(f"./runs/obstacles_world_{world}" + ".npy")
        good_term_states = np.load(f"./runs/good_term_states_world_{world}" +
                                   ".npy")
        bad_term_states = np.load(f"./runs/bad_term_states_world_{world}" +
                                  ".npy")

        obstacles = obstacles.tolist()
        good_term_states = good_term_states.tolist()
        bad_term_states = bad_term_states.tolist()

        epsilon = 0.9
        run_num = len([i for i in os.listdir(f"runs/world_{world}")])

        for epoch in range(epochs):
            print("EPOCH #" + str(epoch) + ":\n\n")
            q_table, good_term_states, bad_term_states, obstacles = model.learn(
                q_table,
                worldId=world,
                mode='expl',
                learning_rate=0.0001,
                gamma=0.9,
                epsilon=epsilon,
                good_term_states=good_term_states,
                bad_term_states=bad_term_states,
                epoch=epoch,
                obstacles=obstacles,
                run_num=run_num,
                verbose=v)

    elif mode == "c":
        confirm = str(
            input(
                f"\nyou've chosen to train the agent on all Worlds [1-10], this could take a while.. (are you sure?)\nProceed ([y]/n)? "
            ) or "y")

        cont = str(
            input(
                f"\nWould you like to continue training from previous runs? (are you sure?)\nProceed ([y]/n)? "
            ) or "y")

        if cont.lower() == "y":
            epochs_computed = int(
                input(
                    f"\nHow many epochs were used in previous training runs?\nEPOCHS: "
                ))
            epochs = int(
                input(
                    f"\nhow many more epochs would you like the agent to train on each World? (default is 10 epochs)\nEPOCHS: "
                ) or "10")
            init_eps = epsilon = utils.epsilon_decay(0.9, 6,
                                                     epochs_computed + epochs)
        else:
            epochs = int(
                input(
                    f"\nhow many epochs would you like the agent to train on each World? (default is 10 epochs)\nEPOCHS: "
                ) or "10")
            epochs_computed = 0
            init_eps = epsilon = 0.9

        verbose = str(
            input(f"\nverbosity? (default is yes)\n([y]/n)? ") or "y")
        if verbose == "y":
            v = True
        else:
            v = False

        if confirm == "y":
            for i in range(10):
                world = i + 1

                print(
                    f"\ntraining for {epochs} epochs on world {world}! \n(visualizations will be saved to './runs/world_{world}/')\n(Q-tables will be saved to './runs/Q-table_world_{world}')"
                )

                if not (os.path.exists(f"./runs/world_{world}/")):
                    os.makedirs(f"./runs/world_{world}/")

                run_num = len([i for i in os.listdir(f"runs/world_{world}")])

                file_path = f"./runs/Q-table_world_{world}"

                if cont.lower() == 'y':
                    good_term_states = np.load(
                        open(f"./runs/good_term_states_world_{world}.npy",
                             "rb"))
                    bad_term_states = np.load(
                        open(f"./runs/bad_term_states_world_{world}.npy",
                             "rb"))
                    obstacles = np.load(
                        open(f"./runs/obstacles_world_{world}.npy", "rb"))

                    q_table = np.load(
                        open(f"./runs/Q-table_world_{world}.npy", "rb"))
                else:
                    good_term_states = []
                    bad_term_states = []
                    obstacles = []
                    q_table = model.init_q_table()

                t = trange(epochs, desc='Training on all worlds', leave=True)

                for epoch in t:
                    t.set_description('Current World={}'.format(i + 1))

                    print("EPOCH #" + str(epoch) + ":\n\n")
                    q_table, good_term_states, bad_term_states, obstacles = model.learn(
                        q_table,
                        worldId=world,
                        mode='train',
                        learning_rate=0.0001,
                        gamma=0.9,
                        epsilon=epsilon,
                        good_term_states=good_term_states,
                        bad_term_states=bad_term_states,
                        epoch=epoch,
                        obstacles=obstacles,
                        run_num=run_num,
                        verbose=v)

                    epsilon = utils.epsilon_decay(init_eps,
                                                  epoch + epochs_computed,
                                                  epochs + epochs_computed)

                    np.save(file_path, q_table)

                np.save(f"./runs/obstacles_world_{world}", obstacles)
                np.save(f"./runs/good_term_states_world_{world}",
                        good_term_states)
                np.save(f"./runs/bad_term_states_world_{world}",
                        bad_term_states)

        else:
            #confirmation not given
            exit()

    else:
        print("that option doesn't exist yet :'(")
        exit()
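
Example #27 anneals epsilon every epoch through utils.epsilon_decay, which is not shown here. A common linear decay has this shape (a sketch under that assumption, not the project's actual helper):

def epsilon_decay(eps_start, epoch, total_epochs, eps_min=0.05):
    # Linearly anneal epsilon from eps_start toward eps_min over total_epochs,
    # so the agent explores less as training progresses.
    fraction = min(float(epoch) / max(total_epochs, 1), 1.0)
    return max(eps_min, eps_start * (1.0 - fraction))

# e.g. epsilon_decay(0.9, 0, 10) -> 0.9, epsilon_decay(0.9, 10, 10) -> 0.05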