Example #1
import numpy as np
import theano
from theano import tensor


def test_feedforward_theano_mix():
    # del_shared, linear, softmax, categorical_crossentropy, get_params, sgd,
    # and minibatch_iterator come from the library under test; X, y, and
    # n_classes are dataset globals defined by the surrounding test module.
    del_shared()
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    X_sym = tensor.fmatrix()
    y_sym = tensor.fmatrix()

    l1_o = linear([X_sym], [X.shape[1]],
                  proj_dim=20,
                  name='l1',
                  random_state=random_state)
    # mix in a raw Theano op, per the test name, to check interoperability
    l1_o = .999 * l1_o
    y_pred = softmax([l1_o], [20],
                     proj_dim=n_classes,
                     name='out',
                     random_state=random_state)

    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params = list(get_params().values())
    grads = theano.grad(cost, params)
    learning_rate = 0.001
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, y_sym], [cost],
                                   updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")

    train_itr = minibatch_iterator([X, y], minibatch_size, axis=0)
    valid_itr = minibatch_iterator([X, y], minibatch_size, axis=0)
    X_train, y_train = next(train_itr)
    X_valid, y_valid = next(valid_itr)
    fit_function(X_train, y_train)
    cost_function(X_valid, y_valid)
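
The snippet leaves X, y, and n_classes to the surrounding test module. A
minimal sketch of compatible setup, assuming scikit-learn's digits dataset and
the library's convert_to_one_hot helper (seen in Example #5) stand in for the
real fixtures:

# Hypothetical data setup; the original test module defines these names.
import numpy as np
from sklearn.datasets import load_digits

digits = load_digits()
X = digits["data"].astype("float32")
n_classes = len(np.unique(digits["target"]))
y = convert_to_one_hot(digits["target"], n_classes=n_classes).astype("float32")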
Example #2
predict_function = theano.function([X_sym], [y_pred])

# gather the compiled functions and other locals needed for checkpointing
checkpoint_dict = create_checkpoint_dict(locals())


def error(*args):
    # all positional args except the last are network inputs; the last is the
    # one-hot target matrix
    xargs = args[:-1]
    y = args[-1]
    y_pred = predict_function(*xargs)[0]
    # classification error rate: fraction of argmax mismatches
    return 1 - np.mean(
        np.argmax(y_pred, axis=1).ravel() == np.argmax(y, axis=1).ravel())


train_itr = minibatch_iterator([X, y],
                               minibatch_size,
                               axis=0,
                               stop_index=60000)
valid_itr = minibatch_iterator([X, y],
                               minibatch_size,
                               axis=0,
                               start_index=60000)

TL = TrainingLoop(fit_function,
                  cost_function,
                  train_itr,
                  valid_itr,
                  checkpoint_dict=checkpoint_dict,
                  list_of_train_output_names=["train_cost"],
                  valid_output_name="valid_cost",
                  n_epochs=100,
                  optimizer_object=opt)
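
This excerpt stops after constructing the loop; as in Examples #4 and #7,
training would then be started with:

epoch_results = TL.run()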
Example #3
params = list(get_params().values())
grads = theano.grad(cost, params)

learning_rate = 0.0003
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym], [nll, kl, nll + kl], updates=updates)
cost_function = theano.function([X_sym], [nll + kl])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp], [out])


checkpoint_dict = create_checkpoint_dict(locals())

train_itr = minibatch_iterator([X], minibatch_size,
                               stop_index=60000, axis=0)
valid_itr = minibatch_iterator([X], minibatch_size,
                               start_index=60000, stop_index=70000,
                               axis=0)


def train_loop(itr):
    # one training step: return the lower bound (nll + kl) for logging
    X_mb = next(itr)
    return [fit_function(X_mb)[2]]


def valid_loop(itr):
    # one validation step: cost only, no parameter updates
    X_mb = next(itr)
    return cost_function(X_mb)
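
Example #3 stops at these helpers; a minimal hand-rolled driver, assuming the
iterators raise StopIteration at the end of a pass and support reset() (as
shown in Example #5):

# Hypothetical driver for the loop helpers above.
for epoch in range(100):
    try:
        while True:
            train_loop(train_itr)
    except StopIteration:
        train_itr.reset()
    try:
        while True:
            valid_loop(valid_itr)
    except StopIteration:
        valid_itr.reset()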

Example #4
learning_rate = 0.0001
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [nll, kl, nll + kl],
                               updates=updates)
cost_function = theano.function([X_sym, y_sym], [nll + kl])
predict_function = theano.function([X_sym], [y_pred])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp, y_sym], [out])

# resume from an existing checkpoint if present, otherwise start fresh
checkpoint_dict = create_or_continue_from_checkpoint_dict(locals())

train_itr = minibatch_iterator([X, y],
                               minibatch_size,
                               stop_index=train_end,
                               axis=0)
valid_itr = minibatch_iterator([X, y],
                               minibatch_size,
                               start_index=train_end,
                               axis=0)

TL = TrainingLoop(fit_function,
                  cost_function,
                  train_itr,
                  valid_itr,
                  checkpoint_dict=checkpoint_dict,
                  list_of_train_output_names=["nll", "kl", "lower_bound"],
                  valid_output_name="valid_lower_bound",
                  n_epochs=2000)
epoch_results = TL.run()
Example #5
import itertools
from collections import OrderedDict

import numpy as np
import theano


random_state = np.random.RandomState(1999)
graph = OrderedDict()
base_string = "cat"
# all unique orderings of the base string's characters
true_strings = sorted(set("".join(p)
                          for p in itertools.permutations(base_string)))
ocr = make_ocr(true_strings)
X = ocr["data"]
vocab = ocr["vocabulary"]
y = convert_to_one_hot(ocr["target"], n_classes=len(vocab)).astype(
    theano.config.floatX)
minibatch_size = mbs = 2
train_itr = minibatch_iterator([X, y], minibatch_size, make_mask=True, axis=1)
X_mb, X_mb_mask, y_mb, y_mb_mask = next(train_itr)
train_itr.reset()
valid_itr = minibatch_iterator([X, y], minibatch_size, make_mask=True, axis=1)
datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph, list_of_test_values=datasets_list)

n_hid = 256
n_out = 8

h = location_attention_tanh_recurrent_layer(
    [X_sym], [y_sym], X_mask_sym, y_mask_sym, n_hid, graph, 'l1_att_rec',
    random_state=random_state)
Example #6
    # tail of the make_sines helper: add a trailing feature axis so the
    # output has shape (time, minibatch, 1)
    full_sines = full_sines[:, :, None]
    return full_sines

n_timesteps = 50
minibatch_size = 4
full_sines = make_sines(10 * n_timesteps, minibatch_size)
all_sines = full_sines[:n_timesteps]
n_full = 10 * n_timesteps
X = all_sines[:-1]
y = all_sines[1:]

n_in = 1
n_hid = 20
n_out = 1

train_itr = minibatch_iterator([X, y], minibatch_size, axis=1)
valid_itr = minibatch_iterator([X, y], minibatch_size, axis=1)

h_init = np.zeros((minibatch_size, 2 * n_hid)).astype("float32")

X_sym = tensor.tensor3()
y_sym = tensor.tensor3()
h0 = tensor.fmatrix()

random_state = np.random.RandomState(1999)

X_fork = lstm_fork([X_sym], [n_in], n_hid, name="h1",
                   random_state=random_state)


def step(in_t, h_tm1):
    # single recurrence step: one LSTM update on the forked input
    h_t = lstm(in_t, h_tm1, [n_in], n_hid, name=None,
               random_state=random_state)
    return h_t
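
The fragment defines step but stops before driving the recurrence; a sketch of
the usual continuation with theano.scan, assuming X_fork is a
(time, batch, features) sequence and h0 carries the initial state built above:

# Hypothetical continuation: unroll step over time with theano.scan.
h, scan_updates = theano.scan(step,
                              sequences=[X_fork],
                              outputs_info=[h0])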
Example #7
out = sigmoid_layer([l2_dec], graph, 'out', n_input, random_state=random_state)

nll = binary_crossentropy(out, X_sym).mean()
# variational lower bound: log p(x) >= -nll - kl; we minimize, so the cost is
# the negated bound
cost = -1 * (-nll - kl)
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0003
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym], [nll, kl, nll + kl], updates=updates)
cost_function = theano.function([X_sym], [nll + kl])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp], [out])

checkpoint_dict = create_or_continue_from_checkpoint_dict(locals())

train_itr = minibatch_iterator([X], minibatch_size, stop_index=train_end, axis=0)
valid_itr = minibatch_iterator([X], minibatch_size, start_index=train_end, axis=0)

TL = TrainingLoop(
    fit_function, cost_function,
    train_itr, valid_itr,
    checkpoint_dict=checkpoint_dict,
    list_of_train_output_names=["nll", "kl", "lower_bound"],
    valid_output_name="valid_lower_bound",
    n_epochs=2000)
epoch_results = TL.run()
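
After training, the compiled helpers give a round trip through the model; a
hypothetical usage (X_mb is any float32 minibatch of data):

# Hypothetical usage: encode a minibatch, reparameterize, and decode.
rng = np.random.RandomState(1999)
code_mu_v, code_log_sigma_v = encode_function(X_mb)
noise = rng.randn(*code_mu_v.shape).astype("float32")
samp_v = code_mu_v + np.exp(code_log_sigma_v) * noise
reconstructed = decode_function(samp_v)[0]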