def run_lstm():
    del_shared()
    n_in = X.shape[-1]
    n_hid = 20
    n_out = y.shape[-1]
    random_state = np.random.RandomState(42)
    # initial state carries hidden and cell activations concatenated (2 * n_hid wide)
    h_init = np.zeros((minibatch_size, 2 * n_hid)).astype("float32")
    h0 = tensor.fmatrix()
    l1 = lstm_fork([X_sym], [n_in], n_hid, name="l1",
                   random_state=random_state)

    def step(in_t, h_tm1):
        h_t = lstm(in_t, h_tm1, n_hid, name="rec",
                   random_state=random_state)
        return h_t

    # scan over the time dimension of the forked input projection
    h, _ = theano.scan(step, sequences=[l1], outputs_info=[h0])
    # keep only the hidden part of the concatenated state for the readout
    h_o = slice_state(h, n_hid)
    pred = linear([h_o], [n_hid], n_out, name="l2", random_state=random_state)
    cost = ((y_sym - pred) ** 2).sum()
    params = list(get_params().values())
    grads = tensor.grad(cost, params)
    # tiny learning rate: this is a compile-and-run smoke test, not real training
    learning_rate = 0.000000000001
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)
    f = theano.function([X_sym, y_sym, h0], [cost, h], updates=updates,
                        mode="FAST_COMPILE")
    f(X, y, h_init)
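# Illustration only (not part of the library): the recurrent state above is assumed
# to carry hidden and cell activations concatenated on the last axis, so slice_state
# is assumed to return the first n_hid columns. A NumPy sketch of that assumed
# behaviour for a single timestep, using a hypothetical helper name:
import numpy as np

def slice_state_sketch(h_and_c, n_hid):
    # (minibatch_size, 2 * n_hid) -> (minibatch_size, n_hid), hidden part only
    return h_and_c[:, :n_hid]

_example_state = np.zeros((5, 2 * 20), dtype="float32")
assert slice_state_sketch(_example_state, 20).shape == (5, 20)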
def test_feedforward_theano_mix():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    l1_o = linear_layer([X_sym], graph, 'l1', proj_dim=20,
                        random_state=random_state)
    l1_o = .999 * l1_o
    y_pred = softmax_layer([l1_o], graph, 'pred', n_classes,
                           random_state=random_state)
    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict,
                           [X, y], minibatch_size,
                           train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
def test_tanh_rnn():
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    # home of the computational graph
    graph = OrderedDict()

    # number of hidden features
    n_hid = 10
    # number of output_features = input_features
    n_out = X.shape[-1]

    # input (where first dimension is time)
    datasets_list = [X, X_mask, y, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    test_values_list = [X, X_mask, y, y_mask]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph, list_of_test_values=test_values_list)

    # Setup weights
    l1 = linear_layer([X_sym], graph, 'l1_proj', n_hid, random_state)
    h = tanh_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                             random_state)
    # linear output activation
    y_hat = linear_layer([h], graph, 'l2_proj', n_out, random_state)

    # error between output and target
    cost = squared_error(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.001
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict, [X, y],
                           minibatch_size, train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost", n_epochs=1)
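# Not from the library: a sketch of how the time-major masks consumed above can be
# built, assuming X is laid out as (n_timesteps, n_samples, n_features) and `lengths`
# (a hypothetical list) gives the true length of each sequence.
import numpy as np

def make_mask_sketch(n_timesteps, lengths):
    mask = np.zeros((n_timesteps, len(lengths)), dtype="float32")
    for i, length in enumerate(lengths):
        mask[:length, i] = 1.
    return mask

_example_mask = make_mask_sketch(10, [10, 7, 3])
assert _example_mask.shape == (10, 3)
assert _example_mask[:, 2].sum() == 3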
def test_fixed_projection_layer():
    random_state = np.random.RandomState(1999)
    rand_projection = random_state.randn(64, 12)

    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    out = fixed_projection_layer([X_sym], rand_projection,
                                 graph, 'proj')
    out2 = fixed_projection_layer([X_sym], rand_projection,
                                  graph, 'proj',
                                  pre=rand_projection[:, 0])
    out3 = fixed_projection_layer([X_sym], rand_projection,
                                  graph, 'proj',
                                  post=rand_projection[0])
    final = linear_layer([out2], graph, 'linear', 17,
                         random_state=random_state)
    # Test that it compiles with and without bias
    f = theano.function([X_sym], [out, out2, out3, final],
                        mode="FAST_COMPILE")

    # Test updates
    params, grads = get_params_and_grads(graph, final.mean())
    opt = sgd(params)
    updates = opt.updates(params, grads, .1)
    f2 = theano.function([X_sym], [out2, final], updates=updates)
    ret = f(np.ones_like(X))[0]
    assert ret.shape[1] != X.shape[1]
    ret2 = f(np.ones_like(X))[1]
    assert ret2.shape[1] != X.shape[1]
    out1, final1 = f2(X)
    out2, final2 = f2(X)
    # Make sure fixed basis is unchanged
    assert_almost_equal(out1, out2)
    # Make sure linear layer is updated
    assert_raises(AssertionError, assert_almost_equal, final1, final2)
def test_feedforward_theano_mix():
    del_shared()
    minibatch_size = 100
    random_state = np.random.RandomState(1999)

    X_sym = tensor.fmatrix()
    y_sym = tensor.fmatrix()

    l1_o = linear([X_sym], [X.shape[1]], proj_dim=20, name='l1',
                  random_state=random_state)
    l1_o = .999 * l1_o
    y_pred = softmax([l1_o], [20], proj_dim=n_classes, name='out',
                     random_state=random_state)

    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params = list(get_params().values())
    grads = theano.grad(cost, params)
    learning_rate = 0.001

    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")

    train_itr = minibatch_iterator([X, y], minibatch_size, axis=0)
    valid_itr = minibatch_iterator([X, y], minibatch_size, axis=0)
    X_train, y_train = next(train_itr)
    X_valid, y_valid = next(valid_itr)
    fit_function(X_train, y_train)
    cost_function(X_valid, y_valid)
def test_fixed_projection():
    random_state = np.random.RandomState(1999)
    rand_projection = random_state.randn(64, 12)
    rand_dim = rand_projection.shape[1]

    out = fixed_projection([X_sym], [X.shape[1]], rand_projection, 'proj1')
    out2 = fixed_projection([X_sym], [X.shape[1]], rand_projection, 'proj2',
                            pre=rand_projection[:, 0])
    out3 = fixed_projection([X_sym], [X.shape[1]], rand_projection, 'proj3',
                            post=rand_projection[0])
    final = linear([out2], [rand_dim], 5, 'linear', random_state=random_state)
    # Test that it compiles with and without bias
    f = theano.function([X_sym], [out, out2, out3, final],
                        mode="FAST_COMPILE")

    # Test updates
    params = list(get_params().values())
    grads = tensor.grad(final.mean(), params)
    opt = sgd(params, .1)
    updates = opt.updates(params, grads)
    f2 = theano.function([X_sym], [out2, final], updates=updates)
    ret = f(np.ones_like(X))[0]
    assert ret.shape[1] != X.shape[1]
    ret2 = f(np.ones_like(X))[1]
    assert ret2.shape[1] != X.shape[1]
    out1, final1 = f2(X)
    out2, final2 = f2(X)
    # Make sure fixed basis is unchanged
    assert_almost_equal(out1, out2)
    # Make sure linear layer is updated
    assert_raises(AssertionError, assert_almost_equal, final1, final2)
minibatch_size = 20
n_hid = 500

l1 = tanh([X_sym], [X.shape[1]], proj_dim=n_hid, name='l1',
          random_state=random_state)
y_pred = softmax_zeros([l1], [n_hid], proj_dim=n_targets, name='y_pred')
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights(skip_regex=None).values()
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2

params = list(get_params().values())
grads = theano.grad(cost, params)

learning_rate = 0.01
opt = sgd(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])

checkpoint_dict = create_checkpoint_dict(locals())

train_itr = minibatch_iterator([X, y], minibatch_size, axis=0,
                               stop_index=60000)
valid_itr = minibatch_iterator([X, y], minibatch_size, axis=0,
                               start_index=60000)
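# Illustration only: checkpoint_dict suggests the library's own training loop is
# normally handed these pieces from here on. A bare manual pass over the iterators
# might look like the sketch below, assuming each iterator raises StopIteration at
# the end of its index range and is rebuilt for the next epoch.
try:
    while True:
        X_mb, y_mb = next(train_itr)
        train_cost, = fit_function(X_mb, y_mb)
except StopIteration:
    pass

try:
    while True:
        X_mb, y_mb = next(valid_itr)
        valid_cost, = cost_function(X_mb, y_mb)
except StopIteration:
    pass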
def test_vae():
    minibatch_size = 10
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym = add_datasets_to_graph([X], ["X"], graph)

    l1_enc = softplus_layer([X_sym], graph, 'l1_enc', proj_dim=100,
                            random_state=random_state)
    mu = linear_layer([l1_enc], graph, 'mu', proj_dim=50,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc], graph, 'log_sigma', proj_dim=50,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = softplus_layer([samp], graph, 'l1_dec', proj_dim=100,
                            random_state=random_state)
    out = sigmoid_layer([l1_dec], graph, 'out', proj_dim=X.shape[1],
                        random_state=random_state)

    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.00000
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym], [cost], mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict, [X],
                           minibatch_size, train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
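# Not in the original test: once the graph above is built, reconstructions can be
# read back out of the same symbols by compiling one more Theano function. This is
# a sketch only; it reuses names defined inside test_vae and the standard
# theano.function call.
reconstruct_function = theano.function([X_sym], [out], mode="FAST_COMPILE")
reconstructions, = reconstruct_function(X)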