def test_feedforward_theano_mix():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    l1_o = linear_layer([X_sym], graph, 'l1', proj_dim=20,
                        random_state=random_state)
    # mix a raw Theano op in between graph-built layers
    l1_o = .999 * l1_o
    y_pred = softmax_layer([l1_o], graph, 'pred', n_classes,
                           random_state=random_state)
    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict,
                           [X, y], minibatch_size,
                           train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
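# early_stopping_trainer tracks the validation cost and keeps the best model
# in checkpoint_dict. A minimal sketch of the core bookkeeping, assuming a
# standard patience rule; this is illustrative, not the library's code:
def early_stopping_sketch(valid_costs, patience=5):
    best, since_best = float("inf"), 0
    for epoch, v in enumerate(valid_costs):
        if v < best:
            best, since_best = v, 0  # a checkpoint would be saved here
        else:
            since_best += 1
        if since_best >= patience:
            return epoch  # stop: no improvement for `patience` epochs
    return len(valid_costs) - 1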
def test_conditional_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars

    # input (where first dimension is time)
    datasets_list = [X_mb, X_mask, y_mb, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)

    h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, 'l1_end',
                            random_state)
    shifted_y_sym = shift_layer([y_sym], graph, 'shift')
    h_dec, context = conditional_gru_recurrent_layer([y_sym], [h], y_mask_sym,
                                                     n_hid, graph, 'l2_dec',
                                                     random_state)
    # linear output activation
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          n_out, random_state=random_state)
    # error between output and target
    cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    """
    params, grads = get_params_and_grads(graph, cost)
    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(cost_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict, [X, y], minibatch_size,
                           list_of_minibatch_functions=[text_minibatch_func],
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
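# A minimal numpy sketch of what masked_cost is assumed to do: zero out
# per-timestep losses wherever the mask is 0, so padded timesteps do not
# contribute to the cost. The library's exact internals may differ.
import numpy as np

def masked_cost_sketch(stepwise_cost, mask):
    # stepwise_cost and mask are both (n_timesteps, minibatch_size)
    return stepwise_cost * mask

_cost = np.array([[1.0, 2.0], [3.0, 4.0]])
_mask = np.array([[1.0, 1.0], [1.0, 0.0]])
# the padded step (mask == 0) drops out of the total
assert masked_cost_sketch(_cost, _mask).sum() == 6.0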
def test_feedforward_classifier():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    l1_o = linear_layer([X_sym], graph, "l1", proj_dim=20,
                        random_state=random_state)
    y_pred = softmax_layer([l1_o], graph, "pred", n_classes,
                           random_state=random_state)
    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    train_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                     mode="FAST_COMPILE")
    iterate_function(train_function, [X, y], minibatch_size,
                     list_of_output_names=["cost"], n_epochs=1)
def test_feedforward_theano_mix():
    del_shared()
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    X_sym = tensor.fmatrix()
    y_sym = tensor.fmatrix()
    l1_o = linear([X_sym], [X.shape[1]], proj_dim=20, name='l1',
                  random_state=random_state)
    # mix a raw Theano op in between library layers
    l1_o = .999 * l1_o
    y_pred = softmax([l1_o], [20], proj_dim=n_classes, name='out',
                     random_state=random_state)
    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params = list(get_params().values())
    grads = theano.grad(cost, params)
    learning_rate = 0.001
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)
    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")
    train_itr = minibatch_iterator([X, y], minibatch_size, axis=0)
    valid_itr = minibatch_iterator([X, y], minibatch_size, axis=0)
    X_train, y_train = next(train_itr)
    X_valid, y_valid = next(valid_itr)
    fit_function(X_train, y_train)
    cost_function(X_valid, y_valid)
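# get_params() and del_shared() imply a module-level registry of Theano
# shared variables keyed by layer name, cleared between tests. A minimal
# sketch of that pattern; _SHARED_REGISTRY and make_shared are hypothetical
# names for illustration, not the library's API.
from collections import OrderedDict
import theano

_SHARED_REGISTRY = OrderedDict()

def make_shared(name, value):
    # create-or-fetch, so repeated layer calls reuse the same parameter
    if name not in _SHARED_REGISTRY:
        _SHARED_REGISTRY[name] = theano.shared(value, name=name)
    return _SHARED_REGISTRY[name]

def get_params_sketch():
    return _SHARED_REGISTRY

def del_shared_sketch():
    # tests call this first so each test builds its parameters from scratch
    _SHARED_REGISTRY.clear()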
# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])
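# With the compiled functions above, a plain minibatch loop is enough to
# train. A minimal sketch, assuming X and y are index-aligned numpy arrays;
# run_epoch_sketch is an illustrative helper, not library API.
def run_epoch_sketch(X, y, minibatch_size, fit_function):
    epoch_costs = []
    for i in range(0, len(X), minibatch_size):
        mb_cost, = fit_function(X[i:i + minibatch_size],
                                y[i:i + minibatch_size])
        epoch_costs.append(mb_cost)
    return epoch_costs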
l1 = embed([X_sym], n_classes, n_emb, name="emb", random_state=random_state)
in_fork = lstm_fork([l1], [n_emb], n_hid, name="h1",
                    random_state=random_state)

def step(in_t, h_tm1):
    h_t = lstm(in_t, h_tm1, [n_hid], n_hid, name="lstm_l1",
               random_state=random_state)
    return h_t

h, _ = theano.scan(step, sequences=[in_fork], outputs_info=[h0])
h_o = slice_state(h, n_hid)

y_pred = softmax([h_o], [n_hid], n_classes, name="h2",
                 random_state=random_state)
loss = categorical_crossentropy(y_pred, y_sym)
cost = loss.mean(axis=1).sum(axis=0)

params = list(get_params().values())
grads = tensor.grad(cost, params)

learning_rate = 0.0001
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym, h0], [cost, h], updates=updates)
cost_function = theano.function([X_sym, y_sym, h0], [cost, h])
predict_function = theano.function([X_sym, h0], [y_pred, h])
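# fit_function returns the scanned states along with the cost, so the final
# state can seed the next call and recurrent state persists across
# minibatches. A minimal sketch; assumes X_mbs / y_mbs are lists of
# (time, batch, ...) minibatches and h0_val matches the shape given for h0.
def run_stateful_epoch_sketch(X_mbs, y_mbs, h0_val, fit_function):
    for X_mb, y_mb in zip(X_mbs, y_mbs):
        cost, h = fit_function(X_mb, y_mb, h0_val)
        # carry the final timestep's state into the next minibatch
        h0_val = h[-1]
    return cost, h0_val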
def test_categorical_crossentropy():
    graph = OrderedDict()
    y_sym = add_datasets_to_graph([y], ["y"], graph)
    cost = categorical_crossentropy(.99 * y_sym + .001, y_sym)
    theano.function([y_sym], cost, mode="FAST_COMPILE")
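# For one-hot targets, categorical crossentropy is -sum(y * log(p)) per
# example; the .99 * y_sym + .001 trick above keeps log() away from zero.
# A small numpy check of the formula itself (not the library implementation):
import numpy as np

_y = np.array([[0., 1.], [1., 0.]])
_p = .99 * _y + .001
_xent = -np.sum(_y * np.log(_p), axis=1)
# cost is near zero because predictions nearly match the one-hot targets
assert np.allclose(_xent, -np.log(0.991))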
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])
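# predict_function returns class probabilities, so accuracy on a minibatch
# is an argmax comparison against the one-hot targets. A minimal sketch;
# accuracy_sketch is an illustrative helper, not library API.
import numpy as np

def accuracy_sketch(X_mb, y_mb, predict_function):
    y_hat = predict_function(X_mb)[0]
    return np.mean(np.argmax(y_hat, axis=1) == np.argmax(y_mb, axis=1))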
def test_loop():
    # graph holds information necessary to build layers from parents
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    minibatch_size = 10

    y_pred = softmax_zeros_layer([X_sym], graph, "y_pred",
                                 proj_dim=n_targets)
    nll = categorical_crossentropy(y_pred, y_sym).mean()
    cost = nll

    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.13
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
    cost_function = theano.function([X_sym, y_sym], [cost])
    predict_function = theano.function([X_sym], [y_pred])

    checkpoint_dict = {
        "fit_function": fit_function,
        "cost_function": cost_function,
        "predict_function": predict_function,
    }

    def error(*args):
        xargs = args[:-1]
        y = args[-1]
        final_args = xargs
        y_pred = predict_function(*final_args)[0]
        return 1 - np.mean((np.argmax(y_pred, axis=1).ravel()) ==
                           (np.argmax(y, axis=1).ravel()))

    TL1 = TrainingLoop(fit_function, error,
                       train_indices[:10], valid_indices[:10],
                       minibatch_size,
                       checkpoint_dict=checkpoint_dict,
                       list_of_train_output_names=["train_cost"],
                       valid_output_name="valid_error",
                       n_epochs=1,
                       optimizer_object=opt)
    epoch_results1 = TL1.run([X, y])

    # resume the same loop on the next slice of indices
    TL1.train_indices = train_indices[10:20]
    TL1.valid_indices = valid_indices[10:20]
    epoch_results1 = TL1.run([X, y])

    # a second loop covering all 20 indices, sharing the checkpoint_dict
    TL2 = TrainingLoop(fit_function, error,
                       train_indices[:20], valid_indices[:20],
                       minibatch_size,
                       checkpoint_dict=checkpoint_dict,
                       list_of_train_output_names=["train_cost"],
                       valid_output_name="valid_error",
                       n_epochs=1,
                       optimizer_object=opt)
    epoch_results2 = TL2.run([X, y])

    r1 = TL1.__dict__["checkpoint_dict"]["previous_results"]["train_cost"][-1]
    r2 = TL2.__dict__["checkpoint_dict"]["previous_results"]["train_cost"][-1]
    assert r1 == r2
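# The assert above appears to check that pausing and resuming a loop behaves
# like one continuous run. The same property in miniature, with plain numpy
# SGD on a linear least-squares model (all names here are illustrative):
import numpy as np

def sgd_run(w, Xs, ys, lr=0.1):
    for x_i, y_i in zip(Xs, ys):
        w = w - lr * (w.dot(x_i) - y_i) * x_i  # squared-error gradient step
    return w

rng = np.random.RandomState(1999)
Xd = rng.randn(20, 3)
yd = rng.randn(20)
w0 = np.zeros(3)
# train on the first 10 examples, then resume on the next 10
w_split = sgd_run(sgd_run(w0, Xd[:10], yd[:10]), Xd[10:], yd[10:])
# one continuous run over all 20 examples
w_joint = sgd_run(w0, Xd, yd)
assert np.allclose(w_split, w_joint)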