def test_rnn_correlated_mixture_density():
    # graph holds information necessary to build layers from parents
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    minibatch_size = 5
    X_seq = np.array([bernoulli_X for i in range(minibatch_size)])
    y_seq = np.array([bernoulli_y for i in range(minibatch_size)])
    X_mb, X_mb_mask = make_masked_minibatch(X_seq, slice(0, minibatch_size))
    y_mb, y_mb_mask = make_masked_minibatch(y_seq, slice(0, minibatch_size))
    datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)

    n_hid = 5
    train_indices = np.arange(len(X_seq))
    valid_indices = np.arange(len(X_seq))

    l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                    random_state=random_state)
    h = gru_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                            random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [h], graph, 'hw', proj_dim=2, n_components=3,
        random_state=random_state)
    binary, coeffs, mus, log_sigmas, corr = rval
    cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
        binary, coeffs, mus, log_sigmas, corr, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")

    checkpoint_dict = create_checkpoint_dict(locals())

    epoch_results = fixed_n_epochs_trainer(
        cost_function, cost_function, train_indices, valid_indices,
        checkpoint_dict, [X_seq, y_seq],
        minibatch_size,
        list_of_minibatch_functions=[make_masked_minibatch,
                                     make_masked_minibatch],
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
def test_correlated_mixture_density():
    # graph holds information necessary to build layers from parents
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([bernoulli_X, bernoulli_y],
                                         ["X", "y"], graph)
    n_hid = 20
    minibatch_size = len(bernoulli_X)
    train_indices = np.arange(len(bernoulli_X))
    valid_indices = np.arange(len(bernoulli_X))

    l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                    random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [l1], graph, 'hw', proj_dim=2, n_components=3,
        random_state=random_state)
    binary, coeffs, mus, log_sigmas, corr = rval
    cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
        binary, coeffs, mus, log_sigmas, corr, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)

    learning_rate = 1E-6
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")

    checkpoint_dict = create_checkpoint_dict(locals())

    epoch_results = fixed_n_epochs_trainer(
        fit_function, cost_function, train_indices, valid_indices,
        checkpoint_dict, [bernoulli_X, bernoulli_y],
        minibatch_size,
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
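# For reference, a minimal numpy sketch (not part of dagbldr) of the density
# the cost above is built from: a mixture of correlated bivariate Gaussians
# plus an independent Bernoulli, as in Graves-style handwriting models. The
# function name, argument names, and shapes (per-component arrays for a single
# 2D target) are illustrative assumptions, not the library's API.
def correlated_gmm_negative_log_likelihood(target, binary_p, coeffs, mus,
                                           log_sigmas, corr):
    # target: [x1, x2, pen]; binary_p: scalar; coeffs, corr: (k,);
    # mus, log_sigmas: (k, 2)
    x1, x2, pen = target
    s1, s2 = np.exp(log_sigmas[:, 0]), np.exp(log_sigmas[:, 1])
    z1 = (x1 - mus[:, 0]) / s1
    z2 = (x2 - mus[:, 1]) / s2
    z = z1 ** 2 + z2 ** 2 - 2. * corr * z1 * z2
    norm = 2. * np.pi * s1 * s2 * np.sqrt(1. - corr ** 2)
    component_density = np.exp(-z / (2. * (1. - corr ** 2))) / norm
    nll_gauss = -np.log(np.sum(coeffs * component_density) + 1E-8)
    nll_bernoulli = -(pen * np.log(binary_p + 1E-8) +
                      (1 - pen) * np.log(1. - binary_p + 1E-8))
    return nll_gauss + nll_bernoulli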
l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
y_pred = linear_layer([l1], graph, 'y_pred', proj_dim=n_out,
                      random_state=random_state)

cost = ((y_pred - y_sym) ** 2).mean()
# Can also define cost this way using dagbldr
# cost = squared_error(y_pred, y_sym).mean()
params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-3
momentum = 0.8
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])

checkpoint_dict = create_checkpoint_dict(locals())

epoch_results = fixed_n_epochs_trainer(
    fit_function, cost_function, train_indices, valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    list_of_train_output_names=["train_cost"],
    valid_output_name="valid_cost",
    n_epochs=1000)

# pred_sine_y = predict_function(sine_x)[0]
# plt.plot(sine_x, pred_sine_y, "o", color="red", alpha=0.3)
# plt.plot(sine_x, sine_y, "o", color="steelblue", alpha=0.3)
    # remove repeats
    not_same = np.where((indices[1:] != indices[:-1]))[0]
    last_char = ""
    if len(not_same) > 0:
        last_char = vocab[indices[-1]]
    indices = indices[not_same]
    s = "".join([vocab[i] for i in indices])
    ctc_string = s + last_char
    return ctc_string, non_ctc_string


def print_ctc_prediction(X_sym, X_mask_sym, y_sym, y_mask_sym):
    all_y_pred = predict_function(X_sym, X_mask_sym)[0]
    for n in range(all_y_pred.shape[1]):
        y_pred = all_y_pred[:, n]
        ctc_string, non_ctc_string = prediction_strings(y_pred)
        print(ctc_string)
        print(non_ctc_string)


fixed_n_epochs_trainer(
    fit_function, cost_function, train_indices, valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    monitor_function=print_ctc_prediction,
    list_of_minibatch_functions=[make_masked_minibatch,
                                 make_masked_minibatch],
    list_of_train_output_names=["cost"],
    valid_output_name="valid_cost",
    valid_frequency=10,
    n_epochs=1000)
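# A tiny standalone sketch (an assumption-only helper, not dagbldr code) of
# the same greedy CTC-style decoding used above: collapse repeated argmax
# indices, then drop the blank symbol. The blank index of 0 and the toy
# vocab are hypothetical.
def greedy_ctc_collapse(frame_indices, vocab, blank_index=0):
    collapsed = []
    previous = None
    for i in frame_indices:
        if i != previous:
            collapsed.append(i)
        previous = i
    return "".join([vocab[i] for i in collapsed if i != blank_index])


# Example: with vocab ["_", "c", "a", "t"], frames c c _ a a t -> "cat"
# greedy_ctc_collapse([1, 1, 0, 2, 2, 3], ["_", "c", "a", "t"])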
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0001
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [nll, kl, nll + kl],
                               updates=updates)
cost_function = theano.function([X_sym, y_sym], [nll + kl])
predict_function = theano.function([X_sym], [y_pred])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp, y_sym], [out])

checkpoint_dict = {}
checkpoint_dict["fit_function"] = fit_function
checkpoint_dict["cost_function"] = cost_function
checkpoint_dict["predict_function"] = predict_function
checkpoint_dict["encode_function"] = encode_function
checkpoint_dict["decode_function"] = decode_function
previous_results = None

epoch_results = fixed_n_epochs_trainer(
    fit_function, cost_function, train_indices, valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    list_of_train_output_names=["nll", "kl", "lower_bound"],
    valid_output_name="valid_lower_bound",
    valid_frequency="train_length",
    n_epochs=2000,
    previous_results=previous_results,
    shuffle=True,
    random_state=random_state)
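# A possible test-time round trip with the compiled functions above (a sketch,
# not from the original example): encode an input, draw a latent sample with
# the reparameterization trick, then decode. The label argument to
# decode_function and the shapes involved are assumptions based only on the
# symbolic inputs it was compiled with.
# code_mu, code_log_sigma = encode_function(X[:minibatch_size])
# noise = random_state.randn(*code_mu.shape).astype(theano.config.floatX)
# latent_samp = code_mu + np.exp(code_log_sigma) * noise
# reconstruction = decode_function(latent_samp, y[:minibatch_size])[0]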
rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
    [l2], graph, 'hw', proj_dim=2, n_components=20,
    random_state=random_state)
binary, coeffs, mus, sigmas, corr = rval
cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
    binary, coeffs, mus, sigmas, corr, y_sym)
cost = masked_cost(cost, y_mask_sym).sum(axis=0).mean()
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0003
opt = adam(params, learning_rate)
clipped_grads = gradient_clipping(grads)
updates = opt.updates(params, clipped_grads)

fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                               [cost], updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                [cost])
predict_function = theano.function([X_sym, X_mask_sym],
                                   [binary, coeffs, mus, sigmas, corr])

checkpoint_dict = create_checkpoint_dict(locals())

epoch_results = fixed_n_epochs_trainer(
    fit_function, cost_function, train_indices, valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    list_of_minibatch_functions=[make_masked_minibatch,
                                 make_masked_minibatch],
    list_of_train_output_names=["train_cost"],
    valid_output_name="valid_cost",
    valid_frequency="train_length",
    n_epochs=20)
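# A rough sampling sketch (assumptions only, not part of the original example)
# for drawing one point from the per-step mixture parameters returned by
# predict_function above: pick a component from coeffs, draw a correlated 2D
# Gaussian sample, and a Bernoulli pen state. Shapes are assumed to be those
# of a single timestep and sequence; if sigmas are log-scale (as the layer
# name suggests they may be), exponentiate them first.
def sample_single_step(binary_p, coeffs, mus, sigmas, corr, random_state):
    k = np.searchsorted(np.cumsum(coeffs), random_state.uniform())
    mu = mus[k]
    s1, s2 = sigmas[k]
    rho = corr[k]
    cov = np.array([[s1 ** 2, rho * s1 * s2],
                    [rho * s1 * s2, s2 ** 2]])
    point = random_state.multivariate_normal(mu, cov)
    pen = random_state.binomial(1, binary_p)
    return point, pen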
print("Compiling fit...") fit_function = theano.function(X_story_syms + [X_story_mask_sym] + X_query_syms + [X_query_mask_sym, y_sym], [cost], updates=updates) print("Compiling cost...") cost_function = theano.function(X_story_syms + [X_story_mask_sym] + X_query_syms + [X_query_mask_sym, y_sym], [cost]) print("Compiling predict...") predict_function = theano.function(X_story_syms + [X_story_mask_sym] + X_query_syms + [X_query_mask_sym], [y_pred]) def error(*args): xargs = args[:-1] y = args[-1] final_args = xargs y_pred = predict_function(*final_args)[0] return 1 - np.mean((np.argmax( y_pred, axis=1).ravel()) == (np.argmax(y, axis=1).ravel())) checkpoint_dict = {} epoch_results = fixed_n_epochs_trainer( fit_function, error, train_indices, valid_indices, checkpoint_dict, [X_story, X_query, y_answer], minibatch_size, list_of_minibatch_functions=[make_embedding_minibatch, make_embedding_minibatch, make_minibatch], list_of_train_output_names=["cost"], valid_output_name="valid_error", n_epochs=100)
params, grads = get_params_and_grads(graph, cost)

learning_rate = .13
opt = sgd(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])

checkpoint_dict = create_checkpoint_dict(locals())


def error(*args):
    xargs = args[:-1]
    y = args[-1]
    final_args = xargs
    y_pred = predict_function(*final_args)[0]
    return 1 - np.mean((np.argmax(y_pred, axis=1).ravel()) ==
                       (np.argmax(y, axis=1).ravel()))


epoch_results = fixed_n_epochs_trainer(
    fit_function, error, train_indices, valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    list_of_train_output_names=["train_cost"],
    valid_output_name="valid_error",
    n_epochs=100,
    optimizer_object=opt)
    if len(not_same) > 0:
        last_char = vocab[indices[-1]]
    indices = indices[not_same]
    s = "".join([vocab[i] for i in indices])
    ctc_string = s + last_char
    return ctc_string, non_ctc_string


def print_ctc_prediction(X_sym, X_mask_sym, y_sym, y_mask_sym):
    all_y_pred = predict_function(X_sym, X_mask_sym)[0]
    for n in range(all_y_pred.shape[1]):
        y_pred = all_y_pred[:, n]
        ctc_string, non_ctc_string = prediction_strings(y_pred)
        print(ctc_string)
        print(non_ctc_string)


fixed_n_epochs_trainer(
    fit_function, cost_function, train_indices, valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    monitor_function=print_ctc_prediction,
    list_of_minibatch_functions=[make_masked_minibatch,
                                 make_masked_minibatch],
    list_of_train_output_names=["cost"],
    valid_output_name="valid_cost",
    valid_frequency=10,
    n_epochs=1000)