# Assumed imports for these snippets; X, y and n_targets are dataset
# variables expected to be defined at module level, and the layer /
# training helpers (softplus_layer, linear_layer, early_stopping_trainer,
# etc.) come from the surrounding library.
import numpy as np
import theano
from collections import OrderedDict


# Smoke test for a one-layer VAE: encode, sample via the
# reparameterization trick, decode, then train for a single epoch.
# The learning rate is zero, so parameters never change; this only
# exercises graph construction, compilation, and the training loop.
def test_vae():
    minibatch_size = 10
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    l1_enc = softplus_layer([X_sym], graph, 'l1_enc', proj_dim=100,
                            random_state=random_state)
    mu = linear_layer([l1_enc], graph, 'mu', proj_dim=50,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc], graph, 'log_sigma', proj_dim=50,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = softplus_layer([samp], graph, 'l1_dec', proj_dim=100,
                            random_state=random_state)
    out = sigmoid_layer([l1_dec], graph, 'out', proj_dim=X.shape[1],
                        random_state=random_state)
    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.00000
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)
    fit_function = theano.function([X_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym], [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict, [X], minibatch_size,
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost", n_epochs=1)

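# For reference, gaussian_log_sample_layer is assumed to implement the
# reparameterization trick on a log standard deviation. This numpy
# sketch shows the intended computation, not the library's actual code.
def gaussian_log_sample_sketch(mu, log_sigma, random_state):
    # z = mu + exp(log_sigma) * eps with eps ~ N(0, I); gradients flow
    # through mu and log_sigma because eps carries all the randomness
    eps = random_state.standard_normal(mu.shape)
    return mu + np.exp(log_sigma) * eps
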
# Smoke test for the non-log sampling layer: sigma comes from a softplus
# layer so it stays positive.
def test_gaussian_sample_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    mu = linear_layer([X_sym], graph, 'mu', proj_dim=20,
                      random_state=random_state)
    sigma = softplus_layer([X_sym], graph, 'sigma', proj_dim=20,
                           random_state=random_state)
    samp = gaussian_sample_layer([mu], [sigma], graph, 'gaussian_sample',
                                 random_state=random_state)
    out = linear_layer([samp], graph, 'out', proj_dim=10,
                       random_state=random_state)
    f = theano.function([X_sym], [out], mode="FAST_COMPILE")

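# gaussian_sample_layer is presumably the same idea without the log
# parameterization; sigma must already be positive, which is why the
# test above produces it with a softplus layer. A numpy sketch:
def gaussian_sample_sketch(mu, sigma, random_state):
    # z = mu + sigma * eps with eps ~ N(0, I)
    eps = random_state.standard_normal(mu.shape)
    return mu + sigma * eps
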
# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym = add_datasets_to_graph([X], ["X"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_code = 400
n_enc_layer = [600, 600]
n_dec_layer = [600, 600]
width = 48
height = 48
n_input = width * height

# encode path aka q
l1_enc = softplus_layer([X_sym], graph, 'l1_enc', n_enc_layer[0],
                        random_state)
l2_enc = softplus_layer([l1_enc], graph, 'l2_enc', n_enc_layer[1],
                        random_state)
code_mu = linear_layer([l2_enc], graph, 'code_mu', n_code, random_state)
code_log_sigma = linear_layer([l2_enc], graph, 'code_log_sigma', n_code,
                              random_state)
kl = gaussian_log_kl([code_mu], [code_log_sigma], graph, 'kl').mean()
samp = gaussian_log_sample_layer([code_mu], [code_log_sigma], graph,
                                 'samp', random_state)

# decode path aka p
l1_dec = softplus_layer([samp], graph, 'l1_dec', n_dec_layer[0],
                        random_state)
l2_dec = softplus_layer([l1_dec], graph, 'l2_dec', n_dec_layer[1],
                        random_state)
out = linear_layer([l2_dec], graph, 'out', n_input, random_state)
nll = squared_error(out, X_sym).mean()

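# gaussian_log_kl presumably computes the closed-form KL divergence
# between q(z|x) = N(mu, sigma^2) and the unit Gaussian prior, with the
# standard deviation stored as log_sigma. A numpy sketch of the usual
# formula (an assumption about the library, not its actual code):
def gaussian_log_kl_sketch(mu, log_sigma):
    # KL(N(mu, sigma^2) || N(0, I))
    #   = -0.5 * sum(1 + 2 * log_sigma - mu^2 - exp(2 * log_sigma))
    return -0.5 * np.sum(1 + 2 * log_sigma - mu ** 2
                         - np.exp(2 * log_sigma), axis=-1)
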
# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_code = 100
n_enc_layer = [200, 200, 200]
n_dec_layer = [200, 200]
width = 48
height = 48
n_input = width * height

# q(y_pred | x)
y_l1_enc = softplus_layer([X_sym], graph, 'y_l1_enc', n_enc_layer[0],
                          random_state)
y_pred = softmax_layer([y_l1_enc], graph, 'y_pred', n_targets,
                       random_state)

# partial q(z | x, y_pred)
X_l1_enc = softplus_layer([X_sym, y_pred], graph, 'X_l1_enc',
                          n_enc_layer[1], random_state)
# combined q(y_pred | x) and partial q(z | x) for q(z | x, y_pred)
l2_enc = softplus_layer([X_l1_enc], graph, 'l2_enc', n_enc_layer[2],
                        random_state)

# code layer
code_mu = linear_layer([l2_enc], graph, 'code_mu', n_code, random_state)
code_log_sigma = linear_layer([l2_enc], graph, 'code_log_sigma', n_code,
                              random_state)
kl = gaussian_log_kl([code_mu], [code_log_sigma], graph, 'kl').mean()

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_code = 100
n_enc_layer = 200
n_dec_layer = 200
width = 28
height = 28
n_input = width * height

# q(y_pred | x)
y_l1_enc = softplus_layer([X_sym], graph, 'y_l1_enc', n_enc_layer,
                          random_state=random_state)
y_l2_enc = softplus_layer([y_l1_enc], graph, 'y_l2_enc', n_targets,
                          random_state=random_state)
y_pred = softmax_layer([y_l2_enc], graph, 'y_pred', n_targets,
                       random_state=random_state)

# partial q(z | x)
X_l1_enc = softplus_layer([X_sym], graph, 'X_l1_enc', n_enc_layer,
                          random_state=random_state)
X_l2_enc = softplus_layer([X_l1_enc], graph, 'X_l2_enc', n_enc_layer,
                          random_state=random_state)
# combined q(y_pred | x) and partial q(z | x) for q(z | x, y_pred)
l3_enc = softplus_layer([X_l2_enc, y_pred], graph, 'l3_enc', n_enc_layer,
                        random_state=random_state)

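# The snippet above stops before the decoder, but in a semi-supervised
# VAE of this shape the total cost typically combines reconstruction,
# KL, and a classification term on the labeled examples, e.g. (hedged
# sketch with assumed names, not the original code):
#     cost = nll + kl + alpha * categorical_crossentropy(y_pred, y_sym).mean()
# where alpha weights how strongly the label predictor is trained
# relative to the generative terms.
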
# Same VAE smoke test against the alternate optimizer / trainer call
# signatures: here sgd takes only params, the learning rate is passed to
# opt.updates, and early_stopping_trainer uses different keyword names.
def test_vae():
    minibatch_size = 10
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    l1_enc = softplus_layer([X_sym], graph, 'l1_enc', proj_dim=100,
                            random_state=random_state)
    mu = linear_layer([l1_enc], graph, 'mu', proj_dim=50,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc], graph, 'log_sigma', proj_dim=50,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = softplus_layer([samp], graph, 'l1_dec', proj_dim=100,
                            random_state=random_state)
    out = sigmoid_layer([l1_dec], graph, 'out', proj_dim=X.shape[1],
                        random_state=random_state)
    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.00000
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)
    fit_function = theano.function([X_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym], [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function,
                           checkpoint_dict, [X], minibatch_size,
                           train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)

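# Rough sketch of what an early-stopping trainer like the one above
# typically does: minibatch updates over the training indices, then a
# validation pass that tracks the best cost seen. An illustration under
# assumed behavior, not the library's implementation.
def early_stopping_sketch(fit_function, cost_function, train_indices,
                          valid_indices, X, minibatch_size, n_epochs):
    best_valid = np.inf
    for epoch in range(n_epochs):
        # one pass of minibatch updates over the training set
        for i in range(0, len(train_indices), minibatch_size):
            batch = train_indices[i:i + minibatch_size]
            fit_function(X[batch])
        # evaluate on the validation set and keep the best cost
        valid_costs = []
        for i in range(0, len(valid_indices), minibatch_size):
            batch = valid_indices[i:i + minibatch_size]
            valid_costs.append(cost_function(X[batch])[0])
        valid_cost = np.mean(valid_costs)
        if valid_cost < best_valid:
            best_valid = valid_cost
    return best_valid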