h, _ = theano.scan(step, sequences=[in_fork], outputs_info=[h0])
h_o = slice_state(h, n_hid)
y_pred = softmax([h_o], [n_hid], n_classes, name="h2",
                 random_state=random_state)
loss = categorical_crossentropy(y_pred, y_sym)
cost = loss.mean(axis=1).sum(axis=0)

params = list(get_params().values())
grads = tensor.grad(cost, params)

learning_rate = 0.0001
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym, h0], [cost, h],
                               updates=updates)
cost_function = theano.function([X_sym, y_sym, h0], [cost, h])
predict_function = theano.function([X_sym, h0], [y_pred, h])


def train_loop(itr):
    mb = next(itr)
    X_mb, y_mb = mb[:-1], mb[1:]
    cost, h = fit_function(X_mb, y_mb, train_h_init)
    # carry the final hidden state forward to the next minibatch
    train_h_init[:] = h[-1, :]
    return [cost]
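To show how the stateful train_loop above is meant to be driven, here is a minimal sketch of one pass over the data. It assumes train_itr is a minibatch iterator over the character sequences (built the same way as in the other examples); that name and the StopIteration handling are assumptions, not part of the code above.

import numpy as np

# hypothetical driver: keep calling train_loop until the iterator is
# exhausted; train_loop itself carries the hidden state across minibatches
epoch_costs = []
try:
    while True:
        epoch_costs += train_loop(train_itr)
except StopIteration:
    print("Mean cost for this pass: %f" % np.mean(epoch_costs))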
samp = gaussian_log_sample_layer([code_mu], [code_log_sigma], graph, 'samp',
                                 random_state)

# decode path aka p
l1_dec = softplus_layer([samp], graph, 'l1_dec', n_dec_layer[0], random_state)
l2_dec = softplus_layer([l1_dec], graph, 'l2_dec', n_dec_layer[1], random_state)
out = linear_layer([l2_dec], graph, 'out', n_input, random_state)

nll = squared_error(out, X_sym).mean()
# log p(x) = -nll so swap sign
# want to minimize cost in optimization so multiply by -1
cost = -1 * (-nll - kl)
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0003
opt = adam(params)
updates = opt.updates(params, grads, learning_rate)

# Checkpointing
try:
    checkpoint_dict = load_last_checkpoint()
    fit_function = checkpoint_dict["fit_function"]
    cost_function = checkpoint_dict["cost_function"]
    encode_function = checkpoint_dict["encode_function"]
    decode_function = checkpoint_dict["decode_function"]
    previous_epoch_results = checkpoint_dict["previous_epoch_results"]
except KeyError:
    fit_function = theano.function([X_sym], [nll, kl, nll + kl],
                                   updates=updates)
    cost_function = theano.function([X_sym], [nll + kl])
    encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
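For reference, the sign flipping in the cost is just the usual variational lower bound written out. With nll standing in for the reconstruction term $-\log p(x \mid z)$ (squared error corresponds to a fixed-variance Gaussian likelihood, up to a constant) and kl the KL term computed from code_mu and code_log_sigma, the bound is

$$
\log p(x) \;\ge\; \mathbb{E}_{q(z \mid x)}\!\left[\log p(x \mid z)\right] - \mathrm{KL}\!\left(q(z \mid x)\,\|\,p(z)\right) \;=\; -(\mathrm{nll} + \mathrm{kl}),
$$

so minimizing cost = -1 * (-nll - kl) = nll + kl is the same as maximizing the bound.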
out = sigmoid_layer([l3_dec], graph, 'out', n_input, random_state=random_state)

nll = binary_crossentropy(out, X_sym).mean()
# log p(x) = -nll so swap sign
# want to minimize cost in optimization so multiply by -1
base_cost = -1 * (-nll - kl)

# -log q(y | x) is negative log likelihood already
alpha = 0.1
err = categorical_crossentropy(y_pred, y_sym).mean()
cost = base_cost + alpha * err

params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0001
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [nll, kl, nll + kl],
                               updates=updates)
cost_function = theano.function([X_sym, y_sym], [nll + kl])
predict_function = theano.function([X_sym], [y_pred])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp, y_sym], [out])

checkpoint_dict = create_or_continue_from_checkpoint_dict(locals())

train_itr = minibatch_iterator([X, y], minibatch_size, stop_index=train_end,
                               axis=0)
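Because decode_function takes both the latent sample and the label as inputs, it can also be used for rough conditional generation after training: pair a draw from the standard normal prior with a chosen class label. The sketch below assumes the latent size is held in a variable like n_code, that labels are fed one-hot with n_classes columns, and that floatX is float32; all of these are assumptions about the surrounding setup (if y_sym is an index vector instead, pass integer labels).

import numpy as np

random_state = np.random.RandomState(2177)
# one standard normal draw from the prior, conditioned on class 3
z = random_state.randn(1, n_code).astype("float32")
y_cond = np.zeros((1, n_classes), dtype="float32")
y_cond[0, 3] = 1.
generated, = decode_function(z, y_cond)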