def fit(self, x_train, y_train, params, reg_param=None):
    ''' Wrapper for MLE through gradient descent '''
    assert x_train.shape[0] == self.params['D_in']
    assert y_train.shape[0] == self.params['D_out']

    ### make objective function for training
    self.objective, self.gradient = self.make_objective(x_train, y_train, reg_param)

    ### set up optimization defaults
    step_size = 0.01
    max_iteration = 5000
    check_point = 100
    weights_init = self.weights.reshape((1, -1))
    mass = None
    optimizer = 'adam'
    random_restarts = 5

    def call_back(weights, iteration, g):
        ''' Actions per optimization step '''
        objective = self.objective(weights, iteration)
        self.objective_trace = np.vstack((self.objective_trace, objective))
        self.weight_trace = np.vstack((self.weight_trace, weights))
        if iteration % check_point == 0:
            mag = np.linalg.norm(self.gradient(weights, iteration))
            # print("Iteration {} lower bound {}; gradient mag: {}".format(iteration, objective, mag))

    ### allow user-supplied settings to override the defaults
    if 'step_size' in params.keys():
        step_size = params['step_size']
    if 'max_iteration' in params.keys():
        max_iteration = params['max_iteration']
    if 'check_point' in params.keys():
        check_point = params['check_point']
    if 'init' in params.keys():
        weights_init = params['init']
    if 'call_back' in params.keys():
        call_back = params['call_back']
    if 'mass' in params.keys():
        mass = params['mass']
    if 'optimizer' in params.keys():
        optimizer = params['optimizer']
    if 'random_restarts' in params.keys():
        random_restarts = params['random_restarts']

    ### train with random restarts, keeping the best local optimum
    optimal_obj = 1e16
    optimal_weights = self.weights

    for i in range(random_restarts):
        if optimizer == 'adam':
            adam(self.gradient, weights_init,
                 step_size=step_size, num_iters=max_iteration, callback=call_back)
        local_opt = np.min(self.objective_trace[-100:])
        if local_opt < optimal_obj:
            optimal_obj = local_opt
            opt_index = np.argmin(self.objective_trace[-100:])
            self.weights = self.weight_trace[-100:][opt_index].reshape((1, -1))
        weights_init = self.random.normal(0, 1, size=(1, self.D))

    self.objective_trace = self.objective_trace[1:]
    self.weight_trace = self.weight_trace[1:]

def fit(self, step_size=1e-2, max_iteration=5000, check_point=None,
        params_init=None, call_back=None, verbose=True,
        optimizer='adam', mass=None, reset=True):
    ''' Optimization of the variational objective '''
    if check_point is not None:
        self.check_point = check_point

    if params_init is None:
        mean_init = self.random.normal(0, 0.1, size=self.D)
        parametrized_var_init = self.random.normal(0, 0.1, size=self.D)
        params_init = np.concatenate([mean_init, parametrized_var_init])
    assert len(params_init) == 2 * self.D

    self.verbose = verbose
    if call_back is None:
        call_back = self.call_back

    if reset:
        self.ELBO = np.empty((1, 1))
        self.variational_params = np.empty((1, 2 * self.D))

    if optimizer == 'adam':
        adam(self.gradient, params_init,
             step_size=step_size, num_iters=max_iteration, callback=call_back)
    elif optimizer == 'sgd':
        if mass is None:
            mass = 1e-16
        sgd(self.gradient, params_init,
            step_size=step_size, num_iters=max_iteration, callback=call_back, mass=mass)
    elif optimizer == 'debug':
        params = params_init
        for i in range(max_iteration):
            params -= step_size * self.gradient(params, i)
            self.debug_call_back(params, i)

    self.variational_params = self.variational_params[1:]
    self.ELBO = self.ELBO[1:]

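# Both fit() wrappers above hand the work to autograd's adam optimizer. As a
# reference for the calling convention they assume -- a gradient function
# g(x, i) plus an optional callback(x, i, g) invoked once per iteration --
# here is a minimal self-contained sketch on a toy quadratic. It is not taken
# from any of the projects in this collection, and the import path assumes a
# recent autograd release.
import autograd.numpy as np
from autograd import grad
from autograd.misc.optimizers import adam


def toy_objective(x, i):
    # Shifted quadratic with its minimum at x = [1, -2].
    return np.sum((x - np.array([1.0, -2.0])) ** 2)


def toy_callback(x, i, g):
    # adam reports the current iterate x, the iteration index i, and gradient g.
    if i % 100 == 0:
        print("iter {:4d} | objective {:.6f}".format(i, toy_objective(x, i)))


x_opt = adam(grad(toy_objective), np.zeros(2),
             step_size=0.1, num_iters=500, callback=toy_callback)
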
def find_minimum(self, current_x, constant, target, initial_guess,
                 n_timesteps, n_steps, mode='MPC'):
    self.current_x = current_x
    self.target_x = target
    self.time = n_timesteps

    if mode == 'MPC':
        param_vec = constant
        new_Cin = adam(self.MPC_grad_wrapper, param_vec,
                       num_iters=n_steps, step_size=0.01)
        return new_Cin
    elif mode == 'param_est':
        self.Cin = constant
        param_vec = initial_guess
        new_param_vec = adam(self.param_est_grad_wrapper, param_vec,
                             num_iters=n_steps)
        return new_param_vec

def train(self, n_iters=100, n_mc_samples=200, callback=None):

    def discriminator_loss(params, x_p, x_q):
        logit_p = sigmoid(self.discriminator.predict(params, x_p))
        logit_q = sigmoid(self.discriminator.predict(params, x_q))
        loss = agnp.mean(agnp.log(logit_q)) + agnp.mean(agnp.log(1 - logit_p))
        return loss

    grad_discriminator_loss = autograd.elementwise_grad(discriminator_loss)

    # Train the generator, fixing the discriminator
    def generator_loss(params, z):
        og = self.generator.predict(params, z)[0, :, :]
        ratio = self.discriminator.predict(self.discriminator.get_params(), og)
        preds = sigmoid(ratio)
        op_preds = 1 - preds
        ll = agnp.mean(ratio) - agnp.mean(self.model.log_prob(og))
        return ll

    grad_generator_loss = autograd.elementwise_grad(generator_loss)

    for i in range(n_iters):
        print("Iteration %d " % (i + 1))

        # Fix the generator, train the discriminator
        # Sample random generator samples
        z = agnp.random.uniform(-10, 10, size=(n_mc_samples, 20))
        # Samples from the prior
        prior_samples = agnp.random.uniform(-10, 10, size=(n_mc_samples, self.n_params))
        var_dist_samples = self.generator.predict(self.generator.get_params(), z)[0, :, :]

        # Requires a differentiable Discriminator
        ret = adam(lambda x, i: -grad_discriminator_loss(x, prior_samples, var_dist_samples),
                   self.discriminator.get_params())
        self.discriminator.set_params(ret)

        # Requires a differentiable Generator
        ret = adam(lambda x, i: grad_generator_loss(x, z),
                   self.generator.get_params(), callback=callback)
        self.generator.set_params(ret)

def train_bnn(data='expx', n_data=50, n_samples=20, arch=[1, 20, 1],
              prior_params=None, prior_type=None, act='rbf',
              iters=300, lr=0.01, plot=True, save=False):

    if type(data) == str:
        inputs, targets = build_toy_dataset(data=data, n_data=n_data)
    else:
        inputs, targets = data

    if plot:
        fig, ax = p.setup_plot()

    init_params = init_var_params(arch)

    def loss(params, t):
        return vlb_objective(params, inputs, targets, arch, n_samples, act=act,
                             prior_params=prior_params, prior_type=prior_type)

    def callback(params, t, g):
        plot_inputs = np.linspace(-10, 10, num=500)[:, None]
        f_bnn = sample_bnn(params, plot_inputs, 5, arch, act)
        # print(params[1])
        # Plot data and functions.
        p.plot_iter(ax, inputs, plot_inputs, targets, f_bnn)
        print("ITER {} | LOSS {}".format(t, -loss(params, t)))

    var_params = adam(grad(loss), init_params,
                      step_size=lr, num_iters=iters, callback=callback)

def train_SBLVbnn(inputs, targets, dimz=1, dimx=1, dimy=1, arch=[20, 20],
                  lr=0.01, iters=500, n_samples=10, act=rbf):
    arch = [dimx + dimz] + arch + [dimy]

    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    plt.ion()
    plt.show(block=False)

    def objective(params, t):
        return vlb_objective(params, inputs, targets, arch, n_samples, act)

    def callback(params, t, g):
        N_samples, nd = 5, 80
        plot_inputs = np.linspace(-8, 8, num=80)
        f_bnn = sample_bnn(params, plot_inputs[:, None], N_samples, arch, act)

        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), 'k.')
        ax.plot(plot_inputs, f_bnn.T, color='r')
        ax.set_ylim([-5, 5])
        plt.draw()
        plt.pause(1.0 / 60.0)
        print("ITER {} | OBJ {}".format(t, -objective(params, t)))

    var_params = adam(grad(objective), init_var_params(arch, dimz),
                      step_size=lr, num_iters=iters, callback=callback)
    return var_params

def train(self, num_iters=100):
    trainable_params = self.getTrainableParamsFromCheckpoint('saved_params.p')

    # Callback to run the test set and update the next graph
    def callback(full_params, i, g):
        if (i and i % 100 == 0):
            self.saveParamsToCheckpoint(full_params)

        # Every 500 steps, run the algorithm on the test set
        if (i % 500 == 0):
            true_labels, predicted_labels = [], []
            for graph_and_fbs in self.test_set:
                if (np.random.random() < 0.3):
                    continue
                true_labels.append(graph_and_fbs[0].inheritancePattern)
                predicted_labels.append(self.predict(full_params, graph_and_fbs, 1))

            # Print the confusion matrix and kappa score on the test set
            self.printMetrics(true_labels, predicted_labels)

        # Swap out the current graph and update the current label
        self.updateCurrentGraphAndLabel(training=True)

    # Optimize
    grads = grad(self.fullLoss)
    final_params = adam(grads, trainable_params, num_iters=num_iters, callback=callback)
    return final_params

def train(self, n_mc_samples, n_elbo_samples=20, step_size=0.01,
          num_iters=1000, verbose=False, callback=None):

    def variational_objective(params, var_it, n_mc_samples=n_mc_samples):
        samples = self.v_dist.sample(params, n_mc_samples)
        elbo = self.v_dist.entropy(params) + agnp.mean(self.model.log_prob(samples))
        return -elbo

    if verbose:
        def cb(params, i, g):
            print("Negative ELBO: %f" % variational_objective(
                params, i, n_mc_samples=n_elbo_samples))
            if callback is not None:
                callback(params, i, g)
    else:
        cb = callback

    grad_elbo = autograd.elementwise_grad(variational_objective)
    ret = adam(lambda x, i: grad_elbo(x, i), self.v_dist.get_params(),
               step_size=step_size, num_iters=num_iters, callback=cb)
    self.v_dist.set_params(ret)
    return ret

def fit(self, target, input, nb_epochs=500, batch_size=16, lr=1e-3, verbose=True):
    nb_batches = int(np.ceil(len(input) / batch_size))

    def batch_indices(iter):
        idx = iter % nb_batches
        return slice(idx * batch_size, (idx + 1) * batch_size)

    def _objective(params, iter):
        self.params = params
        idx = batch_indices(iter)
        return self.cost(target[idx], input[idx])

    def _callback(params, iter, grad):
        if iter % (nb_batches * 10) == 0:
            self.params = params
            if verbose:
                print('Epoch: {}/{}.............'.format(iter // nb_batches, nb_epochs), end=' ')
                print("Loss: {:.4f}".format(self.cost(target, input)))

    _gradient = grad(_objective)

    self.params = adam(_gradient, self.params, step_size=lr,
                       num_iters=nb_epochs * nb_batches, callback=_callback)

def train(self, train_x, train_y, iters):
    self.train_x = train_x
    self.train_y = train_y
    self.train_loss = []
    self.pbar = tqdm(total=iters, desc='Optimising parameters')

    init_params = self.params

    # Optimisation via Autograd's implementation of Adam
    optimised_params = adam(grad(self.objective_train), init_params,
                            step_size=0.01, num_iters=iters, callback=self.callback)
    self.params = optimised_params
    self.pbar.close()

    # Plot evolution of training loss
    means = []
    for i in range(iters):
        if i == 0:
            means.append(self.train_loss[i])
        else:
            # Running mean of the loss up to iteration i
            mean = ((means[i - 1] * i) + self.train_loss[i]) / (i + 1)
            means.append(mean)

    plt.plot(self.train_loss, label='SE Loss')
    plt.plot(means, c='r', linewidth=3, label='Average SE Loss')
    plt.title("Training Error")
    plt.legend()
    plt.show()
    return

def trainMarginal(self, num_iters=100):
    params = {}
    for group, dist in self.params.emission_dists.items():
        params[group] = dist.recognizer_params

    emission_grads = grad(self.marginalLoss)

    def callback(x, i, g):
        if (i % 25 == 0):
            print('i', i)

    gs = emission_grads(params)
    opt_params = adam(emission_grads, params, num_iters=num_iters, callback=callback)

    # Update the model parameters
    for group in self.params.emission_dists.keys():
        self.params.emission_dists[group].recognizer_params = opt_params[group]

    return opt_params

def trainSVAE(self, num_iters=100):
    svae_params = ({}, {})
    for group, dist in self.params.emission_dists.items():
        svae_params[0][group] = dist.recognizer_params
        svae_params[1][group] = dist.generative_hyper_params

    emission_grads = grad(self.svaeLoss)

    def callback(x, i, g):
        if (i % 25 == 0):
            print('i', i)

    opt_params = adam(emission_grads, svae_params, num_iters=num_iters, callback=callback)

    # Update the model parameters
    for group in self.params.emission_dists.keys():
        self.params.emission_dists[group].recognizer_params = opt_params[0][group]
        self.params.emission_dists[group].generative_hyper_params = opt_params[1][group]

    return opt_params

def train_bnn(inputs, targets, arch=[1, 20, 20, 1], lr=0.01, iters=50,
              n_samples=10, act=np.tanh):
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    plt.ion()
    plt.show(block=False)

    def objective(params, t):
        return vlb_objective(params, inputs, targets, arch, n_samples, act)

    def callback(params, t, g):
        # Sample functions from posterior f ~ p(f|phi) or p(f|varphi)
        N_samples, nd = 5, 400
        plot_inputs = np.linspace(-8, 8, num=400)
        f_bnn = sample_bnn(params, plot_inputs[:, None], N_samples, arch, act)

        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), 'k.')
        ax.plot(plot_inputs, f_bnn.T, color='r')
        ax.set_ylim([-5, 5])
        plt.draw()
        plt.pause(1.0 / 60.0)
        print("ITER {} | OBJ {}".format(t, -objective(params, t)))

    var_params = adam(grad(objective), init_var_params(arch),
                      step_size=lr, num_iters=iters, callback=callback)
    return var_params

def fit(self, X, y, batch_size=5, n_iter=10000, lr=0.001, lr_type='constant'):
    X = np.array(X).astype(np.float32)
    y = np.array(y).reshape(-1, 1).astype(np.float32)
    m, n = X.shape

    epochs = ceil(n_iter / floor(m / batch_size))
    # print(epochs)

    objective_grad = grad(self.objective)

    for i in range(epochs):
        for j in range(0, m, batch_size):
            self.X_batch = X[j:j + batch_size]
            self.y_batch = y[j:j + batch_size]
            step_size = lr
            # Note: calling adam with num_iters=1 restarts its moment estimates
            # on every minibatch, so this amounts to single bias-corrected
            # gradient steps rather than one continuous Adam run.
            self.params = adam(objective_grad, self.params,
                               step_size=step_size, num_iters=1)

    self.X_batch = X
    self.y_batch = y

def fit(self, X, y):
    def objective(weights, iteration):
        # The sum of squared errors
        squared_error = (y - self.predict(X, weights)) ** 2
        return np.sum(squared_error)

    def callback(weights, iteration, g):
        it = iteration + 1
        if it % self.checkpoint == 0 or it in {1, self.num_iters}:
            obj = objective(weights, iteration)
            padding = int(np.log10(self.num_iters) + 1)
            print(f"[Iteration {it:{padding}d}] Sum of squared errors: {obj:.6f}")

    # Ensure that X is two-dimensional
    X = np.asarray(X).reshape(-1, 1)
    y = np.asarray(y)

    # Reinitialize the weights vector
    weights_init = self.random.normal(size=self.n_weights)

    # Run optimization
    self.weights = adam(
        grad(objective),
        weights_init,
        num_iters=self.num_iters,
        step_size=self.step_size,
        callback=callback,
    )

def adam_solve(lambda_flows, grad_energy_bound, samples, u_func, h,
               m=1000, step_size=0.001):
    ''' Uses adam solver to optimize the energy bound '''
    output = np.copy(lambda_flows)  # Copies so original parameters are not modified
    print("BEFORE LEARNING:\n{}".format(output))

    grad_energy_bound = autograd.grad(energy_bound)  # Autograd gradient of energy bound
    g_eb = lambda lambda_flows, i: grad_energy_bound(
        lambda_flows, samples, h, u_func,
        # beta=(0.1 + i/1000))
        beta=min(1, 0.01 + i / 10000))  # Annealing

    output = adam(g_eb, output, num_iters=m, callback=callback, step_size=step_size)
    print("AFTER LEARNING:\n{}".format(output))

    # Resample and flow a larger number of samples to better show fit
    samples = np.random.randn(20000)[:, np.newaxis]
    samples_flowed = flow_samples(output, samples, h)

    np.savetxt("./linear_plots/flow_params.txt", output)
    return samples_flowed

def fit_nn(x, y, arch):
    def nll(weights, t):
        return map_objective(weights, x, y)

    return adam(grad(nll), init_random_params(arch),
                step_size=0.05, num_iters=500)

def pack(self):
    print(" Iter | Ball radius | Density ")
    self.logits = adam(
        grad(lambda logits, i: -1 * self.ball_radius(self.box_warp(logits), i)),
        self.logits,
        num_iters=self.n_iters,
        callback=self.print_status)
    # one more print at final iteration
    self.print_status(i=self.n_iters)

def fit(self, X, method="exact"):
    '''
    function: fit

    Description: Fit the model to the data in X. method can either be "exact"
    for standard maximum likelihood learning using the exact marginal log
    likelihood, or "bbsvl" for black-box stochastic variational learning using
    diagonal Gaussian variational posteriors. The optimized W and Psi
    parameters should be stored in member variables W and Psi after learning.

    Inputs:
        X - (np.array) Data matrix. Shape (N,D)

    Outputs:
        None
    '''
    K = self.K
    D = self.D
    gamma = np.log(np.diag(np.cov(X.T)))
    # gamma = np.random.randn(D) * 1e-5
    N, _ = X.shape
    W = np.random.randn(K, D) * 1e-5

    if method == "exact":
        # gamma = np.log(np.diag(np.cov(X.T)))
        init_params = np.concatenate((gamma.flatten(), W.flatten()))
        fprime = self.marginal_likelihood_wrapper(init_params, X)
        learnt_params = adam(fprime, init_params)
        self.W = learnt_params[D:].reshape(K, D)
        self.Psi = np.diag(np.exp(learnt_params[:self.D]))
    elif method == "bbsvl":
        # gamma = np.log(np.diag(np.cov(X.T)))
        mus = np.random.randn(X.shape[0], X.shape[1]) / 100
        stds = np.random.randn(X.shape[0], X.shape[1]) * 1e-5
        init_var_params = np.concatenate(
            (gamma.flatten(), W.flatten(), mus.flatten(), stds.flatten()))
        fprime = self.svl_wrapper(X, init_var_params)
        learnt_params = adam(fprime, init_var_params)
        self.W = learnt_params[D:(D * K + D)].reshape((K, D))
        self.Psi = np.diag(np.exp(learnt_params[:D].reshape(D)))
        self.mus = learnt_params[D * (K + 1):D * (K + 1) + N * K].reshape((N, K))
        self.stds = np.exp(learnt_params[-(N * K):].reshape((N, K)))
    else:
        print('invalid method')
        pass

def variational_inference(Sigma_W, y_train, x_train, S, max_iteration, step_size, verbose):
    '''Wrapper for variational inference via black-box VI (BBVI) for Bayesian regression'''
    D = Sigma_W.shape[0]
    Sigma_W_inv = np.linalg.inv(Sigma_W)
    Sigma_W_det = np.linalg.det(Sigma_W)
    variational_dim = D

    # define the log prior on the model parameters
    def log_prior(W):
        constant_W = -0.5 * (D * np.log(2 * np.pi) + np.log(Sigma_W_det))
        exponential_W = -0.5 * np.diag(np.dot(np.dot(W, Sigma_W_inv), W.T))
        log_p_W = constant_W + exponential_W
        return log_p_W

    # define the log likelihood
    def log_lklhd(W):
        log_odds = np.matmul(W, x_train) + 10
        p = 1 / (1 + np.exp(-log_odds))
        log_likelihood = y_train * np.log(p)
        return log_likelihood

    # define the log joint density
    log_density = lambda w, t: log_lklhd(w) + log_prior(w)

    # build variational objective
    objective, gradient, unpack_params = black_box_variational_inference(
        log_density, D, num_samples=S)

    def callback(params, t, g):
        if verbose and t % 10 == 0:
            var_means = params[:D]
            var_variance = np.diag(np.exp(params[D:]) ** 2)
            print("Iteration {} lower bound {}; gradient mag: {}".format(
                t, -objective(params, t), np.linalg.norm(gradient(params, t))))
            print('Variational Mean: ', var_means)
            print('Variational Variances: ', var_variance)

    print("Optimizing variational parameters...")

    # initialize variational parameters
    init_mean = 0 * np.ones(D)
    init_log_std = -1 * np.ones(D)
    init_var_params = np.concatenate([init_mean, init_log_std])

    # perform gradient descent using adam (a type of gradient-based optimizer)
    variational_params = adam(gradient, init_var_params, step_size=step_size,
                              num_iters=max_iteration, callback=callback)
    return variational_params

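# The wrapper above depends on a black_box_variational_inference helper that is
# not shown in this collection. autograd's examples ship one with exactly this
# interface (an (objective, gradient, unpack_params) triple over a diagonal
# Gaussian parameterized as [means, log-stds]), which is what the callback's
# params[:D] / params[D:] unpacking assumes. A condensed sketch, under the
# assumption that the helper matches that example:
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad


def black_box_variational_inference(logprob, D, num_samples):
    def unpack_params(params):
        # First D entries are means, the last D are log standard deviations.
        mean, log_std = params[:D], params[D:]
        return mean, log_std

    def gaussian_entropy(log_std):
        # Analytic entropy of a diagonal Gaussian.
        return 0.5 * D * (1.0 + np.log(2 * np.pi)) + np.sum(log_std)

    rs = npr.RandomState(0)

    def variational_objective(params, t):
        # Monte-Carlo estimate of the negative ELBO via the reparameterization trick.
        mean, log_std = unpack_params(params)
        samples = rs.randn(num_samples, D) * np.exp(log_std) + mean
        lower_bound = gaussian_entropy(log_std) + np.mean(logprob(samples, t))
        return -lower_bound

    gradient = grad(variational_objective)
    return variational_objective, gradient, unpack_params
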
def adam_solve(lambda_flows, grad_energy_bound, samples, u_func, h,
               m=1000, step_size=0.001, bnn=False):
    ''' Uses adam solver to optimize the energy bound '''
    output = np.copy(lambda_flows)  # Copies to avoid changing initial conditions
    print("BEFORE LEARNING:\n{}".format(output))

    grad_energy_bound = autograd.grad(energy_bound)  # Autograd gradient of energy
    g_eb = lambda lambda_flows, i: grad_energy_bound(
        lambda_flows, samples, h, u_func,
        # beta=(0.1 + i/1000))
        beta=min(2, i / 1000),  # Annealing
        # beta=min(1, 0.01 + i/10000),
        bnn=bnn)

    output = adam(g_eb, output, num_iters=m, callback=callback, step_size=step_size)
    print("\nAFTER LEARNING:\n{}".format(output))

    # samples = np.random.randn(30000)[:, np.newaxis]  # Plot with more samples for better clarity
    q_0_mu = np.array([0, 0])
    q_0_sigma = 1
    D = q_0_mu.shape[0]
    # samples = np.random.multivariate_normal(q_0_mu, q_0_sigma * np.eye(D), 20000)
    samples_flowed = flow_samples(output, samples, h)

    # np.savetxt("./data_fit_1d/flow_params.txt", output)
    np.savetxt("./nn_fit/flow_params.txt", output)

    if bnn:
        np.savetxt("./nn_fit/energy_bound.txt", e_bound)
        fig, ax = plt.subplots()
        ax.plot(e_bound)
        ax.set(title="Energy Bound")
        plt.savefig("./nn_fit/energy_bound.png")
        plt.close()

        np.savetxt("./nn_fit/joint_probs.txt", joint_probs)
        fig, ax = plt.subplots()
        ax.plot(joint_probs)
        ax.set(title="Joint Probability")
        plt.savefig("./nn_fit/joint_probs.png")
        plt.close()

        np.savetxt("./nn_fit/flow_probs.txt", flow_probs)
        fig, ax = plt.subplots()
        ax.plot(flow_probs)
        ax.set(title="Flow Probs")
        plt.savefig("./nn_fit/flow_probs.png")
        plt.close()

        np.savetxt("./nn_fit/grad_norms.txt", grad_norms)
        fig, ax = plt.subplots()
        ax.plot(grad_norms)
        ax.set(title="Gradient Norms")
        plt.savefig("./nn_fit/grad_norms.png")
        plt.close()

    return samples_flowed

def run():
    # train and save the neural network
    global inputs, targets, training_error
    training_error = []

    # max number of iterations in optimization
    num_iters = 100
    N = 100  # Number of uniformly sampled trajectories in training data set.

    # sample training data
    # x_traj, y_traj, index = randomsample(N)
    x_traj, y_traj, index = shufflesample(N * 10, sampling_rate=0.1)

    # normalize the training data
    x_scaler = MinMaxScaler((-1, 1))
    x_scaler.fit(x_traj)
    y_scaler = MinMaxScaler((-1, 1))
    y_scaler.fit(y_traj)
    x_traj_scale = x_scaler.transform(x_traj)
    y_traj_scale = y_scaler.transform(y_traj)
    inputs = x_traj_scale
    targets = y_traj_scale

    # Decide NN architecture
    D = x_traj.shape[1]
    G = 20
    init_weights = initialize_weights(G, D)

    print('---------- Optimizing KOOPMAN NEURAL NET for {} iterations ..... \n'.format(num_iters))

    # use adam to optimize
    opt_weights = adam(grad(objective), init_weights, step_size=0.01,
                       num_iters=num_iters, callback=callback)
    # use sgd to optimize
    # opt_weights = sgd(grad(objective), init_weights, step_size=0.1, num_iters=num_iters, callback=callback)

    print('done')

    # save the optimal weights and related parameters
    np.savez('data/sample_1/optweights_tanh_minmax_random1000shuffle_G20_layer2_sgd_2.npz',
             optweights=opt_weights, x_scaler=x_scaler, y_scaler=y_scaler,
             index=index, training_error=training_error)

    # Pick a trajectory and check the prediction of the nn on this trajectory
    x_traj_test, y_traj_test = sample_multitraj(6350, 6351)
    inputs = x_scaler.transform(x_traj_test)
    targets = y_scaler.transform(y_traj_test)
    outputs = nn_encode_foward_decode(opt_weights, inputs)

    re = np.mean([np.linalg.norm(targets[i] - outputs[i]) / np.linalg.norm(targets[i])
                  for i in range(len(targets))])
    print('Relative training norm error {:+1.4e}'.format(re))

    figplot(outputs, url=None)

def partial_fit_base(self, X, y):
    check_is_fitted(self, "base_model_")
    batch_indices = generate_batch(X, self.autograd_config.get("batch_size", 32))

    esp = 1e-11  # where should this live?
    step_size = self.autograd_config.get("step_size", 0.05)
    callback = (None if self.autograd_config.get("verbose", False) else simple_callback)
    num_iters = self.autograd_config.get("num_iters", 1000)
    nclass = self.n_classes_

    model_dump = self.base_model_.booster_.dump_model()
    trees_ = [m["tree_structure"] for m in model_dump["tree_info"]]
    trees_params = multi_tree_to_param(X, y, trees_)
    model_ = gbm_gen(trees_params[0], X, trees_params[2], trees_params[1], False, 2)

    def training_loss(weights, idx=0):
        # Training loss is the negative log-likelihood of the training labels.
        t_idx_ = batch_indices(idx)
        preds = sigmoid(model_(weights, X[t_idx_, :]))
        label_probabilities = preds * y[t_idx_] + (1 - preds) * (1 - y[t_idx_])
        # print(label_probabilities)
        loglik = -np.sum(np.log(label_probabilities))

        num_unpack = 3
        reg = 0
        # reg_l1 = np.sum(np.abs(flattened)) * 1.
        for idx_ in range(0, len(weights), num_unpack):
            param_temp_ = weights[idx_:idx_ + num_unpack]
            flattened, _ = weights_flatten(param_temp_[:2])
            reg_l1 = np.sum(np.abs(flattened)) * 1.0
            reg += reg_l1
        return loglik + reg

    training_gradient_fun = grad(training_loss)
    param_ = adam(
        training_gradient_fun,
        trees_params[0],
        callback=callback,
        step_size=step_size,
        num_iters=num_iters,
    )
    self.base_param_ = copy.deepcopy(trees_params)
    self.partial_param_ = param_
    self.is_partial = True
    return self

def train(self, step_size=0.01, num_iters=1000, verbose=False, callback=None):
    init = self.model.get_params()
    final_params = adam(lambda x, _: -self.grad(x), init,
                        step_size=step_size, num_iters=num_iters, callback=callback)
    self.model.set_params(final_params.reshape(init.shape))
    return self.model

def run(self):
    L2_reg = self.L2_reg
    activations = self.activations
    step_size = self.step_size
    y_type = self.y_type
    loss_type = self.loss_type

    # Initial neural net parameters
    init_params = initialize_parameters(self.layer_sizes, var=self.w_var)

    print("Loading training data...")
    X_train, X_test, y_train, y_test = load_data(self.y_type)
    self.store(X_train, X_test, y_train, y_test)
    self.Coordinates = Coordinates(np.concatenate((y_train, y_test), axis=0))

    num_batches = int(ceil(X_train.shape[0] / BATCH_SIZE))

    def batch_indices(iter):
        if iter % num_batches == 0:
            # Shuffle the data
            X_train, X_test, y_train, y_test = load_data(self.y_type)
            self.store(X_train, X_test, y_train, y_test)
        idx = iter % num_batches
        return slice(idx * BATCH_SIZE, (idx + 1) * BATCH_SIZE)

    def objective(parameters, iter):
        idx = batch_indices(iter)
        return loss(parameters, X_train[idx], y_train[idx],
                    L2_reg, activations, y_type, loss_type)

    objective_grad = grad(objective)

    def print_perf(parameters, iter, gradient):
        if iter % num_batches == 0:
            train_acc = error(parameters, X_train, y_train, activations, y_type, loss_type)
            test_acc = error(parameters, X_test, y_test, activations, y_type, loss_type)
            reg = reg_loss(parameters, L2_reg)
            print("{:15}|{:20}|{:20}|{:20}".format(iter // num_batches, train_acc, test_acc, reg))

    print("Training the neural network ...")
    self.optimized_params = adam(objective_grad, init_params, step_size=step_size,
                                 num_iters=EPOCHS * num_batches, callback=print_perf)

    return self.results(self.optimized_params, activations, L2_reg,
                        X_train, X_test, y_train, y_test)

def run(self):
    self.objectPoints = self.sph.get_sphere_points()
    self.init_params = flatten_points(self.objectPoints, type='object')

    self.objective1 = lambda params: matrix_condition_number_autograd(
        params, self.cam.P, normalize=False)
    self.objective2 = lambda params, iter: matrix_condition_number_autograd(
        params, self.cam.P, normalize=True)

    print("Optimizing condition number...")
    objective_grad = grad(self.objective2)
    self.optimized_params = adam(objective_grad, self.init_params,
                                 step_size=0.001, num_iters=200,
                                 callback=self.plot_points)

def learn(self, **kwargs): params = self.tet.get_params() optimizer = kwargs["optimizer"] objective_grad = grad(self.calculate_loss, argnum=0) self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(self.X, self.y, test_size=0.01, random_state=42) print("DATASET SIZE\tTrain set: {}\tValidation set: {}".format(len(self.X_train), len(self.X_val))) if optimizer == "adam": num_iters = kwargs["num_iters"] step_size = kwargs["step_size"] optimized_params = adam(objective_grad, params, step_size=step_size, num_iters=num_iters, callback=self.print_perf) print("BEST VALIDATION ERROR: ", self.best_v_err) print("BEST PARAMS: ", self.best_params) return optimized_params
def run():
    global inputs, targets, hyper

    num_iters = 150

    # inputs, targets = build_tvb_dataset()
    inputs, targets = build_wc_dataset()

    D = inputs.shape[1]
    G = 20
    init_weights = initialize_weights(G, D)

    print('---------- Optimizing KOOPMAN NEURAL NET for {} iterations ..... \n'.format(num_iters))
    opt_weights = adam(grad(objective), init_weights, step_size=0.01,
                       num_iters=num_iters, callback=callback)

    decoded = nn_encode_decode(opt_weights, inputs)
    outputs = nn_encode_foward_decode(opt_weights, inputs)

    plt.figure()
    _ = plt.scatter(targets, outputs, marker='D', c='g', alpha=0.1)
    plt.xlabel('targets')
    plt.ylabel('outputs')
    plt.title('Dynamic Scatter')
    plt.grid()

    plt.figure()
    _ = plt.scatter(inputs, decoded, marker='D', c='b', alpha=0.1)
    plt.xlabel('inputs')
    plt.ylabel('decoded')
    plt.title('Encoding-decoding Scatter')
    plt.grid()

    plt.figure()
    _ = plt.plot(outputs[:, 0:3], marker='x')
    _ = plt.plot(targets[:, 0:3], marker='+')
    plt.show()

    re = np.mean([np.linalg.norm(targets[i] - outputs[i]) / np.linalg.norm(targets[i])
                  for i in range(len(targets))])
    print('Relative norm error {:+1.4e}'.format(re))
    print('--- Finish ---')

def infer(self, x, W=None, Psi=None, method="exact"):
    '''
    function: infer

    Description: Run inference to obtain the posterior distribution for a
    single data case x. method can either be "exact" for exact inference, or
    "bbsvi" for black-box stochastic variational inference. Output is a tuple
    consisting of the posterior mean and the posterior covariance matrix.

    Inputs:
        x      - (np.array) Data matrix. Shape (1,D)
        W      - (np.array) Factor loading matrix. Shape (K,D).
        Psi    - (np.array) Output covariance matrix. Shape (D,D). Positive, diagonal.
        method - (string) Either "exact" or "bbsvi"

    Outputs:
        mu    - (np.array) Value of the exact or approximate posterior mean. Shape (1,D)
        Sigma - (np.array) Value of the exact or approximate posterior covariance matrix. Shape (D,D)
    '''
    if W is None:
        W = self.W
    if Psi is None:
        Psi = self.Psi

    K = self.K
    D = self.D

    if method == "exact":
        # print('exact')
        inter = np.linalg.inv(np.dot(W.T, W) + Psi)
        mean_conditional = (np.dot(W, np.dot(inter, x.T))).T
        cov_conditional = np.identity(K) - np.dot(W, np.dot(inter, W.T))
        return mean_conditional, cov_conditional
    elif method == "bbsvi":
        # print('bbsvi')
        init_mean = np.random.randn(1, K) / 100
        init_log_std = 1e-5 * np.ones((1, K))
        init_var_params = np.concatenate((init_mean.flatten(), init_log_std.flatten()))
        gradient = self.svi_wrapper(x, init_var_params, W, Psi)
        variational_params = adam(gradient, init_var_params, num_iters=1000)
        return variational_params[:K], np.diag((np.exp(variational_params[K:]) ** 2))
    else:
        print('invalid method')
        pass

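# For reference, the "exact" branch above is the standard Gaussian-conditioning
# result for factor analysis. Assuming the generative model implied by the
# docstring shapes (z ~ N(0, I_K), x = z W + eps with W of shape (K, D) and
# eps ~ N(0, Psi)), the posterior over the latent z is
#
#   p(z | x) = N( W (W^T W + Psi)^{-1} x^T ,  I_K - W (W^T W + Psi)^{-1} W^T ),
#
# which is what `inter`, `mean_conditional`, and `cov_conditional` compute.
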
def learn(self, **kwargs): params = self.tet.get_params() optimizer = kwargs["optimizer"] objective_grad = grad(self.calculate_loss, argnum=0) self.X_train, self.X_val = self.create_triplets([3,5,7]) print("DATASET SIZE - \t TRAIN: {} ex \t VALIDATION: {} ex".format(len(self.X_train), len(self.X_val))) print("Itr\t|\tTr Error\t|\tVal Error\t|\tParams\t|\tGradient\t") if optimizer == "adam": num_iters = kwargs["num_iters"] step_size = kwargs["step_size"] optimized_params = adam(objective_grad, params, step_size=step_size, num_iters=num_iters, callback=self.print_perf) print("\nBEST VALIDATION ERROR: ", self.best_v_err) print("BEST PARAMS: ", self.best_params) return optimized_params
inputs, targets = build_toy_dataset()

def objective(weights, t):
    return -logprob(weights, inputs, targets) \
           - log_gaussian(weights, weight_prior_variance)

print(grad(objective)(init_params, 0))

# Set up figure.
fig = plt.figure(figsize=(12, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.show(block=False)

def callback(params, t, g):
    print("Iteration {} log likelihood {}".format(t, -objective(params, t)))

    # Plot data and functions.
    plt.cla()
    ax.plot(inputs.ravel(), targets.ravel(), 'bx', ms=12)
    plot_inputs = np.reshape(np.linspace(-7, 7, num=300), (300, 1))
    outputs = nn_predict(params, plot_inputs)
    ax.plot(plot_inputs, outputs, 'r', lw=3)
    ax.set_ylim([-1, 1])
    plt.draw()
    plt.pause(1.0 / 60.0)

print("Optimizing network parameters...")
optimized_params = adam(grad(objective), init_params,
                        step_size=0.01, num_iters=1000, callback=callback)

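# Assuming logprob is the data log-likelihood and log_gaussian the log prior
# (as the names suggest), the objective above is, up to constants, the
# negative unnormalized log posterior, so adam is performing MAP estimation
# of the network weights:
#
#   L(w)  = -log p(targets | inputs, w) - log N(w | 0, weight_prior_variance * I)
#   w_MAP = argmin_w L(w)
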
    ax_vecfield.cla()
    ax_vecfield.set_title('Learned Vector Field')
    ax_vecfield.set_xlabel('x')
    ax_vecfield.set_ylabel('y')
    ax_vecfield.xaxis.set_ticklabels([])
    ax_vecfield.yaxis.set_ticklabels([])

    # vector field plot
    y, x = npo.mgrid[-2:2:21j, -2:2:21j]
    dydt = nn_predict(np.stack([x, y], -1).reshape(21 * 21, 2), 0, params).reshape(-1, 2)
    mag = np.sqrt(dydt[:, 0]**2 + dydt[:, 1]**2).reshape(-1, 1)
    dydt = (dydt / mag)
    dydt = dydt.reshape(21, 21, 2)

    ax_vecfield.streamplot(x, y, dydt[:, :, 0], dydt[:, :, 1], color="black")
    ax_vecfield.set_xlim(-2, 2)
    ax_vecfield.set_ylim(-2, 2)

    fig.tight_layout()
    plt.draw()
    plt.pause(0.001)

# Train neural net dynamics to match data.
init_params = init_nn_params(0.1, layer_sizes=[D, 150, D])
optimized_params = adam(grad(train_loss), init_params,
                        num_iters=1000, callback=callback)

    zs = func(np.concatenate([np.atleast_2d(X.ravel()), np.atleast_2d(Y.ravel())]).T)
    Z = zs.reshape(X.shape)
    plt.contour(X, Y, Z)
    ax.set_yticks([])
    ax.set_xticks([])

# Set up figure.
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

def callback(params, t, g):
    print("Iteration {} lower bound {}".format(t, -objective(params, t)))

    plt.cla()
    target_distribution = lambda x: np.exp(log_density(x, t))
    plot_isocontours(ax, target_distribution)

    mean, log_std = unpack_params(params)
    variational_contour = lambda x: mvn.pdf(x, mean, np.diag(np.exp(2 * log_std)))
    plot_isocontours(ax, variational_contour)
    plt.draw()
    plt.pause(1.0 / 30.0)

print("Optimizing variational parameters...")
init_mean = -1 * np.ones(D)
init_log_std = -5 * np.ones(D)
init_var_params = np.concatenate([init_mean, init_log_std])
variational_params = adam(gradient, init_var_params,
                          step_size=0.1, num_iters=2000, callback=callback)

step_size = 0.001

print("Loading training data...")
N, train_images, train_labels, test_images, test_labels = load_mnist()

init_params = init_random_params(param_scale, layer_sizes)

num_batches = int(np.ceil(len(train_images) / batch_size))

def batch_indices(iter):
    idx = iter % num_batches
    return slice(idx * batch_size, (idx + 1) * batch_size)

# Define training objective
def objective(params, iter):
    idx = batch_indices(iter)
    return -log_posterior(params, train_images[idx], train_labels[idx], L2_reg)

# Get gradient of objective using autograd.
objective_grad = grad(objective)

print(" Epoch | Train accuracy | Test accuracy ")

def print_perf(params, iter, gradient):
    if iter % num_batches == 0:
        train_acc = accuracy(params, train_images, train_labels)
        test_acc = accuracy(params, test_images, test_labels)
        print("{:15}|{:20}|{:20}".format(iter // num_batches, train_acc, test_acc))

# The optimizers provided can optimize lists, tuples, or dicts of parameters.
optimized_params = adam(objective_grad, init_params, step_size=step_size,
                        num_iters=num_epochs * num_batches, callback=print_perf)

        elbos.append(elbo_val)
        if t % 50 == 0:
            print("Iteration {} lower bound {}".format(t, elbo_val))

    init_mean = -1 * np.ones(D)
    init_log_std = -5 * np.ones(D)
    init_var_params = np.concatenate([init_mean, init_log_std])
    variational_params = optfun(num_iters, init_var_params, callback)
    return np.array(elbos)

# let's optimize this with a few different step sizes
elbo_lists = []
step_sizes = [.1, .25, .5]
for step_size in step_sizes:
    # optimize with standard gradient + adam
    optfun = lambda n, init, cb: adam(gradient, init, step_size=step_size,
                                      num_iters=n, callback=cb)
    standard_lls = optimize_and_lls(optfun)

    # optimize with natural gradient + sgd, no momentum
    optnat = lambda n, init, cb: sgd(natural_gradient, init, step_size=step_size,
                                     num_iters=n, callback=cb, mass=.001)
    natural_lls = optimize_and_lls(optnat)
    elbo_lists.append((standard_lls, natural_lls))

# visually compare the ELBO
plt.figure(figsize=(12, 8))
colors = ['b', 'k', 'g']
for col, ss, (stand_lls, nat_lls) in zip(colors, step_sizes, elbo_lists):
    plt.plot(np.arange(len(stand_lls)), stand_lls, '--',
             label="standard (adam, step-size = %2.2f)" % ss, alpha=.5, c=col)
    plt.plot(np.arange(len(nat_lls)), nat_lls, '-',

    ax.set_yticks([])
    ax.set_xticks([])

fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

num_plotting_samples = 51

def callback(params, t, g):
    print("Iteration {} lower bound {}".format(t, -objective(params, t)))

    plt.cla()
    target_distribution = lambda x: np.exp(log_density(x, t))
    var_distribution = lambda x: np.exp(variational_log_density(params, x))
    plot_isocontours(ax, target_distribution)
    plot_isocontours(ax, var_distribution, cmap=plt.cm.bone)
    ax.set_autoscale_on(False)

    rs = npr.RandomState(0)
    samples = variational_sampler(params, num_plotting_samples, rs)
    plt.plot(samples[:, 0], samples[:, 1], 'x')

    plt.draw()
    plt.pause(1.0 / 30.0)

print("Optimizing variational parameters...")
variational_params = adam(grad(objective), init_var_params(D),
                          step_size=0.1, num_iters=2000, callback=callback)

    training_text = one_hot_to_string(train_inputs[:, t, :])
    predicted_text = one_hot_to_string(logprobs[:, t, :])
    print(training_text.replace('\n', ' ') + "|" +
          predicted_text.replace('\n', ' '))

def training_loss(params, iter):
    return -rnn_log_likelihood(params, train_inputs, train_inputs)

def callback(weights, iter, gradient):
    if iter % 10 == 0:
        print("Iteration", iter, "Train loss:", training_loss(weights, 0))
        print_training_prediction(weights)

# Build gradient of loss function using autograd.
training_loss_grad = grad(training_loss)

print("Training RNN...")
trained_params = adam(training_loss_grad, init_params,
                      step_size=0.1, num_iters=1000, callback=callback)

print()
print("Generating text from RNN...")
num_letters = 30
for t in range(20):
    text = ""
    for i in range(num_letters):
        seqs = string_to_one_hot(text, num_chars)[:, np.newaxis, :]
        logprobs = rnn_predict(trained_params, seqs)[-1].ravel()
        text += chr(npr.choice(len(logprobs), p=np.exp(logprobs)))
    print(text)
