def _laplace_vcov(self):
    """Finds the variance-covariance matrix of the Laplace approximation."""
    # The Laplace covariance is the inverse Hessian of the negative log
    # posterior at the mode:
    # (Sigma_0^{-1} + sum_i pi_i (1 - pi_i) x_i x_i^T)^{-1}
    pi = sigmoid(np.dot(self.X, self.mode))
    outer_products = row_outer(self.X)
    return np.linalg.inv(
        self.sigma0_inv
        + ((pi * (1 - pi)).reshape(self.n, 1, 1) * outer_products).sum(axis=0)
    )
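
# `row_outer` is not shown in this snippet; from its usage it presumably
# stacks the outer products x_i x_i^T of every row of X into an (n, p, p)
# array. A minimal sketch of such a helper (an assumption, not the original):
def row_outer(X):
    # result[i] = np.outer(X[i], X[i])
    return np.einsum('ni,nj->nij', X, X)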
def compare_approximations(self, n_min, n_max, n_step):
    """Compares the Laplace and the variational approximation. Since both
    are normal, it suffices to compare their means and their scales, so
    this function plots those parameters against each other for a range of
    sample sizes."""
    var_means, var_scales = [], []
    lap_means, lap_scales = [], []
    n_list = list(range(n_min, n_max, n_step))
    number = len(n_list)
    # Store the previous n so the figure can be saved properly
    n_old = self.n
    for n in n_list:
        self.n = n
        self.ybar = np.mean(
            np.random.binomial(n=1, p=sigmoid(self.beta), size=self.n)
        )
        self.sum = self.n * self.ybar
        # Laplace: mode at logit(ybar), scale from the observed information
        lap_means.append(logit(self.ybar))
        lap_scales.append(np.sqrt(1.0 / (self.sum * (1.0 - self.ybar))))
        var_mean, var_sigma2 = self._variational_em()
        var_means.append(var_mean)
        var_scales.append(np.sqrt(var_sigma2))
    var_means = np.array(var_means)
    var_scales = np.array(var_scales)
    lap_means = np.array(lap_means)
    lap_scales = np.array(lap_scales)
    # Marker sizes grow with n so larger samples stand out
    s_sizes = 10 * np.arange(1, number + 1)
    fig, ax = plt.subplots()
    ax.set_title("Comparing Parameters of Laplace and Variational"
                 " Normal Approximations")
    # Variational and Laplace (mu, sigma) pairs
    ax.scatter(var_means, var_scales, c='#2ca02c', s=s_sizes)
    ax.plot(var_means, var_scales, c='#2ca02c', label='variational')
    ax.scatter(lap_means, lap_scales, c='#ff7f0e', s=s_sizes)
    ax.plot(lap_means, lap_scales, c='#ff7f0e', label='laplace')
    ax.set_xlabel(r'$\mu$')
    ax.set_ylabel(r'$\sigma$')
    ax.axvline(self.beta, alpha=0.5, label='True Mode', ls=':', color='k')
    ax.legend()
    # Annotate each Laplace point with its sample size
    for i, txt in enumerate(n_list):
        ax.annotate(txt, (lap_means[i], lap_scales[i]))
    plt.tight_layout()
    plt.subplots_adjust(top=0.95)
    if self.save:
        self.n = n_old
        self.save_image("mean_sd_comparison.png")
    plt.show()
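
# Hypothetical usage (an assumption: the method lives on
# NoExplanatoryVariables, whose constructor -- seen in the comparison script
# further below -- takes `save` as its first positional argument and a dict
# of settings, and sets `beta` internally):
from no_explanatory_variables import NoExplanatoryVariables

model = NoExplanatoryVariables(False, dict={'seed': 0, 'n': 10, 'ybar': 0.6})
model.compare_approximations(n_min=10, n_max=200, n_step=10)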
import numpy as np
from scipy.signal import convolve
from skimage.measure import block_reduce


def forward_pass(W1, W2, X, y):
    # First layer is just the input image
    l0 = X
    # scipy's convolve flips the kernel, so reversing W1 makes this a
    # cross-correlation with the filter as stored
    l0_conv = convolve(l0, W1[::-1, ::-1], 'same', 'direct')
    l1 = relu(l0_conv)
    # Max-pool and flatten for the dense layer
    l1_max_pooled_raveled = block_reduce(
        l1, (max_pool_size, max_pool_size), np.max).ravel()
    l2 = sigmoid(np.dot(l1_max_pooled_raveled, W2))
    # Clip to avoid log(0) in the cross-entropy loss
    l2 = l2.clip(10**-16, 1 - 10**-16)
    loss = -(y * np.log(l2) + (1 - y) * np.log(1 - l2))
    accuracy = int(y == np.where(l2 > 0.5, 1, 0))
    return accuracy, loss
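
# `relu` and `sigmoid` are not defined in this snippet; presumably they live
# in a shared utilities module. A minimal sketch of what they could be:
def relu(x):
    # Element-wise max(x, 0); matches the l0_conv > 0 gradient mask used
    # in the training loop below
    return np.maximum(x, 0)


def sigmoid(x):
    # Logistic function mapping scores to (0, 1)
    return 1.0 / (1.0 + np.exp(-x))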
def predict_proba(X, parameters):
    '''
    Predicts the output probabilities for the given input and parameters
    :param X: shape: (m, n)
    :param parameters: dict with weights "w" of shape (n, 1) and bias "b"
    :return: yhat: the prediction probabilities, shape (m, 1)
    '''
    m = X.shape[0]
    w = parameters["w"]
    b = parameters["b"]
    # Linear score followed by the logistic link
    Z = np.dot(X, w) + b
    yhat = sigmoid(Z)
    return yhat.reshape(m, 1)
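
# Hypothetical usage with random data (shapes follow the docstring; assumes
# `sigmoid` from the utilities sketched above is in scope):
rng = np.random.default_rng(0)
X_demo = rng.standard_normal((5, 3))
params_demo = {"w": rng.standard_normal((3, 1)), "b": 0.0}
print(predict_proba(X_demo, params_demo))  # five probabilities in (0, 1)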
import numpy as np
import matplotlib.pyplot as plt

from no_explanatory_variables import NoExplanatoryVariables
from utility_functions import sigmoid

n_iter = 100
steps = 1000
x = np.linspace(-10, 10, steps)
# Absolute errors of the variational (tvs) and Laplace (tls) log densities
# against the true log posterior, one row per replication
tvs = np.zeros((n_iter, steps))
tls = np.zeros((n_iter, steps))

for i in range(n_iter):
    n = 1000
    theta = 1
    y = np.random.binomial(n=1, p=sigmoid(theta), size=n)
    mydict = {'seed': np.random.randint(0, 10000), 'n': n, 'ybar': np.mean(y)}
    np.random.seed(mydict['seed'])
    model = NoExplanatoryVariables(False, dict=mydict)
    # _ = model.sample(s=100000, b=500, t=1, scale=0.25, kde_scale=0.15)
    tvs[i, :] = abs(model.true_log_posterior(x) - model.log_variational(x))
    tls[i, :] = abs(model.true_log_posterior(x) - model.log_laplace(x))

fig, ax = plt.subplots()
tv_mean = tvs.mean(axis=0)
tl_mean = tls.mean(axis=0)
ax.plot(x, tv_mean, label='abs(true-var)')
ax.plot(x, tl_mean, label='abs(true-lap)')
ax.legend()
plt.show()
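
# A small optional addition (not in the original script): show the spread
# across the n_iter replications as a one-standard-deviation band around
# each mean error curve.
fig2, ax2 = plt.subplots()
for errs, label in [(tvs, 'abs(true-var)'), (tls, 'abs(true-lap)')]:
    mean, sd = errs.mean(axis=0), errs.std(axis=0)
    ax2.plot(x, mean, label=label)
    ax2.fill_between(x, mean - sd, mean + sd, alpha=0.3)
ax2.legend()
plt.show()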
def true_posterior(self, t):
    """Density of the logit-transformed beta distribution (not a beta anymore).

    If p ~ Beta(a, b) and t = logit(p), the change of variables with
    Jacobian dp/dt = sigmoid(t) * sigmoid(-t) gives the density
    sigmoid(t)^a * sigmoid(-t)^b / B(a, b)."""
    # beta(...) is the Euler beta function (scipy.special.beta)
    return (sigmoid(t)**self.a) * (sigmoid(-t)**self.b) / beta(self.a, self.b)
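
# Quick numerical sanity check (an added snippet, not in the original): the
# transformed density should still integrate to one. Standalone version with
# explicit, arbitrary shape parameters; assumes `sigmoid` is in scope.
from scipy.integrate import quad
from scipy.special import beta


def logit_beta_density(t, a=2.0, b=3.0):
    # Same form as true_posterior above, with explicit shape parameters
    return (sigmoid(t) ** a) * (sigmoid(-t) ** b) / beta(a, b)


total, _ = quad(logit_beta_density, -50, 50)
assert abs(total - 1.0) < 1e-6  # integrates to one, as a density should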
import sys

import numpy as np
from scipy.signal import convolve
from skimage.util import view_as_blocks


def train_model(W1, W2, num_epochs=5, eta=0.001, update_W1=True,
                update_W2=True):
    dl1 = np.zeros((image_size, image_size))
    for epoch in range(num_epochs):
        train_loss = averager()
        train_accuracy = averager()
        for i in range(len(y_train)):
            # Take a random sample
            k = np.random.randint(len(y_train))
            X = X_train[k]
            y = y_train[k]
            if (i + 1) % 100 == 0:
                sys.stdout.write('{}\r'.format(i + 1))
            # First layer is just the input
            l0 = X
            # Embed the image in a bigger, zero-padded image; this is used
            # below when computing corrections to the convolution filter
            lt0[K // 2:-K // 2 + 1, K // 2:-K // 2 + 1] = l0
            # Convolve with the filter
            l0_conv = convolve(l0, W1[::-1, ::-1], 'same')
            # Layer one is ReLU applied to the convolution
            l1 = relu(l0_conv)
            # Max pooling; record where each maximum came from so the
            # gradient can be routed back to that position
            view = view_as_blocks(l1, (max_pool_size, max_pool_size)).reshape(
                max_pooled_image_size, max_pooled_image_size, -1)
            l1_max_pooled_raveled = np.max(view, axis=2).ravel()
            arg_max_1d = np.argmax(view, axis=2)
            max_rows = (arg_max_1d // max_pool_size + _rows_adder).ravel()
            max_cols = (arg_max_1d % max_pool_size + _cols_adder).ravel()
            # Compute layer 2
            l2 = sigmoid(np.dot(l1_max_pooled_raveled, W2))
            l2 = l2.clip(10**-16, 1 - 10**-16)
            # Loss and accuracy
            loss = -(y * np.log(l2) + (1 - y) * np.log(1 - l2))
            accuracy = int(y == np.where(l2 > 0.5, 1, 0))
            # Save the loss and accuracy to a running averager
            train_loss.send(loss)
            train_accuracy.send(accuracy)
            # Derivative of the loss wrt the dense layer; note that
            # (1 - y) * l2 - y * (1 - l2) simplifies to l2 - y
            if update_W2:
                dW2 = ((1 - y) * l2 - y * (1 - l2)) * l1_max_pooled_raveled
            if update_W1:
                # Derivative of the loss wrt the output of the first layer,
                # routed back through the max-pooling positions
                dl1_max_pooled_raveled = ((1 - y) * l2 - y * (1 - l2)) * W2
                dl1[max_rows, max_cols] = dl1_max_pooled_raveled
                # Derivative of the loss wrt the convolution filter; the
                # ReLU passes gradient only where the convolution was positive
                dl1_f1p = np.where(l0_conv > 0, dl1, 0)
                dW1 = np.array([[(lt0[alpha:alpha + image_size,
                                      beta:beta + image_size]
                                  * dl1_f1p).sum()
                                 for beta in range(K)]
                                for alpha in range(K)])
                # Surprisingly, this vectorized alternative is slower than
                # the explicit loops above:
                # dW1 = (view_as_windows(lt0, (image_size, image_size))
                #        * dl1_f1p[None, None, :, :]).sum(axis=(2, 3))
            if update_W2:
                W2 += -eta * dW2
            if update_W1:
                W1 += -eta * dW1
                # Reset the touched entries for the next iteration
                dl1[max_rows, max_cols] = 0
        loss_averager_valid = averager()
        accuracy_averager_valid = averager()
        for X, y in zip(X_valid, y_valid):
            accuracy, loss = forward_pass(W1, W2, X, y)
            loss_averager_valid.send(loss)
            accuracy_averager_valid.send(accuracy)
        train_loss, train_accuracy, valid_loss, valid_accuracy = map(
            extract_averager_value, [
                train_loss, train_accuracy, loss_averager_valid,
                accuracy_averager_valid
            ])
        msg = ('Epoch {}: train loss {:.2f}, train acc {:.2f}, '
               'valid loss {:.2f}, valid acc {:.2f}').format(
                   epoch + 1, train_loss, train_accuracy, valid_loss,
                   valid_accuracy)
        print(msg)
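
# --- Assumed module-level setup -------------------------------------------
# The training code above references several globals defined elsewhere in
# the project. A minimal sketch of plausible definitions; every name, value,
# and shape here is inferred from the usage above (all assumptions):
image_size = 28                                  # e.g. MNIST-sized inputs
K = 3                                            # convolution filter size
max_pool_size = 2
max_pooled_image_size = image_size // max_pool_size

# Zero-padded buffer the input is embedded into when computing dW1
lt0 = np.zeros((image_size + K - 1, image_size + K - 1))

# Offsets mapping within-block argmax coordinates back to full-image indices
_rows_adder = (max_pool_size * np.arange(max_pooled_image_size))[:, None]
_cols_adder = (max_pool_size * np.arange(max_pooled_image_size))[None, :]


class averager:
    """Running mean with the send/extract interface used in the loops above."""

    def __init__(self):
        self.total = 0.0
        self.count = 0

    def send(self, value):
        self.total += value
        self.count += 1


def extract_averager_value(av):
    return av.total / av.count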