def sample_mean_cov_from_deep_gp(all_params, X, with_noise=False):
    # Propagate the inputs through each GP layer, sampling from one layer's
    # predictive distribution to form the inputs of the next.
    predict = predict_funcs_with_noise if with_noise else predict_layer_funcs
    X_star = X
    layer_params, x0, y0 = unpack_all_params(all_params)
    n_layers = len(x0)
    for layer in range(n_layers):
        layer_mean, layer_cov = predict[layer](
            layer_params[layer], np.atleast_2d(x0[layer]).T, y0[layer], X_star)
        X_star = np.atleast_2d(sample_from_mvn(layer_mean, layer_cov)).T
    return layer_mean, layer_cov
def plot_isocontours(ax, func, xlimits=[-2, 2], ylimits=[-4, 2], numticks=101):
    # Evaluate func on a grid and draw its contour lines on the given axes.
    x = np.linspace(*xlimits, num=numticks)
    y = np.linspace(*ylimits, num=numticks)
    X, Y = np.meshgrid(x, y)
    zs = func(np.concatenate([np.atleast_2d(X.ravel()), np.atleast_2d(Y.ravel())]).T)
    Z = zs.reshape(X.shape)
    ax.contour(X, Y, Z)
    ax.set_yticks([])
    ax.set_xticks([])
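# Usage sketch for plot_isocontours (illustrative, not from the original
# source): draw the density contours of a standard 2-D Gaussian. Assumes
# matplotlib.pyplot is imported as plt, numpy as np, and
# scipy.stats.multivariate_normal is available as `mvn`.
fig, ax = plt.subplots()
plot_isocontours(ax, lambda pts: mvn.pdf(pts, mean=np.zeros(2), cov=np.eye(2)),
                 xlimits=[-3, 3], ylimits=[-3, 3])
plt.show()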
def evaluate_prior(all_params):  # clean up code so we don't compute matrices twice
    layer_params, x0, y0 = unpack_all_params(all_params)
    log_prior = 0
    for layer in range(n_layers):
        mean, cov_params, noise_scale = unpack_kernel_params(layer_params[layer])
        cov_y_y = covariance_function(cov_params, np.atleast_2d(x0[layer]).T,
                                      np.atleast_2d(x0[layer]).T) \
            + noise_scale * np.eye(len(y0[layer]))
        # Add jitter to the diagonal for numerical stability before evaluating the logpdf.
        log_prior += mvn.logpdf(y0[layer], np.ones(len(cov_y_y)) * mean,
                                cov_y_y + np.eye(len(cov_y_y)) * 1e-6 * np.max(np.diag(cov_y_y)))
    return log_prior
def mvt_logpdf(x, mu, Li, df):
    # Log-density of a multivariate Student-t. Li is the inverse of the lower
    # Cholesky factor of the scale matrix, so Ki = Li.T @ Li is its inverse.
    dim = Li.shape[0]
    Ki = np.dot(Li.T, Li)
    # The determinant is the product of the squared diagonal of the Cholesky factor.
    logdet = 2 * log(1. / np.diag(Li)).sum()
    lpdf_const = (gammaln((df + dim) / 2)
                  - (gammaln(df / 2)
                     + (log(df) + log(np.pi)) * dim * 0.5
                     + logdet * 0.5))
    x = np.atleast_2d(x)
    if x.shape[1] != mu.size:
        x = x.T
    assert(x.shape[1] == mu.size or x.shape[0] == mu.size)
    d = (x - mu.reshape((1, mu.size))).T
    Ki_d_scal = np.dot(Ki, d) / df                   # vector
    d_Ki_d_scal_1 = diag_dot(d.T, Ki_d_scal) + 1.    # scalar
    res_pdf = (lpdf_const - 0.5 * (df + dim) * np.log(d_Ki_d_scal_1)).flatten()
    if res_pdf.size == 1:
        res_pdf = float(res_pdf)
    return res_pdf
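# Usage sketch for mvt_logpdf (illustrative, not from the original source):
# evaluate the log-density of a 2-D Student-t with 4 degrees of freedom.
# `Li` is assumed to be the inverse of the lower Cholesky factor of the scale
# matrix K_scale, so Ki = Li.T @ Li above equals its inverse; the helpers
# `diag_dot`, `gammaln` and `log` are assumed to come from the surrounding module.
K_scale = np.array([[2.0, 0.3], [0.3, 1.0]])
Li_example = np.linalg.inv(np.linalg.cholesky(K_scale))
lp = mvt_logpdf(np.array([0.5, -0.2]), np.zeros(2), Li_example, df=4.0)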
def fit(cls, samples, return_instance=False):  # observations expected in rows
    mu = samples.mean(0)
    var = np.atleast_2d(np.cov(samples, rowvar=0))
    if return_instance:
        return mvnorm(mu, var)
    else:
        return (mu, var)
def log_pdf_and_grad(self, x, pdf=True, grad=True, T=np):
    assert(pdf or grad)
    if T == np:
        x = np.atleast_2d(x)
        if x.shape[1] != self.mu.size:
            x = x.T
        assert(np.sum(np.array(x.shape) == self.mu.size) >= 1)
    d = (x - self.mu.reshape((1, self.mu.size))).T
    Ki_d = T.dot(self.Ki, d)  # vector
    if pdf:
        # vector times vector
        res_pdf = (self.lpdf_const - 0.5 * diag_dot(d.T, Ki_d)).T
        if res_pdf.size == 1:
            res_pdf = res_pdf.reshape(res_pdf.size)[0]
        if not grad:
            return res_pdf
    if grad:
        res_grad = -Ki_d.T
        if res_grad.shape[0] <= 1:
            res_grad = res_grad.flatten()
        if not pdf:
            return res_grad
    return (res_pdf, res_grad)
def grad_func(*args):
    inp = anp.array(anp.broadcast_arrays(*args))
    result = anp.atleast_2d(elementwise_grad(argwrapper)(inp))
    # Put 'gradient' axis at end
    axes = list(range(len(result.shape)))
    result = result.transpose(*chain(axes[1:], [axes[0]]))
    return result
def __init__(self, state_dim: int, target_state: np.ndarray = None,
             weights: np.ndarray = None, cost_width: np.ndarray = None):
    """
    Initialize saturated loss function
    :param state_dim: state dimensionality
    :param target_state: target state which should be reached
    :param weights: weight matrix
    :param cost_width: TODO what is this
    """
    self.state_dim = state_dim

    # set target state to all zeros if not otherwise specified
    self.target_state = np.atleast_2d(
        np.zeros(self.state_dim) if target_state is None else target_state)

    # weight matrix
    self.weights = np.identity(self.state_dim) if weights is None else weights

    # -----------------------------------------------------
    # This is only useful if we have any penalties etc.
    self.cost_width = np.array([1]) if cost_width is None else cost_width
def __call__(self, log_hyperparams, x, z=None):
    log_hyperparams = np.atleast_2d(log_hyperparams)
    left, right = self.sub
    split = left.n_hyperparams(x)
    return left(log_hyperparams[:, :split], x, z) + right(log_hyperparams[:, split:], x, z)
def ppf(self, component_cum_prob, eig=True):
    assert(component_cum_prob.shape[1] == self.get_num_unif())
    rval = []
    for i in range(component_cum_prob.shape[0]):
        r = component_cum_prob[i, :]
        # First uniform picks the mixture component, the rest parameterize it.
        comp = self.dist_cat.ppf(r[0])
        rval.append(self.comp_dist[comp].ppf(np.atleast_2d(r[1:]), eig=eig))
    return np.array(rval).reshape((component_cum_prob.shape[0], self.dim))
def equa2pixel(self, s_equa):
    if self.use_wcs:
        print("using wcs in equa2pixel")
        return self.wcs.wcs_world2pix(np.atleast_2d(s_equa), 0).squeeze()
    phi1rad = self.phi_n[1] / 180. * np.pi
    s_iwc = np.array([(s_equa[0] - self.phi_n[0]) * np.cos(phi1rad),
                      (s_equa[1] - self.phi_n[1])])
    s_pix = np.dot(self.Ups_n_inv, s_iwc) + self.rho_n
    return s_pix
def callback(params): print("Log likelihood {}, Squared Error {}".format(-objective(params),squared_error(params))) layer_params, x0, y0 = unpack_all_params(params) # Show posterior marginals. plot_xs = np.reshape(np.linspace(-5, 5, 300), (300,1)) plot_full_gp(ax_end_to_end, params, plot_xs) if n_layers == 1: ax_end_to_end.plot(x0[0],y0[0], 'ro') else: hidden_mean, hidden_cov = predict_layer_funcs[0](layer_params[0], np.atleast_2d(x0[0]).T, y0[0], plot_xs) plot_gp(ax_x_to_h, x0[0], y0[0], hidden_mean, hidden_cov, plot_xs) ax_x_to_h.set_title("X to hiddens, with inducing points") y_mean, y_cov = predict_layer_funcs[1](layer_params[1], np.atleast_2d(x0[1]).T, y0[1], plot_xs) plot_gp(ax_h_to_y, x0[1], y0[1], y_mean, y_cov, plot_xs) ax_h_to_y.set_title("hiddens to layer 2, with inducing points") plt.draw() plt.pause(1.0/60.0)
def mvt_ppf(component_cum_prob, mu, L, df):
    # Transform uniform variates into multivariate Student-t samples: a
    # Gaussian draw scaled by the square root of df over a chi-squared draw.
    from scipy.stats import norm, chi2
    mu = np.atleast_1d(mu).flatten()
    assert(component_cum_prob.shape[1] == mu.size + 1)
    L = np.atleast_2d(L)
    rval = []
    for r in range(component_cum_prob.shape[0]):
        samp_mvn_0mu = L.dot(norm.ppf(component_cum_prob[r, :-1]))
        samp_chi2 = chi2.ppf(component_cum_prob[r, -1], df)
        samp_mvt_0mu = samp_mvn_0mu * np.sqrt(df / samp_chi2)
        rval.append(mu + samp_mvt_0mu)
    return np.array(rval)
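# Usage sketch for mvt_ppf (illustrative, not from the original source):
# push uniform variates through the transform to draw 5 samples from a 2-D
# multivariate t with df=4. Each row supplies one uniform per dimension plus a
# final uniform that drives the chi-squared mixing variable; `L_example` is the
# Cholesky factor of the scale matrix.
rs = npr.RandomState(1)
L_example = np.linalg.cholesky(np.array([[1.0, 0.2], [0.2, 1.0]]))
samples = mvt_ppf(rs.rand(5, 3), np.zeros(2), L_example, df=4.0)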
def callback(params): print("Log marginal likelihood {}".format(log_marginal_likelihood(params))) # Show posterior marginals. plot_xs = np.reshape(np.linspace(-5, 5, 300), (300,1)) pred_mean, pred_cov = combined_predict_fun(params, X, y, plot_xs) plot_gp(ax_end_to_end, X, y, pred_mean, pred_cov, plot_xs) ax_end_to_end.set_title("X to y") layer1_params, layer2_params, hiddens = unpack_all_params(params) h_star_mean, h_star_cov = predict_layer_funcs[0](layer1_params, X, hiddens, plot_xs) y_star_mean, y_star_cov = predict_layer_funcs[0](layer2_params, np.atleast_2d(hiddens).T, y, plot_xs) plot_gp(ax_x_to_h, X, hiddens, h_star_mean, h_star_cov, plot_xs) ax_x_to_h.set_title("X to hiddens") plot_gp(ax_h_to_y, np.atleast_2d(hiddens).T, y, y_star_mean, y_star_cov, plot_xs) ax_h_to_y.set_title("hiddens to y") plt.draw() plt.pause(1.0/60.0)
def choose_next_point(domain_min, domain_max, acquisition_function, num_tries=15, rs=npr.RandomState(0)):
    """Uses gradient-based optimization to find next query point."""
    init_points = rs.rand(num_tries, D) * (domain_max - domain_min) + domain_min

    grad_obj = value_and_grad(lambda x: -acquisition_function(x))

    def optimize_point(init_point):
        print('.', end='')
        result = minimize(grad_obj, x0=init_point, jac=True, method='L-BFGS-B',
                          options={'maxiter': 10}, bounds=list(zip(domain_min, domain_max)))
        return result.x, acquisition_function(result.x)

    optimized_points, optimized_values = list(zip(*list(map(optimize_point, init_points))))
    print()
    best_ix = np.argmax(optimized_values)
    return np.atleast_2d(optimized_points[best_ix])
def mog_like(x, means, icovs, dets, pis):
    """
    Compute the likelihood under a mixture of Gaussians with
        means = [mu0, mu1, ..., muK]
        icovs = [C0^-1, ..., CK^-1]
        dets  = [|C0|, ..., |CK|]
        pis   = [pi1, ..., piK]   (sum to 1)
    at locations given by x = [x1, ..., xN].
    """
    xx = np.atleast_2d(x)
    centered = xx[:, :, np.newaxis] - means.T[np.newaxis, :, :]
    solved = np.einsum('ijk,lji->lki', icovs, centered)
    logprobs = -0.5 * np.sum(solved * centered, axis=1) - np.log(2 * np.pi) \
        - 0.5 * np.log(dets) + np.log(pis)
    logprob = scpm.logsumexp(logprobs, axis=1)
    if len(x.shape) == 1:
        return np.exp(logprob[0])
    else:
        return np.exp(logprob)
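# Usage sketch for mog_like (illustrative, not from the original source):
# evaluate a two-component 2-D Gaussian mixture density at a single point.
# `scpm` is assumed to be the module providing logsumexp, as in the function above.
means_ex = np.array([[0.0, 0.0], [2.0, 2.0]])
covs_ex = np.array([np.eye(2), 0.5 * np.eye(2)])
icovs_ex = np.array([np.linalg.inv(c) for c in covs_ex])
dets_ex = np.array([np.linalg.det(c) for c in covs_ex])
pis_ex = np.array([0.6, 0.4])
density = mog_like(np.array([1.0, 1.0]), means_ex, icovs_ex, dets_ex, pis_ex)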
def mog_logmarglike(x, means, covs, pis, ind=0):
    """ Marginal log-likelihood along coordinate `ind` of the mixture. """
    K = pis.shape[0]
    xx = np.atleast_2d(x)
    centered = xx.T - means[:, ind, np.newaxis].T
    logprobs = []
    for kk in range(K):
        quadterm = centered[:, kk] * centered[:, kk] * (1. / covs[kk, ind, ind])
        logprobsk = -.5 * quadterm - .5 * np.log(2 * np.pi) \
            - .5 * np.log(covs[kk, ind, ind]) + np.log(pis[kk])
        logprobs.append(np.squeeze(logprobsk))
    logprobs = np.array(logprobs)
    logprob = scpm.logsumexp(logprobs, axis=0)
    if np.isscalar(x):
        return logprob[0]
    else:
        return logprob
def __init__(self, mu, K, Ki=None, logdet_K=None, L=None):
    mu = np.atleast_1d(mu).flatten()
    K = np.atleast_2d(K)
    assert(np.prod(mu.shape) == K.shape[0])
    assert(K.shape[0] == K.shape[1])
    self.mu = mu
    self.K = K
    # Eigendecomposition with eigenvalues reordered to descending order.
    (val, vec) = np.linalg.eigh(K)
    idx = np.arange(mu.size - 1, -1, -1)
    (self.eigval, self.eigvec) = (np.diag(val[idx]), vec[:, idx])
    self.eig = self.eigvec.dot(np.sqrt(self.eigval))
    self.dim = K.shape[0]
    #(self.Ki, self.logdet) = (np.linalg.inv(K), np.linalg.slogdet(K)[1])
    (self.Ki, self.L, self.Li, self.logdet) = pdinv(K)
    self.lpdf_const = -0.5 * float(self.dim * np.log(2 * np.pi) + self.logdet)
def __init__(self, mu, K, df, Ki=None, logdet_K=None, L=None):
    mu = np.atleast_1d(mu).flatten()
    K = np.atleast_2d(K)
    assert(np.prod(mu.shape) == K.shape[0])
    assert(K.shape[0] == K.shape[1])
    self.mu = mu
    self.K = K
    self.df = df
    self._freeze_chi2 = stats.chi2(df)
    self.dim = K.shape[0]
    self._df_dim = self.df + self.dim
    #(self.Ki, self.logdet) = (np.linalg.inv(K), np.linalg.slogdet(K)[1])
    (self.Ki, self.L, self.Li, self.logdet) = pdinv(K)
    self.lpdf_const = float(gammaln((self.df + self.dim) / 2)
                            - (gammaln(self.df / 2)
                               + (log(self.df) + log(np.pi)) * self.dim * 0.5
                               + self.logdet * 0.5))
def gmm_logprob(x, ws, mus, sigs, invsigs=None, logdets=None):
    """ Gaussian mixture model log-likelihood.

    Input:
      - x       = N x D array of data (N iid)
      - ws      = K-length vector that sums to 1, mixing weights
      - mus     = K x D array of mixture component means
      - sigs    = K x D x D array of mixture component covariances
      - invsigs = K x D x D array of mixture component covariance inverses
      - logdets = K array of mixture component covariance logdets

    Output:
      - N-length array of log-likelihood values

    TODO: speed this up
    """
    if sigs is None:
        assert invsigs is not None and logdets is not None, \
            "need invsigs and logdets if you don't include sigs"

    # compute invsigs if needed
    if invsigs is None:
        invsigs = np.array([np.linalg.inv(sig) for sig in sigs])
        logdets = np.array([np.linalg.slogdet(sig)[1] for sig in sigs])

    # compute each gauss component separately
    xx = np.atleast_2d(x)
    centered = xx[:, :, np.newaxis] - mus.T[np.newaxis, :, :]
    solved = np.einsum('ijk,lji->lki', invsigs, centered)
    logprobs = -0.5 * np.sum(solved * centered, axis=1) - \
        np.log(2 * np.pi) - 0.5 * logdets + np.log(ws)
    logprob = scpm.logsumexp(logprobs, axis=1)
    if len(x.shape) == 1:
        return logprob[0]
    else:
        return logprob
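# Usage sketch for gmm_logprob (illustrative, not from the original source):
# batch log-likelihood of three 2-D points under a two-component mixture,
# passing covariances and letting the function compute their inverses.
x_batch = np.array([[0.1, -0.2], [1.5, 1.8], [3.0, 0.5]])
ws_ex = np.array([0.7, 0.3])
mus_ex = np.array([[0.0, 0.0], [2.0, 2.0]])
sigs_ex = np.array([np.eye(2), 0.5 * np.eye(2)])
loglik = gmm_logprob(x_batch, ws_ex, mus_ex, sigs_ex)  # shape (3,)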
def plot_deep_gp(ax, params, plot_xs):
    ax.cla()
    rs = npr.RandomState(0)
    sampled_means_and_covs = [sample_mean_cov_from_deep_gp(params, plot_xs, rs=rs, with_noise=False, FITC=False)
                              for i in range(n_samples_to_plot)]
    sampled_means, sampled_covs = zip(*sampled_means_and_covs)
    avg_pred_mean = np.mean(sampled_means, axis=0)
    avg_pred_cov = np.mean(sampled_covs, axis=0)
    # Add the between-sample covariance of the means to the averaged predictive covariance.
    avg_pred_cov = avg_pred_cov + np.sum(
        np.array([np.dot(np.atleast_2d(sampled_means[i] - avg_pred_mean).T,
                         np.atleast_2d(sampled_means[i] - avg_pred_mean))
                  for i in range(n_samples_to_plot)]), axis=0) / n_samples
    marg_std = np.sqrt(np.diag(avg_pred_cov))
    if n_samples_to_plot > 19:
        ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
                np.concatenate([avg_pred_mean - 1.96 * marg_std,
                                (avg_pred_mean + 1.96 * marg_std)[::-1]]),
                alpha=.15, fc='Blue', ec='None')
    ax.plot(plot_xs, avg_pred_mean, 'b')
    sampled_funcs = np.array([rs.multivariate_normal(mean, cov * random)
                              for mean, cov in sampled_means_and_covs])
    ax.plot(plot_xs, sampled_funcs.T)
    ax.plot(X, y, 'kx')
    #ax.set_ylim([-1.5,1.5])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title("Full Deep GP, inputs to outputs")
def sample_from_mvn(mu, sigma):
    # Make sure we return 2d, also make sure data is 2d.
    rs = npr.RandomState(0)
    # Add jitter to the diagonal so the Cholesky factorization succeeds;
    # `random` is a module-level flag that switches sampling on or off.
    jitter = 1e-6 * np.eye(len(sigma)) * np.max(np.diag(sigma))
    sample = np.dot(np.linalg.cholesky(sigma + jitter), rs.randn(len(sigma))) + mu
    return np.atleast_2d(sample if random == 1 else mu).T
def combined_predict_fun(all_params, X, y, xs):
    layer1_params, layer2_params, hiddens = unpack_all_params(all_params)
    h_star_mean, h_star_cov = predict_layer1(layer1_params, X, hiddens, xs)
    y_star_mean, y_star_cov = predict_layer2(layer2_params, np.atleast_2d(hiddens).T, y,
                                             np.atleast_2d(h_star_mean).T)
    return y_star_mean, y_star_cov
def log_marginal_likelihood(all_params):
    layer1_params, layer2_params, h = unpack_all_params(all_params)
    return log_marginal_likelihood_layer1(layer1_params, X, h) + \
        log_marginal_likelihood_layer2(layer2_params, np.atleast_2d(h).T, y)
def acquisition_function(xstar):
    xstar = np.atleast_2d(xstar)  # To work around a bug in scipy.minimize
    mean, std = predict_func(xstar)
    return expected_new_max(mean, std, defaultmax(y))
def fit(cls, samples, return_instance=False):  # observations expected in rows
    raise NotImplementedError()
    mu = samples.mean(0)
    return (mu, np.atleast_2d(np.cov(samples, rowvar=0)))