def log_pdf(self, x):
    if self.theta is None:
        raise RuntimeError("Model not fitted yet.")
    assert_array_shape(x, ndim=1, dims={0: self.D})
    phi = rff_feature_map_single(x, self.omega, self.u)
    return np.dot(phi, self.theta)

def fit(self, X):
    assert_array_shape(X, ndim=2, dims={1: self.D})
    if self.basis is None:
        self.basis = X
    # this calls the module-level fit, not self.fit
    self.alpha, self.beta = fit(self.basis, X, self.sigma, self.lmbda)
    self.X = X

def objective(self, X):
    assert_array_shape(X, ndim=2, dims={1: self.D})
    L_X = self.inc_cholesky["R"].T
    L_Y = incomplete_cholesky_new_points_gaussian(
        self.X, X, self.sigma,
        self.inc_cholesky["I"], self.inc_cholesky["R"], self.inc_cholesky["nu"]
    ).T
    b = compute_b(self.X, X, L_X, L_Y, self.sigma)
    return objective(self.X, X, self.sigma, self.lmbda, self.alpha, L_X, L_Y, b)

def fit(self, X):
    assert_array_shape(X, ndim=2, dims={1: self.D})

    # sub-sample if data is larger than previously set N
    if len(X) > self.N:
        logger.info("Sub-sampling %d/%d data." % (self.N, len(X)))
        inds = np.random.permutation(len(X))[:self.N]
        self.X = X[inds]
    else:
        self.X = np.copy(X)

    self.alpha = self.fit_wrapper_()

def update_fit(self, X, log_weights=None):
    assert_array_shape(X, ndim=2, dims={1: self.D})
    N = len(X)

    # don't do anything if no data observed
    if N == 0:
        return

    if log_weights is None:
        # uniform weights: log(1) = 0
        log_weights = np.zeros(N)
    assert_array_shape(log_weights, ndim=1, dims={0: N})

    # first update: use first of X and log_weights, and then discard them
    if self.log_sum_weights is None:
        # assume we have observed fake terms, which is needed to make the system
        # well-posed; L_C says that the fake terms had covariance self.lmbda,
        # which acts as a regulariser
        self.L_C = np.eye(self.m) * np.sqrt(self.lmbda)
        self.log_sum_weights = log_weights[0]
        self.b = compute_b(X[0].reshape(1, self.D), self.omega, self.u)
        self.n = 1

        X = X[1:]
        log_weights = log_weights[1:]
        N -= 1

    # don't do anything if no data left
    if N == 0:
        return

    old_L_C = np.array(self.L_C, copy=True)
    self.b, self.L_C = update_b_L_C_weighted(X, self.b, self.L_C,
                                             self.log_sum_weights, log_weights,
                                             self.omega, self.u)
    if np.any(np.isnan(self.L_C)) or np.any(np.isinf(self.L_C)):
        logger.warning("Numerical error while updating Cholesky factor of C.\n"
                       "Before update:\n%s\n"
                       "After update:\n%s\n"
                       "Updating data:\n%s\n"
                       "Updating log weights:\n%s\n"
                       % (str(old_L_C), str(self.L_C), str(X), str(log_weights)))
        raise RuntimeError("Numerical error while updating Cholesky factor of C.")

    # update terms and weights
    self.n += len(X)
    self.log_sum_weights = log_sum_exp(list(log_weights) + [self.log_sum_weights])

    # finally update solution
    self.theta = fit_L_C_precomputed(self.b, self.L_C)

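# Usage sketch for the online update above (not part of the original code).
# The estimator class name and constructor are assumptions; here it is called
# `KernelExpFiniteGaussian(sigma, lmbda, m, D)`, matching the attributes
# (self.m, self.omega, self.u, self.lmbda) that update_fit relies on.
#
#     import numpy as np
#     est = KernelExpFiniteGaussian(sigma=2.0, lmbda=0.01, m=100, D=2)
#     for _ in range(10):
#         X_batch = np.random.randn(50, 2)  # stream of mini-batches
#         est.update_fit(X_batch)           # uniform weights; pass log_weights to reweight
#     print(est.log_pdf(np.zeros(2)))
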
def grad(self, x):
    assert_array_shape(x, ndim=1, dims={0: self.D})
    # x is a single point of shape (D,)
    Kxx = self.sigma ** 2
    KxX, Kl = Matern_kernel(x[np.newaxis, :], self.X, sigma=self.sigma)  # shape (1, K)
    xX_grad = (self.X - x) * Kl.T
    tmp = np.dot(KxX, self.K_inv)  # shape (1, K)
    A = Kxx + self.lmbda - np.sum(tmp * KxX)  # scalar
    B = np.dot(KxX, self.X_grad) - np.dot(tmp + 1, xX_grad)  # shape (1, D)
    gradient = -B[0] / A  # shape (D,)
    return gradient

def grad(x, basis, sigma, alpha, beta):
    m, D = basis.shape
    assert_array_shape(x, ndim=1, dims={0: D})

    xi_grad = 0
    betasum_grad = 0
    for a, x_a in enumerate(basis):
        xi_grad += np.sum(gaussian_kernel_dx_i_dx_i_dx_j(x, x_a, sigma), axis=0) / m
        left_arg_hessian = gaussian_kernel_dx_i_dx_j(x, x_a, sigma)
        betasum_grad += beta[a, :].dot(left_arg_hessian)

    return alpha * xi_grad + betasum_grad

def grad(self, x):
    assert_array_shape(x, ndim=1, dims={0: self.D})
    # x is a single point of shape (D,)
    Kxx = 1  # scalar: Kxx = exp(-(x - x)**2 / self.sigma) = 1
    KxX = gaussian_kernel(x[np.newaxis, :], self.X, sigma=self.sigma)  # shape (1, K)
    xX_grad = gaussian_kernel_grad(x, self.X, self.sigma)  # shape (K, D)
    tmp = np.dot(KxX, self.K_inv)  # shape (1, K)
    A = Kxx + self.lmbda - np.sum(tmp * KxX)  # scalar
    B = np.dot(KxX, self.X_grad) - np.dot(tmp + 1, xX_grad)  # shape (1, D)
    gradient = -B[0] / A  # shape (D,)
    return gradient

def hessian(self, x):
    """
    Computes the Hessian of the learned log-density function.

    WARNING: This implementation is slow, so don't call repeatedly.
    """
    assert_array_shape(x, ndim=1, dims={0: self.D})

    H = np.zeros((self.D, self.D))
    for i, a in enumerate(self.alpha):
        H += a * gaussian_kernel_hessian_theano(x, self.X[i], self.sigma)

    return H

def third_order_derivative_tensor(self, x):
    """
    Computes the third order derivative tensor of the learned log-density function.

    WARNING: This implementation is slow, so don't call repeatedly.
    """
    assert_array_shape(x, ndim=1, dims={0: self.D})

    G3 = np.zeros((self.D, self.D, self.D))
    for i, a in enumerate(self.alpha):
        G3 += a * gaussian_kernel_third_order_derivative_tensor_theano(x, self.X[i], self.sigma)

    return G3

def grad(self, x):
    assert_array_shape(x, ndim=1, dims={0: self.D})
    # x is a single point of shape (D,)
    Kxx = 1  # scalar: Kxx = (1 + (x - x)**2 / (2 * sigma))**-1 = 1
    KxX = Cauchy_kernel(x[np.newaxis, :], self.X, sigma=self.sigma)  # shape (1, K)
    xX_grad = (self.X - x) / self.sigma * KxX.T ** 2  # shape (K, D)
    tmp = np.dot(KxX, self.K_inv)  # shape (1, K)
    A = Kxx + self.lmbda - np.sum(tmp * KxX)  # scalar
    B = np.dot(KxX, self.X_grad) - np.dot(tmp + 1, xX_grad)  # shape (1, D)
    gradient = -B[0] / A  # shape (D,)
    return gradient

def xvalidate_objective(self, X, num_folds=5, num_repetitions=1):
    assert_array_shape(X, ndim=2, dims={1: self.D})
    assert_positive_int(num_folds)
    assert_positive_int(num_repetitions)

    O = np.zeros((num_repetitions, num_folds))
    for i in range(num_repetitions):
        xval = XVal(N=len(X), num_folds=num_folds)
        for j, (train, test) in enumerate(xval):
            self.fit(X[train])
            O[i, j] = self.objective(X[test])

    return O

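# A minimal model-selection sketch built on xvalidate_objective above (not part
# of the original code). `make_estimator` is a hypothetical factory mapping a
# bandwidth to any estimator exposing fit/objective as defined in this section;
# the score objective is minimised, so smaller means a better fit.
def select_sigma(X, sigmas, make_estimator, num_folds=5, num_repetitions=2):
    scores = [
        np.mean(make_estimator(sigma).xvalidate_objective(X, num_folds, num_repetitions))
        for sigma in sigmas
    ]
    return sigmas[int(np.argmin(scores))]
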
def hessian(self, x):
    """
    Computes the Hessian of the learned log-density function.

    WARNING: This implementation is slow, so don't call repeatedly.
    """
    assert_array_shape(x, ndim=1, dims={0: self.D})

    H = np.zeros((self.D, self.D))
    for i, theta_i in enumerate(self.theta):
        H += theta_i * rff_feature_map_comp_hessian_theano(x, self.omega[:, i], self.u[i])

    # the RFF feature map is a Monte Carlo average, so we have to normalise by np.sqrt(m)
    return H / np.sqrt(self.m)

def third_order_derivative_tensor(self, x):
    """
    Computes the third order derivative tensor of the learned log-density function.

    WARNING: This implementation is slow, so don't call repeatedly.
    """
    assert_array_shape(x, ndim=1, dims={0: self.D})

    G3 = np.zeros((self.D, self.D, self.D))
    for i, theta_i in enumerate(self.theta):
        G3 += theta_i * rff_feature_map_comp_third_order_tensor_theano(x, self.omega[:, i], self.u[i])

    # the RFF feature map is a Monte Carlo average, so we have to normalise by np.sqrt(m)
    return G3 / np.sqrt(self.m)

def log_pdf(x, basis, sigma, alpha, beta):
    m, D = basis.shape
    assert_array_shape(x, ndim=1, dims={0: D})

    SE_dx_dx_l = lambda x, y: gaussian_kernel_dx_dx(x, y.reshape(1, -1), sigma)
    SE_dx_l = lambda x, y: gaussian_kernel_grad(x, y.reshape(1, -1), sigma)

    xi = 0
    betasum = 0
    for a in range(m):
        x_a = basis[a]
        xi += np.sum(SE_dx_dx_l(x, x_a)) / m
        gradient_x_xa = np.squeeze(SE_dx_l(x, x_a))
        betasum += np.dot(gradient_x_xa, beta[a, :])

    # np.float was removed in NumPy 1.24; use the builtin float instead
    return float(alpha * xi + betasum)

def second_order_grad(x, X, sigma, alpha, beta, basis=None):
    r"""
    Computes $\frac{\partial^2 \log p(x)}{\partial x_i^2}$.
    """
    if basis is None:
        basis = X
    _, D = X.shape
    m, _ = basis.shape
    assert_array_shape(x, ndim=1, dims={0: D})

    xi_grad = 0
    betasum_grad = 0
    for a, x_a in enumerate(basis):
        xi_grad += np.sum(gaussian_kernel_dx_i_dx_i_dx_j_dx_j(x, x_a, sigma), axis=0) / m
        left_arg_hessian = gaussian_kernel_dx_i_dx_i_dx_j(x, x_a, sigma)
        betasum_grad += beta[a, :].dot(left_arg_hessian)

    return alpha * xi_grad + betasum_grad

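# Usage sketch for the stateless API above (log_pdf / grad / second_order_grad;
# not part of the original code). alpha and beta would normally come from the
# module-level fit(...) used further up; here they are random stand-ins that
# only demonstrate the calling convention and the expected shapes.
def _demo_stateless_api(m=5, D=2, sigma=2.0, seed=0):
    rng = np.random.RandomState(seed)
    basis = rng.randn(m, D)
    alpha = 0.5             # scalar coefficient, as consumed above
    beta = rng.randn(m, D)  # one D-dimensional row per basis point
    x = np.zeros(D)
    return (log_pdf(x, basis, sigma, alpha, beta),
            grad(x, basis, sigma, alpha, beta))
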
def fit(self, X, log_weights=None):
    assert_array_shape(X, ndim=2, dims={1: self.D})
    N = len(X)

    # in any case, delete previous solution
    self._initialise_solution()

    if N <= 0:
        # don't do anything if no data observed
        return
    elif N == 1:
        # can get away with a single update as it is not expensive
        self.update_fit(X, log_weights)
        return

    if log_weights is None:
        # b is the same, as the first x is used as b straight away in update_fit
        self.b = compute_b(X, self.omega, self.u)

        # remove first term from C computation as it is replaced with the regulariser
        C = compute_C(X[1:], self.omega, self.u)

        # C so far consists of the average of N-1 terms;
        # the additional term is the regulariser C (first in update_fit).
        # Use Knuth's online update for the new mean, which is the average of N terms.
        # Effectively, this is equal to
        # C = (C * (N - 1) + np.eye(self.m) * self.lmbda) / N
        delta = np.eye(self.m) * self.lmbda - C
        C += delta / N

        self.L_C = np.linalg.cholesky(C)

        # as all weights are equal, this corresponds to repeated update_fit calls
        self.log_sum_weights = np.log(N)
        self.n = N

        self.theta = fit_L_C_precomputed(self.b, self.L_C)
    else:
        # weighted batch learning here corresponds to repeated online learning
        assert_array_shape(log_weights, ndim=1, dims={0: N})
        self.update_fit(X, log_weights)

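# Sanity-check sketch (not part of the original code) for the Knuth-style mean
# update in fit above: folding the regulariser term lmbda * I into the running
# average of N - 1 terms via C += (lmbda * I - C) / N is exactly
# (C * (N - 1) + lmbda * I) / N.
def _check_knuth_update(m=4, N=7, lmbda=0.1, seed=0):
    rng = np.random.RandomState(seed)
    A = rng.randn(m, m)
    C = A.dot(A.T) / m  # symmetric PSD stand-in for the average of N - 1 terms
    direct = (C * (N - 1) + np.eye(m) * lmbda) / N
    online = C + (np.eye(m) * lmbda - C) / N
    assert np.allclose(direct, online)
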
def log_pdf(self, x):
    assert_array_shape(x, ndim=1, dims={0: self.D})
    k = gaussian_kernel(self.X, x.reshape(1, self.D), self.sigma)[:, 0]
    return np.dot(self.alpha, k)

def log_pdf_multiple(self, X):
    assert_array_shape(X, ndim=2, dims={1: self.D})
    Phi = rff_feature_map(X, self.omega, self.u)
    return np.dot(Phi, self.theta)

def objective(self, X):
    assert_array_shape(X, ndim=2, dims={1: self.D})
    # note we need to recompute b and C here
    return objective_L_C_precomputed(X, self.theta, self.omega, self.u, self.b, self.L_C)

def log_pdf_multiple(self, X):
    assert_array_shape(X, ndim=2, dims={1: self.D})
    k = gaussian_kernel(self.X, X, self.sigma)
    return np.dot(self.alpha, k)

def grad(self, x):
    assert_array_shape(x, ndim=1, dims={0: self.D})
    k = gaussian_kernel_grad(x, self.X, self.sigma)
    return np.dot(self.alpha, k)

def objective(self, X):
    assert_array_shape(X, ndim=2, dims={1: self.D})
    return objective(self.X, X, self.sigma, self.lmbda, self.alpha, self.K)

def objective(self, X):
    assert_array_shape(X, ndim=2, dims={1: self.D})
    return self.compute_objective(X, self.X, self.sigma, self.alpha, self.beta)

def grad(self, x):
    assert_array_shape(x, ndim=1, dims={0: self.D})
    KXx = Cauchy_kernel(self.X, x[np.newaxis, :], sigma=self.sigma)  # shape (K, 1)
    xX_grad = (self.X - x) / self.sigma * KXx ** 2  # shape (K, D)
    gradient = np.dot(self.alpha, xX_grad)
    return gradient

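# Gradient-check sketch (not part of the original code): any grad above can be
# validated against a central finite difference of the matching log_pdf. `est`
# stands for any fitted estimator from this section, not a specific class.
def _max_grad_error(est, x, eps=1e-5):
    D = len(x)
    fd = np.zeros(D)
    for d in range(D):
        e = np.zeros(D)
        e[d] = eps
        fd[d] = (est.log_pdf(x + e) - est.log_pdf(x - e)) / (2.0 * eps)
    return np.max(np.abs(fd - est.grad(x)))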