def _get_inv_logdet_cholesky(mat):
    """
    Compute the inverse and the log-determinant of a matrix from its
    Cholesky factor.

    If the Cholesky factorisation of ``mat`` fails, the matrix is passed
    through ``_eig_val_correction(mat, eps=1e-1)`` and the corrected matrix
    is factorised instead. In that case the returned inverse and
    log-determinant describe the corrected matrix, not ``mat`` itself.

    :param mat: square matrix to invert
    :return: tuple ``(mat_inv, mat_logdet)``
    """
    try:
        L = np.linalg.cholesky(mat)
    except np.linalg.LinAlgError:
        # Only a failed factorisation triggers the fallback. Unrelated
        # errors (and KeyboardInterrupt) are no longer swallowed here.
        L = np.linalg.cholesky(_eig_val_correction(mat, eps=1e-1))
    L_inv = np.linalg.inv(L)
    # mat = L L^T  =>  mat^{-1} = L^{-T} L^{-1}
    mat_inv = L_inv.T.dot(L_inv)
    # log det(mat) = 2 * sum(log(diag(L)))
    mat_logdet = np.sum(np.log(np.diag(L))) * 2
    return mat_inv, mat_logdet
def _vi_means_oracle(self, points, targets, params, ind_points):
    """
    Oracle function for 'vi' and 'means' methods.

    Evaluates the objective ``F_v`` and its gradient for the given
    hyper-parameters. A deep copy of ``self.covariance_obj`` is configured
    with ``params``, so the object stored on ``self`` is not modified.

    NOTE(review): a definition with the same name and signature appears
    again later in this file; if both live in the same scope, the later
    one is the one that takes effect.

    :param points: data points array; the number of data points is read
        from ``points.shape[1]``
    :param targets: target values vector (results are indexed with
        ``[0, 0]``, so presumably a 2-D column of shape (n, 1) - confirm
        against callers)
    :param params: hyper-parameters vector; its last entry is used as the
        noise level ``sigma``
    :param ind_points: inducing points
    :return: tuple ``(F_v, gradient)``. ``gradient`` is a 1-D array with
        one entry per function returned by
        ``cov_obj.get_derivative_function_list(params)``, then one entry
        for ``sigma`` and, only when ``self.method == 'vi'``, one entry
        per element of ``ind_points`` (row index outer, column index
        inner).
    """
    n = points.shape[1]
    sigma = params[-1]

    cov_obj = copy.deepcopy(self.covariance_obj)
    cov_obj.set_params(params)
    cov_fun = cov_obj.covariance_function

    # K_mm^{-1} through the Cholesky factor of K_mm.
    K_mm = cov_fun(ind_points, ind_points)
    K_mm_l = np.linalg.cholesky(K_mm)
    K_mm_l_inv = np.linalg.inv(K_mm_l)
    K_mm_inv = K_mm_l_inv.T.dot(K_mm_l_inv)

    K_nm = cov_fun(points, ind_points)
    K_mn = K_nm.T
    K_mnK_nm = K_mn.dot(K_nm)
    K_mm_inv_K_mnK_nm = K_mm_inv.dot(K_mnK_nm)
    Q_nn_tr = np.trace(K_mm_inv_K_mnK_nm)

    # m x m matrix whose inverse gives B^{-1} in Woodbury form, where
    # B = sigma^2 I + K_nm K_mm^{-1} K_mn.
    anc = K_mm + K_mnK_nm / sigma**2
    try:
        anc_l = np.linalg.cholesky(anc)
    except np.linalg.LinAlgError:
        # Best-effort recovery: only a failed factorisation is handled;
        # any other error propagates to the caller.
        print('Warning, matrix is not positive definite', params)
        anc_l = np.linalg.cholesky(_eig_val_correction(anc, eps=10))
    anc_l_inv = np.linalg.inv(anc_l)
    anc_inv = anc_l_inv.T.dot(anc_l_inv)

    K_mn_y = K_mn.dot(targets)
    y_B_inv_y = (targets.T.dot(targets) / sigma**2
                 - K_mn_y.T.dot(anc_inv.dot(K_mn_y)) / sigma**4)
    B_inv_y = targets / sigma**2 - K_nm.dot(anc_inv.dot(K_mn_y)) / sigma**4
    B_log_det = (np.sum(np.log(np.diag(anc_l))) + n * np.log(sigma)
                 - np.sum(np.log(np.diag(K_mm_l)))) * 2

    # The covariance of a point with itself, evaluated at a single 1x1
    # zero input; it is multiplied by n below.
    zero = np.array([[0]])
    K_nn_diag = cov_fun(zero, zero)
    F_v = (- B_log_det / 2 - y_B_inv_y / 2
           - (K_nn_diag * n - Q_nn_tr) / (2 * sigma**2))

    # Gradient
    gradient = []
    A = anc_inv
    # Products that do not depend on the differentiated parameter.
    K_mn_B_inv_y = K_mn.dot(B_inv_y)
    A_K_mnK_nm = A.dot(K_mnK_nm)

    for func in cov_obj.get_derivative_function_list(params):
        dK_nm = func(points, ind_points)
        dK_mn = dK_nm.T
        dK_mm = func(ind_points, ind_points)
        # d(K_mm^{-1}) = -K_mm^{-1} dK_mm K_mm^{-1}
        dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))
        K_mndK_nm = K_mn.dot(dK_nm)
        dB_dtheta_tr = (2 * np.trace(K_mm_inv.dot(K_mndK_nm))
                        + np.trace(dK_mm_inv.dot(K_mnK_nm)))
        dB_B_inv_y = (dK_nm.dot(K_mm_inv.dot(K_mn_B_inv_y))
                      + K_nm.dot(dK_mm_inv.dot(K_mn_B_inv_y))
                      + K_nm.dot(K_mm_inv.dot(dK_mn.dot(B_inv_y))))
        y_B_inv_dB_B_inv_y = B_inv_y.T.dot(dB_B_inv_y)
        B_inv_dB_tr = dB_dtheta_tr / sigma**2 - (
            2 * np.trace((A.dot(K_mndK_nm)).dot(K_mm_inv_K_mnK_nm))
            + np.trace(A_K_mnK_nm.dot(dK_mm_inv.dot(K_mnK_nm)))
        ) / sigma**4
        dK_nn = func(zero, zero)
        gradient.append(
            (-B_inv_dB_tr / 2 + y_B_inv_dB_B_inv_y / 2
             - (dK_nn * n - dB_dtheta_tr) / (2 * sigma**2))[0, 0])

    # sigma derivative
    dK_mm = cov_obj.get_noise_derivative(K_mm.shape[0])
    dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))
    dQ_dtheta_tr = np.trace(dK_mm_inv.dot(K_mnK_nm))
    dQ_B_inv_y = K_nm.dot(dK_mm_inv.dot(K_mn_B_inv_y))
    y_B_inv_dQ_B_inv_y = B_inv_y.T.dot(dQ_B_inv_y)
    y_B_inv_dB_B_inv_y = (2 * sigma * B_inv_y.T.dot(B_inv_y)
                          + y_B_inv_dQ_B_inv_y)
    dB_dtheta_tr = 2 * sigma * n + dQ_dtheta_tr
    B_inv_dB_tr = dB_dtheta_tr / sigma**2 - (
        np.trace(A_K_mnK_nm.dot(dK_mm_inv).dot(K_mnK_nm))
        + 2 * sigma * np.trace(A_K_mnK_nm)
    ) / sigma**4
    dK_nn = 2 * sigma
    gradient.append(
        (-B_inv_dB_tr / 2 + y_B_inv_dB_B_inv_y / 2
         - (n * dK_nn - dQ_dtheta_tr) / (2 * sigma**2))[0, 0]
        + (n * K_nn_diag[0, 0] - Q_nn_tr) / sigma**3)

    # inducing points derivatives
    # By now this is not written in an optimal way
    if self.method == 'vi':
        # Dense n x n inverse of B (Woodbury form).
        B_inv = (np.eye(n) / sigma**2
                 - K_mn.T.dot(anc_inv.dot(K_mn)) / sigma**4)
        K_mn_derivatives = cov_obj.covariance_derivative(ind_points, points)
        K_mm_derivatives = cov_obj.covariance_derivative(ind_points,
                                                         ind_points)
        for j in range(ind_points.shape[0]):
            for i in range(ind_points.shape[1]):
                # Only row i of K_mn is filled in for this entry.
                dK_mn = np.zeros(K_mn.shape)
                dK_mn[i, :] = K_mn_derivatives[j, i, :]
                dK_nm = dK_mn.T
                # Row i and column i of K_mm are filled in.
                dK_mm = np.zeros(K_mm.shape)
                dK_mm[i, :] = K_mm_derivatives[j, i, :]
                dK_mm[:, i] = K_mm_derivatives[j, i, :].T
                dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))
                dB_dtheta = ((dK_nm.dot(K_mm_inv)
                              + K_nm.dot(dK_mm_inv)).dot(K_mn)
                             + K_nm.dot(K_mm_inv.dot(dK_mn)))
                dK_nn = 0
                gradient.append(
                    self._vi_lower_bound_partial_derivative(
                        targets, dB_dtheta, dB_dtheta, B_inv, sigma,
                        dK_nn))
    return F_v[0, 0], np.array(gradient)
def _vi_means_oracle(self, points, targets, params, ind_points):
    """
    Oracle function for 'vi' and 'means' methods.

    Evaluates the objective ``F_v`` and its gradient for the given
    hyper-parameters. A deep copy of ``self.covariance_obj`` is configured
    with ``params``, so the object stored on ``self`` is not modified.

    NOTE(review): an earlier definition with the same name and signature
    exists in this file; if both live in the same scope, this later one
    replaces it.

    :param points: data points array; the number of data points is read
        from ``points.shape[1]``
    :param targets: target values vector (results are indexed with
        ``[0, 0]``, so presumably a 2-D column of shape (n, 1) - confirm
        against callers)
    :param params: hyper-parameters vector; its last entry is used as the
        noise level ``sigma``
    :param ind_points: inducing points
    :return: tuple ``(F_v, gradient)``. ``gradient`` is a 1-D array with
        one entry per function returned by
        ``cov_obj.get_derivative_function_list(params)``, then one entry
        for ``sigma`` and, only when ``self.method == 'vi'``, one entry
        per element of ``ind_points`` (row index outer, column index
        inner).
    """
    n = points.shape[1]
    sigma = params[-1]

    cov_obj = copy.deepcopy(self.covariance_obj)
    cov_obj.set_params(params)
    cov_fun = cov_obj.covariance_function

    # K_mm^{-1} through the Cholesky factor of K_mm.
    K_mm = cov_fun(ind_points, ind_points)
    K_mm_l = np.linalg.cholesky(K_mm)
    K_mm_l_inv = np.linalg.inv(K_mm_l)
    K_mm_inv = K_mm_l_inv.T.dot(K_mm_l_inv)

    K_nm = cov_fun(points, ind_points)
    K_mn = K_nm.T
    K_mnK_nm = K_mn.dot(K_nm)
    K_mm_inv_K_mnK_nm = K_mm_inv.dot(K_mnK_nm)
    Q_nn_tr = np.trace(K_mm_inv_K_mnK_nm)

    # m x m matrix whose inverse gives B^{-1} in Woodbury form, where
    # B = sigma^2 I + K_nm K_mm^{-1} K_mn.
    anc = K_mm + K_mnK_nm / sigma**2
    try:
        anc_l = np.linalg.cholesky(anc)
    except np.linalg.LinAlgError:
        # Best-effort recovery: only a failed factorisation is handled;
        # any other error propagates to the caller.
        print('Warning, matrix is not positive definite', params)
        anc_l = np.linalg.cholesky(_eig_val_correction(anc, eps=10))
    anc_l_inv = np.linalg.inv(anc_l)
    anc_inv = anc_l_inv.T.dot(anc_l_inv)

    K_mn_y = K_mn.dot(targets)
    y_B_inv_y = (targets.T.dot(targets) / sigma**2
                 - K_mn_y.T.dot(anc_inv.dot(K_mn_y)) / sigma**4)
    B_inv_y = targets / sigma**2 - K_nm.dot(anc_inv.dot(K_mn_y)) / sigma**4
    B_log_det = (np.sum(np.log(np.diag(anc_l))) + n * np.log(sigma)
                 - np.sum(np.log(np.diag(K_mm_l)))) * 2

    # The covariance of a point with itself, evaluated at a single 1x1
    # zero input; it is multiplied by n below.
    zero = np.array([[0]])
    K_nn_diag = cov_fun(zero, zero)
    F_v = (- B_log_det / 2 - y_B_inv_y / 2
           - (K_nn_diag * n - Q_nn_tr) / (2 * sigma**2))

    # Gradient
    gradient = []
    A = anc_inv
    # Products that do not depend on the differentiated parameter.
    K_mn_B_inv_y = K_mn.dot(B_inv_y)
    A_K_mnK_nm = A.dot(K_mnK_nm)

    for func in cov_obj.get_derivative_function_list(params):
        dK_nm = func(points, ind_points)
        dK_mn = dK_nm.T
        dK_mm = func(ind_points, ind_points)
        # d(K_mm^{-1}) = -K_mm^{-1} dK_mm K_mm^{-1}
        dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))
        K_mndK_nm = K_mn.dot(dK_nm)
        dB_dtheta_tr = (2 * np.trace(K_mm_inv.dot(K_mndK_nm))
                        + np.trace(dK_mm_inv.dot(K_mnK_nm)))
        dB_B_inv_y = (dK_nm.dot(K_mm_inv.dot(K_mn_B_inv_y))
                      + K_nm.dot(dK_mm_inv.dot(K_mn_B_inv_y))
                      + K_nm.dot(K_mm_inv.dot(dK_mn.dot(B_inv_y))))
        y_B_inv_dB_B_inv_y = B_inv_y.T.dot(dB_B_inv_y)
        B_inv_dB_tr = dB_dtheta_tr / sigma**2 - (
            2 * np.trace((A.dot(K_mndK_nm)).dot(K_mm_inv_K_mnK_nm))
            + np.trace(A_K_mnK_nm.dot(dK_mm_inv.dot(K_mnK_nm)))
        ) / sigma**4
        dK_nn = func(zero, zero)
        gradient.append(
            (-B_inv_dB_tr / 2 + y_B_inv_dB_B_inv_y / 2
             - (dK_nn * n - dB_dtheta_tr) / (2 * sigma**2))[0, 0])

    # sigma derivative
    dK_mm = cov_obj.get_noise_derivative(K_mm.shape[0])
    dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))
    dQ_dtheta_tr = np.trace(dK_mm_inv.dot(K_mnK_nm))
    dQ_B_inv_y = K_nm.dot(dK_mm_inv.dot(K_mn_B_inv_y))
    y_B_inv_dQ_B_inv_y = B_inv_y.T.dot(dQ_B_inv_y)
    y_B_inv_dB_B_inv_y = (2 * sigma * B_inv_y.T.dot(B_inv_y)
                          + y_B_inv_dQ_B_inv_y)
    dB_dtheta_tr = 2 * sigma * n + dQ_dtheta_tr
    B_inv_dB_tr = dB_dtheta_tr / sigma**2 - (
        np.trace(A_K_mnK_nm.dot(dK_mm_inv).dot(K_mnK_nm))
        + 2 * sigma * np.trace(A_K_mnK_nm)
    ) / sigma**4
    dK_nn = 2 * sigma
    gradient.append(
        (-B_inv_dB_tr / 2 + y_B_inv_dB_B_inv_y / 2
         - (n * dK_nn - dQ_dtheta_tr) / (2 * sigma**2))[0, 0]
        + (n * K_nn_diag[0, 0] - Q_nn_tr) / sigma**3)

    # inducing points derivatives
    # By now this is not written in an optimal way
    if self.method == 'vi':
        # Dense n x n inverse of B (Woodbury form).
        B_inv = (np.eye(n) / sigma**2
                 - K_mn.T.dot(anc_inv.dot(K_mn)) / sigma**4)
        K_mn_derivatives = cov_obj.covariance_derivative(ind_points, points)
        K_mm_derivatives = cov_obj.covariance_derivative(ind_points,
                                                         ind_points)
        for j in range(ind_points.shape[0]):
            for i in range(ind_points.shape[1]):
                # Only row i of K_mn is filled in for this entry.
                dK_mn = np.zeros(K_mn.shape)
                dK_mn[i, :] = K_mn_derivatives[j, i, :]
                dK_nm = dK_mn.T
                # Row i and column i of K_mm are filled in.
                dK_mm = np.zeros(K_mm.shape)
                dK_mm[i, :] = K_mm_derivatives[j, i, :]
                dK_mm[:, i] = K_mm_derivatives[j, i, :].T
                dK_mm_inv = -K_mm_inv.dot(dK_mm.dot(K_mm_inv))
                dB_dtheta = ((dK_nm.dot(K_mm_inv)
                              + K_nm.dot(dK_mm_inv)).dot(K_mn)
                             + K_nm.dot(K_mm_inv.dot(dK_mn)))
                dK_nn = 0
                gradient.append(
                    self._vi_lower_bound_partial_derivative(
                        targets, dB_dtheta, dB_dtheta, B_inv, sigma,
                        dK_nn))
    return F_v[0, 0], np.array(gradient)