def residual_kernel(K_Y: np.ndarray, K_X: np.ndarray, use_expectation=True, with_gp=True, sigma_squared=1e-3, return_learned_K_X=False):
    """Return the kernel matrix of the residual of Y given X, assuming Y=f(X).

    Both inputs are kernel matrices and are centered (via ``centering``)
    before anything else happens.

    Parameters
    ----------
    K_Y, K_X
        Kernel matrices of Y and X.
    use_expectation
        If true, use the form from Flaxman et al. 2016; otherwise the form
        from Zhang et al. 2011.
    with_gp
        If true, fit a GP regression (ARD linear kernel + white noise) on
        low-rank features derived from the two kernel matrices; the fitted
        linear kernel then replaces ``K_X`` and the fitted white-noise
        variance replaces ``sigma_squared``.
    sigma_squared
        Noise variance used as given when ``with_gp`` is false.
    return_learned_K_X
        If true, also return the ``K_X`` that was actually used (the centered
        input, or the GP-learned kernel matrix when ``with_gp`` is true).

    Returns
    -------
    The residual kernel matrix, or ``(residual kernel, K_X)`` when
    ``return_learned_K_X`` is true.
    """
    import gpflow
    from gpflow.kernels import White, Linear
    from gpflow.models import GPR

    K_Y = centering(K_Y)
    K_X = centering(K_X)
    n_samples = len(K_Y)

    if with_gp:
        # Keep at most 100 eigen-components, and never more than a quarter
        # of the sample count.
        rank = min(100, n_samples // 4)
        y_eigvals, y_eigvecs = truncated_eigen(*eigdec(K_Y, rank))
        x_eigvals, x_eigvecs = truncated_eigen(*eigdec(K_X, rank))

        # Explicit features: feats_x @ feats_x.T is close to K_X.
        feats_x = x_eigvecs @ diag(sqrt(x_eigvals))
        feats_y = y_eigvecs @ diag(sqrt(y_eigvals))
        n_dims = feats_x.shape[1]

        ard_linear = Linear(n_dims, ARD=True)
        noise = White(n_dims)
        regressor = GPR(feats_x, feats_y, ard_linear + noise)
        gpflow.train.ScipyOptimizer().minimize(regressor)

        # From here on, work with the learned kernel and noise level.
        K_X = ard_linear.compute_K_symm(feats_x)
        sigma_squared = noise.variance.value

    # == I-K @ inv(K+Sigma) in Zhang et al. 2011
    P = pdinv(np.eye(n_samples) + K_X / sigma_squared)

    if use_expectation:
        # Flaxman et al. 2016 Gaussian Processes for Independence Tests with
        # Non-iid Data in Causal Inference.
        residual = (K_X + P @ K_Y) @ P
    else:
        # Zhang et al. 2011. Kernel-based Conditional Independence Test and
        # Application in Causal Discovery.
        residual = P @ K_Y @ P

    return (residual, K_X) if return_learned_K_X else residual
def regression_distance_k(Kx: np.ndarray, Ky: np.ndarray):
    """Return a pairwise distance matrix derived from regressing Y on X.

    A GP regression (ARD linear kernel + white noise) is fitted on low-rank
    features obtained from the eigendecompositions of ``Kx`` and ``Ky``.
    With the learned kernel ``K`` and noise variance ``s``, the matrix
    ``P = K @ inv(K + s * I)`` is applied on both sides of ``Ky`` to get
    ``M = P @ Ky @ P``. ``M`` is then turned into distances:

        D[i, j] = sqrt(M[i, i] + M[j, j] - 2 * M[i, j])

    Parameters
    ----------
    Kx, Ky
        Kernel matrices of X and Y.

    Returns
    -------
    The ``T x T`` distance matrix ``D``, where ``T = len(Kx)``.

    Notes
    -----
    Emits a 'not tested yet!' warning on every call.
    """
    warnings.warn('not tested yet!')
    import gpflow
    from gpflow.kernels import White, Linear
    from gpflow.models import GPR

    T = len(Kx)
    # Keep at most 100 eigen-components, and never more than a quarter of
    # the sample count.
    rank = min(100, T // 4)

    eig_Ky, eiy = truncated_eigen(*eigdec(Ky, rank))
    eig_Kx, eix = truncated_eigen(*eigdec(Kx, rank))

    X = eix @ diag(sqrt(eig_Kx))  # X @ X.T is close to K_X
    Y = eiy @ diag(sqrt(eig_Ky))
    n_feats = X.shape[1]

    linear = Linear(n_feats, ARD=True)
    white = White(n_feats)
    gp_model = GPR(X, Y, linear + white)
    gpflow.train.ScipyOptimizer().minimize(gp_model)

    Kx = linear.compute_K_symm(X)
    sigma_squared = white.variance.value

    P = Kx @ pdinv(Kx + sigma_squared * np.eye(T))
    M = P @ Ky @ P

    # The previous `np.ones((T, 1)) @ np.diag(M).T` multiplied a (T, 1) array
    # by a 1-D vector (transposing a 1-D array is a no-op), which raises a
    # shape error for any T > 1. Broadcasting yields the intended
    # M[i, i] + M[j, j] term directly.
    self_sim = np.diag(M)
    squared = self_sim[:, None] + self_sim[None, :] - 2 * M

    # Round-off can push near-zero entries slightly negative; clip so the
    # square root does not produce NaNs.
    return np.sqrt(np.maximum(squared, 0))
def compute_residual_eig(Y: np.ndarray, Kx: np.ndarray) -> np.ndarray:
    """Residual of Y based on Kx, a kernel matrix of X.

    Low-rank features of X are taken from the truncated eigendecomposition
    of ``Kx``. A GP regression (ARD linear kernel + white noise) of ``Y`` on
    those features is fitted, and the result is

        inv(I + K / s) @ Y

    with ``K`` the learned linear kernel matrix and ``s`` the learned
    white-noise variance. The result is squeezed before being returned.

    Parameters
    ----------
    Y
        Observations to residualize; must have the same length as ``Kx``.
    Kx
        Kernel matrix of X.

    Raises
    ------
    AssertionError
        If ``len(Y) != len(Kx)``.
    """
    import gpflow

    assert len(Y) == len(Kx)
    T = len(Kx)

    eig_Kx, eix = truncated_eigen(*eigdec(Kx, min(100, T // 4)))
    phi_X = eix @ np.diag(np.sqrt(eig_Kx))  # phi_X @ phi_X.T is close to Kx
    n_feats = phi_X.shape[1]

    linear_kernel = Linear(n_feats, ARD=True)
    # Keep a direct handle on the noise kernel rather than looking it up as
    # an attribute of the combined kernel afterwards.
    white_kernel = White(n_feats)
    gp_model = GPR(phi_X, Y, linear_kernel + white_kernel)

    # The other GP-based helpers in this file train through
    # gpflow.train.ScipyOptimizer; use it whenever the model does not offer
    # the `.optimize()` method this function originally relied on.
    if hasattr(gp_model, 'optimize'):
        gp_model.optimize()
    else:
        gpflow.train.ScipyOptimizer().minimize(gp_model)

    new_Kx = linear_kernel.compute_K_symm(phi_X)
    # The variance may come back as a 0-d or a length-1 array; ravel handles
    # both, where plain `[0]` indexing fails on a 0-d array.
    sigma_squared = np.ravel(white_kernel.variance.value)[0]

    return (pdinv(np.eye(T) + new_Kx / sigma_squared) @ Y).squeeze()