def LMO_err(params, M=2):
    """Score log-scale kernel hyperparameters with a batched error term.

    Args:
        params: log-values. After exponentiation every entry but the last
            is used as a divisor ``al`` (indices 0 and 1 are read) and the
            last entry as the multiplier ``bl``.
        M: number of permuted row indices per batch.

    Returns:
        The sum over batches of ``b_y.T @ K_i @ b_y`` divided by the number
        of batches and by ``M**2``.

    Reads the module-level names ``L0``, ``EYEN``, ``nystr``, ``eig_vec_K``,
    ``inv_eig_val_K``, ``W_nystr_Y``, ``W``, ``Y``, ``X``, ``N2``,
    ``JITTER`` and ``chol_inv``, and draws the permutation from NumPy's
    global random state.
    """
    hyper = np.exp(params)
    al, bl = hyper[:-1], hyper[-1]

    # One term per entry of L0 (presumably per-dimension squared distances
    # -- TODO confirm), plus a small diagonal term.
    term_0 = bl * bl * np.exp(-L0[0] / al[0] / al[0] / 2)
    term_1 = bl * bl * np.exp(-L0[1] / al[1] / al[1] / 2)
    L = term_0 + term_1 + 1e-6 * EYEN

    if not nystr:
        LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
        C = L @ LWL_inv @ L / N2
        c = C @ W @ Y * N2
    else:
        L_vec = L @ eig_vec_K
        core = np.linalg.inv(eig_vec_K.T @ L_vec / N2 + inv_eig_val_K)
        C = L - L_vec @ core @ L_vec.T / N2
        c = C @ W_nystr_Y * N2

    residual = c - Y
    n_rows = X.shape[0]
    order = np.random.permutation(n_rows)

    total = 0
    n_batches = 0
    for start in range(0, n_rows, M):
        batch = order[start:start + M]
        sub = np.ix_(batch, batch)
        K_b = W[sub] * N2
        C_b = C[sub]
        # The identity is sized from the batch, so a short final batch works.
        b_y = np.linalg.inv(np.eye(C_b.shape[0]) - C_b @ K_b) @ residual[batch]
        total += b_y.T @ K_b @ b_y
        n_batches += 1
    return total[0, 0] / n_batches / M**2
def LMO_err(params, M=10):
    """Score log-scale kernel hyperparameters with a batched error term.

    Args:
        params: two log-values; ``np.exp(params)`` is unpacked into the
            divisor ``al`` and the multiplier ``bl``.
        M: number of permuted row indices per batch.

    Returns:
        The sum over batches of ``b_y.T @ K_i @ b_y`` divided by the number
        of batches and by ``M ** 2``.

    Side effects:
        Re-seeds both NumPy's and the stdlib's global random generators
        with 2 on every call, so the batch partition is the same each time.

    Reads the module-level names ``L0``, ``EYEN``, ``nystr``, ``eig_vec_K``,
    ``inv_eig_val_K``, ``W_nystr_Y``, ``W``, ``Y``, ``X``, ``N2``,
    ``JITTER`` and ``chol_inv``.
    """
    np.random.seed(2)
    random.seed(2)
    al, bl = np.exp(params)
    L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
    if nystr:
        tmp_mat = L @ eig_vec_K
        C = L - tmp_mat @ np.linalg.inv(
            eig_vec_K.T @ tmp_mat / N2 + inv_eig_val_K) @ tmp_mat.T / N2
        c = C @ W_nystr_Y * N2
    else:
        LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
        C = L @ LWL_inv @ L / N2
        c = C @ W @ Y * N2
    c_y = c - Y

    lmo_err = 0
    N = 0
    n_rows = X.shape[0]
    permutation = np.random.permutation(n_rows)
    for i in range(0, n_rows, M):
        indices = permutation[i:i + M]
        block = np.ix_(indices, indices)
        K_i = W[block] * N2
        C_i = C[block]
        # Size the identity from the batch itself: the last batch is shorter
        # than M whenever n_rows is not a multiple of M, and a fixed
        # np.eye(M) would then fail with a shape mismatch.
        b_y = np.linalg.inv(np.eye(len(indices)) - C_i @ K_i) @ c_y[indices]
        lmo_err += b_y.T @ K_i @ b_y
        N += 1
    return lmo_err[0, 0] / N / M ** 2
def get_causal_effect(params, do_A, w):
    """Average the fitted function over ``w`` for each value in ``do_A``.

    Meant to be called from within the experiment function.

    Args:
        params: pair ``(al, bl)`` used directly (no exponentiation) as the
            divisor and multiplier of the kernel expression.
        do_A: iterable of values; each one is paired with every entry of
            ``w``.
        w: array whose entries are flattened into a single column.

    Returns:
        ``np.concatenate`` of one ``(1, 1)`` mean prediction per element of
        ``do_A``.

    Side effects:
        Re-seeds NumPy's and the stdlib's global random generators with 4
        and prints one line per element of ``do_A``.

    Reads the module-level names ``L0``, ``EYEN``, ``nystr``, ``eig_vec_K``,
    ``eig_val_K``, ``W_nystr``, ``W``, ``Y``, ``X``, ``N2``, ``JITTER``,
    ``chol_inv`` and ``_sqdist``.
    """
    np.random.seed(4)
    random.seed(4)
    al, bl = params
    L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
    if nystr:
        alpha = EYEN - eig_vec_K @ np.linalg.inv(
            eig_vec_K.T @ L @ eig_vec_K / N2
            + np.diag(1 / eig_val_K / N2)) @ eig_vec_K.T @ L / N2
        alpha = alpha @ W_nystr @ Y * N2
    else:
        LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
        alpha = LWL_inv @ L @ W @ Y

    # Loop-invariant: flatten w to a column once and take the repeat count
    # from that column, so the two stacked columns always have equal length
    # (also when w arrives as a row vector or with several columns).
    w_col = w.reshape(-1, 1)
    n_w = w_col.shape[0]

    EYhat_do_A = []
    for a in do_A:
        a_col = np.repeat(a, [n_w]).reshape(-1, 1)
        aw = np.concatenate([a_col, w_col], axis=-1)
        ate_L0 = _sqdist(aw, X)
        ate_L = bl * bl * np.exp(-ate_L0 / al / al / 2)
        h_out = ate_L @ alpha
        mean_h = np.mean(h_out).reshape(-1, 1)
        EYhat_do_A.append(mean_h)
        print('a = {}, beta_a = {}'.format(np.mean(a_col), mean_h))
    return np.concatenate(EYhat_do_A)
def callback0(params, timer=None):
    """Optimizer callback: refit ``alpha`` for ``params`` and record progress.

    Args:
        params: pair ``(al, bl)`` used directly (no exponentiation).
        timer: when truthy, the function returns right after computing the
            predictions, before any of the globals is modified.

    Side effects:
        Updates the globals ``Nfeval``, ``prev_norm``, ``opt_params`` and
        ``opt_test_err`` and prints a progress line.

    Raises:
        Exception: when ``norm[0, 0] / prev_norm >= 3``.
    """
    global Nfeval, prev_norm, opt_params, opt_test_err
    # `x % 1 == 0` holds for every integer counter, so this always runs.
    if Nfeval % 1 == 0:
        al, bl = params
        L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
        if nystr:
            alpha = EYEN - eig_vec_K @ np.linalg.inv(
                eig_vec_K.T @ L @ eig_vec_K / N2 + np.diag(1 / eig_val_K)) @ eig_vec_K.T @ L / N2
            alpha = alpha @ W_nystr @ Y
        else:
            LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
            alpha = LWL_inv @ L @ W @ Y
            # L_W_inv = chol_inv(W*N2+L_inv)
        test_L = bl * bl * np.exp(-test_L0 / al / al / 2)
        pred_mean = test_L @ alpha
        if timer:
            return
        # Mean squared difference between predictions and test_Y.
        test_err = ((pred_mean - test_Y) ** 2).mean()
        # ((pred_mean-test_Y)**2/np.diag(pred_cov)).mean()+(np.log(np.diag(pred_cov))).mean()
        norm = alpha.T @ L @ alpha
    Nfeval += 1
    # NOTE(review): the nesting from here on was reconstructed from a
    # whitespace-collapsed source -- confirm it against the intended layout.
    if prev_norm is not None:
        # Abort the optimisation once the quadratic form jumps to at least
        # three times the stored value.
        if norm[0, 0] / prev_norm >= 3:
            if opt_params is None:
                opt_test_err = test_err
                opt_params = params
            print(True, opt_params, opt_test_err, prev_norm)
            raise Exception
    # prev_norm only ever moves down (or is set on the first call).
    if prev_norm is None or norm[0, 0] <= prev_norm:
        prev_norm = norm[0, 0]
    opt_test_err = test_err
    opt_params = params
    print('params,test_err, norm: ', opt_params, opt_test_err, prev_norm)
def alt_newton_coord_descent(self, X, Y, max_iter=200, convergence_tolerance=1e-6):
    """Alternate a line-searched update of ``self.Kyy`` with a refresh of ``self.Kyx``.

    Each iteration records the current negative log-likelihood, asks
    ``self.descent_direction_Kyy()`` for a direction, steps ``self.Kyy``
    along it with the step size returned by ``self.line_search``, and then
    replaces ``self.Kyx`` with ``self.Kyx_coordinate_descent()``.

    Args:
        X, Y: 2-D arrays; ``X.shape[1]`` normalises the three products
            stored in ``self.Sxx``, ``self.Syy`` and ``self.Sxy``.
        max_iter: upper bound on the number of iterations.
        convergence_tolerance: stop once the negative log-likelihood changes
            by less than this within one iteration (checked from the second
            iteration on).

    Returns:
        ``(Kyy copy, Kyx copy, converged_up_to_tolerance)``.

    Side effects:
        Resets ``self.nll``, ``self.lnll`` and ``self.lrs``; updates
        ``self.Kyy``, ``self.Kyy_inv`` and ``self.Kyx``; prints progress
        every 100 iterations and on overflow.
    """
    m = X.shape[1]
    self.Sxx = X.dot(X.T) / m
    self.Syy = Y.dot(Y.T) / m
    self.Sxy = X.dot(Y.T) / m
    self.nll = []
    self.lnll = []
    self.lrs = []
    converged_up_to_tolerance = False

    for t in range(max_iter):
        if t % 100 == 0:
            print('newton_iter {}='.format(X.shape[1]), t)

        # Likelihood before this iteration's update; the convergence test
        # below compares against it.
        self.nll.append(self.neg_log_likelihood())

        Kyy_direction = self.descent_direction_Kyy()
        if not np.isfinite(Kyy_direction).all():
            print('Newton optimization failed due to overflow.')
            return self.Kyy.copy(), self.Kyx.copy(), converged_up_to_tolerance

        # line_search returns both the step size and a factor LL that is
        # reused for the inverse below.
        LL, learning_rate = self.line_search(Kyy_direction)
        self.lrs.append(learning_rate)
        # The sum already allocates a fresh array, so no explicit copy.
        self.Kyy = self.Kyy + learning_rate * Kyy_direction
        self.Kyy_inv = util.chol_inv(LL)

        self.Kyx = self.Kyx_coordinate_descent()

        if not (np.isfinite(self.Kyy_inv).all()
                and np.isfinite(self.Kyx).all()):
            # Fall back to a directly inverted, slightly regularised Kyy.
            EPS = 1e-05
            self.Kyy_inv = np.linalg.inv(self.Kyy + EPS * np.eye(self.ny))
            if not np.isfinite(self.Kyy_inv).all():
                print('Newton optimization failed due to overflow.')
                return (self.Kyy.copy(), self.Kyx.copy(),
                        converged_up_to_tolerance)

        if t > 0 and np.abs(
                self.nll[-1] - self.neg_log_likelihood()) < convergence_tolerance:
            converged_up_to_tolerance = True
            break

    return self.Kyy.copy(), self.Kyx.copy(), converged_up_to_tolerance
def train(self):
    """Fit the hyperparameters with L-BFGS and cache the factorised kernel.

    Starts from ``self.get_default_theta()``. On a ``LinAlgError`` the best
    parameters seen so far are reused with ``theta[0]`` raised by
    ``log(10)`` and the optimisation is retried once with a smaller
    history (``m=10``). Any other error stops the optimisation early and
    the best parameters recorded by the callback are kept.

    Side effects:
        Sets ``self.loss``, ``self.theta``, ``self.L``, ``self.alpha`` and
        ``self.for_diag``; prints status messages.
    """
    theta0 = self.get_default_theta()
    self.loss = np.inf
    self.theta = np.copy(theta0)
    # The return value is not needed. The callback below reads self.nlz,
    # which this method never assigns -- presumably neg_log_likelihood
    # stores it as a side effect (verify).
    self.neg_log_likelihood(theta0)

    def loss(theta):
        return self.neg_log_likelihood(theta)

    def callback(theta):
        # Keep the parameters with the lowest recorded objective value.
        if self.nlz < self.loss:
            self.loss = self.nlz
            self.theta = np.copy(theta)

    def report_early_stop():
        # Must be called from inside an ``except`` block so that
        # format_exc() sees the active exception.
        print('GP. Exception caught, L-BFGS early stopping...')
        if self.debug:
            print(traceback.format_exc())

    gloss = value_and_grad(loss)

    # ``except Exception`` (not a bare ``except``) so that KeyboardInterrupt
    # and SystemExit still propagate instead of being reported as an
    # "early stop".
    try:
        fmin_l_bfgs_b(gloss, theta0, maxiter=self.bfgs_iter, m=100,
                      iprint=self.debug, callback=callback)
    except np.linalg.LinAlgError:
        print('GP. Increase noise term and re-optimization.')
        theta0 = np.copy(self.theta)
        theta0[0] += np.log(10)
        try:
            fmin_l_bfgs_b(gloss, theta0, maxiter=self.bfgs_iter, m=10,
                          iprint=self.debug, callback=callback)
        except Exception:
            report_early_stop()
    except Exception:
        report_early_stop()

    sn2 = np.exp(self.theta[0])
    hyp = self.theta[1:]
    K = self.kernel(self.train_x, self.train_x, hyp) + sn2 * np.eye(
        self.num_train) + self.jitter * np.eye(self.num_train)
    self.L = np.linalg.cholesky(K)
    self.alpha = chol_inv(self.L, self.train_y.T)
    # Constant later added to the diagonal-only predictive variance; which
    # entries of theta it combines depends on self.k.
    if self.k:
        self.for_diag = np.exp(self.theta[1]) * np.exp(
            self.theta[3]) + np.exp(self.theta[3 + self.dim])
    else:
        self.for_diag = np.exp(self.theta[1])
    print('GP. Finished training process.')
def predict(self, test_x, is_diag=1):
    """Predict mean and variance at ``test_x`` from the src/tag model.

    Args:
        test_x: query inputs, passed to ``self.kernel1`` / ``self.kernel2``.
        is_diag: when truthy only per-point variances are computed;
            otherwise the full matrix expression is used.

    Returns:
        ``(py, ps2)``: the mean rescaled by ``self.std`` and shifted by
        ``self.mean``, and the absolute value of the variance term scaled
        by ``self.std ** 2``.
    """
    theta = self.theta
    scale_out = np.exp(theta[0])
    noise_tag = np.exp(theta[self.dim + 2])

    # Factorise the training covariance and solve for the weights.
    chol_train = np.linalg.cholesky(self.kernel(self.src_x, self.tag_x, theta))
    weights = chol_inv(chol_train, self.train_y.T)

    # Cross-covariances against the src and tag inputs, side by side.
    cross_src = self.kernel2(test_x, self.src_x, theta)
    cross_tag = self.kernel1(test_x, self.tag_x, theta)
    cross = np.hstack((cross_src, cross_tag))

    mean_raw = np.dot(cross, weights)
    solved = chol_inv(chol_train, cross.T)

    if is_diag:
        var_raw = scale_out + noise_tag - (cross * solved.T).sum(axis=1)
    else:
        var_raw = self.kernel1(test_x, test_x, theta) + noise_tag - np.dot(cross, solved)

    # abs() keeps small negative values from round-off out of the result.
    var_raw = np.abs(var_raw)
    return mean_raw * self.std + self.mean, var_raw * (self.std**2)
def predict(self, test_x, is_diag=1):
    """Predict mean and variance at ``test_x`` from the cached fit.

    Uses ``self.alpha`` and ``self.L`` as stored on the instance.

    Args:
        test_x: query inputs, passed to ``self.kernel``.
        is_diag: when truthy only per-point variances are computed (using
            ``self.for_diag``); otherwise the full matrix expression is used.

    Returns:
        ``(py, ps2)``: the flattened mean rescaled by ``self.std`` and
        shifted by ``self.mean``, and the absolute value of the variance
        term scaled by ``self.std ** 2``.
    """
    noise = np.exp(self.theta[0])
    kern_hyp = self.theta[1:]

    cross = self.kernel(test_x, self.train_x, kern_hyp)
    mean_raw = np.dot(cross, self.alpha)
    solved = chol_inv(self.L, cross.T)

    if is_diag:
        var_raw = self.for_diag + noise - (cross * solved.T).sum(axis=1)
    else:
        var_raw = noise - np.dot(cross, solved) + self.kernel(test_x, test_x, kern_hyp)

    # abs() keeps small negative values from round-off out of the result.
    var_raw = np.abs(var_raw)
    mean_out = (mean_raw * self.std + self.mean).reshape(-1)
    return mean_out, var_raw * (self.std**2)
def neg_log_likelihood(self, theta):
    """Objective for ``theta``: Gaussian NLL of the tag targets given the src data.

    The src block is factorised first. It yields a conditional mean and
    covariance for the tag block, and the returned value is the
    corresponding Gaussian negative log-likelihood of ``self.tag_y``.

    Args:
        theta: parameter vector; entries ``self.dim + 1`` and
            ``self.dim + 2`` are the log noise terms of the src and tag
            blocks, and the whole vector is passed to the kernels.

    Returns:
        The objective value, or ``np.inf`` if it evaluates to NaN. The same
        value is stored in ``self.nlz``.
    """
    noise_src = np.exp(theta[self.dim + 1])
    noise_tag = np.exp(theta[self.dim + 2])
    eye_src = np.eye(self.num_src)
    eye_tag = np.eye(self.num_tag)

    cov_ss = self.kernel1(self.src_x, self.src_x, theta) + noise_src * eye_src + self.jitter * eye_src
    cov_st = self.kernel2(self.src_x, self.tag_x, theta)
    cov_ts = cov_st.T
    cov_tt = self.kernel1(self.tag_x, self.tag_x, theta) + noise_tag * eye_tag + self.jitter * eye_tag

    chol_ss = np.linalg.cholesky(cov_ss)
    solved_y = chol_inv(chol_ss, self.src_y.T)
    solved_st = chol_inv(chol_ss, cov_st)

    # Conditional mean and covariance of the tag block.
    cond_mean = np.dot(cov_ts, solved_y)
    cond_cov = cov_tt - np.dot(cov_ts, solved_st)

    chol_t = np.linalg.cholesky(cond_cov)
    # Sum of log-diagonal of the factor = half the log-determinant.
    half_logdet = np.sum(np.log(np.diag(chol_t)))
    resid = self.tag_y.T - cond_mean
    weights = chol_inv(chol_t, resid)

    value = 0.5 * (np.dot(resid.T, weights) + self.num_tag * np.log(2 * np.pi)) + half_logdet
    nlz = np.inf if np.isnan(value) else value
    self.nlz = nlz
    return nlz
def neg_log_likelihood(self, theta):
    """Objective for ``theta``: Gaussian negative log-likelihood of the training targets.

    Args:
        theta: parameter vector; ``theta[0]`` is the log of the diagonal
            noise term and ``theta[1:]`` is passed to ``self.kernel``.

    Returns:
        The objective value, or ``np.inf`` if it evaluates to NaN. The same
        value is stored in ``self.nlz``.
    """
    noise = np.exp(theta[0])
    kern_hyp = theta[1:]

    cov = self.kernel(self.train_x, self.train_x, kern_hyp) + noise * np.eye(self.num_train)
    chol = np.linalg.cholesky(cov)
    # Sum of log-diagonal of the factor = half the log-determinant.
    half_logdet = np.sum(np.log(np.diag(chol)))
    weights = chol_inv(chol, self.train_y.T)

    quad = np.dot(self.train_y, weights)
    value = 0.5 * (quad + self.num_train * np.log(2 * np.pi)) + half_logdet
    nlz = np.inf if np.isnan(value) else value
    self.nlz = nlz
    return nlz
def callback0(params):
    """Optimizer callback: refit ``alpha`` for ``params`` and record progress.

    Args:
        params: log-values; after exponentiation every entry but the last
            goes into ``al`` and the last into ``bl``.

    Side effects:
        Updates the globals ``Nfeval``, ``prev_norm``, ``opt_params`` and
        ``opt_test_err`` (``opt_params`` receives the exponentiated values)
        and prints progress lines.

    Raises:
        Exception: when ``norm[0, 0] / prev_norm >= 3``.
    """
    global Nfeval, prev_norm, opt_params, opt_test_err
    # `x % 1 == 0` holds for every integer counter, so this always runs.
    if Nfeval % 1 == 0:
        params = np.exp(params)
        print('params:', params)
        al, bl = params[:-1], params[-1]
        if train.x.shape[1] < 5:
            # Fewer than five input columns: train_L0 / test_L0 are used as
            # single arrays.
            train_L = bl**2 * np.exp(-train_L0 / al**2 / 2) + 1e-4 * EYEN
            test_L = bl**2 * np.exp(-test_L0 / al**2 / 2)
        else:
            # Otherwise train_L0 / test_L0 are indexed once per entry of al
            # and the scaled terms are summed before exponentiation.
            train_L, test_L = 0, 0
            for i in range(len(al)):
                train_L += train_L0[i] / al[i]**2
                test_L += test_L0[i] / al[i]**2
            train_L = bl * bl * np.exp(-train_L / 2) + 1e-4 * EYEN
            test_L = bl * bl * np.exp(-test_L / 2)
        if nystr:
            tmp_mat = eig_vec_K.T @ train_L
            alpha = EYEN - eig_vec_K @ np.linalg.inv(
                tmp_mat @ eig_vec_K / N2 + inv_eig_val) @ tmp_mat / N2
            alpha = alpha @ W_nystr_Y * N2
        else:
            LWL_inv = chol_inv(train_L @ train_W @ train_L + train_L / N2 + JITTER * EYEN)
            alpha = LWL_inv @ train_L @ train_W @ train.y
        pred_mean = test_L @ alpha
        # Mean squared difference between predictions and test.g.
        test_err = ((pred_mean - test.g)**2).mean()
        norm = alpha.T @ train_L @ alpha
    Nfeval += 1
    # NOTE(review): the nesting from here on was reconstructed from a
    # whitespace-collapsed source -- confirm it against the intended layout.
    if prev_norm is not None:
        # Abort the optimisation once the quadratic form jumps to at least
        # three times the stored value.
        if norm[0, 0] / prev_norm >= 3:
            if opt_test_err is None:
                opt_test_err = test_err
                opt_params = params
            print(True, opt_params, opt_test_err, prev_norm, norm[0, 0])
            raise Exception
    # prev_norm only ever moves down (or is set on the first call).
    if prev_norm is None or norm[0, 0] <= prev_norm:
        prev_norm = norm[0, 0]
    opt_test_err = test_err
    opt_params = params
    print(True, opt_params, opt_test_err, prev_norm, norm[0, 0])
def callback0(params, timer=None):
    """Optimizer callback: refit ``alpha`` for ``params`` and record progress.

    Args:
        params: two log-values; ``np.exp(params)`` is unpacked into ``al``
            and ``bl``.
        timer: when truthy, the function returns right after computing the
            predictions, before any of the globals is modified.

    Side effects:
        Updates the globals ``Nfeval``, ``prev_norm``, ``opt_params`` and
        ``opt_test_err`` (``opt_params`` receives the un-exponentiated
        ``params``) and prints a progress line.

    Raises:
        Exception: when ``norm[0, 0] / prev_norm >= 3``.
    """
    global Nfeval, prev_norm, opt_params, opt_test_err
    # `x % 1 == 0` holds for every integer counter, so this always runs.
    if Nfeval % 1 == 0:
        n_params = len(params)  # not used further in this function
        al, bl = np.exp(params)
        L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
        if nystr:
            tmp_mat = eig_vec_K.T @ L
            alpha = EYEN - eig_vec_K @ np.linalg.inv(
                tmp_mat @ eig_vec_K / N2 + inv_eig_val_K) @ tmp_mat / N2
            alpha = alpha @ W_nystr_Y * N2
        else:
            LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
            alpha = LWL_inv @ L @ W @ Y
        test_L = bl * bl * np.exp(
            -test_L0 / al / al / 2)  # l(test_X,X,al,bl)
        pred_mean = test_L @ alpha
        if timer:
            return
        # Mean squared difference between predictions and test_G.
        test_err = ((pred_mean - test_G)**2).mean(
        )  # ((pred_mean-test_G)**2/np.diag(pred_cov)).mean()+(np.log(np.diag(pred_cov))).mean()
        norm = alpha.T @ L @ alpha
    Nfeval += 1
    # NOTE(review): the nesting from here on was reconstructed from a
    # whitespace-collapsed source -- confirm it against the intended layout.
    if prev_norm is not None:
        # Abort the optimisation once the quadratic form jumps to at least
        # three times the stored value.
        if norm[0, 0] / prev_norm >= 3:
            if opt_params is None:
                opt_test_err = test_err
                opt_params = params
            print(True, opt_params, opt_test_err, prev_norm, norm[0, 0])
            raise Exception
    # prev_norm only ever moves down (or is set on the first call).
    if prev_norm is None or norm[0, 0] <= prev_norm:
        prev_norm = norm[0, 0]
    opt_test_err = test_err
    opt_params = params
    print('params,test_err, norm: ', opt_params, opt_test_err, prev_norm, norm[0, 0])
def callback0(params, timer=None):
    """Optimizer callback: refit ``alpha``, record progress and save a contour plot.

    Args:
        params: log-values; after exponentiation every entry but the last
            goes into ``al`` (indices 0 and 1 are read) and the last into
            ``bl``.
        timer: when truthy, the function returns right after computing the
            in-sample predictions, before any global is modified and before
            anything is plotted.

    Side effects:
        Updates the globals ``Nfeval``, ``prev_norm``, ``opt_params`` and
        ``opt_test_err``, prints a progress line, and writes
        ``VitD_IV.pdf`` or ``VitD.pdf`` (depending on ``IV``).

    Raises:
        Exception: when ``norm[0, 0] / prev_norm >= 3``.
    """
    global Nfeval, prev_norm, opt_params, opt_test_err
    # `x % 1 == 0` holds for every integer counter, so this always runs.
    if Nfeval % 1 == 0:
        params = np.exp(params)
        al, bl = params[:-1], params[-1]
        # One term per entry of L0, plus a small diagonal term.
        L = bl * bl * np.exp(
            -L0[0] / al[0] / al[0] / 2) + bl * bl * np.exp(
                -L0[1] / al[1] / al[1] / 2) + 1e-6 * EYEN
        if nystr:
            alpha = EYEN - eig_vec_K @ np.linalg.inv(
                eig_vec_K.T @ L @ eig_vec_K / N2 +
                np.diag(1 / eig_val_K / N2)) @ eig_vec_K.T @ L / N2
            alpha = alpha @ W_nystr @ Y * N2
        else:
            LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
            alpha = LWL_inv @ L @ W @ Y
        # Predictions are made on the training kernel itself.
        pred_mean = L @ alpha
        if timer:
            return
        norm = alpha.T @ L @ alpha
    Nfeval += 1
    # NOTE(review): the nesting from here on (including whether the plotting
    # code runs on every call) was reconstructed from a whitespace-collapsed
    # source -- confirm it against the intended layout.
    if prev_norm is not None:
        # Abort the optimisation once the quadratic form jumps to at least
        # three times the stored value.
        if norm[0, 0] / prev_norm >= 3:
            if opt_params is None:
                opt_params = params
                opt_test_err = ((pred_mean - Y)**2).mean()
            print(True, opt_params, opt_test_err, prev_norm)
            raise Exception
    # prev_norm only ever moves down (or is set on the first call).
    if prev_norm is None or norm[0, 0] <= prev_norm:
        prev_norm = norm[0, 0]
    opt_params = params
    # Despite the name, this is the mean squared error against Y.
    opt_test_err = ((pred_mean - Y)**2).mean()
    print('params,test_err, norm:', opt_params, opt_test_err, prev_norm)
    # Evaluation grid: 32 x 64 points spanning the two columns of X, each
    # range widened by 5% of the absolute value of its end point.
    ages = np.linspace(
        min(X[:, 0]) - abs(min(X[:, 0])) * 0.05,
        max(X[:, 0]) + abs(max(X[:, 0])) * 0.05, 32)
    vitd = np.linspace(
        min(X[:, 1]) - abs(min(X[:, 1])) * 0.05,
        max(X[:, 1]) + abs(max(X[:, 1])) * 0.05, 64)
    X_mesh, Y_mesh = np.meshgrid(ages, vitd)
    # One column of predictions per grid column, stacked side by side.
    table = bl**2 * np.hstack([
        np.exp(-_sqdist(X_mesh[:, [i]], X[:, [0]]) / al[0]**2 / 2 -
               _sqdist(Y_mesh[:, [i]], X[:, [1]]) / al[1]**2 / 2) @ alpha
        for i in range(X_mesh.shape[1])
    ])
    maxv = np.max(table[:])
    minv = np.min(table[:])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Generate a contour plot
    Y0 = data0[:, [4]]
    X0 = data0[:, [0, 2]]
    Z0 = data0[:, [0, 1]]
    # The plot axes are rebuilt from data0 rather than from X; the table
    # values above are drawn over this second grid.
    ages = np.linspace(
        min(X0[:, 0]) - abs(min(X0[:, 0])) * 0.05,
        max(X0[:, 0]) + abs(max(X0[:, 0])) * 0.05, 32)
    vitd = np.linspace(
        min(X0[:, 1]) - abs(min(X0[:, 1])) * 0.05,
        max(X0[:, 1]) + abs(max(X0[:, 1])) * 0.05, 64)
    X_mesh, Y_mesh = np.meshgrid(ages, vitd)
    # Values are min-max normalised to [0, 1] before plotting.
    cpf = ax.contourf(X_mesh, Y_mesh, (table - minv) / (maxv - minv))
    # cp = ax.contour(X_mesh, Y_mesh, table)
    plt.colorbar(cpf, ax=ax)
    plt.xlabel('Age', fontsize=12)
    plt.ylabel('Vitamin D', fontsize=12)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    if IV:
        plt.savefig('VitD_IV.pdf', bbox_inches='tight')
    else:
        plt.savefig('VitD.pdf', bbox_inches='tight')
    plt.close('all')