# NOTE: gradients of likelihood_UB appear to come from autograd (predict
# below unboxes ArrayBoxes via .value), in which case this import would be
# `import autograd.numpy as np` in the actual repo.
import numpy as np
import tensorflow as tf
from sklearn.cluster import KMeans

# `kernel(X1, X2, hyp)` and the module-level ModelInfo dict are defined
# elsewhere in the repo; a sketch of a compatible kernel follows
# likelihood_UB below.


def likelihood_UB(hyp):
    X = ModelInfo["X_batch"]
    y = ModelInfo["y_batch"]
    Z = ModelInfo["Z"]
    m = ModelInfo["m"]
    S = ModelInfo["S"]
    jitter = ModelInfo["jitter"]
    jitter_cov = ModelInfo["jitter_cov"]

    N = X.shape[0]
    M = Z.shape[0]

    logsigma_n = hyp[-1]
    sigma_n = np.exp(logsigma_n)

    # Compute K_u_inv
    K_u = kernel(Z, Z, hyp[:-1])
    K_u_inv = np.linalg.solve(K_u + np.eye(M) * jitter_cov, np.eye(M))
    # L = np.linalg.cholesky(K_u + np.eye(M)*jitter_cov)
    # K_u_inv = np.linalg.solve(np.transpose(L), np.linalg.solve(L, np.eye(M)))

    ModelInfo.update({"K_u_inv": K_u_inv})

    # Compute mu
    psi = kernel(Z, X, hyp[:-1])
    K_u_inv_m = np.matmul(K_u_inv, m)
    MU = np.matmul(psi.T, K_u_inv_m)

    # Compute cov
    Alpha = np.matmul(K_u_inv, psi)
    COV = kernel(X, X, hyp[:-1]) - np.matmul(psi.T, np.matmul(K_u_inv, psi)) + \
        np.matmul(Alpha.T, np.matmul(S, Alpha))

    COV_inv = np.linalg.solve(COV + np.eye(N) * sigma_n + np.eye(N) * jitter,
                              np.eye(N))
    # L = np.linalg.cholesky(COV + np.eye(N)*sigma_n + np.eye(N)*jitter)
    # COV_inv = np.linalg.solve(np.transpose(L), np.linalg.solve(L, np.eye(N)))

    # Compute cov(Z, X)
    cov_ZX = np.matmul(S, Alpha)

    # Update m and S
    alpha = np.matmul(COV_inv, cov_ZX.T)
    m = m + np.matmul(cov_ZX, np.matmul(COV_inv, y - MU))
    S = S - np.matmul(cov_ZX, alpha)

    ModelInfo.update({"m": m})
    ModelInfo.update({"S": S})

    # Compute NLML
    Beta = y - MU
    NLML_1 = np.matmul(Beta.T, Beta) / (2.0 * sigma_n * N)
    NLML_2 = np.trace(COV) / (2.0 * sigma_n)
    NLML_3 = N * logsigma_n / 2.0 + N * np.log(2.0 * np.pi) / 2.0
    NLML = NLML_1 + NLML_2 + NLML_3

    return NLML[0, 0]
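# `kernel` is not shown in this section. A minimal sketch of a compatible
# covariance function, assuming an ARD squared-exponential form in which
# hyp[0] is the log signal variance and hyp[1:] are D log (squared)
# lengthscales -- the repo's actual kernel may differ:
def kernel(X, Xp, hyp):
    output_scale = np.exp(hyp[0])
    lengthscales = np.sqrt(np.exp(hyp[1:]))
    X = X / lengthscales
    Xp = Xp / lengthscales
    # Pairwise squared distances via ||x - x'||^2 = ||x||^2 + ||x'||^2 - 2 x.x'
    X_sq = np.sum(np.square(X), axis=1)
    Xp_sq = np.sum(np.square(Xp), axis=1)
    dists = X_sq[:, np.newaxis] + Xp_sq[np.newaxis, :] - 2.0 * np.matmul(X, Xp.T)
    return output_scale * np.exp(-0.5 * dists)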
def predict(X_star):
    Z = ModelInfo["Z"]
    # m and S are updated inside likelihood_UB; after autograd-differentiated
    # training they are ArrayBoxes, and .value unboxes the NumPy arrays.
    m = ModelInfo["m"].value
    S = ModelInfo["S"].value
    hyp = ModelInfo["hyp"]
    K_u_inv = ModelInfo["K_u_inv"]

    N_star = X_star.shape[0]
    partitions_size = 10000
    (number_of_partitions, remainder_partition) = divmod(N_star, partitions_size)

    mean_star = np.zeros((N_star, 1))
    var_star = np.zeros((N_star, 1))

    for partition in range(0, number_of_partitions):
        print("Predicting partition: %d" % (partition))
        idx_1 = partition * partitions_size
        idx_2 = (partition + 1) * partitions_size

        # Compute mu
        psi = kernel(Z, X_star[idx_1:idx_2, :], hyp[:-1])
        K_u_inv_m = np.matmul(K_u_inv, m)
        mu = np.matmul(psi.T, K_u_inv_m)
        mean_star[idx_1:idx_2, 0:1] = mu

        # Compute cov
        Alpha = np.matmul(K_u_inv, psi)
        cov = kernel(X_star[idx_1:idx_2, :], X_star[idx_1:idx_2, :], hyp[:-1]) - \
            np.matmul(psi.T, np.matmul(K_u_inv, psi)) + \
            np.matmul(Alpha.T, np.matmul(S, Alpha))
        var = np.abs(np.diag(cov)) + np.exp(hyp[-1])
        var_star[idx_1:idx_2, 0] = var

    print("Predicting the last partition")
    idx_1 = number_of_partitions * partitions_size
    idx_2 = number_of_partitions * partitions_size + remainder_partition

    # Compute mu
    psi = kernel(Z, X_star[idx_1:idx_2, :], hyp[:-1])
    K_u_inv_m = np.matmul(K_u_inv, m)
    mu = np.matmul(psi.T, K_u_inv_m)
    mean_star[idx_1:idx_2, 0:1] = mu

    # Compute cov
    Alpha = np.matmul(K_u_inv, psi)
    cov = kernel(X_star[idx_1:idx_2, :], X_star[idx_1:idx_2, :], hyp[:-1]) - \
        np.matmul(psi.T, np.matmul(K_u_inv, psi)) + \
        np.matmul(Alpha.T, np.matmul(S, Alpha))
    var = np.abs(np.diag(cov)) + np.exp(hyp[-1])
    var_star[idx_1:idx_2, 0] = var

    return mean_star, var_star
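# The partitioning in predict only bounds memory (each block of the test
# covariance is dense); mathematically it is a single pass. A one-shot
# sketch of the same computation for small N_star, assuming m and S are
# already plain NumPy arrays:
def predict_small(X_star):
    Z, hyp = ModelInfo["Z"], ModelInfo["hyp"]
    m, S = ModelInfo["m"], ModelInfo["S"]
    K_u_inv = ModelInfo["K_u_inv"]
    psi = kernel(Z, X_star, hyp[:-1])
    mu = np.matmul(psi.T, np.matmul(K_u_inv, m))
    Alpha = np.matmul(K_u_inv, psi)
    cov = kernel(X_star, X_star, hyp[:-1]) - np.matmul(psi.T, Alpha) + \
        np.matmul(Alpha.T, np.matmul(S, Alpha))
    var = np.abs(np.diag(cov))[:, None] + np.exp(hyp[-1])
    return mu, var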
def predict(self, X_star):
    Z = self.sess.run(self.Z)
    m = self.sess.run(self.m)
    S = self.sess.run(self.S)
    hyp = self.hyp
    K_u_inv = self.sess.run(self.K_u_inv)

    N_star = X_star.shape[0]
    partitions_size = 10000
    (number_of_partitions, remainder_partition) = divmod(N_star, partitions_size)

    mean_star = np.zeros((N_star, 1))
    var_star = np.zeros((N_star, 1))

    for partition in range(0, number_of_partitions):
        print("Predicting partition: %d" % (partition))
        idx_1 = partition * partitions_size
        idx_2 = (partition + 1) * partitions_size

        # Compute mu
        psi = kernel(Z, X_star[idx_1:idx_2, :], hyp[:-1])
        K_u_inv_m = np.matmul(K_u_inv, m)
        mu = np.matmul(psi.T, K_u_inv_m)
        mean_star[idx_1:idx_2, 0:1] = mu

        # Compute cov
        Alpha = np.matmul(K_u_inv, psi)
        cov = kernel(X_star[idx_1:idx_2, :], X_star[idx_1:idx_2, :], hyp[:-1]) - \
            np.matmul(psi.T, np.matmul(K_u_inv, psi)) + \
            np.matmul(Alpha.T, np.matmul(S, Alpha))
        var = np.abs(np.diag(cov))  # + np.exp(hyp[-1])
        var_star[idx_1:idx_2, 0] = var

    print("Predicting the last partition")
    idx_1 = number_of_partitions * partitions_size
    idx_2 = number_of_partitions * partitions_size + remainder_partition

    # Compute mu
    psi = kernel(Z, X_star[idx_1:idx_2, :], hyp[:-1])
    K_u_inv_m = np.matmul(K_u_inv, m)
    mu = np.matmul(psi.T, K_u_inv_m)
    mean_star[idx_1:idx_2, 0:1] = mu

    # Compute cov
    Alpha = np.matmul(K_u_inv, psi)
    cov = kernel(X_star[idx_1:idx_2, :], X_star[idx_1:idx_2, :], hyp[:-1]) - \
        np.matmul(psi.T, np.matmul(K_u_inv, psi)) + \
        np.matmul(Alpha.T, np.matmul(S, Alpha))
    var = np.abs(np.diag(cov))  # + np.exp(hyp[-1])
    var_star[idx_1:idx_2, 0] = var

    return mean_star, var_star
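# The class version above leaves the np.exp(hyp[-1]) noise term commented
# out, so it returns the variance of the latent function f* only. A small
# sketch (the helper name is ours, not the repo's) that adds the learned
# observation-noise variance back to predict noisy targets y*:
def predict_targets(model, X_star):
    mean_f, var_f = model.predict(X_star)
    # exp(hyp[-1]) is the noise variance sigma_n used in the likelihood
    return mean_f, var_f + np.exp(model.hyp[-1])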
def likelihood(self, hyp):
    M = self.M
    Z = self.Z
    m = self.m
    S = self.S

    X_batch = self.X_batch
    y_batch = self.y_batch
    jitter = self.jitter
    jitter_cov = self.jitter_cov

    N = X_batch.shape[0]

    logsigma_n = hyp[-1]
    sigma_n = np.exp(logsigma_n)

    # Compute K_u_inv
    K_u = kernel(Z, Z, hyp[:-1])
    # K_u_inv = np.linalg.solve(K_u + np.eye(M)*jitter_cov, np.eye(M))
    L = np.linalg.cholesky(K_u + np.eye(M) * jitter_cov)
    K_u_inv = np.linalg.solve(L.T, np.linalg.solve(L, np.eye(M)))

    self.K_u_inv = K_u_inv

    # Compute mu
    psi = kernel(Z, X_batch, hyp[:-1])
    K_u_inv_m = np.matmul(K_u_inv, m)
    MU = np.matmul(psi.T, K_u_inv_m)

    # Compute cov
    Alpha = np.matmul(K_u_inv, psi)
    COV = kernel(X_batch, X_batch, hyp[:-1]) - \
        np.matmul(psi.T, np.matmul(K_u_inv, psi)) + \
        np.matmul(Alpha.T, np.matmul(S, Alpha))

    COV_inv = np.linalg.solve(COV + np.eye(N) * sigma_n + np.eye(N) * jitter,
                              np.eye(N))
    # L = np.linalg.cholesky(COV + np.eye(N)*sigma_n + np.eye(N)*jitter)
    # COV_inv = np.linalg.solve(np.transpose(L), np.linalg.solve(L, np.eye(N)))

    # Compute cov(Z, X)
    cov_ZX = np.matmul(S, Alpha)

    # Update m and S
    alpha = np.matmul(COV_inv, cov_ZX.T)
    m = m + np.matmul(cov_ZX, np.matmul(COV_inv, y_batch - MU))
    S = S - np.matmul(cov_ZX, alpha)

    self.m = m
    self.S = S

    # Compute NLML = -log N(m | 0, K_u + jitter*I); note that
    # sum(log(diag(L))) = 0.5 * log|K_u + jitter*I|
    K_u_inv_m = np.matmul(K_u_inv, m)
    NLML = 0.5 * np.matmul(m.T, K_u_inv_m) + \
        np.sum(np.log(np.diag(L))) + 0.5 * np.log(2.0 * np.pi) * M

    return NLML[0, 0]
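# Sanity check of the NLML term above: it equals the negative log density
# of m under N(0, K_u + jitter*I). A sketch using SciPy (an extra
# dependency assumed only for this check):
from scipy.stats import multivariate_normal

def check_nlml(m, K_u, M, jitter_cov):
    return -multivariate_normal.logpdf(
        m.ravel(), mean=np.zeros(M), cov=K_u + np.eye(M) * jitter_cov)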
def __init__(self, X, y, M=10, max_iter=2000, N_batch=1,
             monitor_likelihood=10, lrate=1e-3):
    (N, D) = X.shape

    # k-means on a subset of data to initialize the inducing points Z
    N_subset = min(N, 10000)
    idx = np.random.choice(N, N_subset, replace=False)
    kmeans = KMeans(n_clusters=M, random_state=0).fit(X[idx, :])
    Z = kmeans.cluster_centers_

    hyp = np.log(np.ones(D + 1))
    logsigma_n = np.array([-4.0])
    hyp = np.concatenate([hyp, logsigma_n])

    m = np.zeros((M, 1))
    S = kernel(Z, Z, hyp[:-1])

    self.X = X
    self.y = y

    self.M = M
    self.Z = tf.Variable(Z, dtype=tf.float64, trainable=False)
    self.K_u_inv = tf.Variable(np.eye(M), dtype=tf.float64, trainable=False)
    self.m = tf.Variable(m, dtype=tf.float64, trainable=False)
    self.S = tf.Variable(S, dtype=tf.float64, trainable=False)
    self.nlml = tf.Variable(0.0, dtype=tf.float64, trainable=False)

    self.hyp = hyp

    self.max_iter = max_iter
    self.N_batch = N_batch
    self.monitor_likelihood = monitor_likelihood
    self.jitter = 1e-8
    self.jitter_cov = 1e-8

    self.lrate = lrate
    self.optimizer = tf.train.AdamOptimizer(self.lrate)

    # TensorFlow session
    # self.sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    self.sess = tf.Session()
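# With TF1-style graph execution, the tf.Variables above must be
# initialized before the first sess.run. A usage sketch on synthetic data;
# the class name SparseGP is hypothetical, since this section only shows
# the methods:
N, D = 2000, 2
X = np.random.randn(N, D)
y = np.sin(X[:, 0:1]) + 0.1 * np.random.randn(N, 1)
model = SparseGP(X, y, M=20, max_iter=2000, N_batch=100)
model.sess.run(tf.global_variables_initializer())
mean_star, var_star = model.predict(np.random.randn(500, D))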
def init_params():
    X = ModelInfo["X"]
    M = ModelInfo["M"]
    (N, D) = X.shape

    idx = np.random.permutation(N)
    N_subset = min(N, 10000)
    kmeans = KMeans(n_clusters=M, random_state=0).fit(X[idx[0:N_subset], :])
    Z = kmeans.cluster_centers_

    hyp = np.log(np.ones(D + 1))
    logsigma_n = np.array([-4.0])
    hyp = np.concatenate([hyp, logsigma_n])

    m = np.zeros((M, 1))
    S = kernel(Z, Z, hyp[:-1])

    ModelInfo.update({"hyp": hyp})
    ModelInfo.update({"Z": Z})
    ModelInfo.update({"m": m})
    ModelInfo.update({"S": S})
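# A hedged end-to-end sketch of the dict-based API above (likelihood_UB,
# init_params, predict). The .value unboxing in predict suggests the repo
# differentiates likelihood_UB with autograd, which would require the
# module to use `import autograd.numpy as np`; the data, batch size, and
# plain-SGD step below are illustrative assumptions, not the repo's loop.
from autograd import grad

N, D, M_inducing, N_batch, lrate = 2000, 2, 20, 100, 1e-3
X = np.random.randn(N, D)
y = np.sin(X[:, 0:1]) + 0.1 * np.random.randn(N, 1)

ModelInfo = {"X": X, "y": y, "M": M_inducing,
             "jitter": 1e-8, "jitter_cov": 1e-8}
init_params()
nlml_grad = grad(likelihood_UB)

hyp = ModelInfo["hyp"]
for it in range(2000):
    # Each call to likelihood_UB also performs the streaming update of
    # (m, S) for the current minibatch.
    idx = np.random.choice(N, N_batch, replace=False)
    ModelInfo.update({"X_batch": X[idx, :], "y_batch": y[idx, :]})
    hyp = hyp - lrate * nlml_grad(hyp)
ModelInfo.update({"hyp": hyp})

X_star = np.random.randn(500, D)
mean_star, var_star = predict(X_star)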
def __init__(self, X, y, M=10, max_iter=2000, N_batch=1,
             monitor_likelihood=10, lrate=1e-3):
    (N, D) = X.shape

    N_subset = min(N, 10000)
    idx = np.random.choice(N, N_subset, replace=False)
    kmeans = KMeans(n_clusters=M, random_state=0).fit(X[idx, :])
    Z = kmeans.cluster_centers_

    hyp = np.log(np.ones(D + 1))
    logsigma_n = np.array([-4.0])
    hyp = np.concatenate([hyp, logsigma_n])

    m = np.zeros((M, 1))
    S = kernel(Z, Z, hyp[:-1])

    self.X = X
    self.y = y

    self.M = M
    self.Z = Z
    self.m = m
    self.S = S

    self.hyp = hyp

    self.max_iter = max_iter
    self.N_batch = N_batch
    self.monitor_likelihood = monitor_likelihood
    self.jitter = 1e-8
    self.jitter_cov = 1e-8

    # Adam optimizer parameters
    self.mt_hyp = np.zeros(hyp.shape)
    self.vt_hyp = np.zeros(hyp.shape)
    self.lrate = lrate
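# The mt_hyp/vt_hyp buffers above imply a manual Adam update for hyp; the
# update step itself is not shown in this section. A standard sketch,
# assuming g is the NLML gradient at iteration t (1-indexed) and the usual
# defaults beta1=0.9, beta2=0.999, eps=1e-8:
def adam_step(self, g, t, beta1=0.9, beta2=0.999, eps=1e-8):
    self.mt_hyp = beta1 * self.mt_hyp + (1.0 - beta1) * g
    self.vt_hyp = beta2 * self.vt_hyp + (1.0 - beta2) * g**2
    mt_hat = self.mt_hyp / (1.0 - beta1**t)  # bias-corrected first moment
    vt_hat = self.vt_hyp / (1.0 - beta2**t)  # bias-corrected second moment
    self.hyp = self.hyp - self.lrate * mt_hat / (np.sqrt(vt_hat) + eps)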