def prediction(x_star, y_star_true, X_train, L_inv, W, first_deri, kernel_parameter):
    """
    make prediction for a single test point
    :param x_star: input test point
    :param y_star_true: its true label
    :param X_train: training dataset
    :param L_inv: inverse of L from the Cholesky decomposition
    :param W: matrix of negative second derivatives of the log likelihood at the mode (Laplace approximation)
    :param first_deri: first derivative at the mode (optimal point after Newton updates)
    :param kernel_parameter: specify kernel parameter
    :return: True if the predicted label matches the true label
    """
    l = 1
    k_star = RBF_kernel(X_train, x_star, kernel_parameter, l)
    # predictive mean and variance of the latent function at x_star
    f_star_mean = np.dot(k_star.T, first_deri)
    v = np.dot(L_inv, np.dot(np.sqrt(W), k_star))
    k_ss = RBF_kernel(x_star, x_star, kernel_parameter, l)
    var_f_star = k_ss - np.dot(v.T, v)
    return label_function(f_star_mean) == y_star_true
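
# Hedged sketch (not from this module): the quantities passed to prediction()
# above are usually the byproducts of a Laplace approximation with a logistic
# likelihood and labels y in {-1, +1} (cf. Rasmussen & Williams, Alg. 3.2):
#   first_deri = grad log p(y | f_hat) = (y + 1)/2 - sigmoid(f_hat)
#   W          = diag(sigmoid(f_hat) * (1 - sigmoid(f_hat)))
#   L_inv      = inverse of cholesky(I + sqrt(W) * K * sqrt(W))
# evaluated at the mode f_hat. The toy names below (f_hat, etc.) are
# illustrative assumptions, not part of this code base.
def _laplace_quantities_sketch():
    import numpy as np
    f_hat = np.array([0.5, -0.3, 1.2])           # toy latent mode
    y = np.array([1, -1, 1])                     # toy labels in {-1, +1}
    K = np.eye(3)                                # toy kernel matrix
    pi = 1.0 / (1.0 + np.exp(-f_hat))            # sigmoid(f_hat)
    first_deri = (y + 1) / 2.0 - pi              # gradient of log likelihood at the mode
    W = np.diag(pi * (1 - pi))                   # negative Hessian of log likelihood
    B = np.eye(3) + np.dot(np.sqrt(W), np.dot(K, np.sqrt(W)))
    L_inv = np.linalg.inv(np.linalg.cholesky(B))
    return L_inv, W, first_deri
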
def bayesian_opt(X_train, X_test, y_train):
    """
    compute current GP for Bayesian optimization
    :param X_train: training data
    :param X_test: test data
    :param y_train: training targets
    :return: mean of GP posterior function, standard deviation, GP posterior function
    """
    s = 0.0001        # noise variance and zero mean for noise
    n = len(X_test)   # number of test points
    N = len(X_train)  # number of training points
    num_fun = 1
    #print X_test
    K = RBF_kernel(X_train, X_train, 1, 1)
    K_s = RBF_kernel(X_train, X_test, 1, 1)
    K_ss = RBF_kernel(X_test, X_test, 1, 1)
    L = np.linalg.cholesky(K + s * np.eye(N))
    m = np.linalg.solve(L, y_train)
    alpha = np.linalg.solve(L.T, m)
    # compute mean of test points for posterior
    mu_post = np.dot(K_s.T, alpha)
    v = np.linalg.solve(L, K_s)
    # compute variance for test points
    var_test = np.diag(K_ss) - np.sum(v**2, axis=0)
    stand_devi = np.sqrt(var_test)
    # sample from test points, in other words, make prediction
    L_ = np.linalg.cholesky(K_ss + 1e-6 * np.eye(n) - np.dot(v.T, v))
    f_post_fun = mu_post.reshape(-1, 1) + np.dot(L_, np.random.normal(size=(n, num_fun)))
    #plot_BO(X_train, y_train, X_test, f_post_fun, mu_post, stand_devi)
    return mu_post, stand_devi, f_post_fun
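
# Hedged usage sketch of the Cholesky-based GP posterior computed in
# bayesian_opt() above, on toy 1-D data. _rbf below is a local stand-in for the
# module's RBF_kernel (whose definition is not shown here), assuming
# k(x, x') = sigma^2 * exp(-||x - x'||^2 / (2 * l^2)).
def _gp_posterior_sketch():
    import numpy as np

    def _rbf(X1, X2, sigma=1.0, l=1.0):
        # pairwise squared distances between rows of X1 and X2
        sqdist = np.sum(X1**2, 1).reshape(-1, 1) + np.sum(X2**2, 1) - 2 * np.dot(X1, X2.T)
        return sigma**2 * np.exp(-0.5 * sqdist / l**2)

    X_tr = np.array([[-2.0], [0.0], [1.5]])
    y_tr = np.sin(X_tr).ravel()
    X_te = np.linspace(-3, 3, 50).reshape(-1, 1)

    s = 0.0001                                   # same noise variance as bayesian_opt
    L = np.linalg.cholesky(_rbf(X_tr, X_tr) + s * np.eye(len(X_tr)))
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, y_tr))
    K_s = _rbf(X_tr, X_te)
    mu_post = np.dot(K_s.T, alpha)               # posterior mean at the test points
    v = np.linalg.solve(L, K_s)
    var_test = np.diag(_rbf(X_te, X_te)) - np.sum(v**2, axis=0)
    return mu_post, np.sqrt(var_test)
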
def prediction(x_star, y_star_true, X_train, C, y, pi_vector, kernel_parameter):
    """
    make prediction
    :param x_star: test input
    :param y_star_true: label of test input
    :param X_train: training dataset
    :param C: number of classes
    :param y: labels of training dataset
    :param pi_vector: pi vector computed through the softmax function
    :param kernel_parameter: parameter for kernel
    :return: true or false
    """
    n = len(X_train)
    l = 1
    k_star = RBF_kernel(X_train, x_star, kernel_parameter, l)
    f_star_mean = np.zeros((C, ))
    for c in range(C):
        # Laplace-approximation predictive mean for class c: k_*^T (y_c - pi_c)
        f_star_mean[c] = np.dot(k_star.T, y[c * n:(c + 1) * n] - pi_vector[c * n:(c + 1) * n])
    return np.argmax(f_star_mean) == y_star_true
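
# Hedged sketch (not from this module) of how a stacked pi_vector like the one
# consumed by the multi-class prediction() above is typically formed: latent
# values for all C classes are stacked class-by-class into one length C*n
# vector, and the softmax is taken per training point across classes
# (cf. Rasmussen & Williams, Alg. 3.3/3.4). Toy numbers only.
def _softmax_pi_vector_sketch():
    import numpy as np
    n, C = 4, 3
    rng = np.random.RandomState(2)
    f = rng.randn(C * n)                         # stacked latent values, class-major order
    F = f.reshape(C, n)                          # one row per class
    P = np.exp(F) / np.exp(F).sum(axis=0)        # softmax over classes for each point
    pi_vector = P.reshape(C * n)                 # back to the stacked layout
    return pi_vector
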
def compute_mar_likelihood(X_train, X_test, y_train, sigma, l):
    """
    compute log marginal likelihood for tuning parameters using Bayesian optimization
    :param X_train: training data
    :param X_test: test data
    :param y_train: training targets
    :param sigma: output variance
    :param l: lengthscale
    :return: log marginal likelihood
    """
    s = 0.0005  # noise variance and zero mean for noise
    n = len(X_train)
    # choose RBF kernel in this regression case
    K_train = RBF_kernel(X_train, X_train, sigma, l)
    L = np.linalg.cholesky(K_train + s * np.eye(n))
    m = np.linalg.solve(L, y_train)
    alpha = np.linalg.solve(L.T, m)
    # compute log marginal likelihood
    log_marg_likelihood = (-.5 * np.dot(y_train.T, alpha)
                           - np.log(np.diagonal(L)).sum(0)
                           - n / 2.0 * np.log(2 * np.pi))
    return log_marg_likelihood
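
# Hedged sanity check (not part of the module): the Cholesky form used in
# compute_mar_likelihood() above,
#   log p(y | X) = -0.5 * y^T alpha - sum(log(diag(L))) - (n / 2) * log(2 * pi),
# matches the direct Gaussian log density with covariance K + s * I.
# Toy numbers only; _K_y is an arbitrary SPD matrix standing in for K + s * I.
def _log_marginal_likelihood_check():
    import numpy as np
    rng = np.random.RandomState(0)
    n = 5
    A = rng.randn(n, n)
    _K_y = np.dot(A, A.T) + 0.0005 * np.eye(n)
    y = rng.randn(n)

    L = np.linalg.cholesky(_K_y)
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))
    cholesky_form = (-0.5 * np.dot(y, alpha)
                     - np.log(np.diagonal(L)).sum()
                     - n / 2.0 * np.log(2 * np.pi))

    _, logdet = np.linalg.slogdet(_K_y)
    direct_form = (-0.5 * np.dot(y, np.linalg.solve(_K_y, y))
                   - 0.5 * logdet
                   - n / 2.0 * np.log(2 * np.pi))
    return np.isclose(cholesky_form, direct_form)   # True up to floating-point error
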
num_sampling = num_train
kernel_parameter = 1

# plot dataset scatter at first
cm_bright = ListedColormap(['#FF0000', '#0000FF'])
plt.subplot(2, 3, 1)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
plt.title('data points')

# compute covariance matrix K under the RBF kernel
K_train = RBF_kernel(X_train, X_train, kernel_parameter, l=1)

# sampling points for the GP prior function
x1_min = np.min(X[:, 0])
x1_max = np.max(X[:, 0])
x2_min = np.min(X[:, 1])
x2_max = np.max(X[:, 1])
X1_sampling = np.linspace(x1_min, x1_max, num_sampling).reshape(-1, 1)
X2_sampling = np.linspace(x2_min, x2_max, num_sampling).reshape(-1, 1)
X_sampling = np.concatenate((X1_sampling, X2_sampling), axis=1)

# sample GP prior functions f for the likelihood
mu_prior = np.zeros((num_sampling, 1))
num_sampling = X_sampling.shape[0]
f_prior = f_prior(X_sampling, mu_prior, 'rbf', num_sampling, num_funs)
print 'shape of prior function: ' + str(f_prior.shape)
def tune_hyperparms_first(X_train, X_test, y_train, num_fun, sigma, l):
    """
    maximize log marginal likelihood using gradient ascent
    :param X_train: training data
    :param X_test: test data
    :param y_train: training targets
    :param num_fun: number of functions
    :param sigma: output variance of the RBF kernel
    :param l: lengthscale
    :return: mean, standard deviation, posterior function, optimal log marginal likelihood
    """
    s = 0.0005  # noise variance and zero mean for noise
    log_marg_likelihood_old = 0
    tolerance = 0.001
    n = len(X_train)
    N = len(X_test)
    for i in range(10000):
        # choose RBF kernel in this regression case
        K_train = RBF_kernel(X_train, X_train, sigma, l)
        K_s = RBF_kernel(X_train, X_test, sigma, l)
        K_ss = RBF_kernel(X_test, X_test, sigma, l)
        L = np.linalg.cholesky(K_train + s * np.eye(n))
        m = np.linalg.solve(L, y_train)
        alpha = np.linalg.solve(L.T, m)
        # compute mean of test points for posterior
        mu_post = np.dot(K_s.T, alpha)
        v = np.linalg.solve(L, K_s)
        # compute variance for test points
        var_test = np.diag(K_ss) - np.sum(v**2, axis=0)
        stand_devi = np.sqrt(var_test)
        # compute log marginal likelihood
        log_marg_likelihood = (-.5 * np.dot(y_train.T, alpha)
                               - np.log(np.diagonal(L)).sum(0)
                               - n / 2.0 * np.log(2 * np.pi))
        # tune the hyperparameters of the RBF kernel
        K_y_inv = np.dot(np.linalg.inv(L.T), np.linalg.inv(L))
        sigma, l = gradient_ascent(X_train, X_train, sigma, l, alpha.reshape(-1, 1), K_y_inv)
        error = np.sqrt(np.sum((log_marg_likelihood - log_marg_likelihood_old)**2))
        log_marg_likelihood_old = log_marg_likelihood
        if error <= tolerance:
            print "The hyperparameter tuning function has already converged after " + str(i + 1) + " iterations!"
            print "The error is " + str(error)
            print "training end!"
            break
    optimal_likelihood = log_marg_likelihood
    print 'optimal lengthscale is: ' + str(l[0])
    print 'maximum log marginal likelihood is: ' + str(optimal_likelihood)
    # sample from test points, in other words, make prediction
    L_ = np.linalg.cholesky(K_ss + 1e-6 * np.eye(N) - np.dot(v.T, v))
    f_post_fun = mu_post.reshape(-1, 1) + np.dot(L_, np.random.normal(size=(N, num_fun)))
    plt.axis([-5, 5, -3, 3])
    return mu_post, stand_devi, f_post_fun, optimal_likelihood
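
# Hedged sketch of the kind of update the gradient_ascent() call in
# tune_hyperparms_first() above performs (its body is not shown in this
# section). The standard gradient of the log marginal likelihood is
#   d/d theta log p(y | X) = 0.5 * trace((alpha * alpha^T - K_y^{-1}) * dK/d theta),
# and for the RBF kernel k = sigma^2 * exp(-r^2 / (2 * l^2)) one has
# dk/dl = k * r^2 / l^3. All names below are local toy stand-ins.
def _lengthscale_gradient_sketch():
    import numpy as np
    rng = np.random.RandomState(1)
    X = rng.randn(8, 1)
    y = np.sin(X).ravel() + 0.05 * rng.randn(8)
    sigma, l, s = 1.0, 1.0, 0.0005

    sqdist = np.sum(X**2, 1).reshape(-1, 1) + np.sum(X**2, 1) - 2 * np.dot(X, X.T)
    K = sigma**2 * np.exp(-0.5 * sqdist / l**2)
    K_y = K + s * np.eye(len(X))
    L = np.linalg.cholesky(K_y)
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, y)).reshape(-1, 1)
    K_y_inv = np.linalg.solve(L.T, np.linalg.solve(L, np.eye(len(X))))

    dK_dl = K * sqdist / l**3                    # elementwise dk/dl for the RBF kernel
    grad_l = 0.5 * np.trace(np.dot(np.dot(alpha, alpha.T) - K_y_inv, dK_dl))
    return l + 0.01 * grad_l                     # one gradient-ascent step on the lengthscale
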
X = np.load('X_multi.npy')
y = np.load('y_multi.npy')
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=.4, random_state=42)
num_train = len(X_train)
num_test = len(X_test)
num_classes = np.size(np.unique(y))

# hyper-parameters
num_funs = num_classes  # number of GP prior functions = number of categories
kernel_parameter = 1
l = 1

# compute kernel matrix K (block-diagonal over classes)
for c in range(num_classes):
    K_sub = RBF_kernel(X_train, X_train, kernel_parameter, l)
    if c == 0:
        K = K_sub
    else:
        K = block_diag(K, K_sub)

# generate 0/1 targets for the training dataset (stacked one-hot encoding)
y_targets = np.zeros((num_classes * num_train, ))
index = np.arange(num_train)
indices = y_train * num_train + index  # stride by num_train (was hard-coded as 60)
y_targets[indices] = 1

# train the model
#model_training(K, y_targets, num_classes, num_train)
pi_vector = model_training2(K, y_targets, num_classes, num_train)

true_count = np.ones(num_test)
for i in range(len(X_test)):