def experiment1D():
    """Compare exact GP, KISS-GP, and distilled-kernel predictions on a
    synthetic 1-D regression task, then plot the predictive means and
    variances of all three models.

    Data: n noisy samples of f(x) = sin(x) * exp(-x^2 / 50) on [-10, 10],
    with m inducing points on a uniform grid over the observed data range.
    No return value; results are shown via matplotlib.
    """
    gp = GaussianProcess()

    # --- synthetic data ---
    n = 500  # number of training points
    m = 50   # number of inducing points
    f = lambda x: np.sin(x) * np.exp(-x**2 / 50)
    X = np.random.uniform(-10, 10, size=n)
    X = np.sort(X)
    y = f(X) + np.random.normal(0, 1, size=n)
    y -= np.mean(y)  # center the targets (zero-mean GP assumption)
    x_min, x_max = np.min(X), np.max(X)
    U = np.linspace(x_min, x_max, m).reshape(-1, 1)  # inducing-point grid
    X = X.reshape(-1, 1)

    # --- train an exact GP to obtain hyperparameters ---
    hyp_cov = np.asarray([np.log(1), np.log(2)])
    hyp_lik = float(np.log(1))
    hyp_old = {'mean': [], 'lik': hyp_lik, 'cov': hyp_cov}
    hyp = gp.train_exact(X, y.reshape(-1, 1), hyp_old)
    hyp_cov = hyp['cov'][0]
    sigmasq = np.exp(2 * hyp['lik'])

    # --- distill the trained kernel onto the inducing points ---
    kernel = SEiso()
    distill = Distillation(X=X, y=y, U=U, kernel=kernel, hyp=hyp_cov,
                           num_iters=10, eta=5e-4, sigmasq=sigmasq,
                           width=3, use_kmeans=True, optimizer='sgd')
    distill.grad_descent()
    distill.precompute(use_true_K=False)

    # --- predictions on a dense grid ---
    xx = np.linspace(x_min, x_max, 2 * n)
    mm_true, vv_true = gp.predict_exact(X, y.reshape(-1, 1),
                                        xx.reshape(-1, 1), hyp=hyp)
    opt = {'cg_maxit': 500, 'cg_tol': 1e-5}
    # BUG FIX: was `n / 2`, which is a float under Python 3's true
    # division; the KISS-GP grid size must be an integer.
    k = n // 2
    hyp = gp.train_kiss(X, y.reshape(-1, 1), k, hyp=hyp_old, opt=opt)
    mm_kiss, vv_kiss = gp.predict_kiss(X, y.reshape(-1, 1),
                                       xx.reshape(-1, 1), k,
                                       hyp=hyp, opt=opt)

    mm, vv = [], []
    for xstar in xx:
        xstar = np.asarray([xstar])
        mstar, vstar = distill.predict(xstar, width=3)
        vv.append(vstar)
        mm.append(mstar)
    mm = np.asarray(mm).flatten()
    vv = np.asarray(vv).flatten()
    mm_kiss = np.asarray(mm_kiss).flatten()
    vv_kiss = np.asarray(vv_kiss).flatten()

    # --- plot predictive means (with +/- 2 std band for the distill model) ---
    plt.fill_between(xx, mm - np.sqrt(vv) * 2, mm + np.sqrt(vv) * 2,
                     color='gray', alpha=.5)
    plt.plot(xx, mm_true, color='y', lw=3, label='exact mean')
    plt.plot(xx, mm, color='r', lw=3, label='distill mean', ls='dotted')
    plt.plot(xx, mm_kiss, color='g', lw=3, label='kiss mean', ls=':')
    plt.plot(xx, f(xx), lw=3, label='true value', ls='dashed')
    plt.scatter(X, y, color='m', label='train data', marker='+')
    plt.xlim([x_min, x_max])
    plt.legend()
    plt.show()

    # --- plot predictive variances ---
    plt.plot(xx, vv_kiss, color='g', lw=3, label='kiss var', ls=':')
    plt.plot(xx, vv_true, color='y', lw=3, label='exact var')
    plt.plot(xx, vv, color='r', lw=3, label='distill var', ls='dotted')
    plt.xlim([x_min, x_max])
    plt.legend()
    plt.show()
def experiment_distill(dataset=PUMADYN32NM, use_kmeans=True, m=100,
                       reduce_from='fitc', cov='covSEard', width=20,
                       standardize=True, load_trained=False):
    """Train (or load) a teacher GP on `dataset`, distill its kernel onto
    m inducing points, and report test errors.

    Args:
        dataset: dataset identifier understood by `load_dataset`.
        use_kmeans: whether Distillation places inducing points by k-means.
        m: number of inducing points.
        reduce_from: teacher model, 'fitc' or 'exact'.
        cov: covariance name, 'covSEard' (ARD) or isotropic otherwise.
        width: band width used by the distilled predictor.
        standardize: standardize inputs with the training-set statistics.
        load_trained: reuse hyperparameters pickled from an earlier run.

    Returns:
        (smse(teacher_mean, distill_mean),
         smse(test_y, teacher_mean),
         smse(test_y, distill_mean))

    Raises:
        ValueError: if `reduce_from` is not 'fitc' or 'exact'.
    """
    train_x, train_y, test_x, test_y = load_dataset(dataset)
    if standardize:
        scaler = StandardScaler()
        train_x = scaler.fit_transform(train_x)
        test_x = scaler.transform(test_x)
    n, d = train_x.shape
    print('Distilling with {} data points and {} dimension'.format(n, d))

    # get GP functionality
    gp = GaussianProcess()

    # Center both target sets by the *training* mean.
    # BUG FIX: the original computed np.mean(train_y) again after train_y
    # had already been centered, so test_y was shifted by ~0 instead of
    # by the training mean; capture the mean before mutating train_y.
    y_mean = np.mean(train_y)
    train_y -= y_mean
    test_y -= y_mean

    # --- hyperparameter initialization ---
    hyp_lik = float(0.5 * np.log(np.var(train_y) / 4))
    if cov == 'covSEard':
        # one log-lengthscale per dimension, initialized from data spread
        init_ell = np.log((np.max(train_x, axis=0) - np.min(train_x, axis=0)) / 2)
        hyp_cov = np.append(init_ell, [0.5 * np.log(np.var(train_y))])
    else:
        hyp_cov = np.asarray([np.log(2)] * 2)
    hyp_old = {'mean': [], 'lik': hyp_lik, 'cov': hyp_cov}

    # random subset of training inputs as inducing points
    xu = np.random.choice(n, m, replace=False)
    xu = train_x[xu]

    # train the kernel to reduce from, or load hyp if trained already
    hyp_fname = '../model/{}_hyp_{}.pkl'.format(dataset, reduce_from)
    if load_trained and os.path.exists(hyp_fname):
        print('Load hyperparams from {}'.format(hyp_fname))
        with open(hyp_fname, 'rb') as fin:
            hyp = pkl.load(fin)
        test_mean, test_var = gp.predict_exact(train_x, train_y.reshape(-1, 1),
                                               xstar=test_x, hyp=hyp, cov=cov)
    else:
        print('Training the given kernel with {}'.format(reduce_from))
        if reduce_from == 'fitc':
            if dataset in {PUMADYN32NM, KIN40K}:
                hyp = load_trained_hyp(dataset)
            else:
                hyp = gp.train_fitc(train_x, train_y.reshape(-1, 1),
                                    xu, hyp_old, cov=cov)
            test_mean, test_var = gp.predict_fitc(train_x, train_y.reshape(-1, 1),
                                                  xu=xu, xstar=test_x,
                                                  hyp=hyp, cov=cov)
            with open(hyp_fname, 'wb') as fout:
                pkl.dump(hyp, fout, -1)
        elif reduce_from == 'exact':
            if dataset in {PUMADYN32NM, KIN40K}:
                hyp = load_trained_hyp(dataset)
            else:
                hyp = gp.train_exact(train_x, train_y.reshape(-1, 1),
                                     hyp_old, cov=cov, n_iter=100)
            # BUG FIX: the original never computed test predictions on this
            # path, so test_mean/test_var were unbound and the code below
            # raised NameError.
            test_mean, test_var = gp.predict_exact(train_x, train_y.reshape(-1, 1),
                                                   xstar=test_x, hyp=hyp, cov=cov)
            with open(hyp_fname, 'wb') as fout:
                pkl.dump(hyp, fout, -1)
        else:
            raise ValueError(reduce_from)

    print('{} Error:'.format(reduce_from.capitalize()))
    print_error(test_y, test_mean)

    hyp_cov = hyp['cov'].flatten()
    sigmasq = np.exp(2 * hyp['lik'])
    kernel = SEard() if cov == 'covSEard' else SEiso()

    # --- distill the teacher kernel onto the inducing points ---
    distill = Distillation(X=train_x, y=train_y, U=xu, kernel=kernel,
                           hyp=hyp_cov, num_iters=0, eta=1e-3,
                           sigmasq=sigmasq, width=width,
                           use_kmeans=use_kmeans, optimizer='adagrad')
    distill.grad_descent()
    distill.precompute(use_true_K=False)

    mm, vv = [], []
    for xstar in test_x:
        xstar = xstar.reshape(1, d)
        mstar, vstar = distill.predict(xstar, width=width)
        vv.append(vstar)
        mm.append(mstar)
    mm = np.asarray(mm).flatten()

    print('Distill Error:')
    print_error(test_y, mm)
    print('Mean error to true K:')
    print_error(test_mean, mm)
    return smse(test_mean, mm), smse(test_y, test_mean), smse(test_y, mm)