def train_mcmc(self, num_samples=1000, burn=500, thin=2, epsilon=0.05, lmin=10, lmax=20):
    """Fit the GP hyperparameters by HMC sampling, after a MAP warm start."""
    k = gpflow.kernels.Matern52(input_dim=self.X.shape[1], ARD=True)
    l = gpflow.likelihoods.Gaussian()
    m = gpflow.models.GPMC(self.X, self.y[:, None], k, l)
    m.clear()
    m.likelihood.variance.prior = gpflow.priors.Gamma(1., 1.)
    m.kern.lengthscales.prior = gpflow.priors.Gamma(1., 1.)
    m.kern.variance.prior = gpflow.priors.Gamma(1., 1.)
    m.compile()

    # o = gpflow.train.AdamOptimizer(0.01)
    # o.minimize(m, maxiter=notebook_niter(15))

    # start near the MAP estimate
    gpflow.train.ScipyOptimizer().minimize(m)

    sampler = gpflow.train.HMC()
    samples = sampler.sample(m, num_samples=notebook_niter(num_samples),
                             burn=notebook_niter(burn), thin=notebook_niter(thin),
                             epsilon=epsilon, lmin=lmin, lmax=lmax, logprobs=False)

    self.m = m
    self.samples = samples
    print('GP MCMC training is done ...')
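# The sketch below is not part of the original code. It illustrates one way the stored HMC
# samples might be used at prediction time, assuming the GPflow 1.x sampling API (HMC.sample
# returning a pandas DataFrame, and Model.sample_feed_dict mapping one row back onto the model
# variables) and that numpy is imported as np. `predict_mcmc` and `Xnew` are hypothetical names.
def predict_mcmc(self, Xnew):
    """Monte Carlo predictive mean: average latent-function draws over the stored HMC samples."""
    means = []
    for _, s in self.samples.iterrows():
        # one draw of the latent function per posterior hyperparameter sample
        f = self.m.predict_f_samples(Xnew, 1, initialize=False,
                                     feed_dict=self.m.sample_feed_dict(s))
        means.append(f[0])
    return np.mean(means, axis=0)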
def ex2b():
    fX_dim = 5
    minibatch_size = notebook_niter(1000, test_n=10)
    M = notebook_niter(100, test_n=5)

    # annoyingly, the conv layers only support float32 and lower
    f = lambda x: tf.cast(cnn_fn(tf.cast(x, tf.float32), fX_dim), float_type)

    kern = KernelWithNN(gpflow.kernels.Matern32(fX_dim), f)

    # reset inducing points (they live in a different space from X, so we need to be careful here)
    ind = np.random.choice(Mnist.X.shape[0], minibatch_size, replace=False)

    # currently we need a hack due to model initialization.
    feat = KernelSpaceInducingPoints(np.empty((M, fX_dim)))
    # feat = FFeature(Z_0)  # ideally, we could move the calculation of Z_0

    # build the model
    lik = gpflow.likelihoods.MultiClass(Mnist.Nclasses)

    # Z = kmeans2(Mnist.X, M, minit='points')[0]

    model = NN_SVGP(Mnist.X, Mnist.Y, kern, lik, feat=feat,
                    num_latent=Mnist.Nclasses, minibatch_size=minibatch_size)

    fZ = model.kern.compute_f(Mnist.X[ind])
    # Z_0 = kmeans2(fZ, M)[0] might fail
    Z_0 = fZ[np.random.choice(len(fZ), M, replace=False)]
    model.feature.Z = Z_0

    # use gpflow wrappers to train. NB all session handling is done for us
    gpflow.training.AdamOptimizer(0.001).minimize(model, maxiter=ITERATIONS)

    # predictions
    m, v = model.predict_y(Mnist.Xtest)
    preds = np.argmax(m, 1).reshape(Mnist.Ytest.shape)
    correct = preds == Mnist.Ytest.astype(int)
    acc = np.average(correct.astype(float)) * 100.
    print('accuracy is {:.4f}%'.format(acc))
def repeatMinimization(model, xtest, ytest):
    callback = cb(model, xtest, ytest)
    opt = gpflow.train.ScipyOptimizer()
    # print("Optimising for {} repetitions".format(nRepeats))
    for repeatIndex in range(nRepeats):
        opt.minimize(model, disp=False, maxiter=notebook_niter(2000), step_callback=callback)
    return callback
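# A minimal usage sketch, not part of the original file. It assumes the getTrainingTestData and
# getRegressionModel helpers defined elsewhere in this module, and returns the callback, which
# (judging by its use in snelsonDemo) records iteration counts, training log likelihoods and
# hold-out likelihoods. `demoRepeatMinimization` is a hypothetical name.
def demoRepeatMinimization():
    xtrain, ytrain, xtest, ytest = getTrainingTestData()
    exact_model = getRegressionModel(xtrain, ytrain)
    history = repeatMinimization(exact_model, xtest, ytest)
    return history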
def trainSparseModel(xtrain, ytrain, exact_model, isFITC, xtest, ytest):
    sparse_model = getSparseModel(xtrain, ytrain, isFITC)
    sparse_model.likelihood.variance = exact_model.likelihood.variance.read_value().copy()
    sparse_model.kern.lengthscales = exact_model.kern.lengthscales.read_value().copy()
    sparse_model.kern.variance = exact_model.kern.variance.read_value().copy()
    callback = cb(sparse_model, xtest, ytest)
    opt = gpflow.train.ScipyOptimizer()
    for repeatIndex in range(nRepeats):
        print("repeatIndex ", repeatIndex)
        opt.minimize(sparse_model, disp=False, maxiter=notebook_niter(2000), step_callback=callback)
    return sparse_model, callback
def snelsonDemo():
    from matplotlib import pyplot as plt
    from IPython import embed
    xtrain, ytrain, xtest, ytest = getTrainingTestData()

    # run exact inference on training data.
    exact_model = getRegressionModel(xtrain, ytrain)
    opt = gpflow.train.ScipyOptimizer()
    opt.minimize(exact_model, maxiter=notebook_niter(2000000))

    figA, axes = plt.subplots(1, 1)
    inds = np.argsort(xtrain.flatten())
    axes.plot(xtrain[inds, :], ytrain[inds, :], 'ro')
    plotPredictions(axes, exact_model, 'g', None)

    figB, axes = plt.subplots(3, 2)

    # run sparse models on training data, initialized from the exact optimal solution.
    VFEmodel, VFEcb = trainSparseModel(xtrain, ytrain, exact_model, False, xtest, ytest)
    FITCmodel, FITCcb = trainSparseModel(xtrain, ytrain, exact_model, True, xtest, ytest)

    print("Exact model parameters \n")
    printModelParameters(exact_model)
    print("Sparse model parameters for VFE optimization \n")
    printModelParameters(VFEmodel)
    print("Sparse model parameters for FITC optimization \n")
    printModelParameters(FITCmodel)

    VFEiters = FITCcb.n_iters
    VFElog_likelihoods = stretch(len(VFEiters), VFEcb.log_likelihoods)
    VFEhold_out_likelihood = stretch(len(VFEiters), VFEcb.hold_out_likelihood)

    plotComparisonFigure(xtrain, VFEmodel, exact_model, axes[0, 0], axes[1, 0], axes[2, 0],
                         VFEiters, VFElog_likelihoods, VFEhold_out_likelihood, "VFE")
    plotComparisonFigure(xtrain, FITCmodel, exact_model, axes[0, 1], axes[1, 1], axes[2, 1],
                         FITCcb.n_iters, FITCcb.log_likelihoods, FITCcb.hold_out_likelihood, "FITC")

    axes[0, 0].set_title('VFE', loc='center', fontdict={'fontsize': 22})
    axes[0, 1].set_title('FITC', loc='center', fontdict={'fontsize': 22})

    embed()
from __future__ import print_function
import gpflow
from gpflow.test_util import notebook_niter
import tensorflow as tf
import os
import numpy as np
import cProfile
import csv

nRepeats = notebook_niter(50)

predict_limits = [-4., 4.]
inducing_points_limits = [-1., 9]
hold_out_limits = [0.20, 0.60]
optimization_limits = [18., 25.]


def readCsvFile(fileName):
    reader = csv.reader(open(fileName, 'r'))
    dataList = []
    for row in reader:
        dataList.append([float(elem) for elem in row])
    return np.array(dataList)


def getTrainingTestData():
    overallX = readCsvFile('data/snelson_train_inputs')
    overallY = readCsvFile('data/snelson_train_outputs')

    trainIndeces = []
    testIndeces = []
import tensorflow as tf
from self_awareness import networks
import gpflow
from gpflow.test_util import notebook_niter, is_continuous_integration
import gpflow.multioutput.kernels as mk
import tensorflow.contrib.distributions as tfd
from scipy.cluster.vq import kmeans2
import numpy as np

float_type = gpflow.settings.float_type
ITERATIONS = notebook_niter(1000)


class Model():
    def __init__(self, args, is_training=True):
        # Store the arguments
        self.args = args

        # args.rnn_size contains the dimension of the hidden state of the LSTM
        # TODO: (resolve) Do we need to use a fixed seq_length?

        # Input data contains a sequence of (x, y) points
        self.input_data = tf.placeholder(tf.float32, [
            args.batch_size, args.target_image_size[0], args.target_image_size[1], 1
        ])
def ex2():
    minibatch_size = notebook_niter(1000, test_n=10)
    gp_dim = 5
    M = notebook_niter(100, test_n=5)

    ## placeholders
    X = tf.placeholder(tf.float32, [minibatch_size, Mnist.input_dim])  # fixed shape so num_data works in SVGP
    Y = tf.placeholder(tf.float32, [minibatch_size, 1])
    Xtest = tf.placeholder(tf.float32, [None, Mnist.input_dim])

    ## build graph
    with tf.variable_scope('cnn'):
        f_X = tf.cast(cnn_fn(X, gp_dim), dtype=float_type)

    with tf.variable_scope('cnn', reuse=True):
        f_Xtest = tf.cast(cnn_fn(Xtest, gp_dim), dtype=float_type)

    gp_model = gpflow.models.SVGP(f_X, tf.cast(Y, dtype=float_type),
                                  gpflow.kernels.RBF(gp_dim),
                                  gpflow.likelihoods.MultiClass(Mnist.Nclasses),
                                  Z=np.zeros((M, gp_dim)),  # we'll set this later
                                  num_latent=Mnist.Nclasses)

    loss = -gp_model.likelihood_tensor

    m, v = gp_model._build_predict(f_Xtest)
    my, yv = gp_model.likelihood.predict_mean_and_var(m, v)

    with tf.variable_scope('adam'):
        opt_step = tf.train.AdamOptimizer(0.001).minimize(loss)
    tf_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='adam')
    tf_vars += tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='cnn')

    ## initialize
    sess = tf.Session()
    sess.run(tf.variables_initializer(var_list=tf_vars))
    gp_model.initialize(session=sess)

    ## reset inducing points (they live in a different space from X, so we need to be careful here)
    ind = np.random.choice(Mnist.X.shape[0], minibatch_size, replace=False)
    fZ = sess.run(f_X, feed_dict={X: Mnist.X[ind]})
    # Z_0 = kmeans2(fZ, M)[0] might fail
    Z_0 = fZ[np.random.choice(len(fZ), M, replace=False)]

    def set_gp_param(param, value):
        sess.run(tf.assign(param.unconstrained_tensor, param.transform.backward(value)))

    set_gp_param(gp_model.feature.Z, Z_0)

    ## train
    for i in range(ITERATIONS):
        ind = np.random.choice(Mnist.X.shape[0], minibatch_size, replace=False)
        sess.run(opt_step, feed_dict={X: Mnist.X[ind], Y: Mnist.Y[ind]})

    ## predict
    preds = np.argmax(sess.run(my, feed_dict={Xtest: Mnist.Xtest}), 1).reshape(Mnist.Ytest.shape)
    correct = preds == Mnist.Ytest.astype(int)
    acc = np.average(correct.astype(float)) * 100.
    print('accuracy is {:.4f}%'.format(acc))
# Augment the X data with an extra column marking which output each row belongs to
# (i.e. one dimension per factor group)
X_augmented = np.vstack((np.hstack((X1, np.zeros_like(X1))),
                         np.hstack((X2, np.ones_like(X2)))))

# Augment the Y data to indicate which likelihood we should use
Y_augmented = np.vstack((np.hstack((Y1, np.zeros_like(X1))),
                         np.hstack((Y2, np.ones_like(X2)))))

m = gpflow.models.VGP(X_augmented, Y_augmented, kern=kern, likelihood=lik, num_latent=1)

from gpflow.test_util import notebook_niter

m.kern.kernels[1].W = np.random.randn(2, 1)
gpflow.train.ScipyOptimizer().minimize(m, maxiter=notebook_niter(2000))


def plot_gp(x, mu, var, color='k'):
    plt.plot(x, mu, color=color, lw=2)
    plt.plot(x, mu + 2 * np.sqrt(var), '--', color=color)
    plt.plot(x, mu - 2 * np.sqrt(var), '--', color=color)


def plot(m):
    xtest = np.linspace(0, 1, 100)[:, None]
    line, = plt.plot(X1, Y1, 'x', mew=2)
    mu, var = m.predict_f(np.hstack((xtest, np.zeros_like(xtest))))
    plot_gp(xtest, mu, var, line.get_color())

    line, = plt.plot(X2, Y2, 'x', mew=2)
def main():
    # read the training data_x, data_y from the dataset
    data_x, data_y = read_train_zip("conll_train.zip")

    # transform the data_x matrix to a sparse csr_matrix
    X_sparse = csr_matrix(data_x)

    # use TruncatedSVD for dimensionality reduction
    tsvd = TruncatedSVD(n_components=50, algorithm='arpack')
    tsvd.fit(X_sparse)
    X_sparse_tsvd = tsvd.transform(X_sparse)

    label_dict = dict()
    for i in range(len(data_y)):
        if data_y[i][0] not in label_dict:
            label_dict[data_y[i][0]] = [i]
        else:
            label_dict[data_y[i][0]].append(i)

    random_sample = []
    # extract a reasonable number of training points per label (at most 5000 each)
    for e in label_dict:
        if len(label_dict[e]) > 5000:
            k = random.sample(label_dict[e], 5000)
            random_sample = random_sample + k
        else:
            random_sample = random_sample + label_dict[e]

    # shuffle the training dataset and draw a validation dataset
    test_index = [_ for _ in range(211727)]
    random_test = random.sample(test_index, 10000)
    train_x = X_sparse_tsvd[random_sample]
    train_y = data_y[random_sample]
    val_x = X_sparse_tsvd[random_test]
    val_y = data_y[random_test]
    shuffle_list = np.array([_ for _ in range(train_x.shape[0])])
    np.random.shuffle(shuffle_list)
    train_x = train_x[shuffle_list]
    train_y = train_y[shuffle_list]

    # train the GP model
    g = gpflow.models.SVGP(train_x, train_y,
                           kern=gpflow.kernels.RBF(input_dim=50),
                           likelihood=gpflow.likelihoods.MultiClass(23),
                           minibatch_size=1000,
                           Z=train_x[::50].copy(),
                           num_latent=23,
                           whiten=True,
                           q_diag=True)
    opt = gpflow.train.AdamOptimizer()
    opt.minimize(g, maxiter=notebook_niter(2000))
    result_t = g.predict_y(val_x)[0]

    # calculate the ER and MNLP on the validation set for the GP model
    c = 0
    for i in range(len(val_x)):
        if result_t[i].argmax() == val_y[i][0]:
            c += 1
    er = 1 - c / len(val_x)
    mnlp = 0
    result_te = np.log(result_t)
    for i in range(len(val_x)):
        for j in range(23):
            mnlp += result_te[i][j]
    mnlp = -mnlp / len(val_x)
    print("GP model:")
    print("error rate: {}, mean negative log probability: {}".format(er, mnlp))

    # calculate the ER and MNLP on the validation set for the softmax model
    lgpredict = LogisticRegression(solver='lbfgs', multi_class="multinomial").fit(train_x, train_y)
    lgpresult = lgpredict.predict_proba(val_x)
    c = 0
    for i in range(len(val_x)):
        if lgpresult[i].argmax() == val_y[i][0]:
            c += 1
    er = 1 - c / len(val_x)
    mnlp = 0
    result_te = np.log(lgpresult)
    for i in range(len(val_x)):
        for j in range(22):
            mnlp += result_te[i][j]
    mnlp = -mnlp / len(val_x)
    print("Softmax model:")
    print("error rate: {}, mean negative log probability: {}".format(er, mnlp))

    # read test data from the test dataset
    test_x, separate_list = read_test_zip("conll_test_features.zip")
    # dimensionality reduction
    test = tsvd.transform(test_x)
    # predict
    result = g.predict_y(test)[0]
    result_1 = g.predict_y(test[20000:40000])[0]
    index = 0
    result = np.log(result)
    final = ''
    for e in separate_list:
        for i in range(e):
            for j in range(22):
                final += str(round(result[index + i][j], 8))
                final += ","
            final += str(round(result[index + i][22], 8))
            final += "\n"
        final += "\n"
        index += e

    # write the predictions to predictions.txt
    with open("predictions.txt", "w") as f:
        f.write(final)
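# Hypothetical entry point, not part of the original listing: run the pipeline above when this
# module is executed as a script.
if __name__ == "__main__":
    main()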
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
import gpflow
from gpflow.test_util import notebook_niter, is_continuous_integration
from scipy.cluster.vq import kmeans2
from tensorflow.examples.tutorials.mnist import input_data

float_type = gpflow.settings.float_type
ITERATIONS = notebook_niter(1000)  # how many training iterations are performed

mnist = input_data.read_data_sets("./data/MNIST_data/", one_hot=False)


class Mnist:
    input_dim = 784
    Nclasses = 10
    X = mnist.train.images.astype(float)
    Y = mnist.train.labels.astype(float)[:, None]
    Xtest = mnist.test.images.astype(float)
    Ytest = mnist.test.labels.astype(float)[:, None]


def cnn_fn(x, output_dim):
    """
    Adapted from https://www.tensorflow.org/tutorials/layers
    """
    # input [BXYC] = [B, 28, 28, 1]
    conv1 = tf.layers.conv2d(inputs=tf.reshape(x, [-1, 28, 28, 1]),
                             filters=32,