Example No. 1
import numpy as np
import climin
from functools import partial


def vem_algorithm(model,
                  stochastic=False,
                  vem_iters=None,
                  step_rate=None,
                  verbose=False,
                  optZ=True,
                  verbose_plot=False,
                  non_chained=True):
    if vem_iters is None:
        vem_iters = 5

    model['.*.kappa'].fix()  # must always be fixed
    model.elbo = np.empty((vem_iters, 1))

    if stochastic is False:

        for i in range(vem_iters):
            # VARIATIONAL E-STEP
            model['.*.lengthscale'].fix()
            model['.*.variance'].fix()
            model.Z.fix()
            model['.*.W'].fix()

            model.q_u_means.unfix()
            model.q_u_chols.unfix()
            model.optimize(messages=verbose, max_iters=100)
            print('iteration (' + str(i + 1) + ') VE step, log_likelihood=' +
                  str(model.log_likelihood().flatten()))

            # VARIATIONAL M-STEP
            model['.*.lengthscale'].unfix()
            model['.*.variance'].unfix()
            if optZ:
                model.Z.unfix()
            if non_chained:
                model['.*.W'].unfix()

            model.q_u_means.fix()
            model.q_u_chols.fix()
            model.optimize(messages=verbose, max_iters=100)
            print('iteration (' + str(i + 1) + ') VM step, log_likelihood=' +
                  str(model.log_likelihood().flatten()))

    else:
        if step_rate is None:
            step_rate = 0.01

        sto_iters = vem_iters
        model.elbo = np.empty((sto_iters + 1, 1))
        optimizer = climin.Adadelta(model.optimizer_array,
                                    model.stochastic_grad,
                                    step_rate=step_rate,
                                    momentum=0.9)
        c_full = partial(model.callback,
                         max_iter=sto_iters,
                         verbose=verbose,
                         verbose_plot=verbose_plot)
        optimizer.minimize_until(c_full)

    return model
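The stochastic branch hands minimize_until a partial of model.callback, which is not shown here. As a rough sketch (hypothetical name, modeled on the callbacks in the later examples), such a stopping criterion receives climin's info dict and returns True once enough iterations have run:

# Hypothetical stand-in for model.callback; the real one may also log the ELBO or update plots.
def simple_callback(info, max_iter=1000, verbose=False, verbose_plot=False):
    if verbose:
        print(info['n_iter'])
    return info['n_iter'] >= max_iter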
Example No. 2
import numpy as np
import GPy
import climin


def SVGP(X, Y):
    if Y.ndim != 2:
        Y = Y[:, None]
    Z = np.random.rand(20, 1)
    batchsize = 20
    m = GPy.core.SVGP(X,
                      Y,
                      Z,
                      GPy.kern.Matern52(1),
                      GPy.likelihoods.Gaussian(),
                      batchsize=batchsize)
    #m.kern.white.variance = 1e-5
    #m.kern.white.fix()

    opt = climin.Adadelta(m.optimizer_array,
                          m.stochastic_grad,
                          step_rate=0.2,
                          momentum=0.9)

    def callback(i):
        print(m.log_likelihood(), end='\r')
        if i['n_iter'] > 5000:
            return True
        return False

    info = opt.minimize_until(callback)
    print(info)

    return m
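A minimal way to exercise this helper, assuming toy 1-D data (the inducing points above are drawn in [0, 1], so the inputs are kept in that range):

# Toy regression data; the SVGP helper then runs Adadelta until the callback stops it.
X = np.random.rand(200, 1)
Y = np.sin(6 * X) + 0.1 * np.random.randn(200, 1)
m = SVGP(X, Y)
print(m.log_likelihood())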
Example No. 3
    def _prepare_adadelta(self, x, fp):
        exclude = [
            'verbosity', 'min_grad_ratio', 'max_it', 'permitted_drops',
            'callback'
        ]
        ada_kwargs = {k: v for k, v in self.kwargs.items() if k not in exclude}
        return climin.Adadelta(x, fp, **ada_kwargs)
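The method above only filters run-control keys out of self.kwargs before handing the rest to climin.Adadelta. A standalone sketch of the same filtering pattern on a toy quadratic (all names here are illustrative, not part of the original class):

import numpy as np
import climin

def quad_grad(x):
    return 2.0 * x  # gradient of ||x||^2

kwargs = {'step_rate': 0.5, 'momentum': 0.9, 'max_it': 200, 'verbosity': 0}
exclude = ['verbosity', 'min_grad_ratio', 'max_it', 'permitted_drops', 'callback']
ada_kwargs = {k: v for k, v in kwargs.items() if k not in exclude}

x = np.ones(3)
opt = climin.Adadelta(x, quad_grad, **ada_kwargs)  # only climin's own kwargs get through
for info in opt:
    if info['n_iter'] >= kwargs['max_it']:
        break
print(x)  # x has moved toward the minimum at the origin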
Example No. 4
import datetime

import numpy as np
import GPy
import climin
from ipywidgets import Text
from IPython.display import display


def trainGP(all_training_set, all_training_label, all_testing_set,
            all_testing_label):
    t = Text(align='right')
    display(t)
    batchsize = 10
    Z = np.random.rand(20, 72)
    all_training_label = np.vstack(all_training_label)
    m = GPy.core.SVGP(all_training_set,
                      all_training_label,
                      Z,
                      GPy.kern.RBF(72) + GPy.kern.White(72),
                      GPy.likelihoods.Gaussian(),
                      batchsize=batchsize)
    m.kern.white.variance = 1e-5
    m.kern.white.fix()

    opt = climin.Adadelta(m.optimizer_array,
                          m.stochastic_grad,
                          step_rate=0.2,
                          momentum=0.9)

    def callback(i):
        t.value = str(m.log_likelihood())
        # Stop after 100000 iterations
        if i['n_iter'] > 100000:
            return True
        return False

    info = opt.minimize_until(callback)
    all_answers = m.predict(all_testing_set)

    answer_shape = np.shape(all_answers)
    percent_right = np.zeros(answer_shape[1])
    for i in range(answer_shape[1]):
        if all_answers[0][i] > 0.5:
            percent_right[i] = 1
        else:
            percent_right[i] = 0
    final_percent = np.sum(
        abs(all_testing_label - percent_right)) / answer_shape[1]
    print('classifier got ', (1 - final_percent) * 100,
          '% correct on the test set')
    print("Finish training, saving...")

    # 1: Saving a model:
    np.save('model_save.npy', m.param_array)
    # 2: loading a model
    # Model creation, without initialization:
    #m_load = GPy.models.GPRegression(X, Y, initialize=False)
    #m_load.update_model(False) # do not call the underlying expensive algebra on load
    #m_load.initialize_parameter() # Initialize the parameters (connect the parameters up)
    #m_load[:] = np.load('model_save.npy') # Load the parameters
    #m_load.update_model(True) # Call the algebra only once
    #print(m_load)
    model_path = "trained_model"
    np.save(
        model_path + "_" + datetime.datetime.now().strftime("%m_%d_%y_%H%M") +
        ".npy", m.param_array)
    return m
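The commented-out block above sketches GPy's save/load recipe; applied to this model it might look like the following (a sketch only: it assumes the same training arrays, inducing points and batch size are available when the model is rebuilt):

# Rebuild an identical but uninitialized model, then inject the saved parameters.
m_load = GPy.core.SVGP(all_training_set,
                       all_training_label,
                       Z,
                       GPy.kern.RBF(72) + GPy.kern.White(72),
                       GPy.likelihoods.Gaussian(),
                       batchsize=batchsize,
                       initialize=False)
m_load.update_model(False)       # skip the expensive algebra while loading
m_load.initialize_parameter()    # connect the parameters up
m_load[:] = np.load('model_save.npy')
m_load.update_model(True)        # run the algebra once with the loaded values
print(m_load)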
Example No. 5
	def inference(self, X, Y, numZ, num_local_Z, num_cluster, batchsize=1000, upperbound=-1, lowerbound_ratio=-1, optimizer=1, num_iters=1000):

		[Ntrain, d]=X.shape

		Yi=(Y==1).astype(int)
		pu=np.random.permutation(Ntrain)
		#numZ=300
		Z=X[pu[range(numZ)], :]

		#batchsize = 1000
		lik = GPy.likelihoods.Bernoulli()
		k = GPy.kern.RBF(d, lengthscale=5., ARD=True) + GPy.kern.White(1, 1e-6)
		m = SMGP(X, Yi, Z, likelihood=lik, kernel=k, batchsize=batchsize, num_cluster=num_cluster, num_local_Z=num_local_Z, upperbound=upperbound, lowerbound_ratio=lowerbound_ratio)
		m.kern.white.variance = 1e-5
		m.kern.white.fix()


		from ipywidgets import Text
		from IPython.display import display

		t = Text(align='right')
		display(t)
		m.iter_no=0
		#import sys
		def callback_adadelta(i):
			t.value = str(m.log_likelihood())
			print(i['n_iter'])
			if i['n_iter'] > num_iters:
				return True
			return False
		
		def callback_lbfgsb(i):
			m.iter_no=m.iter_no+1
			t.value = str(m.log_likelihood())
			print(m.iter_no)    
		
		if optimizer==1: #Adadelta          
			opt = climin.Adadelta(m.optimizer_array, m.stochastic_grad, step_rate=0.2, momentum=0.9)          
			info = opt.minimize_until(callback_adadelta)
		elif optimizer==2: #l_bfgs_b    		
			x, f, d = optimize.fmin_l_bfgs_b(m._objective, m.optimizer_array, fprime=m.stochastic_grad, maxfun=1000, callback=callback_lbfgsb)
		else:
			print('optimizer not supported')

		return m
Example No. 6
    def minimize(self, fun, x_0, bounds=None):
        """
        Does not take bounds into account
        """
        x = np.copy(x_0).reshape(-1)
        opt = climin.Adadelta(wrt=x,
                              fprime=fun,
                              step_rate=self.step_rate,
                              momentum=self.momentum,
                              decay=self.decay,
                              offset=self.offset)

        x_list = [x.copy()]
        time_list = [0.]
        start = time.time()

        for info in opt:
            i = info['n_iter']
            if i > self.maxiter:
                break

            if self.disp and not (i % self.print_freq):
                grad = info['gradient']
                print('Epoch', int(i / self.iter_per_epoch), ':')
                print('\tx', x.reshape(-1)[:5])
                print("\tGradient norm", np.linalg.norm(grad))

            if not i % int(self.iter_per_epoch):
                x_list.append(x.copy())
                time_list.append(time.time() - start)

        stat_dict = {
            'time_lst': time_list,
            'x_lst': x_list,
            'fun': None,
            'time': time_list[-1],
            'info': info
        }

        return x.copy(), stat_dict
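Because this is a method that reads step_rate, momentum, decay, offset, maxiter, disp, print_freq and iter_per_epoch from self, one quick way to try it in isolation is to fake that object; the wrapper class name and settings below are illustrative only, and the snippet assumes the module that defines the class (with its climin and time imports) is on hand:

import numpy as np
from types import SimpleNamespace

# Stand-in for the wrapper instance; attribute names mirror those used in minimize().
cfg = SimpleNamespace(step_rate=1.0, momentum=0.9, decay=0.9, offset=1e-4,
                      maxiter=500, disp=False, print_freq=100, iter_per_epoch=50)

def grad(x):
    return 2.0 * (x - 3.0)  # gradient of ||x - 3||^2, minimum at x = 3

# `fun` is passed to climin as fprime, so it must return the gradient.
x_best, stats = OptimizerWrapper.minimize(cfg, grad, np.zeros(4))  # hypothetical class name
print(x_best, stats['time'])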
Example No. 7
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=500,
             dataset='mnist.pkl.gz',
             batch_size=20,
             n_hidden=500,
             optimizer='gd',
             activation=T.tanh):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    tmpl = [(28 * 28, n_hidden), n_hidden, (n_hidden, 10), 10]
    flat, (Weights_1, bias_1, Weights_2,
           bias_2) = climin.util.empty_with_views(tmpl)

    # Initialize weights with a uniform distribution according to the tutorial
    rng = numpy.random.RandomState(1234)
    Weights_1_init = rng.uniform(low=-numpy.sqrt(6. / (28 * 28 + n_hidden)),
                                 high=numpy.sqrt(6. / (28 * 28 + n_hidden)),
                                 size=(28 * 28, n_hidden))

    Weights_2_init = rng.uniform(low=-numpy.sqrt(6. / (n_hidden + 10)),
                                 high=numpy.sqrt(6. / (n_hidden + 10)),
                                 size=(n_hidden, 10))

    bias_1_init = numpy.zeros((n_hidden, ), dtype=theano.config.floatX)
    bias_2_init = numpy.zeros((10, ), dtype=theano.config.floatX)

    if activation == T.nnet.sigmoid:
        Weights_1_init *= 4
        Weights_2_init *= 4

    def initialize_in_place(array, values):
        for j in range(0, len(values)):
            array[j] = values[j]

    initialize_in_place(Weights_1, Weights_1_init)
    initialize_in_place(Weights_2, Weights_2_init)
    initialize_in_place(bias_1, bias_1_init)
    initialize_in_place(bias_2, bias_2_init)

    if batch_size is None:
        args = itertools.repeat(([train_set_x, train_set_y], {}))
        n_train_batches = 1
    else:
        args = cli.util.iter_minibatches([train_set_x, train_set_y],
                                         batch_size, [0, 0])
        args = ((i, {}) for i in args)
        n_train_batches = train_set_x.shape[0] // batch_size

    print('... building the model')

    x = T.matrix('x')
    y = T.ivector('y')

    rng = numpy.random.RandomState(1234)

    classifier = MLP(rng=rng,
                     input=x,
                     n_in=28 * 28,
                     n_hidden=n_hidden,
                     n_out=10,
                     Weights_1=theano.shared(value=Weights_1,
                                             name='W',
                                             borrow=True),
                     bias_1=theano.shared(value=bias_1, name='b', borrow=True),
                     Weights_2=theano.shared(value=Weights_2,
                                             name='W',
                                             borrow=True),
                     bias_2=theano.shared(value=bias_2, name='b', borrow=True),
                     activation=T.tanh)

    #cost with regularisation terms
    cost = theano.function(inputs=[x, y],
                           outputs=classifier.negative_log_likelihood(y) +
                           L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                           allow_input_downcast=True)

    # gradients with regularisation terms
    gradients = theano.function(
        inputs=[x, y],
        outputs=[
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.hiddenLayer.W),
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.hiddenLayer.b),
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.logRegressionLayer.W),
            T.grad(
                classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr,
                classifier.logRegressionLayer.b)
        ],
        allow_input_downcast=True)

    def loss(parameters, input, target):
        return cost(input, target)

    def d_loss_wrt_pars(parameters, inputs, targets):
        g_W_1, g_b_1, g_W_2, g_b_2 = gradients(inputs, targets)

        return numpy.concatenate(
            [g_W_1.flatten(), g_b_1,
             g_W_2.flatten(), g_b_2])

    zero_one_loss = theano.function(inputs=[x, y],
                                    outputs=classifier.errors(y),
                                    allow_input_downcast=True)

    if optimizer == 'gd':
        print('... using gradient descent')
        opt = cli.GradientDescent(flat,
                                  d_loss_wrt_pars,
                                  step_rate=learning_rate,
                                  momentum=.95,
                                  args=args)
    elif optimizer == 'bfgs':
        print('... using quasi-Newton BFGS')
        opt = cli.Bfgs(flat, loss, d_loss_wrt_pars, args=args)
    elif optimizer == 'lbfgs':
        print('... using quasi-Newton L-BFGS')
        opt = cli.Lbfgs(flat, loss, d_loss_wrt_pars, args=args)
    elif optimizer == 'nlcg':
        print('... using nonlinear conjugate gradient')
        opt = cli.NonlinearConjugateGradient(flat,
                                             loss,
                                             d_loss_wrt_pars,
                                             min_grad=1e-03,
                                             args=args)
    elif optimizer == 'rmsprop':
        print('... using rmsprop')
        opt = cli.RmsProp(flat,
                          d_loss_wrt_pars,
                          step_rate=1e-4,
                          decay=0.9,
                          args=args)
    elif optimizer == 'rprop':
        print('... using resilient propagation')
        opt = cli.Rprop(flat, d_loss_wrt_pars, args=args)
    elif optimizer == 'adam':
        print('... using adaptive momentum estimation optimizer')
        opt = cli.Adam(flat,
                       d_loss_wrt_pars,
                       step_rate=0.0002,
                       decay=0.99999999,
                       decay_mom1=0.1,
                       decay_mom2=0.001,
                       momentum=0,
                       offset=1e-08,
                       args=args)
    elif optimizer == 'adadelta':
        print('... using adadelta')
        opt = cli.Adadelta(flat,
                           d_loss_wrt_pars,
                           step_rate=1,
                           decay=0.9,
                           momentum=.95,
                           offset=0.0001,
                           args=args)
    else:
        print('unknown optimizer')
        return 1

    print('... training')

    # early stopping parameters
    if batch_size is None:
        patience = 250
    else:
        patience = 10000  # look at this many samples regardless

    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = numpy.inf
    test_loss = 0.

    valid_losses = []
    train_losses = []
    test_losses = []

    epoch = 0

    start_time = timeit.default_timer()

    for info in opt:
        iter = info['n_iter']
        epoch = iter // n_train_batches
        minibatch_index = iter % n_train_batches

        if iter % validation_frequency == 0:
            validation_loss = zero_one_loss(valid_set_x, valid_set_y)
            valid_losses.append(validation_loss)
            train_losses.append(zero_one_loss(train_set_x, train_set_y))
            test_losses.append(zero_one_loss(test_set_x, test_set_y))

            print(
                'epoch %i, minibatch %i/%i, validation error % f %%, iter/patience %i/%i'
                % (epoch, minibatch_index + 1, n_train_batches,
                   validation_loss * 100, iter, patience))
            # if we got the best validation score until now
            if validation_loss < best_validation_loss:
                # improve patience if loss improvement is good enough
                if validation_loss < best_validation_loss * improvement_threshold:
                    patience = max(patience, iter * patience_increase)
                best_validation_loss = validation_loss
                # test it on the test set
                test_loss = zero_one_loss(test_set_x, test_set_y)

                print(
                    '    epoch %i, minibatch %i/%i, test error of best model %f %%'
                    % (epoch, minibatch_index + 1, n_train_batches,
                       test_loss * 100))

        if patience <= iter or epoch >= n_epochs:
            break

    end_time = timeit.default_timer()
    print((
        'Optimization complete. Best validation score of %f %% with test performance %f %%'
    ) % (best_validation_loss * 100., test_loss * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)

    losses = (train_losses, valid_losses, test_losses)

    return classifier, losses
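For reference, the whole pipeline can then be driven with any of the listed optimizers, e.g. as below (this assumes mnist.pkl.gz plus the load_data and MLP helpers from the accompanying Theano tutorial code are available):

# Train the MLP with Adadelta instead of plain gradient descent.
classifier, (train_losses, valid_losses, test_losses) = test_mlp(
    n_epochs=50, batch_size=20, optimizer='adadelta')
print('final validation error: %f %%' % (valid_losses[-1] * 100))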
Example No. 8
#plt.savefig("gaussian_1000obs.pdf", bbox_inches='tight', transparent=True, pad_inches=0)

Z_init = domain[0] + np.random.rand(20, 1) * domain[1]
mf = gen_mf(0)
gsvgp = GPy.core.SVGP(X=x_init,
                      Y=y_init,
                      Z=Z_init,
                      kernel=k3,
                      likelihood=lik,
                      Y_metadata={'trials': np.ones_like(y_init) * NB_SHOTS},
                      mean_function=mf,
                      batchsize=15)

import climin
opt = climin.Adadelta(gsvgp.optimizer_array,
                      gsvgp.stochastic_grad,
                      step_rate=0.2,
                      momentum=0.9)


def callback(i):
    print(str(gsvgp.log_likelihood()))
    #Stop after 5000 iterations
    if i['n_iter'] > 5000:
        return True
    return False


info = opt.minimize_until(callback)
### ============================================================ ###
# 3.a Super restricted range no MF
### ============================================================ ###
Example No. 9
def sgd_optimization_mnist(learning_rate=0.01, n_epochs=1000, dataset='mnist.pkl.gz', batch_size=600, optimizer='gd'):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    tmpl = [(28 * 28, 10), 10]
    flat, (Weights, bias) = climin.util.empty_with_views(tmpl)

    cli.initialize.randomize_normal(flat, 0, 1)

    if batch_size is None:
        args = itertools.repeat(([train_set_x, train_set_y], {}))
        n_train_batches = 1
    else:
        args = cli.util.iter_minibatches([train_set_x, train_set_y], batch_size, [0, 0])
        args = ((i, {}) for i in args)
        n_train_batches = train_set_x.shape[0] // batch_size

    print('... building the model')

    x = T.matrix('x')
    y = T.ivector('y')

    classifier = LogisticRegression(
            input = x,
            n_in = 28 * 28,
            n_out = 10,
            W = theano.shared(value = Weights, name = 'W', borrow = True),
            b = theano.shared(value = bias, name = 'b', borrow = True)
            )

    gradients = theano.function(
            inputs = [x, y],
            outputs = [
                T.grad(classifier.negative_log_likelihood(y), classifier.W),
                T.grad(classifier.negative_log_likelihood(y), classifier.b)
                ],
            allow_input_downcast = True
            )

    cost = theano.function(
        inputs=[x, y],
        outputs=classifier.negative_log_likelihood(y),
        allow_input_downcast=True
    )

    def loss(parameters, input, target):
        return cost(input, target)

    def d_loss_wrt_pars(parameters, inputs, targets):
        g_W, g_b = gradients(inputs, targets)

        return np.concatenate([g_W.flatten(), g_b])

    zero_one_loss = theano.function(
            inputs = [x, y],
            outputs = classifier.errors(y),
            allow_input_downcast = True
            )

    if optimizer == 'gd':
        print('... using gradient descent')
        opt = cli.GradientDescent(flat, d_loss_wrt_pars, step_rate=learning_rate, momentum=.95, args=args)
    elif optimizer == 'rmsprop':
        print('... using rmsprop')
        opt = cli.RmsProp(flat, d_loss_wrt_pars, step_rate=1e-4, decay=0.9, args=args)
    elif optimizer == 'rprop':
        print('... using resilient propagation')
        opt = cli.Rprop(flat, d_loss_wrt_pars, args=args)
    elif optimizer == 'adam':
        print('... using adaptive momentum estimation optimizer')
        opt = cli.Adam(flat, d_loss_wrt_pars, step_rate = 0.0002, decay = 0.99999999, decay_mom1 = 0.1, decay_mom2 = 0.001, momentum = 0, offset = 1e-08, args=args)
    elif optimizer == 'adadelta':
        print('... using adadelta')
        opt = cli.Adadelta(flat, d_loss_wrt_pars, step_rate=1, decay = 0.9, momentum = .95, offset = 0.0001, args=args)
    else:
        print('unknown optimizer')
        return 1

    print('... training the model')

    # early stopping parameters
    if batch_size is None:
        patience = 250
    else:
        patience = 5000  # look at this many samples regardless

    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    test_loss = 0.

    valid_losses = []
    train_losses = []
    test_losses = []

    epoch = 0

    start_time = timeit.default_timer()
    for info in opt:
        iter = info['n_iter']
        epoch = iter // n_train_batches
        minibatch_index = iter % n_train_batches

        if iter % validation_frequency == 0:
            # compute zero-one loss on validation set
            validation_loss = zero_one_loss(valid_set_x, valid_set_y)
            valid_losses.append(validation_loss)
            train_losses.append(zero_one_loss(train_set_x, train_set_y))
            test_losses.append(zero_one_loss(test_set_x, test_set_y))

            print(
                    'epoch %i, minibatch %i/%i, validation error % f %%, iter/patience %i/%i' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        validation_loss * 100,
                        iter,
                        patience
                        )
                    )
            # if we got the best validation score until now
            if validation_loss < best_validation_loss:
                # improve patience if loss improvement is good enough
                if validation_loss < best_validation_loss * improvement_threshold:
                    patience = max(patience, iter * patience_increase)
                best_validation_loss = validation_loss
                # test it on the test set
                test_loss = zero_one_loss(test_set_x, test_set_y)

                print(
                        '    epoch %i, minibatch %i/%i, test error of best model %f %%' %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_loss * 100
                            )
                        )

        if patience <= iter or epoch >= n_epochs:
            break

    end_time = timeit.default_timer()

    print('Optimization complete with best validation score of %f %%, with test performance %f %%' % (best_validation_loss * 100., test_loss * 100.))
    print('The code run for %d epochs, with %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time)))
    print(('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)

    losses = (train_losses, valid_losses, test_losses)

    return classifier, losses
Example No. 10
            model.kern_list[q].variance.fix()
        """"""""""""""""""""""""""""""""""""""""""""""""""""""
        print(model['B'])
        print('Initial Log Likelihood:\n',model.log_likelihood())

        if method == 'adam':
            opt = climin.Adam(model.optimizer_array, model.stochastic_grad, step_rate=0.005, decay_mom1=1 - 0.9, decay_mom2=1 - 0.999)
            ELBO.append(model.log_likelihood())
            #NLPD.append(model.negative_log_predictive(Xtest, Ytest, num_samples=1000))
            start = time.time()
            myTimes.append(start)
            print('Running Adam...')
            info = opt.minimize_until(callback)

        elif method == 'adad':
            opt = climin.Adadelta(model.optimizer_array, model.stochastic_grad, step_rate=0.005, momentum=0.9)
            ELBO.append(model.log_likelihood())
            #NLPD.append(model.negative_log_predictive(Xtest, Ytest, num_samples=1000))
            start = time.time()
            myTimes.append(start)
            print('Running Adadelta...')
            info = opt.minimize_until(callback)
        elif method == 'vo':
            model.Gauss_Newton = False
            opt = climin.VarOpt(model.optimizer_array, model.stochastic_grad, step_rate=0.005, s_ini=q_s_ini, decay_mom1=1 - 0.9, decay_mom2=1 - 0.999, prior_lambda=prior_lamb)
            ELBO.append(model.log_likelihood())
            #NLPD.append(model.negative_log_predictive(Xtest, Ytest, num_samples=1000))
            start = time.time()
            myTimes.append(start)
            print('Running Variational Opt...')
            info = opt.minimize_until(callback)
Example No. 11
def climin_wrapper(oracle,
                   w0,
                   train_points,
                   train_targets,
                   options,
                   method='AdaDelta'):
    default_options = {
        'maxiter': 1000,
        'print_freq': 1,
        'verbose': False,
        'g_tol': 1e-5,
        'batch_size': 10,
        'step_rate': 0.1
    }
    if options is not None:
        default_options.update(options)
        if 'print_freq' in options.keys():
            default_options['verbose'] = True
    options = default_options

    w = w0.copy()
    data = ((i, {}) for i in iter_minibatches([train_points, train_targets],
                                              options['batch_size'], [1, 0]))

    if method == 'AdaDelta':
        opt = climin.Adadelta(wrt=w,
                              fprime=oracle,
                              args=data,
                              step_rate=options['step_rate'])
    elif method == 'SG':
        opt = climin.GradientDescent(wrt=w,
                                     fprime=oracle,
                                     args=data,
                                     step_rate=options['step_rate'])
    else:
        raise ValueError('Unknown optimizer')

    w_lst = [w.copy()]
    time_lst = [0.]
    start = time.time()
    n_epochs = options['maxiter']
    n_iterations = int(n_epochs * train_targets.size / options['batch_size'])
    print_freq = int(options['print_freq'] * train_targets.size /
                     options['batch_size'])

    if options['verbose']:
        print('Using ' + method + ' optimizer')
    for info in opt:
        i = info['n_iter']
        if i > n_iterations:
            break
        if not (i % print_freq) and options['verbose']:
            grad = info['gradient']
            print("Iteration ",
                  int(i * options['batch_size'] / train_targets.size), ":")
            print("\tGradient norm", np.linalg.norm(grad))
        if not i % int(train_targets.size / options['batch_size']):
            w_lst.append(w.copy())
            time_lst.append(time.time() - start)

    return w.copy(), w_lst, time_lst
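A rough usage sketch: iter_minibatches above slices the points along axis 1 and the targets along axis 0, so the oracle is called as oracle(w, X_batch, y_batch) with the points stored feature-major. The toy least-squares gradient below is an illustration, not part of the original module:

import numpy as np

d, n = 5, 400
X = np.random.randn(d, n)  # features x samples, matching the axis-1 slicing above
w_true = np.random.randn(d)
y = X.T.dot(w_true) + 0.01 * np.random.randn(n)

def lsq_oracle(w, X_batch, y_batch):
    # Gradient of 0.5 * ||X_batch.T w - y_batch||^2 / batch_size with respect to w
    r = X_batch.T.dot(w) - y_batch
    return X_batch.dot(r) / y_batch.size

w_hat, w_lst, time_lst = climin_wrapper(lsq_oracle, np.zeros(d), X, y,
                                        {'maxiter': 20, 'batch_size': 40},
                                        method='AdaDelta')
print(np.linalg.norm(w_hat - w_true))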
Example No. 12
	def inference_time(self, X, Y, Xt, Yt, numZ, num_local_Z, num_cluster, batchsize=1000, upperbound=-1, lowerbound_ratio=-1, optimizer=1, num_iters=1000):
		[Ntrain, d]=X.shape
		ac_array = []
		decv_array = []
		duration_array = []
		self.start_time = time.time()
		self.total_pred_time = 0

		Yi=(Y==1).astype(int)
		pu=np.random.permutation(Ntrain)
	
		Z=X[pu[range(numZ)], :]


		lik = GPy.likelihoods.Bernoulli()
		k = GPy.kern.RBF(d, lengthscale=5., ARD=True) + GPy.kern.White(1, 1e-6)
		m = SMGP(X, Yi, Z, likelihood=lik, kernel=k, batchsize=batchsize, num_cluster=num_cluster, num_local_Z=num_local_Z, upperbound=upperbound, lowerbound_ratio=lowerbound_ratio)
		m.kern.white.variance = 1e-5
		m.kern.white.fix()


		from ipywidgets import Text
		from IPython.display import display

		t = Text(align='right')
		display(t)

		m.iter_no=0
		def callback_adadelta(i):
			t.value = str(m.log_likelihood())
			print(i['n_iter'])
			if (i['n_iter'] %10==0):
				start_pred=time.time();
				ac, pred,decv, test_error, Yt_m, Yt_v = self.prediction1(m, Xt,Yt)
				ac_array.append(ac)
				decv_array.append(decv)
				pred_dur=time.time()-start_pred
				self.total_pred_time=self.total_pred_time+pred_dur
				duration_array.append(time.time()-self.start_time-self.total_pred_time)
			if i['n_iter'] > 1000:
				return True
			return False
		
		def callback_lbfgsb(i):
			m.iter_no=m.iter_no+1
			t.value = str(m.log_likelihood())
			print(m.iter_no)
			if (m.iter_no %10==0):
				start_pred=time.time();
				ac, pred,decv, test_error, Yt_m, Yt_v = self.prediction1(m, Xt,Yt)
				ac_array.append(ac)
				decv_array.append(decv)
				pred_dur=time.time()-start_pred
				self.total_pred_time=self.total_pred_time+pred_dur
				duration_array.append(time.time()-self.start_time-self.total_pred_time)

		if optimizer==1: #Adadelta          
			opt = climin.Adadelta(m.optimizer_array, m.stochastic_grad, step_rate=0.2, momentum=0.9)          
			info = opt.minimize_until(callback_adadelta)
		elif optimizer==2: #l_bfgs_b
			x, f, d = optimize.fmin_l_bfgs_b(m._objective, m.optimizer_array, fprime=m.stochastic_grad, maxfun=1000, callback=callback_lbfgsb)
		else:
			print('optimizer not supported')
			

		return m,ac_array, decv_array,duration_array
Example No. 13
## load data
train_x, train_y = ds.load('train')
valid_x, valid_y = ds.load('valid')

## setup model
idim = len(train_x[0][0])
odim = max(train_y) + 1
model = RNN(idim, 300, odim, 'lstm')

## setup optimizer
#train_w = utils.balance_prior(train_y)
params = model.get_theta()
#args, n_batches = utils.make_batches([train_x, train_y], None)
#opt = climin.Rprop(params, model.opt_fprime, args=args, init_step=0.0001)
args, n_batches = utils.make_batches([train_x, train_y], 30)
opt = climin.Adadelta(params, model.opt_fprime, offset=1e-6, args=args)
#args, n_batches = utils.make_batches([train_x, train_y], 30)
#opt = climin.rmsprop.RmsProp(params, model.opt_fprime, step_rate=0.01, args=args)

## perform optimization
epoch = 0
start = time.time()
for info in opt:
    if info['n_iter'] % n_batches == 0:
        epoch += 1
        # end
        if epoch == 100:
            break
        # print performance
        if epoch % 1 == 0:
            terr, tauc, tlos = utils.eval_perf(
Example No. 14
        else:
            print('\r', m_vb.log_likelihood(), end='')
            sys.stdout.flush()

        if info['n_iter'] >= self.max_iters:
            return True

        return False


stop

m_vb.kern.fix()
m_vb.Z.fix()
opt = climin.Adadelta(m_vb.optimizer_array,
                      m_vb.stochastic_grad,
                      step_rate=step_rates[0])
opt.minimize_until(cb(stop_pc=0.9, max_iters=600))
m_vb.kern.constrain_positive()
m_vb.Z.unfix()
opt = climin.Adadelta(m_vb.optimizer_array,
                      m_vb.stochastic_grad,
                      step_rate=step_rates[1])
opt.minimize_until(cb(max_iters=vb_max_iters))

#set mcmc from vb solution
m.kern[:] = m_vb.kern[:] * 1
m.Z[:] = m_vb.Z[:] * 1
m.Z.fix()
L = GPy.util.choleskies.flat_to_triang(m_vb.q_u_chol)
U = np.vstack(