Example #1
def train_scipy(m, maxiter=2000, step=True):
    log_elbo = []
    # log_pi = []

    def step_callback(step, variables, values):
        elbo = m.elbo()
        print('step {} elbo: {}'.format(step, elbo))
        log_elbo.append(elbo)
        # log_pi.append(m.pi.numpy())

    opt = gpflow.optimizers.Scipy()
    if step:
        _ = opt.minimize(
            m.training_loss,
            method="BFGS",
            variables=m.trainable_variables,
            options=dict(maxiter=ci_niter(maxiter), disp=True),
            step_callback=step_callback,
            compile=True
        )
    else:
        _ = opt.minimize(
            m.training_loss,
            method="BFGS",
            variables=m.trainable_variables,
            options=dict(maxiter=ci_niter(maxiter), disp=True),
            compile=True
        ) 
    # return (log_elbo, log_pi)
    return log_elbo
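# Hedged usage sketch (the data and model below are assumptions, not part of the source):
# train_scipy expects a model that stores its data internally and exposes .elbo(), for
# example a VGP regression model.
import numpy as np
import gpflow

X = np.random.rand(50, 1)
Y = np.sin(6 * X) + 0.1 * np.random.randn(50, 1)
m = gpflow.models.VGP(
    (X, Y),
    kernel=gpflow.kernels.SquaredExponential(),
    likelihood=gpflow.likelihoods.Gaussian(),
)
elbo_trace = train_scipy(m, maxiter=100)  # returns the logged ELBO values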
Example #2
 def _optimize_model_with_gradienttape(self,train_data,**kwargs):
     """
     Optimize model using the Tensorflow GradientTape with batch optimization
     """
     # obtain train_dataset and batches
     num_train_data = train_data[0].shape[0]
     train_dataset = tf.data.Dataset.from_tensor_slices(train_data)
     batch_size = kwargs.pop('batch_size',32)
     prefetch_size = tf.data.experimental.AUTOTUNE
     shuffle_buffer_size = num_train_data // 2
     num_batches_per_epoch = num_train_data // batch_size
     train_dataset = (
         train_dataset.repeat()
         .prefetch(prefetch_size)
         .shuffle(buffer_size=shuffle_buffer_size)
         .batch(batch_size)
     )
     batches = iter(train_dataset)
     
     optimizer = kwargs.pop('optimizer',tf.optimizers.Adam())
     epochs = kwargs.pop('epochs',100)
     logging_epoch_freq = kwargs.pop('logging_epoch_freq',1)
     test_data = kwargs.pop('test_data',None)
     for epoch in range(epochs):
         for _ in range(ci_niter(num_batches_per_epoch)):
             grads=self.stochastic_gradient(next(batches))
             optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
         
         epoch_id = epoch + 1
         if epoch_id % logging_epoch_freq == 0:
             if test_data is None:
                 tf.print(f"Epoch {epoch_id}: ELBO (train) {self.model.elbo(train_data)}")
             else:
                 tf.print(f"Epoch {epoch_id}: ELBO (train) {self.model.elbo(train_data)}; ELBO (test) {self.model.elbo(test_data)}")
Example #3
def train_exact_heteroskedastic(
        model: gpflow.models.VGP,
        optimizer: tf.optimizers.Optimizer = tf.optimizers.Adam(learning_rate=0.1),
        natgrad_opt: gpflow.optimizers.NaturalGradient = gpflow.optimizers.NaturalGradient(
            gamma=1.0),
        epochs: int = 100,
        logging_epoch_freq: int = 10):
    """
    Training loop for heteroskedastic GP
    """

    set_trainable(model.q_mu, False)
    set_trainable(model.q_sqrt, False)
    set_trainable(model.mean_function, False)

    loss = list()
    for epoch in range(ci_niter(epochs)):
        epoch_id = epoch + 1
        natgrad_opt.minimize(model.training_loss, [(model.q_mu, model.q_sqrt)])
        optimizer.minimize(model.training_loss, model.trainable_variables)
        loss.append(model.training_loss())
        if epoch_id % logging_epoch_freq == 0:
            tf.print(f"Epoch {epoch_id}: LOSS (train) {model.training_loss()}")
    plt.plot(range(len(loss)), loss)
    plt.xlabel('Epoch', fontsize=25)
    plt.ylabel('Loss', fontsize=25)
    plt.tight_layout()
Example #4
    def train_model(self, model, t, x, t_hourly_out):
        # Dataset
        train_dataset = tf.data.Dataset.from_tensor_slices(
            (t, x)).repeat().shuffle(buffer_size=t.shape[0],
                                     seed=Const.RANDOM_SEED)
        # Training
        start = time.time()
        iter_loglikelihood = SVGaussianProcess.run_optimization(
            model=model,
            iterations=ci_niter(self.max_iterations),
            train_dataset=train_dataset,
            minibatch_size=self.minibatch_size)
        end = time.time()

        logging.info("Training finished after: {:>10} sec".format(end - start))
        logging.info("Trained model.\n\n" + str(get_summary(model)) + "\n")

        # Prediction
        signal_hourly_out, signal_var_hourly_out = model.predict_y(
            t_hourly_out)
        signal_std_hourly_out = tf.sqrt(signal_var_hourly_out)

        signal_hourly_out = tf.reshape(signal_hourly_out, [-1]).numpy()
        signal_std_hourly_out = tf.reshape(signal_std_hourly_out, [-1]).numpy()

        return signal_hourly_out.reshape(-1, 1), signal_std_hourly_out.reshape(
            -1, 1), iter_loglikelihood
Example #5
def checkpointing_training_loop(
    model: gpflow.models.SVGP,
    batch_size: int,
    epochs: int,
    manager: tf.train.CheckpointManager,
    logging_epoch_freq: int = 100,
    epoch_var: Optional[tf.Variable] = None,
    step_var: Optional[tf.Variable] = None,
):
    tf_optimization_step = tf.function(optimization_step)

    batches = iter(train_dataset)

    for epoch in range(epochs):
        for step in range(ci_niter(num_batches_per_epoch)):
            tf_optimization_step(model, next(batches))
            if step_var is not None:
                step_var.assign(epoch * num_batches_per_epoch + step + 1)
        if epoch_var is not None:
            epoch_var.assign(epoch + 1)

        epoch_id = epoch + 1
        if epoch_id % logging_epoch_freq == 0:
            ckpt_path = manager.save()
            tf.print(
                f"Epoch {epoch_id}: ELBO (train) {model.elbo(data)}, saved at {ckpt_path}"
            )
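# Hedged sketch (an assumption, not from the source) of the `optimization_step` helper
# this loop wraps in tf.function; `train_dataset`, `num_batches_per_epoch` and `data`
# are assumed to come from a mini-batch pipeline like the one in Example #2 above.
optimizer = tf.optimizers.Adam()


def optimization_step(model: gpflow.models.SVGP, batch):
    # one Adam step on the negative ELBO of this mini-batch
    optimizer.minimize(model.training_loss_closure(batch), model.trainable_variables)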
Example #6
def analyze(f, title="Plot"):
    X, Y, groups = f()
    Y_data = np.hstack([Y, groups])
    likelihood = gpflow.likelihoods.SwitchedLikelihood([
        gpflow.likelihoods.Gaussian(variance=1.0),
        gpflow.likelihoods.Gaussian(variance=1.0)
    ])
    # model construction (notice that num_latent_gps is 1)
    natgrad = NaturalGradient(gamma=1.0)
    adam = tf.optimizers.Adam()
    kernel = gpflow.kernels.Matern52(lengthscales=0.5)
    model = gpflow.models.VGP((X, Y_data),
                              kernel=kernel,
                              likelihood=likelihood,
                              num_latent_gps=1)
    # here's a plot of the raw data.
    fig, ax = plt.subplots(1, 1, figsize=(12, 6))
    _ = ax.plot(X, Y_data, "kx")
    plt.xlabel("Minutes")
    plt.ylabel("Value")
    plt.title(title)
    plt.savefig(title + '.png')
    for _ in range(ci_niter(1000)):
        natgrad.minimize(model.training_loss, [(model.q_mu, model.q_sqrt)])


    # let's do some plotting!
    xx = np.linspace(0, 30, 200)[:, None]

    mu, var = model.predict_f(xx)

    plt.figure(figsize=(12, 6))
    plt.plot(xx, mu, "C0")
    plt.plot(xx, mu + 2 * np.sqrt(var), "C0", lw=0.5)
    plt.plot(xx, mu - 2 * np.sqrt(var), "C0", lw=0.5)
    plt.plot(X, Y, "C1x", mew=2)
    plt.xlabel("Minutes")
    plt.ylabel("Value")
    plt.title(title)
    plt.savefig(title + ' GP model.png')

    print_summary(model)
    # print(type(summary))
    # summary.to_markdown(title+'.md')
    # plt.set_xlim(0, 30)
    # _ = ax.plot(xx, 2.5 * np.sin(6 * xx) + np.cos(3 * xx), "C2--")

    # plt.errorbar(
    #     X.squeeze(),
    #     Y.squeeze(),
    #     # yerr=2 * (np.sqrt(NoiseVar)).squeeze(),
    #     marker="x",
    #     lw=0,
    #     elinewidth=1.0,
    #     color="C1",
    # )
    # _ = plt.xlim(-5, 5)
    return
Example #7
def simple_training_loop(model: gpflow.models.SVGP, epochs: int = 1, logging_epoch_freq: int = 10):
    tf_optimization_step = tf.function(optimization_step)

    batches = iter(train_dataset)
    for epoch in range(epochs):
        for _ in range(ci_niter(num_batches_per_epoch)):
            tf_optimization_step(model, next(batches))

        epoch_id = epoch + 1
        if epoch_id % logging_epoch_freq == 0:
            tf.print(f"Epoch {epoch_id}: ELBO (train) {model.elbo(data)}")
Example #8
    def hmcmc(self, model):

        if logger.isEnabledFor(logging.INFO):
            logger.info('here in the hmcmc method')
        #we add priors to the hyperparameters.

        # tfp.distributions dtype is inferred from parameters - so convert to 64-bit
        model.kernel.lengthscales.prior = tfd.Gamma(f64(1.0), f64(1.0))
        model.kernel.variance.prior = tfd.Gamma(f64(1.0), f64(1.0))
        model.likelihood.variance.prior = tfd.Gamma(f64(1.0), f64(1.0))
        #model.mean_function.A.prior = tfd.Normal(f64(0.0), f64(10.0))
        #model.mean_function.b.prior = tfd.Normal(f64(0.0), f64(10.0))

        num_burnin_steps = ci_niter(500)
        num_samples = ci_niter(1000)

        # Note that here we need model.trainable_parameters, not trainable_variables - only parameters can have priors!
        hmc_helper = gpflow.optimizers.SamplingHelper(
            model.log_posterior_density, model.trainable_parameters)

        hmc = tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=hmc_helper.target_log_prob_fn,
            num_leapfrog_steps=10,
            step_size=0.01)
        adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
            hmc,
            num_adaptation_steps=10,
            target_accept_prob=f64(0.75),
            adaptation_rate=0.1)

        @tf.function
        def run_chain_fn():
            return tfp.mcmc.sample_chain(
                num_results=num_samples,
                num_burnin_steps=num_burnin_steps,
                current_state=hmc_helper.current_state,
                kernel=adaptive_hmc,
                trace_fn=lambda _, pkr: pkr.inner_results.is_accepted,
            )

        samples, traces = run_chain_fn()
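        # Hedged follow-up (not in the source): the chain runs in the unconstrained
        # space, so SamplingHelper can be used to map the samples back to constrained
        # parameter values for inspection.
        parameter_samples = hmc_helper.convert_to_constrained_values(samples)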
Example #9
    def sample_f(self):
        """
        Runs MCMC to sample posterior functions.
        """
        # add priors to the hyperparameters.
        self.model.kernel.lengthscales.prior = tfd.Gamma(f64(1.0), f64(1.0))
        self.model.kernel.variance.prior = tfd.Gamma(f64(1.0), f64(1.0))
        self.model.likelihood.variance.prior = tfd.Gamma(f64(1.0), f64(1.0))
        if self.mean_function is not None:
            self.model.mean_function.A.prior = tfd.Normal(f64(0.0), f64(10.0))
            self.model.mean_function.b.prior = tfd.Normal(f64(0.0), f64(10.0))

        # sample from the posterior using HMC (required to estimate epistemic uncertainty)
        num_burnin_steps = ci_niter(300)
        num_samples = ci_niter(self.num_samples)

        # Note that here we need model.trainable_parameters, not trainable_variables - only parameters can have priors!
        self.hmc_helper = gpflow.optimizers.SamplingHelper(
            self.model.log_posterior_density, self.model.trainable_parameters)

        hmc = tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=self.hmc_helper.target_log_prob_fn,
            num_leapfrog_steps=10,
            step_size=0.01)
        adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
            hmc,
            num_adaptation_steps=10,
            target_accept_prob=f64(0.75),
            adaptation_rate=0.1)

        @tf.function
        def run_chain_fn():
            return tfp.mcmc.sample_chain(
                num_results=num_samples,
                num_burnin_steps=num_burnin_steps,
                current_state=self.hmc_helper.current_state,
                kernel=adaptive_hmc,
                trace_fn=lambda _, pkr: pkr.inner_results.is_accepted,
            )

        self.samples, traces = run_chain_fn()
Example #10
def monitored_training_loop(epochs: int):
    tf_optimization_step = tf.function(optimization_step)

    batches = iter(train_dataset)

    for epoch in range(epochs):
        for _ in range(ci_niter(num_batches_per_epoch)):
            batch = next(batches)
            tf_optimization_step(model, batch)

        epoch_id = epoch + 1
        monitor(epoch, epoch_id=epoch_id, data=data)
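# Hedged sketch (names and log directory are assumptions, not from the source) of the
# `monitor` callable used above, built with GPflow's monitoring API: it is called once
# per epoch and forwards the extra keyword arguments (epoch_id, data) to its tasks.
from gpflow.monitor import ModelToTensorBoard, Monitor, MonitorTaskGroup, ScalarToTensorBoard

log_dir = "logs"  # hypothetical TensorBoard output directory


def elbo_callback(epoch_id=None, data=None, **_):
    return model.elbo(data)


monitor = Monitor(
    MonitorTaskGroup(
        [ModelToTensorBoard(log_dir, model), ScalarToTensorBoard(log_dir, elbo_callback, "elbo")],
        period=1,
    )
)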
Example #11
    def _optimize(self):
        if logger.isEnabledFor(logging.INFO):
            logger.info('entering _optimize method')
            logger.info(
                f'trainable variables {self.model_cost.trainable_variables}')

        adam_learning_rate = 0.01
        iterations = ci_niter(1000)
        opt = tf.optimizers.Adam(adam_learning_rate)

        opt_rev = tf.optimizers.Adam(adam_learning_rate)

        @tf.function
        def cost_optimization_step():
            opt.minimize(self.model_cost.training_loss,
                         self.model_cost.trainable_variables)

        @tf.function
        def rev_optimization_step():
            opt_rev.minimize(self.model_rev.training_loss,
                             self.model_rev.trainable_variables)

        for i in range(iterations):
            opt_logs = cost_optimization_step()

            # for i in range(iterations):
            opt_logs_rev = rev_optimization_step()

        # opt = gpflow.optimizers.Scipy()

        # opt_rev = gpflow.optimizers.Scipy()
        # opt_logs = opt.minimize(self.model_cost.training_loss,
        #                         self.model_cost.trainable_variables,
        #                         # method='COBYLA',  # -BFGS-B',
        #                         method='BFGS',  # L-BFGS-B',  # 'SLSQP',
        #                         options=dict(maxiter=ci_niter(2000)))  #dict(maxiter=500))
        # opt_logs_rev = opt_rev.minimize(self.model_rev.training_loss,
        #                                 self.model_rev.trainable_variables,
        #                                 # method='COBYLA',  # 'L-BFGS-B',
        #                                 method='BFGS',  # L-BFGS-B',  # 'SLSQP',
        #                                 options=dict(maxiter=ci_niter(2000)))  # dict(maxiter=500))

        if logger.isEnabledFor(logging.INFO):
            logger.info(f'opt_logs:\n{opt_logs}')
            logger.info(f'opt_logs_rev:\n{opt_logs_rev}')
        if logger.isEnabledFor(logging.WARNING):
            logger.warning(f'summary cost gp model')
            logger.warning(f'{tabulate_module_summary(self.model_cost)}')
            logger.warning(f'summary rev gp model')
            logger.warning(f'({tabulate_module_summary(self.model_rev)})')
Example #12
 def _optimize_model_with_scipy(self,train_data,**kwargs):
     """
     Optimize model using the Scipy optimizer in a single call
     """
     method = kwargs.pop('method', "l-bfgs-b")
     disp = kwargs.pop("disp", True)
     maxiter = kwargs.pop("maxiter", ci_niter(200))
     optimizer = gpf.optimizers.Scipy()
     optimizer.minimize(
         self.model.training_loss_closure(train_data),
         variables=self.model.trainable_variables,
         method=method,
         options={"disp": disp, "maxiter": maxiter},
         **kwargs
     )
Example #13
def gp_model(x_train, y_train, x_test, num_classes):
    """This function instantiates the gp model and gets the predictions from the model.

    :param x_train: The training dataset.
    :param y_train: The training dataset labels.
    :param x_test: The test dataset.
    :param num_classes: The number of classes in the dataset.
    :return: predictions, the predictions from the gp model.
    :return time_taken: The time taken to train the model."""

    data = (x_train, y_train)
    kernel = (gpflow.kernels.SquaredExponential() + gpflow.kernels.Matern12() +
              gpflow.kernels.Exponential())

    invlink = gpflow.likelihoods.RobustMax(num_classes)
    likelihood = gpflow.likelihoods.MultiClass(num_classes, invlink=invlink)
    z = x_train[::5].copy()

    model = gpflow.models.SVGP(kernel=kernel,
                               likelihood=likelihood,
                               inducing_variable=z,
                               num_latent_gps=num_classes,
                               whiten=True,
                               q_diag=True)

    set_trainable(model.inducing_variable, False)

    print('\nInitial parameters:')
    print_summary(model, fmt="notebook")

    start = time.time()

    opt = gpflow.optimizers.Scipy()
    opt.minimize(model.training_loss_closure(data),
                 model.trainable_variables,
                 options=dict(maxiter=ci_niter(1000)))

    print('\nParameters after optimization:')
    print_summary(model, fmt="notebook")

    end = time.time()
    time_taken = round(end - start, 2)

    print('Optimization took {:.2f} seconds'.format(time_taken))

    predictions = model.predict_y(x_test)[0]

    return predictions, time_taken
Example #14
def repeatMinimization(model, Xtest, Ytest):
    callback = Callback(model, Xtest, Ytest)

    opt = gpflow.optimizers.Scipy()
    # print("Optimising for {} repetitions".format(nRepeats))
    for repeatIndex in range(nRepeats):
        # print(repeatIndex)
        opt.minimize(
            model.training_loss,
            model.trainable_variables,
            method="L-BFGS-B",
            tol=1e-11,
            options=dict(disp=False, maxiter=ci_niter(2000)),
            step_callback=callback,
            compile=True,
        )
    return callback
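# Hedged sketch (an assumption, not the source implementation) of the Callback object
# used above: Scipy's step_callback invokes it once per iteration, and it records the
# training objective together with the held-out log density of the test data.
import numpy as np
from scipy.stats import norm


class Callback:
    def __init__(self, model, Xtest, Ytest, holdout_interval=10):
        self.model = model
        self.Xtest = Xtest
        self.Ytest = Ytest
        self.holdout_interval = holdout_interval
        self.n_iters = []
        self.log_likelihoods = []
        self.hold_out_likelihood = []
        self.counter = 0

    def __call__(self, step, variables=None, values=None):
        if self.counter % self.holdout_interval == 0:
            self.n_iters.append(self.counter)
            self.log_likelihoods.append(self.model.maximum_log_likelihood_objective().numpy())
            mean, var = self.model.predict_y(self.Xtest)
            self.hold_out_likelihood.append(
                np.mean(norm.logpdf(self.Ytest, mean.numpy(), np.sqrt(var.numpy())))
            )
        self.counter += 1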
Example #15
def snelsonDemo():
    from matplotlib import pyplot as plt
    from IPython import embed
    xtrain, ytrain, xtest, ytest = getTrainingTestData()

    # run exact inference on training data.
    exact_model = getRegressionModel(xtrain, ytrain)
    opt = gpflow.train.ScipyOptimizer()
    opt.minimize(exact_model, maxiter=ci_niter(2000000))

    figA, axes = plt.subplots(1, 1)
    inds = np.argsort(xtrain.flatten())
    axes.plot(xtrain[inds, :], ytrain[inds, :], 'ro')
    plotPredictions(axes, exact_model, 'g', None)

    figB, axes = plt.subplots(3, 2)

    # run sparse model on training data initialized from exact optimal solution.
    VFEmodel, VFEcb = trainSparseModel(xtrain, ytrain, exact_model, False,
                                       xtest, ytest)
    FITCmodel, FITCcb = trainSparseModel(xtrain, ytrain, exact_model, True,
                                         xtest, ytest)

    print("Exact model parameters \n")
    printModelParameters(exact_model)
    print("Sparse model parameters for VFE optimization \n")
    printModelParameters(VFEmodel)
    print("Sparse model parameters for FITC optimization \n")
    printModelParameters(FITCmodel)

    VFEiters = FITCcb.n_iters
    VFElog_likelihoods = stretch(len(VFEiters), VFEcb.log_likelihoods)
    VFEhold_out_likelihood = stretch(len(VFEiters), VFEcb.hold_out_likelihood)

    plotComparisonFigure(xtrain, VFEmodel, exact_model, axes[0, 0], axes[1, 0],
                         axes[2, 0], VFEiters, VFElog_likelihoods,
                         VFEhold_out_likelihood, "VFE")
    plotComparisonFigure(xtrain, FITCmodel, exact_model, axes[0, 1], axes[1, 1],
                         axes[2, 1], FITCcb.n_iters, FITCcb.log_likelihoods,
                         FITCcb.hold_out_likelihood, "FITC")

    axes[0, 0].set_title('VFE', loc='center', fontdict={'fontsize': 22})
    axes[0, 1].set_title('FITC', loc='center', fontdict={'fontsize': 22})

    embed()
Example #16
    def train_temporal(self, X, Y, iteration):
        def temporal_elbo(X, Y, full_cov=False):
            var_exp, kl_priors = [], []
            for i, layer, likelihood in zip(list(range(self.num_outputs)),
                                            self.temporal_layers,
                                            self.likelihoods):
                meani, vari = layer.conditional(X, full_cov=full_cov)
                available = ~tf.math.is_nan(Y[:, i])
                y = tf.where(available, Y[:, i], 0.)
                var_expi = likelihood.variational_expectations(
                    meani, vari[:, None], y[:, None])
                var_expi = var_expi * tf.cast(tf.where(available, 1., 0.),
                                              dtype=var_expi.dtype)
                var_exp.append(tf.reduce_sum(var_expi))
                kl_priors.append(layer.KL())
            L, KL = tf.reduce_sum(var_exp), tf.reduce_sum(kl_priors)
            if self.minibatch_size is not None:
                num_data = tf.cast(self.num_data, KL.dtype)
                minibatch_size = tf.cast(self.minibatch_size, KL.dtype)
                scale = num_data / minibatch_size
            else:
                scale = tf.cast(1.0, KL.dtype)
            return L * scale - KL

        @tf.function(autograph=False)
        def optimization_step(optimizer, data):
            with tf.GradientTape(watch_accessed_variables=False) as tape:
                tape.watch(self.trainable_variables)
                objective = -temporal_elbo(*data)
                grads = tape.gradient(objective, self.trainable_variables)
            optimizer.apply_gradients(zip(grads, self.trainable_variables))
            return objective

        def run_adam(data, iterations):
            adam = tf.optimizers.Adam(0.001)
            for step in range(iterations):
                neg_elbo = optimization_step(adam, data)
                elbo = -neg_elbo
                if step % 1000 == 0:
                    print(elbo.numpy())

        print("Start initial temporal training.")
        maxiter = ci_niter(iteration)
        run_adam((X, Y), maxiter)
        print("Done initial temporal training.")
Example #17
def repeatMinimization(model, xtest, ytest):
    callback = Callback(model, xtest, ytest)

    @tf.function(autograph=False)
    def objective_closure():
        return -model.log_marginal_likelihood()

    opt = gpflow.optimizers.Scipy()
    #print("Optimising for {} repetitions".format(nRepeats))
    for repeatIndex in range(nRepeats):
        #print(repeatIndex)
        opt.minimize(objective_closure,
                     model.trainable_variables,
                     method='L-BFGS-B',
                     tol=1e-11,
                     options=dict(disp=False, maxiter=ci_niter(2000)),
                     step_callback=callback)
    return callback
Example #18
def analyze(f, title="Plot", rawplot=True, modelplot=True,summary=True):
    # Obtain randomly generated data
    X, Y, groups = f()
    Y_data = np.hstack([Y, groups])
    # Model construction (notice that num_latent_gps is 1)
    likelihood = gpflow.likelihoods.SwitchedLikelihood(
        [gpflow.likelihoods.Gaussian(variance=1.0),
         gpflow.likelihoods.Gaussian(variance=1.0)]
    )
    natgrad = NaturalGradient(gamma=1.0)
    adam = tf.optimizers.Adam()
    kernel = gpflow.kernels.Matern52(lengthscales=0.5)
    model = gpflow.models.VGP((X, Y_data), kernel=kernel, likelihood=likelihood, num_latent_gps=1)
    for _ in range(ci_niter(1000)):
        natgrad.minimize(model.training_loss, [(model.q_mu, model.q_sqrt)])

    # Plot of the raw data.
    if rawplot:
        fig, ax = plt.subplots(1, 1, figsize=(12, 6))
        _ = ax.plot(X, Y_data, "kx")
        plt.xlabel("Minutes")
        plt.ylabel("Value")
        plt.title(title)
        plt.savefig(title+'.png')

    # Plot of GP model
    if modelplot:
        xx = np.linspace(0, 30, 200)[:, None]
        mu, var = model.predict_f(xx)

        plt.figure(figsize=(12, 6))
        plt.plot(xx, mu, "C0")
        plt.plot(xx, mu + 2 * np.sqrt(var), "C0", lw=0.5)
        plt.plot(xx, mu - 2 * np.sqrt(var), "C0", lw=0.5)
        plt.plot(X, Y, "C1x", mew=2)
        plt.xlabel("Minutes")
        plt.ylabel("Value")
        plt.title(title)
        plt.savefig(title+' GP model.png')

    if summary:
        print_summary(model)

    return model
Example #19
def train_loop(meta_tasks, num_iter=5):
    """
    Metalearning training loop
    
    :param meta_tasks: list of metatasks.
    :param num_iter: number of iterations of tasks set
    :returns: a mean function object
    """
    # Initialize mean function
    mean_function = build_mean_function()
    # Iterate for several passes over the task set
    for iteration in range(num_iter):
        ts = time.time()
        print("Currently in meta-iteration {}".format(iteration))
        # Iterate over tasks
        for i, task in enumerate(meta_tasks):
            data = task  # (X, Y)
            model = build_model(data, mean_function=mean_function)
            run_adam(model, ci_niter(100))

        print(">>>> iteration took {} ms".format(time.time() - ts))
    return mean_function
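# Hedged sketches (assumptions, not from the source) of the helpers used by train_loop:
# build_model wraps one task's (X, Y) data in a GPR that shares the meta-learned mean
# function, and run_adam takes plain full-batch Adam steps on its training loss.
def build_model(data, mean_function):
    return gpflow.models.GPR(
        data,
        kernel=gpflow.kernels.SquaredExponential(),
        mean_function=mean_function,
    )


def run_adam(model, iterations):
    optimizer = tf.optimizers.Adam(0.001)
    for _ in range(iterations):
        optimizer.minimize(model.training_loss, model.trainable_variables)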
Example #20
    def fit(self, X, Y, variance=0.001, optimize=True, maxiter=100):

        print('-- fitting gaussian process on ' + str(X.shape[0]) + ' data --')

        opt = gpflow.optimizers.Scipy()
        mean_X = X.mean()
        std_X = X.std()
        mean_Y = Y.mean()
        std_Y = Y.std()
        X = (X - mean_X) / std_X
        Y = (Y - mean_Y) / std_Y
        self.mean_X = mean_X
        self.std_X = std_X
        self.mean_Y = mean_Y
        self.std_Y = std_Y
        if self.gp_model == 'GPR':
            model = gpflow.models.GPR(data=(np.array(X, dtype=float),
                                            np.array(Y, dtype=float)),
                                      kernel=self.k,
                                      mean_function=self.mean_function,
                                      noise_variance=variance)
            #model.likelihood.variance.assign(variance)
            #model.likelihood.variance.fixed = True
            #if optimize:
            #    opt_logs = opt.minimize(model.training_loss, model.trainable_variables, options=dict(maxiter=maxiter))
            self.model = model
        elif self.gp_model == 'SVGP':
            data = X, Y
            MAXITER = ci_niter(2000)
            #self.model.likelihood.variance.assign(variance)
            if optimize:
                opt.minimize(
                    self.model.training_loss_closure(data),
                    variables=self.model.trainable_variables,
                    method="l-bfgs-b",
                    options={"maxiter": MAXITER},
                )

        return
Example #21
import gpflow
from gpflow.ci_utils import ci_niter
import tensorflow as tf
import numpy as np

nRepeats = ci_niter(50)

predict_limits = [-4.0, 4.0]
inducing_points_limits = [-1.0, 9]
hold_out_limits = [0.20, 0.60]
optimization_limits = [18.0, 25.0]


def readCsvFile(fileName):
    return np.loadtxt(fileName).reshape(-1, 1)


def getTrainingTestData():
    overallX = readCsvFile("data/snelson_train_inputs.dat")
    overallY = readCsvFile("data/snelson_train_outputs.dat")

    trainIndices = []
    testIndices = []

    nPoints = overallX.shape[0]

    for index in range(nPoints):
        if index % 4 == 0:
            trainIndices.append(index)
        else:
            testIndices.append(index)
Example #22
                     pY[:, 0] - two_sigma,
                     pY[:, 0] + two_sigma,
                     alpha=0.15)
    lml = m.maximum_log_likelihood_objective().numpy()
    plt.title("%s (lml = %f)" % (name, lml))
    return lml


# %% [markdown]
# ## Full model

# %%
gpr = gpflow.models.GPR((X, Y), gpflow.kernels.SquaredExponential())
gpflow.optimizers.Scipy().minimize(gpr.training_loss,
                                   gpr.trainable_variables,
                                   options=dict(maxiter=ci_niter(1000)))
full_lml = plot_model(gpr)

# %% [markdown]
# ## Upper bounds for sparse variational models
# As a first investigation, we compute the upper bound for models trained using the sparse variational GP approximation.

# %%
Ms = np.arange(4, ci_niter(20, test_n=6), 1)
vfe_lml = []
vupper_lml = []
vfe_hyps = []
for M in Ms:
    Zinit = X[:M, :].copy()
    vfe = gpflow.models.SGPR((X, Y),
                             gpflow.kernels.SquaredExponential(),
Example #23
Xtrain, Ytrain, Xtest, Ytest = getTrainingTestData()


def getKernel():
    return gpflow.kernels.SquaredExponential()


# Run exact inference on training data:
exact_model = gpflow.models.GPR((Xtrain, Ytrain), kernel=getKernel())

opt = gpflow.optimizers.Scipy()
opt.minimize(
    exact_model.training_loss,
    exact_model.trainable_variables,
    method="L-BFGS-B",
    options=dict(maxiter=ci_niter(20000)),
    tol=1e-11,
)

print("Exact model parameters:")
printModelParameters(exact_model)

figA, ax = plt.subplots(1, 1)
ax.plot(Xtrain, Ytrain, "ro")
plotPredictions(ax, exact_model, color="g")


# %%
def initializeHyperparametersFromExactSolution(sparse_model):
    sparse_model.likelihood.variance.assign(exact_model.likelihood.variance)
    sparse_model.kernel.variance.assign(exact_model.kernel.variance)
Example #24

def run_adam(model, iterations):
    """
    Utility function running the Adam optimiser

    :param model: GPflow model
    :param iterations: number of iterations
    """
    # Create an Adam optimiser
    logf = []
    train_it = iter(train_dataset.batch(minibatch_size))
    adam = tf.optimizers.Adam()
    for step in range(iterations):
        elbo = -optimization_step(adam, model, next(train_it))
        if step % 10 == 0:
            logf.append(elbo.numpy())
    return logf


maxiter = ci_niter(10000)

logf = run_adam(m, maxiter)
plt.figure()
plt.plot(np.arange(maxiter)[::10], logf)
plt.xlabel('iteration')
plt.ylabel('ELBO')

plot("Predictions after training")

print_summary(m)
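# Hedged sketch (an assumption, not from the source) of the `optimization_step` helper
# used by run_adam above: one Adam step on the negative ELBO of a mini-batch, returning
# that loss so run_adam can log the ELBO.
@tf.function
def optimization_step(optimizer, model, batch):
    with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(model.trainable_variables)
        loss = model.training_loss(batch)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss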
Example #25
np.random.seed(0)
tf.random.set_seed(0)

N, D = 100, 2
batch_size = 50

# inducing points
M = 10

x = np.random.uniform(size=(N, D))
y = np.sin(10 * x[:, :1]) + 5 * x[:, 1:]**2

data = (x, y)
inducing_variable = tf.random.uniform((M, D))
adam_learning_rate = 0.01
iterations = ci_niter(5)

# %% [markdown]
# ### VGP is a GPR

# %% [markdown]
# The following section demonstrates how natural gradients can turn VGP into GPR *in a single step, if the likelihood is Gaussian*.

# %% [markdown]
# Let's start by first creating a standard GPR model with Gaussian likelihood:

# %%
gpr = GPR(data, kernel=gpflow.kernels.Matern52())

# %% [markdown]
# The log marginal likelihood of the exact GP model is:
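# %% [markdown]
# (Hedged sketch, not part of the source.) It can be compared directly against a VGP
# model after a single natural-gradient step: with a Gaussian likelihood and gamma=1.0,
# that one step recovers the exact posterior, so the VGP ELBO should match the GPR log
# marginal likelihood.

# %%
vgp = gpflow.models.VGP(data, kernel=gpflow.kernels.Matern52(),
                        likelihood=gpflow.likelihoods.Gaussian())
natgrad = gpflow.optimizers.NaturalGradient(gamma=1.0)
natgrad.minimize(vgp.training_loss, var_list=[(vgp.q_mu, vgp.q_sqrt)])
print(gpr.log_marginal_likelihood().numpy(), vgp.elbo().numpy())  # should (nearly) agree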
Example #26
def run_gpr(nout, iterations, ds_single, ages, k1len, k2len, k3len, k4len, df_place):
            
            
    # Input space, rsl normalized to zero mean, unit variance
    X = np.stack((df_place.lon, df_place.lat, df_place.age), 1)

    RSL = normalize(df_place.rsl_realresid)
    
    #define kernels  with bounds
    k1 = gpf.kernels.Matern32(active_dims=[0, 1])
    k1.lengthscales = bounded_parameter(1, 10, k1len) 
    k1.variance = bounded_parameter(0.02, 100, 2)

    k2 = gpf.kernels.Matern32(active_dims=[2])
    k2.lengthscales = bounded_parameter(1, 100000, k2len)
    k2.variance = bounded_parameter(0.02, 100, 1)

    k3 = gpf.kernels.Matern32(active_dims=[0, 1])
    k3.lengthscales = bounded_parameter(10, 100, k3len) 
    k3.variance = bounded_parameter(0.01, 100, 1)

    k4 = gpf.kernels.Matern32(active_dims=[2]) 
    k4.lengthscales = bounded_parameter(1, 100000, k4len)
    k4.variance = bounded_parameter(0.01, 100, 1)

    k5 = gpf.kernels.White(active_dims=[0, 1, 2])
    k5.variance = bounded_parameter(0.01, 100, 1)

    kernel = (k1 * k2) + (k3 * k4) + k5 

    ##################	  BUILD AND TRAIN MODELS 	#######################
    noise_variance = (df_place.rsl_er.ravel())**2  

    m = GPR_new((X, RSL), kernel=kernel, noise_variance=noise_variance) 
    
    #Sandwich age of each lat/lon to enable gradient calculation
    lonlat = df_place[['lon', 'lat']]
    agetile = np.stack([df_place.age - 10, df_place.age, df_place.age + 10], axis=-1).flatten()
    xyt_it = np.column_stack([lonlat.loc[lonlat.index.repeat(3)], agetile])

    #hardcode indices for speed (softcoded alternative commented out)
    indices = np.arange(1, len(df_place)*3, 3)
    # indices = np.where(np.in1d(df_place.age, agetile))[0]
    
    iterations = ci_niter(iterations)
    learning_rate = 0.05
    logging_freq = 100
    opt = tf.optimizers.Adam(learning_rate)

    #first optimize without age errs to get slope
    tf.print('___First optimization___')
    likelihood = -10000
    for i in range(iterations):
        opt.minimize(m.training_loss, var_list=m.trainable_variables)

        likelihood_new = m.log_marginal_likelihood()
        if i % logging_freq == 0:
            tf.print(f"iteration {i + 1} likelihood {m.log_marginal_likelihood():.04f}")
            if abs(likelihood_new - likelihood) < 0.001:
                break
        likelihood = likelihood_new

    # Calculate posterior at training points + adjacent age points
    mean, _ = m.predict_f(xyt_it)

    # make diagonal matrix of age slope at training points
    Xgrad = np.diag(np.gradient(mean.numpy(), axis=0)[indices][:,0])

    # multiply age errors by gradient
    Xnigp = np.diag(Xgrad @ np.diag((df_place.age_er/2)**2) @ Xgrad.T)    
    
    m = GPR_new((X, RSL), kernel=kernel, noise_variance=noise_variance + Xnigp)

    #reoptimize
    tf.print('___Second optimization___')
    opt = tf.optimizers.Adam(learning_rate)
    
    for i in range(iterations):
        opt.minimize(m.training_loss, var_list=m.trainable_variables)
        
        likelihood_new = m.log_marginal_likelihood()
        if i % logging_freq == 0:
            tf.print(f"iteration {i + 1} likelihood {m.log_marginal_likelihood():.04f}")
            if abs(likelihood_new - likelihood) < 0.001:
                break
        likelihood = likelihood_new
            
    ##################	  INTERPOLATE MODELS 	#######################
    ##################  --------------------	 ######################
    # output space
    da_zp, da_varp = predict_post_f(nout, ages, ds_single, df_place, m)

    #interpolate all models onto GPR grid
    ds_giapriorinterp  = interp_likegpr(ds_single, da_zp)

    # add total prior RSL back into GPR
    ds_priorplusgpr = da_zp + ds_giapriorinterp
    ds_varp = da_varp.to_dataset(name='rsl')
    ds_zp = da_zp.to_dataset(name='rsl')

    #Calculate data-model misfits & GPR vals at RSL data locations
    df_place['gpr_posterior'] = df_place.apply(lambda row: ds_select(ds_priorplusgpr, row), axis=1)
    df_place['gprpost_std'] = df_place.apply(lambda row: ds_select(ds_varp, row), axis=1)
    df_place['gpr_diff'] = df_place.apply(lambda row: row.rsl - row.gpr_posterior, axis=1)
    df_place['diffdiv'] = df_place.gpr_diff / df_place.rsl_er
    
    k1_l = m.kernel.kernels[0].kernels[0].lengthscales.numpy()
    k2_l = m.kernel.kernels[0].kernels[1].lengthscales.numpy()
    k3_l = m.kernel.kernels[1].kernels[0].lengthscales.numpy()
    k4_l = m.kernel.kernels[1].kernels[1].lengthscales.numpy()
    
    
    return ds_giapriorinterp, ds_zp, ds_priorplusgpr, ds_varp, m.log_marginal_likelihood().numpy(), m, df_place

    def interp_ds(ds):
        return ds.interp(age=ds_single.age, lat=ds_single.lat, lon=ds_single.lon)

    def slice_dataset(ds):
        return ds.rsl.sel(lat=site[1].lat.unique() ,
                      lon=site[1].lon.unique(),
                      method='nearest').sel(age=slice(11500, 0))
    
    def ds_ageselect(ds, row):
        return ds.rsl.interp(age=[row.age]).age.values[0]
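# Hedged sketch (an assumption, not from the source) of the `bounded_parameter` helper
# used when constructing the kernels above: a gpflow Parameter whose value is constrained
# to the interval (low, high) through a scaled-sigmoid bijector.
import tensorflow_probability as tfp
from gpflow.utilities import to_default_float


def bounded_parameter(low, high, value):
    sigmoid = tfp.bijectors.Sigmoid(low=to_default_float(low), high=to_default_float(high))
    return gpf.Parameter(to_default_float(value), transform=sigmoid)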
Example #27
# %%
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
import gpflow
from gpflow.ci_utils import ci_niter
from scipy.cluster.vq import kmeans2

from typing import Dict, Optional, Tuple
import tensorflow as tf
import tensorflow_datasets as tfds
import gpflow
from gpflow.utilities import to_default_float

iterations = ci_niter(100)

# %% [markdown]
# ## Convolutional network inside a GPflow model

# %%
original_dataset, info = tfds.load(name="mnist",
                                   split=tfds.Split.TRAIN,
                                   with_info=True)
total_num_data = info.splits["train"].num_examples
image_shape = info.features["image"].shape
image_size = tf.reduce_prod(image_shape)
batch_size = 32


def map_fn(input_slice: Dict[str, tf.Tensor]):
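    # (Hedged completion, not from the source.) A plausible body for this preprocessing
    # function: scale the pixel values to [0, 1], flatten each image to a vector of
    # length image_size, and cast image and label to GPflow's default float.
    image = to_default_float(input_slice["image"]) / 255.0
    label = to_default_float(input_slice["label"])
    return tf.reshape(image, [-1, image_size]), label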
Example #28
@tf.function(autograph=False)
def objective():
    return -model.log_marginal_likelihood()


optimizer.minimize(objective,
                   variables=model.trainable_variables,
                   options={'maxiter': 20})
print(f'log marginal likelihood at optimum: {model.log_marginal_likelihood()}')

# %% [markdown]
# Sampling starts with a 'burn in' period.

# %%
burn = ci_niter(100)
thin = ci_niter(10)

# %%
num_samples = 500

hmc_helper = gpflow.optimizers.SamplingHelper(model.log_marginal_likelihood,
                                              model.trainable_parameters)

hmc = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=hmc_helper.target_log_prob_fn,
    num_leapfrog_steps=10,
    step_size=0.01)

adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(hmc,
                                                 num_adaptation_steps=10,
Example #29
def main(path, representation):
    """
    :param path: str specifying path to dataset.
    :param representation: str specifying the molecular representation. One of ['fingerprints', 'fragments', 'fragprints']
    """

    task = 'e_iso_pi'  # task always e_iso_pi with human performance comparison
    data_loader = TaskDataLoader(task, path)
    smiles_list, y = data_loader.load_property_data()
    X = featurise_mols(smiles_list, representation)

    # 5 test molecules

    test_smiles = [
        'BrC1=CC=C(/N=N/C2=CC=CC=C2)C=C1',
        'O=[N+]([O-])C1=CC=C(/N=N/C2=CC=CC=C2)C=C1',
        'CC(C=C1)=CC=C1/N=N/C2=CC=C(N(C)C)C=C2',
        'BrC1=CC([N+]([O-])=O)=CC([N+]([O-])=O)=C1/N=N/C2=CC([H])=C(C=C2[H])N(CC)CC',
        'ClC%11=CC([N+]([O-])=O)=CC(C#N)=C%11/N=N/C%12=CC([H])=C(C=C%12OC)N(CC)CC'
    ]

    # and their indices in the loaded data
    test_smiles_indices = [116, 131, 168, 221, 229]

    X_train = np.delete(X, np.array(test_smiles_indices), axis=0)
    y_train = np.delete(y, np.array(test_smiles_indices))
    X_test = X[[116, 131, 168, 221, 229]]

    # experimental wavelength values in EtOH. The main csv file has 400nm instead of 407nm
    # because that measurement was taken in a different solvent.
    y_test = y[[116, 131, 168, 221, 229]]
    y_test[2] = 407.

    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)

    # #  We standardise the outputs but leave the inputs unchanged
    #
    # _, y_train, _, y_test, y_scaler = transform_data(X_train, y_train, X_test, y_test)

    X_train = X_train.astype(np.float64)
    X_test = X_test.astype(np.float64)

    data_loader_z_iso_pi = TaskDataLoader('z_iso_pi', path)
    data_loader_e_iso_n = TaskDataLoader('e_iso_n', path)
    data_loader_z_iso_n = TaskDataLoader('z_iso_n', path)

    smiles_list_z_iso_pi, y_z_iso_pi = data_loader_z_iso_pi.load_property_data()
    smiles_list_e_iso_n, y_e_iso_n = data_loader_e_iso_n.load_property_data()
    smiles_list_z_iso_n, y_z_iso_n = data_loader_z_iso_n.load_property_data()

    y_z_iso_pi = y_z_iso_pi.reshape(-1, 1)
    y_e_iso_n = y_e_iso_n.reshape(-1, 1)
    y_z_iso_n = y_z_iso_n.reshape(-1, 1)

    X_z_iso_pi = featurise_mols(smiles_list_z_iso_pi, representation)
    X_e_iso_n = featurise_mols(smiles_list_e_iso_n, representation)
    X_z_iso_n = featurise_mols(smiles_list_z_iso_n, representation)

    output_dim = 4  # Number of outputs
    rank = 1  # Rank of W
    feature_dim = len(X_train[0, :])

    tanimoto_active_dims = list(range(feature_dim))  # active dims for Tanimoto base kernel.

    # We define the Gaussian Process Regression Model using the Tanimoto kernel

    m = None

    def objective_closure():
        return -m.log_marginal_likelihood()

    # Augment the input with zeroes, ones, twos, threes to indicate the required output dimension
    X_augmented = np.vstack((np.append(X_train,
                                       np.zeros((len(X_train), 1)),
                                       axis=1),
                             np.append(X_z_iso_pi,
                                       np.ones((len(X_z_iso_pi), 1)),
                                       axis=1),
                             np.append(X_e_iso_n,
                                       np.ones((len(X_e_iso_n), 1)) * 2,
                                       axis=1),
                             np.append(X_z_iso_n,
                                       np.ones((len(X_z_iso_n), 1)) * 3,
                                       axis=1)))

    X_test = np.append(X_test, np.zeros((len(X_test), 1)), axis=1)
    X_train = np.append(X_train, np.zeros((len(X_train), 1)), axis=1)

    # Augment the Y data with zeroes, ones, twos and threes that specify a likelihood from the list of likelihoods
    Y_augmented = np.vstack(
        (np.hstack((y_train, np.zeros_like(y_train))),
         np.hstack((y_z_iso_pi, np.ones_like(y_z_iso_pi))),
         np.hstack((y_e_iso_n, np.ones_like(y_e_iso_n) * 2)),
         np.hstack((y_z_iso_n, np.ones_like(y_z_iso_n) * 3))))

    y_test = np.hstack((y_test, np.zeros_like(y_test)))

    # Base kernel
    k = Tanimoto(active_dims=tanimoto_active_dims)
    # set_trainable(k.variance, False)

    # Coregion kernel
    coreg = gpflow.kernels.Coregion(output_dim=output_dim,
                                    rank=rank,
                                    active_dims=[feature_dim])

    # Create product kernel
    kern = k * coreg

    # This likelihood switches between Gaussian noise with different variances for each f_i:
    lik = gpflow.likelihoods.SwitchedLikelihood([
        gpflow.likelihoods.Gaussian(),
        gpflow.likelihoods.Gaussian(),
        gpflow.likelihoods.Gaussian(),
        gpflow.likelihoods.Gaussian()
    ])

    # now build the GP model as normal
    m = gpflow.models.VGP((X_augmented, Y_augmented),
                          mean_function=Constant(np.mean(y_train[:, 0])),
                          kernel=kern,
                          likelihood=lik)

    # fit the covariance function parameters
    maxiter = ci_niter(1000)
    gpflow.optimizers.Scipy().minimize(
        m.training_loss,
        m.trainable_variables,
        options=dict(maxiter=maxiter),
        method="L-BFGS-B",
    )
    print_summary(m)

    # mean and variance GP prediction

    y_pred, y_var = m.predict_f(X_test)

    # Output Standardised RMSE and RMSE on Train Set

    y_pred_train, _ = m.predict_f(X_train)
    train_rmse_stan = np.sqrt(mean_squared_error(y_train, y_pred_train))
    train_rmse = np.sqrt(mean_squared_error(y_train, y_pred_train))
    print("\nStandardised Train RMSE: {:.3f}".format(train_rmse_stan))
    print("Train RMSE: {:.3f}".format(train_rmse))

    r2 = r2_score(y_test[:, 0], y_pred)
    rmse = np.sqrt(mean_squared_error(y_test[:, 0], y_pred))
    mae = mean_absolute_error(y_test[:, 0], y_pred)
    per_molecule = np.diag(abs(y_pred - y_test[:, 0]))

    print("\n Averaged test statistics are")
    print("\nR^2: {:.3f}".format(r2))
    print("RMSE: {:.3f}".format(rmse))
    print("MAE: {:.3f}".format(mae))
    print("\nAbsolute error per molecule is {} ".format(per_molecule))
Example #30
    3, invlink=invlink)  # Multiclass likelihood
Z = X[::5].copy()  # inducing inputs

m = gpflow.models.SVGP(
    kernel=kernel,
    likelihood=likelihood,
    inducing_variable=Z,
    num_latent_gps=C,
    whiten=True,
    q_diag=True,
)

# Only train the variational parameters
set_trainable(m.kernel.kernels[1].variance, False)
set_trainable(m.inducing_variable, False)
print_summary(m, fmt="notebook")

# %% [markdown]
# #### Running inference

# %%
opt = gpflow.optimizers.Scipy()

opt_logs = opt.minimize(m.training_loss_closure(data),
                        m.trainable_variables,
                        options=dict(maxiter=ci_niter(1000)))
print_summary(m, fmt="notebook")

# %%
plot_posterior_predictions(m, X, Y)