Example #1
    def test_general_gp(self):
        """Test the functions to build a general model."""
        train_features, train_targets, test_features, test_targets = get_data()

        ggp = GeneralGaussianProcess()

        ggp.train_gaussian_process(train_features, train_targets)
        pred = ggp.gaussian_process_predict(test_features)
        self.assertEqual(len(pred['prediction']), len(test_features))

        print('GeneralGP error: {0:.3f}'.format(
            get_error(pred['prediction'], test_targets)['rmse_average']))

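        # Repeat with the 'smooth' kernel option.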
        ggp = GeneralGaussianProcess(kernel='smooth')

        ggp.train_gaussian_process(train_features, train_targets)
        pred = ggp.gaussian_process_predict(test_features)
        self.assertEqual(len(pred['prediction']), len(test_features))

        print('GeneralGP error: {0:.3f}'.format(
            get_error(pred['prediction'], test_targets)['rmse_average']))

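        # Repeat with the per-feature kernel dimension option.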
        ggp = GeneralGaussianProcess(dimension='features')

        ggp.train_gaussian_process(train_features, train_targets)
        pred = ggp.gaussian_process_predict(test_features)
        self.assertEqual(len(pred['prediction']), len(test_features))

        print('GeneralGP error: {0:.3f}'.format(
            get_error(pred['prediction'], test_targets)['rmse_average']))
Example #2
def gp_predict(train_features, train_targets, test_features, test_targets):
    """Function to perform the GP predictions."""
    data = {}

    kdict = [
        {'type': 'gaussian', 'width': 1., 'scaling': 1., 'dimension': 'single'},
        ]
    gp = GaussianProcess(train_fp=train_features, train_target=train_targets,
                         kernel_list=kdict, regularization=1e-2,
                         optimize_hyperparameters=True, scale_data=True)

    pred = gp.predict(test_fp=test_features)

    data['result'] = get_error(pred['prediction'],
                               test_targets)['rmse_average']
    data['size'] = len(train_targets)

    return data
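
# Example call (a sketch; assumes a train/test split like the one returned by
# the `get_data()` helper used in Example #1):
train_features, train_targets, test_features, test_targets = get_data()
data = gp_predict(train_features, train_targets, test_features, test_targets)
print('GP error: {0:.3f} ({1} training points)'.format(
    data['result'], data['size']))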
Example #3
    kdict = {'k1': {'type': 'gaussian', 'width': w1}}
    # Set up the prediction routine.
    gp = GaussianProcess(kernel_dict=kdict,
                         regularization=sdt1**2,
                         train_fp=std['train'],
                         train_target=train_targets['target'],
                         optimize_hyperparameters=False)
    # Do predictions.
    under_fit = gp.predict(test_fp=std['test'], uncertainty=True)
    # Scale predictions back to the original scale.
    under_prediction = np.vstack(under_fit['prediction']) * \
        train_targets['std'] + train_targets['mean']
    under_uncertainty = np.vstack(under_fit['uncertainty']) * \
        train_targets['std']
    # Get average errors.
    error = get_error(under_prediction.reshape(-1), afunc(test).reshape(-1))
    print('Gaussian linear regression prediction:', error['absolute_average'])
    # Get confidence interval on predictions.
    upper = under_prediction + under_uncertainty * tstd
    lower = under_prediction - under_uncertainty * tstd

    # Plot example 1
    ax = fig.add_subplot(grid + 1)
    ax.plot(linex, liney, '-', lw=1, color='black')
    ax.plot(train, target, 'o', alpha=0.2, color='black')
    ax.plot(test, under_prediction, 'b-', lw=1, alpha=0.4)
    ax.fill_between(np.hstack(test),
                    np.hstack(upper),
                    np.hstack(lower),
                    interpolate=True,
                    color='blue',
                    alpha=0.2)

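Example #4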
fig = plt.figure(figsize=(20, 10))

for w, p in zip([1., 1e-2, 1e-4, 1e-6], [141, 142, 143, 144]):
    kdict = [{'type': 'linear', 'scaling': w}]
    # Set up the prediction routine.
    gp = GaussianProcess(kernel_list=kdict, regularization=np.sqrt(1e-3),
                         train_fp=train,
                         train_target=target,
                         optimize_hyperparameters=False, scale_data=True)
    # Do predictions.
    fit = gp.predict(test_fp=test, uncertainty=True)

    # Get average errors.
    error = get_error(fit['prediction'], afunc(test))
    print('Gaussian regression error with {0} scaling: {1:.3f}'.format(
        w, error['absolute_average']))

    # Plotting.
    plot(p, fit['prediction'])


# ## Regularization parameter <a name="constant-parameter"></a>
# [(Back to top)](#head)
#
# The regularization parameter is varied within the range of `[1., 1e-1, 1e-2, 1e-3]`. Here we find that for larger values the model will under-fit. This will essentially result in the mean of the data being returned across the range of test values. When the regularization parameter gets small enough, it will have little impact on the model predictions as it will be smaller than the noise on the data.

# In[6]:
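
# A minimal sketch of the cell described above (not in the original notebook):
# it mirrors the preceding scaling loop, varying only the regularization, and
# reuses the same `plot` helper, which is assumed to be defined earlier.
for r, p in zip([1., 1e-1, 1e-2, 1e-3], [141, 142, 143, 144]):
    kdict = [{'type': 'linear', 'scaling': 1.}]
    # Set up the prediction routine with a fixed regularization strength.
    gp = GaussianProcess(kernel_list=kdict, regularization=r,
                         train_fp=train,
                         train_target=target,
                         optimize_hyperparameters=False, scale_data=True)
    # Do predictions.
    fit = gp.predict(test_fp=test, uncertainty=True)

    # Get average errors.
    error = get_error(fit['prediction'], afunc(test))
    print('Gaussian regression error with {0} regularization: {1:.3f}'.format(
        r, error['absolute_average']))

    # Plotting.
    plot(p, fit['prediction'])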

Example #5
kdict = [{'type': 'gaussian', 'width': [w3]}]
gp = GaussianProcess(kernel_list=kdict, regularization=sdt3,
                     train_fp=std['train'],
                     train_target=train_targets['target'],
                     optimize_hyperparameters=True)
print('Optimized kernel:', gp.kernel_list)
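# The optimizer minimizes the negative log marginal likelihood, so negate it.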
print(-gp.theta_opt['fun'])
# Do the optimized predictions.
optimized = gp.predict(test_fp=std['test'], uncertainty=True)

# Scale predictions back to the original scale.
opt_prediction = np.vstack(optimized['prediction']) * \
    train_targets['std'] + train_targets['mean']
opt_uncertainty = np.vstack(optimized['uncertainty_with_reg']) * \
    train_targets['std']

# Get average errors.
error = get_error(opt_prediction.reshape(-1), afunc(test).reshape(-1))
print('Gaussian kernel regression prediction:', error['absolute_average'])

# Get confidence interval on predictions.
opt_upper = opt_prediction + opt_uncertainty * tstd
opt_lower = opt_prediction - opt_uncertainty * tstd

# Plot example 3
plt.figure(2)
plt.plot(linex, liney, '-', lw=1, color='black')
plt.plot(train, target, 'o', alpha=0.2, color='black')
plt.plot(test, opt_prediction, 'g-', lw=1, alpha=0.4)
plt.fill_between(np.hstack(test), np.hstack(opt_upper),
                 np.hstack(opt_lower), interpolate=True,
                 color='green', alpha=0.2)
# Report the optimized width and regularization in the title (the attribute
# access below is assumed; the snippet's ending was truncated).
plt.title('Optimized GP. \n w: {0:.3f}, r: {1:.3f}'.format(
    gp.kernel_list[0]['width'][0], gp.regularization))
Example #6
                     scale_data=True)
print('Optimized kernel:', gp.kernel_list)

# Do the optimized predictions.
pred = gp.predict(test_fp=test, uncertainty=True)
prediction = np.array(pred['prediction'][:, 0])

# Calculate the uncertainty of the predictions.
uncertainty = np.array(pred['uncertainty_with_reg'])

# Get confidence interval on predictions.
upper = prediction + uncertainty
lower = prediction - uncertainty

# Get average errors.
error = get_error(prediction, afunc(test)[0])
print('Gaussian linear regression prediction:', error['absolute_average'])

# Plotting.
# Store the known underlying function for plotting.

linex = np.linspace(0.0, 7.0, test_points)
linex = np.reshape(linex, (1, np.shape(linex)[0]))
linex = np.sort(linex)
liney = []
for i in linex:
    liney.append(afunc(i)[0])

fig = plt.figure(figsize=(5, 5))

Example #7
# The snippet is truncated here; from the discussion below, the first kernel
# is the squared exponential (Gaussian), with assumed hyperparameters.
kdict = [
    {'type': 'gaussian', 'width': 1., 'scaling': 1.},
    {
        'type': 'linear',
        'scaling': 1.
    },
]
gp = GaussianProcess(train_fp=train_features,
                     train_target=train_targets,
                     kernel_list=kdict,
                     regularization=1e-2,
                     optimize_hyperparameters=True,
                     scale_data=True)

pred = gp.predict(test_fp=test_features)

error = get_error(pred['prediction'], test_targets)['rmse_average']

print(error)

plt.figure(6, figsize=(30, 15))
plt.plot(test_targets, pred['prediction'], 'o', c='r', alpha=0.5)
plt.savefig('gaussian_process.png')

# Here we see that the Gaussian process performs slightly worse than the simple ridge regression model. This is to be expected when we try to model linear data with a non-linear model. However, including the linear kernel keeps the prediction error low. If the squared exponential kernel were removed from the above example, the resulting model would be identical to the ridge regression model, just trained with the Gaussian process.
#
# ## Cross-validation <a name="cross-validation"></a>
# [(Back to top)](#head)
#
# We can use the hierarchy cross-validation module to investigate how the model performs with different data sizes. In the following, we set up a prediction function. Since the ridge regression model performs well, we simply redefine it as the prediction function. The prediction function should take the training and testing data and return a dictionary of the form `{'result': list, 'size': list}`, as sketched below.
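#
# A minimal sketch of such a function (not from the original notebook): it
# reuses `GaussianProcess` with only the linear kernel which, as noted above,
# is equivalent to ridge regression. The argument names follow `gp_predict`
# from Example #2 and are assumptions.


def rr_predict(train_features, train_targets, test_features, test_targets):
    """Prediction function for the hierarchy cross-validation."""
    data = {}

    # A linear kernel alone makes the GP equivalent to ridge regression.
    kdict = [{'type': 'linear', 'scaling': 1.}]
    gp = GaussianProcess(train_fp=train_features, train_target=train_targets,
                         kernel_list=kdict, regularization=1e-2,
                         optimize_hyperparameters=True, scale_data=True)

    pred = gp.predict(test_fp=test_features)

    # Return the error and the training-set size for the hierarchy CV.
    data['result'] = get_error(pred['prediction'],
                               test_targets)['rmse_average']
    data['size'] = len(train_targets)

    return data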
Example #8
gp = GaussianProcess(kernel_list=kdict,
                     regularization=sdt1,
                     train_fp=std["train"],
                     train_target=train_targets["target"],
                     optimize_hyperparameters=False)

under_fit = gp.predict(test_fp=std["test"], uncertainty=True)

# Scale predictions back to the original scale
under_prediction = np.vstack(
    under_fit["prediction"]) * train_targets["std"] + train_targets["mean"]
under_uncertainty = np.vstack(
    under_fit["uncertainty_with_reg"]) * train_targets["std"]

# Get average errors
error = get_error(under_prediction.reshape(-1), my_func(test).reshape(-1))

# Get confidence interval on predictions
upper = under_prediction + under_uncertainty * tstd
lower = under_prediction - under_uncertainty * tstd

plt.figure(0)
plt.plot(linex, liney, "-", lw=1, color="black")
plt.plot(train, target, "o", alpha=0.2, color="black")
plt.plot(test, under_prediction, "b-", lw=1, alpha=0.4)
plt.fill_between(np.hstack(test),
                 np.hstack(upper),
                 np.hstack(lower),
                 interpolate=True,
                 color="blue",
                 alpha=0.2)