Пример #1
0
if __name__ == '__main__':
    # Script entry point: fit GPAR on the training frame, predict at the same
    # inputs, and report SMSEs of the predictions against the test frame.
    B.epsilon = 1e-8
    wbml.out.report_time = True
    wd = WorkingDirectory('_experiments', 'eeg')

    # The first value returned by `load` is not needed here.
    _, train, test = load()

    # Inputs are the index of the training frame; outputs are its values.
    x = np.array(train.index)
    y = np.array(train)

    # Fit and predict GPAR.
    model = GPARRegressor(scale=0.02,
                          linear=False,
                          nonlinear=True,
                          nonlinear_scale=1.0,
                          noise=0.01,
                          impute=True,
                          replace=False,
                          normalise_y=True)
    model.fit(x, y)
    # With `credible_bounds=True` the prediction is unpacked into means plus
    # lower and upper bounds.
    means, lowers, uppers = \
        model.predict(x, num_samples=100, credible_bounds=True, latent=True)

    # Report SMSE.
    # The predicted means are labelled with the training index and columns
    # before being compared with `test`.
    pred = pd.DataFrame(means, index=train.index, columns=train.columns)
    smse = wbml.metric.smse(pred, test)
    wbml.out.kv('SMSEs', smse.dropna())
    wbml.out.kv('Average SMSEs', smse.mean())

    # Name of output to plot.
    name = 'F2'
Пример #2
0
    y_train, y_test = y[inds_train], y[inds_test]

    # Perform dropping of data. For each output after the first, a random
    # fraction `prob_drop` of the rows not dropped so far is selected, and
    # that output together with all later outputs is set to NaN on those rows.
    prob_drop = 0.3
    indices_all = np.arange(y_train.shape[0])
    indices_remain = indices_all
    for i in range(1, num_outputs):
        # Drop indices randomly.
        n = len(indices_remain)
        perm = np.random.permutation(n)[:int(np.round(prob_drop * n))]
        indices_drop = indices_remain[perm]
        # `np.setdiff1d` returns the surviving indices sorted and keeps their
        # integer dtype even when none are left, so the result can always be
        # used for indexing.
        indices_remain = np.setdiff1d(indices_remain, indices_drop)

        # Drop data.
        y_train[indices_drop, i:] = np.nan

    # Fit and predict GPAR.
    model = GPARRegressor(scale=0.1,
                          linear=True, linear_scale=100.,
                          nonlinear=True, nonlinear_scale=1.0,
                          noise=0.01,
                          impute=True, replace=True, normalise_y=True)
    model.fit(x_train, y_train)
    means = model.predict(x_test, num_samples=100, latent=True)

    # Print remaining numbers: the count of non-NaN training values per
    # output.
    wbml.out.kv('Remaining', np.sum(~np.isnan(y_train), axis=0))

    # Report the SMSE of the predicted means against the test outputs.
    wbml.out.kv('SMSE', wbml.metric.smse(means, y_test))
Пример #3
0
     np.linspace(100, 980, 45)])
# Build every (x1, x2) combination on the grid: two columns, one row per
# grid point.
xx1, xx2 = np.meshgrid(x1, x2)
x = np.stack([np.ravel(xx1), np.ravel(xx2)], axis=1)

# Earlier model configuration, kept commented out for reference:
# model = GPARRegressor(scale=[2., .3], scale_tie=True,
#                               linear=True, linear_scale=10., linear_with_inputs=False,
#                               nonlinear=False, nonlinear_with_inputs=False,
#                               markov=1,
#                               replace=True,
#                               noise=args.noise)

# Model used for sampling; the noise level is taken from `args`.
model = GPARRegressor(
    scale=[2., 0.5],
    scale_tie=True,
    linear=True,
    linear_scale=10.,
    input_linear=False,
    nonlinear=False,  # NOTE(review): is a setting for nonlinear inputs missing here?
    markov=1,
    replace=True,
    noise=args.noise)

# Passed to `model.sample` as `p`; presumably the number of outputs to
# sample -- confirm against the GPAR API.
n = 3

# Sample from the model at the transformed grid inputs.
y = model.sample(transform_x(x), p=n, latent=args.latent)
plt.figure(figsize=(20, 10))

# Colour names for plotting.
cs = ['tab:red', 'tab:blue', 'tab:green', 'tab:pink', 'tab:cyan']

# One subplot per column of the sample, placed on a 2-by-5 grid.
for i in range(y.shape[1]):
    plt.subplot(2, 5, i + 1)
    plt.title('Output {}'.format(i + 1))
Пример #4
0
from gpar import GPARRegressor, log_transform


def inputs(df):
    """Return the ``x`` and ``y`` columns of ``df`` as a NumPy array.

    The index of ``df`` is moved into ordinary columns first, so ``x`` and
    ``y`` may be index levels rather than columns.
    """
    flattened = df.reset_index()
    coordinates = flattened[['x', 'y']]
    return coordinates.to_numpy()


if __name__ == '__main__':
    # Script entry point: fit GPAR on the training frame and report the MAE
    # of the 'Cd' predictions on the test frame.
    B.epsilon = 1e-8
    wbml.out.report_time = True
    wd = WorkingDirectory('_experiments', 'jura')

    train, test = load()

    # Fit and predict GPAR.
    hyperparameters = dict(
        scale=10.,
        linear=False,
        nonlinear=True,
        nonlinear_scale=1.0,
        noise=0.1,
        impute=True,
        replace=True,
        normalise_y=True,
        transform_y=log_transform,
    )
    model = GPARRegressor(**hyperparameters)
    x_train, y_train = inputs(train), train.to_numpy()
    model.fit(x_train, y_train, fix=False)
    predicted = model.predict(inputs(test), num_samples=200, latent=True)
    # Label the predictions with the test index and the training columns so
    # they can be compared with `test`.
    means = pd.DataFrame(predicted, index=test.index, columns=train.columns)

    mae = wbml.metric.mae(means, test)
    wbml.out.kv('MAE', mae['Cd'])
Пример #5
0
    # The data set size index is the first command-line argument; it
    # defaults to 0 when no argument is given.
    if len(sys.argv) < 2:
        d_size = 0
    else:
        d_size = int(sys.argv[1])
    d_all, d_train, d_tests = load_temp()[d_size]

    # Determine the number of inducing points.
    n_ind = (10 * 10 + 1, 10 * 15 + 1, 10 * 31 + 1)[d_size]

    # Place inducing points evenly spaced over the range of all inputs.
    x = convert_index(d_all)
    lowest, highest = x.min(), x.max()
    x_ind = np.linspace(lowest, highest, n_ind)

    # Fit and predict GPAR.
    #   Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the
    #   results a little more drastic.
    hyperparameters = dict(
        scale=0.2,
        linear=True,
        linear_scale=10.,
        nonlinear=True,
        nonlinear_scale=1.,
        noise=0.1,
        impute=True,
        replace=True,
        normalise_y=True,
        x_ind=x_ind,
    )
    model = GPARRegressor(**hyperparameters)
    model.fit(convert_index(d_train), d_train.to_numpy())

    # Predict for the test sets, in order.
    preds = [
        model.predict(convert_index(d),
                      num_samples=50,
                      credible_bounds=True,
                      latent=False)
        for d in d_tests
    ]

    # Save predictions.
    wd.save(preds, f'results{d_size}.pickle')
Пример #6
0
# Reorder the header entries according to `order`.
header = [header[position] for position in order]

# Remove regions from training data. A pristine copy of the outputs is kept
# for evaluation. Each region is a tuple (name, row indices, output column).
y_all = y.copy()
o7_column = header.index('o7')
regions = [('o7', np.arange(301, 1400), o7_column)]

for _, inds, p in regions:
    y[inds, p] = np.nan

# Fit and predict GPAR.
model = GPARRegressor(
    scale=0.1,
    linear=True,
    linear_scale=3.,
    nonlinear=True,
    nonlinear_scale=0.2,
    rq=False,
    noise=1.,
    impute=True,
    replace=True,
    normalise_y=True,
)
model.fit(x, y)
means, lowers, uppers = model.predict(
    x, num_samples=200, credible_bounds=True, latent=False
)

# Compute SMSEs.
smses = []
for _, inds, p in regions:
    truth = y_all[inds, p]
    # For the purpose of comparison, standardise using the mean of the
    # *training* data! This is *not* how the SMSE usually is defined.
    training_mean = np.nanmean(y[:, p])
    mse_mean = np.nanmean((truth - training_mean) ** 2)
    mse_gpar = np.nanmean((truth - means[inds, p]) ** 2)
    smses.append(mse_gpar / mse_mean)
print('Average SMSE:', np.mean(smses))
Пример #7
0
if __name__ == "__main__":
    # Script entry point: fit GPAR on the training frame, predict at the same
    # inputs, and compare the predicted means with the test frame.
    wbml.out.report_time = True
    wd = WorkingDirectory("_experiments", "exchange")

    # The first value returned by `load` is not needed here.
    _, train, test = load()

    # Inputs are the index of the training frame; outputs are its values.
    x = np.array(train.index)
    y = np.array(train)

    # Fit and predict GPAR.
    model = GPARRegressor(
        scale=0.1,
        linear=True,
        linear_scale=10.0,
        nonlinear=True,
        nonlinear_scale=1.0,
        rq=True,
        noise=0.01,
        impute=True,
        replace=False,
        normalise_y=True,
    )
    model.fit(x, y)
    # With `credible_bounds=True` the prediction is unpacked into means plus
    # lower and upper bounds.
    means, lowers, uppers = model.predict(x,
                                          num_samples=200,
                                          credible_bounds=True,
                                          latent=False)

    # For the purpose of comparison, standardise using the mean of the *training*
    # data. This is not how the SMSE usually is defined!
    # The lines below compute the per-column mean squared error between the
    # predicted means and the test frame.
    pred = pd.DataFrame(means, index=train.index, columns=train.columns)
    mse = ((pred - test)**2).mean(axis=0)
Пример #8
0
# The data set size index is the first command-line argument; it defaults to
# 0 when no argument is given.
if len(sys.argv) < 2:
    d_size = 0
else:
    d_size = int(sys.argv[1])
d_all, d_train, d_tests = load_temp()[d_size]
n_ind = (10 * 10 + 1, 10 * 15 + 1, 10 * 31 + 1)[d_size]

# Place inducing points evenly spaced over the range of the first input
# column of the full data set.
first_input = d_all.x[:, 0]
x_ind = np.linspace(first_input.min(), first_input.max(), n_ind)

# Fit and predict GPAR.
#   Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the results
#   a little more drastic.
hyperparameters = dict(
    scale=0.2,
    linear=True,
    linear_scale=10.,
    nonlinear=True,
    nonlinear_scale=1.,
    noise=0.1,
    impute=True,
    replace=True,
    normalise_y=True,
    x_ind=x_ind,
)
model = GPARRegressor(**hyperparameters)
model.fit(d_train.x, d_train.y)

# Predict for the test sets, announcing each one (numbered from 1).
preds = []
for i, d in enumerate(d_tests):
    print('Sampling', i + 1)
    prediction = model.predict(
        d.x, num_samples=50, credible_bounds=True, latent=False
    )
    preds.append(prediction)
with open('examples/paper/air_temp_results{}.pickle'.format(d_size),
Пример #9
0
    y = data[:, [header.index(name) for name in ['Ni', 'Zn', 'Cd']]]
    return x, y


# Load and extract data.
x_train, y_train = load('examples/data/jura/jura_prediction.dat')
x_test, y_test = load('examples/data/jura/jura_validation.dat')

# Append first two outputs of test data to training data: the last one is
# predicted, so it is blanked out in the appended rows.
y_test_masked = y_test.copy()
y_test_masked[:, -1] = np.nan
x_train = np.concatenate([x_train, x_test], axis=0)
y_train = np.concatenate([y_train, y_test_masked], axis=0)

# Fit and predict GPAR.
hyperparameters = dict(
    scale=10.,
    linear=False,
    nonlinear=True,
    nonlinear_scale=1.0,
    noise=0.1,
    impute=True,
    replace=True,
    normalise_y=True,
    transform_y=log_transform,
)
model = GPARRegressor(**hyperparameters)
model.fit(x_train, y_train, fix=False)
means_test = model.predict(x_test, num_samples=200, latent=True)

# Compute MAE on the last output, ignoring NaN entries.
absolute_errors = np.abs(y_test[:, -1] - means_test[:, -1])
print('MAE:', np.nanmean(absolute_errors))