# Load the data; the first returned item is not used in this excerpt.
_, train, test = load()
x = np.array(train.index)
y = np.array(train)

# Fit GPAR on the training frame and predict at the same inputs, asking for
# credible bounds alongside the means.
model = GPARRegressor(
    scale=0.02,
    linear=False,
    nonlinear=True,
    nonlinear_scale=1.0,
    noise=0.01,
    impute=True,
    replace=False,
    normalise_y=True,
)
model.fit(x, y)
means, lowers, uppers = model.predict(
    x,
    num_samples=100,
    credible_bounds=True,
    latent=True,
)

# Report SMSE: wrap the means in a frame labelled like `train` so they can be
# scored against `test`.
pred = pd.DataFrame(means, index=train.index, columns=train.columns)
smse = wbml.metric.smse(pred, test)
wbml.out.kv('SMSEs', smse.dropna())
wbml.out.kv('Average SMSEs', smse.mean())

# Name of output to plot.
name = 'F2'

# Plot the result.
plt.figure(figsize=(12, 1.75))
wbml.plot.tex()
y_train, y_test = y[inds_train], y[inds_test]

# Perform dropping of data. For every output after the first, a random share
# (`prob_drop`) of the rows that are still complete is blanked out for that
# output and all outputs after it.
prob_drop = 0.3
indices_all = np.arange(y_train.shape[0])
indices_remain = indices_all
for i in range(1, num_outputs):
    # Drop indices randomly.
    n = len(indices_remain)
    perm = np.random.permutation(n)[:int(np.round(prob_drop * n))]
    indices_drop = indices_remain[perm]
    # `np.setdiff1d` returns a sorted integer array, so the next draw does not
    # depend on the iteration order of a Python set.
    indices_remain = np.setdiff1d(indices_remain, indices_drop)

    # Drop data.
    y_train[indices_drop, i:] = np.nan

# Fit and predict GPAR.
model = GPARRegressor(
    scale=0.1,
    linear=True,
    linear_scale=100.,
    nonlinear=True,
    nonlinear_scale=1.0,
    noise=0.01,
    impute=True,
    replace=True,
    normalise_y=True,
)
model.fit(x_train, y_train)
means = model.predict(x_test, num_samples=100, latent=True)

# Print remaining numbers: count of non-NaN training values per column.
wbml.out.kv('Remaining', np.sum(~np.isnan(y_train), axis=0))

# Compute SMSEs for all but the first output.
# NOTE(review): the call below passes every output column; confirm whether the
# first output is meant to be excluded here.
wbml.out.kv('SMSE', wbml.metric.smse(means, y_test))
from gpar import GPARRegressor, log_transform


def inputs(df):
    """Return the 'x' and 'y' columns of `df`, taken after `reset_index`, as an array."""
    flat = df.reset_index()
    return flat[['x', 'y']].to_numpy()


if __name__ == '__main__':
    B.epsilon = 1e-8
    wbml.out.report_time = True
    wd = WorkingDirectory('_experiments', 'jura')

    train, test = load()

    # Fit and predict GPAR.
    model = GPARRegressor(
        scale=10.,
        linear=False,
        nonlinear=True,
        nonlinear_scale=1.0,
        noise=0.1,
        impute=True,
        replace=True,
        normalise_y=True,
        transform_y=log_transform,
    )
    model.fit(inputs(train), train.to_numpy(), fix=False)
    means = model.predict(inputs(test), num_samples=200, latent=True)
    # Label the predictions like the test frame so a single column can be
    # selected by name below.
    means = pd.DataFrame(means, index=test.index, columns=train.columns)

    wbml.out.kv('MAE', wbml.metric.mae(means, test)['Cd'])
d_all, d_train, d_tests = load_temp()[d_size]

# Determine the number of inducing points.
n_ind = [10 * 10 + 1, 10 * 15 + 1, 10 * 31 + 1][d_size]

# Place inducing points evenly spaced over the range of the converted index
# of the full data set.
x = convert_index(d_all)
x_ind = np.linspace(x.min(), x.max(), n_ind)

# Fit and predict GPAR.
# Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the
# results a little more drastic.
model = GPARRegressor(
    scale=0.2,
    linear=True,
    linear_scale=10.,
    nonlinear=True,
    nonlinear_scale=1.,
    noise=0.1,
    impute=True,
    replace=True,
    normalise_y=True,
    x_ind=x_ind,
)
model.fit(convert_index(d_train), d_train.to_numpy())

# Predict for the test sets: one prediction result per test set, in order.
preds = [
    model.predict(
        convert_index(d),
        num_samples=50,
        credible_bounds=True,
        latent=False,
    )
    for d in d_tests
]

# Save predictions.
wd.save(preds, f'results{d_size}.pickle')
trial = pickle.load(f)
x = trial['x']
y_train = trial['y_train']
y_test = trial['y_test']
y_labels = trial['y_labels']

# Fit and predict GPAR.
model = GPARRegressor(
    scale=0.02,
    linear=False,
    nonlinear=True,
    nonlinear_scale=1.0,
    noise=0.01,
    impute=True,
    replace=False,
    normalise_y=True,
)
model.fit(x, y_train)
means, lowers, uppers = model.predict(
    x,
    num_samples=100,
    credible_bounds=True,
    latent=True,
)

# Compute SMSE. Only columns of `y_test` with at least one non-NaN entry are
# scored; the reference error is that of predicting each column's own mean.
i_test = np.any(~np.isnan(y_test), axis=0)
y_scored = y_test[:, i_test]
column_means = np.nanmean(y_scored, axis=0, keepdims=True)
mse_mean = np.nanmean((y_scored - column_means)**2)
mse_gpar = np.nanmean((y_scored - means[:, i_test])**2)
print('SMSE:', mse_gpar / mse_mean)

# Plot the result.
plt.figure(figsize=(12, 9))
plt.rcParams['font.family'] = 'serif'
plt.rcParams['mathtext.fontset'] = 'dejavuserif'
# Place inducing points evenly spaced between the smallest and largest value
# of the first input column of the full data set.
x_first = d_all.x[:, 0]
x_ind = np.linspace(x_first.min(), x_first.max(), n_ind)

# Fit and predict GPAR.
# Note: we use D-GPAR-L-NL here, as opposed to D-GPAR-L, to make the results
# a little more drastic.
model = GPARRegressor(
    scale=0.2,
    linear=True,
    linear_scale=10.,
    nonlinear=True,
    nonlinear_scale=1.,
    noise=0.1,
    impute=True,
    replace=True,
    normalise_y=True,
    x_ind=x_ind,
)
model.fit(d_train.x, d_train.y)

# Predict for the test sets, printing a 1-based progress counter.
preds = []
for i, d in enumerate(d_tests):
    print('Sampling', i + 1)
    sampled = model.predict(
        d.x,
        num_samples=50,
        credible_bounds=True,
        latent=False,
    )
    preds.append(sampled)

# Save predictions.
out_path = 'examples/paper/air_temp_results{}.pickle'.format(d_size)
with open(out_path, 'wb') as f:
    pickle.dump(preds, f)
y = data[:, [header.index(name) for name in ['Ni', 'Zn', 'Cd']]] return x, y # Load and extract data. x_train, y_train = load('examples/data/jura/jura_prediction.dat') x_test, y_test = load('examples/data/jura/jura_validation.dat') # Append first two outputs of test data to training data: the last one is # predicted. x_train = np.concatenate((x_train, x_test), axis=0) y_train_test = y_test.copy() y_train_test[:, -1] = np.nan y_train = np.concatenate((y_train, y_train_test), axis=0) # Fit and predict GPAR. model = GPARRegressor(scale=10., linear=False, nonlinear=True, nonlinear_scale=1.0, noise=0.1, impute=True, replace=True, normalise_y=True, transform_y=log_transform) model.fit(x_train, y_train, fix=False) means_test = model.predict(x_test, num_samples=200, latent=True) # Compute MAE. print('MAE:', np.nanmean(np.abs(y_test[:, -1] - means_test[:, -1])))