Example #1
def gp_fit(M=108, seed=0):
    # Make data
    np.random.seed(seed)
    I = np.random.permutation(X.shape[0])[:M]
    gp = GaussianProcess()
    gp.fit(X[I, :], fX[I])
    return gp
Example #2
    def __init__(self, opt_fun, bounds, kernel, acquisition, 
                 n_random_samples=None, X_train=None, X_pre_calc=None, Y_pre_calc=None):
        """
        Find optima within bounds using Bayesian optimization

        Parameters
        ----------
        opt_fun : function call
            function to be optimized.
        bounds : list of tuples
            lower and upper limit for each variable.
        kernel : kernel call
            initialized kernel class.
        acquisition : function call
            function used for determining the next point.
        n_random_samples : int, optional
            number of randomly generated samples within the bounds
        X_train : numpy array, optional
            sample points to estimate
        X_pre_calc : numpy array, optional
            sample points already estimated
        Y_pre_calc : numpy array, optional
            function values for an already calculated sample
        """
        
        self.opt_fun = opt_fun
        self.bounds = bounds
        self.n_vals = len(bounds)
        
        self.kernel = kernel
        self.acquisition = acquisition
        
        self.construct_sample(n_random_samples, X_train, X_pre_calc, Y_pre_calc)
        self.gpr = GaussianProcess(self.X_sample, self.Y_sample, kernel)
Example #3
def test_gaussian_process():
    space = Space({
        "x": [[-3.0, 3.0] for _ in range(2)],
        "y": [[-3.0, 3.0] for _ in range(2)]
    })

    GaussianProcess(to_maximize,
                    space).maximize().plot(path="history_maximize.png")
    GaussianProcess(to_minimize,
                    space).minimize().plot(path="history_minimize.png")
Example #4
def run_demo(args):
    """
    @brief a Gaussian Process regression example using an input covariance model
    """

    np.random.seed(1)

    def f(x):
        """
        @brief the function to predict.
        """
        return x * np.sin(x)
  
    # the input data points
    X = np.linspace(0.1, 9.9, 20)

    # make the observations with added noise
    y = f(X).ravel()
    dy = 0.5 + 1.0 * np.random.random(y.shape)
    noise = np.random.normal(0, dy)
    y += noise
    
    # mesh the input space for evaluations of the prediction
    x = np.linspace(-2, 12, 2*len(X))

    # instantiate a Gaussian Process model, allowing all params to vary
    gp = GaussianProcess(theta0 = [0.5, 2.0, 1.0], 
                         covfunction=args.covariance, verbose=True, 
                         fixed=[False, False, False], random_start=10)

    # fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(X, y, dy)

    # make the prediction on the meshed x-axis
    y_pred, sigma = gp.predict(x)
    
    # plot the function, the prediction and the 95% confidence interval based on
    # the standard deviation
    fig = pl.figure()
    pl.plot(x, f(x), 'r:', label=r'$f(x) = x \ \mathrm{sin}(x)$')
    pl.errorbar(X.ravel(), y, dy, label='Observations')
    pl.plot(x, y_pred, label='Prediction')
    pl.fill(np.concatenate([x, x[::-1]]),
            np.concatenate([y_pred - 1.9600 * sigma,
                           (y_pred + 1.9600 * sigma)[::-1]]),
            alpha=.2, fc='DarkGoldenRod', ec="None", label='95% confidence interval')
    
    pl.xlabel('$x$', fontsize=16)
    pl.ylabel('$f(x)$', fontsize=16)
    pl.ylim(-15, 20)
    pl.legend(loc='upper left')
    
    pl.show()
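
The 1.9600 factor in the shaded band is the two-sided 95% quantile of the standard normal distribution; a quick sanity check (not part of the original demo) recovers it with scipy:

from scipy.stats import norm

# 97.5% quantile of N(0, 1): the half-width of a two-sided 95% interval
z = norm.ppf(0.975)
print(round(z, 4))  # 1.96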
Example #5
def get_estimated_mean_and_std(gp: GaussianProcess, array_samples_parameters,
                               X):
    functions_samples = []
    X = X.reshape((-1, gp.array_dataset.shape[1]))

    for num_samples, sample_gp_parameter in enumerate(
            array_samples_parameters):
        gp.set_kernel_parameters(*sample_gp_parameter.flatten())
        # draw one posterior function sample per hyperparameter draw and reuse it
        function_sample = gp.get_sample(X)
        functions_samples.append(function_sample)

        yield function_sample

        if num_samples % 50 == 0:
            print(f'num samples: {num_samples}', end='\r')
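
Since get_estimated_mean_and_std is a generator, the posterior mean and standard deviation have to be reduced by the caller; a minimal sketch, assuming gp, array_samples_parameters and X as above:

import numpy as np

samples = np.array(list(get_estimated_mean_and_std(gp, array_samples_parameters, X)))
estimated_mean = samples.mean(axis=0)  # Monte Carlo estimate of the GP mean
estimated_std = samples.std(axis=0)    # spread induced by the sampled hyperparameters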
Example #6
 def __init__(self,
              kernel: Kernel,
              objective_function: objective_functions.abstract_objective_function.ObjectiveFunction,
              acquisition_function: AcquisitionFunction,
              ):
     """
     :param kernel: Kernel object used by the gaussian process to perform a regression.
     :param objective_function: ObjectiveFunction object which we will try to minimise
     :param acquisition_function: AcquisitionFunction object
     """
     self._initial_kernel = copy.deepcopy(kernel)
     self._gaussian_process = GaussianProcess(kernel)
     self._objective_function = objective_function
     self._acquisition_function = acquisition_function
Example #7
    def __init__(self, discount=0.9, perf_weight=1., depth=0, n_sim=10):
        # Settings
        self.discount = discount
        self.n_queries = 20
        self.depth = depth
        self.n_sim = n_sim
        self.n_reward_samples = 200
        self.perf_sample_std = 0.001
        self.runtime_sample_std = 0.001
        self.perf_weight = perf_weight
        batch_size = 10

        # create Bayesian optimizer and Gaussian process
        self.kernel = SquaredExponential(n_dim=1,  # assuming 1 hyperparameter
                                         init_scale_range=(.01, .1),
                                         init_amp=1.)

        self.gp = GaussianProcess(n_epochs=100,
                                  batch_size=batch_size,
                                  n_dim=1,  # assuming 1 hyperparameter
                                  kernel=self.kernel,
                                  noise=0.01,
                                  train_noise=False,
                                  optimizer=tf.train.AdagradOptimizer(0.01),
                                  # optimizer=tf.train.GradientDescentOptimizer(0.01),
                                  verbose=0)

        self.bo = BayesianOptimizer(self.gp,
                                    region=np.array([[0., 1.]]),  # assuming 1 hyperparameter
                                    iters=100,
                                    tries=2,
                                    optimizer=tf.train.GradientDescentOptimizer(0.1),
                                    verbose=0)
Example #8
    def _evaluate(self, gaussian_process: GaussianProcess,
                  data_points: np.ndarray) -> np.ndarray:
        """
        Evaluates the acquisition function at all the data points
        :param gaussian_process:
        :param data_points: numpy array of dimension n x m where n is the number of elements to evaluate
        and m is the number of variables used to calculate the objective function
        :return: a numpy array of shape n x 1 (or a float) representing the estimation of the acquisition function at
        each point
        """

        mean_data_points, std_data_points = gaussian_process.get_gp_mean_std(
            data_points.reshape((-1, gaussian_process.array_dataset.shape[1])))

        mean = mean_data_points.flatten()
        mean_opt = np.min(gaussian_process.array_objective_function_values)

        difference = mean_opt - mean
        Z = difference / std_data_points

        ei = difference * norm.cdf(Z) + std_data_points * norm.pdf(Z)
        ei[std_data_points == 0.0] = 0.0

        return ei
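
This is the standard expected-improvement formula for minimisation, EI(x) = (f* - mu(x)) * Phi(Z) + sigma(x) * phi(Z) with Z = (f* - mu(x)) / sigma(x) and f* the best value seen so far. A standalone numeric check with made-up values:

import numpy as np
from scipy.stats import norm

mu, sigma, f_best = 0.5, 0.2, 0.4  # illustrative values, not from the example
z = (f_best - mu) / sigma
ei = (f_best - mu) * norm.cdf(z) + sigma * norm.pdf(z)
print(ei)  # about 0.04: positive even though the mean predicts no improvement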
Example #9
def rui_1d():
    from kernels import SquaredExponential
    from gaussian_process import GaussianProcess
    import matplotlib.pyplot as plt

    batch_size = 4
    new_samples = 1000
    n_dim = 1
    # Set up the modules for bayesian optimizer
    kernel = SquaredExponential(n_dim=n_dim,
                                init_scale_range=(.1, .5),
                                init_amp=1.)
    gp = GaussianProcess(n_epochs=100,
                         batch_size=10,
                         n_dim=n_dim,
                         kernel=kernel,
                         noise=0.05,
                         train_noise=False,
                         optimizer=tf.train.GradientDescentOptimizer(0.001),
                         verbose=0)
    bo = BayesianOptimizer(gp,
                           region=np.array([[0., 1.]]),
                           iters=100,
                           tries=20,
                           optimizer=tf.train.GradientDescentOptimizer(0.1),
                           verbose=1)

    X = np.array([1.0, 0.000335462627903, 0.0314978449076,
                  2980.95798704]).reshape(-1, 1)
    X = np.log(X) / 8
    y = np.array([0.864695262443, 0.5, 0.860244469176,
                  0.862691649896]).reshape(-1, 1)
    X_old = X
    y_old = y
    bo.fit(X, y)
    x_next, y_next, acq_next = bo.select()  # avoid clobbering the observation vector y

    x = np.linspace(-1, 1).reshape(-1, 1)
    y_pred, var = gp.np_predict(x)
    ci = 2 * np.sqrt(var)  # 95% band: two posterior standard deviations
    plt.plot(x, y_pred)
    plt.plot(x, y_pred + ci, 'g--')
    plt.plot(x, y_pred - ci, 'g--')
    plt.scatter(X_old, y_old)
    plt.plot([x_next[0, 0], x_next[0, 0]], plt.ylim(), 'r--')
    plt.show()
Example #10
    def __init__(self, score_func, bounds, policy='ei', epsilon=1e-7, lambda_val=1.5, gp_params=None):

        assert policy == 'ei' or policy == 'ucb'

        self.score_func = score_func
        self.bounds = bounds
        self.policy = policy
        self.epsilon = epsilon
        self.lambda_val = lambda_val  # for ucb policy only
        if gp_params is not None:
            self.gp = GaussianProcess(**gp_params)
        else:
            n_params = bounds.shape[0]
            length_scale = 0.5 * np.ones(n_params)
            bounds = np.tile(np.array([1e-2, 1e2]), (n_params, 1))
            kernel = RBFKernel(length_scale=length_scale, length_scale_bounds=bounds)
            self.gp = GaussianProcess(kernel, alpha=0.03)
Example #11
def plot_vi_gp(obj, mu, Sigma, X, y):
    gp = GaussianProcess(GaussianLinearKernel(0., 0., 0., 0., 0., 0.), X, y)

    xlim, = obj.boundaries
    x_gt = np.linspace(xlim[0], xlim[1], 100)
    xx = np.linspace(xlim[0] - 2, xlim[1] + 2, 200)

    plt.plot(x_gt, obj.evaluate_without_noise(x_gt), c='c')

    plt.title("Gaussian Process Regression")
    mu = mu.flatten()
    for _ in range(500):
        sample_gp_parameter = onp.random.multivariate_normal(mu, Sigma)
        gp.set_kernel_parameters(*sample_gp_parameter)
        function_sample = gp.get_sample(xx.reshape((-1, 1)))
        plt.plot(xx, function_sample, alpha=0.3, c='C0')
        plt.scatter(gp.array_dataset,
                    gp.array_objective_function_values,
                    c='m',
                    marker="+",
                    zorder=1000,
                    s=(30, ))
        plt.pause(0.01)
    plt.show()
Example #12
def get_log_upper_proba_distribution_gp(gaussian_process: GaussianProcess,
                                        theta: np.ndarray):
    """
    This function evaluates log( p_1(theta | X, y) ) where:
     - p_1 = Z * p
     - p is the posterior distribution
     - p_1 is easy to calculate

    There are 2 methods that you might find useful in the class GaussianProcess:
    - get_log_marginal_likelihood
    - get_log_prior_at

    :param gaussian_process
    :param theta: parameters at which we evaluate p_1. In our example, it is a numpy array (row vector)
    of shape (6,), since our linear + Gaussian kernel depends on 6 real numbers.
    :return: log( p_1(theta | X, y) )
    """
    log_marginal_likelihood = gaussian_process.get_log_marginal_likelihood(
        *theta)
    log_prior = gaussian_process.get_log_prior_at(*theta)

    return log_marginal_likelihood + log_prior
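
In a Metropolis-Hastings sampler this unnormalised log-posterior is all that is needed, because the constant Z cancels in the acceptance ratio. A minimal sketch of one accept/reject step, assuming a GaussianProcess instance gaussian_process, a current theta, and a symmetric Gaussian proposal:

import numpy as np

theta_proposal = theta + 0.4 * np.random.standard_normal(theta.shape)
log_ratio = (get_log_upper_proba_distribution_gp(gaussian_process, theta_proposal)
             - get_log_upper_proba_distribution_gp(gaussian_process, theta))
if np.log(np.random.uniform()) < log_ratio:
    theta = theta_proposal  # accept; otherwise keep the current theta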
Example #13
def gp_fit(M=108, seed=0, obj='lift'):
    # Make data
    np.random.seed(seed)
    I = np.random.permutation(X.shape[0])[:M]
    gp = GaussianProcess()

    if obj == 'lift':
        gp.fit(X[I, :], f_lift[I])
    elif obj == 'drag':
        gp.fit(X[I, :], f_drag[I])
    return gp
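
This excerpt relies on module-level arrays X, f_lift and f_drag; assuming those exist, fitting both objectives is just:

gp_lift = gp_fit(M=108, seed=0, obj='lift')
gp_drag = gp_fit(M=108, seed=0, obj='drag')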
Example #14
    def _evaluate(self, gaussian_process: GaussianProcess,
                  data_points: np.ndarray) -> np.ndarray:
        """
        Evaluates the acquisition function at all the data points
        :param gaussian_process:
        :param data_points: numpy array of dimension n x m where n is the number of elements to evaluate
        and m is the number of variables used to calculate the objective function
        :return: a numpy array of shape n x 1 (or a float) representing the estimation of the acquisition function at
        each point
        """

        array_objective_function_values = gaussian_process.array_objective_function_values
        best_objective_function_value = np.min(array_objective_function_values)

        mean_data_points, std_data_points = gaussian_process.get_gp_mean_std(
            data_points)
        mean_data_points = mean_data_points.reshape((-1, 1))
        std_data_points = std_data_points.reshape((-1, 1))

        gamma = (best_objective_function_value -
                 mean_data_points) / std_data_points

        return std_data_points * (gamma * norm.cdf(gamma) + norm.pdf(gamma))
Example #15
a_kri = np.zeros((nSamples, nSamples))
b_kri = np.zeros((nSamples, nSamples))
for i in range(nSamples):
    for j in range(nSamples):
        a_kri[j, i] = np.dot(U_comp[:, j].T, uV[:, i])
        b_kri[j, i] = np.dot(G_comp[:, j].T, gV[:, i])
np.save("../results/Offline/a_kri", a_kri)
np.save("../results/Offline/b_kri", b_kri)
# Trained solution
mean = "constant"
covariance = "squared_exponential"
theta_U = np.array([100000.0] * 6)
theta_L = np.array([0.001] * 6)
theta_0 = np.array([1.0] * 6)
for i in range(nSamples):
    GP_u = GaussianProcess(regr=mean,
                           corr=covariance,
                           theta0=theta_0,
                           thetaL=theta_L,
                           thetaU=theta_U)
    GP_u.fit(pCandMax.T, a_kri[:, i])
    GP_g = GaussianProcess(regr=mean,
                           corr=covariance,
                           theta0=theta_0,
                           thetaL=theta_L,
                           thetaU=theta_U)
    GP_g.fit(pCandMax.T, b_kri[:, i])
    joblib.dump(GP_u, "../results/Offline/GP_alpha_" + str(i) + ".pkl")
    joblib.dump(GP_g, "../results/Offline/GP_beta_" + str(i) + ".pkl")

toc = timeit.default_timer()
print("KRIGING COMPUTATION TIME: " + str(toc - tic) + " s")
Example #16
import numpy as np
import matplotlib.pyplot as plt
from gaussian_process import GaussianProcess


def func(x):
    return np.sin(0.9 * x)


g = GaussianProcess(noise_variance=1E-5)


def sample_and_plot():
    xtest = np.linspace(-5, 5, 100).reshape(-1, 1)
    ytest = func(xtest)
    mu, cov_posterior, s = g.predict(xtest)

    plt.figure()
    plt.clf()
    plt.plot(g.X, g.Y, 'r+', ms=20)
    plt.plot(xtest, ytest, 'b-')
    plt.gca().fill_between(xtest.flat,
                           mu[:, 0] - 3 * s,
                           mu[:, 0] + 3 * s,
                           color="#dddddd")
    plt.plot(xtest, mu, 'r--', lw=2)
    # plt.savefig('predictive.png', bbox_inches='tight')
    plt.title('Mean predictions plus 3 st.deviations')
    plt.axis([-5, 5, -3, 3])

    # draw samples from the posterior at our test points.
Example #17
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm as stats_norm

from gaussian_process import GaussianProcess, SquaredDistanceKernel

BOUNDS = [0, 10, -10, 10]
PLOT_POINT_COUNT = 1000


def func(v):
    return v * np.sin(v)


gp = GaussianProcess(kernel=SquaredDistanceKernel(kernel_param=0.01),
                     noise_variance=1E-3)

fig = plt.figure()
ax_data = fig.add_subplot(311)
ax_acquisition = fig.add_subplot(312)
ax_func = fig.add_subplot(313)

l_mu = None
l_data = None
l_stddev = None
l_acquisition = None
l_acquisition_area = None
l_func = None
xx = np.linspace(BOUNDS[0], BOUNDS[1], PLOT_POINT_COUNT).reshape(-1, 1)

Example #18
        "--mu_memory={mu_memory}".format(mu_memory=mu_memory),
        "--mu_data={mu_data}".format(mu_data=mu_data), "--ant_system"
    ])
    df = pd.read_csv(filename, index_col=0)
    score = int(-df["mean"].values.flatten()[0])
    return score


space = Space({
    "oblivion": (0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
    "mu_memory": [0, 1000],
    "mu_data": [0, 1000]
})

n_calls = 1000
gp = GaussianProcess(score, space)
with Notipy():
    results = gp.minimize(n_calls=n_calls,
                          n_random_starts=10,
                          callback=[
                              TQDMGaussianProcess(n_calls=n_calls),
                              DeltaYStopper(**{
                                  "delta": 1,
                                  "n_best": 100
                              })
                          ],
                          random_state=42,
                          n_jobs=cpu_count())

print(gp.best_parameters)
pd.DataFrame(dict(gp.best_parameters),
Example #19
def gaussian_process(kernel, nb_training_points=6, nb_samples=10, plot_dist=False, plot_gt=True, save=None):
    """
    Fit and plot a Gaussian process.

    # Parameters
        * kernel: an object deriving from gaussian_process.Kernel
            Kernel used to fit the GP
        * nb_training_points: int, default 6
            How many points to fit
        * nb_samples: int, default 10
            How many samples to draw
        * plot_dist: bool, default False
            If True, plot the mean and 95% confidence interval of the GP
        * plot_gt: bool, default True
            If True, plot the true underlying function
        * save: str, default None
            if not None, save fig under save
    """
    # Prepare training and test set
    def f(x):
        return 2 * np.sin(2*x) / x

    rs = np.random.RandomState(5)
    x = np.linspace(0.1, 10., 50)
    rs.shuffle(x)

    x_train = x[:nb_training_points]
    y_train = f(x_train)

    x_pred = np.linspace(-1, 12., 1000)
    y = f(x_pred)

    # Fit and predict
    gp = GaussianProcess(kernel, random_state=rs)
    if nb_training_points > 0:
        gp.fit(x_train.reshape(-1, 1), y_train)

    y_pred, std_pred = gp.predict(x_pred.reshape(-1, 1), return_std=True)
    y_pred = y_pred.squeeze()

    std_pred += 1e-15  # Nobody likes 0

    # Configure plot settings
    color = sns.diverging_palette(15, 255, n=9, s=90, l=40)
    fig = plt.figure(figsize=(12, 4))
    sns.set_style("dark")
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    fig.add_axes(ax)

    # Plot ground truth if required
    if plot_gt:
        plt.plot(x_pred, y, c=color[1], lw=3, label=u'Truth')

    # Plot mean and 95% prediction interval if required
    if plot_dist:
        plt.plot(x_pred, y_pred, c=color[8], lw=3, label=u'Prediction', zorder=4)
        plt.fill(np.concatenate([x_pred, x_pred[::-1]]),
                 np.concatenate([y_pred - 1.9600 * std_pred, (y_pred + 1.9600 * std_pred)[::-1]]),
                 alpha=.4, fc=color[7], ec='None', label="95% prediction interval")

    # Plot required number of samples
    if nb_samples > 0:
        samples = gp.sample_y(x_pred.reshape(-1, 1), nb_samples)
        plt.plot(x_pred, samples)

    # Plot training set
    plt.scatter(x_train, y_train, facecolors=color[0], s=80, zorder=5)

    # More plot settings
    plt.xlim([-1, 12.])
    plt.ylim([-5, 5.])
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

    if save is None:
        plt.show()
    else:
        plt.savefig(save, bbox_inches='tight', pad_inches=0)
    plt.close()
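
A possible invocation, where SomeKernel is a placeholder for any subclass of gaussian_process.Kernel required by the docstring:

gaussian_process(SomeKernel(), nb_training_points=6, nb_samples=5,
                 plot_dist=True, save="gp_demo.png")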
Example #20
def run_demo(args):
    """
    @brief a Gaussian Process regression example that fits several supernovae
           spectra
    """
    
    # read in the relevant files in correct order
    f1 = glob("../data/SN2011fe/11feM*")
    f1.sort(reverse=True)
    f2 = glob("../data/SN2011fe/11feP*")
    f2.sort()
    files = f1+f2
    
    pl.ion()
    
    # fit each supernova spectrum in serial
    for i, f in enumerate(files):
        
        file_root = os.path.splitext(os.path.basename(f))[0]
        time = float(file_root[-3:])/10.
        if 'M' in file_root: time *= -1.
        
        # load the data from inputdata.txt
        X, Y, Yerr = np.loadtxt(f, unpack=True)
  
        # save these for later
        X_tot    = X.copy()
        Y_tot    = Y.copy()
        Yerr_tot = Yerr.copy()
    
        n_eval = len(X)
        batch_size = args.batch_size
        resolution = args.resolution
    
        xrec_full = []
        yrec_full = []
        yerr_full = []
    
        # instantiate a Gaussian Process model, allowing all params to vary
        gp = GaussianProcess(theta0 = [1e-13, 2.0, 1e-13], 
                             covfunction=args.covariance, verbose=True, 
                             fixed=[False, False, False])
                         
        nbatches = max(1, n_eval // batch_size + 1)
        # fit the spectra in batches along the x axis
        for k in range(nbatches):

            batch_from = k * batch_size 
            batch_to = min([(k + 1) * batch_size + 1, n_eval + 1])
            if k == nbatches-1: batch_to = len(X_tot)
            
            xmin = np.amin(X_tot[batch_from:batch_to])
            xmax = np.amax(X_tot[batch_from:batch_to])
            nstar = len(X_tot[batch_from:batch_to])*resolution
        
            # pad the batch edges by 10% so neighbouring fits overlap
            batch_to = int(batch_to + 0.1 * batch_size)
            batch_from = max(0, int(batch_from - 0.1 * batch_size))
        
            X  = X_tot[batch_from:batch_to]
            Y  = Y_tot[batch_from:batch_to]
            Yerr = Yerr_tot[batch_from:batch_to]
    
            # mesh the input space for evaluations of the prediction
            x = np.linspace(xmin, xmax, nstar)

            # fit to data using Maximum Likelihood Estimation of the parameters
            gp.fit(X, Y, Yerr)

            # make the prediction on the meshed x-axis
            y_pred, sigma = gp.predict(x)
    
    
            xrec_full += list(x)
            yrec_full += list(y_pred)
            yerr_full += list(sigma)
        
        yerr_full = np.array(yerr_full)
        yrec_full = np.array(yrec_full)
    
        # plot the function, the prediction and the 95% confidence interval based on
        # the standard deviation
        pl.cla()
        pl.plot(X_tot, Y_tot, label='Observations')
        pl.plot(xrec_full, yrec_full, label='Prediction')
        pl.fill(np.concatenate([xrec_full, xrec_full[::-1]]),
               np.concatenate([yrec_full - 1.9600 * yerr_full,
                              (yrec_full + 1.9600 * yerr_full)[::-1]]),
               alpha=0.5, fc='DarkGoldenRod', ec="None", 
               label='95% confidence interval')
    
        pl.xlabel(r'$\lambda \ (\AA)$', fontsize=16)
        pl.ylabel(r'$\mathrm{Flux \ (erg/s/cm^2/\AA)}$', fontsize=16)
        pl.legend(loc='upper right')
        pl.title("SNe 2011fe %+.1f days relative to B-band max" %time)
        pl.ylim(-0.2e-12, 1.2e-12)
        pl.savefig("figures/SN11fe_%02d.png" %i)
        pl.draw()
Example #21
class BayesianOptimization(object):

    def __init__(self, score_func, bounds, policy='ei', epsilon=1e-7, lambda_val=1.5, gp_params=None):

        assert policy == 'ei' or policy == 'ucb'

        self.score_func = score_func
        self.bounds = bounds
        self.policy = policy
        self.epsilon = epsilon
        self.lambda_val = lambda_val  # for ucb policy only
        if gp_params is not None:
            self.gp = GaussianProcess(**gp_params)
        else:
            n_params = bounds.shape[0]
            length_scale = 0.5 * np.ones(n_params)
            bounds = np.tile(np.array([1e-2, 1e2]), (n_params, 1))
            kernel = RBFKernel(length_scale=length_scale, length_scale_bounds=bounds)
            self.gp = GaussianProcess(kernel, alpha=0.03)

    def clone(self):
        cloned_obj = BayesianOptimization(self.score_func, self.bounds, self.policy,
                                          self.epsilon, self.lambda_val)
        cloned_obj.gp = self.gp.clone()
        return cloned_obj

    def fit(self, n_iter=10, x0=None, n_pre_samples=5, random_search=False):
        """
        Apply Bayesian Optimization to find the optimal parameter
        """
        if x0 is None:
            assert n_pre_samples is not None and n_pre_samples > 0

        if random_search:
            assert random_search > 1

        n_params = self.bounds.shape[0]

        x_list = []
        y_list = []

        if x0 is None:
            for params in np.random.uniform(self.bounds[:, 0], self.bounds[:, 1], size=(n_pre_samples, n_params)):
                x_list.append(params)
                y_list.append(self.score_func(params))
        else:
            for params in x0:
                x_list.append(params)
                y_list.append(self.score_func(params))

        X = np.atleast_2d(np.array(x_list))
        y = np.array(y_list)

        for i in range(n_iter):

            self.gp.fit(X, y)

            if random_search:
                x_candidates = np.random.uniform(self.bounds[:, 0], self.bounds[:, 1], size=(random_search, n_params))
                acquisitions = -self.acquisition_function(x_candidates, y, n_params, self.policy)
                next_sample = x_candidates[np.argmax(acquisitions)]
            else:
                next_sample = self.sample_next_hyperparameter(self.acquisition_function, y, n_restart=10, policy=self.policy)

            if np.any(np.abs(next_sample - X) <= self.epsilon):
                next_sample = np.random.uniform(self.bounds[:, 0], self.bounds[:, 1])

            x_list.append(next_sample)
            y_list.append(self.score_func(next_sample))

            X = np.atleast_2d(np.array(x_list))
            y = np.array(y_list)

        self.X_search = X
        self.y_search = y

    def optimal(self):
        return self.X_search[np.argmax(self.y_search)], np.max(self.y_search)

    def get_iteration_history(self):
        return self.X_search, self.y_search

    def acquisition_function(self, X, y, n_params, policy):

        if policy == 'ei':
            return self.negative_expected_improvement(X, y, n_params)
        elif self.policy == 'ucb':
            return self.negative_upper_confidence_bound(X, y, n_params)
        else:
            raise ValueError("unknown policy {0:}".format(self.policy))

    def negative_expected_improvement(self, X, y, n_params):

        X = np.reshape(X, (-1, n_params))

        mu, Sigma = self.gp.predict(X, return_cov=True)
        sigma = np.sqrt(np.diag(Sigma))

        mu = mu.ravel()
        sigma = sigma.ravel()

        f_best = np.max(y)
        Z = (mu - f_best) / sigma
        ei = (mu - f_best) * norm.cdf(Z) + sigma * norm.pdf(-Z)
        ei[sigma == 0.0] = 0.0
        return -ei

    def negative_upper_confidence_bound(self, X, y, n_params):

        X = np.reshape(X, (-1, n_params))

        mu, Sigma = self.gp.predict(X, return_cov=True)
        sigma = np.sqrt(np.diag(Sigma))

        mu = mu.ravel()
        sigma = sigma.ravel()

        ucb = mu + self.lambda_val * sigma
        return -ucb

    def sample_next_hyperparameter(self, acquisition_function, y, n_restart, policy):

        n_params = self.bounds.shape[0]
        best_x = None
        best_acquisition_value = 100.0

        for initial_value in np.random.uniform(self.bounds[:, 0], self.bounds[:, 1], size=(n_restart, n_params)):
            res = minimize(fun=acquisition_function,
                           x0=initial_value,
                           bounds=self.bounds,
                           method='L-BFGS-B',
                           args=(y, n_params, policy))

            if res.fun < best_acquisition_value:
                best_acquisition_value = res.fun
                best_x = res.x

        return best_x
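
A minimal usage sketch for this class; the score function and bounds below are illustrative, not from the original code:

import numpy as np

def score(params):
    x, y = params
    return -(x - 0.3) ** 2 - (y - 0.7) ** 2  # toy objective, higher is better

bounds = np.array([[0.0, 1.0], [0.0, 1.0]])
bo = BayesianOptimization(score, bounds, policy='ei')
bo.fit(n_iter=10, n_pre_samples=5)
x_best, y_best = bo.optimal()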
Example #22
class BayesianOptimization:
    
    def __init__(self, opt_fun, bounds, kernel, acquisition, 
                 n_random_samples=None, X_train=None, X_pre_calc=None, Y_pre_calc=None):
        """
        Find optima within bounds using Bayesian optimization

        Parameters
        ----------
        opt_fun : function call
            function to be optimized.
        bounds : list of tuples
            lower and upper limit for each variable.
        kernel : kernel call
            initialized kernel class.
        acquisition : function call
            function used for determining the next point.
        n_random_samples : int, optional
            number of randomly generated samples within the bounds
        X_train : numpy array, optional
            sample points to estimate
        X_pre_calc : numpy array, optional
            sample points already estimated
        Y_pre_calc : numpy array, optional
            function values for an already calculated sample
        """
        
        self.opt_fun = opt_fun
        self.bounds = bounds
        self.n_vals = len(bounds)
        
        self.kernel = kernel
        self.acquisition = acquisition
        
        self.construct_sample(n_random_samples, X_train, X_pre_calc, Y_pre_calc)
        self.gpr = GaussianProcess(self.X_sample, self.Y_sample, kernel)


    def opt_fun_iter(self, x):
        return np.array([self.opt_fun(x[i,:]) for i in range(x.shape[0])])
    
    
    def construct_sample(self, n_random_samples, X_train, X_pre_calc, Y_pre_calc):
        """
        Function for combining all sample values and creating random input 
        values and their corresponding outputs.
        """
        if isinstance(X_train, np.ndarray):
            Y_train = self.opt_fun_iter(X_train)
        elif X_train is None:
            X_train = np.empty((0, self.n_vals))
            Y_train = np.empty(0)
        else:
            raise ValueError("X_train needs to be a numpy array.")
            
        if isinstance(n_random_samples, (int, float)):
            X_rand = np.random.random((n_random_samples, self.n_vals))
            
            for i, val in enumerate(self.bounds):
                X_rand[:,i] *= val[1] - val[0]
                X_rand[:,i] += val[0]
            # implement checking for duplicates

            Y_rand = self.opt_fun_iter(X_rand)
        else:
            X_rand = np.empty((0, self.n_vals))
            Y_rand = np.empty(0)
            
        if not isinstance(X_pre_calc, np.ndarray):
            X_pre_calc = np.empty((0, self.n_vals))
            Y_pre_calc = np.empty(0)
        else:
            Y_pre_calc = Y_pre_calc.reshape(-1)
            
        self.X_sample = np.concatenate((X_pre_calc, X_train, X_rand), axis=0)
        self.Y_sample = np.concatenate((Y_pre_calc, Y_train, Y_rand), axis=0)

    
    def next_location(self, n_restarts):
        """
        Determine the next location for evaluation by minimizing the negative
        acquisition function

        Parameters
        ----------
        n_restarts : int
            number of times the minimization algorithm should be run with 
            random initial values.

        Returns
        -------
        numpy array
            with next values
        """
        dim = self.X_sample.shape[1]
        min_val = 1
        min_x = None
        
        def min_obj(X):
            return -self.acquisition(X, self.X_sample, self.Y_sample, self.gpr)
        
        # Find the best optimum by starting from n_restart different random points.
        for x0 in np.random.uniform(np.asarray(self.bounds)[:, 0], np.asarray(self.bounds)[:, 1], size=(n_restarts, dim)):
            try:
                res = minimize(min_obj, x0=x0, bounds=self.bounds, method='L-BFGS-B')
                val = res.fun[0]
            except ValueError:
                val = np.inf
                
            if val < min_val:
                min_val = val
                min_x = res.x           
                
        return min_x
        
    def search(self, n_iter, re_opt_gpr, n_restarts, print_progress=True):
        """
        Search for the best function value

        Parameters
        ----------
        n_iter : int
            how many new function evaluations are made
        re_opt_gpr : int
            after how many trials should the kernel hyper parameters be optimized
        n_restarts : int
            number of times the minimization algorithm should be run with 
            random initial values when determining the next location.
        print_progress : bool, optional
            True if the current iteration values should be printed

        Returns
        -------
        ind : int
            index of the best location.
        X_opt : numpy array
            best location.
        Y_opt : float
            best function value.

        """

        for i in range(n_iter):
            
            if i % re_opt_gpr == 0:
                self.gpr.kernel.hyper = np.exp(self.gpr.opt_kernel_hyper())
                
            X_new = self.next_location(n_restarts=n_restarts)
            
            Y_new = self.opt_fun(X_new).reshape(1,)

            self.X_sample = np.concatenate((self.X_sample, X_new.reshape(1,-1)), axis=0)
            self.Y_sample = np.concatenate((self.Y_sample, Y_new), axis=0)
            
            self.gpr.update_train_data(self.X_sample, self.Y_sample)
            
            if print_progress:
                print(f"Iteration {i}: Current function value {Y_new[0]} and max value of {np.max(self.Y_sample)}")

        ind = self.Y_sample.argmax()
        Y_opt = self.Y_sample[ind]
        X_opt = self.X_sample[ind,:]
        return ind, X_opt, Y_opt
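
A usage sketch for this variant, whose constructor takes the kernel and acquisition explicitly. The kernel object and expected_improvement helper below are assumptions, but any acquisition must follow the acquisition(X, X_sample, Y_sample, gpr) signature used in next_location:

import numpy as np

def objective(x):
    return -np.sum((x - 0.5) ** 2)  # toy objective, maximised at x = 0.5

bo = BayesianOptimization(objective,
                          bounds=[(0.0, 1.0), (0.0, 1.0)],
                          kernel=kernel,                     # assumed: an initialised kernel object
                          acquisition=expected_improvement,  # assumed acquisition function
                          n_random_samples=10)
ind, X_opt, Y_opt = bo.search(n_iter=20, re_opt_gpr=5, n_restarts=10)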
Example #23
X, observations = load_air()

    # Separate the values we're regressing on from the dates
    indexes = np.array(X[3], dtype=np.int32)
    dates = X[4]
    X = np.array(X[0:3], dtype=np.float64)

    # Make the data be zero-mean
    y_mean = observations.mean()
    y = observations - y_mean

    # Create a partial function so we can try multiple data points easily
    sqexp = partial(squared_exponential, BANDWIDTH, y.std(), TAU2)

    # Create our Gaussian process with zero mean and squared exponential covariance
    gp = GaussianProcess(lambda x: np.zeros(x.shape[0]), sqexp)

    # Calculate the initial features considering only features with lower indices
    error = np.array(y)
    features = []
    for i,x in enumerate(X):
        f_i = gp.predict(x, x, error, error.std(), percentile=None)[0]
        features.append(f_i)
        error -= f_i
    features = np.array(features)

    # Track the squared error of the estimates
    mse = (error * error).mean()
    mse_delta = mse

print('Initial mean squared error: {0}'.format(mse))
Example #24
import numpy as np
import matplotlib.pyplot as plt
from gaussian_process import GaussianProcess


def func(x):
    return np.sin(0.9 * x)


g = GaussianProcess(noise_variance=1E-5)


def sample_and_plot():
    xtest = np.linspace(-5, 5, 100).reshape(-1, 1)
    ytest = func(xtest)
    mu, cov_posterior, s = g.predict(xtest)

    plt.figure()
    plt.clf()
    plt.plot(g.X, g.Y, 'r+', ms=20)
    plt.plot(xtest, ytest, 'b-')
    plt.gca().fill_between(xtest.flat, mu[:, 0] - 3*s, mu[:, 0] + 3*s, color="#dddddd")
    plt.plot(xtest, mu, 'r--', lw=2)
    # plt.savefig('predictive.png', bbox_inches='tight')
    plt.title('Mean predictions plus 3 st.deviations')
    plt.axis([-5, 5, -3, 3])

    # draw samples from the posterior at our test points.
    L = np.linalg.cholesky(cov_posterior)
    f_post = mu + np.dot(L, np.random.normal(size=(xtest.shape[0], 10)))
    plt.figure()
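
np.linalg.cholesky requires the posterior covariance to be positive definite, which round-off can break; a common safeguard (not in the original snippet) is to add a small jitter to the diagonal first:

jitter = 1e-10 * np.eye(cov_posterior.shape[0])
L = np.linalg.cholesky(cov_posterior + jitter)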
Example #25
save_dir = 'gp_saves'
env = gym.make('CartPole-v0')
discount_factors = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]
learning_rates = [0.001, 0.005, 0.01, 0.05, 0.1, 0.15, 0.2]
memory_sizes = [1000, 10000, 100000]
update_frequencies = [1, 10, 50, 100]
n_layers = [0, 1, 2]
n_units = [4, 8, 16, 32, 64]

param_space = [
    discount_factors, learning_rates, memory_sizes, update_frequencies,
    n_layers, n_units
]

gp = GaussianProcess(space_dim=len(param_space),
                     length_scale=0.5,
                     noise=0.1,
                     standardize=True)
# Uncomment this to start from saved values
#known_points, known_values = load(save_dir)
#gp.add_points(known_points, known_values)
#eval_point = gp.most_likely_max(param_space)
eval_point = [
    0.99,  # df
    0.005,  # lr
    1000,  # memsize
    1,  # freq
    0,  # n layers
    32
]  # n units

while True:
Example #26
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm as stats_norm

from gaussian_process import GaussianProcess, SquaredDistanceKernel, Matern52Kernel


BOUNDS = [0, 10, -10, 10]
PLOT_POINT_COUNT = 1000


def func(v):
    return v * np.sin(v)


# gp = GaussianProcess(kernel=SquaredDistanceKernel(kernel_param=0.01), noise_variance=1E-3)
gp = GaussianProcess(kernel=Matern52Kernel(kernel_param=0.01), noise_variance=1E-3)

fig = plt.figure()
ax_data = fig.add_subplot(311)
ax_acquisition = fig.add_subplot(312)
ax_func = fig.add_subplot(313)


l_mu = None
l_data = None
l_stddev = None
l_acquisition = None
l_acquisition_area = None
l_func = None
xx = np.linspace(BOUNDS[0], BOUNDS[1], PLOT_POINT_COUNT).reshape(-1, 1)
Example #27
class BayesianOptimisation(object):

    def __init__(self,
                 kernel: Kernel,
                 objective_function: objective_functions.abstract_objective_function.ObjectiveFunction,
                 acquisition_function: AcquisitionFunction,
                 ):
        """
        :param kernel: Kernel object used by the gaussian process to perform a regression.
        :param objective_function: ObjectiveFunction object which we will try to minimise
        :param acquisition_function: AcquisitionFunction object
        """
        self._initial_kernel = copy.deepcopy(kernel)
        self._gaussian_process = GaussianProcess(kernel)
        self._objective_function = objective_function
        self._acquisition_function = acquisition_function

    def _initialise_gaussian_process(self,
                                     array_initial_dataset: np.ndarray,
                                     array_initial_objective_function_values: np.ndarray
                                     ) -> None:
        """
        Initialise the gaussian process with its initial dataset
        :param array_initial_dataset: array representing all the data points used to calculate the posterior mean and variance of the GP.
        Its dimension is n x l, there are:
        - n elements in the dataset. Each row corresponds to a data point x_i (with 1<=i<=n), at which the objective function can be evaluated
        - each one of them is of dimension l (representing the number of variables required by the objective function)
        :param array_initial_objective_function_values: array of the evaluations for all the elements in array_dataset. Its shape is hence n x 1 (it's a column vector)
        """

        self._gaussian_process.initialise_dataset(array_initial_dataset, array_initial_objective_function_values)

    def run(self,
            number_steps: int,
            array_initial_dataset: np.ndarray,
            array_initial_objective_function_values: np.ndarray,
            ) -> None:
        """
        Generator that performs a bayesian optimisation

        This method is a generator: at every step, it yields a tuple containing 3 elements:
        - the current up-to-date gaussian process
        - the acquisition function
        - the last computed argmax of the acquisition function.

        Hence, in order to use this method, you need to put it in a for loop,
            for gp, af, arg_max in bo.run(): # Here, bo is a BayesianOptimisation object
                # some code here


        :param number_steps: number of steps to execute in the Bayesian Optimisation procedure.

        :param array_initial_dataset: array_initial_dataset: array representing all the data points used to calculate the posterior mean and variance of the GP.
        Its dimension is n x l, there are:
        - n elements in the dataset. Each row corresponds to a data point x_i (with 1<=i<=n), at which the objective function can be evaluated
        - each one of them is of dimension l (representing the number of variables required by the objective function)

        :param array_initial_objective_function_values: array of the evaluations for all the elements in array_dataset. Its shape is hence n x 1 (it's a column vector)
        """

        print(f"Step {0}/{number_steps} - Initialise Gaussian Process for Provided Dataset")
        self._initialise_gaussian_process(array_initial_dataset,
                                          array_initial_objective_function_values)
        arg_max_acquisition_function = self.compute_arg_max_acquisition_function()

        for index_step in range(number_steps):
            print(f"Step {index_step}/{number_steps} - Evaluating Objective Function at position {arg_max_acquisition_function.tolist()}")
            arg_max_acquisition_function = self._bayesian_optimisation_step(arg_max_acquisition_function)

            # The yield keyword makes the method behave like a generator
            yield self._gaussian_process, self._acquisition_function, arg_max_acquisition_function

    def _bayesian_optimisation_step(self,
                                    arg_max_acquisition_function: np.ndarray
                                    ) -> np.ndarray:
        """
        :param arg_max_acquisition_function: the previously computed argmax of the acquisition function
        :return: the next computed arg_max of the acquisition function after having updated the Gaussian Process
        """

        # Add new data point

        self._gaussian_process.add_data_point(arg_max_acquisition_function, self._objective_function.evaluate(arg_max_acquisition_function))

        # Update gaussian process and optimise parameters
        
        self.reinitialise_kernel()
        self._gaussian_process.optimise_parameters()

        # compute argmax

        return self.compute_arg_max_acquisition_function()

    def get_best_data_point(self) -> np.ndarray:
        index_best_data_point = np.argmin(self._gaussian_process.array_objective_function_values)
        return self._gaussian_process.array_dataset[index_best_data_point]

    def compute_arg_max_acquisition_function(self) -> np.ndarray:
        return self._acquisition_function.compute_arg_max(
            gaussian_process=self._gaussian_process,
            objective_function=self._objective_function
        )

    def reinitialise_kernel(self) -> None:
        self._gaussian_process.set_kernel_parameters(self._initial_kernel.log_amplitude,
                                                     self._initial_kernel.log_length_scale,
                                                     self._initial_kernel.log_noise_scale)
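
As its docstring notes, run is a generator; a minimal sketch of driving the whole loop, assuming kernel, objective_function and acquisition_function objects compatible with the constructor (get_uniform_dataset is the helper used in Example #29):

bo = BayesianOptimisation(kernel, objective_function, acquisition_function)
X_init = objective_function.get_uniform_dataset(10).reshape((-1, 1))
y_init = objective_function(X_init)
for gp, af, arg_max in bo.run(number_steps=20,
                              array_initial_dataset=X_init,
                              array_initial_objective_function_values=y_init):
    pass  # inspect gp, af and arg_max here, e.g. for plotting
print(bo.get_best_data_point())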
Example #28
def main_1d():
    from kernels import SquaredExponential
    from gaussian_process import GaussianProcess
    import matplotlib.pyplot as plt
    import time
    np.random.seed(10)
    tf.set_random_seed(10)
    # Settings
    n_samples = 3
    batch_size = 4
    new_samples = 1000
    n_dim = 1
    # Set up the modules for bayesian optimizer
    kernel = SquaredExponential(n_dim=n_dim,
                                init_scale_range=(.1, .5),
                                init_amp=1.)
    gp = GaussianProcess(n_epochs=100,
                         batch_size=10,
                         n_dim=n_dim,
                         kernel=kernel,
                         noise=0.01,
                         train_noise=False,
                         optimizer=tf.train.GradientDescentOptimizer(0.001),
                         verbose=0)
    bo = BayesianOptimizer(gp,
                           region=np.array([[0., 1.]]),
                           iters=100,
                           tries=2,
                           optimizer=tf.train.GradientDescentOptimizer(0.1),
                           verbose=1)

    # Define the latent function + noise
    def observe(X):
        y = np.float32(1 * (-(X - 0.5)**2 + 1) +
                       np.random.normal(0, .1, [X.shape[0], 1]))
        # y = np.float32((np.sin(X.sum(1)).reshape([X.shape[0], 1]) +
        #                 np.random.normal(0,.1, [X.shape[0], 1])))
        return y

    # Get data
    X = np.float32(np.random.uniform(0, 1, [n_samples, n_dim]))
    y = observe(X)
    plt.axis((-0.1, 1.1, 0, 1.5))
    # Fit the gp
    bo.fit(X, y)
    for i in range(5):
        # print "Iteration {0:3d}".format(i) + "*"*80
        t0 = time.time()
        max_acq = -np.inf
        # Inner loop to allow for gd with random initializations multiple times
        x_next, y_next, acq_next = bo.select()
        # Plot the selected point
        plt.plot([x_next[0, 0], x_next[0, 0]], plt.ylim(), 'r--')
        plt.scatter(x_next, y_next, c='r', linewidths=0, s=50)
        plt.scatter(x_next, acq_next, c='g', linewidths=0, s=50)
        # Observe and add point to observed data
        y_obs = observe(x_next)
        X = np.vstack((X, x_next))
        y = np.vstack((y, y_obs))
        t2 = time.time()
        # Fit again
        bo.fit(X, y)
        print "BOFitDuration: {0:.5f}".format(time.time() - t2)
        print "BOTotalDuration: {0:.5f}".format(time.time() - t0)
    # Get the final posterior mean and variance for the entire domain space
    X_new = np.float32(np.linspace(0, 1, new_samples).reshape(-1, 1))
    X_new = np.sort(X_new, axis=0)
    y_pred, var = gp.np_predict(X_new)
    # Compute the confidence interval
    ci = np.sqrt(var) * 2
    plt.plot(X_new, y_pred)
    plt.plot(X_new, y_pred + ci, 'g--')
    plt.plot(X_new, y_pred - ci, 'g--')
    plt.scatter(X, y)
    plt.show()
Example #29
            plt.plot(xx, function_sample, alpha=0.3, c='C0')
            plt.scatter(gp.array_dataset,
                        gp.array_objective_function_values,
                        c='m',
                        marker="+",
                        zorder=1000,
                        s=(30, ))
            plt.pause(0.05)
        plt.show()


if __name__ == '__main__':
    # obj = UnivariateObjectiveFunction()
    np.random.seed(207)
    obj = LinearSin(0.5)

    initial_dataset = obj.get_uniform_dataset(21).reshape((-1, 1))
    evaluations = obj(initial_dataset)

    # we use a linear combination of a gaussian and a linear kernel: k = k_gaussian + k_linear
    # Then, there are 4 parameters to sample from in the posterior distribution
    kernel = GaussianLinearKernel(0., 0., 0., 0., 0., 0.)
    gp = GaussianProcess(kernel, initial_dataset, evaluations)

    test_metropolis_hastings(obj,
                             gp,
                             100,
                             sigma_exploration_mh=0.4,
                             number_hyperparameters_gaussian_process=6)