Example No. 1
def test_check_mem():
    design_pts = GaussianProcess.create_uniform_grid(-2, 2, 5, 2)
    obs = np.random.randn(5**2)
    GP = GaussianProcess(design_pts, obs, total_calls=1)
    GP.check_mem()
    assert len(GP.X) == 2 * (5**2)
    assert len(GP.Y) == 2 * (5**2)
Example No. 2
def test_GP_samples_1d(plotFlag=True):
    design_pts = GaussianProcess.create_uniform_grid(-2, 2, 5, 1)
    obs = np.random.randn(5)
    GP1 = GaussianProcess(design_pts, obs)
    grid = GaussianProcess.create_uniform_grid(-2, 2, 3000, 1)
    realisation = GP1.GP_at_points(grid, num_evals=1).T

    if plotFlag:
        plt.figure()
        plt.plot(grid, realisation)
        plt.plot(design_pts, obs, 'ro')
        plt.title('test_GP_samples_1d')
        plt.show()
Example No. 3
def test_GP_bridge():
    design_pts = GaussianProcess.create_uniform_grid(-2, 2, 5, 1)
    obs = np.random.randn(5)
    GP = GaussianProcess(design_pts, obs)
    x = np.array([0.5])
    points = np.array([[0], x, [1.]])
    data = np.array([0, 0, 1.])
    plt.figure()
    for i in range(15):
        data[1] = GP.GP_bridge(x, points[[0, 2]], data[[0, 2]])
        plt.plot(points, data)
    plt.title('test_GP_bridge')
    plt.show()
def predict_many(
    SA, X, Y
):  # Assume that the test points are near each other (Gaussian distributed...)
    sa = np.mean(SA, 0).reshape(1, -1)
    idx = kdt.query(sa, k=K_many, return_distance=False)
    X_nn = X[idx, :].reshape(K_many, Dx)
    Y_nn = Y[idx, :].reshape(K_many, Dy)

    m = np.empty([SA.shape[0], Dy])
    s = np.empty([SA.shape[0], Dy])
    for i in range(Dy):
        gp_est = GaussianProcess(X_nn, Y_nn[:, i], GaussianCovariance())
        m[:, i], s[:, i] = gp_est.estimate_many(SA)
    return m, s
Example No. 5
def test_GP_interp_1d(plotFlag=True):
    design_pts = GaussianProcess.create_uniform_grid(-2, 2, 10)
    obs = np.random.normal(size=10)
    GP1 = GaussianProcess(design_pts, obs)
    GP_interp_method = GP1.GP_interp(100)
    vGP_interp_method = np.vectorize(GP_interp_method)

    #Plot results:
    if plotFlag:
        plt.figure()
        plt.plot(design_pts, obs, 'ro')
        grid = GaussianProcess.create_uniform_grid(-2, 2, 1000)
        plt.plot(grid, vGP_interp_method(grid))
        plt.title('test_GP_interp_1d')
        plt.show()
Example No. 6
    def train_emulators(self, X, y, hyperparams, n_tries=2):
        """Train the emulators
        
        This sets up the required emulators. If necessary (`hyperparams`
        is set to None), it will train the emulators.

        X: array ( N_train, N_full )
            The modelled output array for training
        y: array (N_train, N_param )
            The corresponding training parameters for `X`
        hyperparams: array ( N_params + 2, N_PCs )
            The hyperparameters for the relevant GPs
        """
        self.emulators = []
        train_data = self.compress(X)
        self.hyperparams = np.zeros((2 + y.shape[1], self.n_pcs))
        for i in xrange(self.n_pcs):

            self.emulators.append ( GaussianProcess ( np.atleast_2d( y), \
                train_data[i] ) )
            if hyperparams is None:
                print "\tFitting GP for basis function %d" % i
                self.hyperparams[ :, i] = \
                    self.emulators[i].learn_hyperparameters ( n_tries = n_tries )[1]
            else:
                self.hyperparams[:, i] = hyperparams[:, i]
                self.emulators[i]._set_params(hyperparams[:, i])
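A minimal shape sketch for the interface documented above, assuming a PCA-based emulator with `n_pcs` retained components; all sizes below are made up for illustration and do not come from the original repository.

import numpy as np

N_train, N_full, N_param, n_pcs = 50, 400, 4, 3
X = np.random.rand(N_train, N_full)                # modelled output array used for training
y = np.random.rand(N_train, N_param)               # training parameters corresponding to X
hyperparams = np.random.rand(N_param + 2, n_pcs)   # one column of GP hyperparameters per PC
# Passing hyperparams=None instead would make train_emulators fit each GP itself.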
Example No. 7
    def get_emulator(self, tag):
        """
        Recovers an emulator from storage, and returns it to
        the caller
        """
        if os.path.exists(self.fname):
            # File exists, so open and get a handle to it
            emulators = shelve.open(self.fname)
        else:
            raise IOError("File %s doesn't exist!" % self.fname)

        if type(tag) != str:
            tag = self._declutter_key(tag)

        if emulators[tag].has_key("basis_functions"):
            gp = MultivariateEmulator ( \
                X = emulators[tag]["X"], \
                y=emulators[tag]["y"], \
                hyperparams = emulators[tag]["hyperparams"],
                basis_functions = emulators[tag]["basis_functions"] )
        else:
            gp = GaussianProcess ( \
                emulators[tag]["inputs"], \
                emulators[tag]["targets"] )
            gp._set_params(emulators[tag]['theta'])
        emulators.close()
        return gp
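A hedged sketch of the shelve layout that `get_emulator` reads; the key names come from the code above, while the file name, tag, and arrays are made up.

import numpy as np
import shelve

X_train = np.random.rand(20, 3)   # made-up GP inputs
y_train = np.random.rand(20)      # made-up GP targets
theta = np.ones(5)                # made-up hyperparameter vector

db = shelve.open("emulators_example.db")                                # hypothetical file
db["site_A"] = {"inputs": X_train, "targets": y_train, "theta": theta}  # plain GP entry
# A multivariate entry would instead carry "X", "y", "hyperparams" and "basis_functions".
db.close()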
Example No. 8
def test_r2_distance_tests(x, y):
    d_xy = GaussianProcess.r2_distance(x, y)
    d_yx = GaussianProcess.r2_distance(y, x)

    #Check symmetry
    assert np.array_equiv(d_xy, d_yx.T) or np.array_equiv(d_xy, d_yx)

    #check d(x,0) = 0 iff x = 0
    assert (GaussianProcess.r2_distance(x, np.zeros(
        x.shape)) == 0).all() == (np.sum(np.abs(x)) == 0)

    #d(x,x) == 0
    d_xx = GaussianProcess.r2_distance(x, x)
    if len(d_xx.shape) <= 1:
        assert (d_xx == 0).all()
    else:
        assert (d_xx.diagonal() == 0).all()
def predict(sa, theta_min):
    idx = kdt.query(sa.T, k=K, return_distance=False)
    X_nn = Xtrain[idx, :].reshape(K, state_action_dim)
    Y_nn = Ytrain[idx, :].reshape(K, state_dim)

    if useDiffusionMaps:
        X_nn, Y_nn = reduction(sa, X_nn, Y_nn)

    m = np.zeros(state_dim)
    s = np.zeros(state_dim)
    for i in range(0, state_dim):
        gp_est = GaussianProcess(X_nn[:, :4],
                                 Y_nn[:, i],
                                 GaussianCovariance(),
                                 theta_min=None)
        m[i], s[i] = gp_est.estimate(sa[0][:4].reshape(1, -1))
    return m, s
Example No. 10
def test_GP_GP_bridge_1d():
    design_pts = GaussianProcess.create_uniform_grid(-2, 2, 5)
    obs = np.random.normal(size=5)
    GP = GaussianProcess(design_pts, obs)

    plt.figure()
    for _ in range(1):
        grid = np.random.uniform(low=-2, high=2, size=(3000, 1))
        #        OR uniform grid in random order:
        #        grid = GaussianProcess.create_uniform_grid(-2,2,3000,1)
        #        np.random.shuffle(grid)

        for x in grid:
            GP.GP_eval(x)

        #Sort to plot nicer
        X, Y = GP.get_data()
        ind = np.argsort(X.flatten())
        X = X.flatten()[ind]
        Y = Y[ind]
        #Plot results:
        plt.plot(X, Y)
        GP.reset()
    plt.plot(design_pts, obs, 'ro')
    plt.title('test_GP_GP_bridge_1d')
    plt.show()
Example No. 11
def test_find_and_add_1d():
    """Green are closen points, red is x, blue dotted are X (not chosen)"""
    X = np.arange(10) / 10
    GP = GaussianProcess(X, np.ones(X.shape))

    #add some 'bad' data
    GP.GP_eval(np.array([-0.1]))
    GP.GP_eval(np.array([1]))
    #add some random data
    for _ in range(15):
        GP.GP_eval(np.random.uniform(size=(1)))

    x = 1.2 * np.random.uniform(size=(1))
    ind, points = GP.find_closest(x)

    points = np.squeeze(points, axis=1)
    X, _ = GP.get_data()

    #make sure data is sorted
    assert np.all(X[:-1] <= X[1:])

    plt.figure()
    plt.hlines(1, 0, 1)
    plt.eventplot(X, orientation='horizontal', colors='b', linestyles='dotted')
    plt.eventplot(x, orientation='horizontal', colors='r')
    plt.eventplot(points,
                  orientation='horizontal',
                  colors='g',
                  linestyles='dashed')
    plt.axis('off')

    plt.title('test_find_closest_1d')
    plt.show()
Example No. 12
def test_find_and_add_2d():
    """Green are closen points, red is x, blue dotted are X (not chosen)"""
    X = np.random.uniform(size=(20, 2))
    GP = GaussianProcess(X, np.ones(X.shape[0]))

    #add some random data
    for _ in range(10):
        GP.GP_eval(np.random.uniform(size=(1, 2)))

    x = np.random.uniform(size=(1, 2))
    ind, points = GP.find_closest(x)

    plt.figure()
    plt.plot(X[:, 0], X[:, 1], 'bo')
    plt.plot(x[:, 0], x[:, 1], 'ro')

    plt.plot(points[:, 0], points[:, 1], 'go')
    plt.title('test_find_closest_2d')
    plt.show()
Example No. 13
def test_GP_samples_2d(plotFlag=True):
    design_pts = GaussianProcess.create_uniform_grid(-2, 2, 5, 2)
    obs = np.random.randn(5**2)
    GP = GaussianProcess(design_pts, obs)
    grid = GaussianProcess.create_uniform_grid(-2, 2, 50, 2)
    Z = GP.GP_at_points(grid, num_evals=1).T

    if plotFlag:
        fig = plt.figure()
        ax = fig.gca(projection='3d')

        X = grid[:, 0]
        Y = grid[:, 1]
        Z = Z.flatten()
        #Plot the surface
        ax.plot_trisurf(X, Y, Z)
        #Plot the design points
        ax.scatter(design_pts[:, 0], design_pts[:, 1], obs, color='green')
        plt.title('test_GP_samples_2d')
        plt.show()
def get_global_theta():
    Theta_min = []
    for i in range(0, state_dim):
        gp_est = GaussianProcess(Xtrain[:, :4],
                                 Ytrain[:, i],
                                 GaussianCovariance(),
                                 globalTheta=True)
        Theta_min.append(gp_est.theta_min)
        del gp_est

    return Theta_min
Example No. 15
def test_GP_interp_2d(plotFlag=True):
    design_pts = GaussianProcess.create_uniform_grid(-2, 2, 5, 2)
    obs = np.random.randn(5**2)
    GP1 = GaussianProcess(design_pts, obs)
    GP_interp_method = GP1.GP_interp(25)
    vGP_interp_method = np.vectorize(GP_interp_method, signature='(i)->()')

    #Plot results:
    if plotFlag:
        fig = plt.figure()
        ax = fig.gca(projection='3d')
        Z = GaussianProcess.create_uniform_grid(-2, 2, 50, 2)
        X = Z[:, 0]
        Y = Z[:, 1]
        #Plot the surface
        ax.plot_trisurf(X, Y, vGP_interp_method(Z))  #, antialiased=True
        #Plot the design points
        ax.scatter(design_pts[:, 0], design_pts[:, 1], obs, color='green')
        plt.title('test_GP_interp_2d')
        plt.show()
def UP(sa_mean, sa_Sigma):
    idx = kdt.query(sa_mean.reshape(1, -1), k=K_up, return_distance=False)
    X_nn = Xtrain[idx, :].reshape(K_up, state_action_dim)
    Y_nn = Ytrain[idx, :].reshape(K_up, state_dim)

    m = np.empty(state_dim)
    s = np.empty(state_dim)
    for i in range(state_dim):
        gp_est = GaussianProcess(X_nn,
                                 Y_nn[:, i],
                                 GaussianCovariance(),
                                 theta_min=theta_min)
        up = UncertaintyPropagationExact(gp_est)
        m[i], s[i] = up.propagate_GA(sa_mean.reshape((-1, )), sa_Sigma)
    return m, s
def test_BO(dim, obj_fun, ftarget, max_FEs, lb, ub, logfile):
    sys.path.insert(0, "../")
    sys.path.insert(0, "../../GaussianProcess")
    from BayesOpt import BO, DiscreteSpace, IntegerSpace, RandomForest, RealSpace
    from GaussianProcess import GaussianProcess
    from GaussianProcess.trend import constant_trend

    space = RealSpace([lb, ub]) * dim

    # kernel = 1.0 * Matern(length_scale=(1, 1), length_scale_bounds=(1e-10, 1e2))
    # model = _GaussianProcessRegressor(kernel=kernel, alpha=0, n_restarts_optimizer=30, normalize_y=False)

    mean = constant_trend(dim, beta=0)  # equivalent to Simple Kriging
    thetaL = 1e-5 * (ub - lb) * np.ones(dim)
    thetaU = 10 * (ub - lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    model = GaussianProcess(
        mean=mean,
        corr="matern",
        theta0=theta0,
        thetaL=thetaL,
        thetaU=thetaU,
        noise_estim=False,
        nugget=0,
        optimizer="BFGS",
        wait_iter=5,
        random_start=10 * dim,
        eval_budget=200 * dim,
    )

    return BO(
        search_space=space,
        obj_fun=obj_fun,
        model=model,
        DoE_size=dim * 10,
        max_FEs=max_FEs,
        verbose=True,
        n_point=1,
        minimize=True,
        acquisition_fun="EI",
        ftarget=ftarget,
        logger=None,
    )
Example No. 18
def create_optimizer(dim, fitness, n_step, n_init_sample, model_type):
    x1 = {'name' : "x1",
          'type' : 'R',
          'bounds': [-6, 6]}
    x2 = {'name' : "x2",
          'type' : 'R',
          'bounds': [-6, 6]}
    search_space = [x1, x2]

    if model_type == 'GP':
        # bounds of the search space defined above
        lb, ub = np.array([-6.] * dim), np.array([6.] * dim)
        thetaL = 1e-3 * (ub - lb) * np.ones(dim)
        thetaU = 10 * (ub - lb) * np.ones(dim)
        theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL
    
        mean = constant_trend(dim, beta=None)
        model = GaussianProcess(mean=mean,
                                corr='matern',
                                theta0=theta0,
                                thetaL=thetaL,
                                thetaU=thetaU,
                                nugget=1e-5,
                                noise_estim=False,
                                random_start=15 * dim,
                                likelihood='concentrated',
                                random_state=None,
                                eval_budget=100 * dim)
                                
    elif model_type == 'sklearn-RF':
        min_samples_leaf = max(1, int(n_init_sample / 20.))
        max_features = int(np.ceil(dim * 5 / 6.))
        model = RandomForest(n_estimators=100,
                            max_features=max_features,
                            min_samples_leaf=min_samples_leaf)

    elif model_type == 'R-RF':
        model = RrandomForest()

    opt = mipego(search_space, fitness, model, max_iter=n_step, random_seed=None,
                   n_init_sample=n_init_sample, minimize=True, optimizer='BFGS')
    
    return opt
Example No. 19
def test_1d():
    # 1 dimension
    f = genRandomFunction()
    X, y = genDataFromFunction(f, N=100)

    params = {'sigma_n': .01, 'sigma_s': 1.0, 'width': 10.0}
    kernel = GaussianKernel([params['sigma_s'], params['width']])
    mean = lambda x: 0
    GP = GaussianProcess(mean, kernel, sigma_n=params['sigma_n'])
    GP.train(X=X, y=y)
    GP.optimize_hyperparameters_grid_search()

    x = np.linspace(np.min(X), np.max(X))
    x = np.reshape(x, (1, -1))

    y_expect = np.array([GP.eval_mean(x[:, i]) for i in range(x.shape[1])])
    y_var = np.array([GP.eval_var(x[:, i]) for i in range(x.shape[1])])
    y_std = np.sqrt(y_var)

    plt.figure()
    true, = plt.plot(x[0, :].flatten(),
                     f(x),
                     color='black',
                     label='True Function')  # true function
    data, = plt.plot(X[0, :], y, '.', color='orange',
                     label='Data')  # noisy data
    mean, = plt.plot(x[0, :],
                     y_expect,
                     '--',
                     color='blue',
                     label='Estimated Mean')  # estimated from the GP
    plt.fill_between(x[0, :],
                     y_expect + 2 * y_std,
                     y_expect - 2 * y_std,
                     color='gray',
                     linewidth=0.0,
                     alpha=0.5)
    plt.legend(handles=[true, data, mean])
Example No. 20
def test_2d():
    # two dimensions
    dim = 2
    f = genRandomFunction(dim)
    X, y = genDataFromFunction(f, dim=dim, N=1000)

    x_coord, y_coord = np.meshgrid(np.linspace(0, 1), np.linspace(0, 1))
    x_eval = np.vstack(
        [np.reshape(x_coord, (1, -1)),
         np.reshape(y_coord, (1, -1))])
    f_eval = np.reshape(f(x_eval), x_coord.shape)

    mean = lambda x: 0
    params = {'sigma_n': .01, 'sigma_s': 1.0, 'width': 10.0}
    kernel = GaussianKernel([params['sigma_s'], params['width']])
    GP = GaussianProcess(mean, kernel)
    GP.train(X, y)
    GP.optimize_hyperparameters_random_search()

    # evaluate
    y_expect = [GP.eval_mean(x_eval[:, i]) for i in range(x_eval.shape[1])]
    y_expect = np.reshape(y_expect, x_coord.shape)
    v_max = max(np.max(y), -np.min(y))

    # plot
    color_options = {'cmap': 'RdBu', 'vmin': -v_max, 'vmax': v_max}

    plt.figure()
    plt.subplot(4, 1, 1)
    plt.pcolor(x_coord, y_coord, f_eval, **color_options)
    plt.title('Original Function')

    plt.subplot(4, 1, 2)
    plt.scatter(X[0, :], X[1, :], c=y, **color_options)
    plt.title('Data')

    plt.subplot(4, 1, 3)
    plt.title('GP Estimation')
    plt.pcolor(x_coord, y_coord, y_expect, **color_options)

    plt.subplot(4, 1, 4)
    plt.pcolor(x_coord, y_coord, -f_eval + y_expect, **color_options)
    plt.title('Residual')
Example No. 21
def test_GP_GP_bridge_2d():
    design_pts = GaussianProcess.create_uniform_grid(-2, 2, 5, 2)
    obs = np.random.randn(5**2)
    GP = GaussianProcess(design_pts, obs)

    #Plot results:
    fig = plt.figure()
    ax = fig.gca(projection='3d')

    grid = GaussianProcess.create_uniform_grid(-2, 2, 50, 2)
    np.random.shuffle(grid)

    for x in grid:
        GP.GP_eval(x[np.newaxis, :])

    points, Z = GP.get_data()
    X = points[:, 0]
    Y = points[:, 1]
    #Plot the surface
    ax.plot_trisurf(X, Y, Z)
    #Plot the design points
    ax.scatter(design_pts[:, 0], design_pts[:, 1], obs, color='green')
    plt.title('test_GP_GP_bridge_2d')
    plt.show()
Example No. 22
obj_func = lambda x: benchmarks.himmelblau(x)[0]
lb = np.array([-6] * dim)
ub = np.array([6] * dim)

search_space = ContinuousSpace(['x1', 'x2'], zip(lb, ub))

thetaL = 1e-3 * (ub - lb) * np.ones(dim)
thetaU = 10 * (ub - lb) * np.ones(dim)
theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

mean = constant_trend(dim, beta=None)
model = GaussianProcess(mean=mean,
                        corr='matern',
                        theta0=theta0,
                        thetaL=thetaL,
                        thetaU=thetaU,
                        nugget=None,
                        noise_estim=False,
                        optimizer='BFGS',
                        wait_iter=5,
                        random_start=15 * dim,
                        likelihood='concentrated',
                        eval_budget=100 * dim)

# search_space = [x1, x2]
opt = BayesOpt(search_space, obj_func, model, max_iter=n_step, random_seed=None,
               n_init_sample=n_init_sample, minimize=True, verbose=False, debug=True,
               optimizer='BFGS')
               
opt.run()
Example No. 23
    X = np.random.rand(n_init_sample, dim) * (x_ub - x_lb) + x_lb
    y = fitness(X)

    thetaL = 1e-5 * (x_ub - x_lb) * np.ones(dim)
    thetaU = 10 * (x_ub - x_lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    mean = linear_trend(dim, beta=None)
    model = GaussianProcess(mean=mean,
                            corr='matern',
                            theta0=theta0,
                            thetaL=thetaL,
                            thetaU=thetaU,
                            nugget=None,
                            noise_estim=True,
                            optimizer='BFGS',
                            verbose=True,
                            wait_iter=3,
                            random_start=30,
                            likelihood='concentrated',
                            eval_budget=1e3)

    model.fit(X, y)

    def grad(model):
        f = EI(model)

        def __(x):
            _, dx = f(x, dx=True)
            return dx

        return __
Example No. 24
    def get_loglikelihood(self):
        y, X = self.subsample_data()
        K = self.kernel.eval_batch_symm(X)
        return GaussianProcess.loglikelihood(y, K, self.sigma_n)
Example No. 25
    def init_with_rh(self, data, **kwargs):
        X = np.atleast_2d([
            Configuration(values=_[0], configuration_space=self.cs).get_array()\
                 for _ in data
        ])
        y = np.array([_[1] for _ in data])
        dim = X.shape[1]
        fopt = np.min(y)
        xopt = X[np.where(y == fopt)[0][0]]

        mean = constant_trend(dim, beta=None)  # Simple Kriging
        thetaL = 1e-10 * np.ones(dim)
        thetaU = 10 * np.ones(dim)
        theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

        model = GaussianProcess(
            mean=mean, corr='squared_exponential',
            theta0=theta0, thetaL=thetaL, thetaU=thetaU,
            nugget=1e-6, noise_estim=False,
            optimizer='BFGS', wait_iter=5, random_start=5 * dim,
            eval_budget=100 * dim
        )
        model.fit(X, y)

        # obtain the Hessian and gradient from the GP mean surface
        H = model.Hessian(xopt)
        g = model.gradient(xopt)[0]

        w, B = np.linalg.eigh(H)
        w[w <= 0] = 1e-6     # replace the negative eigenvalues by a very small value
        w_min, w_max = np.min(w), np.max(w)

        # avoid the condition number becoming too large
        cond_upper = 1e3
        delta = (cond_upper * w_min - w_max) / (1 - cond_upper)
        w += delta

        # compute the upper bound for step-size
        M = np.diag(1 / np.sqrt(w)).dot(B.T)
        H_inv = B.dot(np.diag(1 / w)).dot(B.T)
        p = -1 * H_inv.dot(g).ravel()
        alpha = np.linalg.norm(p)

        if np.isnan(alpha):
            alpha = 1
            H_inv = np.eye(dim)

        # use a backtracking line search to determine the initial step-size
        tau, c = 0.9, 1e-4
        slope = np.inner(g.ravel(), p.ravel())

        if slope > 0:  # this should not happen..
            p *= -1
            slope *= -1

        f = lambda x: model.predict(x)
        while True:
            _x = (xopt + alpha * p).reshape(1, -1)
            if f(_x) <= f(xopt.reshape(1, -1)) + c * alpha * slope:
                break
            alpha *= tau

        sigma0 = np.linalg.norm(M.dot(alpha * p)) / np.sqrt(dim - 0.5)
        self.Cov = H_inv
        self.sigma = self.sigma0 = sigma0
        self._set_x0(xopt)
        self.mean = self.gp.geno(
            np.array(self.x0, copy=True),
            from_bounds=self.boundary_handler.inverse,
            copy=False
        )
        self.mean0 = np.array(self.mean, copy=True)
        self.best = BestSolution(x=self.mean, f=fopt)
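A self-contained numpy sketch (with made-up eigenvalues) of the shift applied above: choosing delta = (c*w_min - w_max) / (1 - c) makes the shifted eigenvalues have condition number exactly c.

import numpy as np

w = np.array([1e-6, 0.02, 5.0])   # eigenvalues after clipping the negative ones
cond_upper = 1e3
w_min, w_max = w.min(), w.max()
delta = (cond_upper * w_min - w_max) / (1 - cond_upper)
w_shifted = w + delta
print(w_shifted.max() / w_shifted.min())   # prints ~1000, i.e. the requested upper bound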
Example No. 26
def main(model, dataset, optimise, split, k, alpha, epochs, trees, max_depth,
         length, sigma_f, sigma_n, features, debug):
    if model not in Models:
        print('Please specify one of the following models:')
        print(' - NN for Nearest Neighbour')
        print(' - LR for Linear Regression')
        print(' - RF for Regression Forest')
        print(' - GP for Gaussian Process')

    if dataset == 'toy_dataset':
        dataset = get_toy_dataset(0 if model in ['NN', 'GP'] else 1)
    else:
        dataset = read_dataset(dataset)

    if model == 'NN':
        nn = NearestNeighbour(k=k, debug=debug)
        dataset = normalise_data(dataset)
        X_train, y_train, X_test, y_test = split_dataset(dataset,
                                                         percentage=split)
        if optimise:
            nn.optimise(X_train, y_train, X_test, y_test)
        else:
            nn.fit(X_train, y_train)
            nn.test(X_test, y_test)

    if model == 'LR':
        lr = LinearRegression(alpha=alpha, epochs=epochs, debug=debug)
        dataset = normalise_data(dataset)
        X_train, y_train, X_test, y_test = split_dataset(dataset,
                                                         percentage=split)
        if optimise:
            lr.optimise(X_train, y_train, X_test, y_test)
        else:
            lr.train(X_train, y_train)
            lr.test(X_test, y_test)

    if model == 'RF':
        rf = RegressionForest(n_features=features,
                              n_estimators=trees,
                              max_depth=max_depth,
                              split=split,
                              debug=debug)
        dataset = normalise_data(dataset)
        X_train, y_train, X_test, y_test = split_dataset(dataset,
                                                         percentage=split)
        if optimise:
            rf.optimise(X_train, y_train, X_test, y_test)
        else:
            rf.train(X_train, y_train)
            rf.test(X_test, y_test)

    if model == 'GP':
        gp = GaussianProcess(l=length,
                             sigma_f=sigma_f,
                             sigma_n=sigma_n,
                             debug=debug)
        X_train, y_train, _, _ = split_dataset(dataset,
                                               random=False,
                                               percentage=0.8)
        gp.fit(X_train, y_train)
        gp.test()
Example No. 27
def create_emulator_validation ( f_simulator, parameters, minvals, maxvals, 
                                n_train, n_validate, do_gradient=True, 
                                fix_params=None, thresh=0.98, n_tries=5, 
                                args=(), n_procs=None ):


    """A method to create an emulator, given the simulator function, the
    parameters names and boundaries, the number of training input/output pairs. 
    The function will also provide an independent validation dataset, both for 
    the valuation of the function and its gradient. The gradient is calculated
    using finite differences, so it is a bit ropey.
    
    In order to better sample some regions of parameter space easily (you can 
    change the underlying pdf of the parameters for LHS, but that's overkill)
    you can also add additional samples where one parameter is set to a fixed
    value, and an LHS design for all the other parameters is returned. This 
    can be done usign the `fix_params` keyword.
    
    Parameters
    ------------
    f_simulator: function
        A function that evaluates the simulator. It should take a single 
        parameter which will be made out of the input vector, plus whatever
        other extra arguments one needs (stored in ``args``).
    parameters: list
        The parameter names
    minvals: list
        The minimum value of the parameters
    maxvals: list
        The maximum value of the parameters
    n_train: int
        The number of training samples
    n_validate: int
        The number of validation samples
    thresh: float
        For a multivariate output GP, the threshold at which to cut the 
        PCA expansion.
    n_tries: int
        The number of tries in the GP hyperparameter stage. The more the better,
        but also the longer it will take.
    args: tuple
        A list of extra arguments to the model
    do_gradient: Boolean
        Whether to do a gradient validation too.
    fix_params: dictionary
        A dictionary that allows the training set to be extended by fixing one
        or more parameters to one value, while still doing an LHS on the 
        remaining parameters. Each parameter has a 2-element tuple, indicating
        the value and the number of extra samples.
        
    Returns
        The GP object, the validation input set, the validation output set, the
        emulated validation set, the emulated gradient set. If the gradient
        validation is also done, it will also return the gradient validation 
        using finite differences.
        
    """
    
    # First, create the training set, using the appropriate function from
    # above...
    samples, distributions = create_training_set ( parameters, minvals, maxvals, 
                                    n_train=n_train, fix_params=fix_params )
    # Now, create the validation set, using the distributions object we got
    # from creating the training set
    validate  = []
    for d in distributions:
        validate.append ( d.rvs( n_validate ))
    validate = np.array ( validate ).T
    
    # We have the input pairs for the training and validation. We will now run
    # the simulator function
    
    if n_procs is None:
        training_set = map  ( f_simulator, [( (x,)+args) for x in samples] )
        validation_set = map  ( f_simulator, [( (x,)+args) for x in validate] )
        
    else:
        pool = multiprocessing.Pool ( processes = n_procs)
        
        
        training_set = pool.map  ( f_simulator, [( (x,)+args) for x in samples] )
        validation_set = pool.map  ( f_simulator, [( (x,)+args) for x in validate] )
    training_set = np.array ( training_set ).squeeze()
    validation_set = np.array ( validation_set )

    if training_set.ndim == 1:
        gp = GaussianProcess( samples, training_set )
        gp.learn_hyperparameters( n_tries = n_tries )
    else:
        gp = MultivariateEmulator(X=training_set , \
                        y=samples, thresh=thresh, n_tries=n_tries )
    
    X = [ gp.predict ( np.atleast_2d(x) ) 
                        for x in validate ] 
    if len ( X[0] ) == 2:
        emulated_validation = np.array ( [ x[0] for x in X] )
        emulated_gradient = np.array ( [ x[1] for x in X] )
    elif len ( X[0] ) == 3:
        emulated_validation = np.array ( [ x[0] for x in X] )
        emulated_gradient = np.array ( [ x[2] for x in X] )
    # Now with gradient... Approximate with finite differences...
    
    

    if do_gradient:
        val_set = [( (x,)+args) for x in validate]
        validation_gradient = []
        delta = [(maxvals[j] - minvals[j])/10000. 
                    for j in xrange(len(parameters)) ]
        delta = np.array ( delta )
        for i, pp in enumerate( val_set ):
            xx0 = pp[0]*1.
            grad_val_set = []
            f0 = validation_set[i]
            df = []
            for j in xrange ( len ( parameters ) ):
                xx = xx0*1
                xx[j] = xx0[j] + delta[j]
                grad_val_set.append ( xx  )
                df.append ( f_simulator ( ( (xx,) + args ) ) )
            df = np.array ( df )
            try:
                validation_gradient.append (  (df-f0)/delta )
            except ValueError:
                validation_gradient.append (  (df-f0)/delta[:, None] )
                
        return gp, validate, validation_set, np.array(validation_gradient), \
            emulated_validation, emulated_gradient.squeeze()
    else:
        return gp, validate, validation_set,  emulated_validation, \
            emulated_gradient
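A hedged usage sketch for `create_emulator_validation` above, assuming it is run in its own (Python 2) module where `create_training_set` and `GaussianProcess` are available; the toy simulator, parameter names and ranges are made up.

import numpy as np

def toy_simulator(x_and_args):
    x = x_and_args[0]            # unpack the ((x,) + args) tuple the function passes in
    return np.sum(np.sin(x))     # a scalar "model" output per sample

# fix_params could additionally be set, e.g. {"p0": (0.5, 10)}, to add 10 extra LHS
# samples with p0 fixed at 0.5 (parameter name and values are illustrative).
gp, validate, validation_set, validation_gradient, emu_val, emu_grad = \
    create_emulator_validation(toy_simulator, ["p0", "p1"], [0., 0.], [1., 1.],
                               n_train=50, n_validate=10, do_gradient=True, n_tries=3)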
    dim = 2
    n_init_sample = 10

    x_lb = np.array([-5] * dim)
    x_ub = np.array([5] * dim)

    X = np.random.rand(n_init_sample, dim) * (x_ub - x_lb) + x_lb
    y = fitness(X)

    thetaL = 1e-5 * (x_ub - x_lb) * np.ones(dim)
    thetaU = 10 * (x_ub - x_lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    mean = linear_trend(dim, beta=None)
    model = GaussianProcess(mean=mean, corr='matern', theta0=theta0, thetaL=thetaL, thetaU=thetaU,
                            nugget=None, noise_estim=True, optimizer='BFGS', verbose=True,
                            wait_iter=3, random_start=10, eval_budget=50)
    
    model.fit(X, y)
    
    def grad(model):
        f = MGFI(model, t=10)
        def __(x):
            _, dx = f(x, dx=True)
            return dx
        return __
    
    t = 1
    infill = MGFI(model, t=t)
    infill_dx = grad(model)
    
Example No. 29
    # now project the training data onto these axes
    train_SB = np.dot(train_brf, SB.T).T

    # unpack params
    inputs_t, keys = unpack(train_params)

    # set some arrays to collect information
    gps = [''] * train_SB.shape[0]
    theta_min0 = [''] * train_SB.shape[0]
    theta_min1 = [''] * train_SB.shape[0]

    # loop over each dimension and train a GP
    for i in xrange(train_SB.shape[0]):
        yields_t = train_SB[i]
        gps[i] = GaussianProcess(inputs_t, yields_t)
        theta_min0[i], theta_min1[i] = gps[i].learn_hyperparameters(n_tries=2)

    pickle.dump(gps, open(pickleFile, 'wb'))
    np.savez(lutfile,angles=angles,train_params=train_params,\
                    train_brf=train_brf,\
                    theta_min0=theta_min0,theta_min1=theta_min1,\
                    s=s,SB=SB,train_SB=train_SB)

wl = np.arange(400, 2501).astype(float)

plt.clf()
for i in xrange(train_SB.shape[0]):
    plt.plot(wl, SB[i], label='PC %d (%.2f)' % (i, (s / s.sum())[i]))

plt.legend()
Example No. 30
def create_emulator_validation(f_simulator,
                               parameters,
                               minvals,
                               maxvals,
                               n_train,
                               n_validate,
                               do_gradient=True,
                               thresh=0.98,
                               n_tries=5,
                               args=(),
                               n_procs=None):
    """A method to create an emulator, given the simulator function, the
    parameters names and boundaries, the number of training input/output pairs. 
    The function will also provide an independent validation dataset, both for 
    the valuation of the function and its gradient. The gradient is calculated
    using finite differences, so it is a bit ropey.
    
    Parameters
    ------------
    f_simulator: function
        A function that evaluates the simulator. It should take a single 
        parameter which will be made out of the input vector, plus whatever
        other extra arguments one needs (stored in ``args``).
    parameters: list
        The parameter names
    minvals: list
        The minimum value of the parameters
    maxvals: list
        The maximum value of the parameters
    n_train: int
        The number of training samples
    n_validate: int
        The number of validation samples
    thresh: float
        For a multivariate output GP, the threshold at which to cut the 
        PCA expansion.
    n_tries: int
        The number of tries in the GP hyperparameter stage. The more the better,
        but also the longer it will take.
    args: tuple
        A list of extra arguments to the model
    do_gradient: Boolean
        Whether to do a gradient validation too.
        
        
    Returns
        The GP object, the validation input set, the validation output set, the
        emulated validation set, the emulated gradient set. If the gradient
        validation is also done, it will also return the gradient validation 
        using finite differences.
        
    """

    # First, create the training set, using the appropriate function from
    # above...
    samples, distributions = create_training_set(parameters,
                                                 minvals,
                                                 maxvals,
                                                 n_train=n_train)
    # Now, create the validation set, using the distributions object we got
    # from creating the training set
    validate = []
    for d in distributions:
        validate.append(d.rvs(n_validate))
    validate = np.array(validate).T

    # We have the input pairs for the training and validation. We will now run
    # the simulator function

    if n_procs is None:
        training_set = map(f_simulator, [((x, ) + args) for x in samples])
        validation_set = map(f_simulator, [((x, ) + args) for x in validate])

    else:
        pool = multiprocessing.Pool(processes=n_procs)

        training_set = pool.map(f_simulator, [((x, ) + args) for x in samples])
        validation_set = pool.map(f_simulator,
                                  [((x, ) + args) for x in validate])
    training_set = np.array(training_set).squeeze()
    validation_set = np.array(validation_set)

    if training_set.ndim == 1:
        gp = GaussianProcess(samples, training_set)
        gp.learn_hyperparameters(n_tries=n_tries)
    else:
        gp = MultivariateEmulator(X=training_set , \
                        y=samples, thresh=thresh, n_tries=n_tries )

    X = [gp.predict(np.atleast_2d(x)) for x in validate]
    if len(X[0]) == 2:
        emulated_validation = np.array([x[0] for x in X])
        emulated_gradient = np.array([x[1] for x in X])
    elif len(X[0]) == 3:
        emulated_validation = np.array([x[0] for x in X])
        emulated_gradient = np.array([x[2] for x in X])
    # Now with gradient... Approximate with finite differences...

    if do_gradient:
        val_set = [((x, ) + args) for x in validate]
        validation_gradient = []
        delta = [(maxvals[j] - minvals[j]) / 10000.
                 for j in xrange(len(parameters))]
        delta = np.array(delta)
        for i, pp in enumerate(val_set):
            xx0 = pp[0] * 1.
            grad_val_set = []
            f0 = validation_set[i]
            df = []
            for j in xrange(len(parameters)):
                xx = xx0 * 1
                xx[j] = xx0[j] + delta[j]
                grad_val_set.append(xx)
                df.append(f_simulator(((xx, ) + args)))
            df = np.array(df)
            try:
                validation_gradient.append((df - f0) / delta)
            except ValueError:
                validation_gradient.append((df - f0) / delta[:, None])

        return gp, validate, validation_set, np.array(validation_gradient), \
            emulated_validation, emulated_gradient.squeeze()
    else:
        return gp, validate, validation_set,  emulated_validation, \
            emulated_gradient
    def __call__(self,
                 fid,
                 dim,
                 rep1,
                 rep2,
                 split_idx,
                 iids=[1, 2, 3, 4, 5],
                 num_reps=5,
                 budget=None,
                 target_idx=None,
                 sol_points=None,
                 seed=0,
                 verbose=False,
                 log_file=None,
                 data_file=None,
                 opt_split=False):
        np.random.seed(seed)
        params = self.params
        if self.part_to_optimize == 1 or self.part_to_optimize == -1:
            initial_point = get_default_hyperparameter_values(
                params, dim, rep1, budget)
        else:
            initial_point = get_default_hyperparameter_values(
                params, dim, rep2, budget)
        if sol_points is None:
            initial_points = [initial_point]
        else:
            if isinstance(sol_points[0], list):
                initial_points = sol_points + [initial_point]
            else:
                if initial_point != sol_points:
                    initial_points = [sol_points, initial_point]
                else:
                    initial_points = [initial_point]
        if self.param_vals is not None and self.param_vals not in initial_points:
            initial_points.append(self.param_vals)

        def obj_func(x):
            if self.contains_discrete:
                lambda1_ = x[-1]
                lambda2_ = x[-1]
                x = x[:-1]
                params_i = [x for x in self.params if x != "lambda_"]
            else:
                lambda1_ = None
                lambda2_ = None
                params_i = self.params
            if self.part_to_optimize == 1 or self.part_to_optimize == -1:
                c1 = (params_i, x)
            elif self.param_vals is not None:
                if self.contains_discrete:
                    lambda1_ = self.param_vals[-1]
                    c1 = (params_i, self.param_vals[:-1])
                else:
                    c1 = (params_i, self.param_vals)
            else:
                c1 = None
            if self.part_to_optimize == 2 or self.part_to_optimize == -1:
                c2 = (params_i, x)
            elif self.param_vals is not None:
                if self.contains_discrete:
                    lambda2_ = self.param_vals[-1]
                    c2 = (params_i, self.param_vals[:-1])
                else:
                    c2 = (params_i, self.param_vals)
            else:
                c2 = None
            print(c1, c2, lambda1_, lambda2_)
            return single_split_with_hyperparams_parallel(
                fid,
                dim,
                rep1,
                rep2,
                split_idx,
                iids=iids,
                num_reps=num_reps,
                hyperparams=c1,
                hyperparams2=c2,
                budget=budget,
                target_idx=target_idx,
                lambda_=lambda1_,
                lambda2_=lambda2_,
                opt_split=opt_split)

        if self.contains_discrete:
            model = RandomForest()

            opt = BO(self.search_space,
                     obj_func,
                     model,
                     max_iter=self.max_iter,
                     n_init_sample=self.n_init_sample,
                     minimize=True,
                     verbose=verbose,
                     wait_iter=10,
                     init_sol_points=initial_points,
                     random_seed=seed,
                     n_point=self.n_point,
                     optimizer='MIES',
                     log_file=log_file,
                     data_file=data_file)
        else:
            model = GaussianProcess(mean=self.mean,
                                    corr='matern',
                                    theta0=self.theta0,
                                    thetaL=self.thetaL,
                                    thetaU=self.thetaU,
                                    nugget=1e-10,
                                    noise_estim=False,
                                    optimizer='BFGS',
                                    wait_iter=5,
                                    random_start=10 * self.dim_hyperparams,
                                    likelihood='concentrated',
                                    eval_budget=self.eval_budget,
                                    random_state=seed)

            opt = BO(
                self.search_space,
                obj_func,
                model,
                max_iter=self.max_iter,
                n_init_sample=self.n_init_sample,
                minimize=True,
                verbose=verbose,
                wait_iter=10,
                init_sol_points=initial_points,
                random_seed=seed,
                n_point=self.n_point,
                optimizer='BFGS',
                log_file=log_file,
                data_file=data_file  # when using GPR model, 'BFGS' is faster than 'MIES'
            )
        return opt.run()