def __init__(self, lb, ub):
    """
    Construct bounded volumes.

    :param lb: the lowerbounds of the volumes
    :param ub: the upperbounds of the volumes
    """
    super(BoundedVolumes, self).__init__()
    assert np.all(lb.shape == ub.shape)  # lower and upper bounds must have matching shapes
    self.lb = DataHolder(np.atleast_2d(lb), 'pass')
    self.ub = DataHolder(np.atleast_2d(ub), 'pass')

def Y(self):
    """
    Returns the output data of the wrapped model, unscaled.

    :return: :class:`.DataHolder`: unscaled output data
    """
    return DataHolder(self.output_transform.backward(self.wrapped.Y.value))

def X(self):
    """
    Returns the input data of the model, unscaled.

    :return: :class:`.DataHolder`: unscaled input data
    """
    return DataHolder(self.input_transform.backward(self.wrapped.X.value))

def __init__(self, model, sigma=2.0):
    """
    :param model: GPflow model (single output) representing our belief of the objective
    :param sigma: exploration parameter (see the LCB formula); higher values favour exploration
    """
    super(LowerConfidenceBound, self).__init__(model)
    self.sigma = DataHolder(np.array(sigma))

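# A minimal NumPy sketch of the lower confidence bound score this constructor parameterises
# (assuming minimisation: mean minus sigma standard deviations). `mu` and `var` below are
# made-up stand-ins for a model's predictions, not part of the class above.
import numpy as np

def lower_confidence_bound(mean, var, sigma=2.0):
    # Lower values are more attractive when minimising; larger sigma favours exploration.
    return mean - sigma * np.sqrt(var)

mu = np.array([0.5, 0.5])
var = np.array([0.01, 0.25])
print(lower_confidence_bound(mu, var))  # the more uncertain candidate scores lower
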
def __init__(self, X, Y, kern, Z, alpha, mean_function=Zero()):
    """
    X is a data matrix, size N x D
    Y is a data matrix, size N x R
    Z is a matrix of pseudo inputs, size M x D
    kern, mean_function are appropriate gpflow objects

    This method only works with a Gaussian likelihood.
    """
    X = DataHolder(X, on_shape_change='pass')
    Y = DataHolder(Y, on_shape_change='pass')
    likelihood = likelihoods.Gaussian()
    GPModel.__init__(self, X, Y, kern, likelihood, mean_function)
    self.Z = Param(Z)
    self.num_data = X.shape[0]
    self.num_latent = Y.shape[1]
    self.alpha = alpha

class ProbabilityOfImprovement(Acquisition):
    """
    Probability of Improvement acquisition function for single-objective global optimization.

    Key reference:

    ::

        @article{Kushner:1964,
            author = "Kushner, Harold J",
            journal = "Journal of Basic Engineering",
            number = "1",
            pages = "97--106",
            publisher = "American Society of Mechanical Engineers",
            title = "{A new method of locating the maximum point of an arbitrary multipeak curve in the presence of noise}",
            volume = "86",
            year = "1964"
        }

    .. math::
       \\alpha(\\mathbf x_{\\star}) = \\int_{-\\infty}^{f_{\\min}} \\, p( f_{\\star}\\,|\\, \\mathbf x, \\mathbf y, \\mathbf x_{\\star} ) \\, d f_{\\star}
    """

    def __init__(self, model):
        """
        :param model: GPflow model (single output) representing our belief of the objective
        """
        super(ProbabilityOfImprovement, self).__init__(model)
        self.fmin = DataHolder(np.zeros(1))
        self._setup()

    def _setup(self):
        super(ProbabilityOfImprovement, self)._setup()
        # Track the lowest posterior mean over the feasible observations
        feasible_samples = self.data[0][self.highest_parent.feasible_data_index(), :]
        samples_mean, _ = self.models[0].predict_f(feasible_samples)
        self.fmin.set_data(np.min(samples_mean, axis=0))

    def build_acquisition(self, Xcand, **kwargs):
        candidate_mean, candidate_var = self._build_acquisition(Xcand, **kwargs)
        candidate_var = tf.maximum(candidate_var, stability)
        normal = tf.contrib.distributions.Normal(candidate_mean, tf.sqrt(candidate_var))
        return normal.cdf(self.fmin, name=self.__class__.__name__)

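# Hedged sketch: under a Gaussian posterior, the integral in the docstring has the closed
# form Phi((fmin - mu) / sigma). A plain NumPy/SciPy restatement, independent of the
# TensorFlow graph built above; the inputs are made-up stand-ins for predict_f output.
import numpy as np
from scipy.stats import norm

def probability_of_improvement(fmin, mean, var):
    # P(f* < fmin) for a Gaussian posterior N(mean, var)
    return norm.cdf((fmin - mean) / np.sqrt(var))

print(probability_of_improvement(fmin=0.0, mean=np.array([-0.5, 0.5]), var=np.array([0.04, 0.04])))
# the candidate with posterior mean below the incumbent scores close to 1
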
def __init__(self, X, Y, Z, kernels, likelihood,
             num_latent_Y=None,
             minibatch_size=None,
             num_samples=1,
             mean_function=Zero()):
    Model.__init__(self)

    assert X.shape[0] == Y.shape[0]
    assert Z.shape[1] == X.shape[1]
    assert kernels[0].input_dim == X.shape[1]

    self.num_data, D_X = X.shape
    self.num_samples = num_samples
    self.D_Y = num_latent_Y or Y.shape[1]
    self.dims = [k.input_dim for k in kernels] + [self.D_Y, ]

    q_mus, q_sqrts, Zs, mean_functions = init_layers(X, Z, self.dims, mean_function)

    layers = []
    for q_mu, q_sqrt, Z, mean_function, kernel in zip(q_mus, q_sqrts, Zs, mean_functions, kernels):
        layers.append(Layer(kernel, q_mu, q_sqrt, Z, mean_function))
    self.layers = ParamList(layers)

    for layer in self.layers[:-1]:  # fix the inner layer mean functions
        layer.mean_function.fixed = True

    self.likelihood = likelihood

    if minibatch_size is not None:
        self.X = MinibatchData(X, minibatch_size)
        self.Y = MinibatchData(Y, minibatch_size)
    else:
        self.X = DataHolder(X)
        self.Y = DataHolder(Y)

def __init__(self, models):
    """
    :param models: a list of (possibly multioutput) GPflow models representing our belief of the objectives
    """
    super(HVProbabilityOfImprovement, self).__init__(models)
    num_objectives = self.data[1].shape[1]
    assert num_objectives > 1

    # Keep empty for now - it is updated in _setup()
    self.pareto = Pareto(np.empty((0, num_objectives)))
    self.reference = DataHolder(np.ones((1, num_objectives)))

def __init__(self, base_kern, branchPtTensor, b, fDebug=False):
    '''
    branchPtTensor is a tensor of branch points of size F x F x B,
    where F is the number of functions and B is the number of branching points.
    '''
    gpflow.kernels.Kern.__init__(self, input_dim=base_kern.input_dim + 1)
    self.kern = base_kern
    self.fm = branchPtTensor
    self.fDebug = fDebug
    assert isinstance(b, np.ndarray)
    assert self.fm.shape[0] == self.fm.shape[1]
    assert self.fm.shape[2] > 0
    self.Bv = DataHolder(b)

def __init__(self, models):
    '''
    :param models: a list of (possibly multioutput) GPflow models representing our belief about the objectives.
    '''
    super(HVExpectedImprovement, self).__init__(models)
    num_objectives = self.data[1].shape[1]
    assert num_objectives > 1

    # Keep empty for now, will be updated in _setup()
    self.pareto = Pareto(np.empty((0, num_objectives)))
    self.reference = DataHolder(np.ones((1, num_objectives)))

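# Both hypervolume-based acquisitions above compare candidates through the hypervolume
# dominated by the current Pareto front w.r.t. the reference point. A minimal NumPy sketch
# for the two-objective minimisation case; an assumption-level illustration only, not the
# divide-and-conquer computation the Pareto class performs.
import numpy as np

def hypervolume_2d(front, reference):
    # Sort by the first objective; for a proper front the second is then descending,
    # so each point adds a disjoint rectangular strip reaching the reference point.
    front = front[np.argsort(front[:, 0])]
    hv, prev_f2 = 0.0, reference[1]
    for f1, f2 in front:
        hv += (reference[0] - f1) * (prev_f2 - f2)
        prev_f2 = f2
    return hv

front = np.array([[0.5, 1.5], [1.5, 0.5]])
print(hypervolume_2d(front, np.array([2.0, 2.0])))  # 1.25
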
def __init__(self, X, Y, kern, mu_old, Su_old, Kaa_old, Z_old, Z, mean_function=Zero()):
    """
    X is a data matrix, size N x D
    Y is a data matrix, size N x R
    Z is a matrix of pseudo inputs, size M x D
    kern, mean_function are appropriate gpflow objects
    mu_old, Su_old are the mean and covariance of the old q(u)
    Z_old is the old inducing inputs

    This method only works with a Gaussian likelihood.
    """
    X = DataHolder(X, on_shape_change='pass')
    Y = DataHolder(Y, on_shape_change='pass')
    likelihood = likelihoods.Gaussian()
    GPModel.__init__(self, X, Y, kern, likelihood, mean_function)
    self.Z = Param(Z)
    self.num_data = X.shape[0]
    self.num_latent = Y.shape[1]

    self.mu_old = DataHolder(mu_old, on_shape_change='pass')
    self.M_old = Z_old.shape[0]
    self.Su_old = DataHolder(Su_old, on_shape_change='pass')
    self.Kaa_old = DataHolder(Kaa_old, on_shape_change='pass')
    self.Z_old = DataHolder(Z_old, on_shape_change='pass')

def __init__(self, prev_ind_list, cur_ind_list, X_grid, kerns_list, name='collaborative_pref_gps'):
    Model.__init__(self, name)
    total_shape = total_all_actions(prev_ind_list)
    Y = np.ones(total_shape)[:, None]
    self.Y = DataHolder(Y)
    # ParamList holding the kernels of the latent GPs H
    self.kerns_list = ParamList(kerns_list)
    self.X_grid = DataHolder(X_grid[:, None])
    self.prev_ind_list = prev_ind_list
    self.cur_ind_list = cur_ind_list
    # define the likelihood
    self.likelihood = gpflow.likelihoods.Bernoulli()

def __init__(self, X_variational_mean, X_variational_var, t):
    """
    :param X_variational_mean: initial mean of the latent variational distribution, size N (number of points) x Q (latent dimensions)
    :param X_variational_var: initial variance of the latent variational distribution (N x Q)
    :param t: time stamps for the variational prior kernel, needs to be an np.ndarray.
    """
    super(GPTimeSeries, self).__init__(name='GPTimeSeries')
    self.X_variational_mean = Param(X_variational_mean)
    self.X_variational_var = Param(X_variational_var, transforms.positive)
    assert X_variational_var.ndim == 2, "the dimensionality of the variational prior covariance needs to be 2."
    assert np.all(X_variational_mean.shape == X_variational_var.shape), \
        "the shapes of the variational prior mean and covariance need to be equal."
    self.num_latent = X_variational_mean.shape[1]
    self.num_data = X_variational_mean.shape[0]
    assert isinstance(t, np.ndarray), "time stamps need to be a numpy array."
    self.t = DataHolder(t)

def __init__(self, X, Y, kern, Z, mean_function=None, reg=False):
    if reg:
        # regularization enabled: introduce a vector (ParamList) with the
        # variances of every pitch kernel
        D = len(kern.kern_list)
        var_list = []
        for i in range(D):
            var_list.append(kern.kern_list[i].variance)
        kern.var_vector = gpflow.param.ParamList(var_list)

    gpflow.sgpr.SGPR.__init__(self, X=X, Y=Y, kern=kern, Z=Z, mean_function=mean_function)
    self.Z = DataHolder(Z, on_shape_change='pass')
    self.reg = reg

def __init__(self, Y, threshold=0):
    """
    Construct a Pareto set.

    Stores a Pareto set and calculates the cell bounds covering the non-dominated region.
    The latter is needed for certain multiobjective acquisition functions,
    e.g., the :class:`~.acquisition.HVProbabilityOfImprovement`.

    :param Y: output data points, size N x R
    :param threshold: approximation threshold for the generic divide and conquer strategy
        (default 0: exact calculation)
    """
    super(Pareto, self).__init__()
    self.threshold = threshold
    self.Y = Y

    # Setup data structures
    self.bounds = BoundedVolumes.empty(Y.shape[1], np_int_type)
    self.front = DataHolder(np.zeros((0, Y.shape[1])), 'pass')

    # Initialize
    self.update()

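# A minimal NumPy sketch of the non-dominated filtering a Pareto set maintains (minimisation
# in all objectives). It is independent of the class above and of how update() is actually
# implemented; it only illustrates the dominance rule.
import numpy as np

def pareto_front(Y):
    # Keep the rows of Y (N x R) that no other row dominates
    # (dominated: another point is <= everywhere and < somewhere).
    keep = np.ones(Y.shape[0], dtype=bool)
    for i, y in enumerate(Y):
        keep[i] = not np.any(np.all(Y <= y, axis=1) & np.any(Y < y, axis=1))
    return Y[keep]

Y = np.array([[1.0, 4.0], [2.0, 2.0], [3.0, 3.0], [4.0, 1.0]])
print(pareto_front(Y))  # [3., 3.] is dominated by [2., 2.] and drops out
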
def __init__(self, X_variational_mean, X_variational_var, Y, kern, t, kern_t, M, Z=None):
    """
    Initialization of the Bayesian Gaussian Process Dynamics Model.
    This method only works with a Gaussian likelihood.

    :param X_variational_mean: initial latent positions, size N (number of points) x Q (latent dimensions).
    :param X_variational_var: variance of the latent positions (N x Q), used to initialise the latent space.
    :param Y: data matrix, size N (number of points) x D (dimensions).
    :param kern: kernel specification, by default RBF.
    :param t: time stamps.
    :param kern_t: dynamics kernel specification, by default RBF.
    :param M: number of inducing points.
    :param Z: matrix of inducing points, size M (inducing points) x Q (latent dimensions).
        By default a random permutation of X_variational_mean.
    """
    super(BayesianDGPLVM, self).__init__(name='BayesianDGPLVM')
    self.kern = kern
    assert len(X_variational_mean) == len(X_variational_var), 'must be the same number of time series'
    self.likelihood = likelihoods.Gaussian()

    # multiple sequences
    series = []
    for i in range(len(X_variational_mean)):
        series.append(GPTimeSeries(X_variational_mean[i], X_variational_var[i], t[i]))
    self.series = ParamList(series)

    # inducing points
    if Z is None:
        # by default, initialize with a random permutation of the initial latent means
        Z = np.random.permutation(np.concatenate(X_variational_mean, axis=0).copy())[:M]
    else:
        assert Z.shape[0] == M
    self.Z = Param(Z)

    self.kern_t = kern_t
    self.Y = DataHolder(Y)
    self.M = M
    self.n_s = 0

def __init__(self, t, XExpanded, Y, kern, indices, b, phiPrior=None, phiInitial=None, fDebug=False, KConst=None):
    gpflow.model.GPModel.__init__(self, XExpanded, Y, kern,
                                  likelihood=gpflow.likelihoods.Gaussian(),
                                  mean_function=gpflow.mean_functions.Zero())
    assert len(indices) == t.size, 'indices must be size N'
    assert len(t.shape) == 1, 'pseudotime should be 1D'
    self.N = t.shape[0]
    self.t = t.astype(np_float_type)  # could be a DataHolder; unclear if that has advantages
    self.indices = indices
    self.logPhi = gpflow.param.Param(np.random.randn(t.shape[0], t.shape[0] * 3))  # 1 branch point => 3 functions
    if phiInitial is None:
        phiInitial = np.ones((self.N, 2)) * 0.5  # uninformative initialisation
        phiInitial[:, 0] = np.random.rand(self.N)
        phiInitial[:, 1] = 1 - phiInitial[:, 0]
    self.fDebug = fDebug
    # Used as the p(Z) prior in the KL term. This should sum to 1,
    # which it will once the phi prior is updated below.
    if phiPrior is None:
        phiPrior = np.ones((self.N, 2)) * 0.5
    # Fix prior term - this is without trunk
    self.pZ = DataHolder(np.ones((t.shape[0], t.shape[0] * 3)))
    self.UpdateBranchingPoint(b, phiInitial, prior=phiPrior)
    self.KConst = KConst
    if not fDebug:
        assert KConst is None, 'KConst only for debugging'

class MinValueEntropySearch(Acquisition):
    """
    Max-value entropy search acquisition function for single-objective global optimization.
    Introduced by (Wang et al., 2017).

    Key reference:

    ::

        @InProceedings{Wang:2017,
            title = {Max-value Entropy Search for Efficient {B}ayesian Optimization},
            author = {Zi Wang and Stefanie Jegelka},
            booktitle = {Proceedings of the 34th International Conference on Machine Learning},
            pages = {3627--3635},
            year = {2017},
            editor = {Doina Precup and Yee Whye Teh},
            volume = {70},
            series = {Proceedings of Machine Learning Research},
            address = {International Convention Centre, Sydney, Australia},
            month = {06--11 Aug},
            publisher = {PMLR},
        }
    """

    def __init__(self, model, domain, gridsize=10000, num_samples=10):
        assert isinstance(model, Model)
        super(MinValueEntropySearch, self).__init__(model)
        assert self.data[1].shape[1] == 1
        self.gridsize = gridsize
        self.num_samples = num_samples
        self.samples = DataHolder(np.zeros(num_samples, dtype=np_float_type))
        self._domain = domain

    def _setup(self):
        super(MinValueEntropySearch, self)._setup()

        # Apply Gumbel sampling
        m = self.models[0]
        valid = self.feasible_data_index()

        # Work with feasible data
        X = self.data[0][valid, :]
        N = np.shape(X)[0]
        Xrand = RandomDesign(self.gridsize, self._domain).generate()
        fmean, fvar = m.predict_f(np.vstack((X, Xrand)))
        idx = np.argmin(fmean[:N])
        right = fmean[idx].flatten()  # + 2*np.sqrt(fvar[idx]).flatten()
        left = right
        probf = lambda x: np.exp(np.sum(norm.logcdf(-(x - fmean) / np.sqrt(fvar)), axis=0))

        i = 0
        while probf(left) < 0.75:
            left = 2. ** i * np.min(fmean - 5. * np.sqrt(fvar)) + (1. - 2. ** i) * right
            i += 1

        # Binary search for 3 percentiles
        q1, med, q2 = map(lambda val: bisect(lambda x: probf(x) - val, left, right,
                                             maxiter=10000, xtol=0.01),
                          [0.25, 0.5, 0.75])

        beta = (q1 - q2) / (np.log(np.log(4. / 3.)) - np.log(np.log(4.)))
        alpha = med + beta * np.log(np.log(2.))

        # obtain samples from y*
        mins = -np.log(-np.log(np.random.rand(self.num_samples).astype(np_float_type))) * beta + alpha
        self.samples.set_data(mins)

    def build_acquisition(self, Xcand):
        fmean, fvar = self.models[0].build_predict(Xcand)
        norm = tf.contrib.distributions.Normal(tf.constant(0.0, dtype=float_type),
                                               tf.constant(1.0, dtype=float_type))
        gamma = (fmean - tf.expand_dims(self.samples, axis=0)) / tf.sqrt(fvar)
        return tf.reduce_sum(gamma * norm.prob(gamma) / (2. * norm.cdf(gamma)) - norm.log_cdf(gamma),
                             axis=1, keep_dims=True) / self.num_samples

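# A standalone NumPy sketch of the Gumbel trick used in _setup above: bracket three quartiles
# of P(min f > x) over a grid, moment-match a Gumbel distribution through them, then draw
# approximate samples of the minimum y*. The fmean/fvar values are made-up stand-ins for
# predict_f output on the grid.
import numpy as np
from scipy.optimize import bisect
from scipy.stats import norm

fmean = np.array([0.3, 0.1, 0.5, 0.2])
fvar = np.full(4, 0.04)

# P(min f > x), assuming independent Gaussian marginals over the grid
probf = lambda x: np.exp(np.sum(norm.logcdf(-(x - fmean) / np.sqrt(fvar))))

left, right = np.min(fmean - 5. * np.sqrt(fvar)), np.max(fmean)
q1, med, q2 = (bisect(lambda x: probf(x) - val, left, right) for val in (0.25, 0.5, 0.75))

beta = (q1 - q2) / (np.log(np.log(4. / 3.)) - np.log(np.log(4.)))
alpha = med + beta * np.log(np.log(2.))
samples = -np.log(-np.log(np.random.rand(10))) * beta + alpha  # same form as in _setup
print(samples)
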
def __init__(self, X, Y, kernf, kerng, likelihood, Zf, Zg,
             mean_function=None, minibatch_size=None, name='model'):
    Model.__init__(self, name)
    self.mean_function = mean_function or Zero()
    self.kernf = kernf
    self.kerng = kerng
    self.likelihood = likelihood
    self.whiten = False
    self.q_diag = True

    # save initial attributes for future plotting purposes
    Xtrain = DataHolder(X)
    Ytrain = DataHolder(Y)
    self.Xtrain, self.Ytrain = Xtrain, Ytrain

    # sort out the X, Y into MinibatchData objects.
    if minibatch_size is None:
        minibatch_size = X.shape[0]
    self.num_data = X.shape[0]
    self.num_latent = Y.shape[1]  # num_latent will be 1
    self.X = MinibatchData(X, minibatch_size, np.random.RandomState(0))
    self.Y = MinibatchData(Y, minibatch_size, np.random.RandomState(0))

    # add variational parameters
    self.Zf = Param(Zf)
    self.Zg = Param(Zg)
    self.num_inducing_f = Zf.shape[0]
    self.num_inducing_g = Zg.shape[0]

    # init variational parameters
    self.u_fm = Param(np.random.randn(self.num_inducing_f, self.num_latent) * 0.01)
    self.u_gm = Param(np.random.randn(self.num_inducing_g, self.num_latent) * 0.01)

    if self.q_diag:
        self.u_fs_sqrt = Param(np.ones((self.num_inducing_f, self.num_latent)),
                               transforms.positive)
        self.u_gs_sqrt = Param(np.ones((self.num_inducing_g, self.num_latent)),
                               transforms.positive)
    else:
        u_fs_sqrt = np.array([np.eye(self.num_inducing_f)
                              for _ in range(self.num_latent)]).swapaxes(0, 2)
        self.u_fs_sqrt = Param(u_fs_sqrt, transforms.LowerTriangular(u_fs_sqrt.shape[2]))

        u_gs_sqrt = np.array([np.eye(self.num_inducing_g)
                              for _ in range(self.num_latent)]).swapaxes(0, 2)
        self.u_gs_sqrt = Param(u_gs_sqrt, transforms.LowerTriangular(u_gs_sqrt.shape[2]))

def __init__(self, X, Y, kern, Z, mean_function=None):
    gpflow.sgpr.SGPR.__init__(self, X=X, Y=Y, kern=kern, Z=Z, mean_function=mean_function)
    self.Z = DataHolder(Z, on_shape_change='pass')

class LinearTransform(DataTransform):
    """
    A simple linear transform of the form

    .. math::
       \\mathbf Y = (\\mathbf A \\mathbf X^{T})^{T} + \\mathbf b \\otimes \\mathbf 1_{N}^{T}
    """

    def __init__(self, A, b):
        """
        :param A: scaling matrix. Either a P-dimensional vector, or a P x P transformation matrix.
            For the latter, the inverse and backward methods are not guaranteed to work as A must
            be invertible. It is also possible to specify a matrix with size P x Q with Q != P to
            achieve a lower dimensional representation of X. In this case, A is not invertible,
            hence inverse and backward transforms are not supported.
        :param b: a P-dimensional offset vector.
        """
        super(LinearTransform, self).__init__()
        assert A is not None
        assert b is not None

        b = np.atleast_1d(b)
        A = np.atleast_1d(A)
        if len(A.shape) == 1:
            A = np.diag(A)

        assert len(b.shape) == 1
        assert len(A.shape) == 2

        self.A = DataHolder(A)
        self.b = DataHolder(b)

    def build_forward(self, X):
        return tf.matmul(X, tf.transpose(self.A)) + self.b

    @AutoFlow((float_type, [None, None]))
    def backward(self, Y):
        """
        Overwrites the default backward approach to avoid an explicit matrix inversion.
        """
        return self.build_backward(Y)

    def build_backward(self, Y):
        """
        TensorFlow implementation of the inverse mapping.
        """
        L = tf.cholesky(tf.transpose(self.A))
        XT = tf.cholesky_solve(L, tf.transpose(Y - self.b))
        return tf.transpose(XT)

    def build_backward_variance(self, Yvar):
        """
        Additional method for scaling variance backward (used in :class:`.Normalizer`).
        Can process both the diagonal variances returned by predict_f, as well as full
        covariance matrices.

        :param Yvar: size N x N x P or size N x P
        :return: Yvar scaled, same rank and size as input
        """
        rank = tf.rank(Yvar)
        # Because TensorFlow evaluates both fn1 and fn2, the transpose can't be in the same line.
        # If a full covariance matrix is provided, fn1 turns it into a rank 4 tensor and then
        # tries to transpose it as a rank 3. Splitting it into two steps works fine.
        Yvar = tf.cond(tf.equal(rank, 2), lambda: tf.matrix_diag(tf.transpose(Yvar)), lambda: Yvar)
        Yvar = tf.cond(tf.equal(rank, 2), lambda: tf.transpose(Yvar, perm=[1, 2, 0]), lambda: Yvar)

        N = tf.shape(Yvar)[0]
        D = tf.shape(Yvar)[2]
        L = tf.cholesky(tf.square(tf.transpose(self.A)))
        Yvar = tf.reshape(Yvar, [N * N, D])
        scaled_var = tf.reshape(tf.transpose(tf.cholesky_solve(L, tf.transpose(Yvar))), [N, N, D])
        return tf.cond(tf.equal(rank, 2), lambda: tf.reduce_sum(scaled_var, axis=1),
                       lambda: scaled_var)

    def assign(self, other):
        """
        Assign the parameters of another :class:`LinearTransform`.
        Useful to avoid graph re-compilation.

        :param other: :class:`.LinearTransform` object
        """
        assert other is not None
        assert isinstance(other, LinearTransform)
        self.A.set_data(other.A.value)
        self.b.set_data(other.b.value)

    def __invert__(self):
        A_inv = np.linalg.inv(self.A.value.T)
        return LinearTransform(A_inv, -np.dot(self.b.value, A_inv))

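# A quick NumPy check of the forward/backward round trip the transform implements:
# Y = X A^T + b forward, X = (Y - b) A^-T backward. Plain NumPy stand-ins for the
# TensorFlow ops above; the backward here uses an explicit inverse purely for checking.
import numpy as np

A = np.array([[2.0, 0.0], [1.0, 3.0]])   # P x P, invertible
b = np.array([1.0, -1.0])
X = np.random.randn(5, 2)

Y = X @ A.T + b                           # build_forward equivalent
X_back = (Y - b) @ np.linalg.inv(A).T     # build_backward equivalent
print(np.allclose(X, X_back))             # True
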
class ConstrainedExpectedImprovement(Acquisition):
    """
    Constrained Expected Improvement acquisition function for single-objective global optimization.
    Introduced by (Mockus et al, 1975).

    Key reference:

    ::

        @article{Jones:1998,
            title={Efficient global optimization of expensive black-box functions},
            author={Jones, Donald R and Schonlau, Matthias and Welch, William J},
            journal={Journal of Global optimization},
            volume={13},
            number={4},
            pages={455--492},
            year={1998},
            publisher={Springer}
        }

    This acquisition function is the expectation of the improvement over the current best
    observation w.r.t. the predictive distribution. The definition is closely related to the
    :class:`.ProbabilityOfImprovement`, but adds a multiplication with the improvement w.r.t.
    the current best observation to the integral.

    .. math::
       \\alpha(\\mathbf x_{\\star}) = \\int \\max(f_{\\min} - f_{\\star}, 0) \\, p( f_{\\star}\\,|\\, \\mathbf x, \\mathbf y, \\mathbf x_{\\star} ) \\, d f_{\\star}
    """

    def __init__(self, model, constraint):
        """
        Args:
            model: GPflow model (single output) representing our belief of the objective
            constraint: a function g which describes the known constraint on the domain.
                Effectively allows us to explore a domain with a different shape to a
                hyper-rectangle. Note, we wish to find the value of x that maximises the
                unknown function f, subject to the known constraint g(x) > 0.
        """
        super(ConstrainedExpectedImprovement, self).__init__(model)
        self.fmin = DataHolder(np.zeros(1))
        self.constraint = constraint
        self._setup()

    def _setup(self):
        super(ConstrainedExpectedImprovement, self)._setup()
        # Obtain the lowest posterior mean for the previous - feasible - evaluations
        feasible_samples = self.data[0][self.highest_parent.feasible_data_index(), :]
        samples_mean, _ = self.models[0].predict_f(feasible_samples)
        self.fmin.set_data(np.min(samples_mean, axis=0))

    def build_acquisition(self, Xcand):
        # Obtain predictive distributions for the candidates
        candidate_mean, candidate_var = self.models[0].build_predict(Xcand)
        candidate_var = tf.maximum(candidate_var, stability)

        # Hard constraint indicator: 1 where g(x) > 0, 0 elsewhere
        delta = self.constraint(Xcand)
        pof = heaviside(delta)

        # Compute EI
        normal = tf.contrib.distributions.Normal(candidate_mean, tf.sqrt(candidate_var))
        t1 = (self.fmin - candidate_mean) * normal.cdf(self.fmin)
        t2 = candidate_var * normal.prob(self.fmin)
        return pof * tf.add(t1, t2, name=self.__class__.__name__)

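# A plain NumPy/SciPy restatement of the score built above: the closed-form EI under a
# Gaussian posterior, gated by the known constraint indicator 1[g(x) > 0]. Here `g_values`
# stands in for constraint(Xcand); all numbers are made up for illustration.
import numpy as np
from scipy.stats import norm

def constrained_ei(fmin, mean, var, g_values):
    std = np.sqrt(var)
    z = (fmin - mean) / std
    ei = (fmin - mean) * norm.cdf(z) + std * norm.pdf(z)  # matches t1 + t2 in build_acquisition
    return np.where(g_values > 0, ei, 0.0)

mean = np.array([-0.2, -0.4])
var = np.array([0.04, 0.04])
g = np.array([1.0, -1.0])  # second candidate violates the constraint
print(constrained_ei(fmin=0.0, mean=mean, var=var, g_values=g))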