def init_layers_linear(X, Y, Z, kernels, layer_sizes,
                       mean_function=Zero(), num_outputs=None,
                       Layer=SVGPLayer, whiten=False):
    num_outputs = num_outputs or Y.shape[1]
    layers = []
    X_running, Z_running = X.copy(), Z.copy()
    for in_idx, kern_in in enumerate(kernels[:-1]):
        dim_in = layer_sizes[in_idx]
        dim_out = layer_sizes[in_idx + 1]

        # Initialize the mean function to be either Identity or a fixed
        # linear (PCA) projection.
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                # stepping down, use the PCA projection: the eigenvectors
                # corresponding to the dim_out largest eigenvalues
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            mf.A.trainable = False
            mf.b.trainable = False

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=whiten))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(
        Layer(kernels[-1], Z_running, num_outputs, mean_function,
              white=whiten))
    return layers

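# --- Illustrative sketch (not from the original source) ---
# A minimal numpy-only example of the projection matrix W built in
# init_layers_linear above: PCA (top right-singular vectors) when stepping
# down in dimension, identity plus zero padding when stepping up. All shapes
# here are assumptions chosen for illustration.
import numpy as np

X_demo = np.random.randn(100, 5)                    # N=100 inputs, dim_in=5

# Stepping down (dim_in=5 -> dim_out=2): top-2 right singular vectors of X.
_, _, V = np.linalg.svd(X_demo, full_matrices=False)
W_down = V[:2, :].T                                 # (5, 2)
assert (X_demo @ W_down).shape == (100, 2)

# Stepping up (dim_in=5 -> dim_out=8): identity + zero padding.
W_up = np.concatenate([np.eye(5), np.zeros((5, 3))], axis=1)  # (5, 8)
assert (X_demo @ W_up).shape == (100, 8)
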
def test_verify_compatibility_type_errors():
    valid_inducing_variable = construct_basic_inducing_variables(
        [35], input_dim=40)
    valid_kernel = construct_basic_kernel([Matern52()])
    valid_mean_function = Zero()  # all gpflow mean functions are currently valid

    with pytest.raises(GPLayerIncompatibilityException):
        # gpflow kernels must be MultioutputKernels
        verify_compatibility(Matern52(), valid_mean_function,
                             valid_inducing_variable)

    Z = valid_inducing_variable.inducing_variable_list[0].Z
    inducing_variable = InducingPoints(Z)
    with pytest.raises(GPLayerIncompatibilityException):
        # gpflow inducing_variables must be MultioutputInducingVariables
        verify_compatibility(valid_kernel, valid_mean_function,
                             inducing_variable)

def __init__(self, X, Y, Z, kernels, likelihood, num_latent_Y=None,
             minibatch_size=None, num_samples=1, mean_function=Zero()):
    Model.__init__(self)

    assert X.shape[0] == Y.shape[0]
    assert Z.shape[1] == X.shape[1]
    assert kernels[0].input_dim == X.shape[1]

    self.num_data, D_X = X.shape
    self.num_samples = num_samples
    self.D_Y = num_latent_Y or Y.shape[1]
    self.dims = [k.input_dim for k in kernels] + [self.D_Y]

    q_mus, q_sqrts, Zs, mean_functions = init_layers(X, Z, self.dims,
                                                     mean_function)

    layers = []
    for q_mu, q_sqrt, Z, mean_function, kernel in zip(
            q_mus, q_sqrts, Zs, mean_functions, kernels):
        layers.append(Layer(kernel, q_mu, q_sqrt, Z, mean_function))
    self.layers = ParamList(layers)

    for layer in self.layers[:-1]:  # fix the inner layer mean functions
        layer.mean_function.fixed = True

    self.likelihood = likelihood

    if minibatch_size is not None:
        self.X = MinibatchData(X, minibatch_size)
        self.Y = MinibatchData(Y, minibatch_size)
    else:
        self.X = DataHolder(X)
        self.Y = DataHolder(Y)

def init_layers_mf(Y, Z, kernels, num_outputs=None, Layer=SVGP_Layer):
    """
    Creates layer objects from initial data.

    :param Y: numpy array of training targets
    :param Z: list of numpy arrays of inducing point locations for each layer
    :param kernels: list of kernels for each layer
    :param num_outputs: number of outputs (same for each layer)
    :param Layer: the layer object to use
    :return: list of layer objects with which to build a multi-fidelity deep
        Gaussian process model
    """
    num_outputs = num_outputs or Y[-1].shape[1]
    layers = []
    num_layers = len(Z)
    for i in range(num_layers):
        layers.append(Layer(kernels[i], Z[i], num_outputs, Zero()))
    return layers

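# --- Illustrative sketch (not from the original source) ---
# Hedged usage example for init_layers_mf above, assuming two fidelity
# levels and the SVGP_Layer class from this codebase. The kernel
# construction is commented out because its exact API depends on the
# surrounding repo; all shapes are assumptions.
import numpy as np

Y_mf = [np.random.randn(200, 1), np.random.randn(40, 1)]  # low-/high-fidelity targets
Z_mf = [np.random.randn(30, 3), np.random.randn(30, 4)]   # inducing inputs per layer
# kernels = [RBF(3), RBF(4)]                   # assumed kernel constructors
# layers = init_layers_mf(Y_mf, Z_mf, kernels)
# -> two SVGP_Layer objects, each with a Zero() mean function and
#    num_outputs = Y_mf[-1].shape[1] = 1
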
def test_mean_functions_A_minus_A_equals_zero(mean_functions):
    """
    Tests that adding the inverse of a mean function to itself is
    equivalent to having a Zero mean function: A + (-A) = 0.
    """
    X, Y = (rng.randn(Datum.N, Datum.input_dim),
            rng.randn(Datum.N, Datum.output_dim))
    Xtest = rng.randn(30, Datum.input_dim)
    A, A_inverse = mean_functions[0], mean_functions[-1]
    lhs = Additive(A, A_inverse)  # A + (-A)
    rhs = Zero()  # 0

    model_lhs = _create_GPR_model_with_bias(X, Y, mean_function=lhs)
    model_rhs = _create_GPR_model_with_bias(X, Y, mean_function=rhs)

    mu_lhs, var_lhs = model_lhs.predict_f(Xtest)
    mu_rhs, var_rhs = model_rhs.predict_f(Xtest)
    assert_allclose(mu_lhs, mu_rhs)
    assert_allclose(var_lhs, var_rhs)

def test_gpr_posterior_update_cache_with_variables_no_precompute(
        register_posterior_test, q_sqrt_factory, whiten,
        precompute_cache_type):
    kernel = gpflow.kernels.SquaredExponential()
    X = np.random.randn(NUM_INDUCING_POINTS, INPUT_DIMS)
    Y = np.random.randn(NUM_INDUCING_POINTS, 1)

    posterior = GPRPosterior(
        kernel=kernel,
        data=(X, Y),
        likelihood_variance=gpflow.Parameter(0.1),
        precompute_cache=precompute_cache_type,
        mean_function=Zero(),
    )
    posterior.update_cache(PrecomputeCacheType.VARIABLE)
    register_posterior_test(posterior, GPRPosterior)

    assert isinstance(posterior.alpha, tf.Variable)
    assert isinstance(posterior.Qinv, tf.Variable)

def __init__(self, X, Y, Z, kernels, likelihood, num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             white=False, **kwargs):
    layers = init_layers_linear(X, Y, Z, kernels,
                                num_outputs=num_outputs,
                                mean_function=mean_function,
                                white=white)
    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)

def init_layers_linear(X, Y, Z, kernels, num_outputs=None,
                       mean_function=Zero(), Layer=SVGP_Layer, white=False):
    num_outputs = num_outputs or Y.shape[1]
    layers = []
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the PCA projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:  # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            mf.set_trainable(False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=white))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(
        Layer(kernels[-1], Z_running, num_outputs, mean_function,
              white=white))
    return layers

def _init_layers(self, X, Y, Z, dims, kernels, mean_function=Zero(),
                 Layer=SVGPIndependentLayer, white=False):
    """Initialise DGP layers to have the same number of outputs as inputs,
    apart from the final layer."""
    layers = []
    X_running, Z_running = X.copy(), Z.copy()
    for i in range(len(kernels) - 1):
        dim_in, dim_out, kern = dims[i], dims[i + 1], kernels[i]
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            set_trainable(mf.A, False)
            set_trainable(mf.b, False)

        layers.append(Layer(kern, Z_running, dim_out, mf, white=white))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    layers.append(
        Layer(kernels[-1], Z_running, dims[-1], mean_function, white=white))
    return layers

def __init__(self, X, Y, Z, kernels, likelihood, num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)
    num_outputs = num_outputs or Y.shape[1]

    # init the layers
    layers = []

    # inner layers
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the PCA projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:  # stepping up, pad with zeros
                zeros = np.zeros((dim_in, dim_out - dim_in))
                W = np.concatenate([np.eye(dim_in), zeros], 1)
            mf = Linear(W)
            mf.set_trainable(False)

        layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs,
                             mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)

def __init__(self, X, Y, Z, kernels, layer_sizes, likelihood,
             num_outputs=None, mean_function=Zero(), whiten=False,
             num_samples=1):
    layers = init_layers_linear(X, Y, Z, kernels, layer_sizes,
                                mean_function=mean_function,
                                num_outputs=num_outputs,
                                whiten=whiten)
    super().__init__(likelihood, layers, num_samples)

def __init__(self, X, Y, Z, q_sqrt_initial, kernels, likelihoods,
             mean_function=Zero(), white=False, **kwargs):
    layers = self._init_layers(X, Y, Z, q_sqrt_initial, kernels,
                               mean_function=mean_function, white=white)
    super().__init__(likelihoods, layers, **kwargs)

def __init__(self, X, Y, Z, kernels, likelihood, gmat, num_layers=2,
             num_nodes=None, dim_per_node=5, dim_per_X=5, dim_per_Y=5,
             mean_function=Zero(),  # the final layer mean function
             num_samples=1, num_data=None, minibatch_size=None,
             full_cov=False, share_Z=False, nb_init=True, **kwargs):
    layers = init_layers_graph(X, Y, Z, kernels, gmat, num_layers,
                               num_nodes, dim_per_node, dim_per_X,
                               dim_per_Y, share_Z=share_Z, nb_init=nb_init)
    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)

def __init__(self, X, Y, Z, dims, kernels, likelihoods,
             input_prop_dim=None, mean_function=Zero(), white=False,
             **kwargs):
    layers = self._init_layers(X, Y, Z, dims, kernels,
                               mean_function=mean_function, white=white)
    super().__init__(likelihoods, layers, **kwargs)

def __init__(self, X, Y, M, mean_function=Zero(), white=False,
             Layer=SVGPLayer, **kwargs):
    self.temporal_layers = []
    for i in range(self.num_outputs):
        kerneli = self.temporal_kernel()
        inducing_inputs = inducingpoint_wrapper(
            kmeans2(X, M, minit='points')[0])
        layer = Layer(kerneli, inducing_inputs.Z, mean_function, white=white)
        self.temporal_layers.append(layer)
    super().__init__(**kwargs)

def __init__(self, X, Y, kern, likelihood, mean_function, num_latent=None,
             name=None):
    super(GPModel, self).__init__(name=name)
    self.num_latent = num_latent or Y.shape[1]
    self.mean_function = mean_function or Zero(output_dim=self.num_latent)
    self.kern = kern
    self.likelihood = likelihood

    if isinstance(X, np.ndarray):
        # X is a data matrix; each row represents one instance
        X = DataHolder(X)
    if isinstance(Y, np.ndarray):
        # Y is a data matrix; rows correspond to the rows in X,
        # columns are treated independently
        Y = DataHolder(Y)
    self.X, self.Y = X, Y

def __init__(self, t, XExpanded, Y, kern, indices, b, phiPrior=None,
             phiInitial=None, fDebug=False, KConst=None):
    GPModel.__init__(self, XExpanded, Y, kern,
                     likelihood=gpflow.likelihoods.Gaussian(),
                     mean_function=Zero())
    assert len(indices) == t.size, 'indices must be size N'
    assert len(t.shape) == 1, 'pseudotime should be 1D'
    self.N = t.shape[0]
    self.t = t.astype(settings.float_type)  # could be DataHolder? advantages
    self.indices = indices
    self.logPhi = Parameter(
        np.random.randn(t.shape[0],
                        t.shape[0] * 3))  # 1 branch point => 3 functions
    if phiInitial is None:
        phiInitial = np.ones((self.N, 2)) * 0.5  # don't know anything
        phiInitial[:, 0] = np.random.rand(self.N)
        phiInitial[:, 1] = 1 - phiInitial[:, 0]
    self.fDebug = fDebug
    # Used as the p(Z) prior in the KL term. This should sum to 1, but
    # will only do so after UpdatePhPrior.
    if phiPrior is None:
        phiPrior = np.ones((self.N, 2)) * 0.5
    # Fix prior term - this is without the trunk
    self.pZ = DataHolder(np.ones((t.shape[0], t.shape[0] * 3)))
    self.UpdateBranchingPoint(b, phiInitial, prior=phiPrior)
    self.KConst = KConst
    if not fDebug:
        assert KConst is None, 'KConst only for debugging'

def __init__(self, embeds, m, name='Model'):
    Model.__init__(self, name)
    self.nkpts = len(m.Y.value[0])
    self.npts = len(embeds)
    embeds = np.array(m.X_mean.value)
    self.X_mean = Param(embeds)
    self.Z = Param(np.array(m.Z.value))
    self.kern = deepcopy(m.kern)
    self.X_var = Param(np.array(m.X_var.value))
    self.Y = m.Y
    self.likelihood = likelihoods.Gaussian()
    self.mean_function = Zero()
    self.likelihood._check_targets(self.Y.value)
    self._session = None

    self.X_mean.fixed = True
    self.Z.fixed = True
    self.kern.fixed = True
    self.X_var.fixed = True
    self.likelihood.fixed = True

def init_layers(X, dims_in, dims_out, M, final_inducing_points,
                share_inducing_inputs):
    q_mus, q_sqrts, mean_functions, Zs = [], [], [], []
    X_running = X.copy()

    for dim_in, dim_out in zip(dims_in[:-1], dims_out[:-1]):
        if dim_in == dim_out:  # identity for same dims
            W = np.eye(dim_in)
        elif dim_in > dim_out:  # use a PCA mean function for stepping down
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:  # identity + pad with zeros for stepping up
            I = np.eye(dim_in)
            zeros = np.zeros((dim_out - dim_in, dim_in))
            W = np.concatenate([I, zeros], 0).T

        mean_functions.append(Linear(A=W))
        Zs.append(kmeans2(X_running, M, minit='points')[0])

        if share_inducing_inputs:
            q_mus.append([np.zeros((M, dim_out))])
            q_sqrts.append([np.eye(M)[:, :, None] * np.ones((1, 1, dim_out))])
        else:
            q_mus.append([np.zeros((M, 1))] * dim_out)
            q_sqrts.append(
                [np.eye(M)[:, :, None] * np.ones((1, 1, 1))] * dim_out)

        X_running = X_running.dot(W)

    # final layer (as before, but with no mean function)
    mean_functions.append(Zero())
    Zs.append(kmeans2(X_running, final_inducing_points, minit='points')[0])
    q_mus.append([np.zeros((final_inducing_points, 1))])
    q_sqrts.append(
        [np.eye(final_inducing_points)[:, :, None] * np.ones((1, 1, 1))])

    return q_mus, q_sqrts, Zs, mean_functions

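# --- Illustrative sketch (not from the original source) ---
# Hedged example of the shapes produced by init_layers above, assuming one
# inner layer (6 -> 3), M=20 inducing points, final_inducing_points=25, and
# share_inducing_inputs=True. The full call needs scipy's kmeans2 and the
# Linear/Zero mean functions, so only the numpy shape logic is run here.
import numpy as np

M, dim_out, final_M = 20, 3, 25
q_mu_inner = np.zeros((M, dim_out))
q_sqrt_inner = np.eye(M)[:, :, None] * np.ones((1, 1, dim_out))
assert q_sqrt_inner.shape == (M, M, dim_out)

q_mu_final = np.zeros((final_M, 1))
q_sqrt_final = np.eye(final_M)[:, :, None] * np.ones((1, 1, 1))
assert q_sqrt_final.shape == (final_M, final_M, 1)
# X = np.random.randn(300, 6)
# q_mus, q_sqrts, Zs, mean_functions = init_layers(
#     X, dims_in=[6, 3], dims_out=[3, 1], M=M,
#     final_inducing_points=final_M, share_inducing_inputs=True)
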
def boosting_initialization(self, mean_function=Zero(), Layer=SVGPLayer,
                            white=False):
    previous_layers = [self.temporal_layers[0]]
    self.boosting_layers = [None]
    for i in range(1, self.num_outputs):
        current_layers = []
        for previous in previous_layers:
            kerneli = self.boosting_kernel()
            inducing_inputs = previous.q_mu
            layer = Layer(kerneli, inducing_inputs, mean_function,
                          white=white)
            current_layers.append(layer)
        self.boosting_layers.append(current_layers)
        previous_layers.append(self.temporal_layers[i])
        previous_layers.extend(current_layers)

def __init__(self, embeds, skeletons, dist, m, name='Model'):
    Model.__init__(self, name)
    self.skeletons = skeletons
    self.dist_embeds = dist
    self.nkpts = len(skeletons[0, :])
    self.npts = len(embeds)
    self.dist_skeletons = np.ones([self.npts, self.npts]) * -1
    embeds = np.array(m.X_mean.value)
    self.X_mean = Param(embeds)
    self.Z = Param(np.array(m.Z.value))
    self.kern = deepcopy(m.kern)
    self.X_var = Param(np.array(m.X_var.value))
    self.Y = m.Y
    self.likelihood = likelihoods.Gaussian()
    self.mean_function = Zero()
    self.likelihood._check_targets(self.Y.value)
    self._session = None

    self.X_mean.fixed = True
    self.Z.fixed = True
    self.kern.fixed = True
    self.X_var.fixed = True
    self.likelihood.fixed = True

def init_layers(graph_adj, node_feature, kernels, n_layers, all_layers_dim,
                num_inducing, gc_kernel=True, mean_function="linear",
                white=False, q_diag=False):
    assert mean_function in ["linear", "zero"]  # mean function must be linear or zero
    layers = []

    # get the initial Z
    sparse_adj = tuple_to_sparse_matrix(graph_adj[0], graph_adj[1],
                                        graph_adj[2])
    X_running = node_feature.copy()

    for i in range(n_layers):
        tf.logging.info("initialize {}th layer".format(i + 1))
        dim_in = all_layers_dim[i]
        dim_out = all_layers_dim[i + 1]

        conv_X = sparse_adj.dot(X_running)
        Z_running = kmeans2(conv_X, num_inducing[i], minit="points")[0]

        kernel = kernels[i]
        if gc_kernel and kernel.gc_weight:
            # Match Z to the kernel input dimension: reduce via PCA, or pad
            # with zeros.
            X_dim = X_running.shape[1]
            kernel_input_dim = kernel.base_kernel.input_dim
            if X_dim > kernel_input_dim:
                Z_running = pca(Z_running, kernel.base_kernel.input_dim)
            elif X_dim < kernel_input_dim:
                Z_running = np.concatenate(
                    [Z_running,
                     np.zeros((Z_running.shape[0], kernel_input_dim - X_dim))],
                    axis=1)

        if dim_in > dim_out:
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:
            W = np.concatenate(
                [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)

        if mean_function == "zero":
            mf = Zero()
        else:
            if dim_in == dim_out:
                mf = Identity()
            else:
                mf = Linear(W)
                mf.set_trainable(False)

        if gc_kernel:
            feature = GraphConvolutionInducingpoints(Z_running)
        else:
            feature = InducingPoints(Z_running)

        layers.append(svgp_layer(kernel, Z_running, feature, dim_out, mf,
                                 gc_kernel, white=white, q_diag=q_diag))

        if dim_in != dim_out:
            # Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    return layers

def __init__(self, X, Y, Z, kernels, likelihood, num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)
    num_outputs = num_outputs or Y.shape[1]

    # init the layers
    layers = []

    # inner layers
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        mf = Zero()  # Added to compare with DGP EP MCM

        # Inducing points for the layer
        if Z.shape[1] > dim_in:
            # Reduce Z by PCA
            _, _, V = np.linalg.svd(X, full_matrices=False)  # V -> (D, D) matrix
            Z_kern = Z.dot(V[:dim_in, :].T)
        elif Z.shape[1] < dim_in:
            # Increase Z by tiling the first principal component
            _, _, V = np.linalg.svd(X, full_matrices=False)  # V -> (D, D) matrix
            first_pca = Z.dot(V[0, :].T)
            Z_kern = np.tile(first_pca[:, None], (1, dim_in))
        else:
            # Same dimension
            Z_kern = Z.copy()

        layers.append(SVGP_Layer(kern_in, Z_kern, dim_out, mf))

    # final layer
    mf = Zero()  # Added to compare with DGP EP MCM
    dim_in = kernels[-1].input_dim

    # Inducing points for the final layer
    if Z.shape[1] > dim_in:
        # Reduce Z by PCA
        _, _, V = np.linalg.svd(X, full_matrices=False)  # V -> (D, D) matrix
        Z_kern = Z.dot(V[:dim_in, :].T)
    elif Z.shape[1] < dim_in:
        # Increase Z by tiling the first principal component
        _, _, V = np.linalg.svd(X, full_matrices=False)  # V -> (D, D) matrix
        first_pca = Z.dot(V[0, :].T)
        Z_kern = np.tile(first_pca[:, None], (1, dim_in))
    else:
        # Same dimension
        Z_kern = Z.copy()

    layers.append(SVGP_Layer(kernels[-1], Z_kern, num_outputs, mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)

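# --- Illustrative sketch (not from the original source) ---
# Numpy-only demonstration of how the constructor above matches Z to a
# layer's input dimension: project by PCA when Z is too wide, tile the
# first principal component when it is too narrow. Shapes are assumptions.
import numpy as np

X_demo = np.random.randn(100, 6)
Z_demo = np.random.randn(20, 6)
_, _, V = np.linalg.svd(X_demo, full_matrices=False)  # V: (6, 6)

Z_down = Z_demo.dot(V[:3, :].T)               # (20, 3): reduce to dim_in=3
first_pca = Z_demo.dot(V[0, :].T)             # (20,): first principal component
Z_up = np.tile(first_pca[:, None], (1, 8))    # (20, 8): tile up to dim_in=8
assert Z_down.shape == (20, 3) and Z_up.shape == (20, 8)
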
def __init__(self, X, Y, M, gpar, likelihoods=None, reorder=None,
             minibatch_size=None, missing=False, mean_function=Zero(),
             white=False, impute=True, scale=1.0, scale_tie=False,
             per=False, per_period=1.0, per_scale=1.0, per_decay=10.0,
             input_linear=False, input_linear_scale=100.0, linear=True,
             linear_scale=100.0, nonlinear=True, nonlinear_scale=0.1,
             nonlinear_dependent=False, rq=False, markov=None,
             noise_inner=1e-05, noise_obs=0.01, normalise_y=True,
             transform_y=(lambda x: x, lambda x: x), **kwargs):
    self.impute = impute
    self.model_config = {
        'scale': scale,
        'scale_tie': scale_tie,
        'per': per,
        'per_period': per_period,
        'per_scale': per_scale,
        'per_decay': per_decay,
        'input_linear': input_linear,
        'input_linear_scale': input_linear_scale,
        'linear': linear,
        'linear_scale': linear_scale,
        'nonlinear': nonlinear,
        'nonlinear_scale': nonlinear_scale,
        'nonlinear_dependent': nonlinear_dependent,
        'rq': rq,
        'markov': markov,
        'noise_inner': noise_inner,
    }
    self.m = X.shape[1]
    self.num_outputs = Y.shape[1]
    self.reorder = reorder
    kernels = self._kernels_generator()

    if not likelihoods:
        likelihoods = []
        for i in range(self.num_outputs):
            likelihoods.append(Gaussian(variance=noise_obs))

    # TODO: normalize y
    # TODO: impute, handle missing data, make closed down
    # TODO: initialize inducing locations Z
    Z, q_sqrt_initial = self._initialize_inducing_locations_from_post_GPAR(
        gpar, X, Y, M)
    self.initial_inducing_points = Z

    if np.any(np.isnan(Y)):
        missing = True

    super().__init__(X, Y, Z, q_sqrt_initial, kernels, likelihoods,
                     mean_function=mean_function, white=white,
                     num_data=X.shape[0], minibatch_size=minibatch_size,
                     missing=missing, **kwargs)

import numpy as np
import pytest

import gpflow
from gpflow.config import default_int
from gpflow.inducing_variables import InducingPoints
from gpflow.mean_functions import (Additive, Constant, Linear, Product,
                                   SwitchedMeanFunction, Zero)

rng = np.random.RandomState(99021)


class Datum:
    input_dim, output_dim = 3, 2
    N, Ntest, M = 20, 30, 10


_mean_functions = [
    Zero(),
    Linear(A=rng.randn(Datum.input_dim, Datum.output_dim),
           b=rng.randn(Datum.output_dim, 1).reshape(-1)),
    Constant(c=rng.randn(Datum.output_dim, 1).reshape(-1)),
]


@pytest.mark.parametrize('mean_function_1', _mean_functions)
@pytest.mark.parametrize('mean_function_2', _mean_functions)
@pytest.mark.parametrize('operation', ['+', 'x'])
def test_mean_functions_output_shape(mean_function_1, mean_function_2,
                                     operation):
    """
    Test the output shape for basic and compositional mean functions; also
    check that the combination of mean functions returns the correct class.
    """
    X = np.random.randn(Datum.N, Datum.input_dim)
    Y = mean_function_1(X)

def test_models_with_mean_functions_changes(model_class):
    """
    Simply check that all models have a higher prediction with a constant
    mean function than with a zero mean function.

    For compositions of mean functions, check that multiplication by /
    addition of a constant results in a higher prediction, whereas addition
    of zero / multiplication with one does not.
    """
    data = rng.randn(Datum.N, Datum.input_dim), rng.randn(Datum.N, 1)
    predict_at = rng.randn(Datum.Ntest, Datum.input_dim)
    inducing_variable = InducingPoints(Z=rng.randn(Datum.M, Datum.input_dim))
    kernel = gpflow.kernels.Matern32()
    likelihood = gpflow.likelihoods.Gaussian()
    zero_mean = Zero()
    non_zero_mean = Constant(c=np.ones(1) * 10)

    if model_class in [gpflow.models.GPR]:
        model_zero_mean = model_class(data, kernel=kernel,
                                      mean_function=zero_mean)
        model_non_zero_mean = model_class(data, kernel=kernel,
                                          mean_function=non_zero_mean)
    elif model_class in [gpflow.models.VGP]:
        model_zero_mean = model_class(data, likelihood=likelihood,
                                      kernel=kernel, mean_function=zero_mean)
        model_non_zero_mean = model_class(data, likelihood=likelihood,
                                          kernel=kernel,
                                          mean_function=non_zero_mean)
    elif model_class in [gpflow.models.SVGP]:
        model_zero_mean = model_class(kernel=kernel, likelihood=likelihood,
                                      inducing_variable=inducing_variable,
                                      mean_function=zero_mean)
        model_non_zero_mean = model_class(kernel=kernel,
                                          likelihood=likelihood,
                                          inducing_variable=inducing_variable,
                                          mean_function=non_zero_mean)
    elif model_class in [gpflow.models.SGPR, gpflow.models.GPRFITC]:
        model_zero_mean = model_class(data, kernel=kernel,
                                      inducing_variable=inducing_variable,
                                      mean_function=zero_mean)
        model_non_zero_mean = model_class(data, kernel=kernel,
                                          inducing_variable=inducing_variable,
                                          mean_function=non_zero_mean)
    elif model_class in [gpflow.models.SGPMC]:
        model_zero_mean = model_class(data, kernel=kernel,
                                      likelihood=likelihood,
                                      inducing_variable=inducing_variable,
                                      mean_function=zero_mean)
        model_non_zero_mean = model_class(data, kernel=kernel,
                                          likelihood=likelihood,
                                          inducing_variable=inducing_variable,
                                          mean_function=non_zero_mean)
    elif model_class in [gpflow.models.GPMC]:
        model_zero_mean = model_class(data, kernel=kernel,
                                      likelihood=likelihood,
                                      mean_function=zero_mean)
        model_non_zero_mean = model_class(data, kernel=kernel,
                                          likelihood=likelihood,
                                          mean_function=non_zero_mean)
    else:
        raise NotImplementedError

    mu_zero, var_zero = model_zero_mean.predict_f(predict_at)
    mu_non_zero, var_non_zero = model_non_zero_mean.predict_f(predict_at)

    # The predictive variance remains unchanged after modifying the mean function.
    assert np.all(var_zero.numpy() == var_non_zero.numpy())
    # The predictive mean changes after modifying the mean function.
    assert not np.all(mu_zero.numpy() == mu_non_zero.numpy())

def __init__(self, X, Y, Z, kernels, likelihood, num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)
    num_outputs = num_outputs or Y.shape[1]

    # init the layers
    layers = []

    # inner layers
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        if isinstance(kern_in, Conv):
            dim_in = kern_in.basekern.input_dim
        else:
            dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            # stepping down, use the PCA projection
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
            b = np.zeros(1, dtype=np.float32)
            mf = Linear(W, b)
            mf.set_trainable(False)

        if isinstance(kern_in, Conv):
            Z_patch = np.unique(
                kern_in.compute_patches(Z_running).reshape(
                    -1, kern_in.patch_len), axis=0)
            Z_patch = Z_patch[
                np.random.permutation(len(Z_patch))[:Z_running.shape[0]], :]
            layers.append(svconvgp(kern_in, Z_patch, dim_out, mf))
        else:
            layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    if isinstance(kernels[-1], Conv):
        Z_patch = np.unique(
            kernels[-1].compute_patches(Z_running).reshape(
                -1, kernels[-1].patch_len), axis=0)
        Z_patch = Z_patch[
            np.random.permutation(len(Z_patch))[:Z_running.shape[0]], :]
        layers.append(svconvgp(kernels[-1], Z_patch, num_outputs,
                               mean_function))
    else:
        layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs,
                                 mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)

def __init__(self, X, Y, kernf, kerng, likelihood, Zf, Zg,
             mean_function=None, minibatch_size=None, name='model'):
    Model.__init__(self, name)
    self.mean_function = mean_function or Zero()
    self.kernf = kernf
    self.kerng = kerng
    self.likelihood = likelihood
    self.whiten = False
    self.q_diag = True

    # save the initial attributes for future plotting purposes
    Xtrain = DataHolder(X)
    Ytrain = DataHolder(Y)
    self.Xtrain, self.Ytrain = Xtrain, Ytrain

    # sort out the X, Y into MinibatchData objects
    if minibatch_size is None:
        minibatch_size = X.shape[0]
    self.num_data = X.shape[0]
    self.num_latent = Y.shape[1]  # num_latent will be 1
    self.X = MinibatchData(X, minibatch_size, np.random.RandomState(0))
    self.Y = MinibatchData(Y, minibatch_size, np.random.RandomState(0))

    # add variational parameters
    self.Zf = Param(Zf)
    self.Zg = Param(Zg)
    self.num_inducing_f = Zf.shape[0]
    self.num_inducing_g = Zg.shape[0]

    # init variational parameters
    self.u_fm = Param(
        np.random.randn(self.num_inducing_f, self.num_latent) * 0.01)
    self.u_gm = Param(
        np.random.randn(self.num_inducing_g, self.num_latent) * 0.01)

    if self.q_diag:
        self.u_fs_sqrt = Param(
            np.ones((self.num_inducing_f, self.num_latent)),
            transforms.positive)
        self.u_gs_sqrt = Param(
            np.ones((self.num_inducing_g, self.num_latent)),
            transforms.positive)
    else:
        u_fs_sqrt = np.array([
            np.eye(self.num_inducing_f) for _ in range(self.num_latent)
        ]).swapaxes(0, 2)
        self.u_fs_sqrt = Param(
            u_fs_sqrt, transforms.LowerTriangular(u_fs_sqrt.shape[2]))
        u_gs_sqrt = np.array([
            np.eye(self.num_inducing_g) for _ in range(self.num_latent)
        ]).swapaxes(0, 2)
        self.u_gs_sqrt = Param(
            u_gs_sqrt, transforms.LowerTriangular(u_gs_sqrt.shape[2]))

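# --- Illustrative sketch (not from the original source) ---
# Shape check for the full-covariance initialisation above: stacking
# num_latent identity matrices and swapping axes 0 and 2 yields an array of
# shape (M, M, num_latent), the layout expected by gpflow 1.x's
# transforms.LowerTriangular. M and num_latent below are assumed values.
import numpy as np

M, num_latent = 10, 1
u_fs_sqrt = np.array([np.eye(M) for _ in range(num_latent)]).swapaxes(0, 2)
assert u_fs_sqrt.shape == (M, M, num_latent)
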
def __init__(self, X, Y, Z, input_dims, likelihood, adj,
             agg_op_name='concat3d', ARD=False, is_Z_forward=True,
             mean_trainable=False, out_mf0=True, kern_type='RBF', **kwargs):
    """
    Init layers for the graph DGP model.

    :param X: (s1, n, d_in)
    :param Y: (s1, n, d_out)
    :param Z: (s2, n, d_in)
    :param kernels: [(n, d_in), ...], length = L
    :param likelihood: todo
    :param adj: (n, n)
    :param is_Z_forward: whether Z should be aggregated and propagated
        among layers
    """
    assert np.ndim(X) == 3 and np.ndim(Z) == 3 and np.ndim(Y) == 3
    nb_agg = get_nbf_op(agg_op_name)
    num_nodes = adj.shape[0]
    raw_mask = adj.copy()

    # 1. construct the inner layers
    layers, X_running, Z_running = [], X.copy(), Z.copy()
    layer_n = 0
    # get the in->out dimensions for each layer
    for dim_in, dim_out in zip(input_dims[:-1], input_dims[1:]):
        # construct the mean function
        W, fixed_nmf = FixedNMF.init(X_running, adj, dim_out, agg_op_name,
                                     mean_trainable)
        # construct the kernel
        if 'concat' in agg_op_name:
            mask_concat = neighbour_feats(
                raw_mask, np.ones((num_nodes, dim_in)))  # (n, n*feat)
            kern = RBFNodes(num_nodes, num_nodes * dim_in, mask=mask_concat,
                            ARD=ARD, layer_n=layer_n, kern_type=kern_type)
        else:
            kern = RBFNodes(num_nodes, dim_in, ARD=ARD, layer_n=layer_n,
                            kern_type=kern_type)
        # init the layer
        layers.append(
            SVGPG_Layer(fixed_nmf, kern, Z_running, adj, dim_out,
                        agg_op_name, is_Z_forward))
        print('input-output dim ({}(agg:{})->{})'.format(
            dim_in, kern.input_dim, dim_out))

        # propagate X & Z
        if is_Z_forward:
            Z_running = nb_agg(adj, Z_running)
        # warning: if the aggregation mode of X is 'concat' and Z is not
        # aggregated, the mean functions of X and Z should be different.
        Z_running = FixedNMF.np_call(Z_running, W)  # (s2, n, d_in) -> (s2, n, d_out)
        X_running = FixedNMF.np_call(
            nb_agg(adj, X_running), W)  # (s1, n, d_in) -> (s1, n, d_out)
        layer_n += 1

    # 2. construct the last/output layer according to the shape of Y
    # construct the mean function
    dim_in = input_dims[-1]
    if 'concat' in agg_op_name:
        mask_concat = neighbour_feats(raw_mask, np.ones((num_nodes, dim_in)))
        kern = RBFNodes(num_nodes, num_nodes * dim_in, mask=mask_concat,
                        ARD=ARD, layer_n=layer_n, kern_type=kern_type)
    else:
        kern = RBFNodes(num_nodes, dim_in, ARD=ARD, layer_n=layer_n,
                        kern_type=kern_type)
    mf = Zero() if out_mf0 else FixedNMF.init(
        X_running, adj, Y.shape[-1], agg_op_name, mean_trainable)[1]
    # init the layer
    layers.append(
        SVGPG_Layer(mf, kern, Z_running, adj, Y.shape[-1], agg_op_name,
                    is_Z_forward))
    print('input-output dim ({}(agg:{})->{})'.format(
        dim_in, kern.input_dim, Y.shape[-1]))

    # 3. use the layers to init the base model with 2D inputs
    DGP_Base.__init__(self, X.reshape(X.shape[0], -1),
                      Y.reshape(Y.shape[0], -1), likelihood, layers,
                      name='DGPG', **kwargs)

def _make_part_model(self, X, Y, weights, Z, q_mu, q_sqrt, W, freqs,
                     minibatch_size=None, priors=None):
    """
    Create a gpflow model for a selection of data.

    X: array (N, Din)
    Y: array (N, P, Nf)
    weights: array like Y, the statistical weights of each datapoint
    minibatch_size: int
    Z: list of array (M, Din), the inducing point mean locations
    q_mu: list of array (M, L)
    q_sqrt: list of array (L, M, M)
    W: array (P, L)
    freqs: array (Nf,), the frequencies
    priors: dict of priors for the global model

    Returns:
        model: gpflow.models.Model
    """
    N, P, Nf = Y.shape
    _, Din = X.shape
    assert priors is not None

    likelihood_var = priors['likelihood_var']
    tec_kern_time_ls = priors['tec_kern_time_ls']
    tec_kern_dir_ls = priors['tec_kern_dir_ls']
    tec_kern_var = priors['tec_kern_var']
    tec_mean = priors['tec_mean']
    Z_var = priors['Z_var']

    P, L = W.shape

    with defer_build():
        # Define the likelihood.
        likelihood = WrappedPhaseGaussianMulti(
            tec_scale=priors['tec_scale'], freqs=freqs)
        likelihood.variance = np.exp(likelihood_var[0])  # median as initial value
        likelihood.variance.prior = LogNormal(likelihood_var[0],
                                              likelihood_var[1]**2)
        likelihood.variance.set_trainable(True)

        def _kern():
            kern_thin_layer = ThinLayer(np.array([0., 0., 0.]),
                                        priors['tec_scale'],
                                        active_dims=slice(2, 6, 1))
            kern_time = Matern32(1, active_dims=slice(6, 7, 1))
            kern_dir = Matern32(2, active_dims=slice(0, 2, 1))

            ###
            # time kernel
            kern_time.lengthscales = np.exp(tec_kern_time_ls[0])
            kern_time.lengthscales.prior = LogNormal(
                tec_kern_time_ls[0], tec_kern_time_ls[1]**2)
            kern_time.lengthscales.set_trainable(True)

            kern_time.variance = 1.  # np.exp(tec_kern_var[0])
            # kern_time.variance.prior = LogNormal(tec_kern_var[0], tec_kern_var[1]**2)
            kern_time.variance.set_trainable(False)

            ###
            # directional kernel
            kern_dir.variance = np.exp(tec_kern_var[0])
            kern_dir.variance.prior = LogNormal(tec_kern_var[0],
                                                tec_kern_var[1]**2)
            kern_dir.variance.set_trainable(True)
            kern_dir.lengthscales = np.exp(tec_kern_dir_ls[0])
            kern_dir.lengthscales.prior = LogNormal(
                tec_kern_dir_ls[0], tec_kern_dir_ls[1]**2)
            kern_dir.lengthscales.set_trainable(True)

            kern = kern_dir * kern_time  # (kern_thin_layer + kern_dir) * kern_time
            return kern

        kern = mk.SeparateMixedMok([_kern() for _ in range(L)], W)

        feature_list = []
        for _ in range(L):
            feat = InducingPoints(Z)
            # feat.Z.prior = Gaussian(Z, Z_var)
            feature_list.append(feat)
        feature = mf.MixedKernelSeparateMof(feature_list)

        mean = Zero()
        model = HomoscedasticPhaseOnlySVGP(
            weights, X, Y, kern, likelihood, feat=feature,
            mean_function=mean, minibatch_size=minibatch_size,
            num_latent=P, num_data=N, whiten=False, q_mu=q_mu,
            q_sqrt=q_sqrt)
        model.compile()

    return model

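# --- Illustrative sketch (not from the original source) ---
# Hedged example of the `priors` dict consumed by _make_part_model above.
# The keys are exactly those read in the code; the (mean-of-log, std-of-log)
# pairs and the scalar values are made-up placeholders, not values from the
# source.
import numpy as np

priors = {
    'likelihood_var':   (np.log(1.0), 0.5),   # LogNormal prior on the likelihood variance
    'tec_kern_time_ls': (np.log(50.0), 0.5),  # time lengthscale
    'tec_kern_dir_ls':  (np.log(1.0), 0.5),   # directional lengthscale
    'tec_kern_var':     (np.log(1e-3), 0.5),  # kernel variance
    'tec_mean':         (0.0, 1.0),           # read but unused in this snippet
    'Z_var':            1.0,                  # only used in the commented-out Z prior
    'tec_scale':        1e-3,                 # passed to the likelihood and ThinLayer
}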