def test_bug_277_regression():
    """
    See github issue #277. This is a regression test.
    """
    model1, model2 = Linear(), Linear()
    assert model1.b.numpy() == model2.b.numpy()
    model2.b.assign([1.])
    assert not model1.b.numpy() == model2.b.numpy()
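# Hedged illustration of the class of bug the regression test above guards against
# (assumption: issue #277 concerned Linear instances sharing parameter state).
# A mutable default argument is evaluated once at definition time, so every
# instance that relies on it aliases the same array; BadLinear is a hypothetical
# minimal repro, not GPflow's actual implementation.
import numpy as np

class BadLinear:
    def __init__(self, b=np.zeros(1)):  # created once, shared by all instances
        self.b = b

m1, m2 = BadLinear(), BadLinear()
m2.b[0] = 1.0
assert m1.b[0] == 1.0  # m1 changed too: the two "independent" models share b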
def init_layers(X, Z, dims, final_mean_function):
    M = Z.shape[0]
    q_mus, q_sqrts, mean_functions, Zs = [], [], [], []
    X_running, Z_running = X.copy(), Z.copy()

    for dim_in, dim_out in zip(dims[:-2], dims[1:-1]):
        if dim_in == dim_out:  # identity for same dims
            W = np.eye(dim_in)
        elif dim_in > dim_out:  # use PCA mf for stepping down
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:  # identity + pad with zeros for stepping up
            I = np.eye(dim_in)
            zeros = np.zeros((dim_in, dim_out - dim_in))
            W = np.concatenate([I, zeros], 1)

        mean_functions.append(Linear(A=W))
        Zs.append(Z_running.copy())
        q_mus.append(np.zeros((M, dim_out)))
        q_sqrts.append(np.eye(M)[:, :, None] * np.ones((1, 1, dim_out)))

        Z_running = Z_running.dot(W)
        X_running = X_running.dot(W)

    # final layer (as before but no mean function)
    mean_functions.append(final_mean_function)
    Zs.append(Z_running.copy())
    q_mus.append(np.zeros((M, dims[-1])))
    q_sqrts.append(np.eye(M)[:, :, None] * np.ones((1, 1, dims[-1])))

    return q_mus, q_sqrts, Zs, mean_functions
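# Usage sketch for init_layers above (toy shapes only; Zero() stands in for the
# final-layer mean function, as elsewhere in this corpus):
X_demo = np.random.randn(100, 5)
Z_demo = np.random.randn(20, 5)
dims = [5, 5, 2, 1]  # input width, two hidden widths, output width
q_mus, q_sqrts, Zs, mfs = init_layers(X_demo, Z_demo, dims, Zero())
assert q_mus[0].shape == (20, 5)         # first inner layer keeps dim 5
assert q_sqrts[-1].shape == (20, 20, 1)  # final layer: M x M x output_dim
assert len(mfs) == len(dims) - 1         # one mean function per layer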
def setUp(self):
    self.rng = np.random.RandomState(42)
    input_dim = 2
    output_dim = 2
    kern_list = [RBF(2) for _ in range(output_dim)]
    self.W0 = np.zeros((input_dim, output_dim))
    mean_function = Linear(A=self.W0)
    self.Z = self.rng.randn(5, 2)
    num_inducing = 5
    self.layer = MultikernelHiddenLayer(input_dim=input_dim,
                                        output_dim=output_dim,
                                        num_inducing=num_inducing,
                                        kernel_list=kern_list,
                                        share_Z=False,
                                        mean_function=mean_function)
    self.layer_shared_Z = MultikernelHiddenLayer(input_dim=input_dim,
                                                 output_dim=output_dim,
                                                 num_inducing=num_inducing,
                                                 kernel_list=kern_list,
                                                 share_Z=True,
                                                 mean_function=mean_function)
    self.X = self.rng.randn(10, 2)
def init_layers_linear(X, Y, Z, kernels, layer_sizes, mean_function=Zero(),
                       num_outputs=None, Layer=SVGPLayer, whiten=False):
    num_outputs = num_outputs or Y.shape[1]
    layers = []
    X_running, Z_running = X.copy(), Z.copy()

    for in_idx, kern_in in enumerate(kernels[:-1]):
        dim_in = layer_sizes[in_idx]
        dim_out = layer_sizes[in_idx + 1]

        # Initialize mean function to be either Identity or PCA projection
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                # stepping down, use the pca projection
                # use eigenvectors corresponding to dim_out largest eigenvalues
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            gpflow.set_trainable(mf.A, False)
            gpflow.set_trainable(mf.b, False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=whiten))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(Layer(kernels[-1], Z_running, num_outputs, mean_function,
                        white=whiten))
    return layers
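# Standalone check (numpy only) of the projection matrix W used by the
# initialisers in this corpus: stepping down takes the top right-singular
# vectors of the running inputs (a PCA projection); stepping up pads an
# identity with zero columns so the extra outputs start at zero.
X_chk = np.random.randn(50, 4)
_, _, V = np.linalg.svd(X_chk, full_matrices=False)
W_down = V[:2, :].T                                      # (4, 2)
W_up = np.concatenate([np.eye(4), np.zeros((4, 2))], 1)  # (4, 6)
assert W_down.shape == (4, 2) and W_up.shape == (4, 6)
assert np.allclose(X_chk.dot(W_up)[:, :4], X_chk)        # first 4 dims pass through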
def mean_function_factory(mean_function_name, D_in, D_out):
    if mean_function_name == "Zero":
        return Zero(output_dim=D_out)
    elif mean_function_name == "Constant":
        return Constant(c=rng.rand(D_out))
    elif mean_function_name == "Linear":
        return Linear(A=rng.rand(D_in, D_out), b=rng.rand(D_out))
    else:
        return None
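# Example calls (assumes the module-level rng and the gpflow mean functions
# used above are in scope; shapes are illustrative):
mf_lin = mean_function_factory("Linear", D_in=3, D_out=2)  # Linear with random A, b
mf_zero = mean_function_factory("Zero", 3, 2)              # Zero with output_dim=2
assert mean_function_factory("unknown", 3, 2) is None      # unmatched names -> None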
def init_linear(X, Z, all_kernels, initialized_Zs=False):
    """
    If there are no Zs from a previous initialization, all_Zs is initialized
    according to the Salimbeni scheme (Z should be MxD). Otherwise (e.g. when
    warm-starting) the Zs obtained from that initialization are simply taken
    and put into the all_Zs list (Z should be a list of L arrays).
    """
    if initialized_Zs:
        all_Zs = Z
    else:
        all_Zs = []
    all_mean_funcs = []

    X_running = X.copy()
    if not initialized_Zs:
        Z_running = Z.copy()

    for kern_in, kern_out in zip(all_kernels[:-1], all_kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:  # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            mf.set_trainable(False)

        all_mean_funcs.append(mf)
        if not initialized_Zs:
            all_Zs.append(Z_running)

        if dim_in != dim_out:
            X_running = X_running.dot(W)
            if not initialized_Zs:
                Z_running = Z_running.dot(W)

    # final layer
    all_mean_funcs.append(Zero())
    if not initialized_Zs:
        all_Zs.append(Z_running)
    return all_Zs, all_mean_funcs
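# Usage sketch for init_linear above. Kern is a hypothetical stub standing in
# for a GPflow-1 kernel (only .input_dim is needed here); the mean functions
# (Linear, Identity, Zero) are assumed imported as in the snippet.
class Kern:
    def __init__(self, input_dim):
        self.input_dim = input_dim

X_demo = np.random.randn(30, 4)
kernels_demo = [Kern(4), Kern(2), Kern(2)]

# cold start: Z is a single MxD array, projected layer by layer
Zs, mfs = init_linear(X_demo, np.random.randn(10, 4), kernels_demo)
# warm start: Z is already a list of per-layer arrays and is used as-is
Zs_warm, mfs_warm = init_linear(X_demo, Zs, kernels_demo, initialized_Zs=True)
assert [z.shape[1] for z in Zs] == [4, 2, 2]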
def setUp(self):
    self.rng = np.random.RandomState(42)
    kernel = RBF(2)
    input_dim = 2
    output_dim = 2
    self.W0 = np.zeros((input_dim, output_dim))
    mean_function = Linear(A=self.W0)
    self.Z = self.rng.randn(5, 2)
    num_inducing = 5
    self.layer = HiddenLayer(input_dim=input_dim,
                             output_dim=output_dim,
                             num_inducing=num_inducing,
                             kernel=kernel,
                             mean_function=mean_function)
    self.X = self.rng.randn(10, 2)
def prepare(self):
    N = 100
    M = 10
    rng = np.random.RandomState(42)
    X = rng.randn(N, 2)
    Y = rng.randn(N, 1)
    Z = rng.randn(M, 2)
    X_ind = rng.randint(0, 2, (N, 1))
    Z_ind = rng.randint(0, 2, (M, 1))
    X = np.hstack([X, X_ind])
    Y = np.hstack([Y, X_ind])
    Z = np.hstack([Z, Z_ind])
    Xs = rng.randn(M, 2)
    Xs_ind = rng.randint(0, 2, (M, 1))
    Xs = np.hstack([Xs, Xs_ind])

    with defer_build():
        lik = SwitchedLikelihood([Gaussian(), Gaussian()])
        input_layer = InputLayer(input_dim=2,
                                 output_dim=1,
                                 num_inducing=M,
                                 kernel=RBF(2) + White(2),
                                 mean_function=Linear(A=np.ones((3, 1))),
                                 multitask=True)
        output_layer = OutputLayer(input_dim=1,
                                   output_dim=1,
                                   num_inducing=M,
                                   kernel=RBF(1) + White(1),
                                   multitask=True)
        seq = MultitaskSequential([input_layer, output_layer])
        model = MultitaskDSDGP(X=X, Y=Y, Z=Z, layers=seq,
                               likelihood=lik, num_latent=1)
    model.compile()
    return model, Xs
def init_layers_linear(X, Y, Z, kernels, num_outputs=None,
                       mean_function=Zero(), Layer=SVGP_Layer, white=False):
    num_outputs = num_outputs or Y.shape[1]
    layers = []
    X_running, Z_running = X.copy(), Z.copy()

    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:  # stepping up, use identity + padding
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            mf.set_trainable(False)

        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=white))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(
        Layer(kernels[-1], Z_running, num_outputs, mean_function, white=white))
    return layers
def _init_layers(self, X, Y, Z, dims, kernels, mean_function=Zero(),
                 Layer=SVGPIndependentLayer, white=False):
    """Initialise DGP layers to have the same number of outputs as inputs,
    apart from the final layer."""
    layers = []
    X_running, Z_running = X.copy(), Z.copy()

    for i in range(len(kernels) - 1):
        dim_in, dim_out, kern = dims[i], dims[i + 1], kernels[i]

        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:
                W = np.concatenate(
                    [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            set_trainable(mf.A, False)
            set_trainable(mf.b, False)

        layers.append(Layer(kern, Z_running, dim_out, mf, white=white))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    layers.append(
        Layer(kernels[-1], Z_running, dims[-1], mean_function, white=white))
    return layers
def __init__(self, X, Y, Z, kernels, likelihood,
             num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)
    num_outputs = num_outputs or Y.shape[1]

    # init the layers
    layers = []

    # inner layers
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:  # pad with zeros
                zeros = np.zeros((dim_in, dim_out - dim_in))
                W = np.concatenate([np.eye(dim_in), zeros], 1)
            mf = Linear(W)
            mf.set_trainable(False)

        layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
def _fit(self, X, F, data):
    if self.regr == 'constant':
        mf = Constant()
    elif self.regr == 'linear':
        mf = Linear(numpy.ones((X.shape[1], 1)), numpy.ones((1, 1)))
    else:
        raise ValueError("Unknown regr '{}'".format(self.regr))

    if self.kernel == 'linear':
        kernel = gpflow.kernels.Linear(X.shape[1], ARD=self.ARD)
    elif self.kernel == 'rbf':
        kernel = gpflow.kernels.RBF(X.shape[1], ARD=self.ARD)
    elif self.kernel == 'polynomial':
        kernel = gpflow.kernels.Polynomial(X.shape[1], ARD=self.ARD)
    else:
        raise ValueError("Unknown kernel '{}'".format(self.kernel))

    m = gpflow.gpr.GPR(X, numpy.array([F]).T, kern=kernel, mean_function=mf)
    m.optimize()
    self.model = m
def prepare(self):
    N = 100
    M = 10
    rng = np.random.RandomState(42)
    X = rng.randn(N, 2)
    Y = rng.randn(N, 1)
    Z = rng.randn(M, 2)
    Xs = rng.randn(M, 2)

    lik = Gaussian()
    input_layer = InputLayer(input_dim=2,
                             output_dim=1,
                             num_inducing=M,
                             kernel=RBF(2) + White(2),
                             mean_function=Linear(A=np.ones((2, 1))))
    output_layer = OutputLayer(input_dim=1,
                               output_dim=1,
                               num_inducing=M,
                               kernel=RBF(1) + White(1))
    seq = Sequential([input_layer, output_layer])
    model = DSDGP(X=X, Y=Y, Z=Z, layers=seq, likelihood=lik)
    model.compile()
    return model, Xs
def init_layers(X, dims_in, dims_out, M, final_inducing_points,
                share_inducing_inputs):
    q_mus, q_sqrts, mean_functions, Zs = [], [], [], []
    X_running = X.copy()

    for dim_in, dim_out in zip(dims_in[:-1], dims_out[:-1]):
        if dim_in == dim_out:  # identity for same dims
            W = np.eye(dim_in)
        elif dim_in > dim_out:  # use PCA mf for stepping down
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:  # identity + pad with zeros for stepping up
            I = np.eye(dim_in)
            zeros = np.zeros((dim_out - dim_in, dim_in))
            W = np.concatenate([I, zeros], 0).T

        mean_functions.append(Linear(A=W))
        Zs.append(kmeans2(X_running, M, minit='points')[0])

        if share_inducing_inputs:
            q_mus.append([np.zeros((M, dim_out))])
            q_sqrts.append([np.eye(M)[:, :, None] * np.ones((1, 1, dim_out))])
        else:
            q_mus.append([np.zeros((M, 1))] * dim_out)
            q_sqrts.append([np.eye(M)[:, :, None] * np.ones((1, 1, 1))] * dim_out)

        X_running = X_running.dot(W)

    # final layer (as before but no mean function)
    mean_functions.append(Zero())
    Zs.append(kmeans2(X_running, final_inducing_points, minit='points')[0])
    q_mus.append([np.zeros((final_inducing_points, 1))])
    q_sqrts.append(
        [np.eye(final_inducing_points)[:, :, None] * np.ones((1, 1, 1))])

    return q_mus, q_sqrts, Zs, mean_functions
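# Sanity check (numpy only): the axis-0-then-transpose padding used in
# init_layers above builds the same W as the axis-1 padding used by the other
# initialisers in this corpus.
dim_in_chk, dim_out_chk = 3, 5
W_a = np.concatenate([np.eye(dim_in_chk),
                      np.zeros((dim_out_chk - dim_in_chk, dim_in_chk))], 0).T
W_b = np.concatenate([np.eye(dim_in_chk),
                      np.zeros((dim_in_chk, dim_out_chk - dim_in_chk))], 1)
assert np.array_equal(W_a, W_b)  # both are (3, 5): identity plus zero columns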
def build_model(self, ARGS, X, Y, conditioning=False, apply_name=True,
                noise_var=None, mean_function=None):
    if not conditioning:
        N, D = X.shape

        # first layer inducing points
        if N > ARGS.M:
            Z = kmeans2(X, ARGS.M, minit='points')[0]
        else:
            # This is the old way of initializing Zs:
            # M_pad = ARGS.M - N
            # Z = np.concatenate([X.copy(), np.random.randn(M_pad, D)], 0)
            # This is the new way of initializing Zs:
            min_x, max_x = self.bounds[0]
            min_x = (min_x - self.x_mean) / self.x_std
            max_x = (max_x - self.x_mean) / self.x_std
            Z = np.linspace(min_x, max_x, num=ARGS.M)  # * X.shape[1])
            Z = Z.reshape((-1, X.shape[1]))

        #################################### layers
        P = np.linalg.svd(X, full_matrices=False)[2]

        layers = []

        DX = D
        DY = 1

        D_in = D
        D_out = D

        with defer_build():
            # variance initialization
            lik = Gaussian()
            lik.variance = ARGS.likelihood_variance

            if len(ARGS.configuration) > 0:
                for c, d in ARGS.configuration.split('_'):
                    if c == 'G':
                        num_gps = int(d)
                        A = np.zeros((D_in, D_out))
                        D_min = min(D_in, D_out)
                        A[:D_min, :D_min] = np.eye(D_min)
                        mf = Linear(A=A)
                        mf.b.set_trainable(False)

                        def make_kern():
                            k = RBF(D_in, lengthscales=float(D_in) ** 0.5,
                                    variance=1., ARD=True)
                            k.variance.set_trainable(False)
                            return k

                        PP = np.zeros((D_out, num_gps))
                        PP[:, :min(num_gps, DX)] = P[:, :min(num_gps, DX)]
                        ZZ = np.random.randn(ARGS.M, D_in)
                        ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]

                        kern = SharedMixedMok(make_kern(), W=PP)
                        inducing = MixedKernelSharedMof(InducingPoints(ZZ))

                        l = GPLayer(kern, inducing, num_gps, mean_function=mf)
                        if ARGS.fix_linear is True:
                            kern.W.set_trainable(False)
                            mf.set_trainable(False)

                        layers.append(l)

                        D_in = D_out

                    elif c == 'L':
                        d = int(d)
                        D_in += d
                        layers.append(LatentVariableLayer(d, XY_dim=DX + 1))

            # kernel initialization
            kern = RBF(D_in, lengthscales=float(D_in) ** 0.5, variance=1., ARD=True)
            ZZ = np.random.randn(ARGS.M, D_in)
            ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]
            layers.append(GPLayer(kern, InducingPoints(ZZ), DY))

        self.layers = layers
        self.lik = lik
        # global_step = tf.Variable(0, dtype=tf.int32)
        # self.global_step = global_step
    else:
        lik = self._gp.likelihood
        layers = self._gp.layers._list
        # val = self.session.run(self.global_step)
        # global_step = tf.Variable(val, dtype=tf.int32)
        # self.global_step = global_step
        self._gp.clear()

    with defer_build():
        #################################### model
        name = 'Model' if apply_name else None

        if ARGS.mode == 'VI':
            model = DGP_VI(X, Y, layers, lik,
                           minibatch_size=ARGS.minibatch_size, name=name)
        elif ARGS.mode == 'SGHMC':
            for layer in layers:
                if hasattr(layer, 'q_sqrt'):
                    del layer.q_sqrt
                    layer.q_sqrt = None
                    layer.q_mu.set_trainable(False)
            model = DGP_VI(X, Y, layers, lik,
                           minibatch_size=ARGS.minibatch_size, name=name)
        elif ARGS.mode == 'IWAE':
            model = DGP_IWVI(X, Y, layers, lik,
                             minibatch_size=ARGS.minibatch_size,
                             num_samples=ARGS.num_IW_samples, name=name)

    global_step = tf.Variable(0, dtype=tf.int32)
    op_increment = tf.assign_add(global_step, 1)

    if ARGS.mode != 'SGHMC':
        for layer in model.layers[:-1]:
            if isinstance(layer, GPLayer):
                layer.q_sqrt = layer.q_sqrt.read_value() * 1e-5
        model.compile()

        #################################### optimization
        var_list = [[model.layers[-1].q_mu, model.layers[-1].q_sqrt]]
        model.layers[-1].q_mu.set_trainable(False)
        model.layers[-1].q_sqrt.set_trainable(False)

        gamma = tf.cast(tf.train.exponential_decay(ARGS.gamma, global_step, 1000,
                                                   ARGS.gamma_decay, staircase=True),
                        dtype=tf.float64)
        lr = tf.cast(tf.train.exponential_decay(ARGS.lr, global_step, 1000,
                                                ARGS.lr_decay, staircase=True),
                     dtype=tf.float64)

        op_ng = NatGradOptimizer(gamma=gamma).make_optimize_tensor(
            model, var_list=var_list)
        op_adam = AdamOptimizer(lr).make_optimize_tensor(model)

        def train(s):
            s.run(op_increment)
            s.run(op_ng)
            s.run(op_adam)

        model.train_op = train
        model.init_op = lambda s: s.run(tf.variables_initializer([global_step]))
        model.global_step = global_step
    else:
        model.compile()

        sghmc_vars = []
        for layer in layers:
            if hasattr(layer, 'q_mu'):
                sghmc_vars.append(layer.q_mu.unconstrained_tensor)

        hyper_train_op = AdamOptimizer(ARGS.lr).make_optimize_tensor(model)
        self.sghmc_optimizer = SGHMC(model, sghmc_vars, hyper_train_op, 100)

        def train_op(s):
            s.run(op_increment)
            self.sghmc_optimizer.sghmc_step(s)
            self.sghmc_optimizer.train_hypers(s)

        model.train_op = train_op
        model.sghmc_optimizer = self.sghmc_optimizer

        def init_op(s):
            epsilon = 0.01
            mdecay = 0.05
            with tf.variable_scope('sghmc'):
                self.sghmc_optimizer.generate_update_step(epsilon, mdecay)
            v = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='sghmc')
            s.run(tf.variables_initializer(v))
            s.run(tf.variables_initializer([global_step]))

        # Added jitter due to input matrix invertibility problems
        custom_config = gpflow.settings.get_settings()
        custom_config.numerics.jitter_level = 1e-8

        model.init_op = init_op
        model.global_step = global_step

    # build the computation graph for the gradient
    self.X_placeholder = tf.placeholder(tf.float64, shape=[None, X.shape[1]])
    self.Fs, Fmu, Fvar = model._build_predict(self.X_placeholder)
    self.mean_grad = tf.gradients(Fmu, self.X_placeholder)
    self.var_grad = tf.gradients(Fvar, self.X_placeholder)

    # calculate the gradient of the mean for the quantile-filtered distribution
    # q = np.quantile(Fs, self.quantile, axis=0)
    # qFs = [f for f in Fs if f < q]
    # q_mean = np.mean(qFs, axis=0)
    # q_var = np.var(qFs, axis=0)
    # self.qmean_grad = tf.gradients(q_mean, self.X_placeholder)
    # self.qvar_grad = tf.gradients(q_var, self.X_placeholder)

    return model
def __init__(self, X, Y, Z, kernels, likelihood,
             num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)
    num_outputs = num_outputs or Y.shape[1]

    # init the layers
    layers = []

    # inner layers
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        if isinstance(kern_in, Conv):
            dim_in = kern_in.basekern.input_dim
        else:
            dim_in = kern_in.input_dim
        # if isinstance(kern_out, Conv):
        #     dim_out = kern_out.basekern.input_dim
        # else:
        #     dim_out = kern_out.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            # stepping down, use the pca projection
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
            b = np.zeros(1, dtype=np.float32)
            mf = Linear(W, b)
            mf.set_trainable(False)

        if isinstance(kern_in, Conv):
            Z_patch = np.unique(
                kern_in.compute_patches(Z_running).reshape(-1, kern_in.patch_len),
                axis=0)
            Z_patch = Z_patch[
                np.random.permutation(len(Z_patch))[:Z_running.shape[0]], :]
            layers.append(svconvgp(kern_in, Z_patch, dim_out, mf))
        else:
            layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    if isinstance(kernels[-1], Conv):
        Z_patch = np.unique(
            kernels[-1].compute_patches(Z_running).reshape(-1, kernels[-1].patch_len),
            axis=0)
        Z_patch = Z_patch[
            np.random.permutation(len(Z_patch))[:Z_running.shape[0]], :]
        layers.append(svconvgp(kernels[-1], Z_patch, num_outputs, mean_function))
    else:
        layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
def init_layers_graph(X, Y, Z, kernels, gmat,
                      num_layers=2, num_nodes=None, dim_per_node=5,
                      dim_per_X=5, dim_per_Y=5,
                      share_Z=False, nb_init=True):
    layers = []

    def pa_idx(nd, dim_per_in):
        # collect the input indices belonging to node nd's parents in the graph
        res = []
        for n in range(num_nodes):
            w = gmat[nd, n]
            if w > 0:
                res = res + list(range(n * dim_per_in, (n + 1) * dim_per_in))
        res = np.asarray(res)
        return res

    X_running, Z_running = X.copy(), Z.copy()

    for l in range(num_layers - 1):
        if l == 0:
            dim_in = dim_per_X
            dim_out = dim_per_node
        else:
            dim_in = dim_per_node
            dim_out = dim_per_node

        X_running_tmp = np.zeros((X.shape[0], dim_out * num_nodes))
        Z_running_tmp = np.zeros((Z.shape[0], dim_out * num_nodes))
        mf_lst = ParamList([], trainable=False)

        for nd in range(num_nodes):
            if nb_init:
                pa = pa_idx(nd, dim_in)
            else:
                pa = np.asarray(range(nd * dim_in, (nd + 1) * dim_in))
            agg_dim_in = len(pa)

            if agg_dim_in == dim_out:
                mf = Identity()
            else:
                if agg_dim_in > dim_out:  # stepping down, use the pca projection
                    _, _, V = np.linalg.svd(X_running[:, pa], full_matrices=False)
                    W = V[:dim_out, :].T
                else:  # stepping up, use identity + padding
                    W = np.concatenate(
                        [np.eye(agg_dim_in),
                         np.zeros((agg_dim_in, dim_out - agg_dim_in))], 1)
                mf = Linear(W)
                mf.set_trainable(False)
            mf_lst.append(mf)

            if agg_dim_in != dim_out:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa].dot(W)
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa].dot(W)
            else:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa]
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa]

        layers.append(
            SVGPG_Layer(kernels[l], Z_running, mf_lst, num_nodes, dim_in, dim_out,
                        gmat, share_Z=share_Z, nb_init=nb_init))
        Z_running = Z_running_tmp
        X_running = X_running_tmp

    # final layer
    if num_layers == 1:
        fin_dim_in = dim_per_X
    else:
        fin_dim_in = dim_per_node
    layers.append(
        SVGPG_Layer(kernels[-1], Z_running, None, num_nodes, fin_dim_in, dim_per_Y,
                    gmat, share_Z=share_Z, nb_init=nb_init))
    return layers
def init_layers(graph_adj, node_feature, kernels, n_layers, all_layers_dim,
                num_inducing, gc_kernel=True, mean_function="linear",
                white=False, q_diag=False):
    # mean function must be linear or zero
    assert mean_function in ["linear", "zero"]

    layers = []

    # get initial Z
    sparse_adj = tuple_to_sparse_matrix(graph_adj[0], graph_adj[1], graph_adj[2])
    X_running = node_feature.copy()

    for i in range(n_layers):
        tf.logging.info("initialize {}th layer".format(i + 1))
        dim_in = all_layers_dim[i]
        dim_out = all_layers_dim[i + 1]

        conv_X = sparse_adj.dot(X_running)
        Z_running = kmeans2(conv_X, num_inducing[i], minit="points")[0]

        kernel = kernels[i]
        if gc_kernel and kernel.gc_weight:
            X_dim = X_running.shape[1]
            kernel_input_dim = kernel.base_kernel.input_dim
            if X_dim > kernel_input_dim:
                # reduce the dimension of Z to match the kernel input dimension
                Z_running = pca(Z_running, kernel.base_kernel.input_dim)
            elif X_dim < kernel_input_dim:
                # pad Z with zeros to match the kernel input dimension
                Z_running = np.concatenate(
                    [Z_running,
                     np.zeros((Z_running.shape[0], kernel_input_dim - X_dim))],
                    axis=1)

        if dim_in > dim_out:
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:
            W = np.concatenate(
                [np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)

        if mean_function == "zero":
            mf = Zero()
        else:
            if dim_in == dim_out:
                mf = Identity()
            else:
                mf = Linear(W)
                mf.set_trainable(False)

        if gc_kernel:
            feature = GraphConvolutionInducingpoints(Z_running)
        else:
            feature = InducingPoints(Z_running)

        layers.append(svgp_layer(kernel, Z_running, feature, dim_out, mf,
                                 gc_kernel, white=white, q_diag=q_diag))

        if dim_in != dim_out:
            X_running = X_running.dot(W)

    return layers
def build_model(ARGS, X, Y, apply_name=True):

    if ARGS.mode == 'CVAE':
        layers = []
        for l in ARGS.configuration.split('_'):
            try:
                layers.append(int(l))
            except ValueError:
                pass

        with defer_build():
            name = 'CVAE' if apply_name else None
            model = CVAE(X, Y, 1, layers, batch_size=ARGS.minibatch_size, name=name)

        model.compile()

        global_step = tf.Variable(0, dtype=tf.int32)
        op_increment = tf.assign_add(global_step, 1)
        lr = tf.cast(tf.train.exponential_decay(ARGS.lr, global_step, 1000, 0.98,
                                                staircase=True),
                     dtype=tf.float64)
        op_adam = AdamOptimizer(lr).make_optimize_tensor(model)

        model.train_op = lambda s: s.run([op_adam, op_increment])
        model.init_op = lambda s: s.run(tf.variables_initializer([global_step]))
        model.global_step = global_step

        model.compile()

    else:
        N, D = X.shape

        # first layer inducing points
        if N > ARGS.M:
            Z = kmeans2(X, ARGS.M, minit='points')[0]
        else:
            M_pad = ARGS.M - N
            Z = np.concatenate([X.copy(), np.random.randn(M_pad, D)], 0)

        #################################### layers
        P = np.linalg.svd(X, full_matrices=False)[2]

        layers = []

        DX = D
        DY = 1

        D_in = D
        D_out = D

        with defer_build():
            lik = Gaussian()
            lik.variance = ARGS.likelihood_variance

            if len(ARGS.configuration) > 0:
                for c, d in ARGS.configuration.split('_'):
                    if c == 'G':
                        num_gps = int(d)
                        A = np.zeros((D_in, D_out))
                        D_min = min(D_in, D_out)
                        A[:D_min, :D_min] = np.eye(D_min)
                        mf = Linear(A=A)
                        mf.b.set_trainable(False)

                        def make_kern():
                            k = RBF(D_in, lengthscales=float(D_in) ** 0.5,
                                    variance=1., ARD=True)
                            k.variance.set_trainable(False)
                            return k

                        PP = np.zeros((D_out, num_gps))
                        PP[:, :min(num_gps, DX)] = P[:, :min(num_gps, DX)]
                        ZZ = np.random.randn(ARGS.M, D_in)
                        ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]

                        kern = SharedMixedMok(make_kern(), W=PP)
                        inducing = MixedKernelSharedMof(InducingPoints(ZZ))

                        l = GPLayer(kern, inducing, num_gps, mean_function=mf)
                        if ARGS.fix_linear is True:
                            kern.W.set_trainable(False)
                            mf.set_trainable(False)

                        layers.append(l)

                        D_in = D_out

                    elif c == 'L':
                        d = int(d)
                        D_in += d
                        layers.append(LatentVariableLayer(d, XY_dim=DX + 1))

            kern = RBF(D_in, lengthscales=float(D_in) ** 0.5, variance=1., ARD=True)
            ZZ = np.random.randn(ARGS.M, D_in)
            ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]
            layers.append(GPLayer(kern, InducingPoints(ZZ), DY))

            #################################### model
            name = 'Model' if apply_name else None

            if ARGS.mode == 'VI':
                model = DGP_VI(X, Y, layers, lik,
                               minibatch_size=ARGS.minibatch_size, name=name)
            elif ARGS.mode == 'SGHMC':
                for layer in layers:
                    if hasattr(layer, 'q_sqrt'):
                        del layer.q_sqrt
                        layer.q_sqrt = None
                        layer.q_mu.set_trainable(False)
                model = DGP_VI(X, Y, layers, lik,
                               minibatch_size=ARGS.minibatch_size, name=name)
            elif ARGS.mode == 'IWAE':
                model = DGP_IWVI(X, Y, layers, lik,
                                 minibatch_size=ARGS.minibatch_size,
                                 num_samples=ARGS.num_IW_samples, name=name)

        global_step = tf.Variable(0, dtype=tf.int32)
        op_increment = tf.assign_add(global_step, 1)

        if ARGS.mode != 'SGHMC':
            for layer in model.layers[:-1]:
                if isinstance(layer, GPLayer):
                    layer.q_sqrt = layer.q_sqrt.read_value() * 1e-5
            model.compile()

            #################################### optimization
            var_list = [[model.layers[-1].q_mu, model.layers[-1].q_sqrt]]
            model.layers[-1].q_mu.set_trainable(False)
            model.layers[-1].q_sqrt.set_trainable(False)

            gamma = tf.cast(tf.train.exponential_decay(ARGS.gamma, global_step, 1000,
                                                       ARGS.gamma_decay, staircase=True),
                            dtype=tf.float64)
            lr = tf.cast(tf.train.exponential_decay(ARGS.lr, global_step, 1000,
                                                    ARGS.lr_decay, staircase=True),
                         dtype=tf.float64)

            op_ng = NatGradOptimizer(gamma=gamma).make_optimize_tensor(
                model, var_list=var_list)
            op_adam = AdamOptimizer(lr).make_optimize_tensor(model)

            def train(s):
                s.run(op_increment)
                s.run(op_ng)
                s.run(op_adam)

            model.train_op = train
            model.init_op = lambda s: s.run(tf.variables_initializer([global_step]))
            model.global_step = global_step
        else:
            model.compile()

            hmc_vars = []
            for layer in layers:
                if hasattr(layer, 'q_mu'):
                    hmc_vars.append(layer.q_mu.unconstrained_tensor)

            hyper_train_op = AdamOptimizer(ARGS.lr).make_optimize_tensor(model)
            sghmc_optimizer = SGHMC(model, hmc_vars, hyper_train_op, 100)

            def train_op(s):
                s.run(op_increment)
                sghmc_optimizer.sghmc_step(s)
                sghmc_optimizer.train_hypers(s)

            model.train_op = train_op
            model.sghmc_optimizer = sghmc_optimizer

            def init_op(s):
                epsilon = 0.01
                mdecay = 0.05
                with tf.variable_scope('hmc'):
                    sghmc_optimizer.generate_update_step(epsilon, mdecay)
                v = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='hmc')
                s.run(tf.variables_initializer(v))
                s.run(tf.variables_initializer([global_step]))

            model.init_op = init_op
            model.global_step = global_step

    return model
def build_model(ARGS, X, Y, apply_name=True):
    N, D = X.shape

    # first layer inducing points
    if N > ARGS.M:
        Z = kmeans2(X, ARGS.M, minit="points")[0]
    else:
        M_pad = ARGS.M - N
        Z = np.concatenate([X.copy(), np.random.randn(M_pad, D)], 0)

    #################################### layers
    P = np.linalg.svd(X, full_matrices=False)[2]

    layers = []

    DX = D
    DY = 1

    D_in = D
    D_out = D

    with defer_build():
        lik = Gaussian()
        lik.variance = ARGS.likelihood_variance

        if len(ARGS.configuration) > 0:
            for c, d in ARGS.configuration.split("_"):
                if c == "G":
                    num_gps = int(d)
                    A = np.zeros((D_in, D_out))
                    D_min = min(D_in, D_out)
                    A[:D_min, :D_min] = np.eye(D_min)
                    mf = Linear(A=A)
                    mf.b.set_trainable(False)

                    def make_kern():
                        k = RBF(D_in, lengthscales=float(D_in) ** 0.5,
                                variance=1.0, ARD=True)
                        k.variance.set_trainable(False)
                        return k

                    PP = np.zeros((D_out, num_gps))
                    PP[:, :min(num_gps, DX)] = P[:, :min(num_gps, DX)]
                    ZZ = np.random.randn(ARGS.M, D_in)
                    ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]

                    kern = SharedMixedMok(make_kern(), W=PP)
                    inducing = MixedKernelSharedMof(InducingPoints(ZZ))

                    l = GPLayer(kern, inducing, num_gps,
                                layer_num=len(layers), mean_function=mf)
                    if ARGS.fix_linear is True:
                        kern.W.set_trainable(False)
                        mf.set_trainable(False)

                    layers.append(l)

                    D_in = D_out

                elif c == "L":
                    d = int(d)
                    D_in += d
                    encoder_dims = [
                        int(dim.strip()) for dim in ARGS.encoder_dims.split(",")
                    ]
                    layers.append(
                        LatentVariableLayer(d, XY_dim=DX + 1,
                                            encoder_dims=encoder_dims,
                                            qz_mode=ARGS.qz_mode))

        kern = RBF(D_in, lengthscales=float(D_in) ** 0.5, variance=1.0, ARD=True)
        ZZ = np.random.randn(ARGS.M, D_in)
        ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]
        layers.append(GPLayer(kern, InducingPoints(ZZ), DY))

        #################################### model
        name = "Model" if apply_name else None

        if ARGS.mode == "VI":
            model = DGP_VI(X, Y, layers, lik,
                           minibatch_size=ARGS.minibatch_size, name=name)
        elif ARGS.mode == "IWAE":
            model = DGP_IWVI(
                X=X,
                Y=Y,
                layers=layers,
                likelihood=lik,
                minibatch_size=ARGS.minibatch_size,
                num_samples=ARGS.num_IW_samples,
                name=name,
                encoder_minibatch_size=ARGS.encoder_minibatch_size,
            )
        elif ARGS.mode == "CIWAE":
            model = DGP_CIWAE(
                X,
                Y,
                layers,
                lik,
                minibatch_size=ARGS.minibatch_size,
                num_samples=ARGS.num_IW_samples,
                name=name,
                beta=ARGS.beta,
            )
        else:
            raise ValueError(f"Unknown mode {ARGS.mode}.")

    global_step = tf.Variable(0, dtype=tf.int32)
    op_increment = tf.assign_add(global_step, 1)

    for layer in model.layers[:-1]:
        if isinstance(layer, GPLayer):
            layer.q_sqrt = layer.q_sqrt.read_value() * 1e-5

    model.compile()

    #################################### optimization

    # Whether to train the final layer with the other parameters, using Adam, or by
    # itself, using natural gradients.
    if ARGS.use_nat_grad_for_final_layer:
        # Turn off training so the parameters are not optimised by Adam. We pass them
        # directly to the natgrad optimiser, which bypasses this flag.
        model.layers[-1].q_mu.set_trainable(False)
        model.layers[-1].q_sqrt.set_trainable(False)

        gamma = tf.cast(
            tf.train.exponential_decay(ARGS.gamma, global_step, 1000,
                                       ARGS.gamma_decay, staircase=True),
            dtype=tf.float64,
        )
        final_layer_vars = [[model.layers[-1].q_mu, model.layers[-1].q_sqrt]]
        final_layer_opt_op = NatGradOptimizer(gamma=gamma).make_optimize_tensor(
            model, var_list=final_layer_vars)
    else:
        final_layer_opt_op = NoOp()

    lr = tf.cast(
        tf.train.exponential_decay(ARGS.lr, global_step, decay_steps=1000,
                                   decay_rate=ARGS.lr_decay, staircase=True),
        dtype=tf.float64,
    )
    encoder_lr = tf.cast(
        tf.train.exponential_decay(ARGS.encoder_lr, global_step, decay_steps=1000,
                                   decay_rate=ARGS.encoder_lr_decay, staircase=True),
        dtype=tf.float64,
    )

    dreg_optimizer = DregOptimizer(
        enable_dreg=ARGS.use_dreg,
        optimizer=ARGS.optimizer,
        encoder_optimizer=ARGS.encoder_optimizer,
        learning_rate=lr,
        encoder_learning_rate=encoder_lr,
        assert_no_nans=ARGS.assert_no_nans,
        encoder_grad_clip_value=ARGS.clip_encoder_grads,
    )
    other_layers_opt_op = dreg_optimizer.make_optimize_tensor(model)

    model.lr = lr
    model.train_op = tf.group(op_increment, final_layer_opt_op, other_layers_opt_op)
    model.init_op = lambda s: s.run(tf.variables_initializer([global_step]))
    model.global_step = global_step

    return model
from gpflow.mean_functions import (
    Additive,
    Constant,
    Linear,
    Product,
    SwitchedMeanFunction,
    Zero,
)

rng = np.random.RandomState(99021)


class Datum:
    input_dim, output_dim = 3, 2
    N, Ntest, M = 20, 30, 10


_mean_functions = [
    Zero(),
    Linear(
        A=rng.randn(Datum.input_dim, Datum.output_dim),
        b=rng.randn(Datum.output_dim, 1).reshape(-1),
    ),
    Constant(c=rng.randn(Datum.output_dim, 1).reshape(-1)),
]


@pytest.mark.parametrize("mean_function_1", _mean_functions)
@pytest.mark.parametrize("mean_function_2", _mean_functions)
@pytest.mark.parametrize("operation", ["+", "*"])
def test_mean_functions_output_shape(mean_function_1, mean_function_2, operation):
    """
    Test the output shape for basic and compositional mean functions, also
    check that the combination of mean functions returns the correct class.
    """
    X = np.random.randn(Datum.N, Datum.input_dim)
import gpflow
from gpflow.config import default_int
from gpflow.inducing_variables import InducingPoints
from gpflow.mean_functions import (Additive, Constant, Linear, Product,
                                   SwitchedMeanFunction, Zero)

rng = np.random.RandomState(99021)


class Datum:
    input_dim, output_dim = 3, 2
    N, Ntest, M = 20, 30, 10


_mean_functions = [
    Zero(),
    Linear(A=rng.randn(Datum.input_dim, Datum.output_dim),
           b=rng.randn(Datum.output_dim, 1).reshape(-1)),
    Constant(c=rng.randn(Datum.output_dim, 1).reshape(-1))
]


@pytest.mark.parametrize('mean_function_1', _mean_functions)
@pytest.mark.parametrize('mean_function_2', _mean_functions)
@pytest.mark.parametrize('operation', ['+', 'x'])
def test_mean_functions_output_shape(mean_function_1, mean_function_2, operation):
    """
    Test the output shape for basic and compositional mean functions, also
    check that the combination of mean functions returns the correct class.
    """
    X = np.random.randn(Datum.N, Datum.input_dim)
    Y = mean_function_1(X)  # basic output shape check