def _last_layer(self, H_X, M, filter_size, stride, layer_params=None, pad=0):
    if layer_params is None:
        layer_params = {}
    NHWC = H_X.shape
    conv_output_count = np.prod(NHWC[1:])
    Z = layer_params.get('Z')
    q_mu = layer_params.get('q_mu')
    q_sqrt = layer_params.get('q_sqrt')
    if Z is not None:
        # Saved inducing patches are only reusable if the filter size matches.
        saved_filter_size = int(np.sqrt(Z.shape[1] / NHWC[3]))
        if filter_size != saved_filter_size:
            print("filter_size {} != {} for last layer. Resetting parameters."
                  .format(filter_size, saved_filter_size))
            Z = None
            q_mu = None
            q_sqrt = None

    if self.flags.last_kernel == 'rbf':
        # Flatten the feature maps and place a plain RBF kernel on the result.
        H_X = H_X.reshape(H_X.shape[0], -1)
        lengthscales = layer_params.get('lengthscales', 5.0)
        variance = layer_params.get('variance', 5.0)
        kernel = gpflow.kernels.RBF(conv_output_count, lengthscales=lengthscales,
                                    variance=variance, ARD=True)
        if Z is None:
            Z = select_initial_inducing_points(H_X, M)
        inducing = features.InducingPoints(Z)
    else:
        # Patch-based kernels operate on filter_size x filter_size patches.
        lengthscales = layer_params.get('base_kernel/lengthscales', 5.0)
        variance = layer_params.get('base_kernel/variance', 5.0)
        input_dim = filter_size ** 2 * NHWC[3]
        view = FullView(input_size=NHWC[1:], filter_size=filter_size,
                        feature_maps=NHWC[3], stride=stride, pad=pad)
        if Z is None:
            inducing = PatchInducingFeatures.from_images(H_X, M, filter_size)
        else:
            inducing = PatchInducingFeatures(Z)
        patch_weights = layer_params.get('patch_weights')
        if self.flags.last_kernel == 'conv':
            kernel = ConvKernel(
                base_kernel=gpflow.kernels.RBF(input_dim, variance=variance,
                                               lengthscales=lengthscales),
                view=view,
                patch_weights=patch_weights)
        elif self.flags.last_kernel == 'add':
            kernel = AdditivePatchKernel(
                base_kernel=gpflow.kernels.RBF(input_dim, variance=variance,
                                               lengthscales=lengthscales),
                view=view,
                patch_weights=patch_weights)
        else:
            raise ValueError("Invalid last layer kernel")

    return SVGP_Layer(kern=kernel,
                      num_outputs=10,
                      feature=inducing,
                      mean_function=gpflow.mean_functions.Zero(output_dim=10),
                      white=self.flags.white,
                      q_mu=q_mu,
                      q_sqrt=q_sqrt)
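
# A minimal, hedged sketch of what select_initial_inducing_points, used in the
# 'rbf' branch above, might do. The real helper in this codebase may differ;
# here we assume the common choice of M k-means centroids over the flattened
# activations.
import numpy as np
from scipy.cluster.vq import kmeans2

def select_initial_inducing_points(H_X, M):
    # H_X: (N, D) flattened activations; returns (M, D) centroids to use as Z.
    centroids, _ = kmeans2(H_X.astype(np.float64), M, minit='points')
    return centroids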
def __init__(self, X, Y, Z, kernels, likelihood,
             num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)

    num_outputs = num_outputs or Y.shape[1]

    # init the layers
    layers = []

    # inner layers
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim

        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:  # stepping up, pad with zeros
                zeros = np.zeros((dim_in, dim_out - dim_in))
                W = np.concatenate([np.eye(dim_in), zeros], 1)

            mf = Linear(W)
            mf.set_trainable(False)

        layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))

        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
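
# Self-contained illustration of the dimension-matching rule used for the
# inner-layer mean functions above: project down with the top PCA directions
# when dim_in > dim_out, pad with zero columns when dim_in < dim_out. The
# shapes below are assumptions chosen for the example.
import numpy as np

X_running = np.random.randn(100, 5)

# Stepping down 5 -> 2: rows of V are principal directions, so W projects
# inputs onto the top-2 PCA subspace.
_, _, V = np.linalg.svd(X_running, full_matrices=False)
W_down = V[:2, :].T                                      # (5, 2)
assert X_running.dot(W_down).shape == (100, 2)

# Stepping up 5 -> 8: keep the input unchanged and append zero columns.
W_up = np.concatenate([np.eye(5), np.zeros((5, 3))], 1)  # (5, 8)
assert X_running.dot(W_up).shape == (100, 8)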
def __init__(self, X, Y, Z, kernels, likelihood,
             num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)

    num_outputs = num_outputs or Y.shape[1]

    def match_inducing_points(Z, dim_in):
        # Match the inducing points to a layer's input dimension:
        # reduce via PCA, widen by tiling the first principal component.
        if Z.shape[1] > dim_in:
            _, _, V = np.linalg.svd(X, full_matrices=False)  # V -> (D, D) matrix
            return Z.dot(V[:dim_in, :].T)
        elif Z.shape[1] < dim_in:
            _, _, V = np.linalg.svd(X, full_matrices=False)  # V -> (D, D) matrix
            first_pca = Z.dot(V[0, :].T)  # scores on the first principal component
            return np.tile(first_pca[:, None], (1, dim_in))
        return Z.copy()  # same dimension

    # init the layers
    layers = []

    # inner layers: zero mean functions, added to compare with DGP EP MCM
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        Z_kern = match_inducing_points(Z, dim_in)
        layers.append(SVGP_Layer(kern_in, Z_kern, dim_out, Zero()))

    # final layer
    Z_kern = match_inducing_points(Z, kernels[-1].input_dim)
    layers.append(SVGP_Layer(kernels[-1], Z_kern, num_outputs, mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
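
# Self-contained illustration of the inducing-point matching rule above. Note
# the contrast with the earlier constructor: here widening tiles the first
# principal component across every column rather than zero-padding, and the
# projection is applied to Z directly instead of through a Linear mean
# function. Shapes are assumptions chosen for the example.
import numpy as np

X = np.random.randn(100, 5)
Z = np.random.randn(20, 5)
_, _, V = np.linalg.svd(X, full_matrices=False)

Z_reduced = Z.dot(V[:3, :].T)                    # (20, 3): PCA reduction
first_pca = Z.dot(V[0, :].T)                     # (20,): first-PC scores
Z_widened = np.tile(first_pca[:, None], (1, 8))  # (20, 8): tiled widening
assert Z_reduced.shape == (20, 3) and Z_widened.shape == (20, 8)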