import numpy as np
from gpflow.params import ParamList
from gpflow.models import Model
from gpflow.mean_functions import Identity, Linear, Zero
from doubly_stochastic_dgp.layers import SVGP_Layer
from doubly_stochastic_dgp.dgp import DGP_Base
# Project-specific imports not shown here: Conv (convolutional kernel),
# svconvgp (convolutional SVGP layer) and SVGPG_Layer (graph SVGP layer);
# their module paths depend on the surrounding codebase.


def init_linear(X, Z, all_kernels, initialized_Zs=False):
    """
    If there are no Zs from an initialization (e.g. for warm-starting), all_Zs
    is initialized according to the Salimbeni scheme (Z should be MxD).
    Otherwise the Zs obtained from the initialization are simply taken and put
    into the all_Zs list (Z should be a list of L arrays).
    """
    if initialized_Zs:
        all_Zs = Z
    else:
        all_Zs = []
        Z_running = Z.copy()
    all_mean_funcs = []
    X_running = X.copy()

    for kern_in, kern_out in zip(all_kernels[:-1], all_kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:  # stepping up, use identity + padding
                W = np.concatenate([np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            mf.set_trainable(False)
        all_mean_funcs.append(mf)

        if not initialized_Zs:
            all_Zs.append(Z_running)
        if dim_in != dim_out:  # propagate the running data through the projection
            X_running = X_running.dot(W)
            if not initialized_Zs:
                Z_running = Z_running.dot(W)

    # final layer
    all_mean_funcs.append(Zero())
    if not initialized_Zs:
        all_Zs.append(Z_running)
    return all_Zs, all_mean_funcs
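
# A minimal usage sketch for init_linear, assuming GPflow 1.x RBF kernels;
# the 784 -> 30 -> 1 architecture and all sizes are illustrative, not
# prescribed by the code above.
def _demo_init_linear():
    from gpflow.kernels import RBF
    X = np.random.randn(1000, 784)
    Z = X[:100].copy()  # M x D inducing inputs
    all_Zs, all_mean_funcs = init_linear(X, Z, [RBF(784), RBF(30), RBF(1)])
    # all_Zs[l] holds layer l's inducing inputs, already projected through
    # the fixed linear mean functions of the preceding layers
    return all_Zs, all_mean_funcs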
def init_layers_linear(X, Y, Z, kernels, num_outputs=None, mean_function=Zero(),
                       Layer=SVGP_Layer, white=False):
    num_outputs = num_outputs or Y.shape[1]
    layers = []
    X_running, Z_running = X.copy(), Z.copy()

    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:  # stepping up, use identity + padding
                W = np.concatenate([np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)
            mf = Linear(W)
            mf.set_trainable(False)
        layers.append(Layer(kern_in, Z_running, dim_out, mf, white=white))
        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(Layer(kernels[-1], Z_running, num_outputs, mean_function, white=white))
    return layers
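
# A usage sketch for init_layers_linear under the same assumptions
# (hypothetical sizes: a 10 -> 5 -> 5 kernel stack with one output).
def _demo_init_layers_linear():
    from gpflow.kernels import RBF
    X = np.random.randn(500, 10)
    Y = np.random.randn(500, 1)
    Z = X[:50].copy()
    layers = init_layers_linear(X, Y, Z, [RBF(10), RBF(5), RBF(5)])
    # layers[0] steps 10d down to 5d with a fixed PCA mean function,
    # layers[1] keeps the identity mean, layers[2] maps to the 1d output
    return layers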
def __init__(self, X, Y, Z, kernels, likelihood,
             num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)
    num_outputs = num_outputs or Y.shape[1]

    # init the layers
    layers = []

    # inner layers
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        dim_in = kern_in.input_dim
        dim_out = kern_out.input_dim
        if dim_in == dim_out:
            mf = Identity()
        else:
            if dim_in > dim_out:  # stepping down, use the pca projection
                _, _, V = np.linalg.svd(X_running, full_matrices=False)
                W = V[:dim_out, :].T
            else:  # stepping up, pad with zeros
                zeros = np.zeros((dim_in, dim_out - dim_in))
                W = np.concatenate([np.eye(dim_in), zeros], 1)
            mf = Linear(W)
            mf.set_trainable(False)
        layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))
        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
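
# A sketch of building a model through the constructor above, assuming it is
# the __init__ of a DGP class that calls Model.__init__ and DGP_Base.__init__
# as shown; the class is passed in here rather than named, and the Gaussian
# likelihood and sizes are illustrative.
def _demo_build_dgp(DGP):
    from gpflow.kernels import RBF
    from gpflow.likelihoods import Gaussian
    X = np.random.randn(500, 10)
    Y = np.random.randn(500, 1)
    Z = X[:50].copy()
    return DGP(X, Y, Z, [RBF(10), RBF(5), RBF(5)], Gaussian())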
def __init__(self, X, Y, Z, kernels, likelihood,
             num_outputs=None,
             mean_function=Zero(),  # the final layer mean function
             **kwargs):
    Model.__init__(self)
    num_outputs = num_outputs or Y.shape[1]

    # init the layers
    layers = []

    # inner layers
    X_running, Z_running = X.copy(), Z.copy()
    for kern_in, kern_out in zip(kernels[:-1], kernels[1:]):
        # for convolutional kernels the GP acts on patches, so the effective
        # input dimension is that of the base kernel
        if isinstance(kern_in, Conv):
            dim_in = kern_in.basekern.input_dim
        else:
            dim_in = kern_in.input_dim
        # dim_out is read from the full kernel even when kern_out is a Conv
        # kernel (the basekern variant is disabled)
        dim_out = kern_out.input_dim
        if dim_in == dim_out:
            mf = Identity()
        else:
            # stepping down, use the pca projection; note that, unlike the
            # generic constructor above, there is no padding branch for
            # dim_in < dim_out
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
            b = np.zeros(1, dtype=np.float32)
            mf = Linear(W, b)
            mf.set_trainable(False)
        if isinstance(kern_in, Conv):
            # initialize inducing patches: extract all patches from Z, drop
            # duplicates, then subsample down to the original number of points
            Z_patch = np.unique(kern_in.compute_patches(Z_running).reshape(-1, kern_in.patch_len), axis=0)
            Z_patch = Z_patch[np.random.permutation(len(Z_patch))[:Z_running.shape[0]], :]
            layers.append(svconvgp(kern_in, Z_patch, dim_out, mf))
        else:
            layers.append(SVGP_Layer(kern_in, Z_running, dim_out, mf))
        if dim_in != dim_out:
            Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    # final layer
    if isinstance(kernels[-1], Conv):
        Z_patch = np.unique(kernels[-1].compute_patches(Z_running).reshape(-1, kernels[-1].patch_len), axis=0)
        Z_patch = Z_patch[np.random.permutation(len(Z_patch))[:Z_running.shape[0]], :]
        layers.append(svconvgp(kernels[-1], Z_patch, num_outputs, mean_function))
    else:
        layers.append(SVGP_Layer(kernels[-1], Z_running, num_outputs, mean_function))

    DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
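
# A self-contained sketch of the inducing-patch initialization used above,
# with a toy patch extractor standing in for Conv.compute_patches (the real
# Conv kernel is project-specific; image and patch sizes are illustrative).
def _demo_patch_init(img_size=5, patch_size=3, num_inducing=50):
    Z_running = np.random.randn(num_inducing, img_size * img_size)
    Z_img = Z_running.reshape(num_inducing, img_size, img_size)
    k = img_size - patch_size + 1
    # all k*k patches of each image, flattened to rows of length patch_len
    patches = np.stack([Z_img[:, i:i + patch_size, j:j + patch_size].reshape(num_inducing, -1)
                        for i in range(k) for j in range(k)], axis=1)
    patch_len = patch_size * patch_size
    # deduplicate, then subsample down to the original number of inducing points
    Z_patch = np.unique(patches.reshape(-1, patch_len), axis=0)
    Z_patch = Z_patch[np.random.permutation(len(Z_patch))[:num_inducing], :]
    return Z_patch  # shape (num_inducing, patch_len)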
def init_layers_graph(X, Y, Z, kernels, gmat,
                      num_layers=2,
                      num_nodes=None,
                      dim_per_node=5,
                      dim_per_X=5,
                      dim_per_Y=5,
                      share_Z=False,
                      nb_init=True):
    layers = []

    def pa_idx(nd, dim_per_in):
        # indices of the input dimensions belonging to the parents of node nd
        # in the graph, i.e. all nodes n with gmat[nd, n] > 0
        res = []
        for n in range(num_nodes):
            if gmat[nd, n] > 0:
                res += list(range(n * dim_per_in, (n + 1) * dim_per_in))
        return np.asarray(res)

    X_running, Z_running = X.copy(), Z.copy()
    for l in range(num_layers - 1):
        dim_in = dim_per_X if l == 0 else dim_per_node
        dim_out = dim_per_node

        X_running_tmp = np.zeros((X.shape[0], dim_out * num_nodes))
        Z_running_tmp = np.zeros((Z.shape[0], dim_out * num_nodes))
        mf_lst = ParamList([], trainable=False)
        for nd in range(num_nodes):
            if nb_init:
                pa = pa_idx(nd, dim_in)
            else:
                pa = np.asarray(range(nd * dim_in, (nd + 1) * dim_in))
            agg_dim_in = len(pa)
            if agg_dim_in == dim_out:
                mf = Identity()
            else:
                if agg_dim_in > dim_out:  # stepping down, use the pca projection
                    _, _, V = np.linalg.svd(X_running[:, pa], full_matrices=False)
                    W = V[:dim_out, :].T
                else:  # stepping up, use identity + padding
                    W = np.concatenate([np.eye(agg_dim_in),
                                        np.zeros((agg_dim_in, dim_out - agg_dim_in))], 1)
                mf = Linear(W)
                mf.set_trainable(False)
            mf_lst.append(mf)
            # write this node's block of the running data, projected if needed
            if agg_dim_in != dim_out:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa].dot(W)
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa].dot(W)
            else:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa]
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa]
        layers.append(SVGPG_Layer(kernels[l], Z_running, mf_lst, num_nodes, dim_in, dim_out,
                                  gmat, share_Z=share_Z, nb_init=nb_init))
        Z_running = Z_running_tmp
        X_running = X_running_tmp

    # final layer
    fin_dim_in = dim_per_X if num_layers == 1 else dim_per_node
    layers.append(SVGPG_Layer(kernels[-1], Z_running, None, num_nodes, fin_dim_in, dim_per_Y,
                              gmat, share_Z=share_Z, nb_init=nb_init))
    return layers
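
# A usage sketch for init_layers_graph, assuming gmat is a num_nodes x
# num_nodes adjacency matrix (each node its own parent plus neighbours) and
# the kernels list is compatible with SVGPG_Layer; all sizes are illustrative.
def _demo_init_layers_graph(kernels):
    num_nodes = 3
    gmat = np.array([[1, 1, 0],
                     [1, 1, 1],
                     [0, 1, 1]])
    X = np.random.randn(200, num_nodes * 5)  # dim_per_X = 5 dims per node
    Y = np.random.randn(200, num_nodes * 5)  # dim_per_Y = 5 dims per node
    Z = X[:20].copy()
    return init_layers_graph(X, Y, Z, kernels, gmat,
                             num_layers=2, num_nodes=num_nodes)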