def setup_base_gps(self):
    self.base_gps = []  # one per dataset
    self.deep_gps = []  # all but the first dataset
    self.parent_gps = []

    for i in range(self.num_datasets):
        z = self.Z[0][i]
        k = self.kernels[0][i]
        sig = self.noise_sigmas[0][i]

        gp = MR_SVGP(z, k, sig)
        self.base_gps.append(gp)

        if i > 0:
            z = self.Z[1][i - 1]
            k = self.kernels[1][i - 1]
            sig = self.noise_sigmas[1][i - 1]

            dgp = MR_SVGP(z, k, sig)
            self.deep_gps.append(dgp)

    if self.parent_mixtures:
        for i in range(len(self.parent_mixtures)):
            z = self.Z[2][i]
            k = self.kernels[2][i]
            sig = self.noise_sigmas[2][i]

            gp = MR_SVGP(z, k, sig)
            self.parent_gps.append(gp)

    self.base_gps = ParamList(self.base_gps)
    self.deep_gps = ParamList(self.deep_gps)
    self.parent_gps = ParamList(self.parent_gps)
def _init_layers(self):
    self.layers = []
    for i in range(self.nLayers):
        self.layers.append(
            SVGP_Layer(layer_id=i,
                       Z=None,
                       U=self.U,
                       kern=self.kernels[i],
                       num_outputs=self.num_classes,
                       mean_function=Zero()))
    self.layers = ParamList(self.layers)
def __init__(self, X, Y, likelihood, layers, minibatch_size=None, num_samples=1, **kwargs):
    """
    :param X: List of training inputs where each element of the list is a numpy array
              corresponding to the inputs of one fidelity.
    :param Y: List of training targets where each element of the list is a numpy array
              corresponding to the targets of one fidelity.
    :param likelihood: gpflow likelihood object for use at the final layer
    :param layers: List of doubly_stochastic_dgp.layers.Layer objects
    :param minibatch_size: Minibatch size if using minibatch training
    :param num_samples: Number of samples when propagating predictions through layers
    :param kwargs: kwarg inputs to gpflow.models.Model
    """
    Model.__init__(self, **kwargs)

    self.Y_list = Y
    self.X_list = X
    self.minibatch_size = minibatch_size

    self.num_samples = num_samples

    # This allows a training regime where the first layer is trained first by itself,
    # then the subsequent layer, and so on.
    self._train_upto_fidelity = -1

    if minibatch_size:
        for i, (x, y) in enumerate(zip(X, Y)):
            setattr(self, "num_data" + str(i), x.shape[0])
            setattr(self, "X" + str(i), Minibatch(x, minibatch_size, seed=0))
            setattr(self, "Y" + str(i), Minibatch(y, minibatch_size, seed=0))
    else:
        for i, (x, y) in enumerate(zip(X, Y)):
            setattr(self, "num_data" + str(i), x.shape[0])
            setattr(self, "X" + str(i), DataHolder(x))
            setattr(self, "Y" + str(i), DataHolder(y))

    self.num_layers = len(layers)
    self.layers = ParamList(layers)

    self.likelihood = BroadcastingLikelihood(likelihood)
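# A hedged usage sketch for the multi-fidelity constructor above. The class name
# `MultiFidelityDGP` is a placeholder for whatever class this __init__ belongs to, and the
# layers list is assumed to be built elsewhere; only the list-of-arrays calling convention
# is taken from the docstring and code.
import numpy as np
import gpflow

X_low, Y_low = np.random.randn(50, 1), np.random.randn(50, 1)    # low-fidelity data
X_high, Y_high = np.random.randn(10, 1), np.random.randn(10, 1)  # high-fidelity data

# model = MultiFidelityDGP(X=[X_low, X_high], Y=[Y_low, Y_high],
#                          likelihood=gpflow.likelihoods.Gaussian(),
#                          layers=layers,            # one Layer per fidelity
#                          minibatch_size=100, num_samples=1)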
def __init__(self, X, adj, layers, sample, n_samples, K, neighbors=None,
             loss_type="link_full", label=None, pos_edges=None, neg_edges=None,
             idx_train=None, linear_layer=False, n_split=None, name="GCGP_base", **kwargs):
    """
    :param X: tensor placeholder
    :param adj: sparse tensor placeholder
    :param label:
    :param layers:
    :param sample:
    :param n_samples:
    :param K:
    :param neighbors: n-array, [n_nodes, K]
    """
    Model.__init__(self, name=name, **kwargs)
    self.X = X
    self.adj = adj
    self.loss_type = loss_type
    self.label = label
    self.pos_edges = pos_edges
    self.neg_edges = neg_edges
    self.idx_train = idx_train
    self.n_split = n_split
    self.linear_layer = linear_layer

    self.layers = ParamList(layers)
    self.sample = sample
    self.n_samples = n_samples
    self.K = K

    # indices for neighbor sampling
    self.neighbor_indices = [None] * 4
    if self.sample == "neighbor":
        assert neighbors is not None  # neighbors should not be None when sample="neighbor"
        self.update_neighbor_indices(neighbors.shape[0], neighbors)

    if self.loss_type == "classification":
        self.likelihood = gpflow.likelihoods.MultiClass(len(np.unique(self.label)))
    if self.loss_type == "regression":
        self.likelihood = gpflow.likelihoods.Gaussian(variance=0.1)

    if self.linear_layer:
        with tf.variable_scope("linear_weight"):
            self.W = tf.get_variable(name="linear_w",
                                     shape=[self.layers[-1].num_outputs, self.label.shape[1]],
                                     dtype=settings.float_type,
                                     initializer=tf.glorot_uniform_initializer())
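# Illustrative setup for the placeholders the graph model above expects (an assumption:
# TF1-style feed placeholders with float64, matching settings.float_type's default). The
# class name "GCGP" is a stand-in for the class this __init__ belongs to, so the
# constructor call is left commented out.
import numpy as np
import tensorflow as tf

n_nodes, n_features = 100, 16
X_ph = tf.placeholder(tf.float64, shape=[n_nodes, n_features])  # node features
adj_ph = tf.sparse_placeholder(tf.float64)                       # sparse adjacency matrix

# model = GCGP(X=X_ph, adj=adj_ph, layers=layers, sample="neighbor",
#              n_samples=5, K=10, neighbors=neighbor_array,
#              loss_type="classification", label=labels)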
def __init__(self, X, Y, likelihood, layers, minibatch_size=None, num_samples=1):
    Model.__init__(self)
    self.num_samples = num_samples

    self.num_data = X.shape[0]
    if minibatch_size:
        self.X = Minibatch(X, minibatch_size, seed=0)
        self.Y = Minibatch(Y, minibatch_size, seed=0)
    else:
        self.X = DataHolder(X)
        self.Y = DataHolder(Y)

    self.likelihood = BroadcastingLikelihood(likelihood)
    self.layers = ParamList(layers)
def __init__(self, X, Y, time_vec, likelihood, layers,
             minibatch_size=100, num_samples=1, num_data=None, wfunc='exp', **kwargs):
    Model.__init__(self, **kwargs)
    self.num_samples = num_samples

    print(np.ndim(X))
    if np.ndim(X) == 2:
        self.num_data = num_data or X.shape[0]
        self.X = wMinibatch(X, time_vec, batch_size=minibatch_size, seed=0, wfunc=wfunc)
        self.Y = wMinibatch(Y, time_vec, batch_size=minibatch_size, seed=0, wfunc=wfunc)
    else:
        self.num_data = num_data or X.shape[1]
        self.X = wpMinibatch(X, time_vec, batch_size=minibatch_size, seed=0, wfunc=wfunc)
        self.Y = wpMinibatch(Y, time_vec, batch_size=minibatch_size, seed=0, wfunc=wfunc)

    self.m = 4
    self.likelihood = BroadcastingLikelihood(likelihood)
    self.layers = ParamList(layers)
def __init__(self, X, Y, likelihood, layers,
             minibatch_size=None, num_samples=1, num_data=None,
             div_weights=None, **kwargs):
    Model.__init__(self, **kwargs)
    self.num_samples = num_samples
    self.num_data = num_data or X.shape[0]

    if minibatch_size:
        self.X = Minibatch(X, minibatch_size, seed=0)
        self.Y = Minibatch(Y, minibatch_size, seed=0)
    else:
        self.X = DataHolder(X)
        self.Y = DataHolder(Y)

    self.likelihood = BroadcastingLikelihood(likelihood)
    self.layers = ParamList(layers)

    """CHANGES START"""
    """Weights for the uncertainty quantifiers (per layer)"""
    if div_weights is None:
        div_weights = [1.0] * len(layers)  # multiply by 1, i.e. don't change
    elif type(div_weights) == list and len(div_weights) != len(layers):
        print("WARNING! You specified a list of weights for the " +
              "uncertainty quantifiers, but your DGP has more/fewer layers " +
              "than the number of weights you specified! " +
              "We set all weights to 1.0")
        div_weights = [1.0] * len(layers)
    elif type(div_weights) == list and len(div_weights) == len(layers):
        div_weights = div_weights

    """Distribute the weights into the layers"""
    for layer, weight in zip(layers, div_weights):
        layer.set_weight(weight)
    """CHANGES END"""
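# Usage note (illustrative, not from the original repo): div_weights scales the per-layer
# uncertainty quantifier in the objective, one weight per layer, distributed to the layers
# via set_weight(); a list of the wrong length falls back to all-ones, as implemented above.
# The class name below is a hypothetical stand-in for the class this __init__ belongs to.
div_weights = [1.0, 0.5, 0.1]  # e.g. for a 3-layer DGP
# model = WeightedDGP(X, Y, likelihood, layers, minibatch_size=1000, div_weights=div_weights)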
class SVGPG_Layer(Layer):
    def __init__(self, kern, Z, mean_function, num_nodes, dim_per_in, dim_per_out, gmat,
                 share_Z=False, nb_init=True, **kwargs):
        Layer.__init__(self, input_prop_dim=False, **kwargs)
        self.kern = kern
        self.num_nodes = num_nodes
        self.dim_per_in, self.dim_per_out = dim_per_in, dim_per_out
        self.gmat = gmat
        self.share_Z = share_Z
        self.nb_init = nb_init
        self.num_outputs = num_nodes * dim_per_out
        self.num_inducing = Z.shape[0]

        self.q_mu = Parameter(np.zeros((self.num_inducing, num_nodes * dim_per_out)))
        self.mean_function = ParamList([], trainable=False)
        self.q_sqrt_lst = ParamList([])
        transform = transforms.LowerTriangular(self.num_inducing, num_matrices=self.dim_per_out)

        if share_Z:
            self.feature = InducingPoints(Z)
        else:
            self.feature = ParamList([])  # InducingPoints(Z)

        for nd in range(num_nodes):
            if mean_function:
                self.mean_function.append(mean_function[nd])
            else:
                self.mean_function.append(Zero())

            if share_Z:
                pa_nd = self.pa_idx(nd)
                Ku_nd = self.kern[nd].compute_K_symm(Z)
                Lu_nd = np.linalg.cholesky(Ku_nd + np.eye(Z.shape[0]) * settings.jitter)
                q_sqrt = np.tile(Lu_nd[None, :, :], [dim_per_out, 1, 1])
                self.q_sqrt_lst.append(Parameter(q_sqrt, transform=transform))
            else:
                pa_nd = self.pa_idx(nd)
                Z_tmp = Z[:, pa_nd].copy()
                self.feature.append(InducingPoints(Z_tmp))
                Ku_nd = self.kern[nd].compute_K_symm(Z_tmp)
                Lu_nd = np.linalg.cholesky(Ku_nd + np.eye(Z_tmp.shape[0]) * settings.jitter)
                q_sqrt = np.tile(Lu_nd[None, :, :], [dim_per_out, 1, 1])
                self.q_sqrt_lst.append(Parameter(q_sqrt, transform=transform))

        self.needs_build_cholesky = True

    def pa_idx(self, nd):
        res = []
        for n in range(self.num_nodes):
            w = self.gmat[nd, n]
            if w > 0:
                res = res + list(range(n * self.dim_per_in, (n + 1) * self.dim_per_in))
        res = np.asarray(res)
        return res

    @params_as_tensors
    def build_cholesky_if_needed(self):
        # make sure we only compute this once
        if self.needs_build_cholesky:
            self.Ku, self.Lu = [None] * self.num_nodes, [None] * self.num_nodes
            self.Ku_tiled_lst, self.Lu_tiled_lst = [], []
            for nd in range(self.num_nodes):
                if self.share_Z:
                    Ku_nd = self.feature.Kuu(self.kern[nd], jitter=settings.jitter)
                else:
                    Ku_nd = self.feature[nd].Kuu(self.kern[nd], jitter=settings.jitter)
                Lu_nd = tf.cholesky(Ku_nd)
                self.Ku[nd] = Ku_nd
                self.Lu[nd] = Lu_nd
                self.Ku_tiled_lst.append(tf.tile(Ku_nd[None, :, :], [self.dim_per_out, 1, 1]))
                self.Lu_tiled_lst.append(tf.tile(Lu_nd[None, :, :], [self.dim_per_out, 1, 1]))
            self.needs_build_cholesky = False

    @time_it
    def conditional_ND(self, X, full_cov=False):
        self.build_cholesky_if_needed()
        if self.share_Z:
            return self.conditional_ND_share_Z(X, full_cov=False)
        else:
            return self.conditional_ND_not_share_Z(X, full_cov=False)

    def conditional_ND_share_Z(self, X, full_cov=False):
        mean_lst, var_lst, A_tiled_lst = [], [], []
        for nd in range(self.num_nodes):
            pa_nd = self.pa_idx(nd)
            Kuf_nd = self.feature.Kuf(self.kern[nd], X)
            A_nd = tf.matrix_triangular_solve(self.Lu[nd], Kuf_nd, lower=True)
            A_nd = tf.matrix_triangular_solve(tf.transpose(self.Lu[nd]), A_nd, lower=False)

            mean_tmp = tf.matmul(A_nd,
                                 self.q_mu[:, nd * self.dim_per_out:(nd + 1) * self.dim_per_out],
                                 transpose_a=True)
            X_tmp = tf.gather(X, pa_nd, axis=1)
            if self.nb_init:
                mean_tmp += self.mean_function[nd](X_tmp)
            else:
                mean_tmp += self.mean_function[nd](X[:, nd * self.dim_per_in:(nd + 1) * self.dim_per_in])
            mean_lst.append(mean_tmp)

            A_tiled_lst.append(tf.tile(A_nd[None, :, :], [self.dim_per_out, 1, 1]))
            SK_nd = -self.Ku_tiled_lst[nd]
            q_sqrt_nd = self.q_sqrt_lst[nd]
            with params_as_tensors_for(q_sqrt_nd, convert=True):
                SK_nd += tf.matmul(q_sqrt_nd, q_sqrt_nd, transpose_b=True)

            B_nd = tf.matmul(SK_nd, A_tiled_lst[nd])
            delta_cov_nd = tf.reduce_sum(A_tiled_lst[nd] * B_nd, 1)  # (num_latent, num_X)
            Kff_nd = self.kern[nd].Kdiag(X)

            # either (1, num_X) + (num_latent, num_X)
            var_nd = tf.expand_dims(Kff_nd, 0) + delta_cov_nd
            var_nd = tf.transpose(var_nd)
            var_lst.append(var_nd)

        mean = tf.concat(mean_lst, axis=1)
        var = tf.concat(var_lst, axis=1)
        return mean, var

    def conditional_ND_not_share_Z(self, X, full_cov=False):
        mean_lst, var_lst, A_tiled_lst = [], [], []
        for nd in range(self.num_nodes):
            pa_nd = self.pa_idx(nd)
            X_tmp = tf.gather(X, pa_nd, axis=1)
            Kuf_nd = self.feature[nd].Kuf(self.kern[nd], X_tmp)
            A_nd = tf.matrix_triangular_solve(self.Lu[nd], Kuf_nd, lower=True)
            A_nd = tf.matrix_triangular_solve(tf.transpose(self.Lu[nd]), A_nd, lower=False)

            mean_tmp = tf.matmul(A_nd,
                                 self.q_mu[:, nd * self.dim_per_out:(nd + 1) * self.dim_per_out],
                                 transpose_a=True)
            if self.nb_init:
                mean_tmp += self.mean_function[nd](X_tmp)
            else:
                mean_tmp += self.mean_function[nd](X[:, nd * self.dim_per_in:(nd + 1) * self.dim_per_in])
            mean_lst.append(mean_tmp)

            A_tiled_lst.append(tf.tile(A_nd[None, :, :], [self.dim_per_out, 1, 1]))
            SK_nd = -self.Ku_tiled_lst[nd]
            q_sqrt_nd = self.q_sqrt_lst[nd]
            with params_as_tensors_for(q_sqrt_nd, convert=True):
                SK_nd += tf.matmul(q_sqrt_nd, q_sqrt_nd, transpose_b=True)

            B_nd = tf.matmul(SK_nd, A_tiled_lst[nd])
            delta_cov_nd = tf.reduce_sum(A_tiled_lst[nd] * B_nd, 1)  # (num_latent, num_X)
            Kff_nd = self.kern[nd].Kdiag(X_tmp)

            # (1, num_X) + (num_latent, num_X)
            var_nd = tf.expand_dims(Kff_nd, 0) + delta_cov_nd
            var_nd = tf.transpose(var_nd)
            var_lst.append(var_nd)

        mean = tf.concat(mean_lst, axis=1)
        var = tf.concat(var_lst, axis=1)
        return mean, var

    @time_it
    def KL(self):
        """
        The KL divergence from the variational distribution to the prior

        :return: KL divergence from N(q_mu, q_sqrt) to N(0, I), independently for each GP
        """
        self.build_cholesky_if_needed()

        KL = -0.5 * self.num_inducing * self.num_nodes * self.dim_per_out

        for nd in range(self.num_nodes):
            q_sqrt_nd = self.q_sqrt_lst[nd]
            with params_as_tensors_for(q_sqrt_nd, convert=True):
                KL -= 0.5 * tf.reduce_sum(tf.log(tf.matrix_diag_part(q_sqrt_nd) ** 2))
                KL += tf.reduce_sum(tf.log(tf.matrix_diag_part(self.Lu[nd]))) * self.dim_per_out
                KL += 0.5 * tf.reduce_sum(
                    tf.square(tf.matrix_triangular_solve(self.Lu_tiled_lst[nd], q_sqrt_nd, lower=True)))
                q_mu_nd = self.q_mu[:, nd * self.dim_per_out:(nd + 1) * self.dim_per_out]
                Kinv_m_nd = tf.cholesky_solve(self.Lu[nd], q_mu_nd)
                KL += 0.5 * tf.reduce_sum(q_mu_nd * Kinv_m_nd)
        return KL
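# For reference, each node nd in KL() above accumulates the standard closed-form Gaussian KL
# (a sketch of the quantity the loop computes, assuming K_nd = Lu_nd Lu_nd^T and, per output
# dimension d, S_{nd,d} = q_sqrt_{nd,d} q_sqrt_{nd,d}^T):
#
#   KL_nd = 0.5 * sum_d [ m_d^T K_nd^{-1} m_d + tr(K_nd^{-1} S_{nd,d})
#                         - M + log|K_nd| - log|S_{nd,d}| ]
#
# where M = num_inducing, d runs over dim_per_out, and m_d is the d-th column of the q_mu
# slice for node nd. The -0.5 * M * num_nodes * dim_per_out constant and the log-determinant
# and trace terms correspond line by line to the code in KL().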
def init_layers_graph(X, Y, Z, kernels, gmat,
                      num_layers=2,
                      num_nodes=None,
                      dim_per_node=5,
                      dim_per_X=5,
                      dim_per_Y=5,
                      share_Z=False,
                      nb_init=True):
    layers = []

    def pa_idx(nd, dim_per_in):
        res = []
        for n in range(num_nodes):
            w = gmat[nd, n]
            if w > 0:
                # print(res, range(n*self.dim_per_in, (n+1)*self.dim_per_in))
                res = res + list(range(n * dim_per_in, (n + 1) * dim_per_in))
        res = np.asarray(res)
        return res

    X_running, Z_running = X.copy(), Z.copy()
    for l in range(num_layers - 1):
        if l == 0:
            dim_in = dim_per_X
            dim_out = dim_per_node
        else:
            dim_in = dim_per_node
            dim_out = dim_per_node
        # print(dim_in, dim_out)

        X_running_tmp = np.zeros((X.shape[0], dim_out * num_nodes))
        Z_running_tmp = np.zeros((Z.shape[0], dim_out * num_nodes))
        mf_lst = ParamList([], trainable=False)

        for nd in range(num_nodes):
            if nb_init:
                pa = pa_idx(nd, dim_in)
            else:
                pa = np.asarray(range(nd * dim_in, (nd + 1) * dim_in))
            agg_dim_in = len(pa)

            if agg_dim_in == dim_out:
                mf = Identity()
            else:
                if agg_dim_in > dim_out:
                    # stepping down, use the pca projection
                    # _, _, V = np.linalg.svd(X_running[:, nd*dim_in : (nd+1)*dim_in], full_matrices=False)
                    _, _, V = np.linalg.svd(X_running[:, pa], full_matrices=False)
                    W = V[:dim_out, :].T
                else:
                    # stepping up, use identity + padding
                    W = np.concatenate([np.eye(agg_dim_in),
                                        np.zeros((agg_dim_in, dim_out - agg_dim_in))], 1)
                mf = Linear(W)
                mf.set_trainable(False)
            mf_lst.append(mf)

            if agg_dim_in != dim_out:
                # print(Z_running_tmp[:, nd*dim_out:(nd+1)*dim_out].shape,
                #       Z_running[:, nd*dim_in:(nd+1)*dim_in].shape,
                #       W.shape, Z_running[:, nd*dim_in:(nd+1)*dim_in].dot(W).shape)
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa].dot(W)
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa].dot(W)
            else:
                Z_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = Z_running[:, pa]
                X_running_tmp[:, nd * dim_out:(nd + 1) * dim_out] = X_running[:, pa]

        layers.append(
            SVGPG_Layer(kernels[l], Z_running, mf_lst, num_nodes, dim_in, dim_out, gmat,
                        share_Z=share_Z, nb_init=nb_init))
        Z_running = Z_running_tmp
        X_running = X_running_tmp

    # final layer
    if num_layers == 1:
        fin_dim_in = dim_per_X
    else:
        fin_dim_in = dim_per_node
    layers.append(
        SVGPG_Layer(kernels[-1], Z_running, None, num_nodes, fin_dim_in, dim_per_Y, gmat,
                    share_Z=share_Z, nb_init=nb_init))
    return layers
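# A minimal, hedged example of calling init_layers_graph. Shapes follow the defaults above:
# X has dim_per_X features per node, Z is a set of inducing inputs in the same space, and
# gmat is the (num_nodes x num_nodes) graph weight matrix. The kernel construction is an
# assumption (here one RBF per node over all parent dimensions under a fully connected gmat);
# the original code may build the per-node kernel lists differently.
import numpy as np
import gpflow

num_nodes, dim_per_X, num_layers = 3, 5, 2
N, M = 100, 20
X = np.random.randn(N, num_nodes * dim_per_X)
Y = np.random.randn(N, num_nodes * 5)
Z = X[np.random.choice(N, M, replace=False)].copy()
gmat = np.ones((num_nodes, num_nodes))  # fully connected graph

# one kernel per node, per layer
kernels = [[gpflow.kernels.RBF(num_nodes * dim_per_X) for _ in range(num_nodes)]
           for _ in range(num_layers)]

layers = init_layers_graph(X, Y, Z, kernels, gmat,
                           num_layers=num_layers, num_nodes=num_nodes,
                           dim_per_node=5, dim_per_X=dim_per_X, dim_per_Y=5)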
def __init__(self, datasets=[], inducing_locations=[], kernels=[], noise_sigmas=[],
             minibatch_sizes=[], mixing_weight=None, parent_mixtures=None, masks=None,
             num_samples=1, **kwargs):
    """
    datasets: an array of arrays [X_a, Y_a] ordered by 'trust', i.e. datasets[0] is the most reliable
    inducing_locations: an array of inducing locations for each of the datasets
    kernels: an array of kernels for each of the datasets
    noise_sigmas: an array of noise_sigmas for each of the datasets
    mixing_weight (MR_Mixing_Weight): an object that will combine the predictions from
        each of the local experts
    parent_mixtures: an array of parent mixture models
    """
    Model.__init__(self, **kwargs)

    self.dataset_sizes = []
    for d in datasets:
        self.dataset_sizes.append(d[0].shape[0])

    self.num_datasets = len(datasets)

    self.X = []
    self.Y = []
    self.Z = inducing_locations

    self.masks = masks
    self.MASKS = []

    self.kernels = kernels
    self.noise_sigmas = noise_sigmas

    self.num_samples = num_samples

    # gpflow models are Parameterized objects
    print(parent_mixtures)
    self.parent_mixtures = ParamList(parent_mixtures) if parent_mixtures is not None else None

    self.mixing_weight = mixing_weight

    minibatch = False
    for i, d in enumerate(datasets):
        # TODO: can we just wrap with a ParamList?
        if minibatch:
            _x = Minibatch(d[0], batch_size=minibatch_sizes[i], seed=0)
            _y = Minibatch(d[1], batch_size=minibatch_sizes[i], seed=0)
        else:
            _x = DataHolder(d[0])
            _y = DataHolder(d[1])

        # Check we have some masks
        if self.masks:
            # Check if we have a mask for this dataset
            _mask = None
            if self.masks[i] is not None:
                if minibatch:
                    _mask = Minibatch(self.masks[i], batch_size=minibatch_sizes[0], seed=0)
                else:
                    _mask = DataHolder(self.masks[i])

        # make it so GPFlow can find _x, _y
        setattr(self, 'x_{i}'.format(i=i), _x)
        setattr(self, 'y_{i}'.format(i=i), _y)
        if self.masks:
            setattr(self, 'mask_{i}'.format(i=i), _mask)

        # save references
        self.X.append(self.__dict__['x_{i}'.format(i=i)])
        self.Y.append(self.__dict__['y_{i}'.format(i=i)])
        if self.masks:
            self.MASKS.append(self.__dict__['mask_{i}'.format(i=i)])

    self.setup()
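# Hedged construction sketch for the mixture constructor above. The class name `MR_Mixture`
# and the exact nesting of inducing_locations/kernels/noise_sigmas are assumptions, guided by
# setup_base_gps() earlier in this section (index 0: base GPs, one per dataset; 1: deep GPs,
# all but the first dataset; 2: parent GPs).
import numpy as np
import gpflow

X_a, Y_a = np.random.randn(100, 2), np.random.randn(100, 1)   # most trusted dataset
X_b, Y_b = np.random.randn(500, 2), np.random.randn(500, 1)   # less trusted dataset
datasets = [[X_a, Y_a], [X_b, Y_b]]

Z = [[X_a[:20].copy(), X_b[:20].copy()],   # base GPs
     [X_b[:20].copy()],                    # deep GPs
     []]                                   # parent GPs (none here)
kernels = [[gpflow.kernels.RBF(2), gpflow.kernels.RBF(2)], [gpflow.kernels.RBF(2)], []]
noise_sigmas = [[0.1, 0.1], [0.1], []]

# m = MR_Mixture(datasets=datasets, inducing_locations=Z, kernels=kernels,
#                noise_sigmas=noise_sigmas, minibatch_sizes=[100, 100],
#                mixing_weight=some_mixing_weight)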
def __init__(self, latent_dim, Y, inputs=None, emissions=None, px1_mu=None, px1_cov=None,
             kern=None, Z=None, n_ind_pts=100, mean_fn=None,
             Q_diag=None, Umu=None, Ucov_chol=None,
             qx1_mu=None, qx1_cov=None, As=None, bs=None, Ss=None,
             n_samples=100, batch_size=None, chunking=False, seed=None,
             parallel_iterations=10, jitter=gp.settings.numerics.jitter_level, name=None):

    super().__init__(latent_dim, Y[0],
                     inputs=None if inputs is None else inputs[0],
                     emissions=emissions,
                     px1_mu=px1_mu, px1_cov=None,
                     kern=kern, Z=Z, n_ind_pts=n_ind_pts, mean_fn=mean_fn,
                     Q_diag=Q_diag, Umu=Umu, Ucov_chol=Ucov_chol,
                     qx1_mu=qx1_mu, qx1_cov=None,
                     As=None, bs=None, Ss=False if Ss is False else None,
                     n_samples=n_samples, seed=seed,
                     parallel_iterations=parallel_iterations,
                     jitter=jitter, name=name)

    self.T = [Y_s.shape[0] for Y_s in Y]
    self.T_tf = tf.constant(self.T, dtype=gp.settings.int_type)
    self.max_T = max(self.T)
    self.sum_T = float(sum(self.T))
    self.n_seq = len(self.T)
    self.batch_size = batch_size
    self.chunking = chunking

    if self.batch_size is None:
        self.Y = ParamList(Y, trainable=False)
    else:
        _Y = np.stack([np.concatenate([Ys, np.zeros((self.max_T - len(Ys), self.obs_dim))])
                       for Ys in Y])
        self.Y = Param(_Y, trainable=False)

    if inputs is not None:
        if self.batch_size is None:
            self.inputs = ParamList(inputs, trainable=False)
        else:
            desired_length = self.max_T if self.chunking else self.max_T - 1
            _inputs = [np.concatenate([inputs[s],
                                       np.zeros((desired_length - len(inputs[s]), self.input_dim))])
                       for s in range(self.n_seq)]  # pad the inputs
            self.inputs = Param(_inputs, trainable=False)

    if qx1_mu is None:
        self.qx1_mu = Param(np.zeros((self.n_seq, self.latent_dim)))

    self.qx1_cov_chol = Param(
        np.tile(np.eye(self.latent_dim)[None, ...], [self.n_seq, 1, 1])
        if qx1_cov is None else np.linalg.cholesky(qx1_cov),
        transform=gtf.LowerTriangular(self.latent_dim, num_matrices=self.n_seq))

    _As = [np.ones((T_s - 1, self.latent_dim)) for T_s in self.T] if As is None else As
    _bs = [np.zeros((T_s - 1, self.latent_dim)) for T_s in self.T] if bs is None else bs
    if Ss is not False:
        _S_chols = [np.tile(self.Q_sqrt.value.copy()[None, ...], [T_s - 1, 1]) for T_s in self.T] \
            if Ss is None else \
            [np.sqrt(S) if S.ndim == 2 else np.linalg.cholesky(S) for S in Ss]

    if self.batch_size is None:
        self.As = ParamList(_As)
        self.bs = ParamList(_bs)
        if Ss is not False:
            self.S_chols = ParamList([
                Param(Sc, transform=gtf.positive if Sc.ndim == 2 else
                      gtf.LowerTriangular(self.latent_dim, num_matrices=Sc.shape[0]))
                for Sc in _S_chols])
    else:
        _As = np.stack([np.concatenate([_A, np.zeros((self.max_T - len(_A) - 1, *_A.shape[1:]))])
                        for _A in _As])
        _bs = np.stack([np.concatenate([_b, np.zeros((self.max_T - len(_b) - 1, self.latent_dim))])
                        for _b in _bs])
        self.As = Param(_As)
        self.bs = Param(_bs)
        if Ss is not False:
            _S_chols = [np.concatenate([_S, np.zeros((self.max_T - len(_S) - 1, *_S.shape[1:]))])
                        for _S in _S_chols]
            _S_chols = np.stack(_S_chols)
            self.S_chols = Param(_S_chols,
                                 transform=gtf.positive if _S_chols.ndim == 3 else
                                 gtf.LowerTriangular(self.latent_dim,
                                                     num_matrices=(self.n_seq, self.max_T - 1)))

    self.multi_diag_px1_cov = False
    if isinstance(px1_cov, list):  # different prior for each sequence
        _x1_cov = np.stack(px1_cov)
        _x1_cov = np.sqrt(_x1_cov) if _x1_cov.ndim == 2 else np.linalg.cholesky(_x1_cov)
        _transform = None if _x1_cov.ndim == 2 else gtf.LowerTriangular(
            self.latent_dim, num_matrices=self.n_seq)
        self.multi_diag_px1_cov = _x1_cov.ndim == 2
    elif isinstance(px1_cov, np.ndarray):  # same prior for each sequence
        assert px1_cov.ndim < 3
        _x1_cov = np.sqrt(px1_cov) if px1_cov.ndim == 1 else np.linalg.cholesky(px1_cov)
        _transform = None if px1_cov.ndim == 1 else gtf.LowerTriangular(
            self.latent_dim, squeeze=True)
    self.px1_cov_chol = None if px1_cov is None else Param(
        _x1_cov, trainable=False, transform=_transform)

    if self.chunking:
        px1_mu_check = len(self.px1_mu.shape) == 1
        px1_cov_check_1 = not self.multi_diag_px1_cov
        px1_cov_check_2 = self.px1_cov_chol is None or len(self.px1_cov_chol.shape) < 3
        assert px1_mu_check and px1_cov_check_1 and px1_cov_check_2, \
            'Only one prior over x1 allowed for chunking'
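# Shape sketch (an assumption inferred from the padding logic above, not original documentation):
# Y is a list of per-sequence arrays of shape (T_s, obs_dim); inputs, if given, is a list of
# per-sequence arrays with input_dim columns, padded to max_T (or max_T - 1 without chunking)
# when batch_size is set; px1_cov may be a list (one prior per sequence) or a single array
# shared across sequences.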
def to_param_list(var_list, name):
    param_list = []
    for idx, var in enumerate(var_list):
        name_idx = '{name}_{idx}'.format(name=name, idx=idx)
        param_list.append(Param(var, dtype=float_type, name=name_idx))
    return ParamList(param_list)
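# Example use of the helper above (assuming float_type is gpflow's settings.float_type and
# Param/ParamList come from gpflow.params, as the surrounding code suggests):
#
#   lengthscales = to_param_list([np.ones(2), np.ones(3)], name='lengthscale')
#   # -> ParamList of Params named 'lengthscale_0', 'lengthscale_1'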