def variational_mfvi(X, mfvi_mixture=False, n_mixture=1, name="", **kwargs):
  """Defines the mean-field variational family for a Gaussian Process.

  Args:
    X: (np.ndarray of float32) input training features,
      with dimension (N, D).
    mfvi_mixture: (bool) Whether to output variational family with a
      mixture of MFVI.
    n_mixture: (int) Number of MFVI mixture components to add.
    name: (str) name for variational parameters.
    kwargs: Dict of other keyword variables.
      For compatibility purposes with other variational families.

  Returns:
    q_f: (ed.RandomVariable) variational family.
    qf_mean, qf_sdev: (tf.Tensor) variational parameters for q_f.
    mixture_par_list: (list of tf.Variable) variational parameters for
      the MFVI mixture if mfvi_mixture=True, else [].
  """
  X = tf.convert_to_tensor(X, dtype=tf.float32)
  N, D = X.shape.as_list()

  # define variational parameters
  qf_mean = tf.get_variable(shape=[N], name='{}_mean'.format(name))
  # exp transform guarantees a positive standard deviation
  qf_sdev = tf.exp(tf.get_variable(shape=[N], name='{}_sdev'.format(name)))

  # define variational family
  mixture_par_list = []
  if mfvi_mixture:
    gp_dist = tfd.MultivariateNormalDiag(loc=qf_mean,
                                         scale_diag=qf_sdev,
                                         name=name)
    q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
        n_mixture=n_mixture, N=N,
        gp_dist=gp_dist, name=name)
  else:
    q_f = ed.MultivariateNormalDiag(loc=qf_mean,
                                    scale_diag=qf_sdev,
                                    name=name)

  return q_f, qf_mean, qf_sdev, mixture_par_list
def variational_mfvi(X, mfvi_mixture=False, n_mixture=1):
  """Defines the mean-field variational family for GPR.

  Args:
    X: (np.ndarray of float32) input training features, shape (N, D).
    mfvi_mixture: (bool) Whether to output variational family with a
      mixture of MFVI.
    n_mixture: (int) Number of MFVI mixture components to add.

  Returns:
    q_f, q_sig: (ed.RandomVariable) variational family.
    qf_mean, qf_sdev: (tf.Tensor) variational parameters for q_f.
    mixture_par_list: (list of tf.Variable) variational parameters for
      the MFVI mixture if mfvi_mixture=True, else [].
  """
  N, D = X.shape

  # define variational parameters
  qf_mean = tf.get_variable(shape=[N], name='qf_mean')
  # exp transform guarantees positive scale parameters
  qf_sdev = tf.exp(tf.get_variable(shape=[N], name='qf_sdev'))
  q_sig_mean = tf.get_variable(shape=[], name='q_sig_mean')
  q_sig_sdev = tf.exp(tf.get_variable(shape=[], name='q_sig_sdev'))

  # define variational family
  mixture_par_list = []
  if mfvi_mixture:
    gp_dist = tfd.MultivariateNormalDiag(loc=qf_mean,
                                         scale_diag=qf_sdev,
                                         name='q_f')
    q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
        n_mixture=n_mixture, N=N,
        gp_dist=gp_dist, name='q_f')
  else:
    q_f = ed.MultivariateNormalDiag(loc=qf_mean,
                                    scale_diag=qf_sdev,
                                    name='q_f')
  q_sig = ed.Normal(loc=q_sig_mean, scale=q_sig_sdev, name='q_sig')

  return q_f, q_sig, qf_mean, qf_sdev, mixture_par_list
def variational_dgpr(X, Z, Zm=None, ls=1., kernel_func=rbf, ridge_factor=1e-3,
                     mfvi_mixture=False, n_mixture=1, name="", **kwargs):
  """Defines the decoupled variational family for GPR.

  Args:
    X: (np.ndarray of float32) input training features,
      with dimension (Nx, D).
    Z: (np.ndarray of float32) inducing points, shape (Ns, D).
    Zm: (np.ndarray of float32 or None) inducing points for mean,
      shape (Nm, D). If None then same as Z.
    ls: (float32) length scale parameter.
    kernel_func: (function) kernel function.
    ridge_factor: (float32) small ridge factor to stabilize Cholesky
      decomposition.
    mfvi_mixture: (bool) Whether to output variational family with a
      mixture of MFVI.
    n_mixture: (int) Number of MFVI mixture components to add.
    name: (str) name for the variational parameters/random variables.
    kwargs: Dict of other keyword variables.
      For compatibility purposes with other variational families.

  Returns:
    q_f: (ed.RandomVariable) variational family.
    qf_mean, qf_cov: (tf.Tensor) variational parameters for q_f.
    mixture_par_list: (list of tf.Variable) variational parameters for
      the MFVI mixture if mfvi_mixture=True, else [].
  """
  X = tf.convert_to_tensor(X)
  Zs = tf.convert_to_tensor(Z)
  Zm = tf.convert_to_tensor(Zm) if Zm is not None else Zs

  Nx, Nm, Ns = X.shape.as_list()[0], Zm.shape.as_list()[0], Zs.shape.as_list()[0]

  # 1. Prepare constants
  # compute matrix constants
  Kxx = kernel_func(X, ls=ls)
  Kmm = kernel_func(Zm, ls=ls)
  Kxm = kernel_func(X, Zm, ls=ls)
  Kxs = kernel_func(X, Zs, ls=ls)
  Kss = kernel_func(Zs, ls=ls, ridge_factor=ridge_factor)

  # 2. Define variational parameters
  # define free parameters (i.e. mean and full covariance of f_latent)
  m = tf.get_variable(shape=[Nm, 1], name='{}_mean_latent'.format(name))
  # BUG FIX: use floor division. `Ns * (Ns + 1) / 2` is a float in
  # Python 3, which tf.get_variable rejects as a shape dimension.
  s = tf.get_variable(shape=[Ns * (Ns + 1) // 2],
                      name='{}_cov_latent_s'.format(name))
  L = fill_triangular(s, name='{}_cov_latent_chol'.format(name))

  # components for the KL objective
  H = tf.eye(Ns) + tf.matmul(L, tf.matmul(Kss, L), transpose_a=True)
  cond_cov_inv = tf.matmul(L, tf.matrix_solve(H, tf.transpose(L)))

  func_norm_mm = tf.matmul(m, tf.matmul(Kmm, m), transpose_a=True)
  log_det_ss = tf.log(tf.matrix_determinant(H))
  cond_norm_ss = tf.reduce_sum(tf.multiply(Kss, cond_cov_inv))

  # compute sparse gp variational parameter
  # (i.e. mean and covariance of P(f_obs | f_latent))
  qf_mean = tf.squeeze(tf.tensordot(Kxm, m, [[1], [0]]),
                       name='{}_mean'.format(name))
  qf_cov = (Kxx -
            tf.matmul(Kxs,
                      tf.matmul(cond_cov_inv, Kxs, transpose_b=True)) +
            ridge_factor * tf.eye(Nx, dtype=tf.float32))

  # 3. Define variational family
  mixture_par_list = []
  if mfvi_mixture:
    gp_dist = dist_util.VariationalGaussianProcessDecoupledDistribution(
        loc=qf_mean,
        covariance_matrix=qf_cov,
        func_norm_mm=func_norm_mm,
        log_det_ss=log_det_ss,
        cond_norm_ss=cond_norm_ss)
    q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
        n_mixture=n_mixture, N=Nx,
        gp_dist=gp_dist, name=name)
  else:
    q_f = dist_util.VariationalGaussianProcessDecoupled(
        loc=qf_mean,
        covariance_matrix=qf_cov,
        func_norm_mm=func_norm_mm,
        log_det_ss=log_det_ss,
        cond_norm_ss=cond_norm_ss,
        name=name)

  return q_f, qf_mean, qf_cov, mixture_par_list
def variational_sgpr(X, Z, ls=1., kernel_func=rbf, ridge_factor=1e-3,
                     mfvi_mixture=False, n_mixture=1, name="", **kwargs):
  """Defines the sparse variational family for GPR.

  Args:
    X: (np.ndarray of float32) input training features,
      with dimension (Nx, D).
    Z: (np.ndarray of float32) inducing points, with dimension (Nz, D).
    ls: (float32) length scale parameter.
    kernel_func: (function) kernel function.
    ridge_factor: (float32) small ridge factor to stabilize Cholesky
      decomposition.
    mfvi_mixture: (bool) Whether to output variational family with a
      mixture of MFVI.
    n_mixture: (int) Number of MFVI mixture components to add.
    name: (str) name for the variational parameters/random variables.
    kwargs: Dict of other keyword variables.
      For compatibility purposes with other variational families.

  Returns:
    q_f: (ed.RandomVariable) variational family.
    qf_mean, qf_cov: (tf.Tensor) variational parameters for q_f.
    mixture_par_list: (list of tf.Variable) variational parameters for
      the MFVI mixture if mfvi_mixture=True, else [].
  """
  X = tf.convert_to_tensor(X, dtype=tf.float32)
  Z = tf.convert_to_tensor(Z, dtype=tf.float32)

  Nx, Nz = X.shape.as_list()[0], Z.shape.as_list()[0]

  # 1. Prepare constants
  # compute matrix constants
  Kxx = kernel_func(X, ls=ls)
  Kxz = kernel_func(X, Z, ls=ls)
  Kzz = kernel_func(Z, ls=ls, ridge_factor=ridge_factor)

  # compute null covariance matrix using Cholesky decomposition
  Kzz_chol_inv = tf.matrix_inverse(tf.cholesky(Kzz))
  Kzz_inv = tf.matmul(Kzz_chol_inv, Kzz_chol_inv, transpose_a=True)

  Kxz_Kzz_chol_inv = tf.matmul(Kxz, Kzz_chol_inv, transpose_b=True)
  Kxz_Kzz_inv = tf.matmul(Kxz, Kzz_inv)
  Sigma_pre = Kxx - tf.matmul(
      Kxz_Kzz_chol_inv, Kxz_Kzz_chol_inv, transpose_b=True)

  # 2. Define variational parameters
  # define free parameters (i.e. mean and full covariance of f_latent)
  m = tf.get_variable(shape=[Nz], name='{}_mean_latent'.format(name))
  # BUG FIX: use floor division. `Nz * (Nz + 1) / 2` is a float in
  # Python 3, which tf.get_variable rejects as a shape dimension.
  s = tf.get_variable(shape=[Nz * (Nz + 1) // 2],
                      name='{}_cov_latent_s'.format(name))
  L = fill_triangular(s, name='{}_cov_latent_chol'.format(name))
  S = tf.matmul(L, L, transpose_b=True, name='{}_cov_latent'.format(name))

  # compute sparse gp variational parameter
  # (i.e. mean and covariance of P(f_obs | f_latent))
  qf_mean = tf.tensordot(Kxz_Kzz_inv, m, [[1], [0]],
                         name='{}_mean'.format(name))
  qf_cov = (
      Sigma_pre +
      tf.matmul(Kxz_Kzz_inv,
                tf.matmul(S, Kxz_Kzz_inv, transpose_b=True)) +
      ridge_factor * tf.eye(Nx, dtype=tf.float32))

  # define variational family
  mixture_par_list = []
  if mfvi_mixture:
    gp_dist = tfd.MultivariateNormalFullCovariance(
        loc=qf_mean, covariance_matrix=qf_cov)
    q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
        n_mixture=n_mixture, N=Nx,
        gp_dist=gp_dist, name=name)
  else:
    q_f = ed.MultivariateNormalFullCovariance(loc=qf_mean,
                                              covariance_matrix=qf_cov,
                                              name=name)

  return q_f, qf_mean, qf_cov, mixture_par_list
def variational_sgpr(X, Z, ls=1., kern_func=rbf, ridge_factor=1e-3,
                     mfvi_mixture=False, n_mixture=1):
  """Defines the sparse variational family for GPR.

  Args:
    X: (np.ndarray of float32) input training features,
      with dimension (Nx, D).
    Z: (np.ndarray of float32) inducing points, with dimension (Nz, D).
    ls: (float32) length scale parameter.
    kern_func: (function) kernel function.
    ridge_factor: (float32) small ridge factor to stabilize Cholesky
      decomposition.
    mfvi_mixture: (bool) Whether to output variational family with a
      mixture of MFVI.
    n_mixture: (int) Number of MFVI mixture components to add.

  Returns:
    q_f, q_sig: (ed.RandomVariable) variational family.
    qf_mean, qf_cov: (tf.Tensor) variational parameters for q_f.
    mixture_par_list: (list of tf.Variable) variational parameters for
      MFVI mixture ('mixture_logits', 'mixture_logits_mfvi_mix',
      'mean_mfvi', 'sdev_mfvi') if mfvi_mixture=True, else [].
  """
  X = tf.convert_to_tensor(X)
  Z = tf.convert_to_tensor(Z)

  Nx, Nz = X.shape.as_list()[0], Z.shape.as_list()[0]

  # 1. Prepare constants
  # compute matrix constants
  Kxx = kern_func(X, ls=ls)
  Kxz = kern_func(X, Z, ls=ls)
  Kzz = kern_func(Z, ls=ls, ridge_factor=ridge_factor)

  # compute null covariance matrix using Cholesky decomposition
  Kzz_chol_inv = tf.matrix_inverse(tf.cholesky(Kzz))
  Kzz_inv = tf.matmul(Kzz_chol_inv, Kzz_chol_inv, transpose_a=True)

  Kxz_Kzz_chol_inv = tf.matmul(Kxz, Kzz_chol_inv, transpose_b=True)
  Kxz_Kzz_inv = tf.matmul(Kxz, Kzz_inv)
  Sigma_pre = Kxx - tf.matmul(
      Kxz_Kzz_chol_inv, Kxz_Kzz_chol_inv, transpose_b=True)

  # 2. Define variational parameters
  # define mean and variance for sigma
  q_sig_mean = tf.get_variable(shape=[], name='q_sig_mean')
  # exp transform guarantees a positive standard deviation
  q_sig_sdev = tf.exp(tf.get_variable(shape=[], name='q_sig_sdev'))

  # define free parameters (i.e. mean and full covariance of f_latent)
  m = tf.get_variable(shape=[Nz], name='qf_m')
  # BUG FIX: use floor division. `Nz * (Nz + 1) / 2` is a float in
  # Python 3, which tf.get_variable rejects as a shape dimension.
  s = tf.get_variable(shape=[Nz * (Nz + 1) // 2], name='qf_s')
  L = fill_triangular(s, name='qf_chol')
  S = tf.matmul(L, L, transpose_b=True)

  # compute sparse gp variational parameter
  # (i.e. mean and covariance of P(f_obs | f_latent))
  qf_mean = tf.tensordot(Kxz_Kzz_inv, m, [[1], [0]], name='qf_mean')
  qf_cov = (
      Sigma_pre +
      tf.matmul(Kxz_Kzz_inv,
                tf.matmul(S, Kxz_Kzz_inv, transpose_b=True)) +
      ridge_factor * tf.eye(Nx, dtype=tf.float32))

  # define variational family
  mixture_par_list = []
  if mfvi_mixture:
    gp_dist = tfd.MultivariateNormalFullCovariance(
        loc=qf_mean, covariance_matrix=qf_cov)
    q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
        n_mixture=n_mixture, N=Nx,
        gp_dist=gp_dist, name='q_f')
  else:
    q_f = ed.MultivariateNormalFullCovariance(loc=qf_mean,
                                              covariance_matrix=qf_cov,
                                              name='q_f')
  q_sig = ed.Normal(loc=q_sig_mean, scale=q_sig_sdev, name='q_sig')

  return q_f, q_sig, qf_mean, qf_cov, mixture_par_list