def _init_variational_parameters(self, q_mu, q_sqrt):
    q_mu = np.zeros((self.num_inducing, self.num_latent)) if q_mu is None else q_mu
    self.q_mu = Parameter(q_mu, dtype=settings.float_type)  # M x K

    if q_sqrt is None:
        if self.q_diag:
            self.q_sqrt = Parameter(
                np.ones((self.num_inducing, self.num_latent), dtype=settings.float_type),
                transform=transforms.positive)  # M x K
        else:
            q_sqrt = np.array([
                np.eye(self.num_inducing, dtype=settings.float_type)
                for _ in range(self.num_latent)
            ])
            self.q_sqrt = Parameter(
                q_sqrt,
                transform=transforms.LowerTriangular(self.num_inducing, self.num_latent))  # K x M x M
    else:
        if self.q_diag:
            assert q_sqrt.ndim == 2
            self.q_sqrt = Parameter(q_sqrt, transform=transforms.positive)  # M x K
        else:
            assert q_sqrt.ndim == 3
            # num_matrices must match the number of latent GPs (was num_classes,
            # which is inconsistent with the rest of this routine).
            self.q_sqrt = Parameter(
                q_sqrt,
                transform=transforms.LowerTriangular(self.num_inducing, self.num_latent))  # K x M x M
def _init_variational_parameters(self, num_inducing, q_mu, q_sqrt, q_diag):
    """
    Constructs the mean and the Cholesky factor of the covariance of the
    variational Gaussian posterior. If the user passes values for `q_mu`
    and `q_sqrt`, this routine checks that they have consistent and
    correct shapes. If the user does not specify values for `q_mu` and
    `q_sqrt`, the routine initialises them; their shape depends on
    `num_inducing` and `q_diag`.

    Note: the comments mostly denote the number of outputs with P, the
    number of latent GPs with L, and the number of inducing points with M.
    Typically P equals L, but this can differ when certain multi-output
    kernels are used.

    Parameters
    ----------
    :param num_inducing: int
        Number of inducing variables, typically denoted M.
    :param q_mu: np.array or None
        Mean of the variational Gaussian posterior. If None, the mean is
        initialised with zeros. If not None, the shape of `q_mu` is checked.
    :param q_sqrt: np.array or None
        Cholesky factor of the covariance of the variational Gaussian
        posterior. If None, `q_sqrt` is initialised with identity matrices.
        If not None, its shape is checked, depending on `q_diag`.
    :param q_diag: bool
        Used to check that `q_mu` and `q_sqrt` have the correct shape, or
        to construct them with the correct shape. If `q_diag` is True,
        `q_sqrt` is two-dimensional and only holds the square roots of the
        diagonal elements of the covariance. If False, `q_sqrt` is
        three-dimensional.
    """
    q_mu = np.zeros((num_inducing, self.num_latent)) if q_mu is None else q_mu
    self.q_mu = Parameter(q_mu, dtype=settings.float_type)  # M x P

    if q_sqrt is None:
        if self.q_diag:
            self.q_sqrt = Parameter(
                np.ones((num_inducing, self.num_latent), dtype=settings.float_type),
                transform=transforms.positive)  # M x P
        else:
            q_sqrt = np.array([
                np.eye(num_inducing, dtype=settings.float_type)
                for _ in range(self.num_latent)
            ])
            self.q_sqrt = Parameter(
                q_sqrt,
                transform=transforms.LowerTriangular(num_inducing, self.num_latent))  # P x M x M
    else:
        if q_diag:
            assert q_sqrt.ndim == 2
            self.num_latent = q_sqrt.shape[1]
            self.q_sqrt = Parameter(q_sqrt, transform=transforms.positive)  # M x L/P
        else:
            assert q_sqrt.ndim == 3
            self.num_latent = q_sqrt.shape[0]
            num_inducing = q_sqrt.shape[1]
            self.q_sqrt = Parameter(
                q_sqrt,
                transform=transforms.LowerTriangular(num_inducing, self.num_latent))  # L/P x M x M
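# A standalone numpy sketch (an illustration, not part of the routine above)
# of the shape conventions it enforces: q_mu is M x P, the full q_sqrt is
# P x M x M, and the diagonal q_sqrt is M x P. The sizes are assumptions.
import numpy as np

M, P = 5, 3  # inducing points, latent GPs
q_mu = np.zeros((M, P))                                # M x P
q_sqrt_full = np.array([np.eye(M) for _ in range(P)])  # P x M x M
q_sqrt_diag = np.ones((M, P))                          # M x P
assert q_mu.shape == (M, P)
assert q_sqrt_full.shape == (P, M, M) and q_sqrt_diag.shape == (M, P)
# Each q_sqrt_full[p] is lower triangular, so S_p = L_p L_p^T is a valid
# covariance for the p-th latent GP.
S0 = q_sqrt_full[0] @ q_sqrt_full[0].T
assert np.allclose(S0, S0.T)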
def __init__(self, kern, Z, num_inducing, num_outputs, mean_function=None, white=True):
    self.white = white
    self.kern = kern
    self.num_inputs = kern.input_dim
    self.num_outputs = num_outputs
    self.num_inducing = num_inducing
    self.q_diag = False

    Um = np.zeros((self.num_inducing, self.num_outputs))
    Us_sqrt = (np.ones((self.num_inducing, self.num_outputs)) if self.q_diag
               else np.array([np.eye(self.num_inducing) for _ in range(self.num_outputs)]))

    with tf.name_scope("inducing"):
        self.Z = Param(Z, name="z")()
        self.Um = Param(Um, name="u")()
        if self.q_diag:
            self.Us_sqrt = Param(Us_sqrt, transforms.positive, name="u_variance")()
        else:
            self.Us_sqrt = Param(
                Us_sqrt,
                transforms.LowerTriangular(self.num_inducing, self.num_outputs),
                name="u_variance")()

    self.Ku = self.kern.Ksymm(self.Z) + tf.eye(
        tf.shape(self.Z)[0], dtype=self.Z.dtype) * settings.jitter
    self.Lu = tf.cholesky(self.Ku)
    self.mean_function = mean_function
def __init__(self, kern, Um, Us_sqrt, Z, num_outputs, white=True):
    self.white = white
    self.kern = kern
    self.num_outputs = num_outputs
    self.num_inducing = Z.shape[0]
    self.q_diag = Us_sqrt.ndim == 2

    with tf.name_scope("inducing"):
        self.Z = Param(Z, name="z")()    # M x D
        self.Um = Param(Um, name="u")()  # D x M
        if self.q_diag:
            self.Us_sqrt = Param(Us_sqrt,  # D x M
                                 transforms.positive,
                                 name="u_variance")()
        else:
            self.Us_sqrt = Param(Us_sqrt,  # D x M x M
                                 transforms.LowerTriangular(Us_sqrt.shape[1], Us_sqrt.shape[0]),
                                 name="u_variance")()

    self.Ku = self.kern.Ksymm(self.Z) + tf.eye(
        tf.shape(self.Z)[0], dtype=self.Z.dtype) * settings.jitter
    self.Lu = tf.cholesky(self.Ku)
    self.Ku_tiled = tf.tile(self.Ku[None, :, :], [self.num_outputs, 1, 1])  # D x M x M
    self.Lu_tiled = tf.tile(self.Lu[None, :, :], [self.num_outputs, 1, 1])
def __init__(self, kern, num_inducing, num_outputs, mean_function=None, white=True):
    self.white = white
    self.kern = kern
    self.num_inputs = kern.input_dim
    self.num_outputs = num_outputs
    self.num_inducing = num_inducing
    self.q_diag = False

    Um = np.zeros((self.num_inducing, self.num_outputs))
    Us_sqrt = (np.ones((self.num_inducing, self.num_outputs)) if self.q_diag
               else np.array([np.eye(self.num_inducing) for _ in range(self.num_outputs)]))

    with tf.name_scope("inducing"):
        self.Um = Param(Um, name="u")()
        if self.q_diag:
            self.Us_sqrt = Param(Us_sqrt, transforms.positive, name="u_variance")()
        else:
            self.Us_sqrt = Param(
                Us_sqrt,
                transforms.LowerTriangular(self.num_inducing, self.num_outputs),
                name="u_variance")()

    self.mean_function = mean_function
def __init__(self, latent_dim, Y, transitions, T_latent=None, inputs=None,
             emissions=None, px1_mu=None, px1_cov=None, Xmu=None, Xchol=None,
             name=None):
    _Xmu = np.zeros((T_latent or Y.shape[0], latent_dim)) if Xmu is None else Xmu
    super().__init__(_Xmu, Y, transitions, inputs, emissions,
                     px1_mu, px1_cov, name=name)

    _Xchol = np.eye(self.T_latent * self.latent_dim) if Xchol is None else Xchol
    if _Xchol.ndim == 1:
        self.Xchol = gp.Param(_Xchol)
    else:
        chol_transform = gtf.LowerTriangular(
            self.T_latent * self.latent_dim if _Xchol.ndim == 2 else self.latent_dim,
            num_matrices=1 if _Xchol.ndim == 2 else self.T_latent,
            squeeze=_Xchol.ndim == 2)
        self.Xchol = gp.Param(_Xchol, transform=chol_transform)
def __init__(self, X_init, Y, transitions, inputs=None, emissions=None,
             px1_mu=None, px1_cov=None, name=None):
    super().__init__(name=name)
    self.T_latent, self.latent_dim = X_init.shape
    self.T, self.obs_dim = Y.shape
    self.transitions = transitions
    self.emissions = emissions or GaussianEmissions(self.latent_dim, self.obs_dim)

    self.X = gp.Param(X_init)
    self.Y = gp.Param(Y, trainable=False)
    self.inputs = None if inputs is None else gp.Param(inputs, trainable=False)

    self.px1_mu = gp.Param(
        np.zeros(self.latent_dim) if px1_mu is None else px1_mu,
        trainable=False)
    self.px1_cov_chol = gp.Param(
        np.eye(self.latent_dim) if px1_cov is None else np.linalg.cholesky(px1_cov),
        trainable=False,
        transform=gtf.LowerTriangular(self.latent_dim, squeeze=True))
def __init__(self, kern, Z, num_outputs, mean_function):
    """
    A sparse variational GP layer in whitened representation. This layer
    holds the kernel, variational parameters, inducing points and mean
    function.

    The underlying model at inputs X is
        f = L v + mean_function(X), where v ~ N(0, I) and L L^T = kern.K(X).

    The variational distribution over the inducing points is
        q(v) = N(q_mu, q_sqrt q_sqrt^T),
    where q_mu is initialised to zeros of shape (M, num_outputs) and q_sqrt
    to stacked identity matrices of shape (num_outputs, M, M).

    The layer holds num_outputs independent GPs with the same kernel and
    inducing points.

    :param kern: The kernel for the layer (input_dim = D_in)
    :param Z: Inducing points (M, D_in)
    :param num_outputs: The number of GP outputs
    :param mean_function: The mean function
    """
    Parameterized.__init__(self)
    M = Z.shape[0]

    q_mu = np.zeros((M, num_outputs))
    self.q_mu = Parameter(q_mu)

    q_sqrt = np.tile(np.eye(M)[None, :, :], [num_outputs, 1, 1])
    transform = transforms.LowerTriangular(M, num_matrices=num_outputs)
    self.q_sqrt = Parameter(q_sqrt, transform=transform)

    self.feature = InducingPoints(Z)
    self.kern = kern
    self.mean_function = mean_function
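# A minimal standalone numpy sketch of the whitened parameterisation described
# in the docstring above: with v ~ N(0, I) and L L^T = K, samples f = L v have
# covariance K. The toy squared-exponential kernel and sizes are illustrative
# assumptions only.
import numpy as np

rng = np.random.RandomState(0)
X = np.linspace(0, 1, 4)[:, None]
K = np.exp(-0.5 * (X - X.T) ** 2 / 0.3 ** 2) + 1e-9 * np.eye(4)
L = np.linalg.cholesky(K)
v = rng.randn(4, 100000)        # whitened variables, v ~ N(0, I)
f = L @ v                       # mapped to function space
emp_cov = f @ f.T / v.shape[1]  # Monte Carlo estimate of Cov[f]
assert np.allclose(emp_cov, K, atol=5e-2)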
def __init__(self, dim, input_dim=0, kern=None, Z=None, n_ind_pts=100,
             mean_fn=None, Q_diag=None, Umu=None, Ucov_chol=None,
             jitter=gps.numerics.jitter_level, name=None):
    super().__init__(name=name)
    self.OBSERVATIONS_AS_INPUT = False
    self.dim = dim
    self.input_dim = input_dim
    self.jitter = jitter

    self.Q_sqrt = Param(np.ones(self.dim) if Q_diag is None else Q_diag ** 0.5,
                        transform=gtf.positive)

    self.n_ind_pts = (n_ind_pts if Z is None
                      else (Z[0].shape[-2] if isinstance(Z, list) else Z.shape[-2]))

    if isinstance(Z, np.ndarray) and Z.ndim == 2:
        self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
    else:
        Z_list = ([np.random.randn(self.n_ind_pts, self.dim + self.input_dim)
                   for _ in range(self.dim)] if Z is None else [z for z in Z])
        self.Z = mf.SeparateIndependentMof(
            [gp.features.InducingPoints(z) for z in Z_list])

    if isinstance(kern, gp.kernels.Kernel):
        self.kern = mk.SharedIndependentMok(kern, self.dim)
    else:
        kern_list = kern or [gp.kernels.Matern32(self.dim + self.input_dim, ARD=True)
                             for _ in range(self.dim)]
        self.kern = mk.SeparateIndependentMok(kern_list)

    self.mean_fn = mean_fn or mean_fns.Identity(self.dim)
    self.Umu = Param(np.zeros((self.dim, self.n_ind_pts))
                     if Umu is None else Umu)  # Lm^-1 (Umu - m(Z))
    transform = gtf.LowerTriangular(self.n_ind_pts, num_matrices=self.dim, squeeze=False)
    self.Ucov_chol = Param(
        np.tile(np.eye(self.n_ind_pts)[None, ...], [self.dim, 1, 1])
        if Ucov_chol is None else Ucov_chol,
        transform=transform)  # Lm^-1 Ucov_chol
    self._Kzz = None
def setup_variational_parameters(self):
    self.Z = Parameter(self.inducing_locations)  # M x D
    self.q_mu = Parameter(np.zeros((self.num_inducing, 1)))  # M x 1
    q_sqrt = np.tile(np.eye(self.num_inducing)[None, :, :], [1, 1, 1])
    transform = transforms.LowerTriangular(self.num_inducing, num_matrices=1)
    self.q_sqrt = Parameter(q_sqrt, transform=transform)  # 1 x M x M
def __init__(self, kern, Z, mean_function, num_nodes, dim_per_in, dim_per_out,
             gmat, share_Z=False, nb_init=True, **kwargs):
    Layer.__init__(self, input_prop_dim=False, **kwargs)
    self.kern = kern
    self.num_nodes = num_nodes
    self.dim_per_in, self.dim_per_out = dim_per_in, dim_per_out
    self.gmat = gmat
    self.share_Z = share_Z
    self.nb_init = nb_init
    self.num_outputs = num_nodes * dim_per_out
    self.num_inducing = Z.shape[0]

    self.q_mu = Parameter(np.zeros((self.num_inducing, num_nodes * dim_per_out)))
    self.mean_function = ParamList([], trainable=False)
    self.q_sqrt_lst = ParamList([])
    transform = transforms.LowerTriangular(self.num_inducing,
                                           num_matrices=self.dim_per_out)
    if share_Z:
        self.feature = InducingPoints(Z)
    else:
        self.feature = ParamList([])

    for nd in range(num_nodes):
        if mean_function:
            self.mean_function.append(mean_function[nd])
        else:
            self.mean_function.append(Zero())

        if share_Z:
            pa_nd = self.pa_idx(nd)
            Ku_nd = self.kern[nd].compute_K_symm(Z)
            Lu_nd = np.linalg.cholesky(Ku_nd + np.eye(Z.shape[0]) * settings.jitter)
            q_sqrt = np.tile(Lu_nd[None, :, :], [dim_per_out, 1, 1])
            self.q_sqrt_lst.append(Parameter(q_sqrt, transform=transform))
        else:
            pa_nd = self.pa_idx(nd)
            Z_tmp = Z[:, pa_nd].copy()
            self.feature.append(InducingPoints(Z_tmp))
            Ku_nd = self.kern[nd].compute_K_symm(Z_tmp)
            Lu_nd = np.linalg.cholesky(Ku_nd + np.eye(Z_tmp.shape[0]) * settings.jitter)
            q_sqrt = np.tile(Lu_nd[None, :, :], [dim_per_out, 1, 1])
            self.q_sqrt_lst.append(Parameter(q_sqrt, transform=transform))

    self.needs_build_cholesky = True
def __init__(self, layer_id, kern, U, Z, num_outputs, mean_function,
             white=False, **kwargs):
    """
    A sparse variational GP layer in whitened representation. This layer
    holds the kernel, variational parameters, inducing points and mean
    function.

    The underlying model at inputs X is
        f = L v + mean_function(X), where v ~ N(0, I) and L L^T = kern.K(X).

    The variational distribution over the inducing points is
        q(v) = N(q_mu, q_sqrt q_sqrt^T).

    The layer holds num_outputs independent GPs with the same kernel and
    inducing points.

    :param kern: The kernel for the layer (input_dim = D_in)
    :param Z: Inducing points (M, D_in); if None, a random initialisation is used
    :param num_outputs: The number of GP outputs (q_mu has shape (M, num_outputs))
    :param mean_function: The mean function
    """
    Layer.__init__(self, layer_id, U, num_outputs, **kwargs)

    # If no inducing inputs are supplied, fall back to a small random initialisation.
    self.dim_in = U[0].shape[1] if layer_id == 0 else num_outputs
    self.Z = Z if Z is not None else np.random.normal(0, 0.01, (100, self.dim_in))
    self.num_inducing = self.Z.shape[0]

    q_mu = np.zeros((self.num_inducing, num_outputs))
    self.q_mu = Parameter(q_mu)

    q_sqrt = np.tile(np.eye(self.num_inducing)[None, :, :], [num_outputs, 1, 1])
    transform = transforms.LowerTriangular(self.num_inducing, num_matrices=num_outputs)
    self.q_sqrt = Parameter(q_sqrt, transform=transform)

    self.feature = InducingPoints(self.Z)
    self.kern = kern
    self.mean_function = mean_function
    self.num_outputs = num_outputs
    self.white = white

    if not self.white:  # initialise q_sqrt to the prior Cholesky
        Ku = self.kern.compute_K_symm(self.Z)
        Lu = np.linalg.cholesky(Ku + np.eye(self.Z.shape[0]) * settings.jitter)
        self.q_sqrt = np.tile(Lu[None, :, :], [num_outputs, 1, 1])

    self.needs_build_cholesky = True
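# Standalone numpy sketch of why the non-whitened branch above initialises
# q_sqrt to the Cholesky of Kuu: with q_mu = 0 and q_sqrt = Lu, the variational
# posterior q(u) = N(0, Lu Lu^T) = N(0, Kuu) coincides with the prior, so the
# KL term of the ELBO starts at zero. The toy covariance is an assumption.
import numpy as np

M = 4
A = np.random.RandomState(1).randn(M, M)
Kuu = A @ A.T + M * np.eye(M)  # toy SPD prior covariance
Lu = np.linalg.cholesky(Kuu)
S = Lu @ Lu.T                  # q covariance when q_sqrt = Lu
# KL[N(0, S) || N(0, Kuu)] = 0.5 * (tr(Kuu^-1 S) - M + logdet(Kuu) - logdet(S))
kl = 0.5 * (np.trace(np.linalg.solve(Kuu, S)) - M
            + np.linalg.slogdet(Kuu)[1] - np.linalg.slogdet(S)[1])
assert abs(kl) < 1e-9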
def __init__(self, dim, input_dim=0, Q=None, name=None):
    super().__init__(name=name)
    self.OBSERVATIONS_AS_INPUT = False
    self.dim = dim
    self.input_dim = input_dim

    if Q is None or Q.ndim == 2:
        self.Qchol = Param(
            np.eye(self.dim) if Q is None else np.linalg.cholesky(Q),
            gtf.LowerTriangular(self.dim, squeeze=True))
    elif Q.ndim == 1:
        self.Qchol = Param(Q ** 0.5)
def __init__(self, kern, num_outputs, mean_function, Z=None, feature=None,
             white=False, input_prop_dim=None, q_mu=None, q_sqrt=None, **kwargs):
    """
    A sparse variational GP layer in whitened representation. This layer
    holds the kernel, variational parameters, inducing points and mean
    function.

    The underlying model at inputs X is
        f = L v + mean_function(X), where v ~ N(0, I) and L L^T = kern.K(X).

    The variational distribution over the inducing points is
        q(v) = N(q_mu, q_sqrt q_sqrt^T).

    The layer holds num_outputs independent GPs with the same kernel and
    inducing points.

    :param kern: The kernel for the layer (input_dim = D_in)
    :param Z: Inducing points (M, D_in)
    :param num_outputs: The number of GP outputs (q_mu has shape (M, num_outputs))
    :param mean_function: The mean function
    """
    Layer.__init__(self, input_prop_dim, **kwargs)
    if feature is None:
        feature = InducingPoints(Z)
    self.num_inducing = len(feature)
    self.feature = feature
    self.kern = kern
    self.mean_function = mean_function
    self.num_outputs = num_outputs
    self.white = white

    if q_mu is None:
        q_mu = np.zeros((self.num_inducing, num_outputs), dtype=settings.float_type)
    self.q_mu = Parameter(q_mu)

    if q_sqrt is None:
        if not self.white:  # initialise to the prior Cholesky
            with gpflow.params_as_tensors_for(feature):
                Ku = conditionals.Kuu(feature, self.kern, jitter=settings.jitter)
                Lu = tf.linalg.cholesky(Ku)
                Lu = self.enquire_session().run(Lu)
            q_sqrt = np.tile(Lu[None, :, :], [num_outputs, 1, 1])
        else:
            q_sqrt = np.tile(
                np.eye(self.num_inducing, dtype=settings.float_type)[None, :, :],
                [num_outputs, 1, 1])

    transform = transforms.LowerTriangular(self.num_inducing, num_matrices=num_outputs)
    self.q_sqrt = Parameter(q_sqrt, transform=transform)

    self.needs_build_cholesky = True
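# Standalone numpy sketch of why the layers above add jitter to Kuu before
# taking the Cholesky: a PSD kernel matrix can be numerically singular (e.g.
# duplicated inputs), in which case the factorisation fails; adding eps * I
# restores strict positive definiteness. The toy kernel is an assumption.
import numpy as np

X = np.zeros((3, 1))               # three identical inputs
K = np.exp(-0.5 * (X - X.T) ** 2)  # all-ones matrix, rank 1
try:
    np.linalg.cholesky(K)
    factorised = True
except np.linalg.LinAlgError:
    factorised = False
assert not factorised                          # fails without jitter
L = np.linalg.cholesky(K + 1e-6 * np.eye(3))   # succeeds with jitter
assert np.allclose(L @ L.T, K, atol=1e-5)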
def __init__(self, Z, mean_function, kern, num_latent=1, whiten=True, name=None):
    super(Latent, self).__init__(name=name)
    self.mean_function = mean_function
    self.kern = kern
    self.num_latent = num_latent

    M = Z.shape[0]
    self.feature = InducingPoints(Z)
    num_inducing = len(self.feature)
    self.whiten = whiten

    self.q_mu = Parameter(
        np.zeros((num_inducing, self.num_latent), dtype=settings.float_type))
    q_sqrt = np.tile(np.eye(M)[None, :, :], [self.num_latent, 1, 1])
    transform = transforms.LowerTriangular(M, num_matrices=self.num_latent)
    self.q_sqrt = Parameter(q_sqrt, transform=transform)
def _init_variational_parameters(self, Z):
    q_mu = np.zeros((self.num_inducing, self.num_outputs))
    q_mu = gpflow.Param(q_mu)

    # Initialise q_sqrt: identity for the whitened/diagonal cases, otherwise
    # the Cholesky of the prior covariance at the inducing inputs.
    if self.white or self.q_diag:
        q_sqrt = np.tile(np.eye(self.num_inducing)[None, :, :],
                         [self.num_outputs, 1, 1])
    else:
        if self.gc_kernel:
            Ku = self.kernel.compute_Ku_symmetric(Z, jitter=settings.jitter)
        else:
            Ku = self.kernel.compute_K_symm(Z) + \
                 np.eye(Z.shape[0], dtype=settings.float_type) * settings.jitter
        Lu = np.linalg.cholesky(Ku)
        q_sqrt = np.tile(Lu[None, :, :], [self.num_outputs, 1, 1])

    if self.q_diag:
        transform = transforms.DiagMatrix(self.num_inducing)
    else:
        transform = transforms.LowerTriangular(self.num_inducing,
                                               num_matrices=self.num_outputs)
    q_sqrt = gpflow.Param(q_sqrt, transform=transform)
    return q_mu, q_sqrt
def __init__(self, kern, kern_g, Z, Z_g, mu0_g, num_inducing, num_inducing_g,
             num_outputs, mean_function=None, white=True):
    SVGP_Layer.__init__(self, kern, Z, num_inducing, num_outputs,
                        mean_function, white)
    self.kern_g = kern_g
    self.num_inducing_g = num_inducing_g
    self.q_diag_g = False
    self.mu0_g = Param(mu0_g, name="mu0_g")()

    Um_g = np.zeros((self.num_inducing_g, self.num_outputs))
    Us_sqrt_g = (np.ones((self.num_inducing_g, self.num_outputs)) if self.q_diag_g
                 else np.array([np.eye(self.num_inducing_g)
                                for _ in range(self.num_outputs)]))

    with tf.name_scope("inducing"):
        self.Z_g = Param(Z_g, name="z_g")()
        self.Um_g = Param(Um_g, name="u_g")()
        if self.q_diag_g:
            self.Us_sqrt_g = Param(Us_sqrt_g, transforms.positive,
                                   name="u_variance_g")()
        else:
            self.Us_sqrt_g = Param(
                Us_sqrt_g,
                transforms.LowerTriangular(self.num_inducing_g, self.num_outputs),
                name="u_variance_g")()

    self.Ku_g = self.kern_g.Ksymm(self.Z_g) + tf.eye(
        tf.shape(self.Z_g)[0], dtype=self.Z_g.dtype) * settings.numerics.jitter_level
    self.Lu_g = tf.cholesky(self.Ku_g)
def onoff(Xtrain, Ytrain, Xtest, Ytest, dir):
    tf.reset_default_graph()

    parentDir = "/l/hegdep1/onoffgp/uai/experiments/pptr"
    sys.path.append(parentDir)
    from onofftf.main import Param, DataSet, GaussKL, KernSE, GPConditional, GaussKLkron
    from onofftf.utils import modelmanager
    from gpflow import transforms

    modelPath = dir
    tbPath = dir
    logPath = dir + 'modelsumm.log'

    logger = logging.getLogger('log')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.FileHandler(logPath))

    logger.info("training size = " + str(Xtrain.shape[0]))
    logger.info("test size = " + str(Xtest.shape[0]))

    traindf = pd.DataFrame({'ndatehour': Xtrain[:, 2].flatten() * 1000,
                            'pptr': Ytrain.flatten()})
    train_data = DataSet(Xtrain, Ytrain)
    logger.info("number of training examples:" + str(Xtrain.shape))

    # ****************************************************************
    # parameter initializations
    # ****************************************************************
    list_to_np = lambda _list: [np.array(e) for e in _list]

    num_iter = 50000
    num_inducing_f = np.array([10, 100])
    num_inducing_g = np.array([10, 100])
    num_data = Xtrain.shape[0]
    num_minibatch = 1000

    init_fkell = list_to_np([[8., 8.], [5. / 1000]])
    init_fkvar = list_to_np([[20.], [20.]])
    init_gkell = list_to_np([[8., 8.], [5. / 1000]])
    init_gkvar = list_to_np([[10.], [10.]])
    init_noisevar = 0.01
    q_diag = True

    init_Zf_s = kmeans(Xtrain[:, 0:2], num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(
        np.linspace(Xtrain[:, 2].min(), Xtrain[:, 2].max(), num_inducing_f[1]), axis=1)
    init_Zf = [init_Zf_s, init_Zf_t]
    init_u_fm = np.random.randn(np.prod(num_inducing_f), 1) * 0.1
    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1, -1).T

    init_Zg = init_Zf.copy()
    init_u_gm = np.random.randn(np.prod(num_inducing_g), 1) * 0.1
    init_u_gs_sqrt = np.ones(np.prod(num_inducing_g)).reshape(1, -1).T

    kern_param_learning_rate = 1e-3
    indp_param_learning_rate = 1e-3

    # ****************************************************************
    # define tensorflow variables and placeholders
    # ****************************************************************
    X = tf.placeholder(dtype=float_type)
    Y = tf.placeholder(dtype=float_type)

    with tf.name_scope("f_kern"):
        fkell = [Param(init_fkell[i], transform=transforms.Log1pe(),
                       name="lengthscale", learning_rate=kern_param_learning_rate,
                       summ=True) for i in range(len(num_inducing_f))]
        fkvar = [Param(init_fkvar[i], transform=transforms.Log1pe(),
                       name="variance", learning_rate=kern_param_learning_rate,
                       summ=True) for i in range(len(num_inducing_f))]
        fkern_list = [KernSE(fkell[i], fkvar[i]) for i in range(len(num_inducing_f))]

    with tf.name_scope("g_kern"):
        gkell = [Param(init_gkell[i], transform=transforms.Log1pe(),
                       name="lengthscale", learning_rate=kern_param_learning_rate,
                       summ=True) for i in range(len(num_inducing_g))]
        gkvar = [Param(init_gkvar[i], transform=transforms.Log1pe(),
                       name="variance", learning_rate=kern_param_learning_rate,
                       summ=True) for i in range(len(num_inducing_g))]
        gkern_list = [KernSE(gkell[i], gkvar[i]) for i in range(len(num_inducing_g))]

    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar, transform=transforms.Log1pe(),
                         name="variance", learning_rate=kern_param_learning_rate,
                         summ=True)

    with tf.name_scope("f_ind"):
        Zf_list = [Param(init_Zf[i], name="z",
                         learning_rate=indp_param_learning_rate, summ=True)
                   for i in range(len(num_inducing_f))]
        u_fm = Param(init_u_fm, name="value",
                     learning_rate=indp_param_learning_rate, summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt, transforms.positive,
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,
                              transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)

    with tf.name_scope("g_ind"):
        Zg_list = [Param(init_Zg[i], name="z",
                         learning_rate=indp_param_learning_rate, summ=True)
                   for i in range(len(num_inducing_g))]
        u_gm = Param(init_u_gm, name="value",
                     learning_rate=indp_param_learning_rate, summ=True)
        if q_diag:
            u_gs_sqrt = Param(init_u_gs_sqrt, transforms.positive,
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)
        else:
            u_gs_sqrt = Param(init_u_gs_sqrt,
                              transforms.LowerTriangular(init_u_gs_sqrt.shape[0]),
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)

    # ****************************************************************
    # define model support functions
    # ****************************************************************
    def build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,
                       u_gm, u_gs_sqrt, gkern_list, Zg_list, whiten=False):
        if whiten:
            raise NotImplementedError()
        else:
            Kfmm = [fkern_list[i].K(Zf_list[i].get_tfv()) +
                    tf.eye(num_inducing_f[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_f))]
            Kgmm = [gkern_list[i].K(Zg_list[i].get_tfv()) +
                    tf.eye(num_inducing_g[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_g))]
            KL = GaussKLkron(u_fm.get_tfv(), u_fs_sqrt.get_tfv(), Kfmm) + \
                 GaussKLkron(u_gm.get_tfv(), u_gs_sqrt.get_tfv(), Kgmm)
        return KL

    def build_predict(Xnew, u_fm, u_fs_sqrt, fkern_list, Zf_list,
                      u_gm, u_gs_sqrt, gkern_list, Zg_list, f_mu=None):
        input_mask_f = _gen_inp_mask(Zf_list)
        input_mask_g = _gen_inp_mask(Zg_list)

        # compute fmean and fvar from the kronecker inference
        fmean, fvar = kron_inf(Xnew, fkern_list, Zf_list, u_fm, u_fs_sqrt,
                               num_inducing_f, input_mask_f)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()

        # compute gmean and gvar from the kronecker inference
        gmean, gvar = kron_inf(Xnew, gkern_list, Zg_list, u_gm, u_gs_sqrt,
                               num_inducing_g, input_mask_g)

        # compute augmented distributions
        ephi_g, ephi2_g, evar_phi_g = probit_expectations(gmean, gvar)

        # compute augmented f:
        # p(f|g) = N(f | diag(ephi_g) * A * u_fm,
        #            diag(evar_phi_g) * (Kfnn + A (u_fs - Kfmm) A^T))
        gfmean = tf.multiply(ephi_g, fmean)
        gfvar = tf.multiply(ephi2_g, fvar)
        gfmeanu = tf.multiply(evar_phi_g, tf.square(fmean))

        # return mean and variance vectors in order
        return gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, ephi_g, evar_phi_g

    def kron_inf(Xnew, kern_list, Z_list, q_mu, q_sqrt, num_inducing, input_mask):
        # compute alpha = Kmm^-1 * f_m
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) +
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]
        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv, q_mu.get_tfv())

        n_batch = tf.stack([tf.shape(Xnew)[0], np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        Kmn_kron = []
        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            Kmn_kron.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))

        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))
        Kmn = tf.reshape(tf.multiply(tf.expand_dims(Kmn_kron[0], 1), Kmn_kron[1]),
                         [np.prod(num_inducing), -1])
        A = tf.matmul(tf_kron(*Kmm_inv), Kmn)

        mu = tf.matmul(Kmn, alpha, transpose_a=True)
        var = Knn - tf.reshape(
            tf.matrix_diag_part(tf.matmul(Kmn, A, transpose_a=True) -
                                tf.matmul(tf.matmul(A, S, transpose_a=True), A)),
            [-1, 1])
        return mu, var

    def __kron_mv(As, x):
        num_inducing = [int(As[p].get_shape()[0]) for p in range(len(As))]
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N, 1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p],
                               np.round(N / num_inducing[p]).astype(np.int)))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N, 1])
        return b

    def tf_kron(*args):
        def __tf_kron(a, b):
            a_shape = [tf.shape(a)[0], tf.shape(a)[1]]
            b_shape = [tf.shape(b)[0], tf.shape(b)[1]]
            return tf.reshape(
                tf.reshape(a, [a_shape[0], 1, a_shape[1], 1]) *
                tf.reshape(b, [1, b_shape[0], 1, b_shape[1]]),
                [a_shape[0] * b_shape[0], a_shape[1] * b_shape[1]])

        kron_prod = tf.constant(1., shape=[1, 1], dtype=float_type)
        for Ap in args:
            kron_prod = __tf_kron(kron_prod, Ap)
        return kron_prod

    def _gen_inp_mask(Z_list):
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask

    def variational_expectations(Y, fmu, fvar, fmuvar, noisevar):
        return -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(noisevar) \
               - 0.5 * (tf.square(Y - fmu) + fvar + fmuvar) / noisevar

    def probit_expectations(gmean, gvar):
        def normcdf(x):
            return 0.5 * (1.0 + tf.erf(x / np.sqrt(2.0))) * (1. - 2.e-3) + 1.e-3

        def owent(h, a):
            # approximation to Owen's T function
            h = tf.abs(h)
            term1 = tf.atan(a) / (2 * np.pi)
            term2 = tf.exp((-1 / 2) * (tf.multiply(tf.square(h), (tf.square(a) + 1))))
            return tf.multiply(term1, term2)

        z = gmean / tf.sqrt(1. + gvar)
        a = 1 / tf.sqrt(1. + (2 * gvar))
        cdfz = normcdf(z)
        tz = owent(z, a)

        ephig = cdfz
        ephisqg = (cdfz - 2. * tz)
        evarphig = (cdfz - 2. * tz - tf.square(cdfz))

        # clip negative values from variance terms to zero
        ephisqg = (ephisqg + tf.abs(ephisqg)) / 2.
        evarphig = (evarphig + tf.abs(evarphig)) / 2.
        return ephig, ephisqg, evarphig

    # ****************************************************************
    # build model and define lower bound
    # ****************************************************************
    # get kl term
    with tf.name_scope("kl"):
        kl = build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,
                            u_gm, u_gs_sqrt, gkern_list, Zg_list)
        tf.summary.scalar('kl', kl)

    # get augmented functions
    with tf.name_scope("model_build"):
        gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, pgmean, pgvar = \
            build_predict(X, u_fm, u_fs_sqrt, fkern_list, Zf_list,
                          u_gm, u_gs_sqrt, gkern_list, Zg_list)
        tf.summary.histogram('gfmean', gfmean)
        tf.summary.histogram('gfvar', gfvar)
        tf.summary.histogram('gfmeanu', gfmeanu)
        tf.summary.histogram('fmean', fmean)
        tf.summary.histogram('fvar', fvar)
        tf.summary.histogram('gmean', gmean)
        tf.summary.histogram('gvar', gvar)
        tf.summary.histogram('pgmean', pgmean)
        tf.summary.histogram('pgvar', pgvar)

    # compute likelihood
    with tf.name_scope("var_exp"):
        var_exp = tf.reduce_sum(
            variational_expectations(Y, gfmean, gfvar, gfmeanu, noisevar.get_tfv()))
        tf.summary.scalar('var_exp', var_exp)

        # mini-batch scaling
        scale = tf.cast(num_data, float_type) / tf.cast(num_minibatch, float_type)
        var_exp_scaled = var_exp * scale
        tf.summary.scalar('var_exp_scaled', var_exp_scaled)

    # final lower bound
    with tf.name_scope("cost"):
        cost = -(var_exp_scaled - kl)
        tf.summary.scalar('cost', cost)

    # ****************************************************************
    # define optimizer op
    # ****************************************************************
    all_var_list = tf.trainable_variables()
    all_lr_list = [var._learning_rate for var in all_var_list]

    train_opt_group = []
    for group_learning_rate in set(all_lr_list):
        _ind_bool = np.where(np.isin(np.array(all_lr_list), group_learning_rate))[0]
        group_var_list = [all_var_list[ind] for ind in _ind_bool]
        group_tf_optimizer = tf.train.AdamOptimizer(learning_rate=group_learning_rate)
        group_grad_list = tf.gradients(cost, group_var_list)
        group_grads_and_vars = list(zip(group_grad_list, group_var_list))
        group_train_op = group_tf_optimizer.apply_gradients(group_grads_and_vars)

        # summarize all gradients
        for grad, var in group_grads_and_vars:
            tf.summary.histogram(var.name + '/gradient', grad)

        train_opt_group.append({'names': [var.name for var in group_var_list],
                                'vars': group_var_list,
                                'learning_rate': group_learning_rate,
                                'grads': group_grad_list,
                                'train_op': group_train_op})

    train_op = tf.group(*[group['train_op'] for group in train_opt_group])

    # ****************************************************************
    # define graph and run optimization
    # ****************************************************************
    sess = tf.InteractiveSession()

    # model saver
    saver = tf.train.Saver()

    # tensorboard summary
    summ_merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(tbPath, graph=sess.graph)

    sess.run(tf.global_variables_initializer())

    logger.info('******* started optimization at ' +
                time.strftime('%Y%m%d-%H%M') + " *******")
    optstime = time.time()
    logger.info('{:>16s}'.format("iteration") + '{:>6s}'.format("time"))

    for i in range(num_iter):
        optstime = time.time()
        batch = train_data.next_batch(num_minibatch)
        try:
            summary, _ = sess.run([summ_merged, train_op],
                                  feed_dict={X: batch[0], Y: batch[1]})

            if i % 200 == 0:
                logger.info('{:>16d}'.format(i) +
                            '{:>6.3f}'.format((time.time() - optstime) / 60))
                summary_writer.add_summary(summary, i)
                summary_writer.flush()

            if i % 10000 == 0:
                modelmngr = modelmanager(saver, sess, modelPath)
                modelmngr.save()

                # ********************************************************
                # plot inducing monitoring plots
                # ********************************************************
                lp_u_fm = u_fm.get_tfv().eval().flatten()
                lp_u_gm = u_gm.get_tfv().eval().flatten()
                lp_zf_t = Zf_list[1].get_tfv().eval().flatten()
                lp_zg_t = Zg_list[1].get_tfv().eval().flatten()
                lp_zf_sort_ind = np.argsort(lp_zf_t)
                lp_zg_sort_ind = np.argsort(lp_zg_t)
                scale_z = 1000

                mpl.rcParams['figure.figsize'] = (16, 8)
                fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True)
                mean_pptr = traindf.groupby('ndatehour')['pptr'].mean()
                ax1.bar(mean_pptr.index, mean_pptr.values, align='center')

                for m in np.arange(num_inducing_f[0]):
                    u_fm_temporal = lp_u_fm[m * num_inducing_f[1]:(m + 1) * num_inducing_f[1]]
                    ax2.plot(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z, 4),
                             u_fm_temporal[lp_zf_sort_ind], alpha=0.7)
                    ax2.scatter(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z, 4),
                                np.ones([num_inducing_f[1], 1]) * lp_u_fm.min(),
                                color="#514A30")

                for m in np.arange(num_inducing_g[0]):
                    u_gm_temporal = lp_u_gm[m * num_inducing_g[1]:(m + 1) * num_inducing_g[1]]
                    ax3.plot(np.round(lp_zg_t[lp_zg_sort_ind] * scale_z, 4),
                             u_gm_temporal[lp_zg_sort_ind], alpha=0.7)
                    ax3.scatter(np.round(lp_zg_t[lp_zg_sort_ind] * scale_z, 4),
                                np.ones([num_inducing_g[1], 1]) * lp_u_gm.min(),
                                color="#514A30")

                fig.savefig(dir + "inducing_" + str(i) + ".png")
        except KeyboardInterrupt:
            print("Stopping training")
            break

    modelmngr = modelmanager(saver, sess, modelPath)
    modelmngr.save()
    summary_writer.close()

    # ****************************************************************
    # param summary
    # ****************************************************************
    logger.info("Noise variance = " + str(noisevar.get_tfv().eval()))
    logger.info("Kf spatial lengthscale = " + str(fkell[0].get_tfv().eval()))
    logger.info("Kf spatial variance = " + str(fkvar[0].get_tfv().eval()))
    logger.info("Kf temporal lengthscale = " + str(fkell[1].get_tfv().eval()))
    logger.info("Kf temporal variance = " + str(fkvar[1].get_tfv().eval()))
    logger.info("Kg spatial lengthscale = " + str(gkell[0].get_tfv().eval()))
    logger.info("Kg spatial variance = " + str(gkvar[0].get_tfv().eval()))
    logger.info("Kg temporal lengthscale = " + str(gkell[1].get_tfv().eval()))
    logger.info("Kg temporal variance = " + str(gkvar[1].get_tfv().eval()))

    # ****************************************************************
    # model predictions
    # ****************************************************************
    def predict_onoff(Xtest):
        pred_test = np.maximum(gfmean.eval(feed_dict={X: Xtest}), 0)
        return pred_test

    pred_test = predict_onoff(Xtest)
    test_rmse = np.sqrt(np.mean((pred_test - Ytest) ** 2))
    test_mae = np.mean(np.abs(pred_test - Ytest))

    logger.info("test rmse:" + str(test_rmse))
    logger.info("test mae:" + str(test_mae))
    for handler in logger.handlers[:]:
        logger.removeHandler(handler)

    # ****************************************************************
    # return values
    # ****************************************************************
    retdict = {'Xtrain': Xtrain, 'Ytrain': Ytrain,
               'Xtest': Xtest, 'Ytest': Ytest,
               'test_rmse': test_rmse,
               'test_mae': test_mae}
    return retdict
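# Standalone Monte Carlo check of the closed forms that probit_expectations
# above targets (its owent() is an approximation to Owen's T function): for
# g ~ N(m, v), E[Phi(g)] = Phi(z) and E[Phi(g)^2] = Phi(z) - 2 T(z, a), with
# z = m / sqrt(1 + v) and a = 1 / sqrt(1 + 2 v). Assumes a SciPy version that
# provides scipy.special.owens_t.
import numpy as np
from scipy.stats import norm
from scipy.special import owens_t

m, v = 0.4, 0.7
g = np.random.RandomState(0).randn(1000000) * np.sqrt(v) + m
z = m / np.sqrt(1.0 + v)
a = 1.0 / np.sqrt(1.0 + 2.0 * v)
assert abs(norm.cdf(g).mean() - norm.cdf(z)) < 5e-3
assert abs((norm.cdf(g) ** 2).mean() - (norm.cdf(z) - 2.0 * owens_t(z, a))) < 5e-3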
def __init__(self, latent_dim, Y, inputs=None, emissions=None, px1_mu=None,
             px1_cov=None, kern=None, Z=None, n_ind_pts=100, mean_fn=None,
             Q_diag=None, Umu=None, Ucov_chol=None, qx1_mu=None, qx1_cov=None,
             As=None, bs=None, Ss=None, n_samples=100, batch_size=None,
             chunking=False, seed=None, parallel_iterations=10,
             jitter=gp.settings.numerics.jitter_level, name=None):
    super().__init__(latent_dim, Y[0],
                     inputs=None if inputs is None else inputs[0],
                     emissions=emissions, px1_mu=px1_mu, px1_cov=None,
                     kern=kern, Z=Z, n_ind_pts=n_ind_pts, mean_fn=mean_fn,
                     Q_diag=Q_diag, Umu=Umu, Ucov_chol=Ucov_chol,
                     qx1_mu=qx1_mu, qx1_cov=None, As=None, bs=None,
                     Ss=False if Ss is False else None,
                     n_samples=n_samples, seed=seed,
                     parallel_iterations=parallel_iterations,
                     jitter=jitter, name=name)

    self.T = [Y_s.shape[0] for Y_s in Y]
    self.T_tf = tf.constant(self.T, dtype=gp.settings.int_type)
    self.max_T = max(self.T)
    self.sum_T = float(sum(self.T))
    self.n_seq = len(self.T)
    self.batch_size = batch_size
    self.chunking = chunking

    if self.batch_size is None:
        self.Y = ParamList(Y, trainable=False)
    else:
        _Y = np.stack([
            np.concatenate([Ys, np.zeros((self.max_T - len(Ys), self.obs_dim))])
            for Ys in Y
        ])
        self.Y = Param(_Y, trainable=False)

    if inputs is not None:
        if self.batch_size is None:
            self.inputs = ParamList(inputs, trainable=False)
        else:
            desired_length = self.max_T if self.chunking else self.max_T - 1
            _inputs = [
                np.concatenate([
                    inputs[s],
                    np.zeros((desired_length - len(inputs[s]), self.input_dim))
                ]) for s in range(self.n_seq)
            ]  # pad the inputs
            self.inputs = Param(_inputs, trainable=False)

    if qx1_mu is None:
        self.qx1_mu = Param(np.zeros((self.n_seq, self.latent_dim)))
    self.qx1_cov_chol = Param(
        np.tile(np.eye(self.latent_dim)[None, ...], [self.n_seq, 1, 1])
        if qx1_cov is None else np.linalg.cholesky(qx1_cov),
        transform=gtf.LowerTriangular(self.latent_dim, num_matrices=self.n_seq))

    _As = [np.ones((T_s - 1, self.latent_dim)) for T_s in self.T] if As is None else As
    _bs = [np.zeros((T_s - 1, self.latent_dim)) for T_s in self.T] if bs is None else bs
    if Ss is not False:
        _S_chols = ([np.tile(self.Q_sqrt.value.copy()[None, ...], [T_s - 1, 1])
                     for T_s in self.T] if Ss is None else
                    [np.sqrt(S) if S.ndim == 2 else np.linalg.cholesky(S) for S in Ss])

    if self.batch_size is None:
        self.As = ParamList(_As)
        self.bs = ParamList(_bs)
        if Ss is not False:
            self.S_chols = ParamList([
                Param(Sc, transform=gtf.positive if Sc.ndim == 2
                      else gtf.LowerTriangular(self.latent_dim,
                                               num_matrices=Sc.shape[0]))
                for Sc in _S_chols
            ])
    else:
        _As = np.stack([
            np.concatenate([_A, np.zeros((self.max_T - len(_A) - 1, *_A.shape[1:]))])
            for _A in _As
        ])
        _bs = np.stack([
            np.concatenate([_b, np.zeros((self.max_T - len(_b) - 1, self.latent_dim))])
            for _b in _bs
        ])
        self.As = Param(_As)
        self.bs = Param(_bs)
        if Ss is not False:
            _S_chols = [
                np.concatenate([_S, np.zeros((self.max_T - len(_S) - 1, *_S.shape[1:]))])
                for _S in _S_chols
            ]
            _S_chols = np.stack(_S_chols)
            self.S_chols = Param(
                _S_chols,
                transform=gtf.positive if _S_chols.ndim == 3 else
                gtf.LowerTriangular(self.latent_dim,
                                    num_matrices=(self.n_seq, self.max_T - 1)))

    self.multi_diag_px1_cov = False
    if isinstance(px1_cov, list):  # different prior for each sequence
        _x1_cov = np.stack(px1_cov)
        _x1_cov = np.sqrt(_x1_cov) if _x1_cov.ndim == 2 else np.linalg.cholesky(_x1_cov)
        _transform = None if _x1_cov.ndim == 2 else gtf.LowerTriangular(
            self.latent_dim, num_matrices=self.n_seq)
        self.multi_diag_px1_cov = _x1_cov.ndim == 2
    elif isinstance(px1_cov, np.ndarray):  # same prior for each sequence
        assert px1_cov.ndim < 3
        _x1_cov = np.sqrt(px1_cov) if px1_cov.ndim == 1 else np.linalg.cholesky(px1_cov)
        _transform = None if px1_cov.ndim == 1 else gtf.LowerTriangular(
            self.latent_dim, squeeze=True)
    self.px1_cov_chol = None if px1_cov is None else Param(
        _x1_cov, trainable=False, transform=_transform)

    if self.chunking:
        px1_mu_check = len(self.px1_mu.shape) == 1
        px1_cov_check_1 = not self.multi_diag_px1_cov
        px1_cov_check_2 = self.px1_cov_chol is None or len(self.px1_cov_chol.shape) < 3
        assert px1_mu_check and px1_cov_check_1 and px1_cov_check_2, \
            'Only one prior over x1 allowed for chunking'
def __init__(self, X, Y, kernf, kerng, likelihood, Zf, Zg,
             mean_function=None, minibatch_size=None, name='model'):
    Model.__init__(self, name)
    self.mean_function = mean_function or Zero()
    self.kernf = kernf
    self.kerng = kerng
    self.likelihood = likelihood
    self.whiten = False
    self.q_diag = True

    # save initial attributes for future plotting purposes
    Xtrain = DataHolder(X)
    Ytrain = DataHolder(Y)
    self.Xtrain, self.Ytrain = Xtrain, Ytrain

    # sort out the X, Y into MiniBatch objects
    if minibatch_size is None:
        minibatch_size = X.shape[0]
    self.num_data = X.shape[0]
    self.num_latent = Y.shape[1]  # num_latent will be 1
    self.X = MinibatchData(X, minibatch_size, np.random.RandomState(0))
    self.Y = MinibatchData(Y, minibatch_size, np.random.RandomState(0))

    # add variational parameters
    self.Zf = Param(Zf)
    self.Zg = Param(Zg)
    self.num_inducing_f = Zf.shape[0]
    self.num_inducing_g = Zg.shape[0]

    # init variational parameters
    self.u_fm = Param(np.random.randn(self.num_inducing_f, self.num_latent) * 0.01)
    self.u_gm = Param(np.random.randn(self.num_inducing_g, self.num_latent) * 0.01)

    if self.q_diag:
        self.u_fs_sqrt = Param(np.ones((self.num_inducing_f, self.num_latent)),
                               transforms.positive)
        self.u_gs_sqrt = Param(np.ones((self.num_inducing_g, self.num_latent)),
                               transforms.positive)
    else:
        u_fs_sqrt = np.array([np.eye(self.num_inducing_f)
                              for _ in range(self.num_latent)]).swapaxes(0, 2)
        self.u_fs_sqrt = Param(u_fs_sqrt,
                               transforms.LowerTriangular(u_fs_sqrt.shape[2]))
        u_gs_sqrt = np.array([np.eye(self.num_inducing_g)
                              for _ in range(self.num_latent)]).swapaxes(0, 2)
        self.u_gs_sqrt = Param(u_gs_sqrt,
                               transforms.LowerTriangular(u_gs_sqrt.shape[2]))
def main(scriptPath):
    tf.reset_default_graph()

    parentDir = '/'.join(os.path.dirname(os.path.realpath(scriptPath)).split('/')[:-1])
    subDir = "/" + scriptPath.split("/")[-2].split(".py")[0] + "/"
    sys.path.append(parentDir)
    from onofftf.main import Param, DataSet, GaussKL, KernSE, GPConditional, GaussKLkron
    from onofftf.utils import modelmanager
    from gpflow import transforms

    cmodelPath = parentDir + subDir + 'results_scgp.pickle'
    modelPath = parentDir + subDir + 'model_hurdle.ckpt'
    logPath = parentDir + subDir + 'modelsumm_hurdle.log'

    logger = logging.getLogger('log')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.FileHandler(logPath))

    data = pickle.load(open(parentDir + subDir + "data.pickle", "rb"))
    Xtrain = data['Xtrain']
    Ytrain = data['Ytrain']
    Ytrain_c = (data['Ytrain'] > 0) * 1
    Xtest = data['Xtest']
    Ytest = data['Ytest']
    Ytest_c = (data['Ytest'] > 0) * 1

    # load results from the classifier model
    cresults = pickle.load(open(cmodelPath, "rb"))
    train_pred_on_idx, _ = np.where(cresults['pred_train']['pfmean'] > 0.5)
    test_pred_on_idx, _ = np.where(cresults['pred_test']['pfmean'] > 0.5)

    Xtrain_reg_hurdle = Xtrain[train_pred_on_idx, :]
    Ytrain_reg_hurdle = Ytrain[train_pred_on_idx]
    Xtest_reg_hurdle = Xtest[test_pred_on_idx, :]
    Ytest_reg_hurdle = Ytest[test_pred_on_idx]

    traindf = pd.DataFrame({'ndatehour': Xtrain[train_pred_on_idx, 2].flatten() * 1000,
                            'pptr': Ytrain[train_pred_on_idx].flatten()})
    train_data = DataSet(Xtrain_reg_hurdle, Ytrain_reg_hurdle)
    logger.info("training size = " + str(Xtrain.shape[0]))
    logger.info("test size = " + str(Xtest.shape[0]))

    # ****************************************************************
    # parameter initializations
    # ****************************************************************
    list_to_np = lambda _list: [np.array(e) for e in _list]

    num_iter = 50000
    num_inducing_f = np.array([10, 100])
    num_data = Xtrain.shape[0]
    num_minibatch = 500

    init_fkell = list_to_np([[5., 5.], [5. / 1000]])
    init_fkvar = list_to_np([[20.], [20.]])
    init_noisevar = 0.01
    q_diag = True

    init_Zf_s = kmeans(Xtrain[:, 0:2], num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(
        np.linspace(Xtrain[:, 2].min(), Xtrain[:, 2].max(), num_inducing_f[1]), axis=1)
    init_Zf = [init_Zf_s, init_Zf_t]
    init_u_fm = np.random.randn(np.prod(num_inducing_f), 1) * 0.01
    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1, -1).T

    kern_param_learning_rate = 1e-3
    indp_param_learning_rate = 1e-3

    # ****************************************************************
    # define tensorflow variables and placeholders
    # ****************************************************************
    X = tf.placeholder(dtype=float_type)
    Y = tf.placeholder(dtype=float_type)

    with tf.name_scope("f_kern"):
        fkell = [Param(init_fkell[i], transform=transforms.Log1pe(),
                       name="lengthscale", learning_rate=kern_param_learning_rate,
                       summ=True) for i in range(len(num_inducing_f))]
        fkvar = [Param(init_fkvar[i], transform=transforms.Log1pe(),
                       name="variance", learning_rate=kern_param_learning_rate,
                       summ=True) for i in range(len(num_inducing_f))]
        fkern_list = [KernSE(fkell[i], fkvar[i]) for i in range(len(num_inducing_f))]

    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar, transform=transforms.Log1pe(),
                         name="variance", learning_rate=kern_param_learning_rate,
                         summ=True)

    with tf.name_scope("f_ind"):
        Zf_list = [Param(init_Zf[i], name="z",
                         learning_rate=indp_param_learning_rate, summ=True)
                   for i in range(len(num_inducing_f))]
        u_fm = Param(init_u_fm, name="value",
                     learning_rate=indp_param_learning_rate, summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt, transforms.positive,
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,
                              transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)

    # ****************************************************************
    # define model support functions
    # ****************************************************************
    def build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list, whiten=False):
        if whiten:
            raise NotImplementedError()
        else:
            Kfmm = [fkern_list[i].K(Zf_list[i].get_tfv()) +
                    tf.eye(num_inducing_f[i], dtype=float_type) * jitter_level
                    for i in range(len(num_inducing_f))]
            KL = GaussKLkron(u_fm.get_tfv(), u_fs_sqrt.get_tfv(), Kfmm)
        return KL

    def build_predict(Xnew, u_fm, u_fs_sqrt, fkern_list, Zf_list, f_mu=None):
        input_mask_f = _gen_inp_mask(Zf_list)
        # compute fmean and fvar from the kronecker inference
        fmean, fvar = kron_inf(Xnew, fkern_list, Zf_list, u_fm, u_fs_sqrt,
                               num_inducing_f, input_mask_f)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()
        # return mean and variance vectors in order
        return fmean, fvar

    def kron_inf(Xnew, kern_list, Z_list, q_mu, q_sqrt, num_inducing, input_mask):
        # compute alpha = Kmm^-1 * f_m
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) +
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]
        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv, q_mu.get_tfv())

        n_batch = tf.stack([tf.shape(Xnew)[0], np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        Kmn_kron = []
        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            Kmn_kron.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))

        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))
        Kmn = tf.reshape(tf.multiply(tf.expand_dims(Kmn_kron[0], 1), Kmn_kron[1]),
                         [np.prod(num_inducing), -1])
        A = tf.matmul(tf_kron(*Kmm_inv), Kmn)

        mu = tf.matmul(Kmn, alpha, transpose_a=True)
        var = Knn - tf.reshape(
            tf.matrix_diag_part(tf.matmul(Kmn, A, transpose_a=True) -
                                tf.matmul(tf.matmul(A, S, transpose_a=True), A)),
            [-1, 1])
        return mu, var

    def __kron_mv(As, x):
        num_inducing = [int(As[p].get_shape()[0]) for p in range(len(As))]
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N, 1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p],
                               np.round(N / num_inducing[p]).astype(np.int)))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N, 1])
        return b

    def tf_kron(*args):
        def __tf_kron(a, b):
            a_shape = [tf.shape(a)[0], tf.shape(a)[1]]
            b_shape = [tf.shape(b)[0], tf.shape(b)[1]]
            return tf.reshape(
                tf.reshape(a, [a_shape[0], 1, a_shape[1], 1]) *
                tf.reshape(b, [1, b_shape[0], 1, b_shape[1]]),
                [a_shape[0] * b_shape[0], a_shape[1] * b_shape[1]])

        kron_prod = tf.constant(1., shape=[1, 1], dtype=float_type)
        for Ap in args:
            kron_prod = __tf_kron(kron_prod, Ap)
        return kron_prod

    def _gen_inp_mask(Z_list):
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask

    def variational_expectations(Y, fmu, fvar, noisevar):
        return -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(noisevar) \
               - 0.5 * (tf.square(Y - fmu) + fvar) / noisevar

    # ****************************************************************
    # build model and define lower bound
    # ****************************************************************
    # get kl term
    with tf.name_scope("kl"):
        kl = build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list)

    # get augmented functions
    with tf.name_scope("model_build"):
        fmean, fvar = build_predict(X, u_fm, u_fs_sqrt, fkern_list, Zf_list)

    # compute likelihood
    with tf.name_scope("var_exp"):
        var_exp = tf.reduce_sum(
            variational_expectations(Y, fmean, fvar, noisevar.get_tfv()))
        scale = tf.cast(num_data, float_type) / tf.cast(num_minibatch, float_type)
        var_exp_scaled = var_exp * scale

    # final lower bound
    with tf.name_scope("cost"):
        cost = -(var_exp_scaled - kl)

    # ****************************************************************
    # define optimizer op
    # ****************************************************************
    all_var_list = tf.trainable_variables()
    all_lr_list = [var._learning_rate for var in all_var_list]

    train_opt_group = []
    for group_learning_rate in set(all_lr_list):
        _ind_bool = np.where(np.isin(np.array(all_lr_list), group_learning_rate))[0]
        group_var_list = [all_var_list[ind] for ind in _ind_bool]
        group_tf_optimizer = tf.train.AdamOptimizer(learning_rate=group_learning_rate)
        group_grad_list = tf.gradients(cost, group_var_list)
        group_grads_and_vars = list(zip(group_grad_list, group_var_list))
        group_train_op = group_tf_optimizer.apply_gradients(group_grads_and_vars)
        train_opt_group.append({'names': [var.name for var in group_var_list],
                                'vars': group_var_list,
                                'learning_rate': group_learning_rate,
                                'grads': group_grad_list,
                                'train_op': group_train_op})

    train_op = tf.group(*[group['train_op'] for group in train_opt_group])

    # ****************************************************************
    # define graph and run optimization
    # ****************************************************************
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    logger.info('******* started optimization at ' +
                time.strftime('%Y%m%d-%H%M') + " *******")
    optstime = time.time()
    logger.info('{:>16s}'.format("iteration") + '{:>6s}'.format("time"))

    for i in range(num_iter):
        optstime = time.time()
        batch = train_data.next_batch(num_minibatch)
        try:
            sess.run([train_op], feed_dict={X: batch[0], Y: batch[1]})

            if i % 100 == 0:
                logger.info('{:>16d}'.format(i) +
                            '{:>6.3f}'.format((time.time() - optstime) / 60))

            if i % 10000 == 0:
                modelmngr = modelmanager(saver, sess, modelPath)
                modelmngr.save()

                # ********************************************************
                # plot inducing monitoring plots
                # ********************************************************
                lp_u_fm = u_fm.get_tfv().eval().flatten()
                lp_zf_t = Zf_list[1].get_tfv().eval().flatten()
                lp_zf_sort_ind = np.argsort(lp_zf_t)
                scale_z = 1000

                mpl.rcParams['figure.figsize'] = (16, 8)
                fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
                mean_pptr = traindf.groupby('ndatehour')['pptr'].mean()
                ax1.bar(mean_pptr.index, mean_pptr.values, align='center')

                for m in np.arange(num_inducing_f[0]):
                    u_fm_temporal = lp_u_fm[m * num_inducing_f[1]:(m + 1) * num_inducing_f[1]]
                    ax2.plot(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z, 4),
                             u_fm_temporal[lp_zf_sort_ind], alpha=0.7)
                    ax2.scatter(np.round(lp_zf_t[lp_zf_sort_ind] * scale_z, 4),
                                np.ones([num_inducing_f[1], 1]) * lp_u_fm.min(),
                                color="#514A30")

                fig.savefig(parentDir + subDir + "svgp_inducing_" + str(i) + ".png")
        except KeyboardInterrupt:
            print("Stopping training")
            break

    modelmngr = modelmanager(saver, sess, modelPath)
    modelmngr.save()
    tf.reset_default_graph()

    # ****************************************************************
    # param summary
    # ****************************************************************
    logger.info("Noise variance = " + str(noisevar.get_tfv().eval()))
    logger.info("Kf spatial lengthscale = " + str(fkell[0].get_tfv().eval()))
    logger.info("Kf spatial variance = " + str(fkvar[0].get_tfv().eval()))
    logger.info("Kf temporal lengthscale = " + str(fkell[1].get_tfv().eval()))
    logger.info("Kf temporal variance = " + str(fkvar[1].get_tfv().eval()))

    # ****************************************************************
    # model predictions
    # ****************************************************************
    # get regression summary
    from onofftf.svgppred import predict_svgp

    def rmse(predict, actual):
        predict = np.maximum(predict, 0)
        return np.sqrt(np.mean((actual - predict) ** 2))

    def mad(predict, actual):
        predict = np.maximum(predict, 0)
        return np.mean(np.abs(actual - predict))

    pred_train_hurdle_svgp, pred_test_hurdle_svgp = predict_svgp(
        Xtrain=Xtrain_reg_hurdle, Xtest=Xtest_reg_hurdle, checkpointPath=modelPath)

    train_hurdle_reg_rmse = rmse(pred_train_hurdle_svgp["fmean"], Ytrain_reg_hurdle)
    logger.info("rmse on train set for hurdle svgp : " + str(train_hurdle_reg_rmse))
    train_hurdle_reg_mae = mad(pred_train_hurdle_svgp["fmean"], Ytrain_reg_hurdle)
    logger.info("mad on train set for hurdle svgp : " + str(train_hurdle_reg_mae))

    test_hurdle_reg_rmse = rmse(pred_test_hurdle_svgp["fmean"], Ytest_reg_hurdle)
    logger.info("rmse on test set for hurdle svgp : " + str(test_hurdle_reg_rmse))
    test_hurdle_reg_mae = mad(pred_test_hurdle_svgp["fmean"], Ytest_reg_hurdle)
    logger.info("mad on test set for hurdle svgp : " + str(test_hurdle_reg_mae))

    # combine the results from regression and classification
    train_pred_hurdle_clf = (cresults['pred_train']['pfmean'] > 0.5) * 1.0
    test_pred_hurdle_clf = (cresults['pred_test']['pfmean'] > 0.5) * 1.0

    train_pred_hurdle_comb = train_pred_hurdle_clf.copy()
    train_pred_hurdle_comb[train_pred_on_idx] = pred_train_hurdle_svgp["fmean"]
    test_pred_hurdle_comb = test_pred_hurdle_clf.copy()
    test_pred_hurdle_comb[test_pred_on_idx] = pred_test_hurdle_svgp["fmean"]

    # final results
    train_hurdle_comb_rmse = rmse(train_pred_hurdle_comb, Ytrain)
    logger.info("rmse on train set for hurdle svgp : " + str(train_hurdle_comb_rmse))
    train_hurdle_comb_mae = mad(train_pred_hurdle_comb, Ytrain)
    logger.info("mad on train set for hurdle svgp : " + str(train_hurdle_comb_mae))
    test_hurdle_comb_rmse = rmse(test_pred_hurdle_comb, Ytest)
    logger.info("rmse on test set for hurdle svgp : " + str(test_hurdle_comb_rmse))
    test_hurdle_comb_mae = mad(test_pred_hurdle_comb, Ytest)
    logger.info("mad on test set for hurdle svgp : " + str(test_hurdle_comb_mae))

    for handler in logger.handlers[:]:
        handler.close()
        logger.removeHandler(handler)

    # ****************************************************************
    # return values
    # ****************************************************************
    results = {
        'pred_train_hurdle_svgp': pred_train_hurdle_svgp,
        'pred_test_hurdle_svgp': pred_test_hurdle_svgp,
        'train_hurdle_reg_rmse': train_hurdle_reg_rmse,
        'train_hurdle_reg_mae': train_hurdle_reg_mae,
        'test_hurdle_reg_rmse': test_hurdle_reg_rmse,
        'test_hurdle_reg_mae': test_hurdle_reg_mae,
        'train_pred_hurdle_comb': train_pred_hurdle_comb,
        'test_pred_hurdle_comb': test_pred_hurdle_comb,
        'train_hurdle_comb_rmse': train_hurdle_comb_rmse,
        'train_hurdle_comb_mae': train_hurdle_comb_mae,
        'test_hurdle_comb_rmse': test_hurdle_comb_rmse,
        'test_hurdle_comb_mae': test_hurdle_comb_mae,
        'train_pred_on_idx': train_pred_on_idx,
        'test_pred_on_idx': test_pred_on_idx
    }
    pickle.dump(results, open(parentDir + subDir + "results_hurdle.pickle", "wb"))
def __init__(self, latent_dim, Y, transitions, T_latent=None, inputs=None,
             emissions=None, px1_mu=None, px1_cov=None, Xmu=None, Xchol=None,
             n_samples=100, batch_size=None, seed=None, name=None):
    super().__init__(latent_dim, Y[0], transitions, T_latent=None, inputs=None,
                     emissions=emissions, px1_mu=px1_mu, px1_cov=None,
                     Xmu=None, Xchol=None, n_samples=n_samples, seed=seed,
                     name=name)

    self.T = [Y_s.shape[0] for Y_s in Y]
    self.T_latent = T_latent or self.T
    self.n_seq = len(self.T)
    self.T_tf = tf.constant(self.T, dtype=gp.settings.int_type)
    self.T_latent_tf = tf.constant(self.T_latent, dtype=gp.settings.int_type)
    self.sum_T = float(sum(self.T))
    self.sum_T_latent = float(sum(self.T_latent))
    self.batch_size = batch_size

    self.Y = gp.ParamList(Y, trainable=False)
    self.inputs = None if inputs is None else gp.ParamList(inputs, trainable=False)

    _Xmu = ([np.zeros((T_s, self.latent_dim)) for T_s in self.T_latent]
            if Xmu is None else Xmu)
    self.X = gp.ParamList(_Xmu)

    _Xchol = ([np.eye(T_s * self.latent_dim) for T_s in self.T_latent]
              if Xchol is None else Xchol)
    xc_tr = lambda xc: None if xc.ndim == 1 else gtf.LowerTriangular(
        xc.shape[-1],
        num_matrices=1 if xc.ndim == 2 else xc.shape[0],
        squeeze=xc.ndim == 2)
    self.Xchol = gp.ParamList([gp.Param(xc, transform=xc_tr(xc)) for xc in _Xchol])

    self.multi_diag_px1_cov = False
    if isinstance(px1_cov, list):  # different prior for each sequence
        _x1_cov = np.stack(px1_cov)
        _x1_cov = np.sqrt(_x1_cov) if _x1_cov.ndim == 2 else np.linalg.cholesky(_x1_cov)
        _transform = None if _x1_cov.ndim == 2 else gtf.LowerTriangular(
            self.latent_dim, num_matrices=self.n_seq)
        self.multi_diag_px1_cov = _x1_cov.ndim == 2
    elif isinstance(px1_cov, np.ndarray):  # same prior for each sequence
        assert px1_cov.ndim < 3
        _x1_cov = np.sqrt(px1_cov) if px1_cov.ndim == 1 else np.linalg.cholesky(px1_cov)
        _transform = None if px1_cov.ndim == 1 else gtf.LowerTriangular(
            self.latent_dim, squeeze=True)
    else:
        _x1_cov = np.eye(self.latent_dim)
        _transform = gtf.LowerTriangular(self.latent_dim, squeeze=True)
    self.px1_cov_chol = gp.Param(_x1_cov, trainable=False, transform=_transform)
def __init__(self, latent_dim, Y, inputs=None, emissions=None, px1_mu=None,
             px1_cov=None, kern=None, Z=None, n_ind_pts=100, mean_fn=None,
             Q_diag=None, Umu=None, Ucov_chol=None, qx1_mu=None, qx1_cov=None,
             As=None, bs=None, Ss=None, n_samples=100, seed=None,
             parallel_iterations=10, jitter=gps.numerics.jitter_level,
             name=None):
    super().__init__(name=name)
    self.latent_dim = latent_dim
    self.T, self.obs_dim = Y.shape
    self.Y = Param(Y, trainable=False)

    self.inputs = None if inputs is None else Param(inputs, trainable=False)
    self.input_dim = 0 if self.inputs is None else self.inputs.shape[1]

    self.qx1_mu = Param(np.zeros(self.latent_dim) if qx1_mu is None else qx1_mu)
    self.qx1_cov_chol = Param(
        np.eye(self.latent_dim) if qx1_cov is None else np.linalg.cholesky(qx1_cov),
        transform=gtf.LowerTriangular(self.latent_dim, squeeze=True))

    self.As = Param(np.ones((self.T - 1, self.latent_dim)) if As is None else As)
    self.bs = Param(np.zeros((self.T - 1, self.latent_dim)) if bs is None else bs)
    self.Q_sqrt = Param(np.ones(self.latent_dim) if Q_diag is None else Q_diag ** 0.5,
                        transform=gtf.positive)

    if Ss is False:
        self._S_chols = None
    else:
        self.S_chols = Param(
            np.tile(self.Q_sqrt.value.copy()[None, ...], [self.T - 1, 1])
            if Ss is None else (np.sqrt(Ss) if Ss.ndim == 2 else np.linalg.cholesky(Ss)),
            transform=gtf.positive if (Ss is None or Ss.ndim == 2)
            else gtf.LowerTriangular(self.latent_dim, num_matrices=self.T - 1,
                                     squeeze=False))

    self.emissions = emissions or GaussianEmissions(latent_dim=self.latent_dim,
                                                    obs_dim=self.obs_dim)

    self.px1_mu = Param(np.zeros(self.latent_dim) if px1_mu is None else px1_mu,
                        trainable=False)
    self.px1_cov_chol = None if px1_cov is None else Param(
        np.sqrt(px1_cov) if px1_cov.ndim == 1 else np.linalg.cholesky(px1_cov),
        trainable=False,
        transform=gtf.positive if px1_cov.ndim == 1
        else gtf.LowerTriangular(self.latent_dim, squeeze=True))

    self.n_samples = n_samples
    self.seed = seed
    self.parallel_iterations = parallel_iterations
    self.jitter = jitter

    # Inference-specific attributes (see gpssm_models.py for appropriate choices):
    nans = tf.constant(np.zeros((self.T, self.n_samples, self.latent_dim)) * np.nan,
                       dtype=gps.float_type)
    self.sample_fn = lambda **kwargs: (nans, None)
    self.sample_kwargs = {}
    self.KL_fn = lambda *fs: tf.constant(np.nan, dtype=gps.float_type)

    # GP transitions:
    self.n_ind_pts = (n_ind_pts if Z is None
                      else (Z[0].shape[-2] if isinstance(Z, list) else Z.shape[-2]))

    if isinstance(Z, np.ndarray) and Z.ndim == 2:
        self.Z = mf.SharedIndependentMof(gp.features.InducingPoints(Z))
    else:
        Z_list = ([np.random.randn(self.n_ind_pts, self.latent_dim + self.input_dim)
                   for _ in range(self.latent_dim)] if Z is None else [z for z in Z])
        self.Z = mf.SeparateIndependentMof(
            [gp.features.InducingPoints(z) for z in Z_list])

    if isinstance(kern, gp.kernels.Kernel):
        self.kern = mk.SharedIndependentMok(kern, self.latent_dim)
    else:
        kern_list = kern or [gp.kernels.Matern32(self.latent_dim + self.input_dim,
                                                 ARD=True)
                             for _ in range(self.latent_dim)]
        self.kern = mk.SeparateIndependentMok(kern_list)

    self.mean_fn = mean_fn or mean_fns.Identity(self.latent_dim)
    self.Umu = Param(np.zeros((self.latent_dim, self.n_ind_pts))
                     if Umu is None else Umu)  # (Lm^-1)(Umu - m(Z))
    LT_transform = gtf.LowerTriangular(self.n_ind_pts,
                                       num_matrices=self.latent_dim, squeeze=False)
    self.Ucov_chol = Param(
        np.tile(np.eye(self.n_ind_pts)[None, ...], [self.latent_dim, 1, 1])
        if Ucov_chol is None else Ucov_chol,
        transform=LT_transform)  # (Lm^-1) Lu
    self._Kzz = None
def predict_svgp(Xtrain, Xtest, checkpointPath,
                 num_inducing_f=np.array([10, 100]), include_fmu=False):
    tf.reset_default_graph()

    # param initializations
    list_to_np = lambda _list: [np.array(e) for e in _list]
    init_fkell = list_to_np([[8., 8.], [5. / 1000]])
    init_fkvar = list_to_np([[20.], [20.]])
    init_noisevar = 0.001
    q_diag = True
    if include_fmu:
        init_f_mu = 0.
    init_Zf_s = kmeans(Xtrain[:, 0:2], num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(
        np.linspace(Xtrain[:, 2].min(), Xtrain[:, 2].max(),
                    num_inducing_f[1]), axis=1)
    init_Zf = [init_Zf_s, init_Zf_t]
    init_u_fm = np.random.randn(np.prod(num_inducing_f), 1) * 0.1
    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1, -1).T
    kern_param_learning_rate = 1e-4
    indp_param_learning_rate = 1e-4

    # ****************************************************************
    # define tensorflow variables and placeholders
    # ****************************************************************
    X = tf.placeholder(dtype=float_type)
    Y = tf.placeholder(dtype=float_type)

    with tf.name_scope("f_kern"):
        fkell = [Param(init_fkell[i], transform=transforms.Log1pe(),
                       name="lengthscale",
                       learning_rate=kern_param_learning_rate, summ=True)
                 for i in range(len(num_inducing_f))]
        fkvar = [Param(init_fkvar[i], transform=transforms.Log1pe(),
                       name="variance",
                       learning_rate=kern_param_learning_rate, summ=True)
                 for i in range(len(num_inducing_f))]
        fkern_list = [KernSE(fkell[i], fkvar[i])
                      for i in range(len(num_inducing_f))]

    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar, transform=transforms.Log1pe(),
                         name="variance",
                         learning_rate=kern_param_learning_rate, summ=True)

    with tf.name_scope("f_ind"):
        Zf_list = [Param(init_Zf[i], name="z",
                         learning_rate=indp_param_learning_rate, summ=True)
                   for i in range(len(num_inducing_f))]
        u_fm = Param(init_u_fm, name="value",
                     learning_rate=indp_param_learning_rate, summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt, transforms.positive,
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,
                              transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)

    # ****************************************************************
    # define model support functions
    # ****************************************************************
    def build_predict(Xnew, u_fm, u_fs_sqrt, fkern_list, Zf_list, f_mu=None):
        input_mask_f = _gen_inp_mask(Zf_list)
        # compute fmean and fvar from the kronecker inference
        fmean, fvar = kron_inf(Xnew, fkern_list, Zf_list, u_fm, u_fs_sqrt,
                               num_inducing_f, input_mask_f)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()
        # return mean and variance vectors in order
        return fmean, fvar

    def kron_inf(Xnew, kern_list, Z_list, q_mu, q_sqrt, num_inducing, input_mask):
        # Compute alpha = Kmm^-1 @ q_mu
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) +
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]
        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv, q_mu.get_tfv())

        n_batch = tf.stack([tf.shape(Xnew)[0], np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        KMN = []
        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            KMN.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))
        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))

        def loop_rows(n, mu, var):
            # assemble the n-th column of Kmn as a Kronecker product
            Kmn = tf.reshape(KMN[0][:, n], [num_inducing[0], 1])
            for p in range(1, len(num_inducing)):
                Kmn = tf_kron(Kmn, tf.reshape(KMN[p][:, n],
                                              [num_inducing[p], 1]))
            mu_n = tf.matmul(Kmn, alpha, transpose_a=True)
            mu = mu.write(n, mu_n)
            A = __kron_mv(Kmm_inv, Kmn)
            tmp = Knn[n] - tf.matmul(Kmn, A, transpose_a=True) + \
                tf.matmul(tf.matmul(A, S, transpose_a=True), A)
            var = var.write(n, tmp)
            return tf.add(n, 1), mu, var

        def loop_cond(n, mu, var):
            return tf.less(n, n_batch[0])

        mu = tf.TensorArray(float_type, size=n_batch[0])
        var = tf.TensorArray(float_type, size=n_batch[0])
        _, mu, var = tf.while_loop(loop_cond, loop_rows, [0, mu, var])
        mu = tf.reshape(mu.stack(), n_batch)
        var = tf.reshape(var.stack(), n_batch)
        return mu, var

    def __kron_mv(As, x):
        # Applies (As[0] kron As[1] kron ...) to x one factor at a time,
        # without ever forming the full Kronecker product.
        num_inducing = [int(As[p].get_shape()[0]) for p in range(len(As))]
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N, 1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p], N // num_inducing[p]))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N, 1])
        return b

    def tf_kron(*args):
        def __tf_kron(a, b):
            a_shape = [tf.shape(a)[0], tf.shape(a)[1]]
            b_shape = [tf.shape(b)[0], tf.shape(b)[1]]
            return tf.reshape(
                tf.reshape(a, [a_shape[0], 1, a_shape[1], 1]) *
                tf.reshape(b, [1, b_shape[0], 1, b_shape[1]]),
                [a_shape[0] * b_shape[0], a_shape[1] * b_shape[1]])
        kron_prod = tf.constant(1., shape=[1, 1], dtype=float_type)
        for Ap in args:
            kron_prod = __tf_kron(kron_prod, Ap)
        return kron_prod

    def _gen_inp_mask(Z_list):
        # assign each kernel its contiguous slice of input columns
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask

    # ****************************************************************
    # build model and define lower bound
    # ****************************************************************
    # get augmented functions
    with tf.name_scope("model_build"):
        fmean, fvar = build_predict(X, u_fm, u_fs_sqrt, fkern_list, Zf_list)

    # load model
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    modelmngr = modelmanager(saver, sess, checkpointPath)
    modelmngr.load()

    # return inside a dictionary
    pred_train = {'fmean': fmean.eval(feed_dict={X: Xtrain}),
                  'fvar': fvar.eval(feed_dict={X: Xtrain})}
    if Xtest is not None:
        pred_test = {'fmean': fmean.eval(feed_dict={X: Xtest}),
                     'fvar': fvar.eval(feed_dict={X: Xtest})}
    sess.close()
    if Xtest is not None:
        return pred_train, pred_test
    return pred_train
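# The efficiency of kron_inf rests on __kron_mv, which applies a Kronecker
# product (A_1 kron ... kron A_P) to a vector one factor at a time via
# reshapes, avoiding the N x N matrix entirely. A self-contained NumPy check
# of that identity (a sketch; the names here are illustrative, not from the
# model code):
import numpy as np

def kron_mv(As, x):
    dims = [A.shape[0] for A in As]
    N = int(np.prod(dims))
    b = x.reshape(N, 1)
    for A, m in zip(As, dims):
        X = b.reshape(m, N // m)       # row-major, like tf.reshape
        b = (X.T @ A.T).reshape(N, 1)  # matmul(X, A, transpose_a/b=True)
    return b

rng = np.random.default_rng(0)
A, B = rng.standard_normal((3, 3)), rng.standard_normal((4, 4))
x = rng.standard_normal(12)
assert np.allclose(kron_mv([A, B], x).ravel(), np.kron(A, B) @ x)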
def predict_onoff(Xtrain, Xtest, checkpointPath,
                  num_inducing_f=np.array([10, 100]),
                  num_inducing_g=np.array([10, 100]), include_fmu=False):
    tf.reset_default_graph()

    # param initializations
    list_to_np = lambda _list: [np.array(e) for e in _list]
    init_fkell = list_to_np([[8., 8.], [5. / 1000]])
    init_fkvar = list_to_np([[20.], [20.]])
    init_gkell = list_to_np([[8., 8.], [5. / 1000]])
    init_gkvar = list_to_np([[10.], [10.]])
    init_noisevar = 0.001
    q_diag = True
    if include_fmu:
        init_f_mu = 0.
    init_Zf_s = kmeans(Xtrain[:, 0:2], num_inducing_f[0])[0]
    init_Zf_t = np.expand_dims(
        np.linspace(Xtrain[:, 2].min(), Xtrain[:, 2].max(),
                    num_inducing_f[1]), axis=1)
    init_Zf = [init_Zf_s, init_Zf_t]
    init_Zg = init_Zf.copy()
    init_u_fm = np.random.randn(np.prod(num_inducing_f), 1) * 0.1
    init_u_gm = np.random.randn(np.prod(num_inducing_g), 1) * 0.1
    init_u_fs_sqrt = np.ones(np.prod(num_inducing_f)).reshape(1, -1).T
    init_u_gs_sqrt = np.ones(np.prod(num_inducing_g)).reshape(1, -1).T
    kern_param_learning_rate = 1e-4
    indp_param_learning_rate = 1e-4

    # tf variable declarations
    X = tf.placeholder(dtype=float_type)
    Y = tf.placeholder(dtype=float_type)

    with tf.name_scope("f_kern"):
        fkell = [Param(init_fkell[i], transform=transforms.Log1pe(),
                       name="lengthscale",
                       learning_rate=kern_param_learning_rate, summ=True)
                 for i in range(len(num_inducing_f))]
        fkvar = [Param(init_fkvar[i], transform=transforms.Log1pe(),
                       name="variance",
                       learning_rate=kern_param_learning_rate, summ=True)
                 for i in range(len(num_inducing_f))]
        fkern_list = [KernSE(fkell[i], fkvar[i])
                      for i in range(len(num_inducing_f))]

    with tf.name_scope("g_kern"):
        gkell = [Param(init_gkell[i], transform=transforms.Log1pe(),
                       name="lengthscale",
                       learning_rate=kern_param_learning_rate, summ=True)
                 for i in range(len(num_inducing_g))]
        gkvar = [Param(init_gkvar[i], transform=transforms.Log1pe(),
                       name="variance",
                       learning_rate=kern_param_learning_rate, summ=True)
                 for i in range(len(num_inducing_g))]
        gkern_list = [KernSE(gkell[i], gkvar[i])
                      for i in range(len(num_inducing_g))]

    with tf.name_scope("likelihood"):
        noisevar = Param(init_noisevar, transform=transforms.Log1pe(),
                         name="variance",
                         learning_rate=kern_param_learning_rate, summ=True)

    with tf.name_scope("f_ind"):
        Zf_list = [Param(init_Zf[i], name="z",
                         learning_rate=indp_param_learning_rate, summ=True)
                   for i in range(len(num_inducing_f))]
        u_fm = Param(init_u_fm, name="value",
                     learning_rate=indp_param_learning_rate, summ=True)
        if q_diag:
            u_fs_sqrt = Param(init_u_fs_sqrt, transforms.positive,
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)
        else:
            u_fs_sqrt = Param(init_u_fs_sqrt,
                              transforms.LowerTriangular(init_u_fs_sqrt.shape[0]),
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)
        # f_mu = Param(init_f_mu, name="fmu",
        #              learning_rate=indp_param_learning_rate, summ=True)

    with tf.name_scope("g_ind"):
        Zg_list = [Param(init_Zg[i], name="z",
                         learning_rate=indp_param_learning_rate, summ=True)
                   for i in range(len(num_inducing_g))]
        u_gm = Param(init_u_gm, name="value",
                     learning_rate=indp_param_learning_rate, summ=True)
        if q_diag:
            u_gs_sqrt = Param(init_u_gs_sqrt, transforms.positive,
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)
        else:
            u_gs_sqrt = Param(init_u_gs_sqrt,
                              transforms.LowerTriangular(init_u_gs_sqrt.shape[0]),
                              name="variance",
                              learning_rate=indp_param_learning_rate, summ=True)

    def build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,
                       u_gm, u_gs_sqrt, gkern_list, Zg_list, whiten=False):
        if whiten:
            raise NotImplementedError()
        Kfmm = [fkern_list[i].K(Zf_list[i].get_tfv()) +
                tf.eye(num_inducing_f[i], dtype=float_type) * jitter_level
                for i in range(len(num_inducing_f))]
        Kgmm = [gkern_list[i].K(Zg_list[i].get_tfv()) +
                tf.eye(num_inducing_g[i], dtype=float_type) * jitter_level
                for i in range(len(num_inducing_g))]
        KL = GaussKLkron(u_fm.get_tfv(), u_fs_sqrt.get_tfv(), Kfmm) + \
            GaussKLkron(u_gm.get_tfv(), u_gs_sqrt.get_tfv(), Kgmm)
        return KL

    def build_predict(Xnew, u_fm, u_fs_sqrt, fkern_list, Zf_list,
                      u_gm, u_gs_sqrt, gkern_list, Zg_list, f_mu=None):
        input_mask_f = _gen_inp_mask(Zf_list)
        input_mask_g = _gen_inp_mask(Zg_list)
        # compute fmean and fvar from the kronecker inference
        fmean, fvar = kron_inf(Xnew, fkern_list, Zf_list, u_fm, u_fs_sqrt,
                               num_inducing_f, input_mask_f)
        # fmean = fmean + mean_function(Xnew)
        if f_mu is not None:
            fmean = fmean + f_mu.get_tfv()
        # compute gmean and gvar from the kronecker inference
        gmean, gvar = kron_inf(Xnew, gkern_list, Zg_list, u_gm, u_gs_sqrt,
                               num_inducing_g, input_mask_g)
        gmean = gmean + tf.cast(tf.constant(-1.0), float_type)
        # compute augmented distributions
        ephi_g, ephi2_g, evar_phi_g = probit_expectations(gmean, gvar)
        # compute augmented f:
        # p(f|g) = N(f | diag(ephi_g) A u_fm,
        #            diag(evar_phi_g) (Kfnn + A(u_fs - Kfmm)A^T))
        gfmean = tf.multiply(ephi_g, fmean)
        gfvar = tf.multiply(ephi2_g, fvar)
        gfmeanu = tf.multiply(evar_phi_g, tf.square(fmean))
        # return mean and variance vectors in order
        return gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, ephi_g, evar_phi_g

    def kron_inf(Xnew, kern_list, Z_list, q_mu, q_sqrt, num_inducing, input_mask):
        # Compute alpha = Kmm^-1 @ q_mu
        Kmm = [kern_list[p].K(Z_list[p].get_tfv()) +
               tf.eye(num_inducing[p], dtype=float_type) * jitter_level
               for p in range(len(num_inducing))]
        Kmm_inv = [tf.matrix_inverse(Kmm[p]) for p in range(len(num_inducing))]
        alpha = __kron_mv(Kmm_inv, q_mu.get_tfv(), num_inducing)
        n_batch = tf.stack([tf.shape(Xnew)[0], np.int32(1)])
        Knn = tf.ones(n_batch, dtype=float_type)
        KMN = []
        for p in range(len(num_inducing)):
            xnew = tf.gather(Xnew, input_mask[p], axis=1)
            Knn *= tf.reshape(kern_list[p].Kdiag(xnew), n_batch)
            KMN.append(kern_list[p].K(Z_list[p].get_tfv(), xnew))
        S = tf.diag(tf.squeeze(tf.square(q_sqrt.get_tfv())))

        def loop_rows(n, mu, var):
            Kmn = tf.reshape(KMN[0][:, n], [num_inducing[0], 1])
            for p in range(1, len(num_inducing)):
                Kmn = tf_kron(Kmn, tf.reshape(KMN[p][:, n],
                                              [num_inducing[p], 1]))
            mu_n = tf.matmul(Kmn, alpha, transpose_a=True)
            mu = mu.write(n, mu_n)
            A = __kron_mv(Kmm_inv, Kmn, num_inducing)
            tmp = Knn[n] - tf.matmul(Kmn, A, transpose_a=True) + \
                tf.matmul(tf.matmul(A, S, transpose_a=True), A)
            var = var.write(n, tmp)
            return tf.add(n, 1), mu, var

        def loop_cond(n, mu, var):
            return tf.less(n, n_batch[0])

        mu = tf.TensorArray(float_type, size=n_batch[0])
        var = tf.TensorArray(float_type, size=n_batch[0])
        _, mu, var = tf.while_loop(loop_cond, loop_rows, [0, mu, var])
        mu = tf.reshape(mu.stack(), n_batch)
        var = tf.reshape(var.stack(), n_batch)
        return mu, var

    def __kron_mv(As, x, num_inducing):
        # Applies (As[0] kron As[1] kron ...) to x one factor at a time.
        N = np.prod(num_inducing)
        b = tf.reshape(x, [N, 1])
        for p in range(len(As)):
            Ap = As[p]
            X = tf.reshape(b, (num_inducing[p], N // num_inducing[p]))
            b = tf.matmul(X, Ap, transpose_a=True, transpose_b=True)
            b = tf.reshape(b, [N, 1])
        return b

    def tf_kron(a, b):
        a_shape = [a.shape[0].value, a.shape[1].value]
        b_shape = [b.shape[0].value, b.shape[1].value]
        return tf.reshape(
            tf.reshape(a, [a_shape[0], 1, a_shape[1], 1]) *
            tf.reshape(b, [1, b_shape[0], 1, b_shape[1]]),
            [a_shape[0] * b_shape[0], a_shape[1] * b_shape[1]])

    def _gen_inp_mask(Z_list):
        # assign each kernel its contiguous slice of input columns
        input_mask = []
        tmp = 0
        for p in range(len(Z_list)):
            p_dim = Z_list[p].shape[1]
            input_mask.append(np.arange(tmp, tmp + p_dim, dtype=np.int32))
            tmp += p_dim
        return input_mask

    def variational_expectations(Y, fmu, fvar, fmuvar, noisevar):
        return -0.5 * np.log(2 * np.pi) - 0.5 * tf.log(noisevar) \
            - 0.5 * (tf.square(Y - fmu) + fvar + fmuvar) / noisevar

    def probit_expectations(gmean, gvar):
        def normcdf(x):
            # standard normal cdf, squashed into [1e-3, 1 - 1e-3] for stability
            return 0.5 * (1.0 + tf.erf(x / np.sqrt(2.0))) * (1. - 2.e-3) + 1.e-3

        def owent(h, a):
            # approximation to Owen's T function
            h = tf.abs(h)
            term1 = tf.atan(a) / (2 * np.pi)
            term2 = tf.exp(-0.5 * tf.multiply(tf.square(h), tf.square(a) + 1))
            return tf.multiply(term1, term2)

        z = gmean / tf.sqrt(1. + gvar)
        a = 1 / tf.sqrt(1. + (2 * gvar))
        cdfz = normcdf(z)
        tz = owent(z, a)
        ephig = cdfz
        ephisqg = cdfz - 2. * tz
        evarphig = cdfz - 2. * tz - tf.square(cdfz)
        # clip negative values from the variance terms to zero
        ephisqg = (ephisqg + tf.abs(ephisqg)) / 2.
        evarphig = (evarphig + tf.abs(evarphig)) / 2.
        return ephig, ephisqg, evarphig

    # KL between q(u_f)q(u_g) and their priors
    kl = build_prior_kl(u_fm, u_fs_sqrt, fkern_list, Zf_list,
                        u_gm, u_gs_sqrt, gkern_list, Zg_list)
    gfmean, gfvar, gfmeanu, fmean, fvar, gmean, gvar, pgmean, pgvar = \
        build_predict(X, u_fm, u_fs_sqrt, fkern_list, Zf_list,
                      u_gm, u_gs_sqrt, gkern_list, Zg_list)

    # load model
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    modelmngr = modelmanager(saver, sess, checkpointPath)
    modelmngr.load()

    pred_train = {'gfmean': gfmean.eval(feed_dict={X: Xtrain}),
                  'fmean': fmean.eval(feed_dict={X: Xtrain}),
                  'pgmean': pgmean.eval(feed_dict={X: Xtrain})}
    if Xtest is not None:
        pred_test = {'gfmean': gfmean.eval(feed_dict={X: Xtest}),
                     'fmean': fmean.eval(feed_dict={X: Xtest}),
                     'pgmean': pgmean.eval(feed_dict={X: Xtest})}
    sess.close()
    if Xtest is not None:
        return pred_train, pred_test
    return pred_train
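# probit_expectations relies on the closed forms E[Phi(g)] = Phi(z) and
# E[Phi(g)^2] = Phi(z) - 2*T(z, a) for g ~ N(m, v), where z = m / sqrt(1 + v),
# a = 1 / sqrt(1 + 2v), and T is Owen's T function (the code above uses an
# approximation to T and a small clipping in normcdf, both ignored here).
# A hedged Monte Carlo check of the identities using SciPy's exact owens_t,
# with assumed toy moments:
import numpy as np
from scipy.special import owens_t
from scipy.stats import norm

m, v = 0.3, 1.7  # hypothetical mean and variance of g
z = m / np.sqrt(1.0 + v)
a = 1.0 / np.sqrt(1.0 + 2.0 * v)

g = np.random.default_rng(1).normal(m, np.sqrt(v), size=1_000_000)
phi = norm.cdf(g)

print(phi.mean(), norm.cdf(z))                             # E[Phi(g)]
print((phi ** 2).mean(), norm.cdf(z) - 2 * owens_t(z, a))  # E[Phi(g)^2]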