def variational_model():
  """Mean-field variational family; the q*-parameters (qpi_alpha, qmu_loc,
  etc.) are assumed to be defined in the enclosing scope."""
  qpi = ed.Dirichlet(concentration=qpi_alpha, name='qpi')
  # This model works well
  qmu = ed.MultivariateNormalDiag(
      loc=qmu_loc, scale_diag=qmu_scale, name='qmu')
  # qmu = ed.Normal(loc=qmu_loc, scale=qmu_scale, name='qmu')
  qsigma = ed.InverseGamma(concentration=qsigma_alpha,
                           rate=qsigma_beta,
                           name='qsigma')
  qz = ed.MultivariateNormalDiag(loc=qz_loc, scale_diag=qz_scale, name='qz')
  qw = ed.MultivariateNormalDiag(loc=qw_loc, scale_diag=qw_scale, name='qw')
  return qpi, qmu, qsigma, qz, qw
def Q(D0, D, n_classes, sample_shape):
  qmu_loc = [tf.Variable(np.zeros([D0]), dtype=dtype)
             for i in range(n_classes)]
  qmu_scale = [tf.nn.softplus(0.1 * tf.Variable(np.ones([D0]), dtype=dtype))
               for i in range(n_classes)]
  qmu = [ed.Normal(loc=qmu_loc[i], scale=qmu_scale[i], name='qmu%d' % i)
         for i in range(n_classes)]
  qsigma_alpha = tf.nn.softplus(0.5 * tf.Variable(np.ones([D0]), dtype=dtype))
  qsigma_beta = tf.nn.softplus(0.5 * tf.Variable(np.ones([D0]), dtype=dtype))
  qsigma = ed.InverseGamma(concentration=qsigma_alpha, rate=qsigma_beta,
                           name='qsigma')
  qz = [ed.MultivariateNormalDiag(loc=qmu[i], sample_shape=sample_shape[i],
                                  scale_diag=qsigma, name='qz%d' % i)
        for i in range(n_classes)]
  qbeta_loc = tf.Variable(tf.zeros([D0, D], dtype=dtype), name="qbeta_loc")
  qbeta_scale = tf.math.softplus(
      tf.Variable(tf.ones([D0, D], dtype=dtype), name="qbeta_scale"))
  qbeta = ed.Normal(qbeta_loc, qbeta_scale, name="qbeta")
  qalpha_loc = tf.Variable(tf.zeros([D], dtype=dtype), name="qalpha_loc")
  qalpha_scale = tf.math.softplus(
      tf.Variable(tf.ones([D], dtype=dtype), name="qalpha_scale"))
  qalpha = ed.Normal(qalpha_loc, qalpha_scale, name="qalpha")
  qnoise_alpha = tf.nn.softplus(0.5 * tf.Variable(np.ones([1]), dtype=dtype))
  qnoise_beta = tf.nn.softplus(0.5 * tf.Variable(np.ones([1]), dtype=dtype))
  qnoise = ed.InverseGamma(concentration=qnoise_alpha, rate=qnoise_beta,
                           name='qnoise')
  return qmu, qsigma, qz, (qbeta, qalpha), qnoise
def definition(self, ridge_factor=1e-3, name="gp", gp_only=False):
  """Defines Gaussian Process prior with kernel_func.

  Args:
    ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.
    name: (str) name of the random variable.
    gp_only: (bool) Whether to return only the GP prior (i.e. without the
      observation random variable).

  Returns:
    (ed.RandomVariable) A random variable representing the Gaussian Process,
      dimension (N,).
  """
  X = tf.convert_to_tensor(self.X, dtype=dtype_util.TF_DTYPE)
  ls = tf.convert_to_tensor(self.ls, dtype=dtype_util.TF_DTYPE)

  gp_mean = tf.zeros(self.n_obs, dtype=dtype_util.TF_DTYPE)

  # covariance
  K_mat = self.kern_func(X, ls=ls, ridge_factor=ridge_factor)

  gp = ed.MultivariateNormalTriL(loc=gp_mean,
                                 scale_tril=tf.cholesky(K_mat),
                                 name=name)
  if gp_only:
    return gp

  y = ed.MultivariateNormalDiag(loc=gp,
                                scale_identity_multiplier=.01,
                                name="y")
  return y
def model(X, log_ls=0., ridge_factor=1e-4, sample_ls=False):
  """Defines the Gaussian Process model.

  Args:
    X: (np.ndarray of float32) input training features,
      with dimension (N, D).
    log_ls: (float32) length-scale parameter in log scale.
    ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.
    sample_ls: (bool) Whether to sample the length-scale parameter.

  Returns:
    (tf.Tensors of float32) model parameters.
  """
  X = tf.convert_to_tensor(X)
  N = X.shape.as_list()[0]

  # specify kernel matrix
  if sample_ls:
    log_ls = ed.Normal(loc=-5., scale=1., name='ls')

  K_mat = rbf(X, ls=tf.exp(log_ls), ridge_factor=ridge_factor)

  # specify model parameters
  gp_f = ed.MultivariateNormalTriL(loc=tf.zeros(N),
                                   scale_tril=tf.cholesky(K_mat),
                                   name="gp_f")
  sigma = ed.Normal(loc=-5., scale=1., name='sigma')

  y = ed.MultivariateNormalDiag(loc=gp_f,
                                scale_identity_multiplier=tf.exp(sigma),
                                name="y")

  return y, gp_f, sigma, log_ls
def definition(self, **resid_kwargs):
  """Defines Gaussian process with identity mean function.

  Args:
    **resid_kwargs: Keyword arguments for GaussianProcess model definition.

  Returns:
    (ed.RandomVariable) outcome random variable.
  """
  # specify identity mean function
  mean_func = self.model_cdf

  # specify residual function
  gp = self.gp_model.definition(gp_only=True, name="gp", **resid_kwargs)

  # specify observational noise
  sigma = ed.Normal(loc=_LOG_NOISE_PRIOR_MEAN,
                    scale=_LOG_NOISE_PRIOR_SDEV, name="log_sigma")

  # specify outcome
  cdf_mean = mean_func + gp
  if self.activation:
    cdf_mean = self.activation(cdf_mean)

  cdf = ed.MultivariateNormalDiag(loc=cdf_mean,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="cdf")
  return cdf
def mfvi_variational_family(X, name="", **kwargs):
  """Defines the mean-field variational family for Gaussian Process.

  Args:
    X: (np.ndarray of float32) input training features,
      with dimension (N, D).
    name: (str) name for variational parameters.
    **kwargs: Dict of other keyword variables.
      For compatibility purposes with other variational families.

  Returns:
    q_f: (ed.RandomVariable) variational family.
    qf_mean, qf_sdev: (tf.Variable) variational parameters for q_f.
  """
  X = tf.convert_to_tensor(X, dtype=tf.float32)
  N, D = X.shape.as_list()

  # define variational parameters
  qf_mean = tf.get_variable(shape=[N], name='{}_mean'.format(name))
  qf_sdev = tf.exp(tf.get_variable(shape=[N], name='{}_sdev'.format(name)))

  # define variational family
  q_f = ed.MultivariateNormalDiag(loc=qf_mean,
                                  scale_diag=qf_sdev,
                                  name=name)

  return q_f, qf_mean, qf_sdev
def call(self, inputs):
  """Runs the model forward to generate a sequence of productions.

  Args:
    inputs: Unused.

  Returns:
    productions: Tensor of shape [1, num_productions, num_production_rules].
      Slices along the `num_productions` dimension represent one-hot vectors.
  """
  del inputs  # unused
  latent_code = ed.MultivariateNormalDiag(loc=tf.zeros(self.latent_size),
                                          sample_shape=1,
                                          name="latent_code")
  state = self.lstm.zero_state(1, dtype=tf.float32)
  t = 0
  productions = []
  stack = [self.grammar.start_symbol]
  while stack:
    symbol = stack.pop()
    net, state = self.lstm(latent_code, state)
    logits = (self.output_layer(net) +
              self.grammar.mask(symbol, on_value=0., off_value=-1e9))
    production = ed.OneHotCategorical(logits=logits,
                                      name="production_" + str(t))
    _, rhs = self.grammar.production_rules[tf.argmax(production, axis=-1)]
    for symbol in rhs:
      if symbol in self.grammar.nonterminal_symbols:
        stack.append(symbol)
    productions.append(production)
    t += 1
  return tf.stack(productions, axis=1)
def latent_encoder(self, x, y):
  """Encodes the inputs into one representation.

  Args:
    x: Tensor of shape [batch_size, observations, d_x]. For the prior,
      these are context x-values. For the posterior, these are target
      x-values.
    y: Tensor of shape [batch_size, observations, d_y]. For the prior,
      these are context y-values. For the posterior, these are target
      y-values.

  Returns:
    A normal distribution over tensors of shape [batch_size, num_latents].
  """
  encoder_input = tf.concat([x, y], axis=-1)
  per_example_embedding = batch_mlp(encoder_input,
                                    self._latent_encoder_sizes)
  dataset_embedding = tf.reduce_mean(per_example_embedding, axis=1)
  hidden = tf.keras.layers.Dense(
      (self._latent_encoder_sizes[-1] + self._num_latents) // 2,
      activation=tf.nn.relu)(dataset_embedding)
  loc = tf.keras.layers.Dense(self._num_latents, activation=None)(hidden)
  untransformed_scale = tf.keras.layers.Dense(self._num_latents,
                                              activation=None)(hidden)
  # Constrain the scale following Garnelo et al. (2018).
  scale_diag = 0.1 + 0.9 * tf.sigmoid(untransformed_scale)
  return ed.MultivariateNormalDiag(loc=loc, scale_diag=scale_diag)
def logistic_regression(features):
  """Bayesian logistic regression, which returns labels given features."""
  coeffs = ed.MultivariateNormalDiag(loc=tf.zeros(features.shape[1]),
                                     name="coeffs")
  labels = ed.Bernoulli(logits=tf.tensordot(features, coeffs, [[1], [0]]),
                        name="labels")
  return labels
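# Once a model is written as a generative function like `logistic_regression`
# above, Edward2 can trace it to produce its joint density. The snippet below
# is a sketch, not part of the source: it assumes the TF1-era
# `tensorflow_probability.edward2` API, and `features_batch` plus the
# candidate values are hypothetical placeholders.
import numpy as np
import tensorflow as tf
from tensorflow_probability import edward2 as ed

# make_log_joint_fn re-traces the model on each call; random variables are
# fixed to the values passed by name as keyword arguments.
log_joint = ed.make_log_joint_fn(logistic_regression)

features_batch = tf.constant(np.random.randn(20, 3), dtype=tf.float32)
candidate_coeffs = tf.zeros([3])
observed_labels = tf.ones([20], dtype=tf.int32)

# Evaluates log p(coeffs) + log p(labels | features, coeffs).
lp = log_joint(features_batch,
               coeffs=candidate_coeffs,
               labels=observed_labels)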
def model_mixture_adaptive(X, ls=1., n_mix=2, ridge_factor=1e-3):
  """Defines the Adaptive Mixture of Gaussian Process model.

  Note: Currently this method is not tested and is likely to not work well,
    due to explicit sampling of membership variables (i.e. mix_member).
    More work needs to be done to perform integrated sampling.

  Args:
    X: (np.ndarray of float32) input training features,
      with dimension (N, D).
    ls: (float32) length-scale parameter.
    n_mix: (int8) Number of mixture components.
    ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.

  Returns:
    (tf.Tensors of float32) model parameters.
  """
  # TODO(jereliu): find a way to integrate over adaptive mixture.
  # warn (rather than raise) so the model can still be constructed
  warnings.warn(
      "Currently this method is not tested and is likely to not "
      "work well due to explicit sampling of membership variables "
      "(i.e. mix_member). More work needs to be done to perform "
      "integrated sampling.")

  N = X.shape[0]
  K_mat = rbf(X, ls=ls, ridge_factor=ridge_factor)

  gp_weight = ed.Independent(
      distribution=tfd.MultivariateNormalTriL(
          loc=tf.zeros(shape=[n_mix, N]),
          scale_tril=replicate_along_zero_axis(tf.cholesky(K_mat), n_mix),
      ),
      reinterpreted_batch_ndims=1,
      name="gp_w")
  mix_member = ed.Multinomial(total_count=[1.],
                              logits=tf.transpose(gp_weight),
                              name="mix_prob")

  gp_comp = ed.Independent(
      distribution=tfd.MultivariateNormalTriL(
          loc=tf.zeros(shape=[n_mix, N]),
          scale_tril=replicate_along_zero_axis(tf.cholesky(K_mat), n_mix),
      ),
      reinterpreted_batch_ndims=1,
      name="gp_f")
  gp_f = tf.reduce_sum(tf.transpose(gp_comp) * mix_member, axis=-1)

  sigma = ed.Normal(loc=-5., scale=1., name='sigma')

  y = ed.MultivariateNormalDiag(loc=gp_f,
                                scale_identity_multiplier=tf.exp(sigma),
                                name="y")

  # y = ed.MixtureSameFamily(
  #     components_distribution=tfd.MultivariateNormalDiag(
  #         loc=gp_comp, scale_identity_multiplier=tf.exp(sigma)),
  #     mixture_distribution=tfd.Categorical(logits=gp_weight),
  #     name="y")

  return gp_weight, mix_member, gp_comp, sigma, y
def predict_target(posterior_mu, posterior_sigma, test_data):
  """Assigns each test point to the class with the highest log-likelihood."""
  K = len(posterior_mu)
  model = [ed.MultivariateNormalDiag(posterior_mu[i], posterior_sigma)
           for i in range(K)]
  prob = []
  with tf.Session() as sess:
    for i in range(K):
      label_prob = model[i].distribution.log_prob(test_data).eval()
      prob.append(label_prob)
  pred_target = np.argmax(prob, 0)
  return pred_target
def normal_variational_family(shape, name=None,
                              init_loc=None, init_scale=None,
                              trainable=True):
  """Defines mean-field variational family.

  Args:
    shape: (tuple of int) Defines shape of the variational random variable.
    name: (str) Name of the random variable.
    init_loc: (float32) Initial values for q_mean.
    init_scale: (float32) Initial values for q_log_sd.
    trainable: (bool) Whether the variational family is trainable.

  Returns:
    q_rv: (ed.RandomVariable) RandomVariable representing the variational
      family.
    q_mean, q_log_sd: (tf.Variable) Variational parameters.

  Raises:
    ValueError: If init_loc/init_scale is None when trainable=False.
  """
  if not trainable:
    if init_loc is None or init_scale is None:
      raise ValueError("Initial values cannot be None if trainable=False.")

  with tf.variable_scope("q_{}_scope".format(name),
                         default_name="normal_variational"):
    # TODO(jereliu): manipulate such that we can initiate these values.
    # allow user to set trainable=False and give value through initializer.
    q_mean = tf.get_variable("q_mean",
                             shape=shape,
                             dtype=dtype_util.TF_DTYPE,
                             initializer=init_loc,
                             trainable=trainable)
    q_log_sd = tf.get_variable("q_log_sd",
                               shape=shape,
                               dtype=dtype_util.TF_DTYPE,
                               initializer=init_scale,
                               trainable=trainable)

    rv_shape = shape if shape is not None else init_loc.shape
    if len(rv_shape) == 0:
      q_rv = ed.Normal(loc=q_mean,
                       scale=tf.exp(q_log_sd),
                       name="q_{}".format(name))
    else:
      q_rv = ed.MultivariateNormalDiag(loc=q_mean,
                                       scale_diag=tf.exp(q_log_sd),
                                       name="q_{}".format(name))

  return q_rv, q_mean, q_log_sd
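# A hedged usage sketch for the factory above; the shape and the name "w"
# are illustrative only and not taken from the source.
q_w, q_w_mean, q_w_log_sd = normal_variational_family(shape=(5,), name="w")
# q_w is an ed.MultivariateNormalDiag named "q_w"; sampling it inside an
# ELBO differentiates through both q_w_mean and q_w_log_sd, since the
# scale enters via the reparameterized tf.exp(q_w_log_sd).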
def definition(self, **resid_kwargs):
  """Sets up model definition and parameters.

  Args:
    **resid_kwargs: Keyword arguments for GaussianProcess model definition.

  Returns:
    (ed.RandomVariable) outcome random variable.
  """
  # convert data type
  F = tf.convert_to_tensor(self.base_pred_array, dtype=dtype_util.TF_DTYPE)

  # specify mean function
  W = ed.MultivariateNormalDiag(loc=tf.zeros(shape=(self.n_model,)),
                                scale_identity_multiplier=_WEIGHT_PRIOR_SDEV,
                                name="mean_weight")
  FW = tf.matmul(F, tf.expand_dims(W, -1))
  mean_func = tf.reduce_sum(FW, axis=1, name="mean_func")

  # specify residual function
  resid_func = 0.
  if self.add_resid:
    resid_func = self.resid_model.definition(gp_only=True,
                                             name="resid_func",
                                             **resid_kwargs)

  # specify observational noise
  sigma = ed.Normal(loc=_LOG_NOISE_PRIOR_MEAN,
                    scale=_LOG_NOISE_PRIOR_SDEV, name="log_sigma")

  # specify outcome
  y = ed.MultivariateNormalDiag(loc=mean_func + resid_func,
                                scale_identity_multiplier=tf.exp(sigma),
                                name="y")
  return y
def model(features):
  # Set up fixed effects and other parameters.
  intercept = tf.get_variable("intercept", [])
  service_effects = tf.get_variable("service_effects", [])
  student_stddev_unconstrained = tf.get_variable("student_stddev_pre", [])
  instructor_stddev_unconstrained = tf.get_variable(
      "instructor_stddev_pre", [])

  # Set up random effects.
  student_effects = ed.MultivariateNormalDiag(
      loc=tf.zeros(num_students),
      scale_identity_multiplier=tf.exp(student_stddev_unconstrained),
      name="student_effects")
  instructor_effects = ed.MultivariateNormalDiag(
      loc=tf.zeros(num_instructors),
      scale_identity_multiplier=tf.exp(instructor_stddev_unconstrained),
      name="instructor_effects")

  # Set up likelihood given fixed and random effects.
  ratings = ed.Normal(
      loc=(service_effects * features["service"] +
           tf.gather(student_effects, features["students"]) +
           tf.gather(instructor_effects, features["instructors"]) +
           intercept),
      scale=1.,
      name="ratings")
  return ratings
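# Edward2's interception mechanism can condition a model like the one above
# on fitted values. This sketch follows the pattern documented for
# `ed.interception`; it assumes the TF1-era `tfp.edward2` API, and
# `student_effects_hat` / `instructor_effects_hat` are hypothetical
# posterior estimates, not values from the source.
def set_effects(f, *args, **kwargs):
  # Fix the named random variables to given values during model tracing.
  if kwargs.get("name") == "student_effects":
    kwargs["value"] = student_effects_hat
  if kwargs.get("name") == "instructor_effects":
    kwargs["value"] = instructor_effects_hat
  return ed.interceptable(f)(*args, **kwargs)

with ed.interception(set_effects):
  predicted_ratings = model(features)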
def EpiAnno(D0, D, n_classes, sample_shape):
  mu = [ed.Normal(loc=tf.zeros([D0], dtype), scale=tf.ones([D0], dtype),
                  name='mu%d' % i)
        for i in range(n_classes)]
  sigma = ed.InverseGamma(concentration=tf.ones([D0], dtype=dtype),
                          rate=tf.ones([D0], dtype=dtype), name='sigma')
  z = [ed.MultivariateNormalDiag(loc=mu[i], sample_shape=sample_shape[i],
                                 scale_diag=sigma, name='z%d' % i)
       for i in range(n_classes)]
  h = tf.concat(z, axis=0)
  beta = ed.Normal(tf.zeros([D0, D], dtype=dtype), 1., name="beta")
  alpha = ed.Normal(tf.zeros([D], dtype=dtype), 1., name="alpha")
  output = tf.nn.leaky_relu(h @ beta + alpha, alpha=0.5)
  noise = ed.InverseGamma(concentration=tf.ones([1], dtype=dtype),
                          rate=tf.ones([1], dtype=dtype), name='noise')
  x = ed.Normal(loc=output, scale=noise, name='x')
  return mu, sigma, z, (beta, alpha), noise, x
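# One plausible way (a sketch, not from the source) to train the `Q` family
# defined earlier against this `EpiAnno` prior: score the joint density at a
# sample drawn from Q. `x_train` is a hypothetical observed matrix of shape
# [sum(sample_shape), D], and the entropy terms needed for a full ELBO are
# omitted for brevity.
log_joint = ed.make_log_joint_fn(EpiAnno)
qmu, qsigma, qz, (qbeta, qalpha), qnoise = Q(D0, D, n_classes, sample_shape)

# Cross-entropy term E_q[log p(x, latents)], one Monte Carlo sample from Q.
values = {'mu%d' % i: qmu[i] for i in range(n_classes)}
values.update({'z%d' % i: qz[i] for i in range(n_classes)})
energy = log_joint(D0, D, n_classes, sample_shape,
                   sigma=qsigma, beta=qbeta, alpha=qalpha,
                   noise=qnoise, x=x_train, **values)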
def call(self, inputs):
  """Runs the model forward to return a stochastic encoding.

  Args:
    inputs: Tensor of shape [1, num_productions, num_production_rules]. It is
      a sequence of productions of length `num_productions`. Each production
      is a one-hot vector of length `num_production_rules`: it determines
      which production rule the production corresponds to.

  Returns:
    latent_code_posterior: A random variable capturing a sample from the
      variational distribution, of shape [1, self.latent_size].
  """
  net = self.encoder_net(tf.cast(inputs, tf.float32))
  return ed.MultivariateNormalDiag(
      loc=net[..., :self.latent_size],
      scale_diag=tf.nn.softplus(net[..., self.latent_size:]),
      name="latent_code_posterior")
def variational_mfvi(X, mfvi_mixture=False, n_mixture=1, name="", **kwargs):
  """Defines the mean-field variational family for Gaussian Process.

  Args:
    X: (np.ndarray of float32) input training features,
      with dimension (N, D).
    mfvi_mixture: (bool) Whether to output variational family with a
      mixture of MFVI.
    n_mixture: (int) Number of MFVI mixture components to add.
    name: (str) name for variational parameters.
    **kwargs: Dict of other keyword variables.
      For compatibility purposes with other variational families.

  Returns:
    q_f: (ed.RandomVariable) variational family.
    qf_mean, qf_sdev: (tf.Variable) variational parameters for q_f.
    mixture_par_list: (list) Mixture parameters; empty if
      mfvi_mixture=False.
  """
  X = tf.convert_to_tensor(X, dtype=tf.float32)
  N, D = X.shape.as_list()

  # define variational parameters
  qf_mean = tf.get_variable(shape=[N], name='{}_mean'.format(name))
  qf_sdev = tf.exp(tf.get_variable(shape=[N], name='{}_sdev'.format(name)))

  # define variational family
  mixture_par_list = []
  if mfvi_mixture:
    gp_dist = tfd.MultivariateNormalDiag(loc=qf_mean,
                                         scale_diag=qf_sdev,
                                         name=name)
    q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
        n_mixture=n_mixture, N=N, gp_dist=gp_dist, name=name)
  else:
    q_f = ed.MultivariateNormalDiag(loc=qf_mean,
                                    scale_diag=qf_sdev,
                                    name=name)

  return q_f, qf_mean, qf_sdev, mixture_par_list
def variational_mfvi(X, mfvi_mixture=False, n_mixture=1):
  """Defines the mean-field variational family for GPR.

  Args:
    X: (np.ndarray of float32) input training features, shape (N, D).
    mfvi_mixture: (bool) Whether to output variational family with a
      mixture of MFVI.
    n_mixture: (int) Number of MFVI mixture components to add.

  Returns:
    q_f, q_sig: (ed.RandomVariable) variational family.
    qf_mean, qf_sdev: (tf.Variable) variational parameters for q_f.
    mixture_par_list: (list) Mixture parameters; empty if
      mfvi_mixture=False.
  """
  N, D = X.shape

  # define variational parameters
  qf_mean = tf.get_variable(shape=[N], name='qf_mean')
  qf_sdev = tf.exp(tf.get_variable(shape=[N], name='qf_sdev'))
  q_sig_mean = tf.get_variable(shape=[], name='q_sig_mean')
  q_sig_sdev = tf.exp(tf.get_variable(shape=[], name='q_sig_sdev'))

  # define variational family
  mixture_par_list = []
  if mfvi_mixture:
    gp_dist = tfd.MultivariateNormalDiag(loc=qf_mean,
                                         scale_diag=qf_sdev,
                                         name='q_f')
    q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
        n_mixture=n_mixture, N=N, gp_dist=gp_dist, name='q_f')
  else:
    q_f = ed.MultivariateNormalDiag(loc=qf_mean,
                                    scale_diag=qf_sdev,
                                    name='q_f')
  q_sig = ed.Normal(loc=q_sig_mean, scale=q_sig_sdev, name='q_sig')

  return q_f, q_sig, qf_mean, qf_sdev, mixture_par_list
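# A minimal sketch of plugging this family into a reparameterized ELBO,
# assuming a model whose random variables are named `gp_f`, `sigma`, and `y`
# (as in the GP `model` shown earlier); `X_train` and `y_obs` are
# hypothetical tensors, not values from the source.
log_joint = ed.make_log_joint_fn(model)
q_f, q_sig, qf_mean, qf_sdev, _ = variational_mfvi(X_train)

# Single-sample Monte Carlo ELBO: E_q[log p] minus the variational log q.
elbo = (log_joint(X_train, gp_f=q_f, sigma=q_sig, y=y_obs)
        - q_f.distribution.log_prob(q_f)
        - q_sig.distribution.log_prob(q_sig))
train_op = tf.train.AdamOptimizer(5e-3).minimize(-elbo)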
def model_flat(X, base_pred, family_tree=None,
               ls_weight=1., ls_resid=1., **kwargs):
  r"""Defines the sparse adaptive ensemble model.

      y           ~   N(f, sigma^2)
      f(x)        ~   gaussian_process(sum{ f_model(x) * w_model(x) },
                                       k_resid(x))
      w_model     =   tail_free_process(w0_model)
      w0_model(x) ~   gaussian_process(0, k_w(x))

  where the tail_free_process is defined by sparse_ensemble_weight.

  Args:
    X: (np.ndarray) Input features of dimension (N, D).
    base_pred: (dict of np.ndarray) A dictionary of out-of-sample predictions
      from base models. For each item in the dictionary, key is the model
      name, and value is the model prediction with dimension (N,).
    family_tree: (dict of list or None) A dictionary of lists of strings
      specifying the family tree between models; if None, assume there is
      no structure (i.e. flat).
    ls_weight: (float32) length-scale for the kernel of the ensemble
      weight GPs.
    ls_resid: (float32) length-scale for the kernel of the residual
      process GP.
    **kwargs: Additional parameters to pass to sparse_ensemble_weight.

  Returns:
    (tf.Tensors of float32) model parameters.
  """
  # check dimension
  N, D = X.shape
  for key, value in base_pred.items():
    if not value.shape == (N,):
      raise ValueError(
          "All base-model predictions should have shape ({},), but "
          "observed {} for '{}'".format(N, value.shape, key))

  # specify tail-free priors for ensemble weight
  if not family_tree:
    temp = ed.Normal(loc=tail_free._TEMP_PRIOR_MEAN,
                     scale=tail_free._TEMP_PRIOR_SDEV, name='temp')
  else:
    # specify a list of temp parameters for each node in the tree
    temp = ed.Normal(loc=[tail_free._TEMP_PRIOR_MEAN] * len(family_tree),
                     scale=tail_free._TEMP_PRIOR_SDEV, name='temp')

  # specify ensemble weight
  W = sparse_conditional_weight(X, base_pred, temp,
                                family_tree=family_tree,
                                ls=ls_weight,
                                name="ensemble_weight",
                                **kwargs)

  # specify ensemble prediction
  F = np.asarray(list(base_pred.values())).T
  FW = tf.multiply(F, W)
  ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

  # specify residual process
  ensemble_resid = gp.prior(X, ls_resid,
                            kernel_func=gp.rbf,
                            name="ensemble_resid")

  # specify observation noise
  sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                    scale=_NOISE_PRIOR_SDEV, name="sigma")

  # specify observation
  y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                scale_identity_multiplier=tf.exp(sigma),
                                name="y")
  return y
def model_tailfree(X, base_pred, family_tree=None,
                   log_ls_weight=None, log_ls_resid=None, **kwargs):
  r"""Defines the sparse adaptive ensemble model.

      y           ~   N(f, sigma^2)
      f(x)        ~   gaussian_process(sum{ f_model(x) * w_model(x) },
                                       k_resid(x))
      w_model     =   tail_free_process(w0_model)
      w0_model(x) ~   gaussian_process(0, k_w(x))

  where the tail_free_process is defined by sparse_ensemble_weight.

  Args:
    X: (np.ndarray) Input features of dimension (N, D).
    base_pred: (dict of np.ndarray) A dictionary of out-of-sample predictions
      from base models. For each item in the dictionary, key is the model
      name, and value is the model prediction with dimension (N,).
    family_tree: (dict of list or None) A dictionary of lists of strings
      specifying the family tree between models; if None, assume there is
      no structure (i.e. flat).
    log_ls_weight: (float32) length-scale parameter for the weight GP.
      If None, it is estimated with a normal prior.
    log_ls_resid: (float32) length-scale parameter for the residual GP.
      If None, it is estimated with a normal prior.
    **kwargs: Additional parameters to pass to tail_free.prior.

  Returns:
    (tf.Tensors of float32) model parameters.
  """
  # check dimension
  N, D = X.shape
  for key, value in base_pred.items():
    if not value.shape == (N,):
      raise ValueError(
          "All base-model predictions should have shape ({},), but "
          "observed {} for '{}'".format(N, value.shape, key))

  # specify priors for length-scales and observation noise
  if log_ls_weight is None:
    log_ls_weight = ed.Normal(loc=_LS_PRIOR_MEAN,
                              scale=_LS_PRIOR_SDEV, name="ls_weight")
  if log_ls_resid is None:
    log_ls_resid = ed.Normal(loc=_LS_PRIOR_MEAN,
                             scale=_LS_PRIOR_SDEV, name="ls_resid")
  sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                    scale=_NOISE_PRIOR_SDEV, name="sigma")

  # specify tail-free priors for ensemble weight
  ensemble_weights, model_names = tail_free.prior(X, base_pred,
                                                  family_tree=family_tree,
                                                  ls=tf.exp(log_ls_weight),
                                                  name="ensemble_weight",
                                                  **kwargs)

  # specify ensemble prediction
  base_models = np.asarray([base_pred[name] for name in model_names]).T
  FW = tf.multiply(base_models, ensemble_weights)
  ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

  # specify residual process
  ensemble_resid = gp.prior(X,
                            ls=tf.exp(log_ls_resid),
                            kernel_func=gp.rbf,
                            name="ensemble_resid")

  # specify observation
  y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                scale_identity_multiplier=tf.exp(sigma),
                                name="y")
  return y
def model(X, base_pred, add_resid=True, log_ls_resid=None):
  r"""Defines the sparse adaptive ensemble model.

      y        ~   sum{ f_k(x) * w_k } + delta(x) + epsilon
      w_k      ~   LogisticNormal( 0, sigma_k )
      delta(x) ~   GaussianProcess( 0, k(x) )
      epsilon  ~   Normal( 0, sigma_e )

  where the LogisticNormal is sparse_softmax-transformed Normals.

  Args:
    X: (np.ndarray) Input features of dimension (N, D).
    base_pred: (dict of np.ndarray) A dictionary of out-of-sample predictions
      from base models. For each item in the dictionary, key is the model
      name, and value is the model prediction with dimension (N,).
    add_resid: (bool) Whether to add a residual process to the model.
    log_ls_resid: (float32) length-scale parameter for the residual GP.
      If None, it is estimated with a normal prior.

  Returns:
    (tf.Tensors of float32) model parameters.
  """
  # convert data type
  F = np.asarray(list(base_pred.values())).T
  F = tf.convert_to_tensor(F, dtype=tf.float32)
  X = tf.convert_to_tensor(X, dtype=tf.float32)

  # check dimension
  N, D = X.shape
  for key, value in base_pred.items():
    if not value.shape == (N,):
      raise ValueError(
          "All base-model predictions should have shape ({},), but "
          "observed {} for '{}'".format(N, value.shape, key))

  # specify prior for length-scale and observation noise
  if log_ls_resid is None:
    log_ls_resid = ed.Normal(loc=_LS_PRIOR_MEAN,
                             scale=_LS_PRIOR_SDEV, name="ls_resid")

  # specify logistic-normal priors for ensemble weight
  temp = ed.Normal(loc=_TEMP_PRIOR_MEAN,
                   scale=_TEMP_PRIOR_SDEV, name='temp')

  W = sparse_logistic_weight(base_pred, temp, name="ensemble_weight")

  # specify ensemble prediction
  FW = tf.matmul(F, W)
  ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

  # specify residual process
  if add_resid:
    ensemble_resid = gp.prior(X,
                              ls=tf.exp(log_ls_resid),
                              kernel_func=gp.rbf,
                              name="ensemble_resid")
  else:
    ensemble_resid = 0.

  # specify observation noise
  sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                    scale=_NOISE_PRIOR_SDEV, name="sigma")

  # specify observation
  y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                scale_identity_multiplier=tf.exp(sigma),
                                name="y")
  return y