def __init__(self, units, **kwargs):
    super().__init__(
        units,
        posterior=MultivariateNormalLayer,
        posterior_kwargs=dict(covariance='diag',
                              scale_activation='softplus1'),
        prior=MultivariateNormalDiag(loc=tf.zeros(shape=units),
                                     scale_identity_multiplier=1.),
        **kwargs)

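# A minimal standalone sketch of the prior constructed above: a
# standard multivariate normal over `units` dimensions. Note that
# `scale_identity_multiplier` is deprecated in recent TensorFlow
# Probability releases; `scale_diag=tf.ones(units)` is the equivalent
# form.
import tensorflow as tf
import tensorflow_probability as tfp

units = 32  # hypothetical latent size
prior = tfp.distributions.MultivariateNormalDiag(loc=tf.zeros(units),
                                                 scale_diag=tf.ones(units))
z = prior.sample(4)         # -> shape [4, units]
log_pz = prior.log_prob(z)  # -> shape [4]
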
def test_pad_mixture_dimensions_mixture_same_family(self):
    gm = MixtureSameFamily(
        mixture_distribution=Categorical(probs=[0.3, 0.7]),
        components_distribution=MultivariateNormalDiag(
            loc=[[-1., 1], [1, -1]],
            scale_identity_multiplier=[1.0, 0.5]))

    x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    x_pad = distribution_util.pad_mixture_dimensions(
        x, gm, gm.mixture_distribution,
        tensorshape_util.rank(gm.event_shape))
    x_out, x_pad_out = self.evaluate([x, x_pad])

    self.assertAllEqual(x_pad_out.shape, [2, 2, 1])
    self.assertAllEqual(x_out.reshape([-1]), x_pad_out.reshape([-1]))

def set_prior(self,
              loc=0.,
              log_scale=np.log(np.expm1(1)),
              mixture_logits=None):
    r"""Set the prior for the mixture density network.

    loc : Scalar or Tensor with shape `[n_components, event_size]`
    log_scale : Scalar or Tensor with shape `[n_components, event_size]`
      for the 'none' and 'diag' components, and
      `[n_components, event_size*(event_size+1)//2]` for the 'full'
      component.
    mixture_logits : Scalar or Tensor with shape `[n_components]`
    """
    event_size = self.event_size
    if self.covariance == 'diag':
        scale_shape = [self.n_components, event_size]
        fn = lambda l, s: MultivariateNormalDiag(
            loc=l, scale_diag=tf.nn.softplus(s))
    elif self.covariance == 'none':
        scale_shape = [self.n_components, event_size]
        fn = lambda l, s: Independent(
            Normal(loc=l, scale=tf.math.softplus(s)), 1)
    elif self.covariance == 'full':
        scale_shape = [self.n_components,
                       event_size * (event_size + 1) // 2]
        fn = lambda l, s: MultivariateNormalTriL(
            loc=l,
            scale_tril=FillScaleTriL(diag_shift=1e-5)(tf.math.softplus(s)))
    # broadcast a scalar location to every component
    if isinstance(loc, Number) or tf.rank(loc) == 0:
        loc = tf.fill([self.n_components, self.event_size], loc)
    # broadcast a scalar log-scale to the component scale shape
    if isinstance(log_scale, Number) or tf.rank(log_scale) == 0:
        log_scale = tf.fill(scale_shape, log_scale)
    # default to uniform mixture weights
    if mixture_logits is None:
        p = 1. / self.n_components
        mixture_logits = np.log(p / (1. - p))
    if isinstance(mixture_logits, Number) or tf.rank(mixture_logits) == 0:
        mixture_logits = tf.fill([self.n_components], mixture_logits)
    #
    loc = tf.cast(loc, self.dtype)
    log_scale = tf.cast(log_scale, self.dtype)
    mixture_logits = tf.cast(mixture_logits, self.dtype)
    self._prior = MixtureSameFamily(
        components_distribution=fn(loc, log_scale),
        mixture_distribution=Categorical(logits=mixture_logits),
        name="prior")
    return self

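# Hypothetical usage sketch for `set_prior`, assuming `mdn` is an
# instance of the mixture-density layer above with covariance 'diag',
# n_components == 10 and event_size == 16. Scalar `loc`/`log_scale`
# are broadcast to every component, and the default `mixture_logits`
# gives uniform weights; the default log_scale, log(expm1(1)), is the
# softplus inverse of 1, i.e. unit scale after the softplus.
import numpy as np

mdn = mdn.set_prior(loc=0., log_scale=np.log(np.expm1(1.)))
prior = mdn._prior   # a MixtureSameFamily distribution
z = prior.sample(8)  # -> shape [8, 16]
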
def model_gmmprior(args: Arguments):
    nets = get_networks(args.ds,
                        zdim=args.zdim,
                        is_hierarchical=False,
                        is_semi_supervised=False)
    latent_size = np.prod(nets['latents'].event_shape)
    n_components = 100
    loc = tf.compat.v1.get_variable(name="loc",
                                    shape=[n_components, latent_size])
    raw_scale_diag = tf.compat.v1.get_variable(
        name="raw_scale_diag", shape=[n_components, latent_size])
    mixture_logits = tf.compat.v1.get_variable(name="mixture_logits",
                                               shape=[n_components])
    nets['latents'].prior = MixtureSameFamily(
        components_distribution=MultivariateNormalDiag(
            loc=loc,
            scale_diag=tf.nn.softplus(raw_scale_diag) + tf.math.exp(-7.)),
        mixture_distribution=Categorical(logits=mixture_logits),
        name="prior")
    return VariationalAutoencoder(**nets, name='GMMPrior')

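# The scale parameterization used above, in isolation: softplus keeps
# the diagonal scale positive, and the exp(-7) term (~9.1e-4) acts as
# a floor that keeps mixture components from collapsing to zero
# variance.
import tensorflow as tf

raw = tf.constant([-10.0, 0.0, 10.0])
scale_diag = tf.nn.softplus(raw) + tf.math.exp(-7.)  # every entry > exp(-7)
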
class GaussianEncoder(GenericEncoder):

    def __init__(self,
                 dim: int,
                 mean: Union[Tuple[int, ...], Model] = (128, 128),
                 var: Union[Tuple[int, ...], Model] = (128, 128)):
        self.mean_net = _mlp_models(dim, mean, activation="relu", name="Mean")
        self.logvar_net = _mlp_models(dim, var, activation="relu", name="Var")
        self.dim = dim
        self._gaussian_sampler = MultivariateNormalDiag(
            loc=tf.zeros(dim), scale_identity_multiplier=1)

    def __call__(self, q, *args, **kwargs):
        return self.mean_net(q), self.logvar_net(q)

    def sample(self, q: Tensor, *args, **kwargs):
        batch_size = q.shape[0]
        # Sample epsilon so that it has the same shape as q.
        epsilon = self._gaussian_sampler.sample(batch_size)
        mu, sigma = self(q)
        return mu + epsilon * tf.exp(0.5 * sigma)

    def logmean(self, q: Tensor, p: Tensor = None):
        """Returns the expectation of log f(p | q) when p has law f(. | q).

        A closed-form formula may exist, but since the other term in the
        loss is computed with SGD, we can do the same here.

        Parameters
        ----------
        q, p: state. If the momentum `p` is not provided, it is sampled.

        Returns
        -------
        A real number, the value of the expectation (omitting the 2*pi
        constant).
        """
        if p is None:
            p = self.sample(q)
        mu, sigma = self(q)
        log_det = tf.reduce_sum(sigma)
        exp_term = 1 / 2 * tf.reduce_sum(tf.square(p - mu) * tf.exp(-sigma))
        return -log_det - exp_term

    @property
    def trainable_variables(self):
        return (self.mean_net.trainable_variables +
                self.logvar_net.trainable_variables)

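# Hedged usage sketch for GaussianEncoder, assuming `_mlp_models`
# returns a Keras MLP mapping `[batch, dim] -> [batch, dim]`.
# `sample` implements the reparameterization trick
# z = mu + eps * exp(0.5 * logvar) with eps ~ N(0, I), and `logmean`
# is a one-sample Monte Carlo estimate of E[log f(p | q)].
import tensorflow as tf

encoder = GaussianEncoder(dim=2)
q = tf.random.normal([16, 2])
p = encoder.sample(q)             # -> shape [16, 2]
estimate = encoder.logmean(q, p)  # scalar estimate
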
def build(self, input_shape: TensorShape):
    if self.prior_memory_mean is None:
        self.build_prior_state()

    self._code_size = tf.constant(self.code_size, name="code_size")
    self._memory_size = tf.constant(self.memory_size, name="memory_size")
    self._iteration_count = tf.constant(self.iteration_count,
                                        name="iteration_count")
    if self.batch_size is not None:
        self._batch_size = tf.constant(self.batch_size, name="batch_size")

    # region Address weights
    with tf.name_scope("w_prior"):
        self._w_prior_stddev = tf.constant(self.w_prior_stddev,
                                           name="w_prior_stddev")
        self.w_prior_distribution = MultivariateNormalDiag(
            loc=tf.zeros(shape=[self._memory_size]),
            scale_identity_multiplier=self._w_prior_stddev,
            name="w_prior_distribution")

        log_w_stddev = self.add_weight(
            initializer=constant(self.initial_w_stddev),
            name="log_w_stddev",
            shape=[])
        self._w_stddev = tf.exp(log_w_stddev, name="w_stddev")
    # endregion

    # region Observational noise
    if self.observational_noise_stddev > 0.0:
        observational_noise_stddev = tf.constant(
            self.observational_noise_stddev,
            name="observational_noise_stddev")
    else:
        log_observational_noise_stddev = self.add_weight(
            initializer=zeros(),
            name="log_observational_noise_stddev",
            shape=[])
        observational_noise_stddev = tf.exp(
            log_observational_noise_stddev,
            name="observational_noise_stddev")
    self._observational_noise_stddev = observational_noise_stddev
    # endregion

    self.built = True

def __init__(self,
             units: int,
             prior_loc: float = 0.,
             prior_scale: float = 1.,
             projection: bool = True,
             name: str = "Latents",
             **kwargs):
    super().__init__(
        event_shape=(int(units),),
        posterior=MultivariateNormalLayer,
        posterior_kwargs=dict(covariance='diag',
                              scale_activation='softplus1'),
        prior=MultivariateNormalDiag(loc=tf.fill((units,), prior_loc),
                                     scale_identity_multiplier=prior_scale),
        projection=projection,
        name=name,
        **kwargs,
    )

def _finite_fourier_gpr(model: gpflow.models.GPR,
                        kernel: gpflow.kernels.Stationary,
                        sample_shape: List[int],
                        num_basis: int,
                        basis: Callable = None,
                        prior: MultivariateNormalDiag = None,
                        dtype: Any = None,
                        **kwargs):
    if dtype is None:
        dtype = default_float()
    if basis is None:
        basis = RandomFourierBasis(kernel=model.kernel,
                                   units=num_basis,
                                   dtype=dtype)
    if prior is None:
        prior = MultivariateNormalDiag(
            scale_diag=tf.ones(num_basis, dtype=dtype))
    blr = BayesianLinearRegression(basis=basis,
                                   prior=prior,
                                   likelihood=model.likelihood)

    def initializer(shape, dtype):
        X, y = model.data
        if model.mean_function is not None:
            y = y - model.mean_function(X)
        weights = blr.predict_w_samples(sample_shape=shape[:-1], data=(X, y))
        assert weights.shape[-1] == shape[-1] == basis.units
        return tf.cast(weights, dtype)

    weight_shape = list(sample_shape) + [1, num_basis]
    weights = initializer(weight_shape, dtype)
    return BayesianLinearSampler(basis=basis,
                                 weights=weights,
                                 mean_function=model.mean_function,
                                 weight_initializer=initializer,
                                 **kwargs)

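# Hypothetical usage sketch, assuming a fitted gpflow GPR `model` with
# a stationary kernel and that the returned BayesianLinearSampler is
# callable on new inputs `X_new`. The sampler approximates function
# draws as a Bayesian linear model over `num_basis` random Fourier
# features with an isotropic N(0, I) prior on the weights.
sampler = _finite_fourier_gpr(model,
                              kernel=model.kernel,
                              sample_shape=[5],
                              num_basis=256)
f_samples = sampler(X_new)  # 5 approximate posterior sample paths at X_new
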
def model_fullcovgmm(args: Arguments):
    nets = get_networks(args.ds,
                        zdim=args.zdim,
                        is_hierarchical=False,
                        is_semi_supervised=False)
    latent_size = int(np.prod(nets['latents'].event_shape))
    n_components = 100
    loc = tf.compat.v1.get_variable(name="loc",
                                    shape=[n_components, latent_size])
    raw_scale_diag = tf.compat.v1.get_variable(
        name="raw_scale_diag", shape=[n_components, latent_size])
    mixture_logits = tf.compat.v1.get_variable(name="mixture_logits",
                                               shape=[n_components])
    nets['latents'] = RVconf(
        event_shape=latent_size,
        projection=True,
        posterior='mvntril',
        prior=MixtureSameFamily(
            components_distribution=MultivariateNormalDiag(
                loc=loc,
                scale_diag=tf.nn.softplus(raw_scale_diag) + tf.math.exp(-7.)),
            mixture_distribution=Categorical(logits=mixture_logits),
            name="prior"),
        name='latents').create_posterior()
    return VariationalAutoencoder(**nets, name='FullCov')

def __init__(self, config, name=None, scope=None):
    self.config = config
    super(OmniAnomaly, self).__init__(name=name, scope=scope)
    with reopen_variable_scope(self.variable_scope):
        if config.posterior_flow_type == 'nf':
            self._posterior_flow = spt.layers.planar_normalizing_flows(
                config.nf_layers, name='posterior_flow')
        else:
            self._posterior_flow = None
        self._window_length = config.window_length
        self._x_dims = config.x_dim
        self._z_dims = config.z_dim
        self._vae = VAE(
            p_z=TfpDistribution(
                LinearGaussianStateSpaceModel(
                    num_timesteps=config.window_length,
                    transition_matrix=LinearOperatorIdentity(config.z_dim),
                    transition_noise=MultivariateNormalDiag(
                        scale_diag=tf.ones([config.z_dim])),
                    observation_matrix=LinearOperatorIdentity(config.z_dim),
                    observation_noise=MultivariateNormalDiag(
                        scale_diag=tf.ones([config.z_dim])),
                    initial_state_prior=MultivariateNormalDiag(
                        scale_diag=tf.ones([config.z_dim]))))
            if config.use_connected_z_p else
            Normal(mean=tf.zeros([config.z_dim]),
                   std=tf.ones([config.z_dim])),
            p_x_given_z=Normal,
            q_z_given_x=partial(
                RecurrentDistribution,
                mean_q_mlp=partial(tf.layers.dense,
                                   units=config.z_dim,
                                   name='z_mean',
                                   reuse=tf.AUTO_REUSE),
                std_q_mlp=partial(softplus_std,
                                  units=config.z_dim,
                                  epsilon=config.std_epsilon,
                                  name='z_std'),
                z_dim=config.z_dim,
                window_length=config.window_length)
            if config.use_connected_z_q else Normal,
            h_for_p_x=Lambda(
                partial(
                    wrap_params_net,
                    h_for_dist=lambda x: rnn(
                        x=x,
                        window_length=config.window_length,
                        rnn_num_hidden=config.rnn_num_hidden,
                        hidden_dense=2,
                        dense_dim=config.dense_dim,
                        name='rnn_p_x'),
                    mean_layer=partial(tf.layers.dense,
                                       units=config.x_dim,
                                       name='x_mean',
                                       reuse=tf.AUTO_REUSE),
                    std_layer=partial(softplus_std,
                                      units=config.x_dim,
                                      epsilon=config.std_epsilon,
                                      name='x_std')),
                name='p_x_given_z'),
            h_for_q_z=Lambda(
                lambda x: {
                    'input_q': rnn(x=x,
                                   window_length=config.window_length,
                                   rnn_num_hidden=config.rnn_num_hidden,
                                   hidden_dense=2,
                                   dense_dim=config.dense_dim,
                                   name="rnn_q_z")
                },
                name='q_z_given_x')
            if config.use_connected_z_q else Lambda(
                partial(
                    wrap_params_net,
                    h_for_dist=lambda x: rnn(
                        x=x,
                        window_length=config.window_length,
                        rnn_num_hidden=config.rnn_num_hidden,
                        hidden_dense=2,
                        dense_dim=config.dense_dim,
                        name="rnn_q_z"),
                    mean_layer=partial(tf.layers.dense,
                                       units=config.z_dim,
                                       name='z_mean',
                                       reuse=tf.AUTO_REUSE),
                    std_layer=partial(softplus_std,
                                      units=config.z_dim,
                                      epsilon=config.std_epsilon,
                                      name='z_std')),
                name='q_z_given_x'))

def __init__(self,
             loc,
             scale,
             logits=None,
             probs=None,
             covariance_type='diag',
             trainable=False,
             validate_args=False,
             allow_nan_stats=True,
             name=None):
    kw = dict(validate_args=validate_args, allow_nan_stats=allow_nan_stats)
    self._trainable = bool(trainable)
    self._llk_history = []
    if trainable:
        loc = tf.Variable(loc, trainable=True, name='loc')
        scale = tf.Variable(scale, trainable=True, name='scale')
        if logits is not None:
            logits = tf.Variable(logits, trainable=True, name='logits')
        if probs is not None:
            probs = tf.Variable(probs, trainable=True, name='probs')
    ### initialize mixture Categorical
    mixture = Categorical(logits=logits,
                          probs=probs,
                          name="MixtureWeights",
                          **kw)
    n_components = mixture._num_categories()
    ### initialize Gaussian components
    covariance_type = str(covariance_type).lower().strip()
    if name is None:
        name = 'Mixture%sGaussian' % \
            (covariance_type.capitalize()
             if covariance_type != 'none' else 'Independent')
    ## create the components
    if covariance_type == 'diag':
        if tf.rank(scale) == 0:  # scalar
            extra_kw = dict(scale_identity_multiplier=scale)
        else:  # a tensor
            extra_kw = dict(scale_diag=scale)
        components = MultivariateNormalDiag(loc=loc, name=name, **kw,
                                            **extra_kw)
    elif covariance_type in ('tril', 'full'):
        if tf.rank(scale) == 1 or \
                (scale.shape[-1] != scale.shape[-2]):
            scale_tril = FillScaleTriL(diag_shift=np.array(
                1e-5,
                tf.convert_to_tensor(scale).dtype.as_numpy_dtype()))
            scale = scale_tril(scale)
        components = MultivariateNormalTriL(loc=loc,
                                            scale_tril=scale,
                                            name=name,
                                            **kw)
    elif covariance_type == 'none':
        components = Independent(distribution=Normal(loc=loc,
                                                     scale=scale,
                                                     **kw),
                                 reinterpreted_batch_ndims=1,
                                 name=name)
    else:
        raise ValueError("No support for covariance_type: '%s'" %
                         covariance_type)
    ### validate the n_components
    assert components.batch_shape[-1] == int(n_components), \
        "Number of components mismatch, mixture:%d, components:%d" % \
        (int(n_components), components.batch_shape[-1])
    super().__init__(mixture_distribution=mixture,
                     components_distribution=components,
                     name=name,
                     **kw)

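# Hedged usage sketch for the mixture above; the class name
# `MixtureGaussian` is hypothetical (the snippet only shows __init__,
# which forwards to MixtureSameFamily). Three trainable diagonal
# Gaussian components in two dimensions:
import numpy as np
import tensorflow as tf

gmm = MixtureGaussian(loc=np.zeros([3, 2], dtype='float32'),
                      scale=np.ones([3, 2], dtype='float32'),
                      logits=np.zeros([3], dtype='float32'),
                      covariance_type='diag',
                      trainable=True)
x = gmm.sample(10)     # -> shape [10, 2]
llk = gmm.log_prob(x)  # -> shape [10]
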
def get_z_distribution(self, z_mean: tf.Tensor) -> MultivariateNormalDiag:
    return MultivariateNormalDiag(
        loc=z_mean,
        scale_identity_multiplier=self._observational_noise_stddev,
        name="z_distribution")

def get_w_distribution(self, w_mean) -> MultivariateNormalDiag:
    return MultivariateNormalDiag(loc=w_mean,
                                  scale_identity_multiplier=self._w_stddev,
                                  name="w_distribution")