def _inverse(self, y):
  y = y - self.low if self.low is not None else y
  if self.hinge_softness is None:
    return tfp_math.softplus_inverse(y)
  hinge_softness = tf.cast(self.hinge_softness, y.dtype)
  return hinge_softness * tfp_math.softplus_inverse(y / hinge_softness)
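For reference, `softplus_inverse` is the exact inverse of `softplus`, which is what this `_inverse` relies on. A minimal standalone check, assuming TensorFlow Probability is installed and that `tfp.math.softplus_inverse` is the public alias of the `tfp_math.softplus_inverse` used above:

import tensorflow as tf
import tensorflow_probability as tfp

x = tf.constant([0.1, 1.0, 5.0])
y = tf.math.softplus(x)               # softplus(x) = log(1 + exp(x))
x_rec = tfp.math.softplus_inverse(y)  # softplus_inverse(y) = log(exp(y) - 1)
print(x_rec.numpy())                  # approximately [0.1, 1.0, 5.0]

# The hinge_softness branch rescales around the same identity:
# c * softplus_inverse(y / c) inverts y = c * softplus(x / c).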
def _forward_log_det_jacobian(self, x):
  x = x - 0.5
  fractional_part = x - tf.math.floor(x)
  inner_part = (fractional_part - 0.5) / self.temperature
  offset = (tf.math.log(self.temperature) -
            tf.math.softplus(0.5 / self.temperature) +
            tfp_math.softplus_inverse(0.5 / self.temperature))
  return (-tf.math.softplus(-inner_part) -
          tf.math.softplus(inner_part) -
          offset)
def build_trainable_location_scale_distribution(initial_loc,
                                                 initial_scale,
                                                 event_ndims,
                                                 distribution_fn=tfd.Normal,
                                                 validate_args=False,
                                                 name=None):
  """Builds a variational distribution from a location-scale family.

  Args:
    initial_loc: Float `Tensor` initial location.
    initial_scale: Float `Tensor` initial scale.
    event_ndims: Integer `Tensor` number of event dimensions in `initial_loc`.
    distribution_fn: Optional constructor for a `tfd.Distribution` instance in
      a location-scale family. This should have signature
      `dist = distribution_fn(loc, scale, validate_args)`.
      Default value: `tfd.Normal`.
    validate_args: Python `bool`. Whether to validate input with asserts. This
      imposes a runtime cost. If `validate_args` is `False`, and the inputs are
      invalid, correct behavior is not guaranteed.
      Default value: `False`.
    name: Python `str` name prefixed to ops created by this function.
      Default value: `None` (i.e.,
        'build_trainable_location_scale_distribution').

  Returns:
    posterior_dist: A `tfd.Distribution` instance.
  """
  with tf.name_scope(name or 'build_trainable_location_scale_distribution'):
    dtype = dtype_util.common_dtype([initial_loc, initial_scale],
                                    dtype_hint=tf.float32)
    initial_loc = tf.convert_to_tensor(initial_loc, dtype=dtype)
    initial_scale = tf.convert_to_tensor(initial_scale, dtype=dtype)

    loc = tf.Variable(initial_value=initial_loc, name='loc')
    scale = tfp_util.DeferredTensor(
        tf.nn.softplus,
        tf.Variable(
            initial_value=tf.broadcast_to(
                tfp_math.softplus_inverse(initial_scale),
                shape=prefer_static.shape(initial_loc)),
            name='inverse_softplus_scale'))
    posterior_dist = distribution_fn(loc=loc, scale=scale,
                                     validate_args=validate_args)

    # Ensure the distribution has the desired number of event dimensions.
    static_event_ndims = tf.get_static_value(event_ndims)
    if static_event_ndims is None or static_event_ndims > 0:
      posterior_dist = tfd.Independent(
          posterior_dist,
          reinterpreted_batch_ndims=event_ndims,
          validate_args=validate_args)

    return posterior_dist
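A hypothetical usage sketch of the builder above. It assumes the module-level aliases used in its body (`tfd`, `tfp_util`, `tfp_math`, `dtype_util`, `prefer_static`) are already imported from TensorFlow Probability:

import tensorflow as tf

# Build a trainable diagonal-Normal surrogate over a length-3 event.
surrogate = build_trainable_location_scale_distribution(
    initial_loc=tf.zeros([3]),
    initial_scale=tf.ones([3]),
    event_ndims=1)  # wraps the Normal in tfd.Independent

samples = surrogate.sample(5)            # shape [5, 3]
log_probs = surrogate.log_prob(samples)  # shape [5]
# surrogate.trainable_variables holds 'loc' and 'inverse_softplus_scale';
# the scale seen by the distribution is softplus(inverse_softplus_scale) > 0.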
def _forward_log_det_jacobian(self, x):
  t = tf.convert_to_tensor(self.temperature)
  fractional_part = x - tf.math.floor(x)
  # Because our function is from [0.5, 1.5], we need to transform our
  # fractional_part to that domain like in the forward transformation.
  fractional_part = tf.where(fractional_part < 0.5,
                             fractional_part + 0.5,
                             fractional_part - 0.5)
  inner_part = (fractional_part - 0.5) / t
  offset = (tf.math.log(t) -
            tf.math.softplus(0.5 / t) +
            tfp_math.softplus_inverse(0.5 / t))
  return (-tf.math.softplus(-inner_part) -
          tf.math.softplus(inner_part) -
          offset)
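Both `_forward_log_det_jacobian` variants above return `-softplus(-z) - softplus(z)` minus an `offset` that collects the temperature and normalization constants. The softplus pair is the log-derivative of the sigmoid used in the forward transform: -softplus(-z) - softplus(z) = log(sigmoid(z)) + log(sigmoid(-z)) = log(d/dz sigmoid(z)). A small numeric sanity check of that identity, assuming only that TensorFlow is installed:

import tensorflow as tf

z = tf.constant([-2.0, 0.0, 3.0])
lhs = -tf.math.softplus(-z) - tf.math.softplus(z)
# d/dz sigmoid(z) = sigmoid(z) * sigmoid(-z)
rhs = tf.math.log(tf.math.sigmoid(z) * tf.math.sigmoid(-z))
print(tf.reduce_max(tf.abs(lhs - rhs)).numpy())  # ~0, up to float error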
def __init__(self,
             n_components=10,
             components_prior=0.7,
             encoder_layers=[64, 64],
             activation='relu',
             n_mcmc_samples=1,
             analytic=True,
             random_state=None):
  super(LatentDirichletAllocation, self).__init__()
  self._random_state = np.random.RandomState(seed=random_state) \
      if not isinstance(random_state, np.random.RandomState) \
      else random_state
  self._initializer = tf.initializers.GlorotNormal(
      seed=self._random_state.randint(1e8))

  self.n_components = int(n_components)
  self.components_prior = np.array(softplus_inverse(components_prior))
  self.n_mcmc_samples = n_mcmc_samples
  self.analytic = analytic

  # ====== encoder ====== #
  encoder = Sequential(name="Encoder")
  for num_hidden_units in encoder_layers:
    encoder.add(
        Dense(num_hidden_units,
              activation=activation,
              kernel_initializer=self._initializer))
  encoder.add(
      Dense(n_components,
            activation=tf.nn.softplus,
            kernel_initializer=self._initializer,
            name="DenseConcentration"))
  encoder.add(
      DirichletLayer(clip_for_stable=True,
                     pre_softplus=False,
                     name="topics_posterior"))
  self.encoder = encoder

  # ====== decoder ====== #
  # The observations are bag of words and therefore not one-hot. However,
  # log_prob of OneHotCategorical computes the probability correctly in
  # this case.
  self.decoder = OneHotCategoricalLayer(probs_input=True, name="bag_of_words")
def _create_loc_scale_vars(self, shape: Tuple[Optional[int], ...]) -> None:
    self.loc = self.add_weight(
        name="location",
        shape=shape,
        initializer=self.location_initializer,
        trainable=self.location_trainable,
    )
    if self.scale_trainable:
        self.scale = self.add_weight(
            name="scale",
            shape=shape,
            initializer=self.scale_initializer,
            trainable=self.scale_trainable,
            constraint=GreaterEqualEpsilon(-2.0),
        )
        self.scale.assign(softplus_inverse(self.scale))
    else:
        self.scale = self.add_weight(
            name="scale",
            shape=shape,
            initializer=self.scale_initializer,
            trainable=self.scale_trainable,
        )
def _inverse(self, y):
  if self.hinge_softness is None:
    return tfp_math.softplus_inverse(y)
  hinge_softness = tf.cast(self.hinge_softness, y.dtype)
  return hinge_softness * tfp_math.softplus_inverse(y / hinge_softness)
def __init__(
    self,
    n_words: int,
    n_topics: int = 20,
    posterior: Literal['gaussian', 'dirichlet'] = 'dirichlet',
    posterior_activation: Union[str, Callable[[], Tensor]] = 'softplus',
    concentration_clip: bool = True,
    distribution: Literal['onehot', 'negativebinomial', 'binomial', 'poisson',
                          'zinb'] = 'onehot',
    dropout: float = 0.0,
    dropout_strategy: Literal['all', 'warmup', 'finetune'] = 'warmup',
    batch_norm: bool = False,
    trainable_prior: bool = True,
    warmup: int = 10000,
    step: Union[int, Variable] = 0,
    input_shape: Optional[List[int]] = None,
    name: str = "Topics",
):
  super().__init__(name=name)
  self.n_words = int(n_words)
  self.n_topics = int(n_topics)
  self.batch_norm = bool(batch_norm)
  self.warmup = int(warmup)
  self.posterior = str(posterior).lower()
  self.distribution = str(distribution).lower()
  self.dropout = float(dropout)
  assert dropout_strategy in ('all', 'warmup', 'finetune'), \
      ("Support dropout strategy: all, warmup, finetune; "
       f"but given: {dropout_strategy}")
  self.dropout_strategy = str(dropout_strategy)
  if isinstance(step, Variable):
    self.step = step
  else:
    self.step = Variable(int(step),
                         dtype=tf.float32,
                         trainable=False,
                         name="Step")
  ### batch norm
  if self.batch_norm:
    self._batch_norm_layer = BatchNormalization(trainable=True)
  ### posterior
  kw = dict(event_shape=(n_topics,), name="TopicsPosterior")
  if posterior == 'dirichlet':
    kw['posterior'] = DirichletLayer
    init_value = softplus_inverse(0.7).numpy()
    post_kw = dict(concentration_activation=posterior_activation,
                   concentration_clip=concentration_clip)
  elif posterior == "gaussian":
    kw['posterior'] = MultivariateNormalLayer
    init_value = 0.
    post_kw = dict(covariance='diag',
                   loc_activation='identity',
                   scale_activation=posterior_activation)
  else:
    raise NotImplementedError(
        "Support one of the following latent distribution: "
        "'gaussian', 'dirichlet'")
  self.topics_prior_logits = self.add_weight(
      initializer=tf.initializers.constant(value=init_value),
      shape=[1, n_topics],
      trainable=bool(trainable_prior),
      name="topics_prior_logits")
  self.posterior_layer = DenseDistribution(
      posterior_kwargs=post_kw,
      prior=self.topics_prior_distribution,
      projection=True,
      **kw)
  ### output distribution
  kw = dict(event_shape=(self.n_words,), name="WordsDistribution")
  count_activation = 'softplus'
  if self.distribution in ('onehot',):
    self.distribution_layer = OneHotCategoricalLayer(probs_input=True, **kw)
    self.n_parameterization = 1
  elif self.distribution in ('poisson',):
    self.distribution_layer = PoissonLayer(**kw)
    self.n_parameterization = 1
  elif self.distribution in ('negativebinomial', 'nb'):
    self.distribution_layer = NegativeBinomialLayer(
        count_activation=count_activation, **kw)
    self.n_parameterization = 2
  elif self.distribution in ('zinb',):
    self.distribution_layer = ZINegativeBinomialLayer(
        count_activation=count_activation, **kw)
    self.n_parameterization = 3
  elif self.distribution in ('binomial',):
    self.distribution_layer = BinomialLayer(
        count_activation=count_activation, **kw)
    self.n_parameterization = 2
  else:
    raise ValueError(
        f"No support for word distribution: {self.distribution}")
  # topics words parameterization
  self.topics_words_params = self.add_weight(
      'topics_words_params',
      shape=[self.n_topics, self.n_words * self.n_parameterization],
      initializer=tf.initializers.glorot_normal(),
      trainable=True)
  # initialize the Model if input_shape given
  if input_shape is not None:
    self(Input(shape=input_shape, dtype=self.dtype))
def __init__(self,
             n_questions,
             n_answers,
             n_components=10,
             components_prior=0.7,
             encoder_layers=[16, 16],
             activation='relu',
             n_mcmc_samples=1,
             random_state=None):
  super(GradeMembershipModel, self).__init__()
  self._random_state = np.random.RandomState(seed=random_state) \
      if not isinstance(random_state, np.random.RandomState) \
      else random_state
  self._initializer = tf.initializers.GlorotNormal(
      seed=self._random_state.randint(1e8))

  self.n_questions = int(n_questions)
  # this assumes the same number of answers is given to every question
  self.n_answers = int(n_answers)
  self.n_components = int(n_components)
  self.components_prior = np.array(softplus_inverse(components_prior))
  self.n_mcmc_samples = n_mcmc_samples

  self.encoder = []
  self.decoder = []
  # Each question gets 1 encoder and decoder
  for question_idx in range(self.n_questions):
    # ====== encoder ====== #
    encoder = Sequential(name="EncoderQ%d" % question_idx)
    for num_hidden_units in encoder_layers:
      encoder.add(
          Dense(num_hidden_units,
                activation=activation,
                kernel_initializer=self._initializer))
    encoder.add(
        Dense(n_components,
              activation=tf.nn.softplus,
              kernel_initializer=self._initializer,
              name="DenseConcentration"))
    encoder.add(
        DirichletLayer(clip_for_stable=True,
                       pre_softplus=False,
                       name="topics_posteriorQ%d" % question_idx))
    # this is a must for the Model to store Layer parameters
    setattr(self, 'encoder%d' % question_idx, encoder)
    self.encoder.append(encoder)
    # ====== decoder ====== #
    group_answer_logits = self.add_weight(
        name="topics_words_logits%d" % question_idx,
        shape=[self.n_components, n_answers],
        initializer=self._initializer)
    # The observations are bag of words and therefore not one-hot. However,
    # log_prob of OneHotCategorical computes the probability correctly in
    # this case.
    decoder = OneHotCategoricalLayer(probs_input=True,
                                     name="AnswerSheetQ%d" % question_idx)
    # this is a must for the Model to store Layer parameters
    setattr(self, 'decoder%d' % question_idx, decoder)
    self.decoder.append([group_answer_logits, decoder])
  # same prior for all questions
  self.prior_logit = self.add_weight(
      name="prior_logit",
      shape=[1, self.n_components],
      trainable=False,
      initializer=tf.initializers.Constant(self.components_prior))