Example #1
 def new(params,
         event_shape=(),
         softplus_scale=True,
         validate_args=False,
         name=None):
     """Create the distribution instance from a `params` vector."""
     with tf.compat.v1.name_scope(name, 'LogNormal', [params, event_shape]):
         params = tf.convert_to_tensor(value=params, name='params')
         event_shape = dist_util.expand_to_vector(tf.convert_to_tensor(
             value=event_shape, name='event_shape', dtype=tf.int32),
                                                  tensor_name='event_shape')
         output_shape = tf.concat([
             tf.shape(input=params)[:-1],
             event_shape,
         ],
                                  axis=0)
         loc_params, scale_params = tf.split(params, 2, axis=-1)
         if softplus_scale:
             scale_params = tf.math.softplus(
                 scale_params) + tfd.softplus_inverse(1.0)
         return tfd.Independent(
             tfd.LogNormal(loc=tf.reshape(loc_params, output_shape),
                           scale=tf.reshape(scale_params, output_shape),
                           validate_args=validate_args),
             reinterpreted_batch_ndims=tf.size(input=event_shape),
             validate_args=validate_args)
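
A minimal usage sketch for this constructor (not from the original source; it assumes `tf` is TensorFlow 2.x and that the module-level aliases `new` relies on, `tfd` and `dist_util`, are already imported). The last axis of `params` must hold the `loc` half followed by the raw `scale` half, i.e. twice the number of event elements:

import tensorflow as tf

# Hypothetical parameters: batch of 4, event_shape=(3,), so the last
# axis carries 2 * 3 = 6 values (loc half, then raw scale half).
params = tf.random.normal([4, 6])
dist = new(params, event_shape=(3,), softplus_scale=True)
sample = dist.sample(7)
print(sample.shape)  # expected: (7, 4, 3); entries are positive (log-normal)
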
Example #2
  def new(params, event_size, covariance_type, softplus_scale,
          validate_args=False, name=None):
    """Create the distribution instance from a `params` vector."""
    covariance_type = str(covariance_type).lower().strip()
    assert covariance_type in ('full', 'tril', 'diag'), \
        "No support for given covariance_type: '%s'" % covariance_type

    scale_fn = (lambda x: tf.math.softplus(x) + tfd.softplus_inverse(1.0)
                if bool(softplus_scale) else x)

    with tf.compat.v1.name_scope(name, 'MultivariateNormal',
                                 [params, event_size]):
      params = tf.convert_to_tensor(value=params, name='params')

      if covariance_type == 'tril':
        scale_tril = tfb.ScaleTriL(
            diag_shift=np.array(1e-5, params.dtype.as_numpy_dtype()),
            validate_args=validate_args)
        return tfd.MultivariateNormalTriL(
            loc=params[..., :event_size],
            scale_tril=scale_tril(scale_fn(params[..., event_size:])),
            validate_args=validate_args)

      elif covariance_type == 'diag':
        return tfd.MultivariateNormalDiag(
            loc=params[..., :event_size],
            scale_diag=scale_fn(params[..., event_size:]))

      elif covariance_type == 'full':
        return tfd.MultivariateNormalFullCovariance(
            loc=params[..., :event_size],
            covariance_matrix=tf.reshape(scale_fn(params[..., event_size:]),
                                         (event_size, event_size)))
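
A usage sketch for the `'diag'` case (assumptions, not part of the source: `tf`, `tfd`, `tfb`, and `np` are the aliases `new` relies on; for `covariance_type='diag'` the last axis of `params` holds `event_size` location values followed by `event_size` raw scale values):

import tensorflow as tf

event_size = 3
# Hypothetical parameters: 2 * event_size values per batch element.
params = tf.random.normal([8, 2 * event_size])
dist = new(params, event_size=event_size, covariance_type='diag',
           softplus_scale=True)
print(dist.sample().shape)  # expected: (8, 3)
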
Example #3
    def new(params,
            event_size,
            covariance_type,
            softplus_scale,
            validate_args=False,
            name=None):
        """Create the distribution instance from a `params` vector."""
        covariance_type = str(covariance_type).lower().strip()
        assert covariance_type in ('full', 'tril', 'diag'), \
            "No support for given covariance_type: '%s'" % covariance_type

        scale_fn = (lambda x: tf.math.softplus(x) + tfd.softplus_inverse(1.0)
                    if bool(softplus_scale) else x)

        with tf.compat.v1.name_scope(name, 'MultivariateNormal',
                                     [params, event_size]):
            params = tf.convert_to_tensor(value=params, name='params')

            if covariance_type == 'tril':
                scale_tril = tfb.ScaleTriL(
                    diag_shift=np.array(1e-5, params.dtype.as_numpy_dtype()),
                    validate_args=validate_args)
                return tfd.MultivariateNormalTriL(
                    loc=params[..., :event_size],
                    scale_tril=scale_tril(scale_fn(params[..., event_size:])),
                    validate_args=validate_args)

            elif covariance_type == 'diag':
                return tfd.MultivariateNormalDiag(
                    loc=params[..., :event_size],
                    scale_diag=scale_fn(params[..., event_size:]))

            elif covariance_type == 'full':
                return tfd.MultivariateNormalFullCovariance(
                    loc=params[..., :event_size],
                    covariance_matrix=tf.reshape(
                        scale_fn(params[..., event_size:]),
                        (event_size, event_size)))
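
This variant slices `params` exactly as Example #2 does, so the required length of the last axis depends only on `covariance_type`. A small illustrative helper (hypothetical, inferred from the slicing, the `ScaleTriL` bijector, and the reshape above):

def params_size(event_size, covariance_type):
    """Length of the last axis of `params` expected by `new` (a sketch)."""
    if covariance_type == 'diag':
        return event_size + event_size                           # loc + scale_diag
    if covariance_type == 'tril':
        return event_size + event_size * (event_size + 1) // 2   # loc + lower triangle
    if covariance_type == 'full':
        return event_size + event_size ** 2                      # loc + covariance matrix
    raise ValueError("Unsupported covariance_type: '%s'" % covariance_type)
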
Example #4
 def new(params, event_shape=(), softplus_scale=True,
         validate_args=False, name=None):
   """Create the distribution instance from a `params` vector."""
   with tf.compat.v1.name_scope(name, 'Normal',
                                [params, event_shape]):
     params = tf.convert_to_tensor(value=params, name='params')
     event_shape = dist_util.expand_to_vector(
         tf.convert_to_tensor(
             value=event_shape, name='event_shape', dtype=tf.int32),
         tensor_name='event_shape')
     output_shape = tf.concat([
         tf.shape(input=params)[:-1],
         event_shape,
     ], axis=0)
     loc_params, scale_params = tf.split(params, 2, axis=-1)
     if softplus_scale:
       scale_params = tf.math.softplus(scale_params) + tfd.softplus_inverse(1.0)
     return tfd.Independent(
         tfd.Normal(
             loc=tf.reshape(loc_params, output_shape),
             scale=tf.reshape(scale_params, output_shape),
             validate_args=validate_args),
         reinterpreted_batch_ndims=tf.size(input=event_shape),
         validate_args=validate_args)
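
Same pattern as Example #1, with a `Normal` base distribution instead of `LogNormal`. A minimal sketch for scalar events (assuming `tf` is TensorFlow 2.x and the module aliases used above are in place):

import tensorflow as tf

# Hypothetical parameters: event_shape=(), batch of 5; the last axis is
# [loc, raw_scale], so it has length 2.
params = tf.random.normal([5, 2])
dist = new(params, event_shape=(), softplus_scale=True)
print(dist.sample(3).shape)  # expected: (3, 5)
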
Example #5
    def __init__(self,
                 n_questions,
                 n_answers,
                 n_components=10,
                 components_prior=0.7,
                 encoder_layers=[16, 16],
                 activation='relu',
                 n_mcmc_samples=1,
                 random_state=None):
        super(GradeMembershipModel, self).__init__()
        self._random_state = np.random.RandomState(seed=random_state) \
            if not isinstance(random_state, np.random.RandomState) \
            else random_state
        self._initializer = tf.initializers.GlorotNormal(
            seed=self._random_state.randint(1e8))

        self.n_questions = int(n_questions)
        # this assumes the same number of answers
        # is given to every question
        self.n_answers = int(n_answers)
        self.n_components = int(n_components)

        self.components_prior = np.array(softplus_inverse(components_prior))

        self.n_mcmc_samples = n_mcmc_samples

        self.encoder = []
        self.decoder = []
        # Each question gets one encoder and one decoder
        for question_idx in range(self.n_questions):
            # ====== encoder ====== #
            encoder = Sequential(name="EncoderQ%d" % question_idx)
            for num_hidden_units in encoder_layers:
                encoder.add(
                    Dense(num_hidden_units,
                          activation=activation,
                          kernel_initializer=self._initializer))
            encoder.add(
                Dense(n_components,
                      activation=tf.nn.softplus,
                      kernel_initializer=self._initializer,
                      name="DenseConcentration"))
            encoder.add(
                DirichletLayer(clip_for_stable=True,
                               pre_softplus=False,
                               name="topics_posteriorQ%d" % question_idx))
            # required so the Model tracks the Layer's parameters
            setattr(self, 'encoder%d' % question_idx, encoder)
            self.encoder.append(encoder)
            # ====== decoder ====== #
            # decoder
            group_answer_logits = self.add_weight(
                name="topics_words_logits%d" % question_idx,
                shape=[self.n_components, n_answers],
                initializer=self._initializer)
            # The observations are bags of words and therefore not one-hot.
            # However, the log_prob of OneHotCategorical computes the
            # probability correctly in this case.
            decoder = OneHotCategorical(probs_input=True,
                                        name="AnswerSheetQ%d" % question_idx)
            # required so the Model tracks the Layer's parameters
            setattr(self, 'decoder%d' % question_idx, decoder)
            self.decoder.append([group_answer_logits, decoder])

        # same prior for all questions
        self.prior_logit = self.add_weight(
            name="prior_logit",
            shape=[1, self.n_components],
            trainable=False,
            initializer=tf.initializers.Constant(self.components_prior))
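
A construction sketch with hypothetical sizes (it assumes the module-level names `__init__` relies on, such as `Sequential`, `Dense`, `DirichletLayer`, `OneHotCategorical`, and `softplus_inverse`, are importable):

model = GradeMembershipModel(n_questions=12,
                             n_answers=4,
                             n_components=8,
                             components_prior=0.7,
                             random_state=42)
# One encoder and one (logits, decoder) pair per question.
print(len(model.encoder), len(model.decoder))  # expected: 12 12
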
Example #6
def csiszar_vimco_helper(logu, name=None):
    """Helper to `csiszar_vimco`; computes `log_avg_u`, `log_sooavg_u`.

  `axis = 0` of `logu` is presumed to correspond to iid samples from `q`, i.e.,

  ```none
  logu[j] = log(u[j])
  u[j] = p(x, h[j]) / q(h[j] | x)
  h[j] iid~ q(H | x)
  ```

  Args:
    logu: Floating-type `Tensor` representing `log(p(x, h) / q(h | x))`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    log_avg_u: `logu.dtype` `Tensor` corresponding to the natural-log of the
      average of `u`. The sum of the gradient of `log_avg_u` is `1`.
    log_sooavg_u: `logu.dtype` `Tensor` characterized by the natural-log of the
      average of `u`, except that the average swaps out `u[i]` for the
      leave-`i`-out Geometric-average. The mean of the gradient of
      `log_sooavg_u` is `1`. Mathematically `log_sooavg_u` is,
      ```none
      log_sooavg_u[i] = log(Avg{h[j ; i] : j=0, ..., m-1})
      h[j ; i] = { u[j]                              j!=i
                 { GeometricAverage{u[k] : k != i}   j==i
      ```

  """
    with tf.compat.v1.name_scope(name, "csiszar_vimco_helper", [logu]):
        logu = tf.convert_to_tensor(value=logu, name="logu")

        n = tf.compat.dimension_value(logu.shape.with_rank_at_least(1)[0])
        if n is None:
            n = tf.shape(input=logu)[0]
            log_n = tf.math.log(tf.cast(n, dtype=logu.dtype))
            nm1 = tf.cast(n - 1, dtype=logu.dtype)
        else:
            log_n = np.log(n).astype(logu.dtype.as_numpy_dtype)
            nm1 = np.asarray(n - 1, dtype=logu.dtype.as_numpy_dtype)

        # Throughout we reduce across axis=0 since this is presumed to be iid
        # samples.

        log_max_u = tf.reduce_max(input_tensor=logu, axis=0)
        log_sum_u_minus_log_max_u = tf.reduce_logsumexp(
            input_tensor=logu - log_max_u, axis=0)

        # log_loosum_u[i] =
        # = logsumexp(logu[j] : j != i)
        # = log( exp(logsumexp(logu)) - exp(logu[i]) )
        # = log( exp(logsumexp(logu - logu[i])) exp(logu[i])  - exp(logu[i]))
        # = logu[i] + log(exp(logsumexp(logu - logu[i])) - 1)
        # = logu[i] + log(exp(logsumexp(logu) - logu[i]) - 1)
        # = logu[i] + softplus_inverse(logsumexp(logu) - logu[i])
        d = log_sum_u_minus_log_max_u + (log_max_u - logu)
        # We use `d != 0` rather than `d > 0.` because `d < 0.` should never
        # happen; if it does, we want to complain loudly (which
        # `softplus_inverse` will).
        d_ok = tf.not_equal(d, 0.)
        safe_d = tf.compat.v1.where(d_ok, d, tf.ones_like(d))
        d_ok_result = logu + tfd.softplus_inverse(safe_d)

        inf = np.array(np.inf, dtype=logu.dtype.as_numpy_dtype)

        # When not(d_ok) and is_positive_and_largest, we manually compute
        # log_loosum_u. (We can efficiently do this for any one point but not
        # all, hence we still need the above calculation.) This is good because
        # when this condition is met, we cannot use the above calculation; it's
        # -inf. We now compute the log-leave-out-max-sum, replicate it to every
        # point, and make sure to select it only when we need to.
        is_positive_and_largest = tf.logical_and(
            logu > 0., tf.equal(logu, log_max_u[tf.newaxis, ...]))
        log_lomsum_u = tf.reduce_logsumexp(
            input_tensor=tf.compat.v1.where(
                is_positive_and_largest,
                tf.fill(tf.shape(input=logu), -inf),
                logu),
            axis=0,
            keepdims=True)
        log_lomsum_u = tf.tile(
            log_lomsum_u,
            multiples=1 + tf.pad(tensor=[n - 1],
                                 paddings=[[0, tf.rank(logu) - 1]]))

        d_not_ok_result = tf.compat.v1.where(is_positive_and_largest,
                                             log_lomsum_u,
                                             tf.fill(tf.shape(input=d), -inf))

        log_loosum_u = tf.compat.v1.where(d_ok, d_ok_result, d_not_ok_result)

        # The swap-one-out-sum ("soosum") is n different sums, each of which
        # replaces the i-th item with the i-th-left-out average, i.e.,
        # soo_sum_u[i] = [exp(logu) - exp(logu[i])] + exp(mean(logu[!=i]))
        #              =  exp(log_loosum_u[i])      + exp(looavg_logu[i])
        looavg_logu = (tf.reduce_sum(input_tensor=logu, axis=0) - logu) / nm1
        log_soosum_u = tf.reduce_logsumexp(
            input_tensor=tf.stack([log_loosum_u, looavg_logu]), axis=0)

        log_avg_u = log_sum_u_minus_log_max_u + log_max_u - log_n
        log_sooavg_u = log_soosum_u - log_n

        log_avg_u.set_shape(logu.shape.with_rank_at_least(1)[1:])
        log_sooavg_u.set_shape(logu.shape)

        return log_avg_u, log_sooavg_u
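
A quick numeric sketch (assuming the `tf`, `np`, and `tfd.softplus_inverse` imports used above are in place): with `u` laid out so axis 0 indexes the iid samples, `log_avg_u` should equal the log of the per-column mean of `u`.

import numpy as np
import tensorflow as tf

# Hypothetical: 4 iid importance samples (axis 0) for a batch of 2 points.
u = np.array([[1., 2.], [2., 3.], [3., 4.], [4., 5.]], dtype=np.float32)
log_avg_u, log_sooavg_u = csiszar_vimco_helper(tf.math.log(u))
print(np.allclose(log_avg_u, np.log(u.mean(axis=0))))  # expected: True
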
Example #7
def csiszar_vimco_helper(logu, name=None):
  """Helper to `csiszar_vimco`; computes `log_avg_u`, `log_sooavg_u`.

  `axis = 0` of `logu` is presumed to correspond to iid samples from `q`, i.e.,

  ```none
  logu[j] = log(u[j])
  u[j] = p(x, h[j]) / q(h[j] | x)
  h[j] iid~ q(H | x)
  ```

  Args:
    logu: Floating-type `Tensor` representing `log(p(x, h) / q(h | x))`.
    name: Python `str` name prefixed to Ops created by this function.

  Returns:
    log_avg_u: `logu.dtype` `Tensor` corresponding to the natural-log of the
      average of `u`. The sum of the gradient of `log_avg_u` is `1`.
    log_sooavg_u: `logu.dtype` `Tensor` characterized by the natural-log of the
      average of `u`, except that the average swaps out `u[i]` for the
      leave-`i`-out Geometric-average. The mean of the gradient of
      `log_sooavg_u` is `1`. Mathematically `log_sooavg_u` is,
      ```none
      log_sooavg_u[i] = log(Avg{h[j ; i] : j=0, ..., m-1})
      h[j ; i] = { u[j]                              j!=i
                 { GeometricAverage{u[k] : k != i}   j==i
      ```

  """
  with tf.name_scope(name, "csiszar_vimco_helper", [logu]):
    logu = tf.convert_to_tensor(logu, name="logu")

    n = logu.shape.with_rank_at_least(1)[0].value
    if n is None:
      n = tf.shape(logu)[0]
      log_n = tf.log(tf.cast(n, dtype=logu.dtype))
      nm1 = tf.cast(n - 1, dtype=logu.dtype)
    else:
      log_n = np.log(n).astype(logu.dtype.as_numpy_dtype)
      nm1 = np.asarray(n - 1, dtype=logu.dtype.as_numpy_dtype)

    # Throughout we reduce across axis=0 since this is presumed to be iid
    # samples.

    log_max_u = tf.reduce_max(logu, axis=0)
    log_sum_u_minus_log_max_u = tf.reduce_logsumexp(
        logu - log_max_u, axis=0)

    # log_loosum_u[i] =
    # = logsumexp(logu[j] : j != i)
    # = log( exp(logsumexp(logu)) - exp(logu[i]) )
    # = log( exp(logsumexp(logu - logu[i])) exp(logu[i])  - exp(logu[i]))
    # = logu[i] + log(exp(logsumexp(logu - logu[i])) - 1)
    # = logu[i] + log(exp(logsumexp(logu) - logu[i]) - 1)
    # = logu[i] + softplus_inverse(logsumexp(logu) - logu[i])
    d = log_sum_u_minus_log_max_u + (log_max_u - logu)
    # We use `d != 0` rather than `d > 0.` because `d < 0.` should never
    # happen; if it does, we want to complain loudly (which `softplus_inverse`
    # will).
    d_ok = tf.not_equal(d, 0.)
    safe_d = tf.where(d_ok, d, tf.ones_like(d))
    d_ok_result = logu + tfd.softplus_inverse(safe_d)

    inf = np.array(np.inf, dtype=logu.dtype.as_numpy_dtype)

    # When not(d_ok) and is_positive_and_largest, we manually compute
    # log_loosum_u. (We can efficiently do this for any one point but not all,
    # hence we still need the above calculation.) This is good because when
    # this condition is met, we cannot use the above calculation; it's -inf.
    # We now compute the log-leave-out-max-sum, replicate it to every
    # point, and make sure to select it only when we need to.
    is_positive_and_largest = tf.logical_and(
        logu > 0.,
        tf.equal(logu, log_max_u[tf.newaxis, ...]))
    log_lomsum_u = tf.reduce_logsumexp(
        tf.where(is_positive_and_largest,
                 tf.fill(tf.shape(logu), -inf),
                 logu),
        axis=0, keep_dims=True)
    log_lomsum_u = tf.tile(
        log_lomsum_u,
        multiples=1 + tf.pad([n-1], [[0, tf.rank(logu)-1]]))

    d_not_ok_result = tf.where(
        is_positive_and_largest,
        log_lomsum_u,
        tf.fill(tf.shape(d), -inf))

    log_loosum_u = tf.where(d_ok, d_ok_result, d_not_ok_result)

    # The swap-one-out-sum ("soosum") is n different sums, each of which
    # replaces the i-th item with the i-th-left-out average, i.e.,
    # soo_sum_u[i] = [exp(logu) - exp(logu[i])] + exp(mean(logu[!=i]))
    #              =  exp(log_loosum_u[i])      + exp(looavg_logu[i])
    looavg_logu = (tf.reduce_sum(logu, axis=0) - logu) / nm1
    log_soosum_u = tf.reduce_logsumexp(
        tf.stack([log_loosum_u, looavg_logu]),
        axis=0)

    log_avg_u = log_sum_u_minus_log_max_u + log_max_u - log_n
    log_sooavg_u = log_soosum_u - log_n

    log_avg_u.set_shape(logu.shape.with_rank_at_least(1)[1:])
    log_sooavg_u.set_shape(logu.shape)

    return log_avg_u, log_sooavg_u
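
This older variant targets TensorFlow 1.x APIs (`tf.log`, `keep_dims`, `Dimension.value`), so it is meant for graph mode. A session-based sketch under that assumption (and assuming the module's `np` and `tfd.softplus_inverse` imports are in place):

import numpy as np
import tensorflow as tf  # TensorFlow 1.x assumed for this variant

u = np.array([[1., 2.], [2., 3.], [3., 4.], [4., 5.]], dtype=np.float32)
log_avg_u, log_sooavg_u = csiszar_vimco_helper(tf.constant(np.log(u)))
with tf.Session() as sess:
    avg, sooavg = sess.run([log_avg_u, log_sooavg_u])
print(np.allclose(avg, np.log(u.mean(axis=0))))  # expected: True
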