Example #1
# Shared imports assumed by the examples below. Depending on the Edward2
# release, `ed` is either the standalone `edward2` package or the module
# bundled with TensorFlow Probability, as used here.
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import edward2 as ed

tfd = tfp.distributions

# `rbf` and `replicate_along_zero_axis` are helpers defined elsewhere in
# the source module (an RBF kernel matrix and tiling along axis 0).


def model_mixture_adaptive(X, ls=1., n_mix=2, ridge_factor=1e-3):
    """Defines the Adaptive Mixture of Gaussian Process Model.

    Note: This method is currently untested and is likely to work
        poorly due to the explicit sampling of the membership
        variables (i.e. mix_member). More work needs to be done to
        perform integrated sampling.

    Args:
        X: (np.ndarray of float32) Input training features
            with dimension (N, D).
        ls: (float32) length scale parameter.
        n_mix: (int8) Number of mixture components.
        ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.

    Returns:
         (tf.Tensors of float32) model parameters.
    """
    # TODO(jereliu): find a way to integrate over adaptive mixture.
    raise Warning(
        "Currently this method is not tested and is likely to not "
        "work well due to explicit sampling of membership variables "
        "(i.e. mix_member). More work needs to be done to perform "
        "integrated sampling.")

    N = X.shape[0]
    K_mat = rbf(X, ls=ls, ridge_factor=ridge_factor)

    gp_weight = ed.Independent(distribution=tfd.MultivariateNormalTriL(
        loc=tf.zeros(shape=[n_mix, N]),
        scale_tril=replicate_along_zero_axis(tf.linalg.cholesky(K_mat), n_mix),
    ),
                               reinterpreted_batch_ndims=1,
                               name="gp_w")
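    # Sample a one-hot membership vector per data point (Multinomial with
    # total_count=1), with logits from the GP weight processes: [N, n_mix].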
    mix_member = ed.Multinomial(total_count=[1.],
                                logits=tf.transpose(gp_weight),
                                name="mix_prob")

    gp_comp = ed.Independent(distribution=tfd.MultivariateNormalTriL(
        loc=tf.zeros(shape=[n_mix, N]),
        scale_tril=replicate_along_zero_axis(tf.linalg.cholesky(K_mat), n_mix),
    ),
                             reinterpreted_batch_ndims=1,
                             name="gp_f")

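    # Select each point's GP value according to its sampled one-hot
    # membership: [N, n_mix] * [N, n_mix], summed over mixtures -> [N].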
    gp_f = tf.reduce_sum(tf.transpose(gp_comp) * mix_member, axis=-1)

    sigma = ed.Normal(loc=-5., scale=1., name='sigma')

    y = ed.MultivariateNormalDiag(loc=gp_f,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    # y = ed.MixtureSameFamily(
    #     components_distribution=tfd.MultivariateNormalDiag(
    #         loc=gp_comp, scale_identity_multiplier=tf.exp(sigma)),
    #     mixture_distribution=tfd.Categorical(logits=gp_weight),
    #     name="y")

    return gp_weight, mix_member, gp_comp, sigma, y
Example #2
 def __call__(self, shape, dtype=None, partition_info=None):
     del partition_info  # unused arg
     if not self.built:
         self.build(shape, dtype)
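     # Treat every dimension of `shape` as an event dimension, so the
     # initializer yields a single random draw over the whole tensor.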
     return ed.Independent(ed.Normal(loc=self.mean,
                                     scale=self.stddev).distribution,
                           reinterpreted_batch_ndims=len(shape))
Example #3
  def call(self, inputs):
    if self.conditional_inputs is None and self.conditional_outputs is None:
      covariance_matrix = self.covariance_fn(inputs, inputs)
      # Tile locations so output has shape [units, batch_size]. Covariance will
      # broadcast to [units, batch_size, batch_size], and we perform
      # shape manipulations to get a random variable over [batch_size, units].
      loc = self.mean_fn(inputs)
      loc = tf.tile(loc[tf.newaxis], [self.units] + [1] * len(loc.shape))
    else:
      knn = self.covariance_fn(inputs, inputs)
      knm = self.covariance_fn(inputs, self.conditional_inputs)
      kmm = self.covariance_fn(self.conditional_inputs, self.conditional_inputs)
      kmm = tf.matrix_set_diag(
          kmm, tf.matrix_diag_part(kmm) + tf.keras.backend.epsilon())
      kmm_tril = tf.linalg.cholesky(kmm)
      kmm_tril_operator = tf.linalg.LinearOperatorLowerTriangular(kmm_tril)
      knm_operator = tf.linalg.LinearOperatorFullMatrix(knm)

      # TODO(trandustin): Vectorize linear algebra for multiple outputs. For
      # now, we do each separately and stack to obtain a locations Tensor of
      # shape [units, batch_size].
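      # The loop implements the standard GP posterior mean,
      #   loc = m(x) + K_nm K_mm^{-1} (y_m - m(X_m)),
      # and afterwards the posterior covariance,
      #   K_nn - K_nm K_mm^{-1} K_mn,
      # via triangular solves against chol(K_mm) instead of an explicit
      # inverse.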
      loc = []
      for conditional_outputs_unit in tf.unstack(self.conditional_outputs,
                                                 axis=-1):
        center = conditional_outputs_unit - self.mean_fn(
            self.conditional_inputs)
        loc_unit = knm_operator.matvec(
            kmm_tril_operator.solvevec(kmm_tril_operator.solvevec(center),
                                       adjoint=True))
        loc.append(loc_unit)
      loc = tf.stack(loc) + self.mean_fn(inputs)[tf.newaxis]

      covariance_matrix = knn
      covariance_matrix -= knm_operator.matmul(
          kmm_tril_operator.solve(
              kmm_tril_operator.solve(knm, adjoint_arg=True), adjoint=True))

    covariance_matrix = tf.matrix_set_diag(
        covariance_matrix,
        tf.matrix_diag_part(covariance_matrix) + tf.keras.backend.epsilon())

    # Form a multivariate normal random variable with batch_shape units and
    # event_shape batch_size. Then make it be independent across the units
    # dimension. Then transpose its dimensions so it is [batch_size, units].
    random_variable = ed.MultivariateNormalFullCovariance(
        loc=loc, covariance_matrix=covariance_matrix)
    random_variable = ed.Independent(random_variable.distribution,
                                     reinterpreted_batch_ndims=1)
    bijector = tfp.bijectors.Inline(
        forward_fn=lambda x: tf.transpose(x, [1, 0]),
        inverse_fn=lambda y: tf.transpose(y, [1, 0]),
        forward_event_shape_fn=lambda input_shape: input_shape[::-1],
        forward_event_shape_tensor_fn=lambda input_shape: input_shape[::-1],
        inverse_log_det_jacobian_fn=lambda y: tf.cast(0, y.dtype),
        forward_min_event_ndims=2)
    random_variable = ed.TransformedDistribution(random_variable.distribution,
                                                 bijector=bijector)
    return random_variable
Example #4
def model_mixture_adaptive2(X, ls=1., n_mix=2, ridge_factor=1e-3):
    """Alternative representation using Mixture family.

    Args:
        X: (np.ndarray of float32) Input training features
            with dimension (N, D).
        ls: (float32) length scale parameter.
        n_mix: (int8) Number of mixture components.
        ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.

    Returns:
         (tf.Tensors of float32) model parameters.
    """
    N = X.shape[0]
    K_mat = rbf(X, ls=ls, ridge_factor=ridge_factor)

    gp_weight = ed.Independent(distribution=tfd.MultivariateNormalTriL(
        loc=tf.zeros(shape=[n_mix, N]),
        scale_tril=replicate_along_zero_axis(tf.linalg.cholesky(K_mat), n_mix),
    ),
                               reinterpreted_batch_ndims=1,
                               name="gp_w")
    mix_member = ed.Multinomial(total_count=[1.],
                                logits=tf.transpose(gp_weight),
                                name="mix_prob")
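    # Note: mix_member is not consumed by `y` below, which mixes the
    # components through gp_weight's logits directly; it is only returned.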

    gp_comp = ed.Independent(distribution=tfd.MultivariateNormalTriL(
        loc=tf.zeros(shape=[n_mix, N]),
        scale_tril=replicate_along_zero_axis(tf.linalg.cholesky(K_mat), n_mix),
    ),
                             reinterpreted_batch_ndims=1,
                             name="gp_f")

    sigma = ed.Normal(loc=tf.ones(n_mix) * -5.,
                      scale=tf.ones(n_mix) * 1.,
                      name='sigma')

    y = ed.MixtureSameFamily(
        components_distribution=tfd.MultivariateNormalDiag(
            loc=gp_comp, scale_identity_multiplier=tf.exp(sigma)),
        mixture_distribution=tfd.Categorical(logits=tf.transpose(gp_weight)),
        name="y")

    return gp_weight, mix_member, gp_comp, sigma, y
Example #5
 def __call__(self, x):
     """Computes regularization given an ed.Normal random variable as input."""
     if not isinstance(x, ed.RandomVariable):
         raise ValueError('Input must be an ed.RandomVariable.')
     random_variable = ed.Independent(ed.Normal(
         loc=tf.broadcast_to(self.mean, x.distribution.event_shape),
         scale=tf.broadcast_to(self.stddev,
                               x.distribution.event_shape)).distribution,
                                      reinterpreted_batch_ndims=len(
                                          x.distribution.event_shape))
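     # Closed-form forward KL from the fixed Normal prior to x's
     # distribution, i.e. KL(prior || q).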
     return random_variable.distribution.kl_divergence(x.distribution)
Example #6
 def __call__(self, shape=None, dtype=None, partition_info=None):
     del shape, dtype, partition_info  # Unused in TrainableInitializers.
     # TODO(dusenberrymw): Restructure so that we can build as needed.
     if not self.built:
         raise ValueError(
             'A TrainableInitializer must be built by a layer before '
             'usage, and is currently only compatible with Bayesian '
             'layers.')
     return ed.Independent(ed.Normal(loc=self.mean,
                                     scale=self.stddev).distribution,
                           reinterpreted_batch_ndims=len(self.shape))
Example #7
 def __call__(self, x):
   """Computes regularization using an unbiased Monte Carlo estimate."""
   prior = ed.Independent(
       ed.HalfCauchy(
           loc=tf.broadcast_to(self.loc, x.distribution.event_shape),
           scale=tf.broadcast_to(self.scale, x.distribution.event_shape)
       ).distribution,
       reinterpreted_batch_ndims=len(x.distribution.event_shape))
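   # Single-sample Monte Carlo estimate of KL(q || prior):
   # log q(x) - log p(x), evaluated at the sampled value of x.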
   negative_entropy = x.distribution.log_prob(x)
   cross_entropy = -prior.distribution.log_prob(x)
   return negative_entropy + cross_entropy
Example #8
def model_mixture(X, ls=1., n_mix=2, ridge_factor=1e-3):
    """Defines the mixture-of-Gaussian-Processes model.

    Args:
        X: (np.ndarray of float32) Input training features
            with dimension (N, D).
        ls: (float32) length scale parameter.
        n_mix: (int8) Number of mixture components.
        ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.

    Returns:
         (tf.Tensors of float32) model parameters.
    """
    N = X.shape[0]
    K_mat = rbf(X, ls=ls, ridge_factor=ridge_factor)

    mix_prob = ed.Dirichlet(concentration=tf.ones(n_mix, dtype=tf.float32) /
                            n_mix,
                            name='mix_prob')

    gp_f = ed.Independent(distribution=tfd.MultivariateNormalTriL(
        loc=tf.zeros(shape=[n_mix, N]),
        scale_tril=replicate_along_zero_axis(tf.linalg.cholesky(K_mat), n_mix),
    ),
                          reinterpreted_batch_ndims=1,
                          name="gp_f")

    sigma = ed.Normal(loc=tf.ones(n_mix) * -5.,
                      scale=tf.ones(n_mix) * 1.,
                      name='sigma')

    y = ed.MixtureSameFamily(
        components_distribution=tfd.MultivariateNormalDiag(
            loc=gp_f, scale_identity_multiplier=tf.exp(sigma)),
        mixture_distribution=tfd.Categorical(probs=mix_prob),
        name="y")

    return mix_prob, gp_f, sigma, y
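
A minimal usage sketch for model_mixture (a hypothetical addition, not from the source repo): it assumes the shared imports from Example #1, the helpers rbf and replicate_along_zero_axis in scope, and TF1-style graph execution, since the ed.* random variables above are graph tensors.

import numpy as np

X_train = np.random.normal(size=(20, 2)).astype(np.float32)
mix_prob, gp_f, sigma, y = model_mixture(X_train, ls=1., n_mix=2)

with tf.Session() as sess:
    y_draw = sess.run(y)  # one joint draw from the mixture-of-GPs prior

print(y_draw.shape)  # (20,)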