Example #1
CONFIG = \
"""
model:
"""
output_dir = '/tmp/vae_pp'

# network configuration
batch_size = 32
max_iter = 50000
encoder = vi.NetConf([256, 256, 256], flatten_inputs=True, name='Encoder')
decoder = vi.NetConf([256, 256, 256], flatten_inputs=True, name='Decoder')
encoded_size = 16
posteriors_info = [
    ('gaussian', 'mvndiag', 'mvntril'),
    (
        D.Sample(D.Normal(loc=0., scale=1.),
                 sample_shape=encoded_size,
                 name='independent'),
        D.MultivariateNormalDiag(loc=tf.zeros(encoded_size),
                                 scale_diag=tf.ones(encoded_size),
                                 name='mvndiag'),
        D.MultivariateNormalTriL(loc=tf.zeros(encoded_size),
                                 scale_tril=bj.FillScaleTriL()(tf.ones(
                                     encoded_size * (encoded_size + 1) // 2)),
                                 name='mvntril'),
        D.MixtureSameFamily(
            components_distribution=D.MultivariateNormalDiag(
                loc=tf.zeros([10, encoded_size]),
                scale_diag=tf.ones([10, encoded_size])),
            mixture_distribution=D.Categorical(logits=tf.fill([10], 1.0 / 10)),
            name='gmm10'),
    ),
]
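
Each entry in `posteriors_info` pairs a string alias with a concrete prior over the same 16-dimensional latent space, so the VAE can swap priors without touching the encoder or decoder. A minimal sketch of that invariant, using `tensorflow_probability` directly (the snippet above presumably aliases it as `D`):

import tensorflow as tf
import tensorflow_probability as tfp

D = tfp.distributions
encoded_size = 16

# two of the prior candidates above, rebuilt standalone
candidates = [
    D.Sample(D.Normal(loc=0., scale=1.), sample_shape=encoded_size),
    D.MultivariateNormalDiag(loc=tf.zeros(encoded_size),
                             scale_diag=tf.ones(encoded_size)),
]
for prior in candidates:
    # every candidate must share the latent event shape
    assert prior.event_shape == [encoded_size]
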
Example #2
def _default_prior(event_shape, posterior, prior, posterior_kwargs):
  if not isinstance(event_shape, (Sequence, MutableSequence, tf.TensorShape)):
    raise ValueError("event_shape must be list of integer but given: "
                     f"{event_shape} type: {type(event_shape)}")
  if isinstance(prior, (Distribution, DistributionLambda, Callable)):
    return prior
  elif not isinstance(prior, (string_types, type(None))):
    raise ValueError("prior must be string or instance of "
                     f"Distribution or DistributionLambda, but given: {prior}")
  # no prior given
  layer, dist = parse_distribution(posterior)
  if isinstance(prior, dict):
    kw = dict(prior)
    prior = None
  else:
    kw = {}
  event_size = int(np.prod(event_shape))

  ## helper function
  def _kwargs(**args):
    for k, v in args.items():
      if k not in kw:
        kw[k] = v
    return kw

  ## Normal
  if layer == obl.GaussianLayer:
    prior = obd.Independent(
        obd.Normal(**_kwargs(loc=tf.zeros(shape=event_shape),
                             scale=tf.ones(shape=event_shape))),
        reinterpreted_batch_ndims=1,
    )
  ## Multivariate Normal
  elif issubclass(layer, obl.MultivariateNormalLayer):
    cov = layer._partial_kwargs['covariance']
    if cov == 'diag':  # diagonal covariance
      loc = tf.zeros(shape=event_shape)
      if tf.rank(loc) == 0:
        loc = tf.expand_dims(loc, axis=-1)
      prior = obd.MultivariateNormalDiag(
          **_kwargs(loc=loc, scale_identity_multiplier=1.))
    else:  # lower-triangular covariance
      bijector = tfp.bijectors.FillScaleTriL(
          diag_bijector=tfp.bijectors.Identity(), diag_shift=1e-5)
      size = tf.reduce_prod(event_shape)
      loc = tf.zeros(shape=[size])
      scale_tril = bijector.forward(tf.ones(shape=[size * (size + 1) // 2]))
      prior = obd.MultivariateNormalTriL(
          **_kwargs(loc=loc, scale_tril=scale_tril))
  ## Log Normal
  elif layer == obl.LogNormalLayer:
    prior = obd.Independent(
        obd.LogNormal(**_kwargs(loc=tf.zeros(shape=event_shape),
                                scale=tf.ones(shape=event_shape))),
        reinterpreted_batch_ndims=1,
    )
  ## mixture
  elif issubclass(layer, obl.MixtureGaussianLayer):
    if hasattr(layer, '_partial_kwargs'):
      cov = layer._partial_kwargs['covariance']
    else:
      cov = 'none'
    n_components = int(posterior_kwargs.get('n_components', 2))
    if cov == 'diag':
      scale_shape = [n_components, event_size]
      fn = lambda l, s: obd.MultivariateNormalDiag(loc=l,
                                                   scale_diag=tf.nn.softplus(s))
    elif cov == 'none':
      scale_shape = [n_components, event_size]
      fn = lambda l, s: obd.Independent(
          obd.Normal(loc=l, scale=tf.math.softplus(s)),
          reinterpreted_batch_ndims=1,
      )
    elif cov in ('full', 'tril'):
      scale_shape = [n_components, event_size * (event_size + 1) // 2]
      fn = lambda l, s: obd.MultivariateNormalTriL(
          loc=l,
          scale_tril=tfp.bijectors.FillScaleTriL(diag_shift=1e-5)
          (tf.math.softplus(s)))
    loc = tf.cast(tf.fill([n_components, event_size], 0.), dtype=tf.float32)
    log_scale = tf.cast(tf.fill(scale_shape, np.log(np.expm1(1.))),
                        dtype=tf.float32)
    p = 1. / n_components
    mixture_logits = tf.cast(tf.fill([n_components], np.log(p / (1 - p))),
                             dtype=tf.float32)
    prior = obd.MixtureSameFamily(
        components_distribution=fn(loc, log_scale),
        mixture_distribution=obd.Categorical(logits=mixture_logits))
  ## discrete
  elif dist in (obd.OneHotCategorical, obd.Categorical) or \
    layer == obl.RelaxedOneHotCategoricalLayer:
    p = 1. / event_size
    prior = dist(**_kwargs(logits=[np.log(p / (1 - p))] * event_size),
                 dtype=tf.float32)
  elif dist == obd.Dirichlet:
    prior = dist(**_kwargs(concentration=[1.] * event_size))
  elif dist == obd.Bernoulli:
    prior = obd.Independent(
        obd.Bernoulli(**_kwargs(logits=np.zeros(event_shape)),
                      dtype=tf.float32),
        reinterpreted_batch_ndims=len(event_shape),
    )
  ## other
  return prior
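
For the plain Gaussian branch above, the default prior is a standard isotropic normal over the event shape. A standalone sketch of that prior with `tensorflow_probability` (the `obd` alias presumably wraps the same distributions; the event shape here is hypothetical):

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
event_shape = [16]  # hypothetical latent shape

# standard Normal prior, matching the GaussianLayer branch
prior = tfd.Independent(
    tfd.Normal(loc=tf.zeros(event_shape), scale=tf.ones(event_shape)),
    reinterpreted_batch_ndims=1)
assert prior.event_shape == event_shape
assert prior.batch_shape == []
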
Example #3
def _default_prior(event_shape, posterior, prior, posterior_kwargs):
    if isinstance(prior, obd.Distribution):
        return prior
    layer, dist = parse_distribution(posterior)
    if isinstance(prior, dict):
        kw = dict(prior)
        prior = None
    else:
        kw = {}
    event_size = int(np.prod(event_shape))

    ## helper function
    def _kwargs(**args):
        for k, v in args.items():
            if k not in kw:
                kw[k] = v
        return kw

    ## Normal
    if layer == obl.GaussianLayer:
        prior = obd.Independent(
            obd.Normal(**_kwargs(loc=tf.zeros(shape=event_shape),
                                 scale=tf.ones(shape=event_shape))), 1)
    ## Multivariate Normal
    elif issubclass(layer, obl.MultivariateNormalLayer):
        cov = layer._partial_kwargs['covariance']
        if cov == 'diag':  # diagonal covariance
            loc = tf.zeros(shape=event_shape)
            if tf.rank(loc) == 0:
                loc = tf.expand_dims(loc, axis=-1)
            prior = obd.MultivariateNormalDiag(
                **_kwargs(loc=loc, scale_identity_multiplier=1.))
        else:  # lower-triangular covariance
            bijector = tfp.bijectors.FillScaleTriL(
                diag_bijector=tfp.bijectors.Identity(), diag_shift=1e-5)
            size = tf.reduce_prod(event_shape)
            loc = tf.zeros(shape=[size])
            scale_tril = bijector.forward(
                tf.ones(shape=[size * (size + 1) // 2]))
            prior = obd.MultivariateNormalTriL(
                **_kwargs(loc=loc, scale_tril=scale_tril))
    ## Log Normal
    elif layer == obl.LogNormalLayer:
        prior = obd.Independent(
            obd.LogNormal(**_kwargs(loc=tf.zeros(shape=event_shape),
                                    scale=tf.ones(shape=event_shape))), 1)
    ## mixture
    elif issubclass(layer, obl.MixtureGaussianLayer):
        if hasattr(layer, '_partial_kwargs'):
            cov = layer._partial_kwargs['covariance']
        else:
            cov = 'none'
        n_components = int(posterior_kwargs.get('n_components', 2))
        if cov == 'diag':
            scale_shape = [n_components, event_size]
            fn = lambda l, s: obd.MultivariateNormalDiag(
                loc=l, scale_diag=tf.nn.softplus(s))
        elif cov == 'none':
            scale_shape = [n_components, event_size]
            fn = lambda l, s: obd.Independent(
                obd.Normal(loc=l, scale=tf.math.softplus(s)), 1)
        elif cov in ('full', 'tril'):
            scale_shape = [n_components, event_size * (event_size + 1) // 2]
            fn = lambda l, s: obd.MultivariateNormalTriL(
                loc=l,
                scale_tril=tfp.bijectors.FillScaleTriL(diag_shift=1e-5)
                (tf.math.softplus(s)))
        loc = tf.cast(tf.fill([n_components, event_size], 0.),
                      dtype=tf.float32)
        log_scale = tf.cast(tf.fill(scale_shape, np.log(np.expm1(1.))),
                            dtype=tf.float32)
        mixture_logits = tf.cast(tf.fill([n_components], 1.), dtype=tf.float32)
        prior = obd.MixtureSameFamily(
            components_distribution=fn(loc, log_scale),
            mixture_distribution=obd.Categorical(logits=mixture_logits))
    ## discrete
    elif dist in (obd.OneHotCategorical, obd.Categorical) or \
      layer == obl.RelaxedOneHotCategoricalLayer:
        prior = dist(**_kwargs(logits=np.log([1. / event_size] * event_size),
                               dtype=tf.float32))
    elif dist == obd.Dirichlet:
        prior = dist(**_kwargs(concentration=[1.] * event_size))
    elif dist == obd.Bernoulli:
        prior = obd.Independent(
            obd.Bernoulli(**_kwargs(logits=np.full(event_shape, np.log(0.5)),
                                    dtype=tf.float32)), len(event_shape))
    ## other
    return prior
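
In the mixture branch, the initial log-scale is filled with `log(expm1(1.))`, i.e. the softplus inverse of 1, so each component starts with unit scale after the softplus in `fn`. A standalone sketch of the resulting GMM prior, built with `tensorflow_probability` directly (component count and event size are hypothetical):

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
n_components, event_size = 2, 16  # hypothetical sizes

# softplus(log(expm1(1))) == 1, so the components start with unit scale
log_scale = np.log(np.expm1(1.))
assert np.isclose(np.log1p(np.exp(log_scale)), 1.0)

prior = tfd.MixtureSameFamily(
    components_distribution=tfd.MultivariateNormalDiag(
        loc=tf.zeros([n_components, event_size]),
        scale_diag=tf.ones([n_components, event_size])),
    mixture_distribution=tfd.Categorical(logits=tf.zeros([n_components])))
assert prior.event_shape == [event_size]
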
Example #4
 def create_posterior(self,
                      input_shape: Optional[List[int]] = None,
                      name: Optional[str] = None) -> obl.DenseDistribution:
     r""" Initiate a Distribution for the random variable """
      # use a standard Gaussian as the prior distribution for the deterministic case
     if self.is_deterministic:
         prior = obd.Independent(
             obd.Normal(loc=tf.zeros(shape=self.event_shape),
                        scale=tf.ones(shape=self.event_shape)),
             reinterpreted_batch_ndims=1,
         )
     else:
         prior = _default_prior(self.event_shape, self.posterior,
                                self.prior, self.kwargs)
     event_shape = self.event_shape
     posterior = self.posterior
     posterior_kwargs = dict(self.kwargs)
     name = self.name if name is None else str(name)
     # ====== deterministic distribution with loss function from tensorflow ====== #
     if posterior in dir(tf.losses) or posterior in dir(keras.activations):
         distribution_layer = obl.VectorDeterministicLayer
         if posterior in dir(tf.losses):
             activation = 'linear'
             fn = tf.losses.get(str(posterior))
          else:  # just an activation function; the loss defaults to MSE
             activation = keras.activations.get(self.posterior)
             fn = tf.losses.get(posterior_kwargs.pop('loss', 'mse'))
         posterior_kwargs['log_prob'] = \
           lambda self, y_true: -fn(y_true, self.mean())
     # ====== probabilistic loss ====== #
     else:
         distribution_layer = parse_distribution(self.posterior)[0]
         activation = self.preactivation
     # ====== create distribution layers ====== #
     kw = dict(projection=self.projection)
     if input_shape is not None:
         kw['input_shape'] = input_shape
     ### create the layer
     ## mixture distributions
     if posterior in ('mdn', 'mixdiag', 'mixfull', 'mixtril'):
         posterior_kwargs.pop('covariance', None)
         posterior_kwargs.update(kw)
         # dense network for projection
         layer = obl.MixtureDensityNetwork(event_shape,
                                           loc_activation=activation,
                                           scale_activation='softplus1',
                                           covariance=dict(
                                               mdn='none',
                                                mixdiag='diag',
                                                mixfull='tril',
                                                mixtril='tril')[posterior],
                                           name=name,
                                           prior=prior,
                                           dropout=self.dropout,
                                           **posterior_kwargs)
     ## non-mixture distribution
     else:
         layer = obl.DenseDistribution(event_shape,
                                       posterior=distribution_layer,
                                       prior=prior,
                                       activation=activation,
                                       posterior_kwargs=posterior_kwargs,
                                       dropout=self.dropout,
                                       name=name,
                                       **kw)
     ### set attributes
     if not hasattr(layer, 'event_shape'):
         layer.event_shape = event_shape
     # build the layer in advance
     if input_shape is not None and layer.projection:
         inputs = keras.Input(shape=input_shape, batch_size=None)
         layer(inputs)
     return layer
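
`create_posterior` wires a prior, an activation, and a dense projection into a single layer that maps features to a posterior distribution. A much simplified analogue of that idea using `tfp.layers` directly (not the library's `DenseDistribution`; names and sizes here are hypothetical):

import tensorflow as tf
import tensorflow_probability as tfp

tfpl = tfp.layers
event_size = 16  # hypothetical event size

# dense projection followed by a layer that interprets the projected
# parameters as an independent Normal posterior
posterior_net = tf.keras.Sequential([
    tf.keras.layers.Dense(tfpl.IndependentNormal.params_size(event_size)),
    tfpl.IndependentNormal(event_size),
])
q_z = posterior_net(tf.random.normal([8, 32]))  # a Distribution over R^16
print(q_z.event_shape)  # (16,)
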
Example #5
  def create_divergence_matrix(self,
                               n_samples=1000,
                               lognorm=True,
                               n_components=2,
                               normalize_per_code=True,
                               decode=False):
    r""" Using GMM fitted on the factors to estimate the divergence to each
    latent code.

    It means calculating the divergence: `DKL(q(z|x)||p(y))`, where:
      - q(z|x) is latent code of Gaussian distribution
      - p(y) is factor of Gaussian mixture model with `n_components`

    The calculation is repeated for each pair of (code, factor). This method is
    recommended for factors that are continuous values.

    Return:
      a matrix of shape `[n_codes, n_factors]`
    """
    n_samples = int(n_samples)
    n_codes = self.n_codes
    n_factors = self.n_factors
    matrices = []
    for qZ, y in zip(self.representations, self.original_factors):
      ### normalizing the factors
      if lognorm:
        y = np.log1p(y)
      # standardizing for each factor
      y = (y - np.mean(y, axis=0, keepdims=True)) / (
          np.std(y, axis=0, keepdims=True) + 1e-10)
      ### train the Gaussian mixture on the factors
      f_gmm = []
      for fidx, (f, fname) in enumerate(zip(y.T, self.factor_names)):
        gmm = tfd.GaussianMixture.init(f[:, np.newaxis],
                                       n_components=n_components,
                                       covariance_type='diag',
                                       batch_shape=None,
                                       dtype=tf.float64,
                                       name=fname)
        f_gmm.append(gmm)
      ### the code Gaussian
      z_gau = []
      for mean, stddev, code_name in zip(tf.transpose(qZ.mean()),
                                         tf.transpose(qZ.stddev()),
                                         self.code_names):
        mean = tf.cast(mean, tf.float64)
        stddev = tf.cast(stddev, tf.float64)
        z_gau.append(
            tfd.Independent(tfd.Normal(loc=mean, scale=stddev, name=code_name),
                            reinterpreted_batch_ndims=1))
      ### calculate the KL divergence
      density_matrix = np.empty(shape=(n_codes, n_factors), dtype=np.float64)
      for zidx, gau in enumerate(z_gau):
        for fidx, gmm in enumerate(f_gmm):
          # non-analytic KL(q=gau||p=gmm)
          samples = gau.sample(n_samples)
          with tf.device("/CPU:0"):
            qllk = gau.log_prob(samples)
            pllk = tf.reduce_sum(tf.reshape(
                gmm.log_prob(tf.reshape(samples, (-1, 1))), (n_samples, -1)),
                                 axis=1)
            kl = tf.reduce_mean(qllk - pllk)
          density_matrix[zidx, fidx] = kl.numpy()
      if bool(normalize_per_code):
        density_matrix = density_matrix / np.sum(
            density_matrix, axis=1, keepdims=True)
      matrices.append(density_matrix)
    ### decoding and return
    train, test = matrices
    if decode:
      ids = search.diagonal_linear_assignment(train.T)
      train = train[ids]
      test = test[ids]
      return train, test, ids
    return train, test
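
The inner loop estimates `DKL(q||p)` by Monte Carlo, since the KL between a Gaussian and a Gaussian mixture has no closed form: sample from `q` and average `log q(s) - log p(s)`. A minimal standalone sketch of that estimator on toy 1-D distributions (all values hypothetical):

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

q = tfd.Normal(loc=0., scale=1.)              # stands in for one latent code
p = tfd.MixtureSameFamily(                    # stands in for one factor GMM
    mixture_distribution=tfd.Categorical(logits=[0., 0.]),
    components_distribution=tfd.Normal(loc=[-1., 1.], scale=[1., 1.]))

samples = q.sample(1000)
kl_estimate = tf.reduce_mean(q.log_prob(samples) - p.log_prob(samples))
print(float(kl_estimate))  # non-analytic KL(q || p), non-negative in expectation
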