Example #1
import numpy as np


def discrete_entropy(labels):
  r""" Compute the discrete entropy of an integer sample set, iterating over
  the columns of a 2-D array.

  Arguments:
    labels : 1-D or 2-D array

  Returns:
    entropy : a scalar, or an array of shape `[n_factors]`
  """
  labels = np.atleast_1d(labels)
  if labels.ndim == 1:
    return entropy1D(labels.ravel())
  elif labels.ndim > 2:
    raise ValueError("Only support 1-D or 2-D array for labels entropy.")
  num_factors = labels.shape[1]
  h = np.zeros(num_factors)
  for j in range(num_factors):
    h[j] = entropy1D(labels[:, j])
  return h
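A minimal usage sketch. `entropy1D` is not shown above; the stand-in below (Shannon entropy, in nats, of a 1-D integer array) is an illustrative assumption, not the library's actual implementation:

import numpy as np

def entropy1D(labels):
  # empirical label distribution -> Shannon entropy in nats (illustrative stand-in)
  _, counts = np.unique(np.asarray(labels), return_counts=True)
  p = counts / counts.sum()
  return -np.sum(p * np.log(p))

y = np.random.randint(0, 5, size=(1000, 3))   # 1000 samples, 3 discrete factors
print(discrete_entropy(y[:, 0]))              # scalar: entropy of a single factor
print(discrete_entropy(y))                    # array of shape [3]: one entropy per factor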
Example #2
import warnings
from typing import Callable, Generator, Iterator, Optional, Union

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tqdm import tqdm

tfd = tfp.distributions
# `entropy1D` (entropy of a 1-D integer label array) is assumed to be defined
# elsewhere in the module; see Example #1.


def giga(X_y: Union[tf.data.Dataset, Generator, Iterator],
         fn_qz_x: Callable[[tf.Tensor], tfd.Distribution],
         fn_px_z: Callable[[tf.Tensor], tfd.Distribution],
         pz: Optional[tfd.Distribution] = None,
         n_samples: int = 10000,
         n_mcmc: int = 100,
         batch_size: int = 32,
         adjusted: bool = True,
         verbose: bool = True):
  C_mcmc = tf.math.log(tf.constant(n_mcmc, dtype=tf.float32))
  ## process the data into mini batches
  if not isinstance(X_y, (tf.data.Dataset, Generator, Iterator)):
    X, y = X_y
    if not isinstance(X, tf.data.Dataset):
      X = tf.data.Dataset.from_tensor_slices(X).batch(batch_size)
    if not isinstance(y, tf.data.Dataset):
      y = tf.data.Dataset.from_tensor_slices(y).batch(batch_size)
    X_y = tf.data.Dataset.zip((X, y))
  if isinstance(X_y, tf.data.Dataset):
    X_y = X_y.repeat(-1).shuffle(1000)
    X_y = iter(X_y)
  ## iterate the dataset until we have enough n_samples
  count = 0
  log_qz_x = []
  log_px_z = []
  log_pz = []
  qy = []
  prog = tqdm(desc='GIGA',
              total=n_samples * n_mcmc,
              unit='samples',
              disable=not verbose)
  while count < n_samples:
    ## step 1: sample q(x, y)
    try:
      X, y = next(X_y)
    except StopIteration:
      warnings.warn(f'Not enough data for {n_samples} samples.')
      break
    batch_x = min(X.shape[0], n_samples - count)
    X = X[:batch_x]
    y = y[:batch_x]
    qy.append(y)
    qz_x = fn_qz_x(X)
    # infer the prior of z
    if pz is not None:
      pass
    elif hasattr(qz_x, 'KL_divergence'):
      pz = qz_x.KL_divergence.prior
    else:
      pz = tfd.Normal(tf.zeros(qz_x.event_shape, dtype=qz_x.dtype), 1.)
    ## step 2: sample q(z|x)
    batch_llk_px = []
    batch_llk_qz = []
    batch_llk_pz = []
    for start in range(0, n_mcmc, batch_size):
      batch_z = min(n_mcmc - start, batch_size)
      z = qz_x.sample(batch_z)
      llk_qz = qz_x.log_prob(z)
      batch_llk_qz.append(llk_qz)
      llk_pz = pz.log_prob(z)
      batch_llk_pz.append(llk_pz)
      z = tf.reshape(z, (-1, z.shape[-1]))
      ## step 3: calculate log(p(x|z))
      px_z = fn_px_z(z)
      llk_x = px_z.log_prob(px_z.sample())
      llk_x = tf.reshape(llk_x, (batch_z, -1))
      batch_llk_px.append(llk_x)
      ## update progress
      prog.update(batch_z * batch_x)
    ## step 4: aggregate the log-likelihoods
    batch_llk_qz = tf.concat(batch_llk_qz, axis=0)
    log_qz_x.append(batch_llk_qz)
    batch_llk_pz = tf.concat(batch_llk_pz, axis=0)
    log_pz.append(batch_llk_pz)
    batch_llk_px = tf.concat(batch_llk_px, axis=0)
    log_px_z.append(batch_llk_px)
    count += batch_x
  ## finalizing results
  prog.clear()
  prog.close()
  log_px_z = tf.concat(log_px_z, axis=1)  # [n_mcmc, n_samples]
  log_qz_x = tf.concat(log_qz_x, axis=1)  # [n_mcmc, n_samples]
  log_pz = tf.concat(log_pz, axis=1)  # [n_mcmc, n_samples]
  qy = tf.concat(qy, axis=0)
  n_factors = qy.shape[1]
  ## Calculate I(X; Z)
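  # MCMC importance-sampling estimate of the marginal log-likelihood:
  #   log p(x) ~= logsumexp_k[ log p(x|z_k) + log p(z_k) - log q(z_k|x) ] - log(n_mcmc)
  # with z_k ~ q(z|x); C_mcmc is the log(n_mcmc) normalization term.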
  log_pxz = log_px_z + log_pz - log_qz_x
  log_px = tf.reduce_logsumexp(log_pxz, axis=0, keepdims=True) - C_mcmc
  log_qx = tf.math.log(1. / n_samples)
  pxz = tf.math.exp(log_pxz - log_qx)
  I_xz = pxz * (log_px_z - log_px)  # [n_mcmc, n_samples]
  # entropy of x
  H_x = tf.reduce_mean(-pxz * log_px)
  # entropy of z
  H_z = tf.reduce_mean(-tf.math.exp(log_pz - log_qz_x) * log_pz)
  I_xz = tf.reduce_mean(I_xz, axis=0)  # [n_samples]
  # I_xz = I_xz / tf.math.sqrt(H_x * H_z)
  I_xz = tf.reduce_mean(I_xz)
  ## Calculate I(Z; Y) - H(Z)
  I_zy = {}  # for each factor
  for i in range(n_factors):
    y = np.asarray(qy[:, i], dtype=np.int32)
    I_zyi = {}
    labels = np.unique(y)
    for yk in labels:
      ids = (y == yk)
      K = np.sum(ids)
      log_qz_xk = tf.boolean_mask(log_qz_x, ids, axis=1)
      log_qz_xk = tf.reduce_logsumexp(log_qz_xk, axis=1) - tf.math.log(
          tf.constant(K, dtype=tf.float32))
      I_zyi[yk] = tf.reduce_mean(log_qz_xk, axis=0)
    # average among labels within a factor
    I_zyi = sum(I_zyi.values()) / len(labels)
    I_zy[i] = I_zyi
  # average among all factors
  H_y = np.array([entropy1D(qy[:, i]) for i in range(n_factors)])
  I_zy = np.array(list(I_zy.values()))
  I_zy = np.mean(I_zy / H_y)
  ## giga
  return I_xz + I_zy
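A hedged end-to-end sketch of calling giga. The linear encoder/decoder, the explicit pz, and all shapes below are illustrative assumptions (in practice fn_qz_x / fn_px_z would typically be the encoder and decoder of a trained VAE); it also reuses the entropy1D stand-in from Example #1:

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# toy data: 256 observations with 8 features and 3 discrete ground-truth factors
X = np.random.randn(256, 8).astype('float32')
y = np.random.randint(0, 4, size=(256, 3))

# hypothetical linear "encoder" q(z|x) and "decoder" p(x|z); each returns a
# distribution with a non-trivial event shape so log_prob is scalar per sample
W_enc = tf.random.normal([8, 2])
W_dec = tf.random.normal([2, 8])

def encode_fn(x):
  return tfd.Independent(tfd.Normal(tf.matmul(x, W_enc), 1.), 1)

def decode_fn(z):
  return tfd.Independent(tfd.Normal(tf.matmul(z, W_dec), 1.), 1)

# pass pz explicitly so its event shape matches q(z|x)
pz = tfd.Independent(tfd.Normal(tf.zeros([2]), 1.), 1)

score = giga((X, y), fn_qz_x=encode_fn, fn_px_z=decode_fn, pz=pz,
             n_samples=256, n_mcmc=16, batch_size=32)
print(float(score))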