Example #1
 def testInvalidShapeAtEval(self):
   with self.test_session(use_gpu=self._use_gpu):
     v = tf.placeholder(dtype=tf.float32)
     with self.assertRaisesOpError("input must be at least 2-dim"):
       tf.matrix_diag_part(v).eval(feed_dict={v: 0.0})
     with self.assertRaisesOpError("last two dimensions must be equal"):
       tf.matrix_diag_part(v).eval(feed_dict={v: [[0, 1], [1, 0], [0, 0]]})
Example #2
  def testSampleWithSameSeed(self):
    if tf.executing_eagerly():
      return
    scale = make_pd(1., 2)
    df = 4

    chol_w = tfd.Wishart(
        df, scale_tril=chol(scale), input_output_cholesky=False)

    x = self.evaluate(chol_w.sample(1, seed=42))
    chol_x = [chol(x[0])]

    full_w = tfd.Wishart(df, scale, input_output_cholesky=False)
    self.assertAllClose(x, self.evaluate(full_w.sample(1, seed=42)))

    chol_w_chol = tfd.Wishart(
        df, scale_tril=chol(scale), input_output_cholesky=True)
    self.assertAllClose(chol_x, self.evaluate(chol_w_chol.sample(1, seed=42)))
    eigen_values = tf.matrix_diag_part(chol_w_chol.sample(1000, seed=42))
    np.testing.assert_array_less(0., self.evaluate(eigen_values))

    full_w_chol = tfd.Wishart(df, scale=scale, input_output_cholesky=True)
    self.assertAllClose(chol_x, self.evaluate(full_w_chol.sample(1, seed=42)))
    eigen_values = tf.matrix_diag_part(full_w_chol.sample(1000, seed=42))
    np.testing.assert_array_less(0., self.evaluate(eigen_values))
Example #3
 def testRectangular(self):
   with self.test_session(use_gpu=self._use_gpu):
     mat = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
     mat_diag = tf.matrix_diag_part(mat)
     self.assertAllEqual(mat_diag.eval(), np.array([1.0, 5.0]))
     mat = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
     mat_diag = tf.matrix_diag_part(mat)
     self.assertAllEqual(mat_diag.eval(), np.array([1.0, 4.0]))
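For reference, a minimal NumPy sketch (plain NumPy, not the TensorFlow op) of the behaviour these tests exercise: tf.matrix_diag_part returns the main diagonal of the two innermost dimensions, of length min(rows, cols), for any batch shape.

import numpy as np

def diag_part(a):
    # main diagonal of the two innermost dimensions, length min(rows, cols)
    return np.diagonal(a, axis1=-2, axis2=-1)

print(diag_part(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])))    # [1. 5.]
print(diag_part(np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])))  # [1. 4.]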
Example #4
 def _variance(self):
   if distribution_util.is_diagonal_scale(self.scale):
     return 2. * tf.square(self.scale.diag_part())
   elif (isinstance(self.scale, tf.linalg.LinearOperatorLowRankUpdate) and
         self.scale.is_self_adjoint):
     return tf.matrix_diag_part(2. * self.scale.matmul(self.scale.to_dense()))
   else:
     return 2. * tf.matrix_diag_part(
         self.scale.matmul(self.scale.to_dense(), adjoint_arg=True))
Example #5
 def _maybe_attach_assertion(x):
   if not validate_args:
     return x
   if assert_positive:
     return control_flow_ops.with_dependencies([
         tf.assert_positive(
             tf.matrix_diag_part(x), message="diagonal part must be positive"),
     ], x)
   return control_flow_ops.with_dependencies([
       tf.assert_none_equal(
           tf.matrix_diag_part(x),
           tf.zeros([], x.dtype),
           message="diagonal part must be non-zero"),
   ], x)
Example #6
  def testSample(self):
    with self.test_session():
      scale = make_pd(1., 2)
      df = 4

      chol_w = distributions.WishartCholesky(
          df, chol(scale), cholesky_input_output_matrices=False)

      x = chol_w.sample_n(1, seed=42).eval()
      chol_x = [chol(x[0])]

      full_w = distributions.WishartFull(
          df, scale, cholesky_input_output_matrices=False)
      self.assertAllClose(x, full_w.sample_n(1, seed=42).eval())

      chol_w_chol = distributions.WishartCholesky(
          df, chol(scale), cholesky_input_output_matrices=True)
      self.assertAllClose(chol_x, chol_w_chol.sample_n(1, seed=42).eval())
      eigen_values = tf.matrix_diag_part(chol_w_chol.sample_n(1000, seed=42))
      np.testing.assert_array_less(0., eigen_values.eval())

      full_w_chol = distributions.WishartFull(
          df, scale, cholesky_input_output_matrices=True)
      self.assertAllClose(chol_x, full_w_chol.sample_n(1, seed=42).eval())
      eigen_values = tf.matrix_diag_part(full_w_chol.sample_n(1000, seed=42))
      np.testing.assert_array_less(0., eigen_values.eval())

      # Check first and second moments.
      df = 4.
      chol_w = distributions.WishartCholesky(
          df=df,
          scale=chol(make_pd(1., 3)),
          cholesky_input_output_matrices=False)
      x = chol_w.sample_n(10000, seed=42)
      self.assertAllEqual((10000, 3, 3), x.get_shape())

      moment1_estimate = tf.reduce_mean(x, reduction_indices=[0]).eval()
      self.assertAllClose(chol_w.mean().eval(),
                          moment1_estimate,
                          rtol=0.05)

      # The Variance estimate uses the squares rather than outer-products
      # because Wishart.Variance is the diagonal of the Wishart covariance
      # matrix.
      variance_estimate = (
          tf.reduce_mean(tf.square(x), reduction_indices=[0]) -
          tf.square(moment1_estimate)).eval()
      self.assertAllClose(chol_w.variance().eval(),
                          variance_estimate,
                          rtol=0.05)
Example #7
 def testMatrix(self):
   with self.test_session(use_gpu=self._use_gpu):
     v = np.array([1.0, 2.0, 3.0])
     mat = np.diag(v)
     mat_diag = tf.matrix_diag_part(mat)
     self.assertEqual((3,), mat_diag.get_shape())
     self.assertAllEqual(mat_diag.eval(), v)
Example #8
 def _forward_log_det_jacobian(self, x):
   # We formulate the Jacobian with respect to the flattened matrices
   # `vec(x)` and `vec(y)`. Suppose for notational convenience that
   # the first `n` entries of `vec(x)` are the diagonal of `x`, and
   # the remaining `n**2-n` entries are the off-diagonals in
   # arbitrary order. Then the Jacobian is a block-diagonal matrix,
   # with the Jacobian of the diagonal bijector in the first block,
   # and the identity Jacobian for the remaining entries (since this
   # bijector acts as the identity on non-diagonal entries):
   #
   # J_vec(x) (vec(y)) =
   # -------------------------------
   # | J_diag(x) (diag(y))      0  | n entries
   # |                             |
   # | 0                        I  | n**2-n entries
   # -------------------------------
   #   n                     n**2-n
   #
   # Since the log-det of the second (identity) block is zero, the
   # overall log-det-jacobian is just the log-det of first block,
   # from the diagonal bijector.
   #
   # Note that for elementwise operations (exp, softplus, etc) the
   # first block of the Jacobian will itself be a diagonal matrix,
   # but our implementation does not require this to be true.
   return self._diag_bijector.forward_log_det_jacobian(
       tf.matrix_diag_part(x), event_ndims=1)
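A minimal NumPy sketch (finite differences on a 3x3 example; not the TFP bijector) of the block-diagonal Jacobian argument in the comment above: if only the diagonal is exponentiated, log|det J| of the flattened map equals sum(diag(x)), the forward log-det-Jacobian of the diagonal exp bijector.

import numpy as np

def transform_diag_exp(x):
    # exponentiate the diagonal, leave off-diagonal entries unchanged
    y = x.copy()
    np.fill_diagonal(y, np.exp(np.diag(x)))
    return y

x = np.random.randn(3, 3)
eps = 1e-6
jac = np.zeros((9, 9))
for j in range(9):
    dx = np.zeros(9)
    dx[j] = eps
    jac[:, j] = (transform_diag_exp(x + dx.reshape(3, 3)) -
                 transform_diag_exp(x - dx.reshape(3, 3))).ravel() / (2 * eps)

numeric_fldj = np.log(abs(np.linalg.det(jac)))
analytic_fldj = np.sum(np.diag(x))  # exp bijector on the diagonal: sum of diag(x)
print(np.allclose(numeric_fldj, analytic_fldj, atol=1e-4))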
Example #9
def multivariate_normal(x, mu, L):
    """
    Computes the log-density of a multivariate normal.
    :param x  : Dx1 or DxN sample(s) for which we want the density
    :param mu : Dx1 or DxN mean(s) of the normal distribution
    :param L  : DxD Cholesky decomposition of the covariance matrix
    :return p : (1,) or (N,) vector of log densities for each of the N x's and/or mu's

    x and mu are either vectors or matrices. If both are vectors (N,1):
    p[0] = log pdf(x) where x ~ N(mu, LL^T)
    If at least one is a matrix, we assume independence over the *columns*:
    the number of rows must match the size of L. Broadcasting behaviour:
    p[n] = log pdf of:
    x[n] ~ N(mu, LL^T) or x ~ N(mu[n], LL^T) or x[n] ~ N(mu[n], LL^T)
    """
    if x.shape.ndims is None:
        warnings.warn('Shape of x must be 2D at computation.')
    elif x.shape.ndims != 2:
        raise ValueError('Shape of x must be 2D.')
    if mu.shape.ndims is None:
        warnings.warn('Shape of mu may be unknown or not 2D.')
    elif mu.shape.ndims != 2:
        raise ValueError('Shape of mu must be 2D.')

    d = x - mu
    alpha = tf.matrix_triangular_solve(L, d, lower=True)
    num_dims = tf.cast(tf.shape(d)[0], L.dtype)
    p = - 0.5 * tf.reduce_sum(tf.square(alpha), 0)
    p -= 0.5 * num_dims * np.log(2 * np.pi)
    p -= tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
    return p
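If SciPy is available, a minimal cross-check of the formula above; the D x N shapes, sample count, and random covariance below are illustrative assumptions.

import numpy as np
from scipy.stats import multivariate_normal

D, N = 3, 5
rng = np.random.RandomState(0)
A = rng.randn(D, D)
cov = A @ A.T + D * np.eye(D)
L = np.linalg.cholesky(cov)
x = rng.randn(D, N)
mu = rng.randn(D, 1)

d = x - mu
alpha = np.linalg.solve(L, d)             # plays the role of matrix_triangular_solve
p = -0.5 * np.sum(alpha ** 2, 0)
p -= 0.5 * D * np.log(2 * np.pi)
p -= np.sum(np.log(np.diag(L)))

expected = np.array([multivariate_normal(mean=mu[:, 0], cov=cov).logpdf(x[:, n])
                     for n in range(N)])
print(np.allclose(p, expected))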
Example #10
def gauss_kl(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    num_latent = tf.cast(tf.shape(q_sqrt)[2], float_type)
    KL += num_latent * 0.5 * tf.reduce_sum(tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]), float_type)  # constant term
    Lq = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # force lower triangle
    KL += -0.5*tf.reduce_sum(tf.log(tf.square(tf.matrix_diag_part(Lq))))  # logdet
    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.pack([tf.shape(Lq)[0], 1, 1]))
    LiLq = tf.matrix_triangular_solve(L_tiled, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
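For reference, a minimal NumPy sketch (single latent function; not GPflow) of the closed form that gauss_kl accumulates term by term:
    KL[N(m, Lq Lq^T) || N(0, K)] = 0.5 * (tr(K^{-1} S) + m^T K^{-1} m - M + log|K| - log|S|),  with S = Lq Lq^T

import numpy as np

def kl_gauss(m, Lq, K):
    # m: (M,) mean, Lq: (M, M) lower-triangular sqrt of q's covariance, K: (M, M) prior covariance
    M = m.shape[0]
    S = Lq @ Lq.T
    return 0.5 * (np.trace(np.linalg.solve(K, S)) + m @ np.linalg.solve(K, m) - M
                  + np.linalg.slogdet(K)[1] - np.linalg.slogdet(S)[1])

rng = np.random.RandomState(0)
B = rng.randn(4, 4)
K = B @ B.T + 4 * np.eye(4)
print(np.isclose(kl_gauss(np.zeros(4), np.linalg.cholesky(K), K), 0.0))  # KL = 0 when q = p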
Example #11
def _expectation(p, mean, none, kern, feat, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <x_n K_{x_n, Z}>_p(x_n)
        - K_{.,.} :: RBF kernel

    :return: NxDxM
    """
    Xmu, Xcov = p.mu, p.cov

    with tf.control_dependencies([tf.assert_equal(
            tf.shape(Xmu)[1], tf.constant(kern.input_dim, settings.tf_int),
            message="Currently cannot handle slicing in exKxz.")]):
        Xmu = tf.identity(Xmu)

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        D = tf.shape(Xmu)[1]
        lengthscales = kern.lengthscales if kern.ARD \
            else tf.zeros((D,), dtype=settings.float_type) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD
        all_diffs = tf.transpose(feat.Z) - tf.expand_dims(Xmu, 2)  # NxDxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        exponent_mahalanobis = tf.cholesky_solve(chol_L_plus_Xcov, all_diffs)  # NxDxM
        non_exponent_term = tf.matmul(Xcov, exponent_mahalanobis, transpose_a=True)
        non_exponent_term = tf.expand_dims(Xmu, 2) + non_exponent_term  # NxDxM

        exponent_mahalanobis = tf.reduce_sum(all_diffs * exponent_mahalanobis, 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)[:, None, :] * non_exponent_term
Example #12
def _expectation(p, kern, feat, none1, none2, nghp=None):
    """
    Compute the expectation:
    <K_{X, Z}>_p(X)
        - K_{.,.} :: RBF kernel

    :return: NxM
    """
    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)
        D = tf.shape(Xmu)[1]
        if kern.ARD:
            lengthscales = kern.lengthscales
        else:
            lengthscales = tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD

        all_diffs = tf.transpose(Z) - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)
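A minimal Monte-Carlo sketch (single input point, NumPy only; shapes and constants are illustrative assumptions) of the closed form computed above, E_{x ~ N(mu, S)}[ k_RBF(x, Z) ]:

import numpy as np

rng = np.random.RandomState(0)
D, M = 2, 4
variance, lengthscales = 1.7, np.array([0.8, 1.3])
mu = rng.randn(D)
A = rng.randn(D, D)
S = A @ A.T + np.eye(D)                        # Xcov for one data point
Z = rng.randn(M, D)

# closed form: variance * |L|^{1/2} |L + S|^{-1/2} * exp(-0.5 (z - mu)^T (L + S)^{-1} (z - mu)), L = diag(lengthscales^2)
chol = np.linalg.cholesky(np.diag(lengthscales ** 2) + S)
sol = np.linalg.solve(chol, (Z - mu).T)        # D x M
closed = variance * (np.prod(lengthscales) / np.prod(np.diag(chol))) * np.exp(-0.5 * np.sum(sol ** 2, 0))

# Monte-Carlo estimate
x = rng.multivariate_normal(mu, S, size=200000)
sqdist = np.sum(((x[:, None, :] - Z[None, :, :]) / lengthscales) ** 2, axis=-1)
mc = variance * np.exp(-0.5 * sqdist).mean(axis=0)
print(np.max(np.abs(mc - closed)))             # small, up to Monte-Carlo noise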
Example #13
    def _build_likelihood(self):
        """
        q_alpha, q_lambda are variational parameters, size N x R
        This method computes the variational lower bound on the likelihood,
        which is:
            E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F)]
        with
            q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .
        """
        K = self.kern.K(self.X)
        K_alpha = tf.matmul(K, self.q_alpha)
        f_mean = K_alpha + self.mean_function(self.X)

        # compute the variance for each of the outputs
        I = tf.tile(tf.expand_dims(tf.eye(self.num_data, dtype=settings.float_type), 0),
                    [self.num_latent, 1, 1])
        A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
            tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
        L = tf.cholesky(A)
        Li = tf.matrix_triangular_solve(L, I)
        tmp = Li / tf.expand_dims(tf.transpose(self.q_lambda), 1)
        f_var = 1. / tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

        # some statistics about A are used in the KL
        A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
        trAi = tf.reduce_sum(tf.square(Li))

        KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
                    tf.reduce_sum(K_alpha * self.q_alpha))

        v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
        return tf.reduce_sum(v_exp) - KL
Example #14
def _expectation(p, rbf_kern, feat1, lin_kern, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - K_rbf_{.,.} :: RBF kernel
        - K_lin_{.,.} :: Linear kernel
    Different Z1 and Z2 are handled if p is diagonal and K_lin and K_rbf have disjoint
    active_dims, in which case the joint expectations simplify into a product of expectations

    :return: NxM1xM2
    """
    if rbf_kern.on_separate_dims(lin_kern) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (rbf_kern, feat1))
        eKxz2 = expectation(p, (lin_kern, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2:
        raise NotImplementedError("Features have to be the same for both kernels.")

    if rbf_kern.active_dims != lin_kern.active_dims:
        raise NotImplementedError("active_dims have to be the same for both kernels.")

    with params_as_tensors_for(rbf_kern), params_as_tensors_for(lin_kern), \
         params_as_tensors_for(feat1), params_as_tensors_for(feat2):
        # use only active dimensions
        Xcov = rbf_kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = rbf_kern._slice(feat1.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        lin_kern_variances = lin_kern.variance if lin_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + lin_kern.variance

        rbf_kern_lengthscales = rbf_kern.lengthscales if rbf_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + rbf_kern.lengthscales  ## Begin RBF eKxz code:

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(rbf_kern_lengthscales ** 2) + Xcov)  # NxDxD

        Z_transpose = tf.transpose(Z)
        all_diffs = Z_transpose - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(rbf_kern_lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N
        eKxz_rbf = rbf_kern.variance * (determinants[:, None] * exponent_mahalanobis)  ## NxM <- End RBF eKxz code

        tiled_Z = tf.tile(tf.expand_dims(Z_transpose, 0), (N, 1, 1))  # NxDxM
        z_L_inv_Xcov = tf.matmul(tiled_Z, Xcov / rbf_kern_lengthscales[:, None] ** 2., transpose_a=True)  # NxMxD

        cross_eKzxKxz = tf.cholesky_solve(
            chol_L_plus_Xcov, (lin_kern_variances * rbf_kern_lengthscales ** 2.)[..., None] * tiled_Z)  # NxDxM

        cross_eKzxKxz = tf.matmul((z_L_inv_Xcov + Xmu[:, None, :]) * eKxz_rbf[..., None], cross_eKzxKxz)  # NxMxM
        return cross_eKzxKxz
Example #15
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - Ka_{.,.}, Kb_{.,.} :: RBF kernels
    Ka and Kb as well as Z1 and Z2 can differ from each other, but this is supported
    only if the Gaussian p is Diagonal (p.cov NxD) and Ka, Kb have disjoint active_dims
    in which case the joint expectations simplify into a product of expectations

    :return: NxMxM
    """
    if kern1.on_separate_dims(kern2) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (kern1, feat1))
        eKxz2 = expectation(p, (kern2, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2 or kern1 != kern2:
        raise NotImplementedError("The expectation over two kernels has only an "
                                  "analytical implementation if both kernels are equal.")

    kern = kern1
    feat = feat1

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        squared_lengthscales = kern.lengthscales ** 2. if kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales ** 2.

        sqrt_det_L = tf.reduce_prod(0.5 * squared_lengthscales) ** 0.5
        C = tf.cholesky(0.5 * tf.matrix_diag(squared_lengthscales) + Xcov)  # NxDxD
        dets = sqrt_det_L / tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(C)), axis=1))  # N

        C_inv_mu = tf.matrix_triangular_solve(C, tf.expand_dims(Xmu, 2), lower=True)  # NxDx1
        C_inv_z = tf.matrix_triangular_solve(C,
                                             tf.tile(tf.expand_dims(tf.transpose(Z) / 2., 0), [N, 1, 1]),
                                             lower=True)  # NxDxM
        mu_CC_inv_mu = tf.expand_dims(tf.reduce_sum(tf.square(C_inv_mu), 1), 2)  # Nx1x1
        z_CC_inv_z = tf.reduce_sum(tf.square(C_inv_z), 1)  # NxM
        zm_CC_inv_zn = tf.matmul(C_inv_z, C_inv_z, transpose_a=True)  # NxMxM
        two_z_CC_inv_mu = 2 * tf.matmul(C_inv_z, C_inv_mu, transpose_a=True)[:, :, 0]  # NxM

        exponent_mahalanobis = mu_CC_inv_mu + tf.expand_dims(z_CC_inv_z, 1) + \
                               tf.expand_dims(z_CC_inv_z, 2) + 2 * zm_CC_inv_zn - \
                               tf.expand_dims(two_z_CC_inv_mu, 2) - tf.expand_dims(two_z_CC_inv_mu, 1)  # NxMxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxMxM

        # Compute sqrt(self.K(Z)) explicitly to prevent automatic gradient from
        # being NaN sometimes, see pull request #615
        kernel_sqrt = tf.exp(-0.25 * kern.square_dist(Z, None))
        return kern.variance ** 2 * kernel_sqrt * \
               tf.reshape(dets, [N, 1, 1]) * exponent_mahalanobis
示例#16
0
 def testGrad(self):
   shapes = ((3, 3), (5, 3, 3))
   with self.test_session(use_gpu=self._use_gpu):
     for shape in shapes:
       x = tf.constant(np.random.rand(*shape), dtype=np.float32)
       y = tf.matrix_diag_part(x)
       error = tf.test.compute_gradient_error(x, x.get_shape().as_list(),
                                              y, y.get_shape().as_list())
       self.assertLess(error, 1e-4)
Example #17
 def _variance(self):
   # Because df is a scalar, we need to expand dimensions to match
   # scale_operator. We use ellipses notation (...) to select all dimensions
   # and add two dimensions to the end.
   df = self.df[..., tf.newaxis, tf.newaxis]
   x = tf.sqrt(df) * self._square_scale_operator()
   d = tf.expand_dims(tf.matrix_diag_part(x), -1)
   v = tf.square(x) + tf.matmul(d, d, adjoint_b=True)
   return v
Example #18
 def testRectangularBatch(self):
   with self.test_session(use_gpu=self._use_gpu):
     v_batch = np.array([[1.0, 2.0],
                         [4.0, 5.0]])
     mat_batch = np.array(
         [[[1.0, 0.0, 0.0],
           [0.0, 2.0, 0.0]],
          [[4.0, 0.0, 0.0],
           [0.0, 5.0, 0.0]]])
     self.assertEqual(mat_batch.shape, (2, 2, 3))
     mat_batch_diag = tf.matrix_diag_part(mat_batch)
     self.assertEqual((2, 2), mat_batch_diag.get_shape())
     self.assertAllEqual(mat_batch_diag.eval(), v_batch)
Example #19
 def _forward_log_det_jacobian(self, x):
   # CholeskyToInvCholesky.forward(X) is equivalent to
   # 1) M = CholeskyOuterProduct.forward(X)
   # 2) N = invert(M)
   # 3) Y = CholeskyOuterProduct.inverse(N)
   #
   # For step 1,
   #   |Jac(outerprod(X))| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
   # For step 2,
   #   |Jac(inverse(M))| = |M|^{-(p+1)} (because M is symmetric)
   #                     = |X|^{-2(p+1)} = (prod_{j=0}^{p-1} X[j,j])^{-2(p+1)}
   #   (see http://web.mit.edu/18.325/www/handouts/handout2.pdf sect 3.0.2)
   # For step 3,
   #   |Jac(Cholesky(N))| = -|Jac(outerprod(Y))|
   #                      = 2^p prod_{j=0}^{p-1} Y[j,j]^{p-j}
   n = tf.cast(tf.shape(x)[-1], x.dtype)
   y = self._forward(x)
   return (
       (self._cholesky.forward_log_det_jacobian(x, event_ndims=2) -
        (n + 1.) * tf.reduce_sum(tf.log(tf.matrix_diag_part(x)), axis=-1)) -
       (self._cholesky.forward_log_det_jacobian(y, event_ndims=2) -
        (n + 1.) * tf.reduce_sum(tf.log(tf.matrix_diag_part(y)), axis=-1)))
Example #20
def _expectation(p, kern, none1, none2, none3, nghp=None):
    """
    Compute the expectation:
    <diag(K_{X, X})>_p(X)
        - K_{.,.} :: Linear kernel

    :return: N
    """
    with params_as_tensors_for(kern):
        # use only active dimensions
        Xmu, _ = kern._slice(p.mu, None)
        Xcov = kern._slice_cov(p.cov)

        return tf.reduce_sum(kern.variance * (tf.matrix_diag_part(Xcov) + tf.square(Xmu)), 1)
Example #21
    def _build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood. For a derivation of the terms in here, see the associated
        SGPR notebook.
        """

        num_inducing = len(self.feature)
        num_data = tf.cast(tf.shape(self.Y)[0], settings.float_type)
        output_dim = tf.cast(tf.shape(self.Y)[1], settings.float_type)

        err = self.Y - self.mean_function(self.X)
        Kdiag = self.kern.Kdiag(self.X)
        Kuf = self.feature.Kuf(self.kern, self.X)
        Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
        L = tf.cholesky(Kuu)
        sigma = tf.sqrt(self.likelihood.variance)

        # Compute intermediate matrices
        A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
        AAT = tf.matmul(A, A, transpose_b=True)
        B = AAT + tf.eye(num_inducing, dtype=settings.float_type)
        LB = tf.cholesky(B)
        Aerr = tf.matmul(A, err)
        c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma

        # compute log marginal bound
        bound = -0.5 * num_data * output_dim * np.log(2 * np.pi)
        bound += tf.negative(output_dim) * tf.reduce_sum(tf.log(tf.matrix_diag_part(LB)))
        bound -= 0.5 * num_data * output_dim * tf.log(self.likelihood.variance)
        bound += -0.5 * tf.reduce_sum(tf.square(err)) / self.likelihood.variance
        bound += 0.5 * tf.reduce_sum(tf.square(c))
        bound += -0.5 * output_dim * tf.reduce_sum(Kdiag) / self.likelihood.variance
        bound += 0.5 * output_dim * tf.reduce_sum(tf.matrix_diag_part(AAT))

        return bound
Example #22
 def fit(self, x=None, y=None):
   # p(coeffs | x, y) = Normal(coeffs |
   #   mean = (1/noise_variance) (1/noise_variance x^T x + I)^{-1} x^T y,
   #   covariance = (1/noise_variance x^T x + I)^{-1})
   # TODO(trandustin): We newly fit the data at each call. Extend to do
   # Bayesian updating.
   kernel_matrix = tf.matmul(x, x, transpose_a=True) / self.noise_variance
   coeffs_precision = tf.matrix_set_diag(
       kernel_matrix, tf.matrix_diag_part(kernel_matrix) + 1.)
   coeffs_precision_tril = tf.linalg.cholesky(coeffs_precision)
   self.coeffs_precision_tril_op = tf.linalg.LinearOperatorLowerTriangular(
       coeffs_precision_tril)
   self.coeffs_mean = self.coeffs_precision_tril_op.solvevec(
       self.coeffs_precision_tril_op.solvevec(tf.einsum('nm,n->m', x, y)),
       adjoint=True) / self.noise_variance
   # TODO(trandustin): To be fully Keras-compatible, return History object.
   return
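A minimal NumPy sketch (random data; not the class above) of the conjugate posterior the comment describes, checking that two triangular solves against the Cholesky factor reproduce the posterior mean (1/noise_variance) * (x^T x / noise_variance + I)^{-1} x^T y:

import numpy as np

rng = np.random.RandomState(0)
n, m, noise_variance = 50, 3, 0.1
x = rng.randn(n, m)
y = x @ rng.randn(m) + np.sqrt(noise_variance) * rng.randn(n)

precision = x.T @ x / noise_variance + np.eye(m)
mean_direct = np.linalg.solve(precision, x.T @ y) / noise_variance

L = np.linalg.cholesky(precision)              # mirrors coeffs_precision_tril
mean_chol = np.linalg.solve(L.T, np.linalg.solve(L, x.T @ y)) / noise_variance
print(np.allclose(mean_direct, mean_chol))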
Example #23
def zero_mean_covariance(covariance, stability=0.0):
    '''Output covariance of ReLU for zero-mean Gaussian input.

    f(x) = max(x, 0).

    Args:
        covariance: Input covariance matrix (Size, Size).
        stability: For accurate results this should be zero
            if used in training, use a value like 1e-4 for stability.

    Returns:
        Output covariance of ReLU for zero-mean Gaussian input (Size, Size).
    '''

    S = outer(tf.sqrt(tf.matrix_diag_part(covariance)))
    V = tf.clip_by_value(covariance / S, stability - 1.0, 1.0 - stability)
    Q = tf.acos(-V) * V + tf.sqrt(1.0 - (V**2.0)) - 1.0
    return S * Q * (1.0 / (2.0 * math.pi))
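A minimal Monte-Carlo sketch (NumPy, random 3x3 covariance; not part of the library above) checking the closed form with stability=0: the covariance of relu(x) for zero-mean Gaussian x should match S * Q / (2*pi) up to sampling noise.

import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(3, 3)
cov = A @ A.T + np.eye(3)

s = np.sqrt(np.diag(cov))
S = np.outer(s, s)
V = np.clip(cov / S, -1.0, 1.0)
closed = S * (np.arccos(-V) * V + np.sqrt(1.0 - V ** 2) - 1.0) / (2.0 * np.pi)

x = rng.multivariate_normal(np.zeros(3), cov, size=500000)
mc = np.cov(np.maximum(x, 0.0), rowvar=False)  # sample covariance of relu(x)
print(np.max(np.abs(mc - closed)))             # small, up to Monte-Carlo noise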
Example #24
 def _assertions(self, x):
   if not self.validate_args:
     return []
   x_shape = tf.shape(x)
   is_matrix = tf.assert_rank_at_least(
       x, 2,
       message="Input must have rank at least 2.")
   is_square = tf.assert_equal(
       x_shape[-2], x_shape[-1],
       message="Input must be a square matrix.")
   diag_part_x = tf.matrix_diag_part(x)
   is_lower_triangular = tf.assert_equal(
       tf.matrix_band_part(x, 0, -1),  # Preserves triu, zeros rest.
       tf.matrix_diag(diag_part_x),
       message="Input must be lower triangular.")
   is_positive_diag = tf.assert_positive(
       diag_part_x,
       message="Input must have all positive diagonal entries.")
   return [is_matrix, is_square, is_lower_triangular, is_positive_diag]
Example #25
    def _build_likelihood(self):
        """
        Construct a tensorflow function to compute the bound on the marginal
        likelihood.
        """

        # FITC approximation to the log marginal likelihood is
        # log ( normal( y | mean, K_fitc ) )
        # where K_fitc = Qff + diag( \nu )
        # where Qff = Kfu Kuu^{-1} Kuf
        # with \nu_i = Kff_{i,i} - Qff_{i,i} + \sigma^2

        # We need to compute the Mahalanobis term -0.5* err^T K_fitc^{-1} err
        # (summed over functions).

        # We need to deal with the matrix inverse term.
        # K_fitc^{-1} = ( Qff + \diag( \nu ) )^{-1}
        #            = ( V^T V + \diag( \nu ) )^{-1}
        # Applying the Woodbury identity we obtain
        #            = \diag( \nu^{-1} ) - \diag( \nu^{-1} ) V^T ( I + V \diag( \nu^{-1} ) V^T )^{-1} V \diag(\nu^{-1} )
        # Let \beta =  \diag( \nu^{-1} ) err
        # and let \alpha = V \beta
        # then Mahalanobis term = -0.5* ( \beta^T err - \alpha^T Solve( I + V \diag( \nu^{-1} ) V^T, alpha ) )

        err, nu, Luu, L, alpha, beta, gamma = self._build_common_terms()

        mahalanobisTerm = -0.5 * tf.reduce_sum(tf.square(err) / tf.expand_dims(nu, 1)) \
                          + 0.5 * tf.reduce_sum(tf.square(gamma))

        # We need to compute the log normalizing term -N/2 \log 2 pi - 0.5 \log \det( K_fitc )

        # We need to deal with the log determinant term.
        # \log \det( K_fitc ) = \log \det( Qff + \diag( \nu ) )
        #                    = \log \det( V^T V + \diag( \nu ) )
        # Applying the determinant lemma we obtain
        #                    = \log [ \det \diag( \nu ) \det( I + V \diag( \nu^{-1} ) V^T ) ]
        #                    = \log [ \det \diag( \nu ) ] + \log [ \det( I + V \diag( \nu^{-1} ) V^T ) ]

        constantTerm = -0.5 * self.num_data * tf.log(tf.constant(2. * np.pi, settings.float_type))
        logDeterminantTerm = -0.5 * tf.reduce_sum(tf.log(nu)) - tf.reduce_sum(tf.log(tf.matrix_diag_part(L)))
        logNormalizingTerm = constantTerm + logDeterminantTerm

        return mahalanobisTerm + logNormalizingTerm * self.num_latent
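A minimal NumPy sketch (random V and nu; not GPflow) checking the two identities used in the comments above: the Woodbury identity for ( V^T V + diag(nu) )^{-1} and the matrix determinant lemma for its log-determinant.

import numpy as np

rng = np.random.RandomState(0)
M, N = 3, 6
V = rng.randn(M, N)
nu = rng.rand(N) + 0.5

K = V.T @ V + np.diag(nu)
nu_inv = np.diag(1.0 / nu)
inner = np.eye(M) + V @ nu_inv @ V.T

woodbury = nu_inv - nu_inv @ V.T @ np.linalg.solve(inner, V @ nu_inv)
print(np.allclose(np.linalg.inv(K), woodbury))

logdet_lemma = np.sum(np.log(nu)) + np.linalg.slogdet(inner)[1]
print(np.allclose(np.linalg.slogdet(K)[1], logdet_lemma))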
Example #26
 def testCovarianceFromSampling(self):
   # We will test mean, cov, var, stddev on a DirichletMultinomial constructed
   # via broadcast between alpha, n.
   alpha = np.array([[1., 2, 3],
                     [2.5, 4, 0.01]], dtype=np.float32)
   # Ideally we'd be able to test broadcasting but, the multinomial sampler
   # doesn't support different total counts.
   n = np.float32(5)
   with self.cached_session() as sess:
     # batch_shape=[2], event_shape=[3]
     dist = ds.DirichletMultinomial(n, alpha)
     x = dist.sample(int(250e3), seed=1)
     sample_mean = tf.reduce_mean(x, 0)
     x_centered = x - sample_mean[tf.newaxis, ...]
     sample_cov = tf.reduce_mean(tf.matmul(
         x_centered[..., tf.newaxis],
         x_centered[..., tf.newaxis, :]), 0)
     sample_var = tf.matrix_diag_part(sample_cov)
     sample_stddev = tf.sqrt(sample_var)
     [
         sample_mean_,
         sample_cov_,
         sample_var_,
         sample_stddev_,
         analytic_mean,
         analytic_cov,
         analytic_var,
         analytic_stddev,
     ] = sess.run([
         sample_mean,
         sample_cov,
         sample_var,
         sample_stddev,
         dist.mean(),
         dist.covariance(),
         dist.variance(),
         dist.stddev(),
     ])
     self.assertAllClose(sample_mean_, analytic_mean, atol=0.04, rtol=0.)
     self.assertAllClose(sample_cov_, analytic_cov, atol=0.05, rtol=0.)
     self.assertAllClose(sample_var_, analytic_var, atol=0.05, rtol=0.)
     self.assertAllClose(sample_stddev_, analytic_stddev, atol=0.02, rtol=0.)
Example #27
  def _define_full_covariance_probs(self, shard_id, shard):
    """Defines the full covariance probabilties per example in a class.

    Updates a matrix with dimension num_examples X num_classes.

    Args:
      shard_id: id of the current shard.
      shard: current data shard, 1 X num_examples X dimensions.
    """
    diff = shard - self._means
    cholesky = tf.cholesky(self._covs + self._min_var)
    log_det_covs = 2.0 * tf.reduce_sum(tf.log(tf.matrix_diag_part(cholesky)), 1)
    x_mu_cov = tf.square(
        tf.matrix_triangular_solve(
            cholesky, tf.transpose(
                diff, perm=[0, 2, 1]), lower=True))
    diag_m = tf.transpose(tf.reduce_sum(x_mu_cov, 1))
    self._probs[shard_id] = -0.5 * (
        diag_m + tf.to_float(self._dimensions) * tf.log(2 * np.pi) +
        log_det_covs)
Example #28
 def testCovarianceFromSampling(self):
   # We will test mean, cov, var, stddev on a Multinomial constructed via
   # broadcast between alpha, n.
   theta = np.array([[1., 2, 3],
                     [2.5, 4, 0.01]], dtype=np.float32)
   theta /= np.sum(theta, 1)[..., tf.newaxis]
   n = np.array([[10., 9.], [8., 7.], [6., 5.]], dtype=np.float32)
   with self.cached_session() as sess:
     # batch_shape=[3, 2], event_shape=[3]
     dist = multinomial.Multinomial(n, theta)
     x = dist.sample(int(1000e3), seed=1)
     sample_mean = tf.reduce_mean(x, 0)
     x_centered = x - sample_mean[tf.newaxis, ...]
     sample_cov = tf.reduce_mean(tf.matmul(
         x_centered[..., tf.newaxis],
         x_centered[..., tf.newaxis, :]), 0)
     sample_var = tf.matrix_diag_part(sample_cov)
     sample_stddev = tf.sqrt(sample_var)
     [
         sample_mean_,
         sample_cov_,
         sample_var_,
         sample_stddev_,
         analytic_mean,
         analytic_cov,
         analytic_var,
         analytic_stddev,
     ] = sess.run([
         sample_mean,
         sample_cov,
         sample_var,
         sample_stddev,
         dist.mean(),
         dist.covariance(),
         dist.variance(),
         dist.stddev(),
     ])
     self.assertAllClose(sample_mean_, analytic_mean, atol=0.01, rtol=0.01)
     self.assertAllClose(sample_cov_, analytic_cov, atol=0.01, rtol=0.01)
     self.assertAllClose(sample_var_, analytic_var, atol=0.01, rtol=0.01)
     self.assertAllClose(sample_stddev_, analytic_stddev, atol=0.01, rtol=0.01)
Example #29
 def _assertions(self, x):
   if not self.validate_args:
     return []
   shape = tf.shape(x)
   is_matrix = tf.assert_rank_at_least(
       x, 2, message="Input must have rank at least 2.")
   is_square = tf.assert_equal(
       shape[-2], shape[-1], message="Input must be a square matrix.")
   above_diagonal = tf.matrix_band_part(
       tf.matrix_set_diag(x, tf.zeros(shape[:-1], dtype=tf.float32)), 0, -1)
   is_lower_triangular = tf.assert_equal(
       above_diagonal,
       tf.zeros_like(above_diagonal),
       message="Input must be lower triangular.")
   # A lower triangular matrix is nonsingular iff all its diagonal entries are
   # nonzero.
   diag_part = tf.matrix_diag_part(x)
   is_nonsingular = tf.assert_none_equal(
       diag_part,
       tf.zeros_like(diag_part),
       message="Input must have all diagonal entries nonzero.")
   return [is_matrix, is_square, is_lower_triangular, is_nonsingular]
Example #30
 def _forward_log_det_jacobian(self, x):
   # Calculation of the Jacobian:
   #
   # Let X = (x_{ij}), 0 <= i,j < n, be a matrix of indeterminates.  Let Z =
   # X^{-1} where Z = (z_{ij}).  Then
   #
   #     dZ/dx_{ij} = (d/dt | t=0) Y(t)^{-1},
   #
   # where Y(t) = X + t*E_{ij} and E_{ij} is the matrix with a 1 in the (i,j)
   # entry and zeros elsewhere.  By the product rule,
   #
   #     0 = d/dt [Identity matrix]
   #       = d/dt [Y Y^{-1}]
   #       = Y d/dt[Y^{-1}] + dY/dt Y^{-1}
   #
   # so
   #
   #     d/dt[Y^{-1}] = -Y^{-1} dY/dt Y^{-1}
   #                  = -Y^{-1} E_{ij} Y^{-1}.
   #
   # Evaluating at t=0,
   #
   #     dZ/dx_{ij} = -Z E_{ij} Z.
   #
   # Taking the (r,s) entry of each side,
   #
   #     dz_{rs}/dx_{ij} = -z_{ri}z_{sj}.
   #
   # Now, let J be the Jacobian dZ/dX, arranged as the n^2-by-n^2 matrix whose
   # (r*n + s, i*n + j) entry is dz_{rs}/dx_{ij}.  Considering J as an n-by-n
   # block matrix with n-by-n blocks, the above expression for dz_{rs}/dx_{ij}
   # shows that the block at position (r,i) is -z_{ri}Z.  Hence
   #
   #          J = -KroneckerProduct(Z, Z),
   #     det(J) = (-1)^(n^2) (det Z)^(2n)
   #            = (-1)^n (det X)^(-2n).
   with tf.control_dependencies(self._assertions(x)):
     return (-2. * tf.cast(tf.shape(x)[-1], x.dtype.base_dtype) *
             tf.reduce_sum(tf.log(tf.abs(tf.matrix_diag_part(x))), axis=-1))
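A minimal finite-difference sketch (3x3 example in NumPy; not the TFP bijector) checking the determinant identity derived above for the map X -> X^{-1}: |det J| = |det X|^{-2n}.

import numpy as np

rng = np.random.RandomState(0)
n = 3
X = rng.randn(n, n) + n * np.eye(n)            # comfortably invertible
eps = 1e-6

J = np.zeros((n * n, n * n))
for j in range(n * n):
    dX = np.zeros(n * n)
    dX[j] = eps
    J[:, j] = (np.linalg.inv(X + dX.reshape(n, n)) -
               np.linalg.inv(X - dX.reshape(n, n))).ravel() / (2 * eps)

print(np.allclose(abs(np.linalg.det(J)),
                  abs(np.linalg.det(X)) ** (-2 * n), rtol=1e-3))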
Example #31
  def create_model(self,
                   model_input,
                   vocab_size,
                   is_training,
                   num_mixtures=None,
                   l2_penalty=1e-8,
                   **unused_params):
    """Creates a Mixture of (Logistic) Experts model.
     It also includes the possibility of gating the probabilities.
     The model consists of a per-class softmax distribution over a
     configurable number of logistic classifiers. One of the classifiers in the
     mixture is not trained, and always predicts 0.
    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      is_training: Is this the training phase?
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.
    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes.
    """
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    low_rank_gating = FLAGS.moe_low_rank_gating
    l2_penalty = FLAGS.moe_l2
    gating_probabilities = FLAGS.moe_prob_gating
    gating_input = FLAGS.moe_prob_gating_input

    input_size = model_input.get_shape().as_list()[1]
    remove_diag = FLAGS.gating_remove_diag

    if low_rank_gating == -1:
        gate_activations = layers.fully_connected(
            model_input,
            vocab_size * (num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates",
            float16_flag=FLAGS.float16_flag)
    else:
       gate_activations1 = slim.fully_connected(
            model_input,
            low_rank_gating,
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates1")
       gate_activations = slim.fully_connected(
            gate_activations1,
            vocab_size * (num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates2")


    expert_activations = layers.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts",
        float16_flag=FLAGS.float16_flag)

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures
    
    if '16' in str(expert_distribution.dtype):
        expert_distribution = tf.cast(expert_distribution,tf.float32) 

    probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    probabilities = tf.reshape(probabilities_by_class_and_batch,
                                     [-1, vocab_size])

    if gating_probabilities:
        if gating_input == 'prob':
            gating_weights = tf.get_variable("gating_prob_weights",
              [vocab_size, vocab_size],
              initializer = tf.random_normal_initializer(stddev=1 / math.sqrt(vocab_size)),
              dtype = tf.float16 if FLAGS.float16_flag else tf.float32)
            gates = tf.matmul(probabilities, tf.cast(gating_weights,tf.float32) if '16' in str(gating_weights.dtype) else gating_weights)
        else:
            gating_weights = tf.get_variable("gating_prob_weights",
              [input_size, vocab_size],
              initializer = tf.random_normal_initializer(stddev=1 / math.sqrt(vocab_size)),
              dtype = tf.float16 if FLAGS.float16_flag else tf.float32)
 
            gates = tf.matmul(model_input, tf.cast(gating_weights,tf.float32) if '16' in str(gating_weights.dtype) else gating_weights)
        
        if remove_diag:
            #removes diagonals coefficients
            diagonals = tf.matrix_diag_part(gating_weights)
            gates = gates - tf.multiply(diagonals,probabilities)

        gates = slim.batch_norm(
              gates,
              center=True,
              scale=True,
              is_training=is_training,
              scope="gating_prob_bn")

        gates = tf.sigmoid(gates)

        probabilities = tf.multiply(probabilities, gates)


    return {"predictions": probabilities}
Example #32
def gauss_kl(q_mu, q_sqrt, K=None):
    """
    Compute the KL divergence KL[q || p] between

          q(x) = N(q_mu, q_sqrt^2)
    and
          p(x) = N(0, K)

    We assume N multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt. Returns the sum of the divergences.

    q_mu is a matrix (M x L), each column contains a mean.

    q_sqrt can be a 3D tensor (L xM x M), each matrix within is a lower
        triangular square-root matrix of the covariance of q.
    q_sqrt can be a matrix (M x L), each column represents the diagonal of a
        square-root matrix of the covariance of q.

    K is the covariance of p.
    It is a positive definite matrix (M x M) or a tensor of stacked such matrices (L x M x M)
    If K is None, compute the KL divergence to p(x) = N(0, I) instead.
    """

    white = K is None
    diag = q_sqrt.get_shape().ndims == 2

    M, B = tf.shape(q_mu)[0], tf.shape(q_mu)[1]

    if white:
        alpha = q_mu  # M x B
    else:
        batch = K.get_shape().ndims == 3

        Lp = tf.cholesky(K)  # B x M x M or M x M
        q_mu = tf.transpose(
            q_mu)[:, :, None] if batch else q_mu  # B x M x 1 or M x B
        alpha = tf.matrix_triangular_solve(Lp, q_mu,
                                           lower=True)  # B x M x 1 or M x B

    if diag:
        Lq = Lq_diag = q_sqrt
        Lq_full = tf.matrix_diag(tf.transpose(q_sqrt))  # B x M x M
    else:
        Lq = Lq_full = tf.matrix_band_part(
            q_sqrt, -1, 0)  # force lower triangle # B x M x M
        Lq_diag = tf.matrix_diag_part(Lq)  # M x B

    # Mahalanobis term: μqᵀ Σp⁻¹ μq
    mahalanobis = tf.reduce_sum(tf.square(alpha))

    # Constant term: - B * M
    constant = tf.cast(-tf.size(q_mu, out_type=tf.int64),
                       dtype=settings.float_type)

    # Log-determinant of the covariance of q(x):
    logdet_qcov = tf.reduce_sum(tf.log(tf.square(Lq_diag)))

    # Trace term: tr(Σp⁻¹ Σq)
    if white:
        trace = tf.reduce_sum(tf.square(Lq))
    else:
        if diag and not batch:
            # K is M x M and q_sqrt is M x B: fast specialisation
            LpT = tf.transpose(Lp)  # M x M
            Lp_inv = tf.matrix_triangular_solve(Lp,
                                                tf.eye(
                                                    M,
                                                    dtype=settings.float_type),
                                                lower=True)  # M x M
            K_inv = tf.matrix_diag_part(
                tf.matrix_triangular_solve(
                    LpT, Lp_inv, lower=False))[:, None]  # M x M -> M x 1
            trace = tf.reduce_sum(K_inv * tf.square(q_sqrt))
        else:
            # TODO: broadcast instead of tile when tf allows (not implemented in tf <= 1.6.0)
            Lp_full = Lp if batch else tf.tile(tf.expand_dims(Lp, 0),
                                               [B, 1, 1])
            LpiLq = tf.matrix_triangular_solve(Lp_full, Lq_full, lower=True)
            trace = tf.reduce_sum(tf.square(LpiLq))

    twoKL = mahalanobis + constant - logdet_qcov + trace

    # Log-determinant of the covariance of p(x):
    if not white:
        log_sqdiag_Lp = tf.log(tf.square(tf.matrix_diag_part(Lp)))
        sum_log_sqdiag_Lp = tf.reduce_sum(log_sqdiag_Lp)
        # If K is B x M x M, num_latent is no longer implicit, no need to multiply the single kernel logdet
        scale = 1.0 if batch else tf.cast(B, settings.float_type)
        twoKL += scale * sum_log_sqdiag_Lp

    return 0.5 * twoKL
Example #33
	def build_model(self):

		# Compression Network
		# Takes x
		# Produces x'
		# Produces z = concat((z_c, z_r)) (Equations 1, 2, 3)
		# z_r = concat((eu_dist, cos_sim))
		self.input = tf.placeholder(
			shape=(None, self.input_dim),
			dtype=tf.float32,
			name="input",
		)
		encoder_1 = tf.layers.dense(
			inputs=self.input,
			units=12,
			activation=tf.tanh,
		)
		encoder_2 = tf.layers.dense(
			inputs=encoder_1,
			units=4,
			activation=tf.tanh,
		)
		self.z_c = tf.layers.dense(
			inputs=encoder_2,
			units=1,
			activation=None,
		)
		decoder_1 = tf.layers.dense(
			inputs=self.z_c,
			units=4,
			activation=tf.tanh,
		)
		decoder_2 = tf.layers.dense(
			inputs=decoder_1,
			units=12,
			activation=tf.tanh,
		)
		self.recon = tf.layers.dense(
			inputs=decoder_2,
			units=self.input_dim,
			activation=None,
		)
		
		eu_dist = tf.norm(self.input - self.recon, axis=1, keep_dims=True) / tf.norm(self.input, axis=1, keep_dims=True)
		cos_sim = tf.reduce_sum(self.input * self.recon, axis=1, keep_dims=True) / (tf.norm(self.input, axis=1, keep_dims=True) * tf.norm(self.recon, axis=1, keep_dims=True))
		self.z_r = tf.concat((eu_dist, cos_sim), axis=1)

		self.z = tf.concat((self.z_c, self.z_r), axis=1)
		
		# Estimation Network
		# Takes z = concat((z_c, z_r))
		# Produces p, where gamma = softmax(p) = soft mixture-component membership prediction (Equation 4)
		self.is_train = tf.placeholder(
			# for dropout
			shape=None,
			dtype=tf.bool,
			name="is_train",
		)
		estim_1 = tf.layers.dense(
			inputs=self.z,
			units=10,
			activation=tf.tanh,
		)
		estim_dropout = tf.layers.dropout(
			inputs=estim_1,
			rate=0.5,
			training=self.is_train,
		)
		self.p = tf.layers.dense(
			inputs=estim_dropout,
			units=self.gmm_k,
			activation=None,
		)
		self.gamma = tf.nn.softmax(self.p)

		# GMM parameters: gmm_dist (phi), gmm_mean (mu), gmm_cov (epsilon) (Equation 5)
		# self.gmm_dist = tf.expand_dims(tf.reduce_mean(self.gamma, axis=0, keep_dims=True), axis=2)
		self.gmm_dist = tf.transpose(tf.reduce_mean(self.gamma, axis=0, keep_dims=True))
		self.gmm_mean = tf.matmul(self.gamma, self.z, transpose_a=True) / tf.transpose(tf.reduce_sum(self.gamma, axis=0, keep_dims=True))
		self.diff_mean = diff_mean = tf.tile(tf.expand_dims(self.z, axis=0), tf.constant([self.gmm_k, 1, 1])) - tf.expand_dims(self.gmm_mean, axis=1)
		self.gmm_cov = tf.matmul(tf.transpose(diff_mean, perm=[0, 2, 1]), tf.expand_dims(tf.transpose(self.gamma), axis=2) * diff_mean) / tf.expand_dims(tf.transpose(tf.reduce_sum(self.gamma, axis=0, keep_dims=True)), axis=2)
		# Energy Function (Equation 6)
		energy_numerator = tf.exp(-0.5 * tf.reduce_sum(tf.matmul(self.diff_mean, self.gmm_cov) * self.diff_mean, axis=2))
		energy_denominator = tf.expand_dims(tf.expand_dims(tf.sqrt(tf.matrix_determinant(2 * np.pi * self.gmm_cov)), axis=1), axis=2)
		self.energy = tf.expand_dims(-tf.log(tf.reduce_sum(tf.reduce_sum(tf.expand_dims(self.gmm_dist, axis=1) * energy_numerator / energy_denominator, axis=0), axis=0)), axis=1)

		# Loss Function (Equation 7)
		# Reconstruction loss + lmda_1 * Energy loss + lmda_2 * Diagonal loss
		# self.recon_loss = recon_loss = tf.losses.mean_squared_error(self.input, self.recon)
		self.recon_loss = recon_loss = tf.reduce_mean(tf.norm((self.input - self.recon), axis=1) ** 2)
		self.energy_loss = energy_loss = tf.reduce_mean(self.energy)
		self.diagonal_loss = diagonal_loss = tf.reduce_sum(tf.pow(tf.matrix_diag_part(self.gmm_cov), -tf.ones_like(tf.matrix_diag_part(self.gmm_cov))))
		self.loss = recon_loss + self.lmda_1 * energy_loss + self.lmda_2 * diagonal_loss

		self.optimize = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(self.loss)
Example #34
def _add_diagonal_shift(matrix, shift):
  diag_plus_shift = tf.matrix_diag_part(matrix) + shift
  return tf.matrix_set_diag(matrix, diag_plus_shift)
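The helper above adds a jitter to the diagonal, typically before a Cholesky factorization. A minimal NumPy sketch of the same idea for a single square matrix (the 1e-6 shift is an illustrative value):

import numpy as np

def add_diagonal_shift(matrix, shift):
    # equivalent to setting diag(matrix) to diag(matrix) + shift
    return matrix + shift * np.eye(matrix.shape[-1])

K = np.array([[1.0, 1.0], [1.0, 1.0]])               # singular Gram matrix
L = np.linalg.cholesky(add_diagonal_shift(K, 1e-6))  # factorization now succeeds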
Example #35
    def measure_fock(self, modes, select=None, **kwargs):
        """
        Measures 'modes' in the Fock basis and updates remaining modes conditioned on this result.
        After measurement, the states in 'modes' are reset to the vacuum.

        Args:
            modes (Sequence[int]): which modes to measure (in increasing order).
            select (Sequence[int]): user-specified measurement value (used instead of random sampling)
            **kwargs: can be used to pass a session or a feed_dict. Otherwise a temporary session
            and no feed_dict will be used.

        Returns:
            A list with the Fock number measurement results for each mode.
        """
        # allow integer (non-list) arguments
        # not part of the API, but provided for convenience
        if isinstance(modes, int):
            modes = [modes]
        if isinstance(select, int):
            select = [select]

        # convert lists to np arrays
        if isinstance(modes, list):
            modes = np.array(modes)
        if isinstance(select, list):
            select = np.array(select)

        # check for valid 'modes' argument
        if len(modes) == 0 or len(modes) > self._num_modes or len(
                modes) != len(set(modes)):  #pylint: disable=len-as-condition
            raise ValueError("Specified modes are not valid.")
        if np.any(modes != sorted(modes)):
            raise ValueError("'modes' must be sorted in increasing order.")

        # check for valid 'select' argument
        if select is not None:
            if np.any(select == None):  #pylint: disable=singleton-comparison
                raise NotImplementedError(
                    "Post-selection lists must only contain numerical values.")
            if self._batched:
                num_meas_modes = len(modes)
                # in this case, select must either be:
                # np array of shape (M,), or
                # np array of shape (B,M)
                # where B is the batch_size and M is the number of measured modes
                shape_err = False
                if len(select.shape) == 1:
                    # non-batched list, must broadcast
                    if select.shape[0] != num_meas_modes:
                        shape_err = True
                    else:
                        select = np.vstack([select] * self._batch_size)
                elif len(select.shape) == 2:
                    # batch of lists, no need to broadcast
                    if select.shape != (self._batch_size, num_meas_modes):
                        shape_err = True
                else:
                    shape_err = True
                if shape_err:
                    raise ValueError(
                        "The shape of 'select' is incompatible with 'modes'.")
            else:
                # in this case, select should be a vector
                if select.shape != modes.shape:
                    raise ValueError(
                        "'select' must be have the same shape as 'modes'")

        # carry out the operation
        with self.graph.as_default():
            evaluate_results, session, feed_dict, close_session = ops._check_for_eval(
                kwargs)
            num_reduced_state_modes = len(modes)
            reduced_state = self._state
            if self._state_is_pure:
                mode_size = 1
            else:
                mode_size = 2
            if self._batched:
                batch_size = self._batch_size
                batch_offset = 1
            else:
                batch_size = 1
                batch_offset = 0

            if select is not None:
                # just use the supplied measurement results
                meas_result = select
            else:
                # compute and sample measurement result
                if self._state_is_pure and len(modes) == self._num_modes:
                    # in this case, measure directly on the pure state
                    probs = tf.abs(self._state)**2
                    logprobs = tf.log(probs)
                    sample = tf.multinomial(
                        tf.reshape(logprobs, [batch_size, -1]), 1)
                    sample_tensor = tf.squeeze(sample)
                else:
                    # otherwise, trace out unmeasured modes and sample using diagonal of reduced state
                    removed_ctr = 0
                    red_state_is_pure = self._state_is_pure
                    for m in range(self._num_modes):
                        if m not in modes:
                            new_mode_idx = m - removed_ctr
                            reduced_state = ops.partial_trace(
                                reduced_state, new_mode_idx, red_state_is_pure,
                                self._batched)
                            red_state_is_pure = False
                            removed_ctr += 1
                    # go from bra_A,ket_A,bra_B,ket_B,... -> bra_A,bra_B,ket_A,ket_B,... since this is what diag_part expects
                    # workaround for getting multi-index diagonal since tensorflow doesn't support getting diag of more than one subsystem at once
                    if num_reduced_state_modes > 1:
                        state_indices = np.arange(batch_offset +
                                                  2 * num_reduced_state_modes)
                        batch_index = state_indices[:batch_offset]
                        bra_indices = state_indices[batch_offset::2]
                        ket_indices = state_indices[batch_offset + 1::2]
                        transpose_list = np.concatenate(
                            [batch_index, bra_indices, ket_indices])
                        reduced_state_reshuffled = tf.transpose(
                            reduced_state, transpose_list)
                    else:
                        reduced_state_reshuffled = reduced_state
                    diag_indices = [self._cutoff_dim**num_reduced_state_modes
                                    ] * 2
                    if self._batched:
                        diag_indices = [self._batch_size] + diag_indices
                    diag_tensor = tf.reshape(reduced_state_reshuffled,
                                             diag_indices)
                    diag_entries = tf.matrix_diag_part(diag_tensor)
                    # hack so we can use tf.multinomial for sampling
                    logprobs = tf.log(tf.cast(diag_entries, tf.float64))
                    sample = tf.multinomial(
                        tf.reshape(logprobs, [batch_size, -1]), 1)
                    # sample is a single integer; we need to convert it to the corresponding [n0,n1,n2,...]
                    sample_tensor = tf.squeeze(sample)

                # sample_val is a single integer for each batch entry;
                # we need to convert it to the corresponding [n0,n1,n2,...]
                meas_result = ops.unravel_index(sample_tensor,
                                                [self._cutoff_dim] *
                                                num_reduced_state_modes)
                if not self._batched:
                    meas_result = meas_result[
                        0]  # no batch index, can get rid of first axis

            # unstack this here because that's how it should be returned
            meas_result = tf.unstack(meas_result, axis=-1, name="Meas_result")

            # project remaining modes into conditional state
            if len(modes) == self._num_modes:
                # in this case, all modes were measured and we can put everything in vacuum by resetting
                self.reset(pure=self._state_is_pure)
            else:
                # only some modes were measured: put unmeasured modes in conditional state, while resetting measured modes to vac
                fock_state = tf.one_hot(tf.stack(meas_result, axis=-1),
                                        depth=self._cutoff_dim,
                                        dtype=ops.def_type)
                conditional_state = self._state
                for idx, mode in enumerate(modes):
                    if self._batched:
                        f = fock_state[:, idx]
                    else:
                        f = fock_state[idx]
                    conditional_state = ops.conditional_state(
                        conditional_state,
                        f,
                        mode,
                        self._state_is_pure,
                        batched=self._batched)

                if self._state_is_pure:
                    norm = tf.norm(tf.reshape(conditional_state,
                                              [batch_size, -1]),
                                   axis=1)
                else:
                    # calculate norm of conditional_state
                    # use a cheap hack since tensorflow doesn't allow einsum equation for trace:
                    r = conditional_state
                    for _ in range(self._num_modes - num_reduced_state_modes -
                                   1):
                        r = ops.partial_trace(r, 0, False, self._batched)
                    norm = tf.trace(r)

                # for broadcasting
                norm_reshape = [1] * len(
                    conditional_state.shape[batch_offset:])
                if self._batched:
                    norm_reshape = [self._batch_size] + norm_reshape

                normalized_conditional_state = conditional_state / tf.reshape(
                    norm, norm_reshape)

                # reset measured modes into vacuum
                single_mode_vac = self._single_mode_pure_vac if self._state_is_pure else self._single_mode_mixed_vac
                if len(modes) == 1:
                    meas_modes_vac = single_mode_vac
                else:
                    meas_modes_vac = ops.combine_single_modes(
                        [single_mode_vac] * len(modes), self._batched)
                batch_index = indices[:batch_offset]
                meas_mode_indices = indices[batch_offset:batch_offset +
                                            mode_size * len(modes)]
                conditional_indices = indices[batch_offset + mode_size *
                                              len(modes):batch_offset +
                                              mode_size * self._num_modes]
                eqn_lhs = batch_index + meas_mode_indices + "," + batch_index + conditional_indices
                eqn_rhs = ''
                meas_ctr = 0
                cond_ctr = 0
                for m in range(self._num_modes):
                    if m in modes:
                        # use meas_mode_indices
                        eqn_rhs += meas_mode_indices[mode_size *
                                                     meas_ctr:mode_size *
                                                     (meas_ctr + 1)]
                        meas_ctr += 1
                    else:
                        # use conditional indices
                        eqn_rhs += conditional_indices[mode_size *
                                                       cond_ctr:mode_size *
                                                       (cond_ctr + 1)]
                        cond_ctr += 1
                eqn = eqn_lhs + "->" + batch_index + eqn_rhs
                new_state = tf.einsum(eqn, meas_modes_vac,
                                      normalized_conditional_state)

                self._update_state(new_state)

            # return measurement result
            if evaluate_results:
                _meas = [t.eval(feed_dict, session) for t in meas_result]
                if close_session:
                    session.close()
            else:
                _meas = meas_result

            return tuple(_meas)
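As a side note on the flat-index conversion performed above, here is a minimal NumPy sketch of the same unravel step with made-up cutoff and mode count (ops.unravel_index in the snippet presumably plays the role of np.unravel_index applied per batch entry):

import numpy as np

cutoff_dim = 3        # hypothetical Fock-space cutoff
num_modes = 2         # hypothetical number of measured modes

# a flat index sampled from the cutoff_dim ** num_modes measurement outcomes
flat_sample = 7

# convert back to per-mode photon numbers [n0, n1]
print(np.unravel_index(flat_sample, [cutoff_dim] * num_modes))  # (2, 1)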
Example #36
 def logdet(self, A, **kwargs):
     A = (A + self.matrix_transpose(A)) / 2.
     term = tf.log(tf.matrix_diag_part(self.cholesky(A, **kwargs)))
     return 2 * tf.reduce_sum(term, -1)
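As a quick sanity check on the identity this helper relies on (for symmetric positive definite A, log det A = 2 * sum(log(diag(chol(A))))), here is a small NumPy sketch with an arbitrary matrix:

import numpy as np

B = np.array([[2.0, 0.5], [0.3, 1.5]])
A = B @ B.T + np.eye(2)            # arbitrary symmetric positive definite matrix

logdet_chol = 2.0 * np.sum(np.log(np.diag(np.linalg.cholesky(A))))
_, logdet_ref = np.linalg.slogdet(A)
assert np.allclose(logdet_chol, logdet_ref)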
Example #37
    def __init__(self, params, is_training=True):
        self.is_training = is_training
        batch_size = params["batch_size"]
        num_layers = params['nlayer']
        rnn_size = params['n_hidden']
        grad_clip = params["grad_clip"]
        self.output_keep_prob = tf.placeholder(tf.float32)
        self.input_keep_prob = tf.placeholder(tf.float32)

        NOUT = params['n_output']

        # Transition LSTM
        cell_lst = []
        for i in range(num_layers):
            cell = rnncell.ModifiedLSTMCell(
                rnn_size,
                forget_bias=1,
                initializer=tf.contrib.layers.xavier_initializer(),
                num_proj=None,
                is_training=self.is_training)
            if is_training:
                cell_drop = rnncell.DropoutWrapper(
                    cell, output_keep_prob=self.output_keep_prob)
                cell = cell_drop
            if i > 10 and params['input_keep_prob'] < 1:
                cell_drop = rnncell.DropoutWrapper(
                    cell, input_keep_prob=self.input_keep_prob)
                cell = cell_drop
            cell_lst.append(cell)
        self.cell = rnncell.MultiRNNCell(cell_lst)

        # LSTM for Q noise
        cell_lst = []
        for i in range(params['Qnlayer']):
            cell_Q_noise = rnncell.ModifiedLSTMCell(
                params['Qn_hidden'],
                forget_bias=1,
                initializer=tf.contrib.layers.xavier_initializer(),
                num_proj=None,
                is_training=self.is_training)
            if is_training:
                cell_drop = rnncell.DropoutWrapper(
                    cell_Q_noise, output_keep_prob=self.output_keep_prob)
                cell_Q_noise = cell_drop
            if i > 10 and params['input_keep_prob'] < 1:
                cell_drop = rnncell.DropoutWrapper(
                    cell_Q_noise, input_keep_prob=self.input_keep_prob)
                cell_Q_noise = cell_drop
            cell_lst.append(cell_Q_noise)
        self.cell_Q_noise = rnncell.MultiRNNCell(cell_lst)

        # LSTM for R noise
        cell_lst = []
        for i in range(params['Rnlayer']):
            cell_R_noise = rnncell.ModifiedLSTMCell(
                params['Rn_hidden'],
                forget_bias=1,
                initializer=tf.contrib.layers.xavier_initializer(),
                num_proj=None,
                is_training=self.is_training)
            if is_training:
                cell_drop = rnncell.DropoutWrapper(
                    cell_R_noise, output_keep_prob=self.output_keep_prob)
                cell_R_noise = cell_drop
            if i > 10 and params['input_keep_prob'] < 1:
                cell_drop = rnncell.DropoutWrapper(
                    cell_R_noise, input_keep_prob=self.input_keep_prob)
                cell_R_noise = cell_drop
            cell_lst.append(cell_R_noise)
        self.cell_R_noise = rnncell.MultiRNNCell(cell_lst)

        self.initial_state = self.cell.zero_state(
            batch_size=params['batch_size'], dtype=tf.float32)
        self.initial_state_Q_noise = self.cell_Q_noise.zero_state(
            batch_size=params['batch_size'], dtype=tf.float32)
        # self.initial_state_R_noise = self.cell_Q_noise.zero_state(batch_size=params['batch_size'], dtype=tf.float32)
        self.initial_state_R_noise = self.cell_R_noise.zero_state(
            batch_size=params['batch_size'], dtype=tf.float32)
        self.repeat_data = tf.placeholder(
            dtype=tf.int32, shape=[params["batch_size"], params['seq_length']])

        #Measurements
        self._z = tf.placeholder(dtype=tf.float32,
                                 shape=[None, params['seq_length'], NOUT
                                        ])  # batch size, seqlength, feature
        self._x_inp = tf.placeholder(
            dtype=tf.float32, shape=[None, NOUT],
            name='Initialx')  # batch size, seqlength, feature
        self.target_data = tf.placeholder(
            dtype=tf.float32, shape=[None, params['seq_length'],
                                     NOUT])  # batch size, seqlength, feature
        self._P_inp = tf.placeholder(dtype=tf.float32,
                                     shape=[None, NOUT, NOUT],
                                     name='P')
        self._F = 0.0  # state transition matrix
        self._alpha_sq = 1.  # fading memory control
        self.M = 0.0  # process-measurement cross correlation
        self._I = tf.placeholder(dtype=tf.float32,
                                 shape=[None, NOUT, NOUT],
                                 name='I')
        self.u = 0.0

        xres_lst = []
        xpred_lst = []
        pres_lst = []
        tres_lst = []
        kres_lst = []
        qres_lst = []
        rres_lst = []
        with tf.variable_scope('rnnlm'):
            output_w1 = tf.get_variable(
                "output_w1", [rnn_size, rnn_size],
                initializer=tf.contrib.layers.xavier_initializer())
            output_b1 = tf.get_variable("output_b1", [rnn_size])
            output_w2 = tf.get_variable(
                "output_w2", [rnn_size, rnn_size],
                initializer=tf.contrib.layers.xavier_initializer())
            output_b2 = tf.get_variable("output_b2", [rnn_size])
            output_w3 = tf.get_variable(
                "output_w3", [rnn_size, NOUT],
                initializer=tf.contrib.layers.xavier_initializer())
            output_b3 = tf.get_variable("output_b3", [NOUT])

            output_w1_Q_noise = tf.get_variable(
                "output_w_Q_noise", [params['Qn_hidden'], NOUT],
                initializer=tf.contrib.layers.xavier_initializer())
            output_b1_Q_noise = tf.get_variable("output_b_Q_noise", [NOUT])
            output_w1_R_noise = tf.get_variable(
                "output_w_R_noise", [params['Rn_hidden'], NOUT],
                initializer=tf.contrib.layers.xavier_initializer())
            # output_b1_R_noise = tf.get_variable("output_b_R_noise", [NOUT],initializer=tf.ones_initializer())
            output_b1_R_noise = tf.get_variable("output_b_R_noise", [NOUT])
        #
        indices = list(zip(*np.tril_indices(NOUT)))
        indices = tf.constant([list(i) for i in indices], dtype=tf.int64)

        state_F = self.initial_state
        state_Q = self.initial_state_Q_noise
        state_R = self.initial_state_R_noise
        with tf.variable_scope("rnnlm"):
            for time_step in range(params['seq_length']):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                z = self._z[:, time_step, :]  #bs,features
                if time_step == 0:
                    # self._x= z
                    self._x = self._x_inp
                    self._P = self._P_inp
                with tf.variable_scope("transitionF"):
                    (pred, state_F, ls_internals) = self.cell(self._x, state_F)
                    # pred  = tf.matmul(pred,output_w1)+output_b1
                    pred = tf.nn.relu(
                        tf.add(tf.matmul(pred, output_w1), output_b1))
                    pred = tf.nn.relu(
                        tf.add(tf.matmul(pred, output_w2), output_b2))
                    pred = tf.add(tf.matmul(pred, output_w3), output_b3)

                with tf.variable_scope("noiseQ"):
                    (pred_Q_noise, state_Q,
                     ls_internals) = self.cell_Q_noise(self._x, state_Q)
                    pred_Q_noise = tf.matmul(
                        pred_Q_noise, output_w1_Q_noise) + output_b1_Q_noise

                # one_mask = tf.ones(shape=(batch_size, NOUT))
                # zero_mask = tf.zeros(shape=(batch_size, NOUT))
                # random_mask = tf.random_uniform(shape=(batch_size, NOUT))
                # means = tf.mul(tf.ones(shape=(batch_size, NOUT)), 1 - self.output_keep_prob)
                # mask = tf.select(random_mask - means > 0.5, zero_mask, one_mask)
                # meas_z = tf.select(self.output_keep_prob >= 1, z, tf.mul(z, mask))
                # norm = tf.random_normal(shape=(batch_size, NOUT), mean=0, stddev=0.01)
                # meas_z = tf.select(self.output_keep_prob >= 1, z, tf.add(z, norm))
                meas_z = z

                with tf.variable_scope("noiseR"):
                    (pred_R_noise, state_R,
                     ls_internals) = self.cell_R_noise(meas_z, state_R)
                    pred_R_noise = tf.matmul(
                        pred_R_noise, output_w1_R_noise) + output_b1_R_noise
                #
                self._x = pred

                # lst=tf.unpack(pred, axis=1)

                # Q= tf.sparse_to_dense(sparse_indices=indices, output_shape=[batch_size,NOUT, NOUT], \
                #                           sparse_values=pred_Q_noise, default_value=0, \
                #                           validate_indices=True)
                #
                Q = tf.matrix_diag(tf.exp(pred_Q_noise))
                R = tf.matrix_diag(tf.exp(pred_R_noise))
                # Q=tf.matmul(tf.matrix_diag(tf.exp(pred_Q_noise)),tf.matrix_diag(tf.exp(pred_Q_noise)))
                # R=tf.matmul(tf.matrix_diag(tf.exp(pred_R_noise)),tf.matrix_diag(tf.exp(pred_R_noise)))

                #predict
                P = self._P
                self._P = P + Q

                #update
                P = self._P
                x = self._x

                self._y = meas_z - x

                # S = HPH' + R
                # project system uncertainty into measurement space
                S = P + R
                # S = P

                # K = PH'inv(S)
                # map system uncertainty into kalman gain
                K = tf.matmul(P,
                              tf.matrix_inverse(S))  # roughly (Q+P_init)/(R+Q+P_init)

                # x = x + Ky
                # predict new x with residual scaled by the kalman gain
                self._x = x + tf.squeeze(
                    tf.matmul(K, tf.expand_dims(self._y, 2)),
                    -1)  # K-->>1: _x -> z; K-->>0: _x -> x
                xpred_lst.append(x)
                xres_lst.append(self._x)
                tres_lst.append(meas_z)
                kres_lst.append(tf.matrix_diag_part(K))
                rres_lst.append(tf.matrix_diag_part(R))
                qres_lst.append(tf.matrix_diag_part(Q))

                # P = (I-KH)P(I-KH)' + KRK'
                I_KH = self._I - K
                self._P = tf.matmul(
                    I_KH, tf.matmul(P, tf.matrix_transpose(I_KH))) + tf.matmul(
                        K, tf.matmul(R, tf.matrix_transpose(K)))
                # self._P = tf.matmul(I_KH, tf.matmul(P, tf.matrix_transpose(I_KH))) + tf.matmul(K, tf.matrix_transpose(K))

                self._S = S
                self._K = K
        final_output = tf.reshape(tf.transpose(tf.stack(xres_lst), [1, 0, 2]),
                                  [-1, params['n_output']])
        final_pred_output = tf.reshape(
            tf.transpose(tf.stack(xpred_lst), [1, 0, 2]),
            [-1, params['n_output']])
        final_q_output = tf.reshape(
            tf.transpose(tf.stack(qres_lst), [1, 0, 2]),
            [-1, params['n_output']])
        final_r_output = tf.reshape(
            tf.transpose(tf.stack(rres_lst), [1, 0, 2]),
            [-1, params['n_output']])
        final_k_output = tf.reshape(
            tf.transpose(tf.stack(kres_lst), [1, 0, 2]),
            [-1, params['n_output']])
        final_meas_output = tf.reshape(
            tf.transpose(tf.stack(tres_lst), [1, 0, 2]),
            [-1, params['n_output']])
        flt = tf.squeeze(tf.reshape(self.repeat_data, [-1, 1]), [1])
        where_flt = tf.not_equal(flt, 0)
        indices = tf.where(where_flt)

        y = tf.reshape(self.target_data, [-1, params["n_output"]])
        self.final_output = tf.gather(final_output, tf.squeeze(indices, [1]))
        self.final_pred_output = tf.gather(final_pred_output,
                                           tf.squeeze(indices, [1]))
        self.final_q_output = tf.gather(final_q_output,
                                        tf.squeeze(indices, [1]))
        self.final_r_output = tf.gather(final_r_output,
                                        tf.squeeze(indices, [1]))
        self.final_k_output = tf.gather(final_k_output,
                                        tf.squeeze(indices, [1]))
        self.final_meas_output = tf.gather(final_meas_output,
                                           tf.squeeze(indices, [1]))
        self.y = tf.gather(y, tf.squeeze(indices, [1]))

        tmp = self.final_output - self.y
        loss = tf.nn.l2_loss(tmp)
        tmp_pred = self.final_pred_output - self.y
        loss_pred = tf.nn.l2_loss(tmp_pred)

        # tmp_pred = self.final_pred_output - self.y
        # loss_pred = tf.nn.l2_loss(tmp_pred)

        self.tvars = tf.trainable_variables()
        l2_reg = tf.reduce_sum([tf.nn.l2_loss(var) for var in self.tvars])
        l2_reg = tf.multiply(l2_reg, 1e-4)
        self.cost = tf.reduce_mean(
            loss) + l2_reg + 0.8 * tf.reduce_mean(loss_pred)
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        total_parameters = 0
        for variable in self.tvars:
            # shape is an array of tf.Dimension
            shape = variable.get_shape()
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        self.total_parameters = total_parameters
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        self.states = {}
        self.states["F_t"] = state_F
        self.states["Q_t"] = state_Q
        self.states["R_t"] = state_R
        self.states["PCov_t"] = self._P
        self.states["_x_t"] = self._x
        self.xres_lst = xres_lst
        self.pres_lst = pres_lst
        self.tres_lst = tres_lst
        self.kres_lst = kres_lst
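Each loop iteration above is a standard Kalman filter step with an identity measurement matrix and the learned diagonal noise covariances Q_t and R_t; restating the in-code comments (x_{t|t-1} denotes the transition LSTM's prediction from x_{t-1}):

\[
P_{t\mid t-1} = P_{t-1} + Q_t, \qquad
y_t = z_t - x_{t\mid t-1}, \qquad
S_t = P_{t\mid t-1} + R_t,
\]
\[
K_t = P_{t\mid t-1} S_t^{-1}, \qquad
x_t = x_{t\mid t-1} + K_t y_t, \qquad
P_t = (I - K_t)\, P_{t\mid t-1}\, (I - K_t)^{\top} + K_t R_t K_t^{\top}.
\]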
Example #38
 def atten_col(self, avg_atten):
     row_sum = tf.reduce_sum(avg_atten, axis=1)
     diag_softmax = tf.matrix_diag_part(avg_atten)
     attended_by = tf.math.subtract(row_sum, diag_softmax)
     return attended_by
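A minimal NumPy sketch of what atten_col returns: for each position, the row sum of the averaged attention matrix minus its diagonal (self-attention) entry; the matrix below is made up:

import numpy as np

avg_atten = np.array([[0.6, 0.3, 0.1],
                      [0.2, 0.5, 0.3],
                      [0.1, 0.1, 0.8]])   # hypothetical averaged attention

attended_by = avg_atten.sum(axis=1) - np.diag(avg_atten)
print(attended_by)                        # [0.4 0.5 0.2]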
Example #39
    def sub_model(self,
                  model_input,
                  vocab_size,
                  is_training,
                  num_mixtures=None,
                  l2_penalty=1e-8,
                  sub_scope="",
                  dropout=False,
                  keep_prob=None,
                  noise_level=None,
                  **unused_params):
        num_mixtures = num_mixtures or FLAGS.moe_num_mixtures

        low_rank_gating = FLAGS.moe_low_rank_gating
        l2_penalty = FLAGS.moe_l2
        gating_probabilities = FLAGS.moe_prob_gating
        gating_input = FLAGS.moe_prob_gating_input
        remove_diag = FLAGS.gating_remove_diag

        if dropout:
            model_input = tf.nn.dropout(model_input, keep_prob=keep_prob)

        gate_activations = slim.fully_connected(
            model_input,
            vocab_size * (num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates-" + sub_scope)
        expert_activations = slim.fully_connected(
            model_input,
            vocab_size * num_mixtures,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="experts-" + sub_scope)

        gating_distribution = tf.nn.softmax(
            tf.reshape(gate_activations,
                       [-1, num_mixtures + 1
                        ]))  # (Batch * #Labels) x (num_mixtures + 1)
        expert_distribution = tf.nn.sigmoid(
            tf.reshape(expert_activations,
                       [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

        final_probabilities_by_class_and_batch = tf.reduce_sum(
            gating_distribution[:, :num_mixtures] * expert_distribution, 1)
        final_probabilities = tf.reshape(
            final_probabilities_by_class_and_batch, [-1, vocab_size])
        probabilities = final_probabilities
        with tf.variable_scope(sub_scope):
            if gating_probabilities:
                if gating_input == 'prob':
                    gating_weights = tf.get_variable(
                        "gating_prob_weights", [vocab_size, vocab_size],
                        initializer=tf.random_normal_initializer(
                            stddev=1 / math.sqrt(vocab_size)))
                    gates = tf.matmul(probabilities, gating_weights)
                else:
                    # infer the input feature dimension for the gating matrix
                    input_size = model_input.get_shape().as_list()[1]
                    gating_weights = tf.get_variable(
                        "gating_prob_weights", [input_size, vocab_size],
                        initializer=tf.random_normal_initializer(
                            stddev=1 / math.sqrt(vocab_size)))

                    gates = tf.matmul(model_input, gating_weights)

                if remove_diag:
                    #removes diagonals coefficients
                    diagonals = tf.matrix_diag_part(gating_weights)
                    gates = gates - tf.multiply(diagonals, probabilities)

                gates = slim.batch_norm(gates,
                                        center=True,
                                        scale=True,
                                        is_training=is_training,
                                        scope="gating_prob_bn")

                gates = tf.sigmoid(gates)

                probabilities = tf.multiply(probabilities, gates)

        final_probabilities = probabilities
        return final_probabilities
Example #40
def _add_diagonal_shift(matrix, shift):
  return tf.matrix_set_diag(
      matrix, tf.matrix_diag_part(matrix) + shift, name='add_diagonal_shift')
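A hedged usage sketch for the helper above: the usual reason for such a shift is to add a small jitter to a covariance matrix's diagonal before a Cholesky factorization (the shift value is arbitrary; TF 1.x API as in the rest of these examples):

import tensorflow as tf  # TF 1.x style API

cov = tf.constant([[1.0, 0.9],
                   [0.9, 1.0]])
jittered = _add_diagonal_shift(cov, 1e-6)  # adds 1e-6 to each diagonal entry
chol = tf.cholesky(jittered)               # numerically safer to factor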
Example #41
 def outer_diag(self, tensor):
     trans = tf.transpose(tensor)
     diag = tf.matrix_diag_part(trans)
     return tf.transpose(diag)
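Since tf.matrix_diag_part always reads the innermost two dimensions, transposing first (as outer_diag does) extracts the diagonal over the outermost two dimensions instead. A NumPy sketch with an arbitrary shape; for a plain 2-D matrix both forms reduce to the ordinary diagonal:

import numpy as np

t = np.arange(24).reshape(2, 2, 6)       # arbitrary [2, 2, 6] tensor

# equivalent of outer_diag(t): outer[m, k] == t[m, m, k], shape [2, 6]
outer = np.diagonal(t, axis1=0, axis2=1).T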
Example #42
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     iterations=None,
                     add_batch_norm=None,
                     sample_random_frames=None,
                     cluster_size=None,
                     hidden_size=None,
                     **unused_params):
        iterations = iterations or FLAGS.iterations
        add_batch_norm = add_batch_norm or FLAGS.netvlad_add_batch_norm
        random_frames = sample_random_frames or FLAGS.sample_random_frames
        cluster_size = cluster_size or FLAGS.netvlad_cluster_size
        hidden1_size = hidden_size or FLAGS.netvlad_hidden_size
        relu = FLAGS.netvlad_relu
        dimred = FLAGS.netvlad_dimred
        gating = FLAGS.gating
        remove_diag = FLAGS.gating_remove_diag
        print "FLAGS.lightvlad", FLAGS.lightvlad
        lightvlad = FLAGS.lightvlad
        vlagd = FLAGS.vlagd

        num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
        print "num_frames:", num_frames
        if random_frames:
            model_input = utils.SampleRandomFrames(model_input, num_frames,
                                                   iterations)
        else:
            model_input = utils.SampleRandomSequence(model_input, num_frames,
                                                     iterations)

        max_frames = model_input.get_shape().as_list()[1]
        feature_size = model_input.get_shape().as_list()[2]
        reshaped_input = tf.reshape(model_input, [-1, feature_size])

        if lightvlad:
            video_NetVLAD = LightVLAD(1024, max_frames, cluster_size,
                                      add_batch_norm)
            audio_NetVLAD = LightVLAD(128, max_frames, cluster_size // 2,
                                      add_batch_norm)

        if add_batch_norm:  # and not lightvlad:
            reshaped_input = slim.batch_norm(reshaped_input,
                                             center=True,
                                             scale=True,
                                             scope="input_bn")

        with tf.variable_scope("video_VLAD"):
            vlad_video = video_NetVLAD.forward(reshaped_input[:, 0:1024])

        with tf.variable_scope("audio_VLAD"):
            vlad_audio = audio_NetVLAD.forward(reshaped_input[:, 1024:])

        vlad = tf.concat([vlad_video, vlad_audio], 1)

        vlad_dim = vlad.get_shape().as_list()[1]
        hidden1_weights = tf.get_variable(
            "hidden1_weights", [vlad_dim, hidden1_size],
            initializer=tf.random_normal_initializer(stddev=1 /
                                                     math.sqrt(cluster_size)))

        activation = tf.matmul(vlad, hidden1_weights)

        if add_batch_norm and relu:
            activation = slim.batch_norm(activation,
                                         center=True,
                                         scale=True,
                                         scope="hidden1_bn")

        else:
            hidden1_biases = tf.get_variable(
                "hidden1_biases", [hidden1_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            tf.summary.histogram("hidden1_biases", hidden1_biases)
            activation += hidden1_biases

        if relu:
            activation = tf.nn.relu6(activation)

        if gating:
            gating_weights = tf.get_variable(
                "gating_weights_2", [hidden1_size, hidden1_size],
                initializer=tf.random_normal_initializer(
                    stddev=1 / math.sqrt(hidden1_size)))

            gates = tf.matmul(activation, gating_weights)

            if remove_diag:
                #removes diagonals coefficients
                diagonals = tf.matrix_diag_part(gating_weights)
                gates = gates - tf.multiply(diagonals, activation)

            if add_batch_norm:
                gates = slim.batch_norm(gates,
                                        center=True,
                                        scale=True,
                                        scope="gating_bn")
            else:
                gating_biases = tf.get_variable(
                    "gating_biases", [cluster_size],
                    initializer=tf.random_normal_initializer(
                        stddev=1 / math.sqrt(feature_size)))
                gates += gating_biases

            gates = tf.sigmoid(gates)

            activation = tf.multiply(activation, gates)

        aggregated_model = getattr(video_level_models,
                                   FLAGS.video_level_classifier_model)

        return aggregated_model().create_model(model_input=activation,
                                               vocab_size=vocab_size,
                                               **unused_params)
Example #43
    def __init__(self,
                 df,
                 scale=None,
                 scale_tril=None,
                 input_output_cholesky=False,
                 validate_args=False,
                 allow_nan_stats=True,
                 name="Wishart"):
        """Construct Wishart distributions.

    Args:
      df: `float` or `double` `Tensor`. Degrees of freedom, must be greater than
        or equal to dimension of the scale matrix.
      scale: `float` or `double` `Tensor`. The symmetric positive definite
        scale matrix of the distribution. Exactly one of `scale` and
        `scale_tril` must be passed.
      scale_tril: `float` or `double` `Tensor`. The Cholesky factorization
        of the symmetric positive definite scale matrix of the distribution.
        Exactly one of `scale` and `scale_tril` must be passed.
      input_output_cholesky: Python `bool`. If `True`, functions whose input or
        output have the semantics of samples assume inputs are in Cholesky form
        and return outputs in Cholesky form. In particular, if this flag is
        `True`, input to `log_prob` is presumed of Cholesky form and output from
        `sample`, `mean`, and `mode` are of Cholesky form.  Setting this
        argument to `True` is purely a computational optimization and does not
        change the underlying distribution; for instance, `mean` returns the
        Cholesky of the mean, not the mean of Cholesky factors. The `variance`
        and `stddev` methods are unaffected by this flag.
        Default value: `False` (i.e., input/output does not have Cholesky
        semantics).
      validate_args: Python `bool`, default `False`. When `True` distribution
        parameters are checked for validity despite possibly degrading runtime
        performance. When `False` invalid inputs may silently render incorrect
        outputs.
      allow_nan_stats: Python `bool`, default `True`. When `True`, statistics
        (e.g., mean, mode, variance) use the value "`NaN`" to indicate the
        result is undefined. When `False`, an exception is raised if one or
        more of the statistic's batch members are undefined.
      name: Python `str` name prefixed to Ops created by this class.
    Raises:
      ValueError: if neither or both of `scale` and `scale_tril` are passed in.
    """
        parameters = dict(locals())

        with tf.name_scope(name, values=[scale, scale_tril]) as name:
            with tf.name_scope("init", values=[scale, scale_tril]):
                if (scale is None) == (scale_tril is None):
                    raise ValueError(
                        "Must pass scale or scale_tril, but not both.")

                if scale is not None:
                    scale = tf.convert_to_tensor(scale)
                    if validate_args:
                        scale = distribution_util.assert_symmetric(scale)
                    scale_tril = tf.cholesky(scale)
                else:  # scale_tril is not None
                    scale_tril = tf.convert_to_tensor(scale_tril)
                    if validate_args:
                        scale_tril = control_flow_ops.with_dependencies([
                            tf.assert_positive(
                                tf.matrix_diag_part(scale_tril),
                                message="scale_tril must be positive definite"
                            ),
                            tf.assert_equal(
                                tf.shape(scale_tril)[-1],
                                tf.shape(scale_tril)[-2],
                                message="scale_tril must be square")
                        ], scale_tril)

            super(Wishart, self).__init__(
                df=df,
                scale_operator=tf.linalg.LinearOperatorLowerTriangular(
                    tril=scale_tril,
                    is_non_singular=True,
                    is_positive_definite=True,
                    is_square=True),
                input_output_cholesky=input_output_cholesky,
                validate_args=validate_args,
                allow_nan_stats=allow_nan_stats,
                name=name)
        self._parameters = parameters
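A hedged construction sketch for the class above; in older TensorFlow Probability releases it is exposed as tfp.distributions.Wishart, and the degrees of freedom and scale below are made up:

import numpy as np
import tensorflow_probability as tfp

tfd = tfp.distributions

scale = np.array([[2.0, 0.5],
                  [0.5, 1.0]], dtype=np.float32)

# pass either the SPD scale matrix ...
w_full = tfd.Wishart(df=4., scale=scale)

# ... or its Cholesky factor; passing both (or neither) raises ValueError
w_tril = tfd.Wishart(df=4., scale_tril=np.linalg.cholesky(scale))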
Example #44
# Declare k-value and batch size
k = 4
batch_size = len(x_vals_test)

# Placeholders
x_data_train = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
x_data_test = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
y_target_train = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y_target_test = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Declare weighted distance metric
# Weighted - L2 = sqrt((x-y)^T * A * (x-y))
subtraction_term = tf.subtract(x_data_train, tf.expand_dims(x_data_test, 1))
first_product = tf.matmul(subtraction_term, tf.tile(tf.expand_dims(weight_matrix, 0), [batch_size, 1, 1]))
second_product = tf.matmul(first_product, tf.transpose(subtraction_term, perm=[0, 2, 1]))
distance = tf.sqrt(tf.matrix_diag_part(second_product))

# Predict: Get min distance index (Nearest neighbor)
top_k_xvals, top_k_indices = tf.nn.top_k(tf.negative(distance), k=k)
x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, 1), 1)
x_sums_repeated = tf.matmul(x_sums, tf.ones([1, k], tf.float32))
x_val_weights = tf.expand_dims(tf.div(top_k_xvals, x_sums_repeated), 1)

top_k_yvals = tf.gather(y_target_train, top_k_indices)
prediction = tf.squeeze(tf.matmul(x_val_weights, top_k_yvals), squeeze_dims=[1])

# Calculate MSE
mse = tf.div(tf.reduce_sum(tf.square(tf.subtract(prediction, y_target_test))), batch_size)

# Calculate how many loops over training data
num_loops = int(np.ceil(len(x_vals_test) / batch_size))
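Why the diagonal is taken above: for each test point, the batched product (x - y) A (x - y)^T is an [n_train, n_train] matrix whose diagonal holds the squared weighted distances to every training point. A NumPy sketch with made-up data:

import numpy as np

rng = np.random.default_rng(0)
n_test, n_train, n_feat = 3, 5, 2                 # made-up sizes
x_test = rng.normal(size=(n_test, n_feat))
x_train = rng.normal(size=(n_train, n_feat))
A = np.eye(n_feat)                                # made-up weight matrix

diff = x_train - x_test[:, None, :]               # [n_test, n_train, n_feat]
full = diff @ A @ np.transpose(diff, (0, 2, 1))   # [n_test, n_train, n_train]
dist = np.sqrt(np.diagonal(full, axis1=1, axis2=2))

# the same distances computed pairwise
direct = np.sqrt(np.einsum('ijk,kl,ijl->ij', diff, A, diff))
assert np.allclose(dist, direct)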
Example #45
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - Ka_{.,.}, Kb_{.,.} :: RBF kernels
    Ka and Kb as well as Z1 and Z2 can differ from each other, but this is supported
    only if the Gaussian p is diagonal (p.cov is NxD) and Ka, Kb have disjoint
    active_dims, in which case the joint expectation simplifies into a product of
    expectations.

    :return: NxMxM
    """
    if kern1.on_separate_dims(kern2) and isinstance(
            p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (kern1, feat1))
        eKxz2 = expectation(p, (kern2, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2 or kern1 != kern2:
        raise NotImplementedError(
            "The expectation over two kernels has only an "
            "analytical implementation if both kernels are equal.")

    kern = kern1
    feat = feat1

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(
            tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov
        )
        Z, Xmu = kern._slice(feat.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        squared_lengthscales = kern.lengthscales ** 2. if kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales ** 2.

        sqrt_det_L = tf.reduce_prod(0.5 * squared_lengthscales)**0.5
        C = tf.cholesky(0.5 * tf.matrix_diag(squared_lengthscales) +
                        Xcov)  # NxDxD
        dets = sqrt_det_L / tf.exp(
            tf.reduce_sum(tf.log(tf.matrix_diag_part(C)), axis=1))  # N

        C_inv_mu = tf.matrix_triangular_solve(C,
                                              tf.expand_dims(Xmu, 2),
                                              lower=True)  # NxDx1
        C_inv_z = tf.matrix_triangular_solve(
            C,
            tf.tile(tf.expand_dims(tf.transpose(Z) / 2., 0), [N, 1, 1]),
            lower=True)  # NxDxM
        mu_CC_inv_mu = tf.expand_dims(tf.reduce_sum(tf.square(C_inv_mu), 1),
                                      2)  # Nx1x1
        z_CC_inv_z = tf.reduce_sum(tf.square(C_inv_z), 1)  # NxM
        zm_CC_inv_zn = tf.matmul(C_inv_z, C_inv_z, transpose_a=True)  # NxMxM
        two_z_CC_inv_mu = 2 * tf.matmul(C_inv_z, C_inv_mu,
                                        transpose_a=True)[:, :, 0]  # NxM

        exponent_mahalanobis = mu_CC_inv_mu + tf.expand_dims(z_CC_inv_z, 1) + \
                               tf.expand_dims(z_CC_inv_z, 2) + 2 * zm_CC_inv_zn - \
                               tf.expand_dims(two_z_CC_inv_mu, 2) - tf.expand_dims(two_z_CC_inv_mu, 1)  # NxMxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxMxM

        # Compute sqrt(self.K(Z)) explicitly to prevent automatic gradient from
        # being NaN sometimes, see pull request #615
        kernel_sqrt = tf.exp(-0.25 * kern.square_dist(Z, None))
        return kern.variance ** 2 * kernel_sqrt * \
               tf.reshape(dets, [N, 1, 1]) * exponent_mahalanobis
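Restating the docstring's target quantity in equation form (just notation, no new content):

\[
\mathrm{expectation}[n, m, m'] \;=\; \int k_a\!\left(Z_1[m],\, x\right)\, k_b\!\left(x,\, Z_2[m']\right)\, p(x_n = x)\, dx ,
\]

and when p is a DiagonalGaussian and the two kernels act on disjoint active_dims this factorizes into eKxz1[n, m] * eKxz2[n, m'], which is what the early-return branch above computes.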
Example #46
 def _variance(self):
     x = tf.sqrt(self.df) * self._square_scale_operator()
     d = tf.expand_dims(tf.matrix_diag_part(x), -1)
     v = tf.square(x) + tf.matmul(d, d, adjoint_b=True)
     return v
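Assuming _square_scale_operator() returns the scale matrix Σ (the squared scale operator), the code above matches the standard element-wise Wishart variance: for W ~ W_p(ν, Σ) with df ν,

\[
\operatorname{Var}(W_{ij}) \;=\; \nu\left(\Sigma_{ij}^{2} + \Sigma_{ii}\,\Sigma_{jj}\right),
\]

with x = sqrt(ν)·Σ, so tf.square(x) supplies the first term and the outer product of its diagonal supplies the second.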
Example #47
    def _log_prob(self, x):
        if self.input_output_cholesky:
            x_sqrt = x
        else:
            # Complexity: O(nbk**3)
            x_sqrt = tf.cholesky(x)

        batch_shape = self.batch_shape_tensor()
        event_shape = self.event_shape_tensor()
        ndims = tf.rank(x_sqrt)
        # sample_ndims = ndims - batch_ndims - event_ndims
        sample_ndims = ndims - tf.shape(batch_shape)[0] - 2
        sample_shape = tf.strided_slice(tf.shape(x_sqrt), [0], [sample_ndims])

        # We need to be able to pre-multiply each matrix by its corresponding
        # batch scale matrix. Since a Distribution Tensor supports multiple
        # samples per batch, this means we need to reshape the input matrix `x`
        # so that the first b dimensions are batch dimensions and the last two
        # are of shape [dimension, dimension * number_of_samples]. Doing these
        # gymnastics allows us to do a batch_solve.
        #
        # After we're done with sqrt_solve (the batch operation) we need to undo
        # this reshaping so what we're left with is a Tensor partitionable by
        # sample, batch, event dimensions.

        # Complexity: O(nbk**2) since transpose must access every element.
        scale_sqrt_inv_x_sqrt = x_sqrt
        perm = tf.concat(
            [tf.range(sample_ndims, ndims),
             tf.range(0, sample_ndims)], 0)
        scale_sqrt_inv_x_sqrt = tf.transpose(scale_sqrt_inv_x_sqrt, perm)
        shape = tf.concat(
            (batch_shape, (tf.cast(self.dimension, dtype=tf.int32), -1)), 0)
        scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)

        # Complexity: O(nbM*k) where M is the complexity of the operator solving
        # a vector system. For LinearOperatorLowerTriangular, each solve is
        # O(k**2), so this step has complexity O(nbk**3).
        scale_sqrt_inv_x_sqrt = self.scale_operator.solve(
            scale_sqrt_inv_x_sqrt)

        # Undo make batch-op ready.
        # Complexity: O(nbk**2)
        shape = tf.concat([batch_shape, event_shape, sample_shape], 0)
        scale_sqrt_inv_x_sqrt = tf.reshape(scale_sqrt_inv_x_sqrt, shape)
        perm = tf.concat([
            tf.range(ndims - sample_ndims, ndims),
            tf.range(0, ndims - sample_ndims)
        ], 0)
        scale_sqrt_inv_x_sqrt = tf.transpose(scale_sqrt_inv_x_sqrt, perm)

        # Write V = SS', X = LL'. Then:
        # tr[inv(V) X] = tr[inv(S)' inv(S) L L']
        #              = tr[inv(S) L L' inv(S)']
        #              = tr[(inv(S) L) (inv(S) L)']
        #              = sum_{ik} (inv(S) L)_{ik}**2
        # The second equality follows from the cyclic permutation property.
        # Complexity: O(nbk**2)
        trace_scale_inv_x = tf.reduce_sum(tf.square(scale_sqrt_inv_x_sqrt),
                                          axis=[-2, -1])

        # Complexity: O(nbk)
        half_log_det_x = tf.reduce_sum(tf.log(tf.matrix_diag_part(x_sqrt)),
                                       axis=[-1])

        # Complexity: O(nbk**2)
        log_prob = ((self.df - self.dimension - 1.) * half_log_det_x -
                    0.5 * trace_scale_inv_x - self.log_normalization())

        # Set shape hints.
        # Try to merge what we know from the input then what we know from the
        # parameters of this distribution.
        if x.get_shape().ndims is not None:
            log_prob.set_shape(x.get_shape()[:-2])
        if (log_prob.get_shape().ndims is not None
                and self.batch_shape.ndims is not None
                and self.batch_shape.ndims > 0):
            log_prob.get_shape()[-self.batch_shape.ndims:].merge_with(
                self.batch_shape)

        return log_prob
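A small NumPy check of the trace identity spelled out in the comments above (with V = S S^T and X = L L^T, tr[inv(V) X] equals the sum of squared entries of inv(S) L); the matrices are arbitrary:

import numpy as np

rng = np.random.default_rng(1)
k = 3
S = np.linalg.cholesky(np.cov(rng.normal(size=(k, 10))) + np.eye(k))
L = np.linalg.cholesky(np.cov(rng.normal(size=(k, 10))) + np.eye(k))
V, X = S @ S.T, L @ L.T

lhs = np.trace(np.linalg.inv(V) @ X)
rhs = np.sum(np.linalg.solve(S, L) ** 2)   # entries of inv(S) @ L, squared
assert np.allclose(lhs, rhs)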
Example #48
def model_fn(features, labels, mode, params, config):

    visit_items_index = features["visit_items_index"]    # num * 5
    continuous_features_value = features["continuous_features_value"]  # num * 16
    next_visit_item_index = labels    # num
    keep_prob = params["keep_prob"]
    embedding_size = params["embedding_size"]
    item_num = params["item_num"]
    learning_rate = params["learning_rate"]
    top_k = params["top_k"]

    # initialize the item embeddings
    initializer = tf.initializers.random_uniform(minval=-0.5 / embedding_size, maxval=0.5 / embedding_size)
    partitioner = tf.fixed_size_partitioner(num_shards=embedding_size)
    item_embedding = tf.get_variable("item_embedding", [item_num, embedding_size],
                                     tf.float32, initializer=initializer, partitioner=partitioner)

    visit_items_embedding = tf.nn.embedding_lookup(item_embedding, visit_items_index)       # num * 5 * embedding_size
    visit_items_average_embedding = tf.reduce_mean(visit_items_embedding, axis=1)     # num * embedding_size
    input_embedding = tf.concat([visit_items_average_embedding, continuous_features_value], 1)   # num * (embedding_size + 16)
    kernel_initializer_1 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    bias_initializer_1 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    layer_1 = tf.layers.dense(input_embedding, 64, activation=tf.nn.relu,
                              kernel_initializer=kernel_initializer_1,
                              bias_initializer=bias_initializer_1, name="layer_1")
    layer_dropout_1 = tf.nn.dropout(layer_1, keep_prob=keep_prob, name="layer_dropout_1")
    kernel_initializer_2 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    bias_initializer_2 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    layer_2 = tf.layers.dense(layer_dropout_1, 32, activation=tf.nn.relu,
                              kernel_initializer=kernel_initializer_2,
                              bias_initializer=bias_initializer_2, name="layer_2")
    layer_dropout_2 = tf.nn.dropout(layer_2, keep_prob=keep_prob, name="layer_dropout_2")
    # user vector, num * embedding_size
    kernel_initializer_3 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    bias_initializer_3 = tf.initializers.random_normal(mean=0.0, stddev=0.1)
    user_vector = tf.layers.dense(layer_dropout_2, embedding_size, activation=tf.nn.relu,
                                  kernel_initializer=kernel_initializer_3,
                                  bias_initializer=bias_initializer_3, name="user_vector")

    if mode == tf.estimator.ModeKeys.TRAIN:
        # training
        output_embedding = tf.nn.embedding_lookup(item_embedding, next_visit_item_index)  # num * embedding_size
        logits = tf.matmul(user_vector, output_embedding, transpose_a=False, transpose_b=True)  # num * num
        yhat = tf.nn.softmax(logits)  # num * num
        cross_entropy = tf.reduce_mean(-tf.log(tf.matrix_diag_part(yhat) + 1e-16))
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train = optimizer.minimize(cross_entropy, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=cross_entropy, train_op=train)

    if mode == tf.estimator.ModeKeys.EVAL:
        # evaluation
        output_embedding = tf.nn.embedding_lookup(item_embedding, next_visit_item_index)  # num * embedding_size
        logits = tf.matmul(user_vector, output_embedding, transpose_a=False, transpose_b=True)  # num * num
        yhat = tf.nn.softmax(logits)  # num * num
        cross_entropy = tf.reduce_mean(-tf.log(tf.matrix_diag_part(yhat) + 1e-16))
        return tf.estimator.EstimatorSpec(mode, loss=cross_entropy)

    if mode == tf.estimator.ModeKeys.PREDICT:
        logits_predict = tf.matmul(user_vector, item_embedding, transpose_a=False, transpose_b=True)  # num *  item_num
        yhat_predict = tf.nn.softmax(logits_predict)  # num *  item_num
        _, indices = tf.nn.top_k(yhat_predict, k=top_k, sorted=True)
        index = tf.identity(indices, name="index")  # num * top_k
        # prediction
        predictions = {
            "user_vector": user_vector,
            "index": index
        }
        export_outputs = {
            "prediction": tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions, export_outputs=export_outputs)
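Note on the loss above: because logits is the num x num score matrix between each user vector and every example's positive item, tf.matrix_diag_part(yhat) picks out each row's probability of its own next item, with the rest of the batch acting as in-batch negatives. A NumPy sketch with made-up vectors:

import numpy as np

rng = np.random.default_rng(2)
num, emb = 4, 8                            # made-up batch and embedding sizes
user_vec = rng.normal(size=(num, emb))
item_emb = rng.normal(size=(num, emb))     # each row's positive item embedding

logits = user_vec @ item_emb.T                                    # num x num
yhat = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
loss = np.mean(-np.log(np.diag(yhat) + 1e-16))                    # same form as above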
Example #49
import tensorflow as tf
"""tf.matrix_diag_part(input,name=None)
功能:返回批对角阵的对角元素
输入:tensor,批对角阵"""

a = tf.constant([[[1, 3, 0], [0, 2, 0], [0, 0, 3]],
                 [[4, 0, 0], [0, 5, 0], [0, 0, 6]]])
z = tf.matrix_diag_part(a)

sess = tf.Session()
print(sess.run(z))
sess.close()
# z==>[[1 2 3]
#      [4 5 6]]
Example #50
    def call(self, inputs):
        if self.conditional_inputs is None and self.conditional_outputs is None:
            covariance_matrix = self.covariance_fn(inputs, inputs)
            # Tile locations so output has shape [units, batch_size]. Covariance will
            # broadcast to [units, batch_size, batch_size], and we perform
            # shape manipulations to get a random variable over [batch_size, units].
            loc = self.mean_fn(inputs)
            loc = tf.tile(loc[tf.newaxis], [self.units] + [1] * len(loc.shape))
        else:
            knn = self.covariance_fn(inputs, inputs)
            knm = self.covariance_fn(inputs, self.conditional_inputs)
            kmm = self.covariance_fn(self.conditional_inputs,
                                     self.conditional_inputs)
            kmm = tf.matrix_set_diag(
                kmm,
                tf.matrix_diag_part(kmm) + tf.keras.backend.epsilon())
            kmm_tril = tf.linalg.cholesky(kmm)
            kmm_tril_operator = tf.linalg.LinearOperatorLowerTriangular(
                kmm_tril)
            knm_operator = tf.linalg.LinearOperatorFullMatrix(knm)

            # TODO(trandustin): Vectorize linear algebra for multiple outputs. For
            # now, we do each separately and stack to obtain a locations Tensor of
            # shape [units, batch_size].
            loc = []
            for conditional_outputs_unit in tf.unstack(
                    self.conditional_outputs, axis=-1):
                center = conditional_outputs_unit - self.mean_fn(
                    self.conditional_inputs)
                loc_unit = knm_operator.matvec(
                    kmm_tril_operator.solvevec(
                        kmm_tril_operator.solvevec(center), adjoint=True))
                loc.append(loc_unit)
            loc = tf.stack(loc) + self.mean_fn(inputs)[tf.newaxis]

            covariance_matrix = knn
            covariance_matrix -= knm_operator.matmul(
                kmm_tril_operator.solve(kmm_tril_operator.solve(
                    knm, adjoint_arg=True),
                                        adjoint=True))

        covariance_matrix = tf.matrix_set_diag(
            covariance_matrix,
            tf.matrix_diag_part(covariance_matrix) +
            tf.keras.backend.epsilon())

        # Form a multivariate normal random variable with batch_shape units and
        # event_shape batch_size. Then make it be independent across the units
        # dimension. Then transpose its dimensions so it is [batch_size, units].
        random_variable = ed.MultivariateNormalFullCovariance(
            loc=loc, covariance_matrix=covariance_matrix)
        random_variable = ed.Independent(random_variable.distribution,
                                         reinterpreted_batch_ndims=1)
        bijector = tfp.bijectors.Inline(
            forward_fn=lambda x: tf.transpose(x, [1, 0]),
            inverse_fn=lambda y: tf.transpose(y, [1, 0]),
            forward_event_shape_fn=lambda input_shape: input_shape[::-1],
            forward_event_shape_tensor_fn=lambda input_shape: input_shape[::-1],
            inverse_log_det_jacobian_fn=lambda y: tf.cast(0, y.dtype),
            forward_min_event_ndims=2)
        random_variable = ed.TransformedDistribution(
            random_variable.distribution, bijector=bijector)
        return random_variable
Example #51
    def _model(self, features):
        Z = features['numbers']
        C = features['srdf']

        # masking
        mask = tf.cast(tf.expand_dims(Z, 1) * tf.expand_dims(Z, 2),
                       tf.float32)
        diag = tf.matrix_diag_part(mask)
        diag = tf.ones_like(diag)
        offdiag = 1 - tf.matrix_diag(diag)
        mask *= offdiag
        mask = tf.expand_dims(mask, -1)

        I = np.eye(self.max_z).astype(np.float32)
        ZZ = tf.nn.embedding_lookup(I, Z)
        r = tf.sqrt(1. / tf.sqrt(float(self.n_basis)))
        X = L.dense(ZZ, self.n_basis, use_bias=False,
                    weight_init=tf.random_normal_initializer(stddev=r))

        fC = L.dense(C, self.n_factors, use_bias=True)

        reuse = None
        for i in range(self.n_interactions):
            tmp = tf.expand_dims(X, 1)

            fX = L.dense(tmp, self.n_factors, use_bias=True,
                         scope='in2fac', reuse=reuse)

            fVj = fX * fC

            Vj = L.dense(fVj, self.n_basis, use_bias=False,
                         weight_init=tf.constant_initializer(0.0),
                         nonlinearity=tf.nn.tanh,
                         scope='fac2out', reuse=reuse)

            V = L.masked_sum(Vj, mask, axes=2)

            X += V
            reuse = True

        # output
        o1 = L.dense(X, self.n_basis // 2, nonlinearity=tf.nn.tanh)
        yi = L.dense(o1, 1,
                     weight_init=tf.constant_initializer(0.0),
                     use_bias=True)

        mu = tf.get_variable('mu', shape=(1,),
                             initializer=L.reference_initializer(self.mu),
                             trainable=False)
        std = tf.get_variable('std', shape=(1,),
                              initializer=L.reference_initializer(self.std),
                              trainable=False)
        yi = yi * std + mu

        if self.atom_ref is not None:
            E0i = L.embedding(Z, 100, 1,
                              reference=self.atom_ref, trainable=False)
            yi += E0i

        atom_mask = tf.expand_dims(Z, -1)
        if self.per_atom:
            y = L.masked_mean(yi, atom_mask, axes=1)
            #E0 = L.masked_mean(E0i, atom_mask, axes=1)
        else:
            y = L.masked_sum(yi, atom_mask, axes=1)
            #E0 = L.masked_sum(E0i, atom_mask, axes=1)

        return {'y': y, 'y_i': yi} #, 'E0': E0}
Example #52
 def call(self, list_tensors):
     layer = tf.keras.layers.Dot(axes=[2, 2], normalize=True)
     output_dot = layer(list_tensors)
     output_diag = tf.matrix_diag_part(output_dot)
     return output_diag
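For context, Dot(axes=[2, 2], normalize=True) produces all pairwise cosine similarities between the two sequences, and tf.matrix_diag_part keeps only the similarity between corresponding positions. A NumPy sketch with arbitrary shapes:

import numpy as np

rng = np.random.default_rng(3)
a = rng.normal(size=(2, 5, 4))             # [batch, timesteps, features], made up
b = rng.normal(size=(2, 5, 4))

a_n = a / np.linalg.norm(a, axis=-1, keepdims=True)
b_n = b / np.linalg.norm(b, axis=-1, keepdims=True)
pairwise = a_n @ np.transpose(b_n, (0, 2, 1))            # [2, 5, 5] cosine matrix
position_wise = np.diagonal(pairwise, axis1=1, axis2=2)  # [2, 5]

assert np.allclose(position_wise, np.sum(a_n * b_n, axis=-1))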
Example #53
 def test_SU(self):
     N = 5
     batch_size = 1000
     algebra = SU(N)
     # check random unitaries
     u = algebra.random_element(batch_size)
     self.assertEqual(u.shape.as_list(), [batch_size, N, N])
     det = tf.linalg.det(u)
     prod = tf.matmul(u, tf.linalg.adjoint(u))
     re = tf.reduce_mean(tf.abs(tf.real(u)))
     im = tf.reduce_mean(tf.abs(tf.imag(u)))
     with tf.Session() as sess:
         det, prod, re, im = sess.run([det, prod, re, im])
     self.assertTrue(0.9 < re / im < 1.1)
     self.assertTrue(np.allclose(det, 1))
     self.assertTrue(np.allclose(prod, np.eye(N), atol=1e-5))
     # test gauge fixing: shapes
     mat = tf.complex(tf.random.normal([batch_size, 3, N, N]),
                      tf.random.normal([batch_size, 3, N, N]))
     mat = mat + tf.linalg.adjoint(mat)  # make it hermitian
     g, rep = algebra.gauge_fixing(mat)
     self.assertEqual(g.shape, [batch_size, N, N])
     self.assertEqual(rep.shape, [batch_size, 3, N, N])
     # test gauge fixing: g should be in SU(N)
     det = tf.linalg.det(g)
     prod = tf.matmul(g, tf.linalg.adjoint(g))
     with tf.Session() as sess:
         det, prod = sess.run([det, prod])
     self.assertTrue(np.allclose(det, 1))
     self.assertTrue(np.allclose(prod, np.eye(N), atol=1e-5))
     # test gauge fixing: rep should agree with the gauge choice
     rep_adj = tf.linalg.adjoint(rep)
     diag = tf.matrix_diag_part(rep[:, 0, :, :])
     off_diag = rep[:, 0, :, :] - tf.matrix_diag(diag)
     diff = diag[:, 1:] - diag[:, :-1]
     next_diagonal = tf.matrix_diag_part(
         tf.roll(rep[:, 1, :, :], shift=-1, axis=-1))[:, :-1]
     with tf.Session() as sess:
         r, rep_adj, off_diag, diff, next_diagonal = sess.run(
             [rep, rep_adj, off_diag, diff, next_diagonal])
     self.assertTrue(np.allclose(r, rep_adj, atol=1e-5))
     self.assertTrue(np.allclose(off_diag, 0, atol=1e-5))
     self.assertTrue(np.allclose(diff, np.abs(diff)))
     self.assertTrue(
         np.allclose(next_diagonal, -np.conj(next_diagonal), atol=1e-5))
     self.assertTrue(
         np.allclose(np.imag(next_diagonal),
                     np.abs(np.imag(next_diagonal)),
                     atol=1e-5))
     # test gauge fixing and action: g rep is mat
     mat_ = tf.einsum("bij,brjk,bkl->bril", g, rep, tf.linalg.adjoint(g))
     mat__ = algebra.action(g, rep)
     g_, rep_ = algebra.gauge_fixing(mat__)
     self.assertEqual(g_.shape, g.shape)
     self.assertEqual(rep_.shape, rep.shape)
     with tf.Session() as sess:
         g, g_, r, rep_, m, mat_, mat__ = sess.run(
             [g, g_, rep, rep_, mat, mat_, mat__])
     self.assertTrue(
         np.allclose(np.abs(g), np.abs(g_),
                     atol=1e-2))  # g and g_ may differ by an overall phase
     self.assertTrue(np.allclose(r, rep_, atol=1e-2))
     self.assertTrue(np.allclose(m, mat_, atol=1e-5))
     self.assertTrue(np.allclose(m, mat__, atol=1e-5))
     # test uniqueness of the gauge representative
     _, rep_ = algebra.gauge_fixing(algebra.action(u, mat))
     with tf.Session() as sess:
         rep1, rep2 = sess.run([rep, rep_])
     self.assertTrue(np.allclose(rep1, rep2, atol=1e-3))
     # check the shape of log_orbit_measure
     m = algebra.log_orbit_measure(rep)
     self.assertEqual(m.shape, [batch_size])
     # test infinitesimal action
     dg = algebra.random_algebra_element(batch_size)
     dmat = algebra.infinitesimal_action(dg, mat)
     dmat_adj = tf.linalg.adjoint(dmat)
     eps = 1e-4
     dmat_ = (algebra.action(
         tf.linalg.expm(1j * eps * algebra.vector_to_matrix(dg)), mat) -
              mat) / eps
     o = tf.reduce_sum(tf.conj(mat) * dmat, axis=[-2, -1])
     with tf.Session() as sess:
         dmat, dmat_adj, dmat_, o = sess.run([dmat, dmat_adj, dmat_, o])
     self.assertTrue(np.allclose(dmat, dmat_adj, atol=1e-5))
     self.assertTrue(np.allclose(dmat, dmat_, atol=1e-1))
     self.assertTrue(np.allclose(o, 0, atol=1e-4))
     # test conversion between vectors and matrices
     dg = algebra.random_algebra_element(batch_size)
     norm = tf.linalg.norm(dg, axis=-1)
     mat = algebra.vector_to_matrix(dg)
     dg_ = algebra.matrix_to_vector(mat)
     mat_ = algebra.vector_to_matrix(dg_)
     self.assertEqual(algebra.N, N)
     self.assertEqual(algebra.dim, N * N - 1)
     self.assertEqual(dg_.shape, [batch_size, algebra.dim])
     self.assertEqual(mat_.shape, [batch_size, algebra.N, algebra.N])
     with tf.Session() as sess:
         norm, dg, dg_, mat, mat_ = sess.run([norm, dg, dg_, mat, mat_])
     self.assertTrue(np.allclose(norm, np.sqrt(N * N - 1)))
     self.assertTrue(np.allclose(dg, dg_, atol=1e-5))
     self.assertTrue(np.allclose(mat, mat_, atol=1e-5))
Example #54
    def build(self):

        dd_q_input = Input(
            (self.config.nb_supervised_doc, self.config.doc_topk_term, 1),
            name='dd_q_input')
        dd_d_input = Input((self.config.nb_supervised_doc,
                            self.config.doc_topk_term, self.config.hist_size),
                           name='dd_d_input')

        dd_q_w = Dense(1,
                       kernel_initializer=self.initializer_gate,
                       use_bias=False,
                       name='dd_q_gate')(dd_q_input)
        dd_q_w = Lambda(lambda x: softmax(x, axis=2),
                        output_shape=(
                            self.config.nb_supervised_doc,
                            self.config.doc_topk_term,
                        ),
                        name='dd_q_softmax')(dd_q_w)

        z = dd_d_input
        for i in range(self.config.nb_layers):
            z = Dense(self.config.hidden_size[i],
                      activation='tanh',
                      kernel_initializer=self.initializer_fc,
                      name='hidden')(z)
        z = Dense(self.config.out_size,
                  kernel_initializer=self.initializer_fc,
                  name='dd_d_gate')(z)
        z = Reshape((
            self.config.nb_supervised_doc,
            self.config.doc_topk_term,
        ))(z)
        dd_q_w = Reshape((
            self.config.nb_supervised_doc,
            self.config.doc_topk_term,
        ))(dd_q_w)
        # out = Dot(axes=[2, 2], name='dd_pseudo_out')([z, dd_q_w])

        out = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=[2, 2]),
                     name='dd_pseudo_out')([z, dd_q_w])
        dd_init_out = Lambda(lambda x: tf.matrix_diag_part(x),
                             output_shape=(self.config.nb_supervised_doc, ),
                             name='dd_init_out')(out)
        '''
    dd_init_out = Lambda(lambda x: tf.reduce_sum(x, axis=2), output_shape=(self.config.nb_supervised_doc,))(z)
    '''
        #dd_out = Reshape((self.config.nb_supervised_doc,))(dd_out)

        # dd out gating
        dd_gate = Input((self.config.nb_supervised_doc, 1),
                        name='baseline_doc_score')
        dd_w = Dense(1,
                     kernel_initializer=self.initializer_gate,
                     use_bias=False,
                     name='dd_gate')(dd_gate)
        # dd_w = Lambda(lambda x: softmax(x, axis=1), output_shape=(self.config.nb_supervised_doc,), name='dd_softmax')(dd_w)

        # dd_out = Dot(axes=[1, 1], name='dd_out')([dd_init_out, dd_w])
        dd_w = Reshape((self.config.nb_supervised_doc, ))(dd_w)
        dd_init_out = Reshape((self.config.nb_supervised_doc, ))(dd_init_out)

        if self.config.method in [1, 3]:  # no doc gating, with dense layer
            z = dd_init_out
        elif self.config.method == 2:
            logging.info("Apply doc gating")
            z = Multiply(name='dd_out')([dd_init_out, dd_w])
        else:
            raise ValueError(
                "Method not initialized, please check config file")

        if self.config.method in [1, 2]:
            logging.info("Dense layer on top")
            z = Dense(self.config.merge_hidden,
                      activation='tanh',
                      name='merge_hidden')(z)
            out = Dense(self.config.merge_out, name='score')(z)
        else:
            logging.info(
                "Apply doc gating, No dense layer on top, sum up scores")
            out = Dot(axes=[1, 1], name='score')([z, dd_w])

        model = Model(inputs=[dd_q_input, dd_d_input, dd_gate], outputs=[out])
        print(model.summary())

        return model
Example #55
0
 def trace_represent(self):
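     # Diagonal and trace of the density matrix M_qa, concatenated into a
     # single match-representation vector.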
     self.density_diag = tf.matrix_diag_part(self.M_qa)
     self.density_trace = tf.expand_dims(tf.trace(self.M_qa), -1)
     self.match_represent = tf.concat([self.density_diag, self.density_trace], 1)
Example #56
0
    def _mean_of_covariance_given_quadrature_component(self, diag_only):
        p = self.mixture_distribution.probs

        # To compute E[Cov(Z|V)], we'll add matrices within three categories:
        # scaled-identity, diagonal, and full. Then we'll combine these at the end.
        scale_identity_multiplier = None
        diag = None
        full = None

        for k, aff in enumerate(self.interpolated_affine):
            s = aff.scale  # Just in case aff.scale has side-effects, we'll call once.
            if (s is None or isinstance(s, tf.linalg.LinearOperatorIdentity)):
                scale_identity_multiplier = add(scale_identity_multiplier,
                                                p[..., k, tf.newaxis])
            elif isinstance(s, tf.linalg.LinearOperatorScaledIdentity):
                scale_identity_multiplier = add(
                    scale_identity_multiplier,
                    (p[..., k, tf.newaxis] * tf.square(s.multiplier)))
            elif isinstance(s, tf.linalg.LinearOperatorDiag):
                diag = add(diag,
                           (p[..., k, tf.newaxis] * tf.square(s.diag_part())))
            else:
                x = (p[..., k, tf.newaxis, tf.newaxis] *
                     s.matmul(s.to_dense(), adjoint_arg=True))
                if diag_only:
                    x = tf.matrix_diag_part(x)
                full = add(full, x)

        # We must now account for the fact that the base distribution might have a
        # non-unity variance. Recall that, since X ~ iid Law(X_0),
        #   `Cov(SX+m) = S Cov(X) S.T = S S.T Diag(Var(X_0))`.
        # We can scale by `Var(X)` (vs `Cov(X)`) since X corresponds to `d` iid
        # samples from a scalar-event distribution.
        v = self.distribution.variance()
        if scale_identity_multiplier is not None:
            scale_identity_multiplier *= v
        if diag is not None:
            diag *= v[..., tf.newaxis]
        if full is not None:
            full *= v[..., tf.newaxis]

        if diag_only:
            # Apparently we don't need the full matrix, just the diagonal.
            r = add(diag, full)
            if r is None and scale_identity_multiplier is not None:
                ones = tf.ones(self.event_shape_tensor(), dtype=self.dtype)
                return scale_identity_multiplier[..., tf.newaxis] * ones
            return add(r, scale_identity_multiplier)

        # `None` indicates we don't know if the result is positive-definite.
        is_positive_definite = (True if all(
            aff.scale.is_positive_definite
            for aff in self.endpoint_affine) else None)

        to_add = []
        if diag is not None:
            to_add.append(
                tf.linalg.LinearOperatorDiag(
                    diag=diag, is_positive_definite=is_positive_definite))
        if full is not None:
            to_add.append(
                tf.linalg.LinearOperatorFullMatrix(
                    matrix=full, is_positive_definite=is_positive_definite))
        if scale_identity_multiplier is not None:
            to_add.append(
                tf.linalg.LinearOperatorScaledIdentity(
                    num_rows=self.event_shape_tensor()[0],
                    multiplier=scale_identity_multiplier,
                    is_positive_definite=is_positive_definite))

        return (linop_add_lib.add_operators(to_add)[0].to_dense()
                if to_add else None)
Example #57
0
File: mdn.py Project: uscresl/bayes_sim
 def tril_matrix(elements):
     tfd = tfp.distributions
     tril_m = tfd.fill_triangular(elements)
     # tf.matrix_set_diag returns a new tensor, so capture the result;
     # otherwise the exponentiated diagonal is silently discarded.
     tril_m = tf.matrix_set_diag(tril_m, tf.exp(tf.matrix_diag_part(tril_m)))
     return tril_m
Example #58
0
    def call(self, inputs):
        self.call_weights()
        if (not isinstance(inputs, ed.RandomVariable)
                and not isinstance(self.kernel, ed.RandomVariable)
                and not isinstance(self.bias, ed.RandomVariable)):
            return super(DenseDVI, self).call(inputs)
        inputs_mean, inputs_variance, inputs_covariance = get_moments(inputs)
        kernel_mean, kernel_variance, _ = get_moments(self.kernel)
        if self.use_bias:
            bias_mean, _, bias_covariance = get_moments(self.bias)

        # E[outputs] = E[inputs] * E[kernel] + E[bias]
        mean = tf.tensordot(inputs_mean, kernel_mean, [[-1], [0]])
        if self.use_bias:
            mean = tf.nn.bias_add(mean, bias_mean)

        # Cov = E[inputs**2] Cov(kernel) + E[W]^T Cov(inputs) E[W] + Cov(bias)
        # For first term, assume Cov(kernel) = 0 on off-diagonals so we only
        # compute diagonal term.
        covariance_diag = tf.tensordot(inputs_variance + inputs_mean**2,
                                       kernel_variance, [[-1], [0]])
        # Compute quadratic form E[W]^T Cov E[W] from right-to-left. First is
        #  [..., features, features], [features, units] -> [..., features, units].
        cov_w = tf.tensordot(inputs_covariance, kernel_mean, [[-1], [0]])
        # Next is [..., features, units], [features, units] -> [..., units, units].
        w_cov_w = tf.tensordot(cov_w, kernel_mean, [[-2], [0]])
        covariance = w_cov_w
        if self.use_bias:
            covariance += bias_covariance
        covariance = tf.matrix_set_diag(
            covariance,
            tf.matrix_diag_part(covariance) + covariance_diag)

        if self.activation in (tf.keras.activations.relu, tf.nn.relu):
            # Compute activation's moments with variable names from Wu et al. (2018).
            variance = tf.matrix_diag_part(covariance)
            scale = tf.sqrt(variance)
            mu = mean / (scale + tf.keras.backend.epsilon())
            mean = scale * soft_relu(mu)

            pairwise_variances = (tf.expand_dims(variance, -1) *
                                  tf.expand_dims(variance, -2)
                                  )  # [..., units, units]
            rho = covariance / tf.sqrt(pairwise_variances +
                                       tf.keras.backend.epsilon())
            rho = tf.clip_by_value(rho,
                                   -1. / (1. + tf.keras.backend.epsilon()),
                                   1. / (1. + tf.keras.backend.epsilon()))
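            # Element-wise product of standard deviations, recovered (up to
            # epsilon) as covariance / rho, i.e. sqrt(var_i * var_j).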
            s = covariance / (rho + tf.keras.backend.epsilon())
            mu1 = tf.expand_dims(mu, -1)  # [..., units, 1]
            mu2 = tf.matrix_transpose(mu1)  # [..., 1, units]
            a = (soft_relu(mu1) * soft_relu(mu2) +
                 rho * tfp.distributions.Normal(0., 1.).cdf(mu1) *
                 tfp.distributions.Normal(0., 1.).cdf(mu2))
            gh = tf.asinh(rho)
            bar_rho = tf.sqrt(1. - rho**2)
            gr = gh + rho / (1. + bar_rho)
            # Include numerically stable versions of gr and rho when multiplying or
            # dividing them. The sign of gr*rho and rho/gr is always positive.
            safe_gr = tf.abs(gr) + 0.5 * tf.keras.backend.epsilon()
            safe_rho = tf.abs(rho) + tf.keras.backend.epsilon()
            exp_negative_q = gr / (
                2. * math.pi) * tf.exp(-safe_rho / (2. * safe_gr *
                                                    (1 + bar_rho)) +
                                       (gh - rho) /
                                       (safe_gr * safe_rho) * mu1 * mu2)
            covariance = s * (a + exp_negative_q)
        elif self.activation not in (tf.keras.activations.linear, None):
            raise NotImplementedError(
                'Activation is {}. Deterministic variational '
                'inference is only available if activation is '
                'ReLU or None.'.format(self.activation))

        return ed.MultivariateNormalFullCovariance(mean, covariance)
Example #59
0
File: gmm.py Project: harvardchen/UCD
    def cov_diag_loss(self):
        with tf.variable_scope("GMM_diag_loss"):
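            # Sum of reciprocals of the covariance diagonals; this grows as any
            # variance term approaches zero, penalizing singular covariances.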
            diag_loss = tf.reduce_sum(tf.divide(1, tf.matrix_diag_part(self.sigma)))

        return diag_loss
Example #60
0
File: util.py Project: thomkeh/AutoGP
def log_cholesky_det(chol):
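    # For A = L L^T with lower-triangular Cholesky factor L,
    # log|A| = 2 * sum(log(diag(L))).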
    return 2 * tf.reduce_sum(tf.log(tf.matrix_diag_part(chol)), axis=-1)
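
A minimal usage sketch (not from the AutoGP repository), assuming the same TF 1.x graph-mode APIs as the snippet above; it checks log_cholesky_det against a directly computed log-determinant:

import numpy as np
import tensorflow as tf

a = np.array([[4.0, 1.0], [1.0, 3.0]], dtype=np.float64)  # small positive-definite matrix
chol_a = tf.cholesky(a)
with tf.Session() as sess:
    # Both values should equal log(det(a)) = log(11.0).
    print(sess.run([log_cholesky_det(chol_a), tf.log(tf.matrix_determinant(a))]))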