Example #1
def _expand_independent_outputs(fvar, full_cov, full_output_cov):
    """
    Reshapes fvar to the correct shape, specified by `full_cov` and `full_output_cov`.

    :param fvar: has shape N x P (full_cov = False) or P x N x N (full_cov = True).
    :return:
    1. full_cov: True and full_output_cov: True
       fvar N x P x N x P
    2. full_cov: True and full_output_cov: False
       fvar P x N x N
    3. full_cov: False and full_output_cov: True
       fvar N x P x P
    4. full_cov: False and full_output_cov: False
       fvar N x P
    """
    if full_cov and full_output_cov:
        fvar = tf.matrix_diag(tf.transpose(fvar))   # N x N x P x P
        fvar = tf.transpose(fvar, [0, 2, 1, 3])  # N x P x N x P
    if not full_cov and full_output_cov:
        fvar = tf.matrix_diag(fvar)   # N x P x P
    if full_cov and not full_output_cov:
        pass  # P x N x N
    if not full_cov and not full_output_cov:
        pass  # N x P

    return fvar
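A minimal NumPy sketch (hypothetical sizes N=3, P=2; not part of the original module) of the shape bookkeeping described in the docstring above:

import numpy as np

N, P = 3, 2
fvar_marginal = np.random.rand(N, P)     # full_cov=False input
fvar_full = np.random.rand(P, N, N)      # full_cov=True input

# full_cov=True, full_output_cov=True: P x N x N -> N x P x N x P
tmp = np.einsum('pnm,pq->nmpq', fvar_full, np.eye(P))   # N x N x P x P, P entries on the diagonal
case1 = tmp.transpose(0, 2, 1, 3)                       # N x P x N x P
print(case1.shape)                                      # (3, 2, 3, 2)

# full_cov=False, full_output_cov=True: N x P -> N x P x P
case3 = np.stack([np.diag(v) for v in fvar_marginal])   # N x P x P
print(case3.shape)                                      # (3, 2, 2)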
Example #2
def _expectation(p, rbf_kern, feat1, lin_kern, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - Ka_{.,.} :: RBF kernel (rbf_kern)
        - Kb_{.,.} :: Linear kernel (lin_kern)
    Different Z1 and Z2 are handled if p is diagonal and the two kernels have disjoint
    active_dims, in which case the joint expectations simplify into a product of expectations.

    :return: NxM1xM2
    """
    if rbf_kern.on_separate_dims(lin_kern) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (rbf_kern, feat1))
        eKxz2 = expectation(p, (lin_kern, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2:
        raise NotImplementedError("Features have to be the same for both kernels.")

    if rbf_kern.active_dims != lin_kern.active_dims:
        raise NotImplementedError("active_dims have to be the same for both kernels.")

    with params_as_tensors_for(rbf_kern), params_as_tensors_for(lin_kern), \
         params_as_tensors_for(feat1), params_as_tensors_for(feat2):
        # use only active dimensions
        Xcov = rbf_kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = rbf_kern._slice(feat1.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        lin_kern_variances = lin_kern.variance if lin_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + lin_kern.variance

        rbf_kern_lengthscales = rbf_kern.lengthscales if rbf_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + rbf_kern.lengthscales  ## Begin RBF eKxz code:

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(rbf_kern_lengthscales ** 2) + Xcov)  # NxDxD

        Z_transpose = tf.transpose(Z)
        all_diffs = Z_transpose - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(rbf_kern_lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N
        eKxz_rbf = rbf_kern.variance * (determinants[:, None] * exponent_mahalanobis)  ## NxM <- End RBF eKxz code

        tiled_Z = tf.tile(tf.expand_dims(Z_transpose, 0), (N, 1, 1))  # NxDxM
        z_L_inv_Xcov = tf.matmul(tiled_Z, Xcov / rbf_kern_lengthscales[:, None] ** 2., transpose_a=True)  # NxMxD

        cross_eKzxKxz = tf.cholesky_solve(
            chol_L_plus_Xcov, (lin_kern_variances * rbf_kern_lengthscales ** 2.)[..., None] * tiled_Z)  # NxDxM

        cross_eKzxKxz = tf.matmul((z_L_inv_Xcov + Xmu[:, None, :]) * eKxz_rbf[..., None], cross_eKzxKxz)  # NxMxM
        return cross_eKzxKxz
Example #3
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - Ka_{.,.}, Kb_{.,.} :: RBF kernels
    Ka and Kb as well as Z1 and Z2 can differ from each other, but this is supported
    only if the Gaussian p is Diagonal (p.cov NxD) and Ka, Kb have disjoint active_dims,
    in which case the joint expectations simplify into a product of expectations.

    :return: NxMxM
    """
    if kern1.on_separate_dims(kern2) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (kern1, feat1))
        eKxz2 = expectation(p, (kern2, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2 or kern1 != kern2:
        raise NotImplementedError("The expectation over two kernels has only an "
                                  "analytical implementation if both kernels are equal.")

    kern = kern1
    feat = feat1

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        squared_lengthscales = kern.lengthscales ** 2. if kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales ** 2.

        sqrt_det_L = tf.reduce_prod(0.5 * squared_lengthscales) ** 0.5
        C = tf.cholesky(0.5 * tf.matrix_diag(squared_lengthscales) + Xcov)  # NxDxD
        dets = sqrt_det_L / tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(C)), axis=1))  # N

        C_inv_mu = tf.matrix_triangular_solve(C, tf.expand_dims(Xmu, 2), lower=True)  # NxDx1
        C_inv_z = tf.matrix_triangular_solve(C,
                                             tf.tile(tf.expand_dims(tf.transpose(Z) / 2., 0), [N, 1, 1]),
                                             lower=True)  # NxDxM
        mu_CC_inv_mu = tf.expand_dims(tf.reduce_sum(tf.square(C_inv_mu), 1), 2)  # Nx1x1
        z_CC_inv_z = tf.reduce_sum(tf.square(C_inv_z), 1)  # NxM
        zm_CC_inv_zn = tf.matmul(C_inv_z, C_inv_z, transpose_a=True)  # NxMxM
        two_z_CC_inv_mu = 2 * tf.matmul(C_inv_z, C_inv_mu, transpose_a=True)[:, :, 0]  # NxM

        exponent_mahalanobis = mu_CC_inv_mu + tf.expand_dims(z_CC_inv_z, 1) + \
                               tf.expand_dims(z_CC_inv_z, 2) + 2 * zm_CC_inv_zn - \
                               tf.expand_dims(two_z_CC_inv_mu, 2) - tf.expand_dims(two_z_CC_inv_mu, 1)  # NxMxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxMxM

        # Compute sqrt(self.K(Z)) explicitly to prevent automatic gradient from
        # being NaN sometimes, see pull request #615
        kernel_sqrt = tf.exp(-0.25 * kern.square_dist(Z, None))
        return kern.variance ** 2 * kernel_sqrt * \
               tf.reshape(dets, [N, 1, 1]) * exponent_mahalanobis
Example #4
def _quadrature_expectation(p, obj1, feature1, obj2, feature2, num_gauss_hermite_points):
    """
    General handling of quadrature expectations for Gaussians and DiagonalGaussians
    Fallback method for missing analytic expectations
    """
    num_gauss_hermite_points = 100 if num_gauss_hermite_points is None else num_gauss_hermite_points

    warnings.warn("Quadrature is used to calculate the expectation. This means that "
                  "an analytical implementations is not available for the given combination.")

    if obj2 is None:
        eval_func = lambda x: get_eval_func(obj1, feature1)(x)
    elif obj1 is None:
        raise NotImplementedError("First object cannot be None.")
    else:
        eval_func = lambda x: (get_eval_func(obj1, feature1, np.s_[:, :, None])(x) *
                               get_eval_func(obj2, feature2, np.s_[:, None, :])(x))

    if isinstance(p, DiagonalGaussian):
        if isinstance(obj1, kernels.Kernel) and isinstance(obj2, kernels.Kernel) \
                and obj1.on_separate_dims(obj2):  # no joint expectations required

            eKxz1 = quadrature_expectation(p, (obj1, feature1),
                                           num_gauss_hermite_points=num_gauss_hermite_points)
            eKxz2 = quadrature_expectation(p, (obj2, feature2),
                                           num_gauss_hermite_points=num_gauss_hermite_points)
            return eKxz1[:, :, None] * eKxz2[:, None, :]

        else:
            cov = tf.matrix_diag(p.cov)
    else:
        cov = p.cov
    return mvnquad(eval_func, p.mu, cov, num_gauss_hermite_points)
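For intuition on what the quadrature fallback computes, here is a hedged one-dimensional sketch of Gauss-Hermite integration against a Gaussian; mvnquad generalizes this to multivariate inputs, and the helper name below is ours:

import numpy as np

def gauss_hermite_expectation(f, mu, var, num_points=100):
    # Approximate E[f(x)] for x ~ N(mu, var) with Gauss-Hermite quadrature.
    x, w = np.polynomial.hermite.hermgauss(num_points)   # nodes/weights for the weight exp(-x^2)
    # change of variables x -> mu + sqrt(2*var) * x; normalise the weights by sqrt(pi)
    return np.sum(w * f(mu + np.sqrt(2.0 * var) * x)) / np.sqrt(np.pi)

# sanity check against the closed form E[x^2] = mu^2 + var = 2.55
print(gauss_hermite_expectation(lambda x: x ** 2, mu=1.5, var=0.3))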
Example #5
    def _build_predict(self, Xnew, full_cov=False):
        """
        The posterior distribution of F is given by
            q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1)
        Here we project this to F*, the values of the GP at Xnew, which is given by
            q(F*) = N(F* | K_{*F} alpha + mean,
                      K_{**} - K_{*f} [K_{ff} + diag(lambda**-2)]^-1 K_{f*})
        """

        # compute kernel things
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X)

        # predictive mean
        f_mean = tf.matmul(Kx, self.q_alpha, transpose_a=True) + self.mean_function(Xnew)

        # predictive var
        A = K + tf.matrix_diag(tf.transpose(1. / tf.square(self.q_lambda)))
        L = tf.cholesky(A)
        Kx_tiled = tf.tile(tf.expand_dims(Kx, 0), [self.num_latent, 1, 1])
        LiKx = tf.matrix_triangular_solve(L, Kx_tiled)
        if full_cov:
            f_var = self.kern.K(Xnew) - tf.matmul(LiKx, LiKx, transpose_a=True)
        else:
            f_var = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(LiKx), 1)
        return f_mean, tf.transpose(f_var)
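A hedged NumPy sketch (toy kernel, made-up sizes and names) of the predictive-variance computation above, checking that the full_cov and diagonal branches agree on the marginal variances:

import numpy as np

rng = np.random.RandomState(0)
n, m = 5, 2
X, Xnew = rng.randn(n, 1), rng.randn(m, 1)
kfun = lambda A, B: np.exp(-0.5 * (A - B.T) ** 2)   # simple 1-D RBF kernel
K, Kx, Kss = kfun(X, X), kfun(X, Xnew), kfun(Xnew, Xnew)
lam = rng.rand(n) + 0.5                             # stands in for q_lambda (one latent function)

A = K + np.diag(1.0 / lam ** 2)
L = np.linalg.cholesky(A)
LiKx = np.linalg.solve(L, Kx)                       # general solve standing in for the triangular solve
f_var_full = Kss - LiKx.T @ LiKx                    # full_cov=True branch
f_var_diag = np.diag(Kss) - np.sum(LiKx ** 2, 0)    # full_cov=False branch
print(np.allclose(np.diag(f_var_full), f_var_diag)) # True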
Example #6
def _expectation(p, kern, feat, none1, none2, nghp=None):
    """
    Compute the expectation:
    <K_{X, Z}>_p(X)
        - K_{.,.} :: RBF kernel

    :return: NxM
    """
    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)
        D = tf.shape(Xmu)[1]
        if kern.ARD:
            lengthscales = kern.lengthscales
        else:
            lengthscales = tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD

        all_diffs = tf.transpose(Z) - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)
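As a sanity check on the closed form above, a hedged NumPy sketch compares the analytic <K_{x,Z}> for an RBF kernel with a Monte Carlo estimate for a single Gaussian input (all sizes and values are illustrative):

import numpy as np

rng = np.random.RandomState(0)
D, M = 2, 4
mu, Sigma = rng.randn(D), np.diag(rng.rand(D) + 0.1)   # one input distribution N(mu, Sigma)
Z = rng.randn(M, D)
var, ell = 1.3, rng.rand(D) + 0.5                       # RBF variance and lengthscales

# analytic <K_{x,Z}>, following the code above
L_plus_S = np.diag(ell ** 2) + Sigma
det_term = np.sqrt(np.prod(ell ** 2) / np.linalg.det(L_plus_S))
diff = Z - mu                                           # M x D
maha = np.einsum('md,dk,mk->m', diff, np.linalg.inv(L_plus_S), diff)
analytic = var * det_term * np.exp(-0.5 * maha)

# Monte Carlo estimate of the same expectation
xs = rng.multivariate_normal(mu, Sigma, size=200000)
rbf = lambda x, z: var * np.exp(-0.5 * np.sum((x - z) ** 2 / ell ** 2, -1))
mc = rbf(xs[:, None, :], Z[None, :, :]).mean(0)
print(np.max(np.abs(analytic - mc)))                    # small, ~1e-3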
Example #7
def _expectation(p, mean, none, kern, feat, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <x_n K_{x_n, Z}>_p(x_n)
        - K_{.,.} :: RBF kernel

    :return: NxDxM
    """
    Xmu, Xcov = p.mu, p.cov

    with tf.control_dependencies([tf.assert_equal(
            tf.shape(Xmu)[1], tf.constant(kern.input_dim, settings.tf_int),
            message="Currently cannot handle slicing in exKxz.")]):
        Xmu = tf.identity(Xmu)

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        D = tf.shape(Xmu)[1]
        lengthscales = kern.lengthscales if kern.ARD \
            else tf.zeros((D,), dtype=settings.float_type) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD
        all_diffs = tf.transpose(feat.Z) - tf.expand_dims(Xmu, 2)  # NxDxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        exponent_mahalanobis = tf.cholesky_solve(chol_L_plus_Xcov, all_diffs)  # NxDxM
        non_exponent_term = tf.matmul(Xcov, exponent_mahalanobis, transpose_a=True)
        non_exponent_term = tf.expand_dims(Xmu, 2) + non_exponent_term  # NxDxM

        exponent_mahalanobis = tf.reduce_sum(all_diffs * exponent_mahalanobis, 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)[:, None, :] * non_exponent_term
  def Test(self):
    np.random.seed(1)
    n = shape_[-1]
    batch_shape = shape_[:-2]
    a = np.random.uniform(
        low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(dtype_)
    a += a.T
    a = np.tile(a, batch_shape + (1, 1))
    if dtype_ == np.float32:
      atol = 1e-4
    else:
      atol = 1e-12
    for compute_v in False, True:
      np_e, np_v = np.linalg.eig(a)
      with self.test_session():
        if compute_v:
          tf_e, tf_v = tf.self_adjoint_eig(tf.constant(a))

          # Check that V*diag(E)*V^T is close to A.
          a_ev = tf.batch_matmul(
              tf.batch_matmul(tf_v, tf.matrix_diag(tf_e)), tf_v, adj_y=True)
          self.assertAllClose(a_ev.eval(), a, atol=atol)

          # Compare to numpy.linalg.eig.
          CompareEigenDecompositions(self, np_e, np_v, tf_e.eval(), tf_v.eval(),
                                     atol)
        else:
          tf_e = tf.self_adjoint_eigvals(tf.constant(a))
          self.assertAllClose(
              np.sort(np_e, -1), np.sort(tf_e.eval(), -1), atol=atol)
Example #9
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - Ka_{.,.}, Kb_{.,.} :: Linear kernels
    Ka and Kb as well as Z1 and Z2 can differ from each other, but this is supported
    only if the Gaussian p is Diagonal (p.cov NxD) and Ka, Kb have disjoint active_dims,
    in which case the joint expectations simplify into a product of expectations.

    :return: NxMxM
    """
    if kern1.on_separate_dims(kern2) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (kern1, feat1))
        eKxz2 = expectation(p, (kern2, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if kern1 != kern2 or feat1 != feat2:
        raise NotImplementedError("The expectation over two kernels has only an "
                                  "analytical implementation if both kernels are equal.")

    kern = kern1
    feat = feat1

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)

        N = tf.shape(Xmu)[0]
        var_Z = kern.variance * Z
        tiled_Z = tf.tile(tf.expand_dims(var_Z, 0), (N, 1, 1))  # NxMxD
        XX = Xcov + tf.expand_dims(Xmu, 1) * tf.expand_dims(Xmu, 2)  # NxDxD
        return tf.matmul(tf.matmul(tiled_Z, XX), tiled_Z, transpose_b=True)
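Per data point the linear-kernel case reduces to Z diag(sigma^2) (Sigma + mu mu^T) diag(sigma^2) Z^T; a hedged NumPy sketch comparing this with Monte Carlo (illustrative sizes):

import numpy as np

rng = np.random.RandomState(1)
D, M = 2, 3
mu, Sigma = rng.randn(D), np.diag(rng.rand(D) + 0.1)
Z = rng.randn(M, D)
var = rng.rand(D) + 0.5                        # per-dimension linear-kernel variance (ARD)

# analytic <K_{Z,x} K_{x,Z}>, following the code above
var_Z = var * Z                                # M x D
XX = Sigma + np.outer(mu, mu)                  # D x D
analytic = var_Z @ XX @ var_Z.T                # M x M

# Monte Carlo estimate
xs = rng.multivariate_normal(mu, Sigma, size=200000)   # S x D
Kzx = xs @ var_Z.T                             # S x M, since K(z, x) = sum_d var_d * z_d * x_d
mc = np.einsum('sm,sn->mn', Kzx, Kzx) / len(xs)
print(np.max(np.abs(analytic - mc)))           # small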
  def testSampleWithBroadcastScale(self):
    # mu corresponds to a 2-batch of 3-variate normals
    mu = np.zeros([2, 3])

    # diag corresponds to no batches of 3-variate normals
    diag = np.ones([3])

    with self.test_session():
      dist = tfd.VectorExponentialDiag(mu, diag, validate_args=True)

      mean = dist.mean()
      self.assertAllEqual([2, 3], mean.get_shape())
      self.assertAllClose(mu + diag, mean.eval())

      n = int(1e4)
      samps = dist.sample(n, seed=0).eval()
      samps_centered = samps - samps.mean(axis=0)
      cov_mat = tf.matrix_diag(diag).eval()**2
      sample_cov = np.matmul(samps_centered.transpose([1, 2, 0]),
                             samps_centered.transpose([1, 0, 2])) / n

      self.assertAllClose(mu + diag, samps.mean(axis=0),
                          atol=0.10, rtol=0.05)
      self.assertAllClose([cov_mat, cov_mat], sample_cov,
                          atol=0.10, rtol=0.05)
Example #11
 def K(self, X, X2=None, presliced=False):
     if X2 is None:
         d = tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
         return tf.matrix_diag(d)
     else:
         shape = tf.stack([tf.shape(X)[0], tf.shape(X2)[0]])
         return tf.zeros(shape, settings.float_type)
  def _operator_and_mat_and_feed_dict(self, shape, dtype, use_placeholder):
    shape = list(shape)
    diag_shape = shape[:-1]

    diag = tf.random_normal(diag_shape, dtype=dtype.real_dtype)
    if dtype.is_complex:
      diag = tf.complex(
          diag, tf.random_normal(diag_shape, dtype=dtype.real_dtype))

    diag_ph = tf.placeholder(dtype=dtype)

    if use_placeholder:
      # Evaluate the diag here because (i) you cannot feed a tensor, and (ii)
      # diag is random and we want the same value used for both mat and
      # feed_dict.
      diag = diag.eval()
      operator = linalg.LinearOperatorDiag(diag_ph)
      feed_dict = {diag_ph: diag}
    else:
      operator = linalg.LinearOperatorDiag(diag)
      feed_dict = None

    mat = tf.matrix_diag(diag)

    return operator, mat, feed_dict
Example #13
 def testVector(self):
   with self.test_session(use_gpu=self._use_gpu):
     v = np.array([1.0, 2.0, 3.0])
     mat = np.diag(v)
     v_diag = tf.matrix_diag(v)
     self.assertEqual((3, 3), v_diag.get_shape())
     self.assertAllEqual(v_diag.eval(), mat)
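For reference, a hedged NumPy stand-in for the same op that covers both the vector case above and batched input (the helper name is ours, not TensorFlow's):

import numpy as np

def np_matrix_diag(v):
    # Place the last axis of v on the diagonal of a new trailing square dimension.
    out = np.zeros(v.shape + (v.shape[-1],), dtype=v.dtype)
    rows = np.arange(v.shape[-1])
    out[..., rows, rows] = v
    return out

print(np_matrix_diag(np.array([1.0, 2.0, 3.0])))                            # 3x3 diagonal matrix
print(np_matrix_diag(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])).shape)   # (2, 3, 3)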
Example #14
 def K(self, X, X2=None, full_output_cov=True):
     K = self.kern.K(X, X2)  # N x N2
     if full_output_cov:
         Ks = tf.tile(K[..., None], [1, 1, self.P])  # N x N2 x P
         return tf.transpose(tf.matrix_diag(Ks), [0, 2, 1, 3])  # N x P x N2 x P
     else:
         return tf.tile(K[None, ...], [self.P, 1, 1])  # P x N x N2
  def test_broadcast_apply_and_solve(self):
    # These cannot be done in the automated (base test class) tests since they
    # test shapes that tf.matmul cannot handle.
    # In particular, tf.matmul does not broadcast.
    with self.test_session() as sess:
      x = tf.random_normal(shape=(2, 2, 3, 4))

      # This LinearOperatorDiag will be broadcast to (2, 2, 3, 3) during solve
      # and apply with 'x' as the argument.
      diag = tf.random_uniform(shape=(2, 1, 3))
      operator = linalg.LinearOperatorDiag(diag)
      self.assertAllEqual((2, 1, 3, 3), operator.shape)

      # Create a batch matrix with the broadcast shape of operator.
      diag_broadcast = tf.concat(1, (diag, diag))
      mat = tf.matrix_diag(diag_broadcast)
      self.assertAllEqual((2, 2, 3, 3), mat.get_shape())  # being pedantic.

      operator_apply = operator.apply(x)
      mat_apply = tf.matmul(mat, x)
      self.assertAllEqual(operator_apply.get_shape(), mat_apply.get_shape())
      self.assertAllClose(*sess.run([operator_apply, mat_apply]))

      operator_solve = operator.solve(x)
      mat_solve = tf.matrix_solve(mat, x)
      self.assertAllEqual(operator_solve.get_shape(), mat_solve.get_shape())
      self.assertAllClose(*sess.run([operator_solve, mat_solve]))
Example #16
 def K(self, X, X2=None, presliced=False):
     if X2 is None:
         d = tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))
         return tf.matrix_diag(d)
     else:
         shape = tf.concat([tf.shape(X)[:-2],
                            tf.reshape(tf.shape(X)[-2], [1]),
                            tf.reshape(tf.shape(X2)[-2], [1])], 0)
         return tf.zeros(shape, settings.float_type)
  def testSample(self):
    mu = [-1., 1]
    diag = [1., -2]
    dist = tfd.VectorLaplaceDiag(mu, diag, validate_args=True)
    samps = self.evaluate(dist.sample(int(1e4), seed=0))
    cov_mat = 2. * self.evaluate(tf.matrix_diag(diag))**2

    self.assertAllClose(mu, samps.mean(axis=0), atol=0., rtol=0.05)
    self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.05, rtol=0.05)
Example #18
 def K(self, X, X2=None):
     X, X2 = self._slice(X, X2)
     X = tf.cast(X[:, 0], tf.int32)
     if X2 is None:
         X2 = X
     else:
         X2 = tf.cast(X2[:, 0], tf.int32)
     B = tf.matmul(self.W, self.W, transpose_b=True) + tf.matrix_diag(self.kappa)
     return tf.gather(tf.transpose(tf.gather(B, X2)), X)
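A small NumPy sketch of the coregionalization matrix B = W W^T + diag(kappa) and the gather pattern used above (sizes and indices are illustrative):

import numpy as np

rng = np.random.RandomState(0)
P, R = 3, 2                        # number of outputs, rank of W
W, kappa = rng.randn(P, R), rng.rand(P)
B = W @ W.T + np.diag(kappa)       # coregionalization matrix, as in the K method above

X = np.array([0, 2, 1])            # integer output indices of the first argument
X2 = np.array([1, 1])              # and of the second
print(B[np.ix_(X, X2)])            # equivalent of the gather / transpose / gather composition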
Example #19
 def testGrad(self):
   shapes = ((3,), (7, 4))
   with self.test_session(use_gpu=self._use_gpu):
     for shape in shapes:
       x = tf.constant(np.random.rand(*shape), np.float32)
       y = tf.matrix_diag(x)
       error = tf.test.compute_gradient_error(x, x.get_shape().as_list(),
                                              y, y.get_shape().as_list())
       self.assertLess(error, 1e-4)
 def _build_operator_and_mat(self, batch_shape, k, dtype=np.float64):
     # Build an identity matrix with right shape and dtype.
     # Build an operator that should act the same way.
     batch_shape = list(batch_shape)
     diag_shape = batch_shape + [k]
     matrix_shape = batch_shape + [k, k]
     diag = tf.ones(diag_shape, dtype=dtype)
     identity_matrix = tf.matrix_diag(diag)
     operator = operator_pd_identity.OperatorPDIdentity(matrix_shape, dtype)
     return operator, identity_matrix.eval()
Example #21
  def testSample(self):
    mu = [-1.0, 1.0]
    diag = [1.0, 2.0]
    with self.test_session():
      dist = distributions.MultivariateNormalDiag(mu, diag)
      samps = dist.sample_n(1000, seed=0).eval()
      cov_mat = tf.matrix_diag(diag).eval()**2

      self.assertAllClose(mu, samps.mean(axis=0), atol=0.1)
      self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.1)
Example #22
  def testMultivariateNormalDiagWithSoftplusStDev(self):
    mu = [-1.0, 1.0]
    diag = [-1.0, -2.0]
    with self.test_session():
      dist = distributions.MultivariateNormalDiagWithSoftplusStDev(mu, diag)
      samps = dist.sample(1000, seed=0).eval()
      cov_mat = tf.matrix_diag(tf.nn.softplus(diag)).eval()**2

      self.assertAllClose(mu, samps.mean(axis=0), atol=0.1)
      self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.1)
Example #23
  def testMultivariateNormalDiagWithSoftplusScale(self):
    mu = [-1.0, 1.0]
    diag = [-1.0, -2.0]
    dist = tfd.MultivariateNormalDiagWithSoftplusScale(
        mu, diag, validate_args=True)
    samps = self.evaluate(dist.sample(1000, seed=0))
    cov_mat = self.evaluate(tf.matrix_diag(tf.nn.softplus(diag))**2)

    self.assertAllClose(mu, samps.mean(axis=0), atol=0.1)
    self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.1)
 def _covariance(self):
   # Let
   #   W = (w1,...,wk), with wj ~ iid Exponential(0, 1).
   # Then this distribution is
   #   X = loc + LW,
   # and then since Cov(wi, wj) = 1 if i=j, and 0 otherwise,
   #   Cov(X) = L Cov(W W^T) L^T = L L^T.
   if distribution_util.is_diagonal_scale(self.scale):
     return tf.matrix_diag(tf.square(self.scale.diag_part()))
   else:
     return self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)
Example #25
  def testSample(self):
    mu = [-1., 1]
    diag = [1., -2]
    with self.test_session():
      dist = tfd.MultivariateNormalDiag(mu, diag, validate_args=True)
      samps = dist.sample(int(1e3), seed=0).eval()
      cov_mat = tf.matrix_diag(diag).eval()**2

      self.assertAllClose(mu, samps.mean(axis=0),
                          atol=0., rtol=0.05)
      self.assertAllClose(cov_mat, np.cov(samps.T),
                          atol=0.05, rtol=0.05)
  def testSample(self):
    mu = [-2., 1]
    diag = [1., -2]
    with self.test_session():
      dist = tfd.VectorExponentialDiag(mu, diag, validate_args=True)
      samps = dist.sample(int(1e4), seed=0).eval()
      cov_mat = tf.matrix_diag(diag).eval()**2

      self.assertAllClose([-2 + 1, 1. - 2], samps.mean(axis=0),
                          atol=0., rtol=0.05)
      self.assertAllClose(cov_mat, np.cov(samps.T),
                          atol=0.05, rtol=0.05)
 def _covariance(self):
   # Let
   #   W = (w1,...,wk), with wj ~ iid Laplace(0, 1).
   # Then this distribution is
   #   X = loc + LW,
   # and since E[X] = loc,
   #   Cov(X) = E[LW W^T L^T] = L E[W W^T] L^T.
   # Since E[wi wj] = 0 if i != j, and 2 if i == j, we have
   #   Cov(X) = 2 LL^T
   if distribution_util.is_diagonal_scale(self.scale):
     return 2. * tf.matrix_diag(tf.square(self.scale.diag_part()))
   else:
     return 2. * self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)
Example #28
 def testBatchVector(self):
   with self.test_session(use_gpu=self._use_gpu):
     v_batch = np.array([[1.0, 2.0, 3.0],
                         [4.0, 5.0, 6.0]])
     mat_batch = np.array(
         [[[1.0, 0.0, 0.0],
           [0.0, 2.0, 0.0],
           [0.0, 0.0, 3.0]],
          [[4.0, 0.0, 0.0],
           [0.0, 5.0, 0.0],
           [0.0, 0.0, 6.0]]])
     v_batch_diag = tf.matrix_diag(v_batch)
     self.assertEqual((2, 3, 3), v_batch_diag.get_shape())
     self.assertAllEqual(v_batch_diag.eval(), mat_batch)
  def _updated_mat(self, mat, v, diag):
    # Get dense matrix defined by its square root, which is an update of `mat`:
    # A = (mat + v D v^T) (mat + v D v^T)^T
    # D is the diagonal matrix with `diag` on the diagonal.

    # If diag is None, then it defaults to the identity matrix, so DV^T = V^T
    if diag is None:
      diag_vt = tf.matrix_transpose(v)
    else:
      diag_mat = tf.matrix_diag(diag)
      diag_vt = tf.matmul(diag_mat, v, adjoint_b=True)

    v_diag_vt = tf.matmul(v, diag_vt)
    sqrt = mat + v_diag_vt
    a = tf.matmul(sqrt, sqrt, adjoint_b=True)
    return a.eval()
  def _operator_and_mat_and_feed_dict(self, shape, dtype, use_placeholder):
    diag = linear_operator_test_util.random_sign_uniform(
        shape[:-1], minval=1., maxval=2., dtype=dtype)
    if use_placeholder:
      diag_ph = tf.placeholder(dtype=dtype)
      # Evaluate the diag here because (i) you cannot feed a tensor, and (ii)
      # diag is random and we want the same value used for both mat and
      # feed_dict.
      diag = diag.eval()
      operator = linalg.LinearOperatorDiag(diag_ph)
      feed_dict = {diag_ph: diag}
    else:
      operator = linalg.LinearOperatorDiag(diag)
      feed_dict = None

    mat = tf.matrix_diag(diag)

    return operator, mat, feed_dict
Example #31
def batch_matrix_log(x, epsilon):
    """
    Matrix log with epsilon to ensure stability.
    Input must be a Symmetric matrix.

    Parameters
    ----------
    x : tf.Tensor with [..., dim1, dim2]

    epsilon

    Returns
    -------
    log of eigen-values.

    """
    s, u, v = tf.svd(x)
    # print(s.eval())
    inner = s + epsilon
    inner = tf.log(inner)
    inner = tf.matrix_diag(inner)
    return tf.matmul(u, tf.matmul(inner, tf.transpose(u, [0,2,1])))
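A hedged single-matrix NumPy sketch of the same idea, written with an eigendecomposition (which coincides with the SVD for symmetric positive-definite input); SciPy's expm is used only to verify the round trip:

import numpy as np
from scipy.linalg import expm

rng = np.random.RandomState(0)
A = rng.randn(4, 4)
X = A @ A.T + 1e-3 * np.eye(4)                # symmetric positive-definite matrix

s, u = np.linalg.eigh(X)
log_X = u @ np.diag(np.log(s + 1e-8)) @ u.T   # epsilon-stabilised matrix log, as above

print(np.max(np.abs(expm(log_X) - X)))        # tiny: exp undoes the matrix log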
Example #32
def _quadrature_expectation(p, obj1, feature1, obj2, feature2,
                            num_gauss_hermite_points):
    """
    General handling of quadrature expectations for Gaussians and DiagonalGaussians
    Fallback method for missing analytic expectations
    """
    num_gauss_hermite_points = 100 if num_gauss_hermite_points is None else num_gauss_hermite_points

    logger.warn(
        "Quadrature is used to calculate the expectation. This means that "
        "an analytical implementations is not available for the given combination."
    )

    if obj2 is None:
        eval_func = lambda x: get_eval_func(obj1, feature1)(x)
    elif obj1 is None:
        raise NotImplementedError("First object cannot be None.")
    else:
        eval_func = lambda x: (get_eval_func(obj1, feature1, np.s_[:, :, None])
                               (x) * get_eval_func(obj2, feature2, np.
                                                   s_[:, None, :])(x))

    if isinstance(p, DiagonalGaussian):
        if isinstance(obj1, kernels.Kernel) and isinstance(obj2, kernels.Kernel) \
                and obj1.on_separate_dims(obj2):  # no joint expectations required

            eKxz1 = quadrature_expectation(
                p, (obj1, feature1),
                num_gauss_hermite_points=num_gauss_hermite_points)
            eKxz2 = quadrature_expectation(
                p, (obj2, feature2),
                num_gauss_hermite_points=num_gauss_hermite_points)
            return eKxz1[:, :, None] * eKxz2[:, None, :]

        else:
            cov = tf.matrix_diag(p.cov)
    else:
        cov = p.cov
    return mvnquad(eval_func, p.mu, cov, num_gauss_hermite_points)
Example #33
    def constraints(self):
        # don't know how to do this; to keep it consistent, need to return this twice
        # decisions_at_negative = tf.less(self._decision_vars, 0.0 - self.epsilon)
        decisions_at_bound = 1 - tf.abs(
            tf.cast(tf.equal(self._decision_vars, 0.0), dtype=tf.float32) +
            tf.cast(tf.equal(self._decision_vars, 1.0), dtype=tf.float32))
        # decisions_at_bound *= 100
        # positive_margin = 0.5 - tf.abs(self._decision_vars - 0.5) - self._epsilon
        # negative_margin = -0.5 + tf.abs(self._decision_vars - 0.5) - self._epsilon
        # positive_margin = tf.reshape(positive_margin, [-1])
        # negative_margin = tf.reshape(negative_margin, [-1])

        # assume epsilon won't be too large s.t. two interval overlap
        # decisions_at_bound = tf.abs(
        #     tf.cast(tf.less(tf.abs(self._decision_vars - 0.0), self._epsilon), dtype=tf.float32) +
        #     tf.cast(tf.less(tf.abs(self._decision_vars - 1.0), self._epsilon), dtype=tf.float32))
        # decisions_at_negative = tf.less(self._decision_vars, 0.0, 'decisions_at_negative_side')
        decisions_at_bound = tf.reshape(decisions_at_bound, [-1])

        decision_vars = tf.rint(self._decision_vars)
        lose_diagonal = decision_vars - tf.matrix_diag(
            tf.matrix_diag_part(decision_vars))
        predecessor = tf.reduce_sum(lose_diagonal, 0) - 1
        successor = tf.reduce_sum(lose_diagonal, 1) - 1
        n = self._size

        mtz = []
        # mtz, Miller-Tucker-Zemlin formulation
        # enforce single subtour
        for i in range(n):
            for j in range(n):
                if i != j:
                    mtz.append(dummy_vars[i] - dummy_vars[j] +
                               n * lose_diagonal[i][j] - n + 1)

        mtz = tf.stack(mtz)  # n x (n-1)
        return tf.concat([
            decisions_at_bound, decisions_at_bound, predecessor, successor, mtz
        ], 0)
Example #34
File: model.py Project: sikid/CPM_Nets
 def classification_loss(self):
     F_h_h = tf.matmul(self.h_temp, tf.transpose(self.h_temp))
     F_hn_hn = tf.diag_part(F_h_h)
     F_h_h = tf.subtract(F_h_h, tf.matrix_diag(F_hn_hn))
     classes = tf.reduce_max(self.gt) - tf.reduce_min(self.gt) + 1
     label_onehot = tf.one_hot(self.gt - 1, classes)  # gt begin from 1
     label_num = tf.reduce_sum(
         label_onehot, 0,
         keep_dims=True)  # should subtract 1; avoids numerical errors
     F_h_h_sum = tf.matmul(F_h_h, label_onehot)
     label_num_broadcast = tf.tile(label_num,
                                   [self.trainLen, 1]) - label_onehot
     F_h_h_mean = tf.divide(F_h_h_sum, label_num_broadcast)
     gt_ = tf.cast(tf.argmax(F_h_h_mean, axis=1),
                   tf.int32) + 1  # gt begin from 1
     F_h_h_mean_max = tf.reduce_max(F_h_h_mean, axis=1, keep_dims=False)
     theta = tf.cast(tf.not_equal(self.gt, gt_), tf.float32)
     F_h_hn_mean_ = tf.multiply(F_h_h_mean, label_onehot)
     F_h_hn_mean = tf.reduce_sum(F_h_hn_mean_, axis=1, name='F_h_hn_mean')
     return tf.reduce_sum(
         tf.nn.relu(tf.add(theta, tf.subtract(F_h_h_mean_max,
                                              F_h_hn_mean))))
Example #35
def svd(A, full_matrices=False, compute_uv=True, name=None):
  # since dA = dUSVt + UdSVt + USdVt
  # we can simply recompute each matrix using A = USVt
  # while blocking gradients to the original op.
  _, M, N = A.get_shape().as_list()
  P = min(M, N)
  S0, U0, V0 = map(tf.stop_gradient, tf.svd(A, full_matrices=True, name=name))
  Ui, Vti = map(tf.matrix_inverse, [U0, tf.transpose(V0, (0, 2, 1))])
  # A = USVt
  # S = UiAVti
  S = tf.matmul(Ui, tf.matmul(A, Vti))
  S = tf.matrix_diag_part(S)
  if not compute_uv:
    return S
  Si = tf.pad(tf.matrix_diag(1/S0), [[0,0], [0,N-P], [0,M-P]])
  # U = AVtiSi
  U = tf.matmul(A, tf.matmul(Vti, Si))
  U = U if full_matrices else U[:, :M, :P]
  # Vt = SiUiA
  V = tf.transpose(tf.matmul(Si, tf.matmul(Ui, A)), (0, 2, 1))
  V = V if full_matrices else V[:, :N, :P]
  return S, U, V
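A quick NumPy check of the recomposition trick above: with U and V frozen, S = Ui A Vti recovers the singular values (batched, illustrative sizes):

import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(1, 5, 3)                               # (batch, M, N), as in the function above
U0, S0, Vt0 = np.linalg.svd(A, full_matrices=True)

S = np.linalg.inv(U0) @ A @ np.linalg.inv(Vt0)       # S = Ui A Vti
print(np.allclose(np.diagonal(S, axis1=-2, axis2=-1), S0))   # True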
Example #36
        def log_prob_fn(params):
            rho, alpha, sigma = tf.split(params, [num_features, 1, 1], -1)

            one = tf.ones(num_features)

            def indep(d):
                return tfd.Independent(d, 1)

            p_rho = indep(tfd.InverseGamma(5. * one, 5. * one))
            p_alpha = indep(tfd.HalfNormal([1.]))
            p_sigma = indep(tfd.HalfNormal([1.]))

            rho_shape = tf.shape(rho)
            alpha_shape = tf.shape(alpha)

            x1 = tf.expand_dims(x, -2)
            x2 = tf.expand_dims(x, -3)
            exp = -0.5 * tf.squared_difference(x1, x2)
            exp /= tf.reshape(
                tf.square(rho),
                tf.concat([rho_shape[:1], [1, 1], rho_shape[1:]], 0))
            exp = tf.reduce_sum(exp, -1, keep_dims=True)
            exp += 2. * tf.reshape(
                tf.log(alpha),
                tf.concat([alpha_shape[:1], [1, 1], alpha_shape[1:]], 0))
            exp = tf.exp(exp[Ellipsis, 0])
            exp += tf.matrix_diag(
                tf.tile(tf.square(sigma), [1, int(x.shape[0])]) + 1e-6)
            exp = tf.check_numerics(exp, "exp 2 has NaNs")
            with tf.control_dependencies([tf.print(exp[0], summarize=99999)]):
                exp = tf.identity(exp)

            p_y = tfd.MultivariateNormalFullCovariance(covariance_matrix=exp)

            log_prob = (p_rho.log_prob(rho) + p_alpha.log_prob(alpha) +
                        p_sigma.log_prob(sigma) + p_y.log_prob(y))

            return log_prob
def gradient_svd(op, ds, dU, dV):
    s, U, V = op.outputs

    u_sz = tf.squeeze(tf.slice(tf.shape(dU),[1],[1]))
    v_sz = tf.squeeze(tf.slice(tf.shape(dV),[1],[1]))
    s_sz = tf.squeeze(tf.slice(tf.shape(ds),[1],[1]))

    S = tf.matrix_diag(s)
    s_2 = tf.square(s)

    eye = tf.expand_dims(tf.eye(s_sz),0) 
    k = (1 - eye)/(tf.expand_dims(s_2,2)-tf.expand_dims(s_2,1) + eye)
    KT = tf.matrix_transpose(k)
    KT = removenan(KT)
    
    def msym(X):
        return (X+tf.matrix_transpose(X))
    
    def left_grad(U,S,V,dU,dV):
        U, V = (V, U); dU, dV = (dV, dU)
        D = tf.matmul(dU,tf.matrix_diag(1/(s+1e-8)))
        US = tf.matmul(U,S)
    
        grad = tf.matmul(D, V, transpose_b=True)\
              +tf.matmul(tf.matmul(U,tf.matrix_diag(tf.matrix_diag_part(-tf.matmul(U,D,transpose_a=True)))), V, transpose_b=True)\
              +tf.matmul(2*tf.matmul(US, msym(KT*(tf.matmul(V,-tf.matmul(V,tf.matmul(D,US,transpose_a=True)),transpose_a=True)))),V,transpose_b=True)
        grad = tf.matrix_transpose(grad)
        return grad

    def right_grad(U,S,V,dU,dV):
        US = tf.matmul(U,S)
        grad = tf.matmul(2*tf.matmul(US, msym(KT*(tf.matmul(V,dV,transpose_a=True))) ),V,transpose_b=True)
        return grad
    
    grad = tf.cond(tf.greater(v_sz, u_sz), lambda : left_grad(U,S,V,dU,dV), 
                                           lambda : right_grad(U,S,V,dU,dV))
    
    return [grad]
Example #38
    def __init__(self,
                 beta,
                 gamma,
                 mean,
                 variance,
                 conv_weights,
                 strides=(1, 1),
                 padding='same',
                 epsilon=1e-3,
                 dilation_rate=(1, 1),
                 **kwargs):
        super(FuseConvBN, self).__init__(**kwargs)
        # conv layer config
        self.strides = strides
        self.padding = padding
        self.dilation_rate = dilation_rate
        # origin weights
        self.beta = tf.constant(beta, dtype='float32')
        self.gamma = tf.constant(gamma, dtype='float32')
        self.mean = tf.constant(mean, dtype='float32')
        self.variance = tf.constant(variance, dtype='float32')
        self.conv_weights = tf.constant(conv_weights, dtype='float32')

        # compute W_{bn} & b_{bn}
        k, k, filters_in, filters_out = K.int_shape(
            self.conv_weights)  # build shape in keras Conv
        self.filters_out = filters_out
        weights_conv = tf.reshape(
            tf.transpose(self.conv_weights, (3, 0, 1, 2)), (filters_out, -1))
        weights_bn = tf.matrix_diag(gamma / tf.sqrt(variance + epsilon))
        bias_bn = beta - gamma * mean / tf.sqrt(variance + epsilon)

        # compute fused W & b
        fused_weights = tf.matmul(weights_bn, weights_conv)
        self.fused_weights = tf.transpose(
            tf.reshape(fused_weights, (filters_out, k, k, filters_in)),
            (1, 2, 3, 0))
        self.fused_bias = bias_bn
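A hedged NumPy sketch of the fusion identity this layer relies on, using a 1x1 convolution written as a plain matrix multiply (shapes and epsilon are illustrative):

import numpy as np

rng = np.random.RandomState(0)
cin, cout, eps = 4, 3, 1e-3
W = rng.randn(cout, cin)                         # stands in for a flattened 1x1 conv kernel
gamma, beta = rng.rand(cout), rng.randn(cout)
mean, var = rng.randn(cout), rng.rand(cout) + 0.1

# fused weights and bias, mirroring the matrix_diag construction above
W_bn = np.diag(gamma / np.sqrt(var + eps))
b_bn = beta - gamma * mean / np.sqrt(var + eps)
W_fused = W_bn @ W

x = rng.randn(10, cin)                           # a batch of inputs
conv_then_bn = gamma * (x @ W.T - mean) / np.sqrt(var + eps) + beta
fused = x @ W_fused.T + b_bn
print(np.max(np.abs(conv_then_bn - fused)))      # ~1e-15: the two paths agree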
Example #39
def _expectation(p, kern, feat, none1, none2, nghp=None):
    """
    Compute the expectation:
    <K_{X, Z}>_p(X)
        - K_{.,.} :: RBF kernel

    remember that in this case p(X) is not factorized independently across data points, only per latent dimension.

    :return: NxM
    """
    print('TGaussian Psi1')

    with params_as_tensors_for(kern, feat):
        Xcov = kern._slice_cov(p.cov) # QxNxN - because var distribution is factorized only per latent dimension
        Z, Xmu = kern._slice(feat.Z, p.mu) # MxQ and NxQ

        D = tf.shape(Xmu)[1] # Q

        if kern.ARD:
            lengthscales = kern.lengthscales
        else:
            lengthscales = tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales

        #The entry of this covariance matrix is the variance
        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  #QxNxN
        print(chol_L_plus_Xcov)
        all_diffs = tf.transpose(Z) - tf.expand_dims(Xmu, 2)
        all_diffs = tf.transpose(all_diffs, [1, 0, 2]) # QxNxM

        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # QxNxN  QxNxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # Q x M
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # Q x M

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # Q

    return kern.variance * (determinants[:, None] * exponent_mahalanobis)
Example #40
    def decode(self, vec_rep):
        """De-vectorizes the bosonic matrices. Reverse of the encode method.

        Arguments:
            vec_rep (tensor of shape (batch_size, K)): the vectorized gauge representatives

        Returns:
            rep (tensor of shape (batch_size, 3, N, N)): the bosonic matrix gauge representatives
        """
        batch_size = int(vec_rep.shape[0])
        if self.bijector is None:
            vec_rep = tf.reshape(vec_rep, [batch_size, -1, self.algebra.dim])
            return self.algebra.vector_to_matrix(vec_rep)
        else:
            N = self.algebra.N
            dim = N - 1
            diff = self.bijector.forward(vec_rep[:, :dim])
            # by our gauge fixing, each row of e must be nondecreasing
            e = tf.concat(
                [tf.zeros([batch_size, 1]),
                 tf.cumsum(diff, axis=-1)], axis=-1)
            # the sum of each row of e should also be zero
            e = e - tf.reduce_sum(e, axis=-1, keepdims=True) / N
            mat_e = tf.cast(tf.expand_dims(tf.matrix_diag(e), axis=-3),
                            tf.complex64)
            # by our gauge fixing, first (N - 1) elements in each row of vec_rest must vanish
            # and the following (N - 1) elements must be positive
            # note this depends on our ordering of the SU(N) basis in algebra.py
            vec_rest = tf.concat([
                tf.zeros([batch_size, dim]),
                self.bijector.forward(vec_rep[:, dim:2 * dim]),
                vec_rep[:, 2 * dim:]
            ],
                                 axis=-1)
            vec_rest = tf.reshape(vec_rest, [batch_size, 2, self.algebra.dim])
            mat_rest = self.algebra.vector_to_matrix(vec_rest)
            rep = tf.concat([mat_e, mat_rest], axis=-3)
            return rep
Example #41
    def transition_model(self, states, covariances, odometry):
        """
        Implements a stochastic transition model for localization.
        :param states: tf op (batch, K, 3), particle states before the update.
        :param covariances: tf op (batch, K, 3, 3)
        :param odometry: tf op (batch, 3), odometry reading, relative motion in the robot coordinate frame
        :return: particle_states updated with the odometry and optionally transition noise
        """
        translation_std = self.params.transition_std[
            0] / self.params.map_pixel_in_meters  # In pixels
        rotation_std = self.params.transition_std[1]  # In radians

        with tf.name_scope('transition'):
            part_x, part_y, part_th = tf.unstack(states, axis=-1, num=3)

            odometry = tf.expand_dims(odometry, axis=1)
            odom_x, odom_y, odom_th = tf.unstack(odometry, axis=-1, num=3)

            cos_th = tf.cos(part_th)
            sin_th = tf.sin(part_th)
            delta_x = cos_th * odom_x - sin_th * odom_y
            delta_y = sin_th * odom_x + cos_th * odom_y
            delta_th = odom_th

            new_th = tf.mod(part_th + delta_th + np.pi, 2 * np.pi) - np.pi
            states = tf.stack([part_x + delta_x, part_y + delta_y, new_th],
                              axis=-1)

            pose_cov = tf.square(
                tf.constant([translation_std, translation_std, rotation_std],
                            tf.float32))
            noise = tf.abs(
                tf.random_normal(states.get_shape(), mean=0.0,
                                 stddev=1.0)) * pose_cov
            noise = tf.matrix_diag(noise)
            covariances = covariances + noise

            return states, covariances
Example #42
    def get_params(self, x, c, b, m, id):
        B = tf.shape(x)[0]
        d = self.hps.dimension
        mask = np.arange(d, dtype=np.float32)
        mask = tf.mod(mask + id, 2)
        mask = tf.tile(tf.expand_dims(mask, axis=0), [B, 1])
        inp = tf.concat([x * mask, mask, c, b, m], axis=1)
        params = self.nets[id](inp)
        scale, shift = tf.split(params, 2, axis=1)
        # reorder
        query = m * (1 - b)
        order = tf.contrib.framework.argsort(query,
                                             direction='DESCENDING',
                                             stable=True)
        t = tf.batch_gather(tf.matrix_diag(query), order)
        t = tf.transpose(t, perm=[0, 2, 1])
        scale = tf.einsum('nd,ndi->ni', scale, t)
        shift = tf.einsum('nd,ndi->ni', shift, t)
        # mask
        scale = scale * (1. - mask)
        shift = shift * (1. - mask)

        return scale, shift
Example #43
 def _getMatrixTree(r, A, mask1, mask2, mask_multiply, mask_add):
     if mask_multiply is None:
         A_masked = A
     else:
         A_masked = A * mask_multiply
     L_reduce = tf.reduce_sum(A_masked, 1)
     L_diag = tf.matrix_diag(L_reduce)
     L_minus = L_diag - A_masked
     LL_diag = L_minus[:, 1:, :]
     LL = tf.concat([tf.expand_dims(r, [1]), LL_diag], 1)
     if mask_multiply is None:
         LL_inv = tf.matrix_inverse(LL)
     else:
         LL_masked = mask_multiply * LL
         LL_masked = LL_masked + mask_add
         LL_inv = tf.matrix_inverse(LL_masked)  # batch_l, doc_l, doc_l
     d0 = tf.multiply(r, LL_inv[:, :, 0])  # root
     LL_inv_diag = tf.expand_dims(tf.matrix_diag_part(LL_inv), 2)
     tmp1 = tf.matrix_transpose(tf.multiply(tf.matrix_transpose(A_masked), LL_inv_diag))
     tmp2 = tf.multiply(A_masked, tf.matrix_transpose(LL_inv))
     d_no_root = mask1 * tmp1 - mask2 * tmp2
     d = tf.concat([tf.expand_dims(d0,[1]), d_no_root], 1)  # add column at beginning for root
     return d, d_no_root, LL
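The L_diag - A construction above is the (weighted, directed) graph Laplacian from the Matrix-Tree theorem; a minimal NumPy sketch of the undirected special case, counting spanning trees of K4 through a Laplacian cofactor:

import numpy as np

A = np.ones((4, 4)) - np.eye(4)           # adjacency of the complete graph K4
L = np.diag(A.sum(1)) - A                 # same diag(reduce_sum) - A construction as above
print(round(np.linalg.det(L[1:, 1:])))    # 16 = 4^2 spanning trees, matching Cayley's formula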
Example #44
        def dignoal(x, kernel_size, scatter_rate):  # compute the mean using diagonal sampling
            b = tf.shape(x)[0]
            w = tf.shape(x)[1]
            h = tf.shape(x)[2]

            dig = tf.matrix_diag(
                (([0] * scatter_rate + [1]) * kernel_size)[:kernel_size])

            dig = tf.tile(dig, [
                tf.cast(tf.math.ceil(w / kernel_size), dtype=tf.int32),
                tf.cast(tf.math.ceil(w / kernel_size), dtype=tf.int32)
            ])[:w, :h]
            dig = tf.tile(tf.expand_dims(tf.expand_dims(dig, axis=0), axis=-1),
                          [b, 1, 1, tf.shape(x)[-1]])

            x_ = x * tf.cast(dig, dtype=tf.float32)
            num =tf.cast(b,dtype=tf.float64)*tf.math.floor(w/kernel_size)*tf.math.floor(h/kernel_size)*tf.cast(tf.math.floor(kernel_size/(scatter_rate+1)),dtype=tf.float64)+\
            tf.cast(b,dtype=tf.float64)*tf.math.floor(tf.floormod(w,kernel_size)/(scatter_rate+1))*tf.math.floor(h/kernel_size)+\
            tf.cast(b,dtype=tf.float64)*tf.math.floor(tf.floormod(h,kernel_size)/(scatter_rate+1))*tf.math.floor(w/kernel_size)+\
            tf.cast(b,dtype=tf.float64)*tf.math.floor(tf.reduce_min([tf.floormod(w,kernel_size),tf.floormod(h,kernel_size)])/(scatter_rate+1))
            ave = tf.reduce_sum(x_, axis=[0, 1, 2]) / tf.expand_dims(
                tf.cast(num, dtype=tf.float32), axis=-1)
            return ave
Example #45
 def dx_dtheta_log_px(self, dmu_log_px_, w_px_i_px_norm, exponent_, xi_):
     # Returns a n * d * # of components matrix
     dx_dw_log_px_, dx_dmu_log_px_, dx_dsigma2_log_px_ = [], [], []
     zeta = tf.reduce_sum(dmu_log_px_, [0])
     for i in range(self.weights.shape[0]):
         zeta_m_exponent = tf.expand_dims(zeta - exponent_[i], 1)
         w_px_i_px_norm_i = tf.expand_dims(w_px_i_px_norm[i], -1)
         dx_dw_log_px_.append(zeta_m_exponent * w_px_i_px_norm_i)
         diag_precision = tf.diag(1. / self.distributions[i].variance())
         exponent_i_tensor = tf.expand_dims(exponent_[i], -1)
         dx_dmu_log_px_.append(
             (tf.matmul(exponent_i_tensor, zeta_m_exponent) +
              diag_precision) * w_px_i_px_norm_i)
         xi_i_tensor = tf.expand_dims(xi_[i], -1)
         exponent_i_diag = tf.matrix_diag(exponent_[i])
         diag_precision = tf.expand_dims(diag_precision, 0)
         dx_dsigma2_log_px_.append(
             (tf.matmul(xi_i_tensor, zeta_m_exponent) +
              exponent_i_diag * diag_precision) * w_px_i_px_norm_i)
     dx_dw_log_px, dx_dmu_log_px, dx_dsigma2_log_px = tf.stack(
         dx_dw_log_px_), tf.stack(dx_dmu_log_px_), tf.stack(
             dx_dsigma2_log_px_)
     return [dx_dw_log_px, dx_dmu_log_px, dx_dsigma2_log_px]
Example #46
        def _no_cho(Kf=Kf, y=y):
            Kf = (Kf + tf.transpose(Kf, perm=[0, 2, 1])) / 2.
            e, v = tf.self_adjoint_eig(Kf)
            e = tf.where(e > 1e-14, e, 1e-14 * tf.ones_like(e))
            Kf = tf.matmul(tf.matmul(v, tf.matrix_diag(e), transpose_a=True),
                           v)

            logdet = tf.reduce_sum(tf.where(e > 1e-14, tf.log(e),
                                            tf.zeros_like(e)),
                                   axis=-1,
                                   name='logdet')

            #batch_size, n, 1
            alpha = tf.squeeze(tf.matrix_solve(Kf,
                                               tf.expand_dims(y, -1),
                                               name='solve_alpha'),
                               axis=2)
            fstar = tf.matmul(Knm, tf.expand_dims(alpha, -1), transpose_a=True)
            cov = Kmm
            cov -= tf.matmul(Knm, tf.matrix_solve(Kf, Knm), transpose_a=True)
            log_mar_like = (-tf.reduce_sum(y * alpha, axis=1) - logdet -
                            n * np.log(2. * np.pi)) / 2.
            return fstar, cov, log_mar_like
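A hedged NumPy sketch of the eigenvalue-clamping step used above to make Kf positive semi-definite (single matrix, standard v diag(e) v^T reconstruction with eigenvectors stored as columns):

import numpy as np

rng = np.random.RandomState(0)
B = rng.randn(4, 4)
K = (B + B.T) / 2.0                                # symmetric but generally indefinite

e, v = np.linalg.eigh(K)
e_clamped = np.maximum(e, 1e-14)                   # clamp the spectrum at a small floor
K_psd = v @ np.diag(e_clamped) @ v.T

print(e.min())                                     # negative: K itself is not PSD
print(np.linalg.eigvalsh(K_psd).min() > -1e-10)    # True: the clamped matrix is numerically PSD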
Example #47
  def testSampleWithBroadcastScale(self):
    # mu corresponds to a 2-batch of 3-variate normals
    mu = np.zeros([2, 3])

    # diag corresponds to no batches of 3-variate normals
    diag = np.ones([3])

    dist = tfd.VectorExponentialDiag(mu, diag, validate_args=True)

    mean = dist.mean()
    self.assertAllEqual([2, 3], mean.shape)
    self.assertAllClose(mu + diag, self.evaluate(mean))

    n = int(1e4)
    samps = self.evaluate(dist.sample(n, seed=0))
    samps_centered = samps - samps.mean(axis=0)
    cov_mat = self.evaluate(tf.matrix_diag(diag))**2
    sample_cov = np.matmul(
        samps_centered.transpose([1, 2, 0]), samps_centered.transpose([1, 0, 2
                                                                      ])) / n

    self.assertAllClose(mu + diag, samps.mean(axis=0), atol=0.10, rtol=0.05)
    self.assertAllClose([cov_mat, cov_mat], sample_cov, atol=0.10, rtol=0.05)
Example #48
def ds(x):
    #   kx   ky   qx   qy   om
    # x[0] x[1] x[2] x[3] x[4]

    topkq = -complex(0, 1) * V0 * ((x[0] + x[2]) - complex(0, 1) *
                                   (x[1] + x[3]))
    botkq = complex(0, 1) * V0 * ((x[0] + x[2]) + complex(0, 1) *
                                  (x[1] + x[3]))
    innkq = x[4] + complex(0, 1) * Gamm - A * ((x[0] + x[2])**2 +
                                               (x[1] + x[3])**2) - V2

    topk = -complex(0, 1) * V0 * (x[0] - complex(0, 1) * x[1])
    botk = complex(0, 1) * V0 * (x[0] + complex(0, 1) * x[1])
    innk = x[4] + complex(0, 1) * Gamm - A * (x[0]**2 + x[1]**2) - V2

    # cent = tf.arange(-(N - 1) / 2, (N - 1) / 2 + 1, 1)

    d = hOmg * tf.matrix_diag(cent)

    Ginkq = tf.matrix_diag(topkq, k=1) + tf.matrix_diag(
        botkq, k=1) + tf.matrix_diag(innkq, k=0) - d
    Gink = tf.matrix_diag(topk, k=1) + tf.matrix_diag(
        botk, k=1) + tf.matrix_diag(innk, k=0) - d

    Grkq = tf.linalg.inv(Ginkq)
    Gakq = tf.transpose(tf.conj(Grkq))

    Grk = tf.linalg.inv(Gink)
    Gak = tf.transpose(tf.conj(Grk))

    fer = tf.heaviside(-(d + tf.eye(N) * (x[4] - mu)), 0)

    in1 = tf.matmul(Grkq, tf.matmul(Grk, tf.matmul(fer, Gak)))
    in2 = tf.matmul(Grkq, tf.matmul(fer, tf.matmul(Gakq, Gak)))
    tr = tf.trace(in1 + in2)
    # HERE i will divide by DOS, multiply by 2 for spin, and divide by (2pi)^3

    dchi = -(4) * Gamm * tr / math.pi**2

    return dchi
Example #49
def loss(inputs, labels):
    norm_inputs = tf.nn.l2_normalize(inputs, axis=1)

    dot_matrix = tf.expand_dims(norm_inputs, 1) * tf.expand_dims(
        norm_inputs, 0)
    sim_matrix = tf.reduce_sum(dot_matrix, axis=2)

    mask = tf.equal(tf.expand_dims(labels, axis=1),
                    tf.expand_dims(labels, axis=0))
    mask = tf.cast(mask, dtype=tf.float32)
    pos_mask = mask - tf.matrix_diag(tf.ones_like(labels, dtype=tf.float32))
    neg_mask = 1.0 - mask

    easy_a_p = tf.reduce_max(sim_matrix * pos_mask - 1e10 * (1.0 - pos_mask),
                             axis=1,
                             keepdims=True)

    easy_a_n = tf.reduce_min(sim_matrix * neg_mask + 1e10 * (1.0 - neg_mask),
                             axis=1,
                             keepdims=True)
    easy_a_n_mask = tf.logical_and(tf.equal(sim_matrix, easy_a_n),
                                   tf.cast(neg_mask, dtype=tf.bool))
    sh_a_n_mask = tf.logical_and(tf.less(sim_matrix, easy_a_p),
                                 tf.cast(neg_mask, dtype=tf.bool))
    sh_a_n_mask = tf.logical_or(sh_a_n_mask, easy_a_n_mask)
    sh_a_n_mask = tf.cast(sh_a_n_mask, dtype=tf.float32)
    sh_a_n = tf.reduce_max(sim_matrix * sh_a_n_mask - 1e10 * (1 - sh_a_n_mask),
                           axis=1,
                           keepdims=True)

    easy_a_p_exp = tf.exp(easy_a_p)
    sh_a_n_exp = tf.exp(sh_a_n)
    ep_loss = -tf.log(easy_a_p_exp / (easy_a_p_exp + sh_a_n_exp))

    ep_loss_mean = tf.reduce_mean(ep_loss)

    return ep_loss_mean
Example #50
    def kb_module(self, H, ent_emb, ent_W):
        h_h, w = int(H.get_shape()[1]), int(H.get_shape()[2])  #30,64
        print H.get_shape(), ent_emb.get_shape()
        h_e, w_e = int(ent_emb.get_shape()[2]), int(ent_emb.get_shape()[3])  #5

        out1 = tf.reduce_mean(H, axis=1)  #(?,64)

        reshape_h1 = tf.expand_dims(out1, 1)  #(?,1,64)
        reshape_h1 = tf.expand_dims(reshape_h1, 1)  #(?,1,1,64)
        reshape_h1 = tf.tile(reshape_h1, [1, h_h, h_e, 1])  #(?,30,5,64)
        reshape_h1 = tf.reshape(reshape_h1, [-1, w])  #(? * 30 * 5,64)
        reshape_h2 = tf.reshape(ent_emb, [-1, w_e])  #(? * 30 * 5,64)
        print reshape_h1.get_shape(), reshape_h2.get_shape()
        M = tf.tanh(
            tf.add(tf.matmul(reshape_h1, ent_W['Wqm']),
                   tf.matmul(reshape_h2, ent_W['Wam'])))  #(?,att)
        M = tf.matmul(M, ent_W['Wms'])  #(?,1)

        S = tf.reshape(M, [-1, h_e])  #(?,5)
        S = tf.nn.softmax(S)

        S_diag = tf.matrix_diag(S)  #(?,5,5)
        reshape_ent = tf.reshape(ent_emb, [-1, h_e, w_e])  #(?*30,5,64)
        attention_a = tf.matmul(S_diag, reshape_ent)  #(?*30,5,64)
        attention_a = tf.reshape(attention_a,
                                 [-1, h_h, h_e, w_e])  #(?,30,5,64)
        #attention_a = tf.reshape(attention_a, [-1, h_h*h_e, w]) #(?,30,5,64)

        out2 = tf.reduce_mean(attention_a, axis=2)
        #output_a = self.avg_pooling(attention_a)
        #output_a = self.max_pooling(attention_a)

        #out = tf.reduce_mean(tf.concat([H, out2],2), axis=1)
        #return tf.tanh(out)
        return tf.concat([H, out2], 2), out2
        return tf.concat([H, out2], 2)
        return out1, out2
Example #51
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - Ka_{.,.}, Kb_{.,.} :: Linear kernels
    Ka and Kb as well as Z1 and Z2 can differ from each other, but this is supported
    only if the Gaussian p is Diagonal (p.cov NxD) and Ka, Kb have disjoint active_dims,
    in which case the joint expectations simplify into a product of expectations.

    :return: NxMxM
    """
    if kern1.on_separate_dims(kern2) and isinstance(
            p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (kern1, feat1))
        eKxz2 = expectation(p, (kern2, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if kern1 != kern2 or feat1 != feat2:
        raise NotImplementedError(
            "The expectation over two kernels has only an "
            "analytical implementation if both kernels are equal.")

    kern = kern1
    feat = feat1

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(
            tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov
        )
        Z, Xmu = kern._slice(feat.Z, p.mu)

        N = tf.shape(Xmu)[0]
        var_Z = kern.variance * Z
        tiled_Z = tf.tile(tf.expand_dims(var_Z, 0), (N, 1, 1))  # NxMxD
        XX = Xcov + tf.expand_dims(Xmu, 1) * tf.expand_dims(Xmu, 2)  # NxDxD
        return tf.matmul(tf.matmul(tiled_Z, XX), tiled_Z, transpose_b=True)
    def get_kl_terms(self,q_mu,q_sqrt):
        if self.white:
            alpha = tf.transpose(q_mu)[:,:,None]  # DxMx1
        else:
            alpha = tf.matrix_triangular_solve(self.Lu_tiled,tf.transpose(q_mu)[:,:,None], lower=True)  # DxMxM * DxMx1 --> DxMx1

        if self.q_diag:
            Lq = Lq_diag = q_sqrt # MxD
            Lq_full = tf.matrix_diag(tf.transpose(q_sqrt))  # DxMxM
        else:
            Lq = Lq_full = tf.matrix_band_part(q_sqrt, -1, 0)  # force lower triangle # DxMxM
            Lq_diag = tf.transpose(tf.matrix_diag_part(Lq))  # MxD

        # Mahalanobis term: μqᵀ Σp⁻¹ μq
        mahalanobis = tf.reduce_sum(tf.square(alpha),axis=[1,2])[:,None] # Dx1

        # Log-determinant of the covariance of q(x):
        logdet_qcov = tf.reduce_sum(tf.log(tf.square(Lq_diag)),axis=0)[:,None] # Dx1

        # Trace term: tr(Σp⁻¹ Σq)
        if self.white:
            if self.q_diag:
                trace = tf.reduce_sum(tf.square(Lq),axis=0)[:,None] # MxD --> Dx1
            else:
                trace = tf.reduce_sum(tf.square(Lq),axis=[1,2])[:,None] # DxMxM --> Dx1
        else:
            LpiLq   = tf.matrix_triangular_solve(self.Lu_tiled, Lq_full, lower=True) # DxMxM
            trace   = tf.reduce_sum(tf.square(LpiLq),axis=[1,2])[:,None] # Dx1

        # Log-determinant of the covariance of p(x):
        if not self.white:
            log_sqdiag_Lp = tf.stack([tf.log(tf.square(tf.matrix_diag_part(self.Lu_tiled[d]))) for d in range(self.num_outputs)],axis=0) #DxM
            logdet_pcov = tf.reduce_sum(log_sqdiag_Lp,axis=1)[:,None] #Dx1
        else:
            logdet_pcov = 0

        return logdet_pcov, logdet_qcov, mahalanobis, trace
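For context, a hedged sketch of how such terms are usually assembled into the Gaussian KL (the helper below is ours, not part of the snippet), checked against the 1-D closed form with a white prior:

import numpy as np

def kl_from_terms(logdet_pcov, logdet_qcov, mahalanobis, trace, M):
    # KL[q || p] per output dimension, assembled from the four terms returned above
    return 0.5 * (logdet_pcov - logdet_qcov - M + trace + mahalanobis)

# check against the closed form for q = N(m, s^2), p = N(0, 1)
m, s, M = 0.7, 0.4, 1
assembled = kl_from_terms(logdet_pcov=0.0, logdet_qcov=np.log(s ** 2),
                          mahalanobis=m ** 2, trace=s ** 2, M=M)
direct = np.log(1.0 / s) + (s ** 2 + m ** 2 - 1.0) / 2.0
print(np.isclose(assembled, direct))   # True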
Example #53
def _expectation(p, kern, feat, none1, none2, nghp=None):
    """
    Compute the expectation:
    <K_{X, Z}>_p(X)
        - K_{.,.} :: RBF kernel

    :return: NxM
    """
    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)
        D = tf.shape(Xmu)[1]
        if kern.ARD:
            lengthscales = kern.lengthscales
        else:
            lengthscales = tf.zeros(
                (D, ), dtype=settings.tf_float) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales**2) +
                                       Xcov)  # NxDxD

        all_diffs = tf.transpose(Z) - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov,
                                                          all_diffs,
                                                          lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis),
                                             1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(
            tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)),
                          axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)
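The closed form above can be checked by Monte Carlo for a single input distribution (a standalone NumPy sketch with made-up sizes and parameters, not library code): <k(x, z)>_N(x|mu,Sigma) = s2 * sqrt(det L / det(L + Sigma)) * exp(-0.5 (z - mu)^T (L + Sigma)^-1 (z - mu)) with L = diag(lengthscales**2).

import numpy as np

rng = np.random.default_rng(1)
D, M, s2 = 3, 5, 1.5
lengthscales = rng.uniform(0.5, 2.0, size=D)
L = np.diag(lengthscales**2)
Z = rng.normal(size=(M, D))
mu = rng.normal(size=D)
A = rng.normal(size=(D, D))
Sigma = 0.1 * (A @ A.T) + 0.2 * np.eye(D)

diff = Z - mu                                                # MxD
quad = np.einsum('md,de,me->m', diff, np.linalg.inv(L + Sigma), diff)
analytic = s2 * np.sqrt(np.linalg.det(L) / np.linalg.det(L + Sigma)) * np.exp(-0.5 * quad)

x = rng.multivariate_normal(mu, Sigma, size=200000)          # SxD samples of p(x)
sq = ((x[:, None, :] - Z[None, :, :]) / lengthscales)**2     # SxMxD
mc = (s2 * np.exp(-0.5 * sq.sum(-1))).mean(axis=0)           # M

print(np.max(np.abs(analytic - mc)))                         # small; shrinks with more samples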
示例#54
0
    def call(self, x, mask=None):
        if not self.built:
            raise Exception("Secondary stat layer not built")
        logging.debug('Secondary_stat parameter %s', type(x))  # confirm that x is a 4D tensor
        cov_mat, x_mean = self.calculate_pre_cov(x)
        # print('call during second {}'.format(self.eps))
        # cov_mat += self.eps * self.b
        if self.robust:
            # Robust estimate: apply an elementwise function to the eigenvalues of the covariance.
            if K.backend() != 'tensorflow':
                raise RuntimeError("The robust estimate is only supported with the TensorFlow backend")
            import tensorflow as tf
            # with tf.device('/cpu:0'):
            s, u = tf.self_adjoint_eig(cov_mat)
            comp = tf.zeros_like(s)
            s = tf.where(tf.less(s, comp), comp, s)
            # s = tf.Print(s, [s], message='s:', summarize=self.out_dim)
            inner = robust_estimate_eigenvalues(s, alpha=self.cov_alpha)
            inner = tf.identity(inner, 'RobustEigen')
            # inner = tf.Print(inner, [inner], message='inner:', summarize=self.out_dim)
            cov_mat = tf.matmul(u, tf.matmul(tf.matrix_diag(inner), tf.transpose(u, [0, 2, 1])))

        if self.cov_mode == 'mean' or self.cov_mode == 'pmean':
            # Encode mean into Cov mat.
            addition_array = K.mean(x_mean, axis=1, keepdims=True)
            addition_array /= addition_array  # Make it 1
            if self.cov_mode == 'pmean':
                x_mean = self.mean_p * x_mean
                new_cov = K.concatenate(
                    [cov_mat + K.batch_dot(x_mean, K.permute_dimensions(x_mean, (0, 2, 1))), x_mean])
            else:
                new_cov = K.concatenate([cov_mat, x_mean])
            tmp = K.concatenate([K.permute_dimensions(x_mean, (0, 2, 1)), addition_array])
            new_cov = K.concatenate([new_cov, tmp], axis=1)
            cov_mat = K.identity(new_cov, 'final_cov_mat')

        return cov_mat
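The robust branch above amounts to cleaning the spectrum of the covariance matrix. A minimal NumPy sketch of that step (illustrative only; robust_fn stands in for robust_estimate_eigenvalues, which is not shown here):

import numpy as np

def clip_and_rebuild(cov, robust_fn=lambda s: s):
    s, u = np.linalg.eigh(cov)            # cov = u diag(s) u^T, eigenvalues ascending
    s = np.where(s < 0.0, 0.0, s)         # drop negative eigenvalues
    s = robust_fn(s)                      # e.g. a robust shrinkage of the spectrum
    return u @ np.diag(s) @ u.T           # positive semi-definite reconstruction

cov = np.array([[2.0, 0.5], [0.5, -0.1]])          # slightly indefinite
print(np.linalg.eigvalsh(clip_and_rebuild(cov)))   # all eigenvalues >= 0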
示例#55
0
def _expectation(p, mean, none, kern, feat, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <x_{n+1} K_{x_n, Z}>_p(x_{n:n+1})
        - K_{.,.} :: RBF kernel
        - p       :: MarkovGaussian distribution (p.cov 2x(N+1)xDxD)

    :return: NxDxM
    """
    Xmu, Xcov = p.mu, p.cov

    with tf.control_dependencies([tf.assert_equal(
            tf.shape(Xmu)[1], tf.constant(kern.input_dim, settings.tf_int),
            message="Currently cannot handle slicing in exKxz.")]):
        Xmu = tf.identity(Xmu)

    with params_as_tensors_for(kern, feat):
        D = tf.shape(Xmu)[1]
        lengthscales = kern.lengthscales if kern.ARD \
            else tf.zeros((D,), dtype=settings.float_type) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov[0, :-1])  # NxDxD
        all_diffs = tf.transpose(feat.Z) - tf.expand_dims(Xmu[:-1], 2)  # NxDxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        exponent_mahalanobis = tf.cholesky_solve(chol_L_plus_Xcov, all_diffs)  # NxDxM
        non_exponent_term = tf.matmul(Xcov[1, :-1], exponent_mahalanobis, transpose_a=True)
        non_exponent_term = tf.expand_dims(Xmu[1:], 2) + non_exponent_term  # NxDxM

        exponent_mahalanobis = tf.reduce_sum(all_diffs * exponent_mahalanobis, 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)[:, None, :] * non_exponent_term
示例#56
0
def get_feature(input_q, input_a, att_W, index):
    h_q, w = int(input_q.get_shape()[1]), int(input_q.get_shape()[2])
    h_a = int(input_a.get_shape()[1])

    output_q = max_pooling(input_q)

    reshape_q = tf.expand_dims(output_q, 1)
    reshape_q = tf.tile(reshape_q, [1, h_a, 1])
    reshape_q = tf.reshape(reshape_q, [-1, w])
    reshape_a = tf.reshape(input_a, [-1, w])

    M = tf.tanh(tf.add(tf.matmul(reshape_q, tf.squeeze(att_W['Wqm'][:,:,index])), tf.matmul(reshape_a, tf.squeeze(att_W['Wam'][:,:,index]))))
    M = tf.matmul(M, tf.expand_dims(att_W['Wms'][:,index],-1))

    S = tf.reshape(M, [-1, h_a])
    S = tf.nn.softmax(S)

    S_diag = tf.matrix_diag(S)
    attention_a = tf.matmul(S_diag, input_a)
    attention_a = tf.reshape(attention_a, [-1, h_a, w])

    output_a = max_pooling(attention_a)

    return tf.tanh(output_q), tf.tanh(output_a)
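Note that the tf.matrix_diag / tf.matmul pair above is just a way of scaling each time step of input_a by its softmax weight; broadcasting gives the same result without materialising an h_a x h_a matrix. A small NumPy sketch of the equivalence (sizes are illustrative):

import numpy as np

rng = np.random.default_rng(2)
h_a, w = 6, 4
S = rng.random(size=h_a)
S = S / S.sum()                          # softmax-like weights over the time axis
input_a = rng.normal(size=(h_a, w))

via_diag = np.diag(S) @ input_a          # what matmul(matrix_diag(S), input_a) computes
via_broadcast = S[:, None] * input_a     # same result, no diagonal matrix needed

print(np.allclose(via_diag, via_broadcast))   # True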
示例#57
0
    def _attention_module(self, query, key, value, unit, in_kp):
        with tf.variable_scope('attention', reuse=True):
            query = tf.layers.dense(
                query,
                unit,
                name='qk_map',
                activation=tf.nn.relu,
                use_bias=False,
                kernel_initializer=self.u_init,
                reuse=tf.AUTO_REUSE,
                kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
            query = tf.nn.dropout(query, in_kp)

            key = tf.layers.dense(
                key,
                self.emb_size,
                name='qk_map',
                activation=tf.nn.relu,
                use_bias=False,
                kernel_initializer=self.u_init,
                reuse=tf.AUTO_REUSE,
                kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
            key = tf.nn.dropout(key, in_kp)

            score = tf.matmul(query, tf.transpose(key, [0, 2, 1])) / math.sqrt(
                self.emb_size)  # [B,T,T]

            # mask the diagonal of the affinity matrix
            a_mask = tf.ones([tf.shape(score)[1], tf.shape(score)[2]])
            a_mask = a_mask - tf.matrix_diag(tf.ones([tf.shape(score)[1]]))
            a_mask = tf.expand_dims(a_mask, [0])
            a_mask = tf.tile(a_mask, [tf.shape(score)[0], 1, 1])
            score *= a_mask
            score = tf.nn.softmax(score, axis=2)
            output = tf.matmul(score, value)
            return output
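One caveat about the mask above: multiplying the pre-softmax scores by a 0/1 mask sets the diagonal logits to 0 rather than removing them, so after the softmax each position still attends to itself with weight proportional to exp(0). If the intent is to exclude the diagonal entirely, an additive mask with a large negative value is the usual alternative. A hedged sketch of both variants (standalone, not part of the original model):

import tensorflow as tf

def mask_diagonal(score, additive=True):
    # score: [B, T, T] pre-softmax attention logits
    T = tf.shape(score)[1]
    diag = tf.matrix_diag(tf.ones([T], dtype=score.dtype))     # TxT identity
    if additive:
        score = score - 1e9 * diag[None, :, :]                 # push diagonal logits towards -inf
    else:
        score = score * (1.0 - diag[None, :, :])               # zero the diagonal logits (as above)
    return tf.nn.softmax(score, axis=2)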
示例#58
0
	def get_g(self, h_stack, x_t):
		"""
		calculate y_t ~ g(*|h_t-1, x_t)
		h_stack.shape = (n_particles, batch_size, Dh)
		x_t.shape 	  = (n_particles, batch_size, Dx)
		"""
		x_t_ft = self.get_x_ft(x_t)
		# x_t_ft.shape = (n_particles, batch_size, Dx_1)
		with tf.variable_scope(self.variable_scope + '/get_g'):
			h_x_concat = tf.concat((h_stack, x_t_ft), axis = 2, name = 'h_x_concat')
			mu    = fully_connected(h_x_concat, self.Dy, 
									weights_initializer=xavier_initializer(uniform=False), 
									activation_fn = None, 
									reuse = tf.AUTO_REUSE, scope = "mu")
			# mu.shape    = (n_particles, batch_size, Dy)
			sigma = fully_connected(h_x_concat, self.Dy,
									weights_initializer=xavier_initializer(uniform=False),
									biases_initializer=tf.constant_initializer(0.6),
									activation_fn = tf.nn.softplus, 
									reuse = tf.AUTO_REUSE, scope = "sigma") + self.sigma_cons
			# sigma.shape = (n_particles, batch_size, Dy)
			g = tfd.MultivariateNormalFullCovariance(loc = mu, covariance_matrix = tf.matrix_diag(sigma), 
													 name = "g")
			return g 
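A hedged aside on the density above: because the covariance passed in is tf.matrix_diag(sigma), sigma is interpreted as a vector of variances, not standard deviations (a diagonal parameterisation such as tfd.MultivariateNormalDiag(loc=mu, scale_diag=tf.sqrt(sigma)) would build the same distribution without a dense covariance, assuming tfd is TensorFlow Probability's distributions module). A quick SciPy check of the variance interpretation:

import numpy as np
from scipy.stats import multivariate_normal, norm

mu = np.array([0.1, -0.3, 0.7])
sigma = np.array([0.4, 0.9, 0.25])            # per-dimension variances
x = np.array([0.0, 0.5, 1.0])

full = multivariate_normal(mean=mu, cov=np.diag(sigma)).logpdf(x)
diag = norm(loc=mu, scale=np.sqrt(sigma)).logpdf(x).sum()
print(np.isclose(full, diag))                 # True: sigma enters as a variance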
示例#59
0
    def build_likelihood(self, Z, kern, kern_t, give_KL=True):
        """
        
        :param Z: inducing points 
        :param kern: kernel for the q(X)
        :param kern_t: kernel for the p(X)
        :param give_KL: 
        :return: 
        """
        # "The Dynamical Variational GP-LVM for Sequence Data" part in sec 3.3 of Andreas Damianou's Phd thesis.
        #########################################
        Kxx = kern_t.K(self.t) + tf.eye(self.num_data, dtype=float_type) * 1e-6 # N x N, prior covariance for p(X)
        Lx = tf.cholesky(Kxx)
        Lambda = tf.matrix_diag(tf.transpose(self.X_variational_var)) # Q x N x N, prior covariance for q(X)
        tmp = tf.eye(self.num_data, dtype=float_type) + tf.einsum('ijk,kl->ijl', tf.einsum('ij,kil->kjl', Lx, Lambda), Lx) # I + Lx^T x Lambda x Lx in batch mode
        Ltmp = tf.cholesky(tmp) # Q x N x N
        tmp2 = tf.matrix_triangular_solve(Ltmp, tf.tile(tf.expand_dims(tf.transpose(Lx), 0), tf.stack([self.num_latent, 1, 1])))
        S_full = tf.einsum('ijk,ijl->ikl', tmp2, tmp2) # Q x N x N
        S = tf.transpose(tf.matrix_diag_part(S_full)) # N x Q, marginal distribution of multivariate normal, from column-wise to row-wise.
        mu = tf.matmul(Kxx, self.X_variational_mean) # N x Q
        ##########################################

        psi0 = tf.reduce_sum(kern.eKdiag(mu, S), 0) # N
        psi1 = kern.eKxz(Z, mu, S) # N x M
        psi2 = tf.reduce_sum(kern.eKzxKxz(Z, mu, S), 0) # N x M x M

        # compute the KL[q(X) || p(X)]
        NQ = tf.cast(tf.size(mu), float_type)
        if give_KL:
            KL = -0.5 * NQ
            KL += tf.reduce_sum(tf.log(tf.matrix_diag_part(Ltmp))) # trace tricks
            KL += 0.5 * tf.reduce_sum(tf.trace(tf.cholesky_solve(tf.tile(tf.expand_dims(Lx, 0),
                                               tf.stack([self.num_latent, 1, 1])) , S_full + tf.einsum('ji,ki->ijk', mu, mu))))
            return KL, psi0, psi1, psi2
        else:
            return psi0, psi1, psi2
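The batched construction of S_full above relies on the identity (Kxx^-1 + Lambda)^-1 = Lx (I + Lx^T Lambda Lx)^-1 Lx^T with Kxx = Lx Lx^T, which avoids inverting Kxx directly. A small NumPy sketch verifying it (sizes and matrices are illustrative assumptions):

import numpy as np

rng = np.random.default_rng(3)
N = 5
A = rng.normal(size=(N, N))
Kxx = A @ A.T + np.eye(N)
Lx = np.linalg.cholesky(Kxx)
Lambda = np.diag(rng.uniform(0.5, 2.0, size=N))

direct = np.linalg.inv(np.linalg.inv(Kxx) + Lambda)
via_chol = Lx @ np.linalg.inv(np.eye(N) + Lx.T @ Lambda @ Lx) @ Lx.T

print(np.allclose(direct, via_chol))   # True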
示例#60
0
    def build_predict_fs(self, Xnew, full_cov=False):
        """
        The approximate posterior over F is
            q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1)
        Here we project this to F*, the values of the GP at Xnew, which are
        given by
           q(F*) = N ( F* | K_{*f} alpha + mean, K_{**} - K_{*f}[K_{ff} +
                                           diag(lambda**-2)]^-1 K_{f*} )
        """
        f_means, f_vars = [], []
        for d in range(self.D):
            # compute kernel things
            Kx = self.kerns[d].K(self.X, Xnew)
            K = self.kerns[d].K(self.X)

            # predictive mean
            f_mean = tf.matmul(Kx, self.q_alpha[d, :, :],
                               transpose_a=True) + self.mean_functions[d](Xnew)

            # predictive var
            A = K + tf.matrix_diag(
                tf.transpose(1. / tf.square(self.q_lambda[d, :, :])))
            L = tf.cholesky(A)
            Kx_tiled = tf.tile(tf.expand_dims(Kx, 0),
                               [self.num_latent.value, 1, 1])
            LiKx = tf.matrix_triangular_solve(L, Kx_tiled)
            if full_cov:
                f_var = self.kerns[d].K(Xnew) - tf.matmul(
                    LiKx, LiKx, transpose_a=True)
            else:
                f_var = self.kerns[d].Kdiag(Xnew) - tf.reduce_sum(
                    tf.square(LiKx), 1)
            f_means.append(f_mean)
            f_vars.append(tf.transpose(f_var))

        return tf.stack(f_means), tf.stack(f_vars)
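The marginal-variance branch above uses the standard Cholesky shortcut: with A = K_{ff} + diag(lambda**-2) and L = chol(A), the diagonal of K_{**} - K_{*f} A^-1 K_{f*} equals Kdiag(Xnew) - sum(square(L^-1 K_{f*}), axis=0). A standalone NumPy/SciPy sketch of that identity (the RBF kernel and sizes below are illustrative assumptions):

import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.default_rng(4)
N, Nnew = 6, 3

def rbf(X1, X2, ell=1.0):
    d2 = ((X1[:, None, :] - X2[None, :, :])**2).sum(-1)
    return np.exp(-0.5 * d2 / ell**2)

X = rng.normal(size=(N, 1))
Xnew = rng.normal(size=(Nnew, 1))
lam = rng.uniform(0.5, 2.0, size=N)

Kff = rbf(X, X)
Kfs = rbf(X, Xnew)                               # N x Nnew
Kss_diag = np.ones(Nnew)                         # rbf(x, x) = 1

A = Kff + np.diag(1.0 / lam**2)
L = cholesky(A, lower=True)
LiKfs = solve_triangular(L, Kfs, lower=True)

direct = Kss_diag - np.diag(Kfs.T @ np.linalg.solve(A, Kfs))
shortcut = Kss_diag - np.square(LiKfs).sum(axis=0)

print(np.allclose(direct, shortcut))             # True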