Example #1
    def _forward_log_det_jacobian(self, x):
        # y = sinh((arcsinh(x) + skewness) * tailweight) * multiplier
        # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
        # dy/dx
        # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)
        # * multiplier

        tailweight = tf.convert_to_tensor(self.tailweight)

        return (tfp_math.log_cosh((tf.asinh(x) + self.skewness) * tailweight) -
                0.5 * tfp_math.log1psquare(x) + tf.math.log(tailweight) +
                tf.math.log(self._output_multiplier(tailweight)))
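
The commented derivation can be checked numerically: the bijector acts elementwise, so log|dy/dx| obtained by autodiff of the forward map should match the closed-form expression. Below is a minimal standalone sketch with hypothetical helper functions, written with plain tf ops rather than the tfp_math.log_cosh / tfp_math.log1psquare helpers used above:

import tensorflow as tf

def forward(x, skewness, tailweight, multiplier):
    # y = sinh((arcsinh(x) + skewness) * tailweight) * multiplier
    return tf.sinh((tf.asinh(x) + skewness) * tailweight) * multiplier

def forward_log_det_jacobian(x, skewness, tailweight, multiplier):
    # Closed form from the comment above, in plain ops; the library version
    # prefers log-space helpers to stay finite for large arguments.
    return (tf.math.log(tf.cosh((tf.asinh(x) + skewness) * tailweight))
            - 0.5 * tf.math.log1p(tf.square(x))
            + tf.math.log(tailweight) + tf.math.log(multiplier))

x = tf.constant([-1.5, 0.0, 2.0])
skewness, tailweight, multiplier = 0.3, 1.2, 0.9
with tf.GradientTape() as tape:
    tape.watch(x)
    y = forward(x, skewness, tailweight, multiplier)
dy_dx = tape.gradient(y, x)  # elementwise dy/dx, since each y[i] depends only on x[i]
print(tf.math.log(tf.abs(dy_dx)).numpy())                                     # autodiff
print(forward_log_det_jacobian(x, skewness, tailweight, multiplier).numpy())  # closed form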
Example #2
    def _forward_log_det_jacobian(self, x):
        # y = sinh((arcsinh(x) + skewness) * tailweight)
        # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
        # dy/dx
        # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)

        # This is computed inside the log to avoid catastrophic cancellations
        # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1).
        return (tf.math.log(
            tf.cosh((tf.asinh(x) + self.skewness) * self.tailweight)
            # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
            # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x).
            / _sqrtx2p1(x)) + tf.math.log(self.tailweight))
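
Compared with Example #1, this older version takes tf.math.log of tf.cosh directly, which overflows to inf once the cosh argument gets large; the log-space identity log cosh(z) = |z| + log1p(exp(-2|z|)) - log(2) stays finite. A small illustration, where stable_log_cosh is a hypothetical stand-in for tfp_math.log_cosh:

import math
import tensorflow as tf

def stable_log_cosh(z):
    # log cosh(z) = |z| + log1p(exp(-2|z|)) - log(2); never materializes cosh(z).
    z = tf.abs(z)
    return z + tf.math.log1p(tf.exp(-2. * z)) - math.log(2.)

z = tf.constant([1.0, 50.0, 500.0])
print(tf.math.log(tf.cosh(z)).numpy())  # last entry is inf: cosh(500.) overflows float32
print(stable_log_cosh(z).numpy())       # all three entries stay finite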
Example #3
    def _inverse_log_det_jacobian(self, y):
        # x = sinh(arcsinh(y / multiplier) / tailweight - skewness)
        # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
        # dx/dy
        # = cosh(arcsinh(y / multiplier) / tailweight - skewness)
        #     / (tailweight * sqrt((y / multiplier)**2 + 1)) / multiplier

        tailweight = tf.convert_to_tensor(self.tailweight)
        multiplier = self._output_multiplier(tailweight)
        y = y / multiplier

        return (tfp_math.log_cosh(tf.asinh(y) / tailweight - self.skewness) -
                0.5 * tfp_math.log1psquare(y) - tf.math.log(tailweight) -
                tf.math.log(multiplier))
Example #4
    def _inverse_log_det_jacobian(self, y):
        # x = sinh(arcsinh(y) / tailweight - skewness)
        # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
        # dx/dy
        # = cosh(arcsinh(y) / tailweight - skewness)
        #     / (tailweight * sqrt(y**2 + 1))

        # This is computed inside the log to avoid catastrophic cancellations
        # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(y**2 + 1).
        return (tf.math.log(
            tf.cosh(tf.asinh(y) / self.tailweight - self.skewness)
            # TODO(srvasude): Consider using cosh(arcsinh(y)) in cases
            # where (arcsinh(y) / tailweight) - skewness ~= arcsinh(y).
            / _sqrtx2p1(y)) - tf.math.log(self.tailweight))
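
Examples #2 and #4 are a forward/inverse pair, so their log-det-Jacobians must be negatives of each other at corresponding points: fldj(x) + ildj(forward(x)) == 0. A quick standalone check with scalar parameters, plain tf ops, and hypothetical function names:

import tensorflow as tf

skewness, tailweight = 0.5, 1.5

def forward(x):
    return tf.sinh((tf.asinh(x) + skewness) * tailweight)

def fldj(x):
    return (tf.math.log(tf.cosh((tf.asinh(x) + skewness) * tailweight))
            - 0.5 * tf.math.log1p(tf.square(x)) + tf.math.log(tailweight))

def ildj(y):
    return (tf.math.log(tf.cosh(tf.asinh(y) / tailweight - skewness))
            - 0.5 * tf.math.log1p(tf.square(y)) - tf.math.log(tailweight))

x = tf.constant([-2.0, 0.1, 3.0])
print((fldj(x) + ildj(forward(x))).numpy())  # ~0 elementwise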
Example #5
  def _inverse_log_det_jacobian(self, y):
    # x = sinh(arcsinh(y / multiplier) / tailweight - skewness)
    # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
    # dx/dy
    # = cosh(arcsinh(y / multiplier) / tailweight - skewness)
    #     / (tailweight * sqrt((y / multiplier)**2 + 1)) / multiplier

    # This is computed inside the log to avoid catastrophic cancellations
    # from cosh((arcsinh(y / multiplier) / tailweight)
    # - skewness) and sqrt((y / multiplier)**2 + 1).
    tailweight = tf.convert_to_tensor(self.tailweight)
    multiplier = self._output_multiplier(tailweight)
    y = y / multiplier
    subtract_term = tf.math.log(multiplier)
    return (tf.math.log(
        tf.cosh(tf.asinh(y) / tailweight - self.skewness)
        # TODO(srvasude): Consider using cosh(arcsinh(y)) in cases
        # where (arcsinh(y) / tailweight) - skewness ~= arcsinh(y).
        / _sqrtx2p1(y)) - tf.math.log(tailweight)) - subtract_term
Example #6
 def _inverse(self, y):
     tailweight = tf.convert_to_tensor(self.tailweight)
     multiplier = self._output_multiplier(tailweight)
     return tf.sinh(tf.asinh(y / multiplier) / tailweight - self.skewness)
Example #7
 def _forward(self, x):
     tailweight = tf.convert_to_tensor(self.tailweight)
     multiplier = self._output_multiplier(tailweight)
     bijector_output = tf.sinh((tf.asinh(x) + self.skewness) * tailweight)
     return bijector_output * multiplier
Example #8
 def _output_multiplier(self, tailweight):
     return self._scale_number / tf.sinh(
         tf.asinh(self._scale_number) * tailweight)
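
Examples #6-#8 belong to the variant with an output multiplier: _output_multiplier rescales the output so that, with skewness = 0, a fixed reference point maps back onto itself, keeping the output scale comparable to the input even for large tailweights. A minimal sketch; the reference value 2.0 used here for _scale_number is an assumption for illustration:

import tensorflow as tf

tailweight = tf.constant(1.7)
scale_number = tf.constant(2.0)  # assumed stand-in for self._scale_number

multiplier = scale_number / tf.sinh(tf.asinh(scale_number) * tailweight)

# With skewness = 0 the reference point is a fixed point of the rescaled transform:
y = tf.sinh((tf.asinh(scale_number) + 0.) * tailweight) * multiplier
print(y.numpy())  # ~2.0, i.e. forward(scale_number) == scale_number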
Example #9
    def call(self, inputs):
        if (not isinstance(inputs, random_variable.RandomVariable)
                and not isinstance(self.kernel, random_variable.RandomVariable)
                and not isinstance(self.bias, random_variable.RandomVariable)):
            return super(DenseDVI, self).call(inputs)
        self.call_weights()
        inputs_mean, inputs_variance, inputs_covariance = get_moments(inputs)
        kernel_mean, kernel_variance, _ = get_moments(self.kernel)
        if self.use_bias:
            bias_mean, _, bias_covariance = get_moments(self.bias)

        # E[outputs] = E[inputs] * E[kernel] + E[bias]
        mean = tf.tensordot(inputs_mean, kernel_mean, [[-1], [0]])
        if self.use_bias:
            mean = tf.nn.bias_add(mean, bias_mean)

        # Cov = E[inputs**2] Cov(kernel) + E[W]^T Cov(inputs) E[W] + Cov(bias)
        # For first term, assume Cov(kernel) = 0 on off-diagonals so we only
        # compute diagonal term.
        covariance_diag = tf.tensordot(inputs_variance + inputs_mean**2,
                                       kernel_variance, [[-1], [0]])
        # Compute quadratic form E[W]^T Cov E[W] from right-to-left. First is
        #  [..., features, features], [features, units] -> [..., features, units].
        cov_w = tf.tensordot(inputs_covariance, kernel_mean, [[-1], [0]])
        # Next is [..., features, units], [features, units] -> [..., units, units].
        w_cov_w = tf.tensordot(cov_w, kernel_mean, [[-2], [0]])
        covariance = w_cov_w
        if self.use_bias:
            covariance += bias_covariance
        covariance = tf.linalg.set_diag(
            covariance,
            tf.linalg.diag_part(covariance) + covariance_diag)

        if self.activation in (tf.keras.activations.relu, tf.nn.relu):
            # Compute activation's moments with variable names from Wu et al. (2018).
            variance = tf.linalg.diag_part(covariance)
            scale = tf.sqrt(variance)
            mu = mean / (scale + tf.keras.backend.epsilon())
            mean = scale * soft_relu(mu)

            pairwise_variances = (tf.expand_dims(variance, -1) *
                                  tf.expand_dims(variance, -2)
                                  )  # [..., units, units]
            rho = covariance / tf.sqrt(pairwise_variances +
                                       tf.keras.backend.epsilon())
            rho = tf.clip_by_value(rho,
                                   -1. / (1. + tf.keras.backend.epsilon()),
                                   1. / (1. + tf.keras.backend.epsilon()))
            s = covariance / (rho + tf.keras.backend.epsilon())
            mu1 = tf.expand_dims(mu, -1)  # [..., units, 1]
            mu2 = tf.linalg.matrix_transpose(mu1)  # [..., 1, units]
            a = (soft_relu(mu1) * soft_relu(mu2) +
                 rho * tfp.distributions.Normal(0., 1.).cdf(mu1) *
                 tfp.distributions.Normal(0., 1.).cdf(mu2))
            gh = tf.asinh(rho)
            bar_rho = tf.sqrt(1. - rho**2)
            gr = gh + rho / (1. + bar_rho)
            # Include numerically stable versions of gr and rho when multiplying or
            # dividing them. The sign of gr*rho and rho/gr is always positive.
            safe_gr = tf.abs(gr) + 0.5 * tf.keras.backend.epsilon()
            safe_rho = tf.abs(rho) + tf.keras.backend.epsilon()
            exp_negative_q = gr / (
                2. * math.pi) * tf.exp(-safe_rho / (2. * safe_gr *
                                                    (1 + bar_rho)) +
                                       (gh - rho) /
                                       (safe_gr * safe_rho) * mu1 * mu2)
            covariance = s * (a + exp_negative_q)
        elif self.activation not in (tf.keras.activations.linear, None):
            raise NotImplementedError(
                'Activation is {}. Deterministic variational '
                'inference is only available if activation is '
                'ReLU or None.'.format(self.activation))

        return generated_random_variables.MultivariateNormalFullCovariance(
            mean, covariance)
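
The least obvious part of the covariance propagation above is the quadratic form E[W]^T Cov(inputs) E[W], built from two tf.tensordot calls evaluated right-to-left exactly as the inline comments describe. A shape-only sketch with made-up tensors (batch of 3, features=4, units=2):

import tensorflow as tf

inputs_covariance = tf.eye(4, batch_shape=[3])  # [batch, features, features]
kernel_mean = tf.random.normal([4, 2])          # [features, units]

# [..., features, features] x [features, units] -> [..., features, units]
cov_w = tf.tensordot(inputs_covariance, kernel_mean, [[-1], [0]])
# [..., features, units] x [features, units] -> [..., units, units]
w_cov_w = tf.tensordot(cov_w, kernel_mean, [[-2], [0]])
print(cov_w.shape, w_cov_w.shape)  # (3, 4, 2) (3, 2, 2)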
Example #10
 def _inverse(self, y):
     return tf.sinh(tf.asinh(y) / self.tailweight - self.skewness)
Example #11
 def _forward(self, x):
     return tf.sinh((tf.asinh(x) + self.skewness) * self.tailweight)
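
Examples #10 and #11 are the plain (no multiplier) inverse/forward pair; composing them should recover the input, and with skewness = 0, tailweight = 1 the transform reduces to the identity. A minimal check with hypothetical standalone functions:

import tensorflow as tf

skewness, tailweight = tf.constant(0.4), tf.constant(2.0)

def forward(x):
    return tf.sinh((tf.asinh(x) + skewness) * tailweight)

def inverse(y):
    return tf.sinh(tf.asinh(y) / tailweight - skewness)

x = tf.constant([-3.0, 0.0, 1.5])
print(inverse(forward(x)).numpy())               # recovers x up to float error
print(tf.sinh((tf.asinh(x) + 0.) * 1.).numpy())  # skewness=0, tailweight=1: identity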
Example #12
 def _inverse(self, y):
     return tf.asinh(y)