def _forward_log_det_jacobian(self, x):
  # y = sinh((arcsinh(x) + skewness) * tailweight) * multiplier
  # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
  # dy/dx
  # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)
  #   * multiplier
  tailweight = tf.convert_to_tensor(self.tailweight)
  return (tfp_math.log_cosh((tf.asinh(x) + self.skewness) * tailweight) -
          0.5 * tfp_math.log1psquare(x) +
          tf.math.log(tailweight) +
          tf.math.log(self._output_multiplier(tailweight)))
def _forward_log_det_jacobian(self, x):
  # y = sinh((arcsinh(x) + skewness) * tailweight)
  # Using sinh' = cosh, arcsinh'(x) = 1 / sqrt(x**2 + 1),
  # dy/dx
  # = cosh((arcsinh(x) + skewness) * tailweight) * tailweight / sqrt(x**2 + 1)
  # This is computed inside the log to avoid catastrophic cancellations
  # from cosh((arcsinh(x) + skewness) * tailweight) and sqrt(x**2 + 1).
  return (tf.math.log(
      tf.cosh((tf.asinh(x) + self.skewness) * self.tailweight)
      # TODO(srvasude): Consider using cosh(arcsinh(x)) in cases
      # where (arcsinh(x) + skewness) * tailweight ~= arcsinh(x).
      / _sqrtx2p1(x)) +
          tf.math.log(self.tailweight))
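# Added sanity-check sketch (not part of the original source): assuming the
# methods above belong to a SinhArcsinh-style bijector, with
# tfp.bijectors.SinhArcsinh used here as a stand-in, the analytic
# forward_log_det_jacobian should agree with log|dy/dx| from autodiff,
# up to floating-point error.
import tensorflow as tf
import tensorflow_probability as tfp

bij = tfp.bijectors.SinhArcsinh(skewness=0.5, tailweight=1.5)
x = tf.constant([-2.0, 0.0, 3.0])
with tf.GradientTape() as tape:
  tape.watch(x)
  y = bij.forward(x)
dy_dx = tape.gradient(y, x)  # Elementwise, since y[i] depends only on x[i].
analytic = bij.forward_log_det_jacobian(x, event_ndims=0)
autodiff = tf.math.log(tf.abs(dy_dx))
# `analytic` and `autodiff` should be numerically close.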
def _inverse_log_det_jacobian(self, y):
  # x = sinh(arcsinh(y / multiplier) / tailweight - skewness)
  # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
  # dx/dy
  # = cosh(arcsinh(y / multiplier) / tailweight - skewness)
  #   / (tailweight * sqrt((y / multiplier)**2 + 1)) / multiplier
  tailweight = tf.convert_to_tensor(self.tailweight)
  multiplier = self._output_multiplier(tailweight)
  y = y / multiplier
  return (tfp_math.log_cosh(tf.asinh(y) / tailweight - self.skewness) -
          0.5 * tfp_math.log1psquare(y) -
          tf.math.log(tailweight) -
          tf.math.log(multiplier))
def _inverse_log_det_jacobian(self, y):
  # x = sinh(arcsinh(y) / tailweight - skewness)
  # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
  # dx/dy
  # = cosh(arcsinh(y) / tailweight - skewness)
  #   / (tailweight * sqrt(y**2 + 1))
  # This is computed inside the log to avoid catastrophic cancellations
  # from cosh((arcsinh(y) / tailweight) - skewness) and sqrt(y**2 + 1).
  return (tf.math.log(
      tf.cosh(tf.asinh(y) / self.tailweight - self.skewness)
      # TODO(srvasude): Consider using cosh(arcsinh(y)) in cases
      # where (arcsinh(y) / tailweight) - skewness ~= arcsinh(y).
      / _sqrtx2p1(y)) -
          tf.math.log(self.tailweight))
def _inverse_log_det_jacobian(self, y):
  # x = sinh(arcsinh(y / multiplier) / tailweight - skewness)
  # Using sinh' = cosh, arcsinh'(y) = 1 / sqrt(y**2 + 1),
  # dx/dy
  # = cosh(arcsinh(y / multiplier) / tailweight - skewness)
  #   / (tailweight * sqrt((y / multiplier)**2 + 1)) / multiplier
  # This is computed inside the log to avoid catastrophic cancellations
  # from cosh((arcsinh(y / multiplier) / tailweight) - skewness)
  # and sqrt((y / multiplier)**2 + 1).
  tailweight = tf.convert_to_tensor(self.tailweight)
  multiplier = self._output_multiplier(tailweight)
  y = y / multiplier
  return (tf.math.log(
      tf.cosh(tf.asinh(y) / tailweight - self.skewness)
      # TODO(srvasude): Consider using cosh(arcsinh(y)) in cases
      # where (arcsinh(y) / tailweight) - skewness ~= arcsinh(y).
      / _sqrtx2p1(y)) -
          tf.math.log(tailweight) -
          tf.math.log(multiplier))
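# Added consistency-check sketch (not original source): for any bijector,
# forward_log_det_jacobian(x) + inverse_log_det_jacobian(forward(x)) should be
# zero, which exercises the forward and inverse variants above together.
# tfp.bijectors.SinhArcsinh is used as a stand-in for the owning class.
import tensorflow as tf
import tensorflow_probability as tfp

bij = tfp.bijectors.SinhArcsinh(skewness=-0.3, tailweight=2.0)
x = tf.constant([-1.5, 0.25, 4.0])
fldj = bij.forward_log_det_jacobian(x, event_ndims=0)
ildj = bij.inverse_log_det_jacobian(bij.forward(x), event_ndims=0)
residual = fldj + ildj  # Should be ~0 up to floating-point error.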
def _inverse(self, y):
  tailweight = tf.convert_to_tensor(self.tailweight)
  multiplier = self._output_multiplier(tailweight)
  return tf.sinh(tf.asinh(y / multiplier) / tailweight - self.skewness)
def _forward(self, x):
  tailweight = tf.convert_to_tensor(self.tailweight)
  multiplier = self._output_multiplier(tailweight)
  bijector_output = tf.sinh((tf.asinh(x) + self.skewness) * tailweight)
  return bijector_output * multiplier
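# Added round-trip sketch (not original source): _forward and _inverse above
# should be mutual inverses regardless of the output multiplier, since the
# multiplier applied in _forward is undone in _inverse.
# tfp.bijectors.SinhArcsinh is used as a stand-in for the owning class.
import tensorflow as tf
import tensorflow_probability as tfp

bij = tfp.bijectors.SinhArcsinh(skewness=0.8, tailweight=0.6)
x = tf.constant([-3.0, 0.0, 1.5])
x_roundtrip = bij.inverse(bij.forward(x))
# `x_roundtrip` should match `x` up to floating-point error.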
def _output_multiplier(self, tailweight):
  return self._scale_number / tf.sinh(
      tf.asinh(self._scale_number) * tailweight)
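# Added note (not original source): with zero skewness, this multiplier makes
# the forward map fix the reference point x = _scale_number, because
#   sinh(asinh(_scale_number) * tailweight) * multiplier == _scale_number.
# A quick check under that assumption, with hypothetical values:
import tensorflow as tf

scale_number = 2.0  # Hypothetical reference point.
tailweight = 1.7
multiplier = scale_number / tf.sinh(tf.asinh(scale_number) * tailweight)
y = tf.sinh(tf.asinh(scale_number) * tailweight) * multiplier
# `y` should equal `scale_number` (up to floating-point error).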
def call(self, inputs):
  if (not isinstance(inputs, random_variable.RandomVariable) and
      not isinstance(self.kernel, random_variable.RandomVariable) and
      not isinstance(self.bias, random_variable.RandomVariable)):
    return super(DenseDVI, self).call(inputs)
  self.call_weights()
  inputs_mean, inputs_variance, inputs_covariance = get_moments(inputs)
  kernel_mean, kernel_variance, _ = get_moments(self.kernel)
  if self.use_bias:
    bias_mean, _, bias_covariance = get_moments(self.bias)

  # E[outputs] = E[inputs] * E[kernel] + E[bias]
  mean = tf.tensordot(inputs_mean, kernel_mean, [[-1], [0]])
  if self.use_bias:
    mean = tf.nn.bias_add(mean, bias_mean)

  # Cov = E[inputs**2] Cov(kernel) + E[W]^T Cov(inputs) E[W] + Cov(bias)
  # For first term, assume Cov(kernel) = 0 on off-diagonals so we only
  # compute diagonal term.
  covariance_diag = tf.tensordot(inputs_variance + inputs_mean**2,
                                 kernel_variance, [[-1], [0]])
  # Compute quadratic form E[W]^T Cov E[W] from right-to-left. First is
  # [..., features, features], [features, units] -> [..., features, units].
  cov_w = tf.tensordot(inputs_covariance, kernel_mean, [[-1], [0]])
  # Next is [..., features, units], [features, units] -> [..., units, units].
  w_cov_w = tf.tensordot(cov_w, kernel_mean, [[-2], [0]])
  covariance = w_cov_w
  if self.use_bias:
    covariance += bias_covariance
  covariance = tf.linalg.set_diag(
      covariance, tf.linalg.diag_part(covariance) + covariance_diag)

  if self.activation in (tf.keras.activations.relu, tf.nn.relu):
    # Compute activation's moments with variable names from Wu et al. (2018).
    variance = tf.linalg.diag_part(covariance)
    scale = tf.sqrt(variance)
    mu = mean / (scale + tf.keras.backend.epsilon())
    mean = scale * soft_relu(mu)

    pairwise_variances = (tf.expand_dims(variance, -1) *
                          tf.expand_dims(variance, -2))  # [..., units, units]
    rho = covariance / tf.sqrt(pairwise_variances +
                               tf.keras.backend.epsilon())
    rho = tf.clip_by_value(rho,
                           -1. / (1. + tf.keras.backend.epsilon()),
                           1. / (1. + tf.keras.backend.epsilon()))
    s = covariance / (rho + tf.keras.backend.epsilon())
    mu1 = tf.expand_dims(mu, -1)  # [..., units, 1]
    mu2 = tf.linalg.matrix_transpose(mu1)  # [..., 1, units]
    a = (soft_relu(mu1) * soft_relu(mu2) +
         rho * tfp.distributions.Normal(0., 1.).cdf(mu1) *
         tfp.distributions.Normal(0., 1.).cdf(mu2))
    gh = tf.asinh(rho)
    bar_rho = tf.sqrt(1. - rho**2)
    gr = gh + rho / (1. + bar_rho)
    # Include numerically stable versions of gr and rho when multiplying or
    # dividing them. The sign of gr*rho and rho/gr is always positive.
    safe_gr = tf.abs(gr) + 0.5 * tf.keras.backend.epsilon()
    safe_rho = tf.abs(rho) + tf.keras.backend.epsilon()
    exp_negative_q = gr / (2. * math.pi) * tf.exp(
        -safe_rho / (2. * safe_gr * (1 + bar_rho)) +
        (gh - rho) / (safe_gr * safe_rho) * mu1 * mu2)
    covariance = s * (a + exp_negative_q)
  elif self.activation not in (tf.keras.activations.linear, None):
    raise NotImplementedError('Activation is {}. Deterministic variational '
                              'inference is only available if activation is '
                              'ReLU or None.'.format(self.activation))

  return generated_random_variables.MultivariateNormalFullCovariance(
      mean, covariance)
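# Added illustrative sketch (not original source): for a linear (no activation)
# layer, the moment propagation above reduces to
#   E[outputs] = E[inputs] E[kernel] + E[bias]
#   Cov = E[inputs**2] Cov(kernel) + E[W]^T Cov(inputs) E[W] + Cov(bias).
# A tiny check of the quadratic-form term with concrete tensors; the names
# below are hypothetical, not taken from the layer.
import tensorflow as tf

inputs_covariance = tf.constant([[0.5, 0.1],
                                 [0.1, 0.4]])  # Cov(inputs), [features, features]
kernel_mean = tf.constant([[1.0, 0.0],
                           [2.0, -1.0]])  # E[W], [features, units]
cov_w = tf.tensordot(inputs_covariance, kernel_mean, [[-1], [0]])  # [features, units]
w_cov_w = tf.tensordot(cov_w, kernel_mean, [[-2], [0]])  # [units, units]
# `w_cov_w` equals tf.transpose(kernel_mean) @ inputs_covariance @ kernel_mean.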
def _inverse(self, y):
  return tf.sinh(tf.asinh(y) / self.tailweight - self.skewness)
def _forward(self, x):
  return tf.sinh((tf.asinh(x) + self.skewness) * self.tailweight)
def _inverse(self, y):
  return tf.asinh(y)