def cdf(self, value):
    # CDF of the Laplace distribution; the expm1 form stays accurate for values near loc
    if self._validate_args:
        self._validate_sample(value)
    return 0.5 - 0.5 * (value - self.loc).sign() * torch.expm1(-(value - self.loc).abs() / self.scale)
def cdf(self, value):
    # `_validate_log_prob_arg` is the older spelling of the validation hook
    # used as `_validate_sample` in the variant above
    self._validate_log_prob_arg(value)
    return 0.5 - 0.5 * (value - self.loc).sign() * torch.expm1(-(value - self.loc).abs() / self.scale)
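# A hedged sanity check, not from the original sources: the sign/expm1 form of
# the Laplace CDF above should agree with torch.distributions.Laplace.cdf.
# `_check_laplace_cdf` is a hypothetical helper added for illustration.
def _check_laplace_cdf():
    import torch
    d = torch.distributions.Laplace(loc=torch.tensor(0.0), scale=torch.tensor(2.0))
    value = torch.tensor([-3.0, 0.0, 1.5])
    expected = 0.5 - 0.5 * (value - d.loc).sign() * torch.expm1(-(value - d.loc).abs() / d.scale)
    assert torch.allclose(d.cdf(value), expected)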
def test_expm1(x, y): c = torch.expm1(torch.add(x, y)) return c
def forward(self, xs: torch.Tensor, **kwargs):
    # Get the batch size
    batch_size = xs.size(0)

    # Keep a dict to assign attributes to nodes. Create one if not already existent
    node_attr = kwargs.setdefault('attr', dict())
    # In this dict, store the probability of arriving at this node.
    # It is assumed that when a parent node calls forward on this node it passes its
    # node_attr object with the call and that it sets the path probability of arriving
    # at its child. Therefore, if this attribute is not present this node is assumed
    # to not have a parent. The probability of arriving at this node should thus be
    # set to 1 (as this would be the root in this case).
    # The path probability is tracked for all x in the batch
    if not self._log_probabilities:
        pa = node_attr.setdefault((self, 'pa'), torch.ones(batch_size, device=xs.device))
    else:
        # in log space the root's path probability is log(1) = 0
        pa = node_attr.setdefault((self, 'pa'), torch.zeros(batch_size, device=xs.device))

    # Obtain the probabilities of taking the right subtree
    ps = self.g(xs, **kwargs)  # shape: (bs,)

    if not self._log_probabilities:
        # Store decision node probabilities as node attribute
        node_attr[self, 'ps'] = ps
        # Store path probabilities of arriving at child nodes as node attributes
        node_attr[self.l, 'pa'] = (1 - ps) * pa
        node_attr[self.r, 'pa'] = ps * pa
        # # Store alpha value for this batch for this decision node
        # node_attr[self, 'alpha'] = torch.sum(pa * ps) / torch.sum(pa)

        # Obtain the unweighted probability distributions from the child nodes
        l_dists, _ = self.l.forward(xs, **kwargs)  # shape: (bs, k)
        r_dists, _ = self.r.forward(xs, **kwargs)  # shape: (bs, k)

        # Weight the probability distributions by the decision node's output
        ps = ps.view(batch_size, 1)
        return (1 - ps) * l_dists + ps * r_dists, node_attr  # shape: (bs, k)
    else:
        # Store decision node probabilities as node attribute
        node_attr[self, 'ps'] = ps

        # Store path probabilities of arriving at child nodes as node attributes.
        # Compute log(1 - exp(ps)) stably; source: rewritten to pytorch from
        # https://github.com/tensorflow/probability/blob/v0.9.0/tensorflow_probability/python/math/generic.py#L447-L471
        x = torch.abs(ps) + 1e-7  # add small epsilon for numerical stability
        oneminusp = torch.where(x < np.log(2),
                                torch.log(-torch.expm1(-x)),
                                torch.log1p(-torch.exp(-x)))

        node_attr[self.l, 'pa'] = oneminusp + pa
        node_attr[self.r, 'pa'] = ps + pa

        # Obtain the unweighted probability distributions from the child nodes
        l_dists, _ = self.l.forward(xs, **kwargs)  # shape: (bs, k)
        r_dists, _ = self.r.forward(xs, **kwargs)  # shape: (bs, k)

        # Weight the probability distributions by the decision node's output
        ps = ps.view(batch_size, 1)
        oneminusp = oneminusp.view(batch_size, 1)
        logs_stacked = torch.stack((oneminusp + l_dists, ps + r_dists))
        return torch.logsumexp(logs_stacked, dim=0), node_attr  # shape: (bs, k)
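# A hedged equivalence sketch (assumed, not part of the original source): the
# log-space child path probabilities log(1 - ps) + log(pa) and log(ps) + log(pa)
# used above exponentiate to the linear-space updates (1 - ps) * pa and ps * pa
# from the first branch. `_demo_log_space_paths` is a hypothetical helper.
def _demo_log_space_paths():
    import torch
    pa = torch.tensor([0.7, 0.2])   # linear-space path probabilities
    ps = torch.tensor([0.9, 0.4])   # probability of taking the right subtree
    log_pa, log_ps = pa.log(), ps.log()
    log_left = torch.log1p(-log_ps.exp()) + log_pa   # log((1 - ps) * pa)
    log_right = log_ps + log_pa                      # log(ps * pa)
    assert torch.allclose(log_left.exp(), (1 - ps) * pa)
    assert torch.allclose(log_right.exp(), ps * pa)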
def log1mexp(x):
    # Numerically stable log(1 - exp(x)) for x <= 0; -0.693 ~= -log(2) is the switchover point
    return torch.where(x > -0.693,
                       torch.log(-torch.expm1(x)),
                       torch.log1p(-torch.exp(x)))
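# A minimal demonstration sketch (assumed, not part of the original snippet):
# near x = 0 the naive log(1 - exp(x)) underflows to log(0) = -inf in float32,
# while log1mexp stays finite. `_demo_log1mexp` is a hypothetical helper.
def _demo_log1mexp():
    import torch
    x = torch.tensor([-1e-10, -1e-5, -0.5, -5.0])
    naive = torch.log(1 - torch.exp(x))  # -inf for x = -1e-10 in float32
    stable = log1mexp(x)                 # finite and accurate everywhere
    return naive, stable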
def forward(self, input, target):
    # Compare in the original domain: expm1 undoes a log1p compression
    input = torch.expm1(input)
    target = torch.expm1(target)
    return F.l1_loss(input, target, reduction=self.reduction)
def inverse(self, y):
    if torch.any(y < self.lower):
        raise ValueError("values must be at least %s" % self.lower)
    return y - self.lower + torch.log(-torch.expm1(-self.beta * (y - self.lower))) / self.beta
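# A hedged round-trip check (assumed): `inverse` above algebraically undoes a
# shifted softplus y = lower + softplus(beta * x) / beta. The forward transform
# and the SimpleNamespace stand-in for `self` are illustrative assumptions.
def _demo_lower_bound_inverse():
    import torch
    import torch.nn.functional as F
    from types import SimpleNamespace
    t = SimpleNamespace(lower=torch.tensor(1.0), beta=2.0)
    x = torch.tensor([0.3, 1.0, 4.0])
    y = t.lower + F.softplus(t.beta * x) / t.beta  # assumed forward transform
    assert torch.allclose(inverse(t, y), x, atol=1e-4)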
def inv_softplus(y):
    """The inverse of tf.nn.softplus()."""
    y = torch.as_tensor(y)
    # for y > 87.5, expm1(y) would overflow float32 and log(expm1(y)) == y anyway
    return torch.where(y > 87.5, y, torch.log(torch.expm1(y)))
def expm1_safe(x):
    """The same as tf.math.expm1(x), but clamps the input to prevent NaNs."""
    x = torch.as_tensor(x)
    return torch.expm1(torch.min(x, torch.tensor(87.5).to(x)))
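# A small round-trip sketch (assumed, added for illustration): inv_softplus
# inverts F.softplus up to float32 precision, and expm1_safe stays finite even
# for inputs that would overflow exp. `_demo_softplus_helpers` is hypothetical.
def _demo_softplus_helpers():
    import torch
    import torch.nn.functional as F
    x = torch.tensor([-5.0, 0.1, 3.0, 50.0])
    assert torch.allclose(inv_softplus(F.softplus(x)), x, atol=1e-4)
    assert torch.isfinite(expm1_safe(torch.tensor([10.0, 1000.0]))).all()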
def lossfun(x, alpha, scale, approximate=False, epsilon=1e-6):
    r"""Implements the general form of the loss.

    This implements the rho(x, \alpha, c) function described in "A General and
    Adaptive Robust Loss Function", Jonathan T. Barron,
    https://arxiv.org/abs/1701.03077.

    Args:
      x: The residual for which the loss is being computed. x can have any shape,
        and alpha and scale will be broadcasted to match x's shape if necessary.
        Must be a tensor of floats.
      alpha: The shape parameter of the loss (\alpha in the paper), where more
        negative values produce a loss with more robust behavior (outliers "cost"
        less), and more positive values produce a loss with less robust behavior
        (outliers are penalized more heavily). Alpha can be any value in
        [-infinity, infinity], but the gradient of the loss with respect to alpha
        is 0 at -infinity, infinity, 0, and 2. Must be a tensor of floats with the
        same precision as `x`. Varying alpha allows for smooth interpolation
        between a number of discrete robust losses:
          alpha=-Infinity: Welsch/Leclerc Loss.
          alpha=-2: Geman-McClure loss.
          alpha=0: Cauchy/Lorentzian loss.
          alpha=1: Charbonnier/pseudo-Huber loss.
          alpha=2: L2 loss.
      scale: The scale parameter of the loss. When |x| < scale, the loss is an
        L2-like quadratic bowl, and when |x| > scale the loss function takes on a
        different shape according to alpha. Must be a tensor of single-precision
        floats.
      approximate: a bool, where if True, this function returns an approximate and
        faster form of the loss, as described in the appendix of the paper. This
        approximation holds well everywhere except as x and alpha approach zero.
      epsilon: A float that determines how inaccurate the "approximate" version of
        the loss will be. Larger values are less accurate but more numerically
        stable. Must be greater than single-precision machine epsilon.

    Returns:
      The losses for each element of x, in the same shape and precision as x.
    """
    assert torch.is_tensor(x)
    assert torch.is_tensor(scale)
    assert torch.is_tensor(alpha)
    assert alpha.dtype == x.dtype
    assert scale.dtype == x.dtype
    assert (scale > 0).all()
    if approximate:
        # `epsilon` must be greater than single-precision machine epsilon.
        assert epsilon > np.finfo(np.float32).eps
        # Compute an approximate form of the loss which is faster, but inaccurate
        # when x and alpha are near zero.
        b = torch.abs(alpha - 2) + epsilon
        d = torch.where(alpha >= 0, alpha + epsilon, alpha - epsilon)
        loss = (b / d) * (torch.pow((x / scale)**2 / b + 1., 0.5 * d) - 1.)
    else:
        # Compute the exact loss.

        # This will be used repeatedly.
        squared_scaled_x = (x / scale)**2

        # The loss when alpha == 2.
        loss_two = 0.5 * squared_scaled_x
        # The loss when alpha == 0.
        loss_zero = util.log1p_safe(0.5 * squared_scaled_x)
        # The loss when alpha == -infinity.
        loss_neginf = -torch.expm1(-0.5 * squared_scaled_x)
        # The loss when alpha == +infinity.
        loss_posinf = util.expm1_safe(0.5 * squared_scaled_x)

        # The loss when not in one of the above special cases.
        machine_epsilon = torch.tensor(np.finfo(np.float32).eps).to(x)
        # Clamp |2-alpha| to be >= machine epsilon so that it's safe to divide by.
        beta_safe = torch.max(machine_epsilon, torch.abs(alpha - 2.))
        # Clamp |alpha| to be >= machine epsilon so that it's safe to divide by.
        alpha_safe = torch.where(alpha >= 0, torch.ones_like(alpha),
                                 -torch.ones_like(alpha)) * torch.max(
                                     machine_epsilon, torch.abs(alpha))
        loss_otherwise = (beta_safe / alpha_safe) * (
            torch.pow(squared_scaled_x / beta_safe + 1., 0.5 * alpha) - 1.)

        # Select which of the cases of the loss to return.
        loss = torch.where(
            alpha == -float('inf'), loss_neginf,
            torch.where(
                alpha == 0, loss_zero,
                torch.where(
                    alpha == 2, loss_two,
                    torch.where(alpha == float('inf'), loss_posinf,
                                loss_otherwise))))
    return loss
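# A hypothetical usage sketch (not from the paper's reference code): at
# alpha = 2 the general loss reduces to the L2 case 0.5 * (x / scale)^2,
# which the exact branch selects via torch.where. Assumes the `util` module
# providing log1p_safe/expm1_safe is importable alongside lossfun.
def _demo_lossfun():
    import torch
    x = torch.linspace(-3.0, 3.0, 7)
    alpha = torch.full_like(x, 2.0)
    scale = torch.ones_like(x)
    assert torch.allclose(lossfun(x, alpha, scale), 0.5 * (x / scale) ** 2)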
def pointwise_ops(self):
    a = torch.randn(4)
    b = torch.randn(4)
    t = torch.tensor([-1, -2, 3], dtype=torch.int8)
    r = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
    s = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
    f = torch.zeros(3)
    g = torch.tensor([-1, 0, 1])
    w = torch.tensor([0.3810, 1.2774, -0.2972, -0.3719, 0.4637])
    return (
        torch.abs(torch.tensor([-1, -2, 3])),
        torch.absolute(torch.tensor([-1, -2, 3])),
        torch.acos(a),
        torch.arccos(a),
        torch.acosh(a.uniform_(1.0, 2.0)),
        torch.add(a, 20),
        torch.add(a, b, out=a),
        b.add(a),
        b.add(a, out=b),
        b.add_(a),
        b.add(1),
        torch.add(a, torch.randn(4, 1), alpha=10),
        torch.addcdiv(torch.randn(1, 3), torch.randn(3, 1), torch.randn(1, 3), value=0.1),
        torch.addcmul(torch.randn(1, 3), torch.randn(3, 1), torch.randn(1, 3), value=0.1),
        torch.angle(a),
        torch.asin(a),
        torch.arcsin(a),
        torch.asinh(a),
        torch.arcsinh(a),
        torch.atan(a),
        torch.arctan(a),
        torch.atanh(a.uniform_(-1.0, 1.0)),
        torch.arctanh(a.uniform_(-1.0, 1.0)),
        torch.atan2(a, a),
        torch.bitwise_not(t),
        torch.bitwise_and(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.bitwise_or(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.bitwise_xor(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.ceil(a),
        torch.ceil(float(torch.tensor(0.5))),
        torch.ceil(torch.tensor(0.5).item()),
        torch.clamp(a, min=-0.5, max=0.5),
        torch.clamp(a, min=0.5),
        torch.clamp(a, max=0.5),
        torch.clip(a, min=-0.5, max=0.5),
        torch.conj(a),
        torch.copysign(a, 1),
        torch.copysign(a, b),
        torch.cos(a),
        torch.cosh(a),
        torch.deg2rad(torch.tensor([[180.0, -180.0], [360.0, -360.0], [90.0, -90.0]])),
        torch.div(a, b),
        a.div(b),
        a.div(1),
        a.div_(b),
        torch.divide(a, b, rounding_mode="trunc"),
        torch.divide(a, b, rounding_mode="floor"),
        torch.digamma(torch.tensor([1.0, 0.5])),
        torch.erf(torch.tensor([0.0, -1.0, 10.0])),
        torch.erfc(torch.tensor([0.0, -1.0, 10.0])),
        torch.erfinv(torch.tensor([0.0, 0.5, -1.0])),
        torch.exp(torch.tensor([0.0, math.log(2.0)])),
        torch.exp(float(torch.tensor(1))),
        torch.exp2(torch.tensor([0.0, math.log(2.0), 3.0, 4.0])),
        torch.expm1(torch.tensor([0.0, math.log(2.0)])),
        torch.fake_quantize_per_channel_affine(
            torch.randn(2, 2, 2),
            (torch.randn(2) + 1) * 0.05,
            torch.zeros(2),
            1, 0, 255,
        ),
        torch.fake_quantize_per_tensor_affine(a, 0.1, 0, 0, 255),
        torch.float_power(torch.randint(10, (4, )), 2),
        torch.float_power(torch.arange(1, 5), torch.tensor([2, -3, 4, -5])),
        torch.floor(a),
        torch.floor(float(torch.tensor(1))),
        torch.floor_divide(torch.tensor([4.0, 3.0]), torch.tensor([2.0, 2.0])),
        torch.floor_divide(torch.tensor([4.0, 3.0]), 1.4),
        torch.fmod(torch.tensor([-3, -2, -1, 1, 2, 3]), 2),
        torch.fmod(torch.tensor([1, 2, 3, 4, 5]), 1.5),
        torch.frac(torch.tensor([1.0, 2.5, -3.2])),
        torch.randn(4, dtype=torch.cfloat).imag,
        torch.ldexp(torch.tensor([1.0]), torch.tensor([1])),
        torch.ldexp(torch.tensor([1.0]), torch.tensor([1, 2, 3, 4])),
        torch.lerp(torch.arange(1.0, 5.0), torch.empty(4).fill_(10), 0.5),
        torch.lerp(
            torch.arange(1.0, 5.0),
            torch.empty(4).fill_(10),
            torch.full_like(torch.arange(1.0, 5.0), 0.5),
        ),
        torch.lgamma(torch.arange(0.5, 2, 0.5)),
        torch.log(torch.arange(5) + 10),
        torch.log10(torch.rand(5)),
        torch.log1p(torch.randn(5)),
        torch.log2(torch.rand(5)),
        torch.logaddexp(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp(torch.tensor([-100.0, -200.0, -300.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp(torch.tensor([1.0, 2000.0, 30000.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([-100.0, -200.0, -300.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([1.0, 2000.0, 30000.0]), torch.tensor([-1, -2, -3])),
        torch.logical_and(r, s),
        torch.logical_and(r.double(), s.double()),
        torch.logical_and(r.double(), s),
        torch.logical_and(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logical_not(torch.tensor([0, 1, -10], dtype=torch.int8)),
        torch.logical_not(torch.tensor([0.0, 1.5, -10.0], dtype=torch.double)),
        torch.logical_not(
            torch.tensor([0.0, 1.0, -10.0], dtype=torch.double),
            out=torch.empty(3, dtype=torch.int16),
        ),
        torch.logical_or(r, s),
        torch.logical_or(r.double(), s.double()),
        torch.logical_or(r.double(), s),
        torch.logical_or(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logical_xor(r, s),
        torch.logical_xor(r.double(), s.double()),
        torch.logical_xor(r.double(), s),
        torch.logical_xor(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logit(torch.rand(5), eps=1e-6),
        torch.hypot(torch.tensor([4.0]), torch.tensor([3.0, 4.0, 5.0])),
        torch.i0(torch.arange(5, dtype=torch.float32)),
        torch.igamma(a, b),
        torch.igammac(a, b),
        torch.mul(torch.randn(3), 100),
        b.mul(a),
        b.mul(5),
        b.mul(a, out=b),
        b.mul_(a),
        b.mul_(5),
        torch.multiply(torch.randn(4, 1), torch.randn(1, 4)),
        torch.mvlgamma(torch.empty(2, 3).uniform_(1.0, 2.0), 2),
        torch.tensor([float("nan"), float("inf"), -float("inf"), 3.14]),
        torch.nan_to_num(w),
        torch.nan_to_num_(w),
        torch.nan_to_num(w, nan=2.0),
        torch.nan_to_num(w, nan=2.0, posinf=1.0),
        torch.neg(torch.randn(5)),
        # torch.nextafter(torch.tensor([1, 2]), torch.tensor([2, 1])) == torch.tensor([eps + 1, 2 - eps]),
        torch.polygamma(1, torch.tensor([1.0, 0.5])),
        torch.polygamma(2, torch.tensor([1.0, 0.5])),
        torch.polygamma(3, torch.tensor([1.0, 0.5])),
        torch.polygamma(4, torch.tensor([1.0, 0.5])),
        torch.pow(a, 2),
        torch.pow(2, float(torch.tensor(0.5))),
        torch.pow(torch.arange(1.0, 5.0), torch.arange(1.0, 5.0)),
        torch.rad2deg(torch.tensor([[3.142, -3.142], [6.283, -6.283], [1.570, -1.570]])),
        torch.randn(4, dtype=torch.cfloat).real,
        torch.reciprocal(a),
        torch.remainder(torch.tensor([-3.0, -2.0]), 2),
        torch.remainder(torch.tensor([1, 2, 3, 4, 5]), 1.5),
        torch.round(a),
        torch.round(torch.tensor(0.5).item()),
        torch.rsqrt(a),
        torch.sigmoid(a),
        torch.sign(torch.tensor([0.7, -1.2, 0.0, 2.3])),
        torch.sgn(a),
        torch.signbit(torch.tensor([0.7, -1.2, 0.0, 2.3])),
        torch.sin(a),
        torch.sinc(a),
        torch.sinh(a),
        torch.sqrt(a),
        torch.square(a),
        torch.sub(torch.tensor((1, 2)), torch.tensor((0, 1)), alpha=2),
        b.sub(a),
        b.sub_(a),
        b.sub(5),
        torch.sum(5),
        torch.tan(a),
        torch.tanh(a),
        torch.true_divide(a, a),
        torch.trunc(a),
        torch.trunc_(a),
        torch.xlogy(f, g),
        torch.xlogy(f, 4),
        torch.xlogy(2, g),
    )
def denormalise_spectrogram_torch(mag):
    return torch.expm1(mag)
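# A hedged counterpart sketch (assumed, not in the original source): the
# matching forward normalisation would be log1p, which expm1 inverts exactly.
def normalise_spectrogram_torch(mag):
    import torch
    return torch.log1p(mag)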
def get_rho(sigma, delta):
    # inverse-softplus: softplus(rho) == delta * |sigma|; 1e-20 guards against log(0)
    rho = torch.log(torch.expm1(delta * torch.abs(sigma)) + 1e-20)
    return rho
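# A quick consistency sketch (assumed): get_rho inverts softplus, so
# F.softplus(rho) should recover delta * |sigma| up to the 1e-20 epsilon.
# `_demo_get_rho` is a hypothetical helper added for illustration.
def _demo_get_rho():
    import torch
    import torch.nn.functional as F
    sigma = torch.tensor([-0.5, 0.1, 2.0])
    delta = 0.05
    rho = get_rho(sigma, delta)
    assert torch.allclose(F.softplus(rho), delta * sigma.abs(), atol=1e-6)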