def forward(self, data: Tensor, state: Dict[str, Any]) -> Tuple[Tensor, Tensor]:
    lam = self.beta.sample()
    lam = maximum(lam, (1 - lam))
    cut_x = self.uniform.sample()
    cut_y = self.uniform.sample()
    bbox_x1, bbox_x2, bbox_y1, bbox_y2, width, height = self._get_patch_coordinates(data, cut_x, cut_y, lam=lam)
    if tf.is_tensor(data):
        # TensorFlow tensors are immutable (and channel-last), so build the patch as a zero-padded
        # delta against the rolled batch and add it onto the original data
        patches = roll(data, shift=1, axis=0)[:, bbox_y1:bbox_y2, bbox_x1:bbox_x2, :] - \
            data[:, bbox_y1:bbox_y2, bbox_x1:bbox_x2, :]
        patches = tf.pad(patches, [[0, 0], [bbox_y1, height - bbox_y2], [bbox_x1, width - bbox_x2], [0, 0]],
                         mode="CONSTANT",
                         constant_values=0)
        data = data + patches
    else:
        # PyTorch tensors are channel-first and support in-place slice assignment
        data[:, :, bbox_y1:bbox_y2, bbox_x1:bbox_x2] = roll(data, shift=1,
                                                            axis=0)[:, :, bbox_y1:bbox_y2, bbox_x1:bbox_x2]
    # adjust lambda to match pixel ratio
    lam = 1 - cast(((bbox_x2 - bbox_x1) * (bbox_y2 - bbox_y1)), dtype="float32") / (width * height)
    return data, lam
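The closing reassignment of `lam` follows the CutMix recipe: because `_get_patch_coordinates` can clip the box at the image border, the sampled mixing coefficient is replaced by the true fraction of pixels still belonging to the original image. A worked example with hypothetical numbers:

```python
# For a 224x224 image with a 112x112 patch pasted in from the rolled batch, the adjusted
# label weight credits the original image with the 75% of pixels it still owns:
lam = 1 - (112 * 112) / (224 * 224)
assert lam == 0.75
```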
def forward(self, data: List[Tensor], state: Dict[str, Any]) -> List[Tensor]:
    if self.shared_beta:
        # a single mixing coefficient for the whole batch
        lam = self.beta.sample()
    else:
        # one mixing coefficient per sample, reshaped so it broadcasts over the remaining dims
        lam = self.beta.sample(sample_shape=(data[0].shape[0], ))
        shape = [-1] + [1] * (len(data[0].shape) - 1)
        lam = reshape(lam, shape)
    lam = maximum(lam, (1 - lam))
    mix = [lam * elem + (1.0 - lam) * roll(elem, shift=1, axis=0) for elem in data]
    return mix + [lam]
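Since pairing is done with `roll(elem, shift=1, axis=0)`, every sample is blended with its predecessor in the batch, and `lam = maximum(lam, 1 - lam)` guarantees the un-rolled sample dominates the mix. A minimal NumPy sketch of the per-sample branch (`shared_beta=False`), with toy data chosen purely for illustration:

```python
import numpy as np

rng = np.random.default_rng(0)
x = np.arange(8.0).reshape(4, 2)       # toy batch of 4 samples, 2 features each
lam = rng.beta(1.0, 1.0, size=(4, 1))  # one lambda per sample, broadcastable over features
lam = np.maximum(lam, 1.0 - lam)       # keep the original (un-rolled) sample dominant
mix = lam * x + (1.0 - lam) * np.roll(x, shift=1, axis=0)
```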
def forward(self, data: List[Tensor], state: Dict[str, Any]) -> Union[Tensor, List[Tensor]]:
    base_loss = self.loss.forward(data, state)
    tau = self._accumulate_tau(base_loss, state['mode'], state['warmup'])
    beta = (base_loss - tau) / self.lam
    # TODO The authors say to remove the gradients. Need to check whether this is necessary (speed or metrics)
    # closed-form optimal log-confidence, computed via the Lambert W function
    ln_sigma = -lambertw(0.5 * maximum(self.cap, beta))
    super_loss = (base_loss - tau) * exp(ln_sigma) + self.lam * pow(ln_sigma, 2)
    if self.average_loss:
        super_loss = reduce_mean(super_loss)
    if len(self.outputs) == 2:
        # User requested that the confidence score be returned
        return [super_loss, exp(ln_sigma)]
    return super_loss
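The `ln_sigma` line is the closed-form solution from the SuperLoss paper: with beta = (loss - tau) / lam, the optimal log-confidence is -W(max(cap, beta) / 2), where W is the Lambert W function. A minimal NumPy restatement, assuming `self.cap` is the paper's -2/e lower bound (which keeps the W argument inside its real domain):

```python
import numpy as np
from scipy.special import lambertw

def super_loss(base_loss, tau, lam, cap=-2.0 / np.e):
    beta = (base_loss - tau) / lam
    ln_sigma = -np.real(lambertw(0.5 * np.maximum(cap, beta)))  # log of the optimal confidence
    return (base_loss - tau) * np.exp(ln_sigma) + lam * ln_sigma ** 2

# Easy samples (loss below tau) are up-weighted, hard ones down-weighted:
print(super_loss(np.array([0.1, 3.0]), tau=1.0, lam=1.0))
```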
def iwd(tensor: Tensor,
        power: float = 1.0,
        max_prob: float = 0.95,
        pairwise_distance: float = 1.0,
        eps: Optional[Tensor] = None) -> Tensor:
    """Compute the Inverse Weighted Distance from the given input.

    This can be used as an activation function for the final layer of a neural network instead of softmax. For
    example, instead of: model.add(layers.Dense(classes, activation='softmax')), you could use:
    model.add(layers.Dense(classes, activation=lambda x: iwd(tf.nn.sigmoid(x))))

    This method can be used with Numpy data:
    ```python
    n = np.array([[0.5]*5, [0]+[1]*4])
    b = fe.backend.iwd(n)  # [[0.2, 0.2, 0.2, 0.2, 0.2], [0.95, 0.0125, 0.0125, 0.0125, 0.0125]]
    ```

    This method can be used with TensorFlow tensors:
    ```python
    t = tf.constant([[0.5]*5, [0]+[1]*4])
    b = fe.backend.iwd(t)  # [[0.2, 0.2, 0.2, 0.2, 0.2], [0.95, 0.0125, 0.0125, 0.0125, 0.0125]]
    ```

    This method can be used with PyTorch tensors:
    ```python
    p = torch.tensor([[0.5]*5, [0]+[1]*4])
    b = fe.backend.iwd(p)  # [[0.2, 0.2, 0.2, 0.2, 0.2], [0.95, 0.0125, 0.0125, 0.0125, 0.0125]]
    ```

    Args:
        tensor: The input value. Should be of shape (Batch, C) where every element in C corresponds to a (non-negative)
            distance to a target class.
        power: The power to raise the inverse distances to. 1.0 results in a fairly intuitive probability output. Larger
            powers can widen regions of certainty, whereas values between 0 and 1 can widen regions of uncertainty.
        max_prob: The maximum probability to assign to a class estimate when it is distance zero away from the target.
            For numerical stability this must be less than 1.0. We have found that using smaller values like 0.95 can
            lead to natural adversarial robustness.
        pairwise_distance: The distance to any other class when the distance to a target class is zero. For example, if
            you have a perfect match for class 'a', what distance should be reported to class 'b'. If you have a metric
            where this isn't constant, just use an approximate expected distance. In that case `max_prob` will only give
            you approximate control over the true maximum probability.
        eps: The numeric stability constant to be used when d approaches zero. If None then it will be computed using
            `max_prob` and `pairwise_distance`. If not None, then `max_prob` and `pairwise_distance` will be ignored.

    Returns:
        A probability distribution of shape (Batch, C) where smaller distances from `tensor` correspond to larger
        probabilities.
    """
    if eps is None:
        # derive the clamping constant so that a zero-distance match yields exactly `max_prob`
        eps = np.array(pairwise_distance * math.pow((1.0 - max_prob) / (max_prob * (tensor.shape[-1] - 1)), 1 / power),
                       dtype=TENSOR_TO_NP_DTYPE[tensor.dtype])
        eps = to_tensor(eps,
                        target_type='torch' if isinstance(tensor, torch.Tensor) else 'tf' if tf.is_tensor(tensor)
                        else 'np')
        if isinstance(eps, torch.Tensor):
            eps = eps.to("cuda:0" if torch.cuda.is_available() else "cpu")
    tensor = maximum(tensor, eps)
    tensor = tensor_pow(1.0 / tensor, power)
    # normalize the inverse distances into a probability distribution per batch entry
    tensor = tensor / reshape(reduce_sum(tensor, axis=-1), shape=[-1, 1])
    return tensor
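As a sanity check on the default `eps` derivation, the clamped zero-distance case should reproduce `max_prob` exactly. A small NumPy verification using the docstring's own settings (5 classes, `power=1.0`, `max_prob=0.95`, `pairwise_distance=1.0`):

```python
import numpy as np

C, power, max_prob, pairwise = 5, 1.0, 0.95, 1.0
eps = pairwise * ((1.0 - max_prob) / (max_prob * (C - 1))) ** (1.0 / power)
d = np.array([eps] + [pairwise] * (C - 1))  # a perfect match, clamped to eps
probs = (1.0 / d) ** power
probs = probs / probs.sum()
print(probs)  # [0.95, 0.0125, 0.0125, 0.0125, 0.0125]
```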