def forward(self, p, q, pad_mask=None):
    """
    Args:
        p(obj:`Tensor`): the first forward logits of training examples.
        q(obj:`Tensor`): the second forward logits of training examples.
        pad_mask(obj:`Tensor`, optional): the binary mask used to select
            positions to keep; its data type is bool.

    Returns:
        loss(obj:`Tensor`): the R-Drop loss of p and q.
    """
    p_loss = F.kl_div(
        F.log_softmax(p, axis=-1), F.softmax(q, axis=-1), reduction=self.reduction)
    q_loss = F.kl_div(
        F.log_softmax(q, axis=-1), F.softmax(p, axis=-1), reduction=self.reduction)

    # pad_mask is for sequence-level tasks
    if pad_mask is not None:
        p_loss = paddle.masked_select(p_loss, pad_mask)
        q_loss = paddle.masked_select(q_loss, pad_mask)

    # Choose "sum" or "mean" here depending on your task
    p_loss = p_loss.sum()
    q_loss = q_loss.sum()
    loss = (p_loss + q_loss) / 2
    return loss
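
A minimal end-to-end sketch of the R-Drop pattern this loss implements: two stochastic forward passes of the same batch, cross entropy on both, plus the symmetric KL term above. The model, shapes, and the 0.1 weighting below are illustrative assumptions, not part of the original code.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

model = nn.Sequential(nn.Linear(16, 10), nn.Dropout(0.3), nn.Linear(10, 4))
x = paddle.randn([8, 16])
labels = paddle.randint(0, 4, [8])

# Two forward passes of the same batch see different dropout masks.
p = model(x)
q = model(x)

ce = (F.cross_entropy(p, labels) + F.cross_entropy(q, labels)) / 2
kl = (F.kl_div(F.log_softmax(p, axis=-1), F.softmax(q, axis=-1), reduction='mean') +
      F.kl_div(F.log_softmax(q, axis=-1), F.softmax(p, axis=-1), reduction='mean')) / 2
loss = ce + 0.1 * kl  # alpha = 0.1 is an assumed hyper-parameter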

def forward(self, out1, out2):
    if self.act is not None:
        out1 = self.act(out1)
        out2 = self.act(out2)

    log_out1 = paddle.log(out1)
    log_out2 = paddle.log(out2)
    loss = (F.kl_div(log_out1, out2, reduction='batchmean') +
            F.kl_div(log_out2, out1, reduction='batchmean')) / 2.0
    return {"DMLLoss": loss}

def forward(self, out1, out2):
    if self.act is not None:
        out1 = self.act(out1)
        out2 = self.act(out2)
    if len(out1.shape) < 2:
        log_out1 = paddle.log(out1)
        log_out2 = paddle.log(out2)
        loss = (F.kl_div(log_out1, out2, reduction='batchmean') +
                F.kl_div(log_out2, out1, reduction='batchmean')) / 2.0
    else:
        loss = self.jskl_loss(out1, out2)
    return loss

def forward(self, out1, out2):
    if self.act is not None:
        out1 = self.act(out1)
        out2 = self.act(out2)
    if self.use_log:
        # for recognition distillation, log is needed for feature map
        log_out1 = paddle.log(out1)
        log_out2 = paddle.log(out2)
        loss = (F.kl_div(log_out1, out2, reduction='batchmean') +
                F.kl_div(log_out2, out1, reduction='batchmean')) / 2.0
    else:
        # for detection distillation log is not needed
        loss = self.jskl_loss(out1, out2)
    return loss
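
The jskl_loss called in the detection branch above is not shown in these snippets. Purely as an assumption about its intent, a mean-distribution Jensen-Shannon style divergence between two probability tensors could be sketched as follows; this is not the actual implementation.

import paddle
import paddle.nn.functional as F

def js_divergence(p, q, eps=1e-8):
    # Jensen-Shannon divergence: average of KL(p || m) and KL(q || m),
    # where m is the mean of the two distributions.
    m = (p + q) / 2
    kl_pm = F.kl_div(paddle.log(m + eps), p, reduction='batchmean')
    kl_qm = F.kl_div(paddle.log(m + eps), q, reduction='batchmean')
    return (kl_pm + kl_qm) / 2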

def label_smooth_loss(self, X, target, smooth_value=0.1):
    """Label-smoothed KL loss: the target class gets probability
    1 - smooth_value and the remaining mass is spread uniformly
    over the other classes."""
    if self.training:
        log_probs = F.log_softmax(X, axis=1)
        num_classes = X.shape[1]
        # Build the smoothed target distribution.
        one_hot = F.one_hot(target, num_classes=num_classes)
        smoothed = one_hot * (1 - smooth_value) + \
            (1 - one_hot) * smooth_value / (num_classes - 1)
        loss = F.kl_div(log_probs, smoothed, reduction="batchmean")
        return loss.unsqueeze(0)
    else:
        return F.cross_entropy(X, target, reduction="none")
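
A quick numeric check of the smoothed target built above: with 5 classes and smooth_value = 0.1, the gold class gets 0.9 and each other class gets 0.1 / 4 = 0.025, so every row still sums to 1. The class count and target index below are illustrative.

import paddle
import paddle.nn.functional as F

target = paddle.to_tensor([2])
one_hot = F.one_hot(target, num_classes=5)
smoothed = one_hot * (1 - 0.1) + (1 - one_hot) * 0.1 / (5 - 1)
print(smoothed.numpy())  # [[0.025 0.025 0.9   0.025 0.025]]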

def forward(self, input, label):
    """
    Args:
        input(Tensor): The input tensor.
        label(Tensor): The label tensor. The shape of label is the same as input.

    Returns:
        Tensor: The KL loss.
    """
    assert input.shape == label.shape, \
        "The shape of label should be the same as input."
    if self.act is not None:
        input = self.act(input)
        label = self.act(label)
    log_input = paddle.log(input)
    loss = F.kl_div(log_input, label, reduction=self.reduction)
    return loss

def KL(pred, target):
    pred = F.log_softmax(pred)
    target = F.softmax(target)
    loss = F.kl_div(pred, target)
    return loss
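
A minimal call of KL() above in a distillation-style setting; the tensor shapes and the frozen-teacher detail are assumptions for the sketch.

import paddle

student_logits = paddle.randn([4, 10])
teacher_logits = paddle.randn([4, 10])
teacher_logits.stop_gradient = True  # gradients usually do not flow into the teacher
loss = KL(student_logits, teacher_logits)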