Example #1
    def hybrid_forward(self, F, supports, queries, sample_weight=None):
        """
        Computes prototypical loss
        :param F:
        :param supports:  <Nc*Ns x E>
        :param queries:   <Nc*Nq x E>
        :return:
        """
        supports = F.reshape(supports, (self.nc, self.ns, -1))  # <Nc x Ns x E>
        prototypes = F.mean(supports, axis=1)  # <Nc x E>

        # Compute distance between queries and prototypes
        square_queries = queries.square().sum(axis=1, keepdims=True)
        square_prototypes = prototypes.square().sum(axis=1,
                                                    keepdims=True)  # <Nc x 1>
        pairwise_distance_square = square_queries + square_prototypes.transpose() - 2.0 * (
            F.dot(queries, prototypes.transpose()))  # <Nc*Nq x Nc>

        # We construct the labels based on sampled clusters
        labels = F.repeat(F.arange(self.nc), self.nq)

        pred = F.log_softmax(-pairwise_distance_square, self.axis)
        loss = -F.pick(pred, labels, axis=self.axis, keepdims=True)

        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
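Below is a minimal, self-contained sketch (not part of the source) that reproduces the distance and label construction above with plain mxnet.ndarray; the nc/ns/nq values and embedding size are made up, and the enclosing block's constructor (which stores self.nc, self.ns, self.nq) is assumed but not shown.

# Standalone sketch of the prototypical-loss math above (illustrative shapes).
import mxnet as mx

nc, ns, nq, emb = 5, 4, 3, 16                             # classes, supports/class, queries/class, embedding dim
supports = mx.nd.random.normal(shape=(nc * ns, emb))      # <Nc*Ns x E>
queries = mx.nd.random.normal(shape=(nc * nq, emb))       # <Nc*Nq x E>

prototypes = supports.reshape((nc, ns, -1)).mean(axis=1)  # <Nc x E> class prototypes
sq_q = (queries ** 2).sum(axis=1, keepdims=True)          # <Nc*Nq x 1>
sq_p = (prototypes ** 2).sum(axis=1, keepdims=True)       # <Nc x 1>
d2 = sq_q + sq_p.T - 2.0 * mx.nd.dot(queries, prototypes.T)  # squared distances <Nc*Nq x Nc>
labels = mx.nd.repeat(mx.nd.arange(nc), repeats=nq)       # query labels follow the sampling order
loss = -mx.nd.pick(mx.nd.log_softmax(-d2, axis=-1), labels, axis=-1)
print(loss.mean())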
Example #2
    def hybrid_forward(self, F, pred, label, sample_weight=None):
        # Extract the index of the last token ('END') in each label sentence
        label = F.cast(label, dtype='float32')
        label_sent_length = F.argmax(F.where(label == self.end_idx,
                                             F.ones_like(label),
                                             F.zeros_like(label)),
                                     axis=1)

        if not self._from_logits:
            pred = F.log_softmax(pred, self._axis)
        if self._sparse_label:
            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
        else:
            label = _reshape_like(F, label, pred)
            loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        #(N, 30, val)
        # Mask the loss to 0 for positions beyond each sentence's length
        loss = F.transpose(loss, (1, 0, 2))
        loss = F.SequenceMask(loss,
                              sequence_length=label_sent_length + 1,
                              use_sequence_length=True)
        loss = F.transpose(loss, (1, 0, 2))
        return F.sum(loss, axis=self._batch_axis,
                     exclude=True) / (label_sent_length + 1)
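For reference, a small standalone illustration (assumed shapes, not from the source) of the SequenceMask step used above: with the time axis moved to the front, positions beyond each sentence length are zeroed before the per-length normalization.

# Tiny SequenceMask demo with made-up per-token losses.
import mxnet as mx

loss = mx.nd.ones((2, 5, 1))                  # (batch, time, 1) per-token losses
lengths = mx.nd.array([2, 4])                 # tokens to keep per sample (END index + 1)

masked = mx.nd.SequenceMask(loss.transpose((1, 0, 2)),
                            sequence_length=lengths,
                            use_sequence_length=True).transpose((1, 0, 2))
print(masked.sum(axis=(1, 2)) / lengths)      # per-sample mean over kept tokens -> [1., 1.]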
Example #3
    def hybrid_forward(self, F, pred, labels, positive_proxy, negative_proxies):
        """
        :param F:
        :param pred: BxE
        :param positive_proxy: BxE
        :param negative_proxies: B x (C-1) x E
        :return:
        """
        beta = self._beta(labels).squeeze()  # <B>
        beta_b = F.repeat(beta, repeats=self._num_classes - 1, axis=0)
        beta_reg_loss = F.sum(beta) * self._nu

        positive_proxy = F.L2Normalization(positive_proxy)  # BxE
        pred_b = F.repeat(pred, repeats=self._num_classes - 1, axis=0)  # B*(C-1) x E
        # positive_proxy_b = F.repeat(positive_proxy, repeats=self._num_classes - 1, axis=0)  # B*(C-1) x E
        negative_proxies_b = F.reshape_like(negative_proxies, pred_b)  # B*(C-1) x E
        negative_proxies_b = F.L2Normalization(negative_proxies_b)  # B*(C-1) x E

        d_ap = F.sum(F.square(positive_proxy - pred), axis=1)  # B
        d_ap = F.repeat(d_ap, repeats=self._num_classes - 1, axis=0)  # B*(C-1)
        d_an = F.sum(F.square(negative_proxies_b - pred_b), axis=1)  # B*(C-1)

        pos_loss = F.relu(d_ap - beta_b + self._margin)
        neg_loss = F.relu(beta_b - d_an + self._margin)

        pair_cnt = F.sum((pos_loss > 0.0) + (neg_loss > 0.0))
        loss = (F.sum(pos_loss + neg_loss) + beta_reg_loss) / pair_cnt

        # loss = pos_loss + neg_loss + beta_reg_loss
        # pair_cnt = F.sum(loss > 0.0)
        return _apply_weighting(F, loss, self._weight, None)
Example #4
    def hybrid_forward(
        self, 
        F, 
        true_posterior_arr,
        generated_posterior_arr,
        sample_weight=None
    ):
        '''
        Forward propagation, computing losses.

        Args:
            F:                          `mxnet.ndarray` or `mxnet.symbol`.
            true_posterior_arr:         `mxnet.ndarray` or `mxnet.symbol` of true posterior
                                        inferenced by the discriminator.

            generated_posterior_arr:    `mxnet.ndarray` or `mxnet.symbol` of fake posterior
                                        inferenced by the generator.

        Returns:
            `mxnet.ndarray` or `mxnet.symbol` of loss.
        '''
        loss = true_posterior_arr + F.maximum(0, self.__margin - generated_posterior_arr)
        loss = _apply_weighting(F, loss, self._weight, sample_weight)

        self.epoch += 1
        if self.epoch % self.__margin_decay_epoch == 0:
            self.__margin = self.__margin * self.__margin_decay_rate

        return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #5
 def hybrid_forward(self,
                    F,
                    pred,
                    label,
                    sample_weight=None,
                    pos_weight=None):
     label = _reshape_like(F, label, pred)
     if not self._from_sigmoid:
         if pos_weight is None:
             # We use the stable formula: max(x, 0) - x * z + log(1 + exp(-abs(x)))
             loss = F.relu(pred) - pred * label + F.Activation(
                 -F.abs(pred), act_type='softrelu')
         else:
             # We use the stable formula: x - x * z + (1 + z * pos_weight - z) * \
             #    (log(1 + exp(-abs(x))) + max(-x, 0))
             log_weight = 1 + F.broadcast_mul(pos_weight - 1, label)
             loss = pred - pred * label + log_weight * (F.Activation(
                 -F.abs(pred), act_type='softrelu') + F.relu(-pred))
     else:
         eps = 1e-12
         if pos_weight is None:
             loss = -(F.log(pred + eps) * label + F.log(1. - pred + eps) *
                      (1. - label))
         else:
             loss = -(
                 F.broadcast_mul(F.log(pred + eps) * label, pos_weight) +
                 F.log(1. - pred + eps) * (1. - label))
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
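A quick NumPy-only sanity check (not from the source) that the "stable formula" used in the non-sigmoid branch matches the naive binary cross-entropy:

# max(x, 0) - x*z + log(1 + exp(-|x|))  ==  -[z*log(sigmoid(x)) + (1-z)*log(1-sigmoid(x))]
import numpy as np

x = np.array([-3.0, -0.5, 0.0, 2.0, 10.0])    # raw logits
z = np.array([ 1.0,  0.0, 1.0, 1.0,  0.0])    # binary targets

stable = np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))
naive = -(z * np.log(1 / (1 + np.exp(-x))) + (1 - z) * np.log(1 - 1 / (1 + np.exp(-x))))
print(np.allclose(stable, naive))             # True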
Example #6
    def hybrid_forward(self, F, pred, label, sample_weight=None):
        if not self._from_logits:
            pred = F.sigmoid(pred)

        one_hot = label > 0
        pt = F.where(one_hot, pred, 1 - pred)

        t = F.ones_like(one_hot)
        alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
        beta = (1 - pt)**self._gamma

        if self._normalize:
            t_sum = F.sum(t, axis=(-2, -1), keepdims=True)
            beta_sum = F.sum(beta, axis=(-2, -1), keepdims=True)
            mult = t_sum / (beta_sum + self._eps)
            beta = F.broadcast_mul(beta, mult)

            self._k_sum = 0.9 * self._k_sum + 0.1 * mult.asnumpy().mean()

        loss = -alpha * beta * F.log(F.minimum(pt + self._eps, 1))
        sample_weight = label != -1

        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        if self._size_average:
            tsum = F.sum(sample_weight, axis=self._batch_axis, exclude=True)
            loss = F.sum(loss, axis=self._batch_axis,
                         exclude=True) / (tsum + self._eps)
        else:
            loss = F.sum(loss, axis=self._batch_axis, exclude=True)

        return self._scale * loss
Example #7
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     """Forward"""
     pred = F.log(pred)
     if self._sparse_label:
         loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
     else:
         label = _reshape_like(F, label, pred)
         loss = -F.sum(pred*label, axis=self._axis, keepdims=True)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #8
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     log_pred = F.log_softmax(pred, self._axis)
     chosen_log_pred = F.pick(log_pred,
                              label,
                              axis=self._axis,
                              keepdims=True)
     chosen_pred = F.exp(chosen_log_pred)
     loss = -self._alpha * (1 - chosen_pred)**self._gamma * chosen_log_pred
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
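As a small check (illustrative values, not from the source), this focal term reduces to plain softmax cross-entropy when gamma = 0 and alpha = 1:

# Focal softmax loss vs. standard cross-entropy at gamma=0, alpha=1.
import mxnet as mx

pred = mx.nd.random.normal(shape=(4, 10))     # logits
label = mx.nd.array([1, 3, 0, 7])

log_pred = mx.nd.log_softmax(pred, axis=-1)
chosen_log_pred = mx.nd.pick(log_pred, label, axis=-1, keepdims=True)
chosen_pred = mx.nd.exp(chosen_log_pred)

alpha, gamma = 1.0, 0.0
focal = -alpha * (1 - chosen_pred) ** gamma * chosen_log_pred
ce = -chosen_log_pred                          # standard cross-entropy
print(mx.nd.abs(focal - ce).max())             # ~0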
Example #9
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     if not self._from_logits:
         pred = F.log_softmax(pred, self._axis)
     if self._sparse_label:
         loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
     else:
         label = _reshape_like(F, label, pred)
         loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     diceloss = self.dice_loss(F, pred, label)
     return F.mean(loss, axis=self._batch_axis, exclude=True) + diceloss
Example #10
 def hybrid_forward(self, F, output, label, sample_weight=None):
     if not self._from_logits:
         output = F.log_softmax(output, axis=self._axis)
     if self._sparse_label:
         valid_label_map = (label != self._ignore_label).astype('float32')
         loss = -(F.pick(output, label, axis=self._axis, keepdims=True) * valid_label_map)
     else:
         label = _reshape_like(F, label, output)
         loss = -F.sum(output*label, axis=self._axis, keepdims=True)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True) * \
         valid_label_map.size / F.sum(valid_label_map)
Example #11
    def hybrid_forward(self, F, anchors, positives, labels, sample_weight=None):
        """
        Computes the loss on the given data
        :param F: mx.nd or mx.sym
        :param anchors: anchor embeddings, <BxE> where B: batch size, E: embedding dimension
        :param positives: positive embeddings, same shape and labels as anchors <BxE>
        :param labels: Labels of embeddings <B>
        :param sample_weight: weights of logits, see mx.loss
        :return:
        """
        reg_anchor = F.mean(F.sum(anchors.square(), axis=1), axis=self._batch_axis, exclude=True)
        reg_positive = F.mean(F.sum(positives.square(), axis=1), axis=self._batch_axis, exclude=True)
        l2loss = self._l2_reg * (reg_anchor + reg_positive)

        # Get per pair similarities.
        similarity_matrix = F.dot(anchors, positives, transpose_a=False, transpose_b=True)

        labels = labels.expand_dims(1)

        labels_remapped = F.broadcast_equal(labels, labels.transpose())
        labels_remapped = F.broadcast_div(labels_remapped, F.sum(labels_remapped, axis=1, keepdims=True))

        # Add the softmax loss.
        labels_remapped = labels_remapped.astype(dtype='float32')
        xent_loss = F.sum(F.log_softmax(similarity_matrix, -1) * -labels_remapped, axis=-1, keepdims=True)
        xent_loss = _apply_weighting(F, xent_loss, self._weight, sample_weight)
        xent_loss = F.mean(xent_loss, axis=self._batch_axis, exclude=True)

        loss = l2loss + xent_loss

        if self._symmetric:
            similarity_matrix = F.dot(positives, anchors, transpose_a=False, transpose_b=True)
            xent_loss = F.sum(F.log_softmax(similarity_matrix, -1) * -labels_remapped, axis=-1, keepdims=True)
            xent_loss = _apply_weighting(F, xent_loss, self._weight, sample_weight)
            xent_loss = F.mean(xent_loss, axis=self._batch_axis, exclude=True)

            loss = (loss + l2loss + xent_loss) * 0.5

        return loss
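A short standalone illustration (made-up labels, not from the source) of how labels_remapped turns the batch labels into row-normalized soft targets for the similarity matrix:

# Each row becomes a uniform distribution over the positives sharing that anchor's label.
import mxnet as mx

labels = mx.nd.array([0, 1, 0, 2]).expand_dims(1)          # <B x 1>
same = mx.nd.broadcast_equal(labels, labels.transpose())   # <B x B>, 1 where labels match
targets = mx.nd.broadcast_div(same, same.sum(axis=1, keepdims=True))
print(targets)
# row 0 -> [0.5, 0, 0.5, 0], row 1 -> [0, 1, 0, 0], ...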
Example #12
 def hybrid_forward(self, F, output, label, sample_weight=None):
     if not self._from_logits:
         output = F.log_softmax(output)
     if self._sparse_label:
         # loss = -F.pick(output, label, axis=self._axis, keepdims=True)
         l = -F.pick(output, label, axis=self._axis, keepdims=True)
         d = nd.array([0 if i.asscalar() < 0 else 1
                       for i in label]).reshape((-1, 1))
         loss = l * d
     else:
         loss = -F.sum(output * label, axis=self._axis, keepdims=True)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #13
    def forward(self, labels, y_pred):

        labels_onehot = labels  #nd.one_hot(labels, self.num_classes)

        first_term_base = nd.square(nd.maximum(0.9 - y_pred, 0))
        second_term_base = nd.square(nd.maximum(y_pred - 0.1, 0))
        # import pdb; pdb.set_trace()
        margin_loss = labels_onehot * first_term_base + self.lambda_value * (
            1 - labels_onehot) * second_term_base
        margin_loss = margin_loss.sum(axis=1)

        loss = nd.mean(margin_loss, axis=self._batch_axis, exclude=True)
        loss = _apply_weighting(nd, loss, self._weight / 2, self.sample_weight)
        return nd.mean(loss, axis=self._batch_axis, exclude=True)
Example #14
    def hybrid_forward(self, F, pred, label):
        label = _reshape_like(F, label, pred)
        sample_weight = label != self._ignore_label
        label = F.where(sample_weight, label, F.zeros_like(label))

        if not self._from_sigmoid:
            loss = F.relu(pred) - pred * label + \
                F.Activation(-F.abs(pred), act_type='softrelu')
        else:
            eps = 1e-12
            loss = -(F.log(pred + eps) * label + F.log(1. - pred + eps) *
                     (1. - label))

        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #15
 def _mixup_forward(self, F, pred, label1, label2, lam, sample_weight=None):
     if not self._from_logits:
         pred = F.log_softmax(pred, self._axis)
     if self._sparse_label:
         loss1 = -F.pick(pred, label1, axis=self._axis, keepdims=True)
         loss2 = -F.pick(pred, label2, axis=self._axis, keepdims=True)
         loss = lam * loss1 + (1 - lam) * loss2
     else:
         label1 = _reshape_like(F, label1, pred)
         label2 = _reshape_like(F, label2, pred)
         loss1 = -F.sum(pred*label1, axis=self._axis, keepdims=True)
         loss2 = -F.sum(pred*label2, axis=self._axis, keepdims=True)
         loss = lam * loss1 + (1 - lam) * loss2
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #16
 def _mixup_forward(self, F, pred, label1, label2, lam, sample_weight=None):
     if not self._from_logits:
         pred = F.log_softmax(pred, self._axis)
     if self._sparse_label:
         loss1 = -F.pick(pred, label1, axis=self._axis, keepdims=True)
         loss2 = -F.pick(pred, label2, axis=self._axis, keepdims=True)
         loss = lam * loss1 + (1 - lam) * loss2
     else:
         label1 = _reshape_like(F, label1, pred)
         label2 = _reshape_like(F, label2, pred)
         loss1 = -F.sum(pred * label1, axis=self._axis, keepdims=True)
         loss2 = -F.sum(pred * label2, axis=self._axis, keepdims=True)
         loss = lam * loss1 + (1 - lam) * loss2
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #17
    def hybrid_forward(self, F, generated_posterior_arr, sample_weight=None):
        '''
        Forward propagation, computing losses.

        Args:
            F:                          `mxnet.ndarray` or `mxnet.symbol`.
            generated_posterior_arr:    `mxnet.ndarray` or `mxnet.symbol` of fake posterior
                                        inferenced by the generator.

        Returns:
            `mxnet.ndarray` or `mxnet.symbol` of loss.
        '''
        loss = F.log(1 - generated_posterior_arr + 1e-08)
        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #18
    def hybrid_forward(self, F, orign_arr, dest_arr, sample_weight=None):
        '''
        Forward propagation, computing L2 norm.

        Args:
            F:           `mxnet.ndarray` or `mxnet.symbol`.
            orign_arr:   `mxnet.ndarray` or `mxnet.symbol` of origins.
            dest_arr:    `mxnet.ndarray` or `mxnet.symbol` of destinations.
        
        Returns:
            `mxnet.ndarray` or `mxnet.symbol` of loss.
        '''
        dest_arr = _reshape_like(F, dest_arr, orign_arr)
        loss = F.sqrt(F.mean(F.square(orign_arr - dest_arr), axis=1))
        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #19
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     """Loss forward"""
     if not self._from_logits:
         pred = F.sigmoid(pred)
     if self._sparse_label:
         one_hot = F.one_hot(label, self._num_class)
     else:
         one_hot = label > 0
     pt = F.where(one_hot, pred, 1 - pred)
     t = F.ones_like(one_hot)
     alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
     loss = -alpha * ((1 - pt) ** self._gamma) * F.log(F.minimum(pt + self._eps, 1))
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     if self._size_average:
         return F.mean(loss, axis=self._batch_axis, exclude=True)
     else:
         return F.sum(loss, axis=self._batch_axis, exclude=True)
Example #20
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     """Loss forward"""
     if not self._from_logits:
         pred = F.sigmoid(pred)
     if self._sparse_label:
         one_hot = F.one_hot(label, self._num_class)
     else:
         one_hot = label > 0
     pt = F.where(one_hot, pred, 1 - pred)
     t = F.ones_like(one_hot)
     alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
     loss = -alpha * ((1 - pt) ** self._gamma) * F.log(F.minimum(pt + self._eps, 1))
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     if self._size_average:
         return F.mean(loss, axis=self._batch_axis, exclude=True)
     else:
         return F.sum(loss, axis=self._batch_axis, exclude=True)
Example #21
 def hybrid_forward(self, F, pred, label):
     """Compute loss"""
     softmaxout = F.SoftmaxOutput(
         pred,
         label.astype(pred.dtype),
         ignore_label=self._ignore_label,
         multi_output=self._sparse_label,
         use_ignore=True,
         normalization='valid' if self._size_average else 'null')
     loss = -F.pick(F.log(softmaxout), label, axis=1, keepdims=True)
     loss = F.where(
         label.expand_dims(axis=1) == self._ignore_label,
         F.zeros_like(loss), loss)
     sample_weight = F.where(
         label.expand_dims(axis=1) == 0, F.ones_like(loss),
         F.ones_like(loss) * 10)
     loss = _apply_weighting(F, loss, 1., sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #22
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     """Compute loss"""
     if not self._from_logits:
         pred = F.log_softmax(pred, axis=self._axis)
     if self._sparse_label:
         if self._size_average:
             valid_label_map = (label != self._ignore_label).astype('float32')
         loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
         loss = F.where(label.expand_dims(axis=self._axis) == self._ignore_label,
                        F.zeros_like(loss), loss)
     else:
         label = _reshape_like(F, label, pred)
         loss = -F.sum(pred*label, axis=self._axis, keepdims=True)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     if self._size_average and self._sparse_label:
         return F.mean(loss, axis=self._batch_axis, exclude=True) * \
             valid_label_map.size / F.sum(valid_label_map)
     else:
         return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #23
    def hybrid_forward(self,
                       F,
                       images,
                       num_classes,
                       labels,
                       X_l2norm,
                       lambda_value=0.5,
                       sample_weight=None):
        self.num_classes = num_classes
        labels_onehot = F.one_hot(labels, num_classes)
        first_term_base = F.square(F.maximum(0.9 - X_l2norm, 0))
        second_term_base = F.square(F.maximum(X_l2norm - 0.1, 0))
        # import pdb; pdb.set_trace()
        margin_loss = labels_onehot * first_term_base + lambda_value * (
            1 - labels_onehot) * second_term_base
        margin_loss = margin_loss.sum(axis=1)

        loss = F.mean(margin_loss, axis=self._batch_axis, exclude=True)
        loss = _apply_weighting(F, loss, self._weight / 2, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #24
    def hybrid_forward(self, F, pred, label, sample_weight=None):
        """Loss forward"""
        if not self._from_logits:
            pred = F.sigmoid(pred)
        one_hot = F.one_hot(label, self._num_class)
        one_hot = F.slice_axis(one_hot, begin=1, end=None, axis=-1)
        pt = F.where(one_hot, pred, 1 - pred)
        t = F.ones_like(one_hot)
        alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
        loss = -alpha * (
            (1 - pt)**self._gamma) * F.log(F.minimum(pt + self._eps, 1))
        loss = _apply_weighting(F, loss, self._weight, sample_weight)

        # Method 2:
        # pos_part = F.power(1 - pred, self._gamma) * one_hot * \
        #         F.log(pred + self._eps)
        # neg_part = F.power(pred, self._gamma) * (1 - one_hot) * \
        #         F.log(1 - pred + self._eps)
        # loss = -F.sum(self._alpha * pos_part + (1 - self._alpha) * neg_part, axis=-1)
        # loss = _apply_weighting(F, loss, self._weight, sample_weight)
        pos_mask = (label > 0)
        return F.sum(loss) / F.maximum(F.sum(pos_mask), 1)
Example #25
    def hybrid_forward(self, F, pred, label, sample_weight=None):
        one_hot = label > 0
        t = F.ones_like(one_hot)

        if not self._from_logits:
            pred = F.sigmoid(pred)

        alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
        pt = F.where(one_hot, pred, 1 - pred)
        pt = F.where(label != self._ignore_label, pt, F.ones_like(pt))

        beta = (1 - pt)**self._gamma

        t_sum = F.sum(t, axis=(-2, -1), keepdims=True)
        beta_sum = F.sum(beta, axis=(-2, -1), keepdims=True)
        mult = t_sum / (beta_sum + self._eps)
        if self._detach_delimeter:
            mult = mult.detach()
        beta = F.broadcast_mul(beta, mult)

        ignore_area = F.sum(label == -1, axis=0, exclude=True).asnumpy()
        sample_mult = F.mean(mult, axis=0, exclude=True).asnumpy()
        if np.any(ignore_area == 0):
            self._k_sum = 0.9 * self._k_sum + 0.1 * sample_mult[ignore_area ==
                                                                0].mean()

        loss = -alpha * beta * F.log(F.minimum(pt + self._eps, 1))
        sample_weight = label != self._ignore_label

        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        if self._size_average:
            bsum = F.sum(sample_weight, axis=self._batch_axis, exclude=True)
            loss = F.sum(loss, axis=self._batch_axis,
                         exclude=True) / (bsum + self._eps)
        else:
            loss = F.sum(loss, axis=self._batch_axis, exclude=True)

        return self._scale * loss
Example #26
    def hybrid_forward(self, F, pred, label, sample_weight=None):
        if not self._from_logits:
            pred = F.sigmoid(pred)

        one_hot = label > 0
        pt = F.where(one_hot, pred, 1 - pred)

        t = label != -1
        alpha = F.where(one_hot, self._alpha * t, (1 - self._alpha) * t)
        beta = (1 - pt)**self._gamma

        loss = -alpha * beta * F.log(F.minimum(pt + self._eps, 1))
        sample_weight = label != -1

        loss = _apply_weighting(F, loss, self._weight, sample_weight)
        if self._size_average:
            tsum = F.sum(label == 1, axis=self._batch_axis, exclude=True)
            loss = F.sum(loss, axis=self._batch_axis,
                         exclude=True) / (tsum + self._eps)
        else:
            loss = F.sum(loss, axis=self._batch_axis, exclude=True)

        return self._scale * loss
Example #27
 def hybrid_forward(self, F, pred, label, mask, sample_weight=None):
     label = _reshape_like(F, label, pred)
     loss = F.abs(label * mask - pred * mask)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     norm = F.sum(mask).clip(1, 1e30)
     return F.sum(loss) / norm
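A quick numeric check (illustrative values, not from the source) of the masked-L1 normalization above: only masked-in positions contribute, and the clip keeps the divisor at least 1 when the mask is empty.

# Masked L1 loss normalized by the number of valid positions.
import mxnet as mx

pred = mx.nd.array([[1.0, 2.0, 3.0]])
label = mx.nd.array([[0.0, 0.0, 0.0]])
mask = mx.nd.array([[1.0, 1.0, 0.0]])

loss = mx.nd.abs(label * mask - pred * mask)
norm = mx.nd.sum(mask).clip(1, 1e30)
print(mx.nd.sum(loss) / norm)    # (1 + 2) / 2 = 1.5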
Example #28
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     label = _reshape_like(F, label, pred)
     loss = F.square(pred - label)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #29
    def hybrid_forward(self,
                       F,
                       pretext_pred_arr,
                       pred_arr,
                       pretext_label_arr,
                       label_arr,
                       sample_weight=None):
        '''
        Forward propagation, computing losses.

        Args:
            F:                      `mxnet.ndarray` or `mxnet.symbol`.
            pretext_pred_arr:       `mxnet.ndarray` or `mxnet.symbol` of predicted data in pretext, or target domain.
            pred_arr:               `mxnet.ndarray` or `mxnet.symbol` of inferenced labeled feature points in source domain.
            pretext_label_arr:      `mxnet.ndarray` or `mxnet.symbol` of label data in pretext.
            label_arr:              `mxnet.ndarray` or `mxnet.symbol` of label data in source domain.

            sample_weight:          element-wise weighting tensor. 
                                    Must be broadcastable to the same shape as label. 
                                    For example, if label has shape (64, 10) and you want to weigh 
                                    each sample in the batch separately, sample_weight should have shape (64, 1).

        Returns:
            `mxnet.ndarray` or `mxnet.symbol` of loss.
        '''
        if not self._from_logits:
            if self.__log_softmax_flag is True:
                pred_arr = F.log_softmax(pred_arr, self._axis)
            else:
                pred_arr = pred_arr - F.reshape(
                    F.max(pred_arr, axis=self._axis), shape=(-1, 1))
                pred_arr = F.exp(pred_arr)
                pred_arr = pred_arr / F.reshape(
                    F.sum(pred_arr, axis=self._axis), shape=(-1, 1))

        if self._sparse_label:
            classification_loss_arr = -F.pick(
                pred_arr, label_arr, axis=self._axis, keepdims=True)
        else:
            label_arr = _reshape_like(F, label_arr, pred_arr)
            classification_loss_arr = -F.sum(
                pred_arr * label_arr, axis=self._axis, keepdims=True)

        if self.__grad_clip_threshold > 0:
            classification_loss_norm = F.norm(classification_loss_arr)
            if classification_loss_norm.asscalar(
            ) > self.__grad_clip_threshold:
                classification_loss_arr = classification_loss_arr * self.__grad_clip_threshold / classification_loss_norm

        pretext_label_arr = _reshape_like(F, pretext_label_arr,
                                          pretext_pred_arr)
        pretext_loss_arr = -F.sum(pretext_pred_arr * pretext_label_arr,
                                  axis=self._axis,
                                  keepdims=True) / 4

        if self.__grad_clip_threshold > 0:
            pretext_loss_norm = F.norm(pretext_loss_arr)
            if pretext_loss_norm.asscalar() > self.__grad_clip_threshold:
                pretext_loss_arr = pretext_loss_arr * self.__grad_clip_threshold / pretext_loss_norm

        if self.__classification_weight is None:
            classification_loss_arr = _apply_weighting(
                F, classification_loss_arr, self._weight, sample_weight)
        else:
            classification_loss_arr = _apply_weighting(
                F, classification_loss_arr, self.__classification_weight,
                sample_weight)

        if self.__pretext_weight is None:
            pretext_loss_arr = _apply_weighting(F, pretext_loss_arr,
                                                self._weight, sample_weight)
        else:
            pretext_loss_arr = _apply_weighting(F, pretext_loss_arr,
                                                self.__pretext_weight,
                                                sample_weight)

        classification_loss = F.mean(classification_loss_arr,
                                     axis=self._batch_axis,
                                     exclude=True)
        pretext_loss = F.mean(pretext_loss_arr,
                              axis=self._batch_axis,
                              exclude=True)

        total_loss = classification_loss + pretext_loss
        return total_loss, classification_loss, pretext_loss
Example #30
    def hybrid_forward(self, F, pred, label, sample_weight=None):
        """Compute YOLOv3 losses.

        :param pred:    (B, N, 4)
        :param label:   (B, N, 4)
        :param sample_weight:
        :return:
        """
        label = F.stop_gradient(label)
        label = gloss._reshape_like(F, label, pred)
        # pred = pred.reshape(-1, 4).T
        # label = label.reshape(-1, 4).T
        # pred = F.transpose(pred)
        # label = F.transpose(label)
        if self.x1y1x2y2:
            b1_xmin, b1_ymin, b1_xmax, b1_ymax = F.split(pred,
                                                         axis=-1,
                                                         num_outputs=4)
            b2_xmin, b2_ymin, b2_xmax, b2_ymax = F.split(label,
                                                         axis=-1,
                                                         num_outputs=4)
        else:
            b1_xmin, b1_ymin, b1_xmax, b1_ymax = self._center2corner(pred)
            b2_xmin, b2_ymin, b2_xmax, b2_ymax = self._center2corner(label)

        # Intersection area
        MAX = 1e5
        inter_w = F.clip(
            F.elemwise_sub(F.minimum(b1_xmax, b2_xmax),
                           F.maximum(b1_xmin, b2_xmin)), 0, MAX)
        inter_h = F.clip(
            F.elemwise_sub(F.minimum(b1_ymax, b2_ymax),
                           F.maximum(b1_ymin, b2_ymin)), 0, MAX)
        # inter_w = F.where(inter_w < 0., F.zeros_like(inter_w), inter_w)
        # inter_h = F.where(inter_h < 0., F.zeros_like(inter_h), inter_h)
        inter = F.elemwise_mul(inter_w, inter_h)

        # Union Area
        w1, h1 = F.elemwise_sub(b1_xmax,
                                b1_xmin), F.elemwise_sub(b1_ymax, b1_ymin)
        w2, h2 = F.elemwise_sub(b2_xmax,
                                b2_xmin), F.elemwise_sub(b2_ymax, b2_ymin)
        # w1 = F.where(w1 < 0., F.zeros_like(w1), w1)
        # h1 = F.where(h1 < 0., F.zeros_like(h1), h1)
        # w2 = F.where(w2 < 0., F.zeros_like(w2), w2)
        # h2 = F.where(h2 < 0., F.zeros_like(h2), h2)
        union = F.elemwise_mul(w1, h1) + F.elemwise_mul(w2, h2) - inter  # area1 + area2 - intersection

        iou = F.elemwise_div(inter, union + 1e-16)  # iou

        # From: https://github.com/ultralytics/yolov3
        # GIOU
        cw = F.elemwise_sub(
            F.maximum(b1_xmax, b2_xmax),
            F.minimum(b1_xmin,
                      b2_xmin))  # convex (smallest enclosing box) width
        ch = F.elemwise_sub(F.maximum(b1_ymax, b2_ymax),
                            F.minimum(b1_ymin, b2_ymin))  # convex height
        # cw = F.where(cw < 0., F.zeros_like(cw), cw)
        # ch = F.where(ch < 0., F.zeros_like(ch), ch)
        if self.loss_type == 'giou':
            c_area = F.elemwise_mul(cw, ch) + 1e-16  # convex area
            giou = iou - (c_area - union) / c_area  # GIoU
            loss = 1. - giou
        else:
            # convex diagonal squared
            c2 = cw**2 + ch**2 + 1e-16
            # centerpoint distance squared
            rho2 = F.square((b2_xmin + b2_xmax) -
                            (b1_xmin + b1_xmax)) / 4 + F.square(
                                ((b2_ymin + b2_ymax) -
                                 (b1_ymin + b1_ymax))) / 4
            if self.loss_type == 'diou':
                diou = iou - rho2 / c2
                loss = 1. - diou
            elif self.loss_type == 'ciou':
                v = (4 / mx.np.pi**2) * F.power(
                    F.arctan(w2 / (h2 + 1e-16)) - F.arctan(w1 /
                                                           (h1 + 1e-16)), 2)
                # TODO without pause(), coverage will be faster
                with mx.autograd.pause():
                    alpha = v / (1. - iou + v + 1e-16)
                    alpha = F.stop_gradient(alpha)
                ciou = iou - (rho2 / c2 + v * alpha)
                loss = 1. - ciou
            else:
                raise ValueError(
                    f'unknown loss_type: {self.loss_type}, available: giou, diou, ciou'
                )
        loss = gloss._apply_weighting(F, loss, self._weight, sample_weight)
        if gloss.is_np_array():
            if F is mx.ndarray:
                return F.np.mean(loss, axis=tuple(range(1, loss.ndim)))
            else:
                return F.npx.batch_flatten(loss).mean(axis=1)
        else:
            return F.mean(loss, axis=self._batch_axis, exclude=True)
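A worked NumPy example (illustrative boxes, not from the source) of the GIoU branch above:

# GIoU for two corner-format boxes, following the same steps as the code.
import numpy as np

b1 = np.array([0.0, 0.0, 2.0, 2.0])   # x1, y1, x2, y2
b2 = np.array([1.0, 1.0, 3.0, 3.0])

inter_w = max(min(b1[2], b2[2]) - max(b1[0], b2[0]), 0.0)
inter_h = max(min(b1[3], b2[3]) - max(b1[1], b2[1]), 0.0)
inter = inter_w * inter_h                                            # 1.0
union = (b1[2]-b1[0])*(b1[3]-b1[1]) + (b2[2]-b2[0])*(b2[3]-b2[1]) - inter  # 7.0
iou = inter / (union + 1e-16)                                        # ~0.143

cw = max(b1[2], b2[2]) - min(b1[0], b2[0])                           # 3.0, enclosing-box width
ch = max(b1[3], b2[3]) - min(b1[1], b2[1])                           # 3.0, enclosing-box height
c_area = cw * ch + 1e-16                                             # 9.0
giou = iou - (c_area - union) / c_area                               # ~-0.079
loss = 1.0 - giou                                                    # ~1.079
print(iou, giou, loss)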
Example #31
    def hybrid_forward(self,
                       F,
                       decoded_arr,
                       pred_arr,
                       observed_arr,
                       label_arr,
                       sample_weight=None):
        '''
        Forward propagation, computing losses.

        Args:
            F:                      `mxnet.ndarray` or `mxnet.symbol`.
            decoded_arr:            `mxnet.ndarray` or `mxnet.symbol` of decoded feature points.
            pred_arr:               `mxnet.ndarray` or `mxnet.symbol` of inferenced labeled feature points.
            observed_arr:           `mxnet.ndarray` or `mxnet.symbol` of observed data points.
            label_arr:              `mxnet.ndarray` or `mxnet.symbol` of label data.
            sample_weight:          element-wise weighting tensor. 
                                    Must be broadcastable to the same shape as label. 
                                    For example, if label has shape (64, 10) and you want to weigh 
                                    each sample in the batch separately, sample_weight should have shape (64, 1).

        Returns:
            `mxnet.ndarray` or `mxnet.symbol` of loss.
        '''
        if not self._from_logits:
            if self.__log_softmax_flag is True:
                pred_arr = F.log_softmax(pred_arr, self._axis)
            else:
                pred_arr = pred_arr - F.reshape(
                    F.max(pred_arr, axis=self._axis), shape=(-1, 1))
                pred_arr = F.exp(pred_arr)
                pred_arr = pred_arr / F.reshape(
                    F.sum(pred_arr, axis=self._axis), shape=(-1, 1))

        if self._sparse_label:
            classification_loss_arr = -F.pick(
                pred_arr, label_arr, axis=self._axis, keepdims=True)
        else:
            label_arr = _reshape_like(F, label_arr, pred_arr)
            classification_loss_arr = -F.sum(
                pred_arr * label_arr, axis=self._axis, keepdims=True)

        if self.__grad_clip_threshold > 0:
            classification_loss_norm = F.norm(classification_loss_arr)
            if classification_loss_norm.asscalar(
            ) > self.__grad_clip_threshold:
                classification_loss_arr = classification_loss_arr * self.__grad_clip_threshold / classification_loss_norm

        if self.__classification_weight is None:
            classification_loss_arr = _apply_weighting(
                F, classification_loss_arr, self._weight, sample_weight)
        else:
            classification_loss_arr = _apply_weighting(
                F, classification_loss_arr, self.__classification_weight,
                sample_weight)

        classification_loss_arr = _apply_weighting(F, classification_loss_arr,
                                                   self.__rc_lambda,
                                                   sample_weight)
        classification_loss = F.mean(classification_loss_arr,
                                     axis=self._batch_axis,
                                     exclude=True)

        observed_arr = _reshape_like(F, observed_arr, decoded_arr)
        reconstruction_loss_arr = F.square(observed_arr - decoded_arr)

        if self.__grad_clip_threshold > 0:
            reconstruction_loss_norm = F.norm(reconstruction_loss_arr)
            if reconstruction_loss_norm.asscalar(
            ) > self.__grad_clip_threshold:
                reconstruction_loss_arr = reconstruction_loss_arr * self.__grad_clip_threshold / reconstruction_loss_norm

        if self.__reconstruction_weight is None:
            reconstruction_loss_arr = _apply_weighting(
                F, reconstruction_loss_arr, self._weight / 2, sample_weight)
        else:
            reconstruction_loss_arr = _apply_weighting(
                F, reconstruction_loss_arr, self.__reconstruction_weight / 2,
                sample_weight)

        reconstruction_loss_arr = _apply_weighting(F, reconstruction_loss_arr,
                                                   (1 - self.__rc_lambda),
                                                   sample_weight)
        reconstruction_loss = F.mean(reconstruction_loss_arr,
                                     axis=self._batch_axis,
                                     exclude=True)

        return classification_loss + reconstruction_loss, classification_loss, reconstruction_loss
Example #32
    def hybrid_forward(self, F, labels, *args, **kwargs):
        """
        Computes the loss on the given data
        :param F: mx.nd or mx.sym
        :param anchors: anchor embeddings, <BxE> where B: batch size, E: embedding dimension
        :param positives: positive embeddings, same shape and labels as anchors <BxE>
        :param labels: Labels of embeddings <B>
        :param sample_weight: weights of logits, see mx.loss
        :return:
        """
        block_size = len(args) // 2
        anchors = list(args[:block_size])
        positives = list(args[block_size:])

        # flatten last here
        reg_anchor = F.mean(F.sum(anchors[-1].square(), axis=1),
                            axis=self._batch_axis,
                            exclude=True)
        reg_positive = F.mean(F.sum(positives[-1].square(), axis=1),
                              axis=self._batch_axis,
                              exclude=True)
        l2loss = self._l2_reg * (reg_anchor + reg_positive)

        # Get per pair similarities.
        perceptual_similarity_matrix = [
            self._simblocks[i](a, p).expand_dims(0)
            for i, (a, p) in enumerate(zip(anchors[:-1], positives[:-1]))
        ]
        perceptual_similarity_matrix = F.concat(*perceptual_similarity_matrix,
                                                dim=0)
        perceptual_similarity_matrix = F.sum(perceptual_similarity_matrix,
                                             axis=0)

        # Get npairs similarity matrix
        similarity_matrix = F.dot(anchors[-1],
                                  positives[-1],
                                  transpose_a=False,
                                  transpose_b=True)

        labels = labels.expand_dims(1)

        labels_remapped = F.broadcast_equal(labels, labels.transpose())
        labels_remapped = F.broadcast_div(
            labels_remapped, F.sum(labels_remapped, axis=1, keepdims=True))
        labels_remapped = labels_remapped.astype(dtype='float32')

        # Add the softmax loss
        xent_loss = F.sum(F.log_softmax(similarity_matrix, -1) *
                          -labels_remapped,
                          axis=-1,
                          keepdims=True)
        xent_loss = _apply_weighting(F, xent_loss, self._weight,
                                     kwargs.get('sample_weight'))
        xent_loss = F.mean(xent_loss, axis=self._batch_axis, exclude=True)

        # Add the perceptual softmax loss
        perc_xent_loss = F.sum(
            F.log_softmax(perceptual_similarity_matrix, -1) * -labels_remapped,
            axis=-1,
            keepdims=True)
        perc_xent_loss = _apply_weighting(F, perc_xent_loss, self._weight,
                                          kwargs.get('sample_weight'))
        perc_xent_loss = F.mean(perc_xent_loss,
                                axis=self._batch_axis,
                                exclude=True)

        loss = (xent_loss + perc_xent_loss) * 0.5

        if self._symmetric:
            perceptual_similarity_matrix = [
                self._simblocks[i](a, p).expand_dims(0)
                for i, (a, p) in enumerate(zip(positives[:-1], anchors[:-1]))
            ]
            perceptual_similarity_matrix = F.concat(
                *perceptual_similarity_matrix, dim=0)
            perceptual_similarity_matrix = F.sum(perceptual_similarity_matrix,
                                                 axis=0)

            similarity_matrix = F.dot(positives[-1],
                                      anchors[-1],
                                      transpose_a=False,
                                      transpose_b=True)

            xent_loss = F.sum(F.log_softmax(similarity_matrix, -1) *
                              -labels_remapped,
                              axis=-1,
                              keepdims=True)
            xent_loss = _apply_weighting(F, xent_loss, self._weight,
                                         kwargs.get('sample_weight'))
            xent_loss = F.mean(xent_loss, axis=self._batch_axis, exclude=True)

            perc_xent_loss = F.sum(
                F.log_softmax(perceptual_similarity_matrix, -1) *
                -labels_remapped,
                axis=-1,
                keepdims=True)
            perc_xent_loss = _apply_weighting(F, perc_xent_loss, self._weight,
                                              kwargs.get('sample_weight'))
            perc_xent_loss = F.mean(perc_xent_loss,
                                    axis=self._batch_axis,
                                    exclude=True)

            loss = loss + (xent_loss + perc_xent_loss) * 0.5
            loss = loss * 0.5

        return loss + l2loss