Example No. 1
    def detect(self, predicted_locs, predicted_scores, threshold, max_overlap):
        # Decode predictions, keep boxes scoring above `threshold`, and apply
        # non-maximum suppression with IoU limit `max_overlap`.
        batch_size = predicted_locs.size(0)
        predicted_scores = torch.nn.functional.softmax(
            predicted_scores, dim=2)  # (batch_size, 8732, 2)

        all_image_boxes = list()
        all_image_scores = list()

        for i in range(batch_size):
            decode_locs = cxcy_to_xy(
                gcxgcy_to_cxcy(predicted_locs[i], self.priors_cxcy))

            image_boxes = list()
            image_scores = list()

            text_scores = predicted_scores[i][:, 1]
            score_above_threshold = text_scores > threshold
            n_score_above_threshold = score_above_threshold.sum().item()

            text_scores = text_scores[score_above_threshold]
            text_decoded_locs = decode_locs[score_above_threshold]

            # Sort candidates by decreasing score so NMS keeps the highest-scoring box in each cluster
            text_scores, sort_ind = text_scores.sort(dim=0, descending=True)
            text_decoded_locs = text_decoded_locs[sort_ind]

            # Pairwise overlap between every pair of surviving candidates
            overlap = IoU(xy_to_cxcy(text_decoded_locs),
                          xy_to_cxcy(text_decoded_locs))

            # Boolean mask of candidates to drop during non-maximum suppression
            suppress = torch.zeros(n_score_above_threshold,
                                   dtype=torch.bool).to(device)

            for box in range(text_decoded_locs.size(0)):
                if suppress[box]:
                    continue

                # Suppress every candidate that overlaps this box too much
                suppress = suppress | (overlap[box] > max_overlap)
                # Never suppress the box under consideration itself
                suppress[box] = False

            image_boxes.append(text_decoded_locs[~suppress])
            image_scores.append(text_scores[~suppress])

            # If no box survived the score threshold, fall back to a single dummy detection
            if image_boxes[0].size(0) == 0:
                image_boxes.append(
                    torch.FloatTensor([[0., 0., 1., 1.]]).to(device))
                image_scores.append(torch.FloatTensor([0.]).to(device))

            image_boxes = torch.cat(image_boxes, dim=0)
            image_scores = torch.cat(image_scores, dim=0)

            all_image_boxes.append(image_boxes)
            all_image_scores.append(image_scores)

        return all_image_boxes, all_image_scores
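
The detect snippet above relies on coordinate helpers (xy_to_cxcy, cxcy_to_xy, gcxgcy_to_cxcy) that are not shown. A minimal sketch of what they typically look like in SSD codebases follows; the variance divisors 10 and 5 are an assumption carried over from the common SSD convention and must match whatever the encoder used:

import torch

def xy_to_cxcy(xy):
    # boundary coordinates (x_min, y_min, x_max, y_max) -> center-size (c_x, c_y, w, h)
    return torch.cat([(xy[:, 2:] + xy[:, :2]) / 2,    # centers
                      xy[:, 2:] - xy[:, :2]], dim=1)  # widths and heights

def cxcy_to_xy(cxcy):
    # center-size (c_x, c_y, w, h) -> boundary coordinates (x_min, y_min, x_max, y_max)
    return torch.cat([cxcy[:, :2] - cxcy[:, 2:] / 2,
                      cxcy[:, :2] + cxcy[:, 2:] / 2], dim=1)

def gcxgcy_to_cxcy(gcxgcy, priors_cxcy):
    # decode predicted offsets w.r.t. priors back into center-size coordinates;
    # dividing by 10 and 5 undoes the variance scaling applied at encoding time
    return torch.cat([gcxgcy[:, :2] * priors_cxcy[:, 2:] / 10 + priors_cxcy[:, :2],
                      torch.exp(gcxgcy[:, 2:] / 5) * priors_cxcy[:, 2:]], dim=1)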
Example No. 2
    def match_gt_priors(self, boxes, labels):
        ''' Given ground-truth boxes and labels, match each of the (8732) priors to its best-suited object.
        N: batch size
        Params:
            boxes: true object bounding boxes in boundary coordinates (xy), a list of N tensors: N(n_objects, 4)
            labels: true object labels, a list of N tensors: N(n_objects,)
        Return:
            truth_offsets: tensor (N, 8732, 4)
            truth_classes: tensor (N, 8732,)
        '''
        N = len(boxes) #batch size
        n_priors = self.priors_cxcy.size(0)
        
        truth_offsets = torch.zeros((N, n_priors, 4), dtype=torch.float).to(device)
        truth_classes = torch.zeros((N, n_priors), dtype=torch.long).to(device)
        
        # for each image
        for i in range(N):
            n_objects = labels[i].shape[0]

            overlap = find_jaccard_overlap(self.priors_xy, boxes[i]) #(n_priors, n_boxes)
            
            # for each prior, find the max IoU and the corresponding object id
            prior_iou, prior_obj = overlap.max(dim=1) #(n_priors)

            # for each object, find the best-suited prior id
            _, object_prior = overlap.max(dim=0) #(n_objects)
            # assign each object to its best-suited prior
            prior_obj[object_prior] = torch.LongTensor(range(n_objects)).to(device)
            # give each object's best-suited prior a high IoU so it survives the thresholding below
            prior_iou[object_prior] = 1.
            
            # match bbox coordinates
            boxes_xy = boxes[i][prior_obj] # (8732, 4)
            
            # match prior class
            prior_class = labels[i][prior_obj]  # (8732)
            # thresholding: assign priors whose IoU < threshold to class 0 (background)
            prior_class[prior_iou < self.threshold] = 0
            
            # save into the truth tensors
            truth_offsets[i,:,:] = cxcy_to_gcxgcy(xy_to_cxcy(boxes_xy), self.priors_cxcy)
            truth_classes[i,:] = prior_class
        
        return truth_offsets, truth_classes
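
match_gt_priors encodes the matched boxes with cxcy_to_gcxgcy, which is also not shown. A plausible implementation is the inverse of the decoder sketched under Example No. 1, again assuming the usual 10/5 variance scaling:

def cxcy_to_gcxgcy(cxcy, priors_cxcy):
    # encode center-size boxes as scaled offsets w.r.t. their matched priors:
    # plain offsets for the center, log-ratios for the width and height
    return torch.cat([(cxcy[:, :2] - priors_cxcy[:, :2]) / (priors_cxcy[:, 2:] / 10),
                      torch.log(cxcy[:, 2:] / priors_cxcy[:, 2:]) * 5], dim=1)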
Example No. 3
    def forward(self, predicted_locs, predicted_scores, boxes, labels, device):
        """
        Forward propagation.

        :param predicted_locs: predicted locations/boxes w.r.t the 8732 prior boxes, a tensor of dimensions (N, 8732, 4)
        :param predicted_scores: class scores for each of the encoded locations/boxes, a tensor of dimensions (N, 8732, n_classes)
        :param boxes: true object bounding boxes in boundary coordinates, a list of N tensors
        :param labels: true object labels, a list of N tensors
        :param device: torch.device on which the target tensors are built
        :return: multibox loss, a scalar
        """
        batch_size = predicted_locs.size(0)
        n_priors = self.priors_cxcy.size(0)
        n_classes = predicted_scores.size(2)

        assert n_priors == predicted_locs.size(1) == predicted_scores.size(1)

        true_locs = torch.zeros((batch_size, n_priors, 4),
                                dtype=torch.float).to(device)  # (N, 8732, 4)
        true_classes = torch.zeros((batch_size, n_priors),
                                   dtype=torch.long).to(device)  # (N, 8732)

        # For each image
        for i in range(batch_size):
            n_objects = boxes[i].size(0)

            overlap = find_jaccard_overlap(boxes[i],
                                           self.priors_xy)  # (n_objects, 8732)

            # For each prior, find the object that has the maximum overlap
            overlap_for_each_prior, object_for_each_prior = overlap.max(
                dim=0)  # (8732)

            # We don't want a situation where an object is not represented in our positive (non-background) priors -
            # 1. An object might not be the best object for all priors, and is therefore not in object_for_each_prior.
            # 2. All priors with the object may be assigned as background based on the threshold (0.5).

            # To remedy this -
            # First, find the prior that has the maximum overlap for each object.
            _, prior_for_each_object = overlap.max(dim=1)  # (N_o)

            # Then, assign each object to the corresponding maximum-overlap-prior. (This fixes 1.)
            object_for_each_prior[prior_for_each_object] = torch.LongTensor(
                range(n_objects)).to(device)

            # To ensure these priors qualify, artificially give them an overlap of greater than 0.5. (This fixes 2.)
            overlap_for_each_prior[prior_for_each_object] = 1.

            # Labels for each prior
            label_for_each_prior = labels[i][object_for_each_prior]  # (8732)
            # Set priors whose overlaps with objects are less than the threshold to be background (no object)
            label_for_each_prior[
                overlap_for_each_prior < self.threshold] = 0  # (8732)

            # Store
            true_classes[i] = label_for_each_prior

            # Encode center-size object coordinates into the form we regressed predicted boxes to
            true_locs[i] = cxcy_to_gcxgcy(
                xy_to_cxcy(boxes[i][object_for_each_prior]),
                self.priors_cxcy)  # (8732, 4)

        # Identify priors that are positive (object/non-background)
        positive_priors = true_classes != 0  # (N, 8732)

        # LOCALIZATION LOSS

        # Localization loss is computed only over positive (non-background) priors
        loc_loss = self.smooth_l1(predicted_locs[positive_priors],
                                  true_locs[positive_priors])  # (), scalar

        # Note: indexing with a torch.bool mask flattens the tensor when indexing spans multiple dimensions (N & 8732)
        # So, if predicted_locs has the shape (N, 8732, 4), predicted_locs[positive_priors] will have (total positives, 4)

        # CONFIDENCE LOSS

        # Confidence loss is computed over positive priors and the most difficult (hardest) negative priors in each image
        # That is, FOR EACH IMAGE,
        # we will take the hardest (neg_pos_ratio * n_positives) negative priors, i.e where there is maximum loss
        # This is called Hard Negative Mining - it concentrates on hardest negatives in each image, and also minimizes pos/neg imbalance

        # Number of positive and hard-negative priors per image
        n_positives = positive_priors.sum(dim=1)  # (N)
        n_hard_negatives = self.neg_pos_ratio * n_positives  # (N)

        # First, find the loss for all priors
        conf_loss_all = self.cross_entropy(predicted_scores.view(
            -1, n_classes), true_classes.view(-1))  # (N * 8732)
        conf_loss_all = conf_loss_all.view(batch_size, n_priors)  # (N, 8732)

        # We already know which priors are positive
        conf_loss_pos = conf_loss_all[positive_priors]  # (sum(n_positives))

        # Next, find which priors are hard-negative
        # To do this, sort ONLY negative priors in each image in order of decreasing loss and take top n_hard_negatives
        conf_loss_neg = conf_loss_all.clone()  # (N, 8732)
        conf_loss_neg[
            positive_priors] = 0.  # (N, 8732), positive priors are ignored (never in top n_hard_negatives)
        conf_loss_neg, _ = conf_loss_neg.sort(
            dim=1, descending=True)  # (N, 8732), sorted by decreasing hardness
        hardness_ranks = torch.LongTensor(
            range(n_priors)).unsqueeze(0).expand_as(conf_loss_neg).to(
                device)  # (N, 8732)
        hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(
            1)  # (N, 8732)
        conf_loss_hard_neg = conf_loss_neg[
            hard_negatives]  # (sum(n_hard_negatives))

        # As in the paper, averaged over positive priors only, although computed over both positive and hard-negative priors
        conf_loss = (conf_loss_hard_neg.sum() + conf_loss_pos.sum()
                     ) / n_positives.sum().float()  # (), scalar

        # TOTAL LOSS

        return conf_loss + self.alpha * loc_loss
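
The forward pass above reads several attributes (priors_cxcy, priors_xy, threshold, neg_pos_ratio, alpha, smooth_l1, cross_entropy) that must be prepared in the constructor. A constructor consistent with those uses might look like the sketch below; the class name and default hyperparameters are assumptions, and cxcy_to_xy is the helper sketched under Example No. 1:

import torch.nn as nn

class MultiBoxLoss(nn.Module):
    def __init__(self, priors_cxcy, threshold=0.5, neg_pos_ratio=3, alpha=1.):
        super().__init__()
        self.priors_cxcy = priors_cxcy            # (8732, 4) priors in center-size form
        self.priors_xy = cxcy_to_xy(priors_cxcy)  # the same priors in boundary form
        self.threshold = threshold                # IoU below this -> background
        self.neg_pos_ratio = neg_pos_ratio        # hard negatives kept per positive
        self.alpha = alpha                        # weight of the localization term
        self.smooth_l1 = nn.SmoothL1Loss()
        # reduction='none' keeps the per-prior losses that hard negative mining needs
        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')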
Example No. 4
    def forward(self, predicted_locs, predicted_scores, boxes, labels):
        """
        Forward propagation.
        :param predicted_locs: predicted locations/boxes w.r.t the 8732 prior boxes, a tensor of dimensions (N, 8732, 4)
        :param predicted_scores: class scores for each of the encoded locations/boxes, a tensor of dimensions (N, 8732, n_classes)
        :param boxes: true object bounding boxes in boundary coordinates, a list of N tensors
        :param labels: true object labels, a list of N tensors
        :return: multibox loss, a scalar
        """
        batch_size = predicted_locs.size(0)
        n_priors = self.priors_cxcy.size(0)
        n_classes = predicted_scores.size(2)

        assert n_priors == predicted_locs.size(1) == predicted_scores.size(1)

        true_locs = torch.zeros((batch_size, n_priors, 4), dtype=torch.float).to(device)  # (N, 8732, 4)
        true_classes = torch.zeros((batch_size, n_priors), dtype=torch.long).to(device)  # (N, 8732)

        # For each image
        for i in range(batch_size):
            n_objects = boxes[i].size(0)

            overlap = find_jaccard_overlap(boxes[i],
                                           self.priors_xy)  # (n_objects, 8732)

            # For each prior, find the object that has the maximum overlap
            overlap_for_each_prior, object_for_each_prior = overlap.max(dim=0)  # (8732)

            # We don't want a situation where an object is not represented in our positive (non-background) priors -
            # 1. An object might not be the best object for all priors, and is therefore not in object_for_each_prior.
            # 2. All priors with the object may be assigned as background based on the threshold (0.5).

            # To remedy this -
            # First, find the prior that has the maximum overlap for each object.
            _, prior_for_each_object = overlap.max(dim=1)  # (N_o)

            # Then, assign each object to the corresponding maximum-overlap-prior. (This fixes 1.)
            object_for_each_prior[prior_for_each_object] = torch.LongTensor(range(n_objects)).to(device)

            # To ensure these priors qualify, artificially give them an overlap of greater than 0.5. (This fixes 2.)
            overlap_for_each_prior[prior_for_each_object] = 1.

            # Labels for each prior
            label_for_each_prior = labels[i][object_for_each_prior]  # (8732)
            # Set priors whose overlaps with objects are less than the threshold to be background (no object)
            label_for_each_prior[overlap_for_each_prior < self.threshold] = 0  # (8732)

            # Store
            true_classes[i] = label_for_each_prior

            # Encode center-size object coordinates into the form we regressed predicted boxes to
            true_locs[i] = cxcy_to_gcxgcy(xy_to_cxcy(boxes[i][object_for_each_prior]), self.priors_cxcy)  # (8732, 4)

        positive_priors = true_classes != 0  # (N, 8732)

        loc_loss = self.smooth_l1(predicted_locs[positive_priors], true_locs[positive_priors])  # (), scalar

        n_positives = positive_priors.sum(dim=1)  # (N)
        n_hard_negatives = self.neg_pos_ratio * n_positives  # (N)

        # First, find the loss for all priors
        conf_loss_all = self.cross_entropy(predicted_scores.view(-1, n_classes), true_classes.view(-1))  # (N * 8732)
        conf_loss_all = conf_loss_all.view(batch_size, n_priors)  # (N, 8732)

        # We already know which priors are positive
        conf_loss_pos = conf_loss_all[positive_priors]  # (sum(n_positives))

        # Next, find which priors are hard-negative
        # To do this, sort ONLY negative priors in each image in order of decreasing loss and take top n_hard_negatives
        conf_loss_neg = conf_loss_all.clone()  # (N, 8732)
        conf_loss_neg[positive_priors] = 0.  # (N, 8732), positive priors are ignored (never in top n_hard_negatives)
        conf_loss_neg, _ = conf_loss_neg.sort(dim=1, descending=True)  # (N, 8732), sorted by decreasing hardness
        hardness_ranks = torch.LongTensor(range(n_priors)).unsqueeze(0).expand_as(conf_loss_neg).to(device)  # (N, 8732)
        hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(1)  # (N, 8732)
        conf_loss_hard_neg = conf_loss_neg[hard_negatives]  # (sum(n_hard_negatives))

        conf_loss = (conf_loss_hard_neg.sum() + conf_loss_pos.sum()) / n_positives.sum().float()  # (), scalar

        return conf_loss + self.alpha * loc_loss
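
The hardness_ranks comparison is the one non-obvious step in the mining code above: after each row of negative losses is sorted in descending order, comparing a row of ranks against n_hard_negatives picks exactly the top-k entries per image without a Python loop over the batch. A tiny self-contained illustration:

import torch

conf_loss_neg = torch.tensor([[0.9, 0.5, 0.4, 0.1],
                              [0.8, 0.7, 0.2, 0.0]])  # already sorted, descending
n_hard_negatives = torch.tensor([2, 1])               # top-2 for image 0, top-1 for image 1

ranks = torch.arange(conf_loss_neg.size(1)).unsqueeze(0).expand_as(conf_loss_neg)
hard_negatives = ranks < n_hard_negatives.unsqueeze(1)  # (2, 4) boolean mask
print(conf_loss_neg[hard_negatives])  # tensor([0.9000, 0.5000, 0.8000])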
Example No. 5
    def forward(self, locs_pred, cls_pred, boxes, labels):
        '''
            Forward propagation
            locs_pred: predicted locations, a tensor of dimensions (N, 8732, 4)
            cls_pred: predicted class scores for each of the encoded boxes, a tensor of dimensions (N, 8732, n_classes)
            boxes: true object bounding boxes, a list of N tensors
            labels: true object labels, a list of N tensors

            Out: Multibox loss
        '''
        batch_size = locs_pred.size(0)  #N
        n_default_boxes = self.default_boxes.size(0)  #8732
        num_classes = cls_pred.size(2)  #num_classes

        t_locs = torch.zeros((batch_size, n_default_boxes, 4),
                             dtype=torch.float).to(device)  #(N, 8732, 4)
        t_classes = torch.zeros((batch_size, n_default_boxes),
                                dtype=torch.long).to(device)  #(N, 8732)

        default_boxes_xy = cxcy_to_xy(self.default_boxes)
        for i in range(batch_size):
            n_objects = boxes[i].size(0)

            overlap = find_IoU(boxes[i], default_boxes_xy)  #(n_objects, 8732)

            #for each default box, find the object that has the maximum overlap
            overlap_each_default_box, object_each_default_box = overlap.max(
                dim=0)  #(8732)

            #find the default box with the maximum overlap for each object
            _, default_boxes_each_object = overlap.max(dim=1)

            object_each_default_box[
                default_boxes_each_object] = torch.LongTensor(
                    range(n_objects)).to(device)

            overlap_each_default_box[default_boxes_each_object] = 1.

            #Labels for each default box
            label_each_default_box = labels[i][
                object_each_default_box]  #(8732)

            label_each_default_box[
                overlap_each_default_box < self.threshold] = 0  #(8732)

            #Save
            t_classes[i] = label_each_default_box

            #Encode pred bboxes
            t_locs[i] = encode_bboxes(
                xy_to_cxcy(boxes[i][object_each_default_box]),
                self.default_boxes)  #(8732, 4)

        # Identify priors that are positive
        pos_default_boxes = t_classes != 0  #(N, 8732)

        #Localization loss
        #Localization loss is computed only over positive default boxes

        smooth_L1_loss = nn.SmoothL1Loss()
        loc_loss = smooth_L1_loss(locs_pred[pos_default_boxes],
                                  t_locs[pos_default_boxes])

        #Confidence loss
        #Apply hard negative mining

        #number of positive and hard-negative default boxes per image
        n_positive = pos_default_boxes.sum(dim=1)
        n_hard_negatives = self.neg_pos * n_positive

        #Find the loss for all priors
        cross_entropy_loss = nn.CrossEntropyLoss(reduction='none')
        confidence_loss_all = cross_entropy_loss(cls_pred.view(
            -1, num_classes), t_classes.view(-1))  #(N*8732)
        confidence_loss_all = confidence_loss_all.view(
            batch_size, n_default_boxes)  #(N, 8732)

        confidence_pos_loss = confidence_loss_all[pos_default_boxes]

        #Find which priors are hard-negative
        confidence_neg_loss = confidence_loss_all.clone()  #(N, 8732)
        confidence_neg_loss[pos_default_boxes] = 0.
        confidence_neg_loss, _ = confidence_neg_loss.sort(dim=1,
                                                          descending=True)

        hardness_ranks = torch.LongTensor(range(n_default_boxes)).unsqueeze(
            0).expand_as(confidence_neg_loss).to(device)  # (N, 8732)

        hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(
            1)  # (N, 8732)

        confidence_hard_neg_loss = confidence_neg_loss[hard_negatives]

        confidence_loss = (
            confidence_hard_neg_loss.sum() +
            confidence_pos_loss.sum()) / n_positive.sum().float()

        return self.alpha * loc_loss + confidence_loss
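
Every variant in this section depends on a pairwise IoU routine (find_jaccard_overlap above, find_IoU in this example). A standard vectorized implementation over boundary-coordinate boxes, offered here as a sketch, is:

import torch

def find_intersection(set_1, set_2):
    # pairwise intersection areas between two sets of boxes, (n1, n2)
    lower_bounds = torch.max(set_1[:, :2].unsqueeze(1), set_2[:, :2].unsqueeze(0))
    upper_bounds = torch.min(set_1[:, 2:].unsqueeze(1), set_2[:, 2:].unsqueeze(0))
    dims = torch.clamp(upper_bounds - lower_bounds, min=0)  # (n1, n2, 2)
    return dims[:, :, 0] * dims[:, :, 1]

def find_jaccard_overlap(set_1, set_2):
    # pairwise IoU = intersection / union, (n1, n2)
    intersection = find_intersection(set_1, set_2)
    areas_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1])  # (n1)
    areas_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1])  # (n2)
    union = areas_1.unsqueeze(1) + areas_2.unsqueeze(0) - intersection
    return intersection / union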
Example No. 6
    def forward(self, predicted_locs, predicted_scores, boxes, labels):
        """
        Forward propagation.
        :param predicted_locs: predicted locations/boxes w.r.t the prior boxes, a tensor of dimensions (N, n_priors, 4)
        :param predicted_scores: class scores for each of the encoded locations/boxes, a tensor of dimensions (N, n_priors, n_classes)
        :param boxes: true object bounding boxes in boundary coordinates, a list of N tensors
        :param labels: true object labels, a list of N tensors
        :return: multibox loss, a scalar
        """
        batch_size = predicted_locs.size(0)
        n_priors = self.priors_cxcy.size(0)
        n_classes = predicted_scores.size(2)
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        assert n_priors == predicted_locs.size(1) == predicted_scores.size(1)

        true_locs = torch.zeros((batch_size, n_priors, 4),
                                dtype=torch.float).to(device)
        true_classes = torch.zeros((batch_size, n_priors),
                                   dtype=torch.long).to(device)

        # For each image
        for i in range(batch_size):
            n_objects = boxes[i].size(0)

            overlap = find_jaccard_overlap(
                boxes[i], self.priors_xy)  # (n_objects, n_priors)

            # For each prior, find the object that has the maximum overlap
            overlap_for_each_prior, object_for_each_prior = overlap.max(
                dim=0)  # (n_priors)

            # We don't want an object unrepresented among the positive priors -
            # 1. An object might not be the best match for any prior.
            # 2. All priors matched to it may fall below the threshold (0.5).
            # First, find the prior that has the maximum overlap for each object.
            _, prior_for_each_object = overlap.max(dim=1)  # (N_o)

            # Then, assign each object to the corresponding maximum-overlap-prior. (This fixes 1.)
            object_for_each_prior[prior_for_each_object] = torch.LongTensor(
                range(n_objects)).to(device)

            # To ensure these priors qualify, artificially give them an overlap of greater than 0.5. (This fixes 2.)
            overlap_for_each_prior[prior_for_each_object] = 1.

            # Labels for each prior
            label_for_each_prior = labels[i][
                object_for_each_prior]  # (n_priors)
            # Set priors whose overlaps with objects are less than the threshold to be background (no object)
            label_for_each_prior[
                overlap_for_each_prior < self.threshold] = 0  # (n_priors)

            # Store
            true_classes[i] = label_for_each_prior

            # Encode center-size object coordinates into the form we regressed predicted boxes to
            true_locs[i] = cxcy_to_gcxgcy(
                xy_to_cxcy(boxes[i][object_for_each_prior]),
                self.priors_cxcy)  # (n_priors, 4)

        # Identify priors that are positive (object/non-background)
        positive_priors = true_classes != 0  # (N, n_priors)

        # Localization loss is computed only over positive (non-background) priors
        loc_loss = self.smooth_l1(predicted_locs[positive_priors],
                                  true_locs[positive_priors])  # (), scalar

        # Number of positive and hard-negative priors per image
        n_positives = positive_priors.sum(dim=1)  # (N)
        n_hard_negatives = self.neg_pos_ratio * n_positives  # (N)

        # First, find the loss for all priors
        conf_loss_all = self.cross_entropy(predicted_scores.view(
            -1, n_classes), true_classes.view(-1))  # (N * n_priors)
        conf_loss_all = conf_loss_all.view(batch_size,
                                           n_priors)  # (N, n_priors)

        # We already know which priors are positive
        conf_loss_pos = conf_loss_all[positive_priors]  # (sum(n_positives))

        # Next, find which priors are hard-negative
        # To do this, sort ONLY negative priors in each image in order of decreasing loss and take top n_hard_negatives
        conf_loss_neg = conf_loss_all.clone()  # (N, n_priors)
        conf_loss_neg[
            positive_priors] = 0.  # (N, n_priors), positive priors are ignored (never in top n_hard_negatives)
        conf_loss_neg, _ = conf_loss_neg.sort(
            dim=1,
            descending=True)  # (N, n_priors), sorted by decreasing hardness
        hardness_ranks = torch.LongTensor(
            range(n_priors)).unsqueeze(0).expand_as(conf_loss_neg).to(
                device)  # (N, n_priors)
        hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(
            1)  # (N, n_priors)
        conf_loss_hard_neg = conf_loss_neg[
            hard_negatives]  # (sum(n_hard_negatives))

        # As in the paper, averaged over positive priors only, although computed over both positive and hard-negative priors
        conf_loss = (conf_loss_hard_neg.sum() + conf_loss_pos.sum()
                     ) / n_positives.sum().float()  # (), scalar

        # TOTAL LOSS

        return conf_loss + self.alpha * loc_loss
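
A quick way to sanity-check any of these loss modules is to push correctly shaped random tensors through them. The snippet below is a hypothetical smoke test: the MultiBoxLoss name, constructor signature, and dummy priors are assumptions to be adapted to the actual class, and all tensors must live on the same device as the module's internals:

import torch

N, n_priors, n_classes = 2, 8732, 21
priors_cxcy = torch.rand(n_priors, 4).clamp(0.05, 0.95)  # dummy (c_x, c_y, w, h) priors
criterion = MultiBoxLoss(priors_cxcy, threshold=0.5, neg_pos_ratio=3, alpha=1.)

predicted_locs = torch.randn(N, n_priors, 4)
predicted_scores = torch.randn(N, n_priors, n_classes)
boxes = [torch.tensor([[0.1, 0.1, 0.4, 0.5]]),  # one ground-truth box per image
         torch.tensor([[0.2, 0.3, 0.9, 0.8]])]
labels = [torch.tensor([7]), torch.tensor([12])]

loss = criterion(predicted_locs, predicted_scores, boxes, labels)
print(loss.item())  # a finite scalar if shapes and matching are wired correctly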
Example No. 7
    def forward(self, output_boxes, output_scores, true_boxes, true_labels):

        batch_size = output_boxes.size(0)
        n_classes = output_scores.size(2)  # the class dimension lives on the scores, not the boxes
        n_priors = self.default_cxcy.size(0)

        gt_locs = torch.Tensor(batch_size, n_priors, 4).to(self.device)
        gt_class = torch.LongTensor(batch_size, n_priors).to(self.device)

        for im in range(batch_size):
            n_objects = true_boxes[im].size(0)

            # compute IoU for each ground truth box with default boxes
            # (n_objects, 8732)
            overlaps = find_jaccard_overlap(true_boxes[im], self.default_xy)

            # find highest-overlap object for each default, and then highest-
            # overlap default for each object
            overlap_per_default, object_per_default = overlaps.max(dim=0)
            overlap_per_object, default_per_object = overlaps.max(dim=1)

            # assign object to default box with highest overlap
            object_per_default[default_per_object] = torch.LongTensor(
                range(n_objects)).to(self.device)

            # give these default boxes an overlap of 1 (ensure positive)
            overlap_per_default[default_per_object] = 1.

            # assign labels to the default boxes according to the best overlap
            default_labels = true_labels[im][object_per_default]
            default_labels[overlap_per_default < self.threshold] = 0

            gt_class[im] = default_labels
            gt_locs[im] = cxcy_to_gcxgcy(
                xy_to_cxcy(true_boxes[im][object_per_default]),
                self.default_cxcy)

        positive_defaults = (gt_class > 0)

        # localization loss, computed only over positive default boxes,
        # against the encoded targets (not the raw ground-truth list)
        L_loc = self.smooth_l1(output_boxes[positive_defaults],
                               gt_locs[positive_defaults])

        # confidence loss
        n_positives = positive_defaults.sum(dim=1)  # (N)
        n_hard_negatives = self.hard_neg_scale * n_positives

        conf_all = output_scores.view(-1, n_classes)
        L_conf_all = self.cross_entropy(conf_all, gt_class.view(-1))
        L_conf_all = L_conf_all.view(batch_size, n_priors)  # (N, 8732)

        # We already know which priors are positive
        L_conf_pos = L_conf_all[positive_defaults]  # (sum(n_positives))

        # Next, find which priors are hard-negative
        # To do this, sort ONLY negative priors in each image in order of decreasing loss and take top n_hard_negatives
        L_conf_neg = L_conf_all.clone()  # (N, 8732)
        L_conf_neg[
            positive_defaults] = 0.  # (N, 8732), positive priors are ignored (never in top n_hard_negatives)
        L_conf_neg, _ = L_conf_neg.sort(
            dim=1, descending=True)  # (N, 8732), sorted by decreasing hardness
        hardness_ranks = torch.LongTensor(
            range(n_priors)).unsqueeze(0).expand_as(L_conf_neg).to(
                self.device)  # (N, 8732)
        hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(1)
        L_conf_hard_neg = L_conf_neg[hard_negatives]

        # As in the paper, averaged over positive priors only, although computed over both positive and hard-negative priors
        L_conf = (L_conf_hard_neg.sum() +
                  L_conf_pos.sum()) / n_positives.sum().float()  # (), scalar

        loss = L_conf + self.alpha * L_loc
        return loss
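
Finally, a note on the suppression loop in Example No. 1: the quadratic pairwise loop can usually be replaced by torchvision's built-in NMS, which takes boundary-coordinate boxes plus scores and returns the indices of the boxes to keep. A self-contained sketch:

import torch
import torchvision

boxes = torch.tensor([[0.10, 0.10, 0.40, 0.50],
                      [0.12, 0.11, 0.41, 0.52],   # near-duplicate of the first box
                      [0.60, 0.60, 0.90, 0.95]])
scores = torch.tensor([0.9, 0.8, 0.7])

# indices of surviving boxes, highest score first; heavy overlaps are suppressed
keep = torchvision.ops.nms(boxes, scores, iou_threshold=0.45)
print(keep)  # tensor([0, 2]) -- the near-duplicate is dropped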