Example #1
    def post_process_top_k(self, predicted_offsets, predicted_scores, score_threshold, iou_threshold, top_k):
        ''' Return the top_k detections, sorted by confidence score.
        Params:
            predicted_offsets: predicted offsets w.r.t the 8732 prior boxes, (gcxgcy), a tensor of dimensions (N, 8732, 4)
            predicted_scores: class scores for each of the encoded locations/boxes, a tensor of dimensions (N, 8732, n_classes)
            score_threshold: minimum threshold for a box to be considered a match for a certain class
            iou_threshold: maximum overlap two boxes can have so that the one with the lower score is not suppressed via NMS
            top_k: int; if the result contains more than k objects, return only the k objects with the largest confidence scores
        Return:
            detections: (boxes, labels, scores), each a list of N tensors
            boxes: N (n_boxes, 4)
            labels: N (n_boxes,)
            scores: N (n_boxes,)
        '''
        boxes = list()
        labels = list()
        scores = list()
        N, n_priors = predicted_offsets.shape[0:2]
        
        predicted_scores = F.softmax(predicted_scores, dim=2)  # (N, 8732, n_classes)
        
        # for each image in the batch
        for i in range(N):
            boxes_i = list()
            labels_i = list()
            scores_i = list()
            
            # convert gcxgcy to xy coordinates format
            boxes_xy = cxcy_to_xy(gcxgcy_to_cxcy(predicted_offsets[i], self.priors_cxcy)) # (8732, 4)

            for c in range(1, self.n_classes):
                # Keep only predicted boxes and scores where scores for this class are above the minimum score
                class_scores = predicted_scores[i][:, c]  # (8732)
                qualify_mask = class_scores > score_threshold
                n_qualified = qualify_mask.sum().item()
                if n_qualified == 0:
                    continue
                boxes_class_c = boxes_xy[qualify_mask]  # (n_qualified, 4)
                boxes_score_class_c = class_scores[qualify_mask]  # (n_qualified) <= 8732
                
                final_box_ids = nms(boxes_class_c, boxes_score_class_c, iou_threshold)  # (n_final_boxes,)
                
                boxes_i.extend(boxes_class_c[final_box_ids].tolist())
                labels_i.extend([c]*len(final_box_ids))
                scores_i.extend(boxes_score_class_c[final_box_ids].tolist())
        
            boxes.append(torch.FloatTensor(boxes_i).to(device))
            labels.append(torch.LongTensor(labels_i).to(device))
            scores.append(torch.FloatTensor(scores_i).to(device))
            
            # Filter top k objects that have largest confidence score
            if boxes[i].size(0) > top_k:
                scores[i], sort_ind = scores[i].sort(dim=0, descending=True)
                scores[i] = scores[i][:top_k]  # (top_k)
                boxes[i] = boxes[i][sort_ind[:top_k]]  # (top_k, 4)
                labels[i] = labels[i][sort_ind[:top_k]]  # (top_k)

        return boxes, labels, scores
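All of these snippets lean on the same pair of coordinate helpers, defined elsewhere in each repo. A minimal sketch of what they typically look like, assuming the usual SSD offset encoding with variance constants 10 and 5 (hypothetical reimplementations, not the originals):

import torch

def gcxgcy_to_cxcy(gcxgcy, priors_cxcy):
    # undo the SSD offset encoding: offsets are scaled by each prior's center and size
    return torch.cat([gcxgcy[:, :2] * priors_cxcy[:, 2:] / 10 + priors_cxcy[:, :2],  # c_x, c_y
                      torch.exp(gcxgcy[:, 2:] / 5) * priors_cxcy[:, 2:]], dim=1)     # w, h

def cxcy_to_xy(cxcy):
    # center-size (c_x, c_y, w, h) -> boundary (x_min, y_min, x_max, y_max)
    return torch.cat([cxcy[:, :2] - cxcy[:, 2:] / 2,
                      cxcy[:, :2] + cxcy[:, 2:] / 2], dim=1)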
Example #2
    def my_post_process(self, predicted_offsets, predicted_scores, score_threshold, iou_threshold, top_k=-1):
        ''' The differences from the previous my_post_process are:
        1: score_threshold is used to decide whether a box is background or contains an object
             E.g.: if score_threshold=0.75, boxes whose background-class score is > 0.75 are considered background
        2: if a box contains an object, its label is the argmax of the softmax output, background excluded
        Result:
        # See the precision-recall curve for more information
        # 2x faster than post_process_top_k, since most background boxes are filtered out by score_threshold
        '''
        boxes = list()
        labels = list()
        scores = list()
        N, n_priors = predicted_offsets.shape[0:2]
        
        predicted_scores = F.softmax(predicted_scores, dim=2)  # (N, 8732, n_classes)
        
        obj_masks = (predicted_scores[:,:,0] < score_threshold) # (N,8732)
        
        # for each image in the batch
        for i in range(N):
            boxes_i = list()
            labels_i = list()
            scores_i = list()
            obj_mask = obj_masks[i] # (8732)
            
            if obj_mask.sum().item() > 0:
                # filter out boxes that are background
                obj_boxes = predicted_offsets[i][obj_mask]  # (n_obj_boxes, 4) # n_obj_boxes: number of boxes containing object
                obj_boxes_score, obj_boxes_class = predicted_scores[i,:,1:self.n_classes][obj_mask].max(dim=1) # (n_obj_boxes)
                obj_boxes_class += 1  # background was excluded, so argmax is in 0-19; add 1 to map back to class ids 1-20

                # convert to xy coordinates format
                obj_boxes = cxcy_to_xy(gcxgcy_to_cxcy(obj_boxes, self.priors_cxcy[obj_mask])) # (n_obj_boxes, 4)

                # Non-max suppression
                for class_i in obj_boxes_class.unique(sorted=False).tolist():
                    class_mask = (obj_boxes_class == class_i)
                    boxes_class_i = obj_boxes[class_mask]
                    boxes_score_class_i = obj_boxes_score[class_mask]
                    
                    final_box_ids = nms(boxes_class_i, boxes_score_class_i, iou_threshold)  # (n_final_boxes after suppression,)
                    
                    boxes_i.extend(boxes_class_i[final_box_ids].tolist())
                    labels_i.extend([class_i]*len(final_box_ids))
                    scores_i.extend(boxes_score_class_i[final_box_ids].tolist())
        
            boxes.append(torch.FloatTensor(boxes_i).to(device))
            labels.append(torch.LongTensor(labels_i).to(device))
            scores.append(torch.FloatTensor(scores_i).to(device))
            
            # Filter top k objects that have largest confidence score
            if boxes[i].size(0) > top_k and top_k > 0:
                scores[i], sort_ind = scores[i].sort(dim=0, descending=True)
                scores[i] = scores[i][:top_k]  # (top_k)
                boxes[i] = boxes[i][sort_ind[:top_k]]  # (top_k, 4)
                labels[i] = labels[i][sort_ind[:top_k]]  # (top_k)
        
        return boxes, labels, scores
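The background-mask trick that makes my_post_process fast can be exercised in isolation. A tiny self-contained sketch with random tensors (the class count and threshold here are arbitrary):

import torch
import torch.nn.functional as F

N, n_priors, n_classes = 2, 8732, 21
predicted_scores = F.softmax(torch.randn(N, n_priors, n_classes), dim=2)

# a prior is an "object" candidate if its background probability is below the threshold
obj_masks = predicted_scores[:, :, 0] < 0.75                     # (N, 8732)
# per-prior best foreground class; shift by 1 to skip the background column
obj_scores, obj_classes = predicted_scores[:, :, 1:].max(dim=2)  # (N, 8732)
obj_classes += 1
print(obj_masks.sum(dim=1))  # number of candidate boxes per image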
Example #3
    def __init__(self, priors_cxcy, threshold=0.5, neg_pos_ratio=3, alpha=1.):
        super(MultiBoxLoss, self).__init__()
        self.priors_cxcy = priors_cxcy
        self.priors_xy = cxcy_to_xy(priors_cxcy)
        self.threshold = threshold
        self.neg_pos_ratio = neg_pos_ratio
        self.alpha = alpha

        self.smooth_l1 = nn.L1Loss()  # plain L1, despite the name
        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')  # per-box losses, reduced after hard negative mining
Example #4
    def __init__(self, threshold, neg_pos_ratio, alpha, device):
        super().__init__()
        self.default_cxcy = get_default_boxes().to(device)
        self.default_xy = cxcy_to_xy(self.default_cxcy)

        self.threshold = threshold
        self.hard_neg_scale = neg_pos_ratio
        self.alpha = alpha
        self.device = device

        self.smooth_l1 = nn.SmoothL1Loss(reduction='none')
        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')
Example #5
 def __init__(self, priors_cxcy, threshold=0.5, neg_pos_ratio=3, alpha=1., focal_loss=False):
     super(MultiBoxLoss, self).__init__()
     self.priors_cxcy = priors_cxcy
     self.priors_xy = cxcy_to_xy(priors_cxcy)
     self.threshold = threshold
     self.neg_pos_ratio = neg_pos_ratio
     self.alpha = alpha
     
     # loss functions
     self.smooth_l1 = nn.SmoothL1Loss()
     self.cross_entropy = nn.CrossEntropyLoss(reduction='none')
     self.focal_loss = FocalLoss(reduction='sum') if focal_loss else None
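FocalLoss here is defined elsewhere in that repo. A minimal sketch of the common formulation FL(p_t) = -(1 - p_t)^gamma * log(p_t), assuming gamma=2 and ignoring per-class alpha weighting (a hypothetical stand-in, not the repo's class):

import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0, reduction='sum'):
        super().__init__()
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        # cross entropy gives -log(p_t) per sample
        ce = F.cross_entropy(logits, targets, reduction='none')
        p_t = torch.exp(-ce)
        # down-weight easy examples where p_t is already high
        loss = (1 - p_t) ** self.gamma * ce
        if self.reduction == 'sum':
            return loss.sum()
        if self.reduction == 'mean':
            return loss.mean()
        return loss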
Example #6
    def __init__(self, priors_cxcy, threshold=0.5, neg_pos_ratio=3, alpha=1.):
        super(MultiBoxLoss, self).__init__()
        self.priors_cxcy = priors_cxcy
        self.priors_xy = utils.cxcy_to_xy(priors_cxcy)
        self.threshold = threshold
        self.neg_pos_ratio = neg_pos_ratio
        self.alpha = alpha

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        self.smooth_l1 = nn.L1Loss()  # plain L1, despite the name
        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')  # per-box losses, reduced after hard negative mining
Example #7
    def detect(self, predicted_locs, predicted_scores, threshold, max_overlap):
        batch_size = predicted_locs.size(0)
        predicted_scores = torch.nn.functional.softmax(
            predicted_scores, dim=2)  # (batch_size, 8732, 2)

        all_image_boxes = list()
        all_image_scores = list()

        for i in range(batch_size):
            decode_locs = cxcy_to_xy(
                gcxgcy_to_cxcy(predicted_locs[i], self.priors_cxcy))

            image_boxes = list()
            image_scores = list()

            text_scores = predicted_scores[i][:, 1]
            score_above_threshold = text_scores > threshold
            n_score_above_threshold = score_above_threshold.sum().item()

            if n_score_above_threshold > 0:
                text_scores = text_scores[score_above_threshold]
                text_decoded_locs = decode_locs[score_above_threshold]

                overlap = IoU(xy_to_cxcy(text_decoded_locs),
                              xy_to_cxcy(text_decoded_locs))

                suppress = torch.zeros(n_score_above_threshold,
                                       dtype=torch.bool).to(device)

                for box in range(text_decoded_locs.size(0)):
                    if suppress[box]:
                        continue

                    # mark every box that overlaps this one too much; OR keeps earlier marks
                    suppress = suppress | (overlap[box] > max_overlap)
                    suppress[box] = False

                image_boxes.append(text_decoded_locs[~suppress])
                image_scores.append(text_scores[~suppress])

            # if nothing survives the threshold, store a placeholder box
            if len(image_boxes) == 0:
                image_boxes.append(
                    torch.FloatTensor([[0., 0., 1., 1.]]).to(device))
                image_scores.append(torch.FloatTensor([0.]).to(device))

            image_boxes = torch.cat(image_boxes, dim=0)
            image_scores = torch.cat(image_scores, dim=0)

            all_image_boxes.append(image_boxes)
            all_image_scores.append(image_scores)

        return all_image_boxes, all_image_scores
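The hand-rolled suppression loop in detect (and in the later examples) can be replaced by torchvision's built-in NMS, which takes (x_min, y_min, x_max, y_max) boxes and returns the indices of kept boxes in descending-score order. A self-contained sketch:

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0.10, 0.10, 0.50, 0.50],
                      [0.12, 0.11, 0.52, 0.50],   # heavily overlaps the first box
                      [0.60, 0.60, 0.90, 0.90]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, iou_threshold=0.45)  # indices of kept boxes
print(keep)  # tensor([0, 2]); the second box is suppressed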
Example #8
    def __init__(self,
                 threshold=0.5,
                 neg_pos_ratio=3,
                 alpha=1.,
                 device=DEVICE):
        super(MultiBoxLoss, self).__init__()
        self.priors_cxcy = get_default_boxes().to(device)
        self.priors_xy = cxcy_to_xy(self.priors_cxcy)
        self.threshold = threshold
        self.neg_pos_ratio = neg_pos_ratio
        self.alpha = alpha
        self.device = device

        self.smooth_l1 = nn.L1Loss()  # plain L1, despite the name
        self.cross_entropy = nn.CrossEntropyLoss(reduction='none')  # per-box losses, reduced after hard negative mining
Example #9
    def post_processing(self, pred, is_demo=False):

        if is_demo:
            self.assign_anchors_to_cpu()
            pred_loc = pred[0].to('cpu')
            pred_cls = pred[1].to('cpu')
        else:
            pred_loc = pred[0]
            pred_cls = pred[1]

        n_priors = self.center_anchor.size(0)
        assert n_priors == pred_loc.size(1) == pred_cls.size(1)

        # bboxes coming out of decode are in center coordinates
        pred_bboxes = cxcy_to_xy(self.decode(pred_loc.squeeze())).clamp(
            0, 1)  # for batch 1, [67995, 4]
        pred_scores = pred_cls.squeeze()  # for batch 1, [67995, num_classes]

        # clamp scales the corner coordinates (x1y1x2y2) into the 0 ~ 1 range
        # this raised the score from 0.3109697496017331 to 0.3115717185294685

        return pred_bboxes, pred_scores
Example #10
    def detect_objects(self, predicted_locs, predicted_scores, min_score,
                       max_overlap, top_k, device):
        """
        Decipher the 8732 locations and class scores (output of the SSD300) to detect objects.

        For each class, perform Non-Maximum Suppression (NMS) on boxes that are above a minimum threshold.

        :param predicted_locs: predicted locations/boxes w.r.t the 8732 prior boxes, a tensor of dimensions (N, 8732, 4)
        :param predicted_scores: class scores for each of the encoded locations/boxes, a tensor of dimensions (N, 8732, n_classes)
        :param min_score: minimum threshold for a box to be considered a match for a certain class
        :param max_overlap: maximum overlap two boxes can have so that the one with the lower score is not suppressed via NMS
        :param top_k: if there are a lot of resulting detection across all classes, keep only the top 'k'
        :return: detections (boxes, labels, and scores), lists of length batch_size
        """
        batch_size = predicted_locs.size(0)
        n_priors = self.priors_cxcy.size(0)
        predicted_scores = F.softmax(predicted_scores,
                                     dim=2)  # (N, 8732, n_classes)

        # Lists to store final predicted boxes, labels, and scores for all images
        all_images_boxes = list()
        all_images_labels = list()
        all_images_scores = list()

        assert n_priors == predicted_locs.size(1) == predicted_scores.size(1)

        for i in range(batch_size):
            # Decode object coordinates from the form we regressed predicted boxes to
            decoded_locs = cxcy_to_xy(
                gcxgcy_to_cxcy(predicted_locs[i], self.priors_cxcy)
            )  # (8732, 4), these are fractional pt. coordinates

            # Lists to store boxes and scores for this image
            image_boxes = list()
            image_labels = list()
            image_scores = list()

            max_scores, best_label = predicted_scores[i].max(dim=1)  # (8732)

            # Check for each class
            for c in range(1, self.n_classes):
                # Keep only predicted boxes and scores where scores for this class are above the minimum score
                class_scores = predicted_scores[i][:, c]  # (8732)
                score_above_min_score = class_scores > min_score  # torch.bool tensor, for indexing
                n_above_min_score = score_above_min_score.sum().item()
                if n_above_min_score == 0:
                    continue
                class_scores = class_scores[
                    score_above_min_score]  # (n_qualified), n_qualified <= 8732
                class_decoded_locs = decoded_locs[
                    score_above_min_score]  # (n_qualified, 4)

                # Sort predicted boxes and scores by scores
                class_scores, sort_ind = class_scores.sort(
                    dim=0, descending=True)  # (n_qualified), (n_qualified)
                class_decoded_locs = class_decoded_locs[
                    sort_ind]  # (n_qualified, 4)

                # Find the overlap between predicted boxes
                overlap = find_jaccard_overlap(
                    class_decoded_locs,
                    class_decoded_locs)  # (n_qualified, n_qualified)

                # Non-Maximum Suppression (NMS)

                # A torch.bool tensor to keep track of which predicted boxes to suppress
                # True implies suppress, False implies don't suppress
                suppress = torch.zeros(
                    (n_above_min_score,),
                    dtype=torch.bool).to(device)  # (n_qualified)

                # Consider each box in order of decreasing scores
                for box in range(class_decoded_locs.size(0)):
                    # If this box is already marked for suppression
                    if suppress[box]:
                        continue

                    # Suppress boxes whose overlaps (with this box) are greater than maximum overlap
                    # Find such boxes and update suppress indices
                    suppress = suppress | (overlap[box] > max_overlap)
                    # The OR retains previously suppressed boxes

                    # Don't suppress this box, even though it has an overlap of 1 with itself
                    suppress[box] = False

                # Store only unsuppressed boxes for this class
                image_boxes.append(class_decoded_locs[~suppress])
                image_labels.append(
                    torch.LongTensor(
                        (~suppress).sum().item() * [c]).to(device))
                image_scores.append(class_scores[~suppress])

            # If no object in any class is found, store a placeholder for 'background'
            if len(image_boxes) == 0:
                image_boxes.append(
                    torch.FloatTensor([[0., 0., 1., 1.]]).to(device))
                image_labels.append(torch.LongTensor([0]).to(device))
                image_scores.append(torch.FloatTensor([0.]).to(device))

            # Concatenate into single tensors
            image_boxes = torch.cat(image_boxes, dim=0)  # (n_objects, 4)
            image_labels = torch.cat(image_labels, dim=0)  # (n_objects)
            image_scores = torch.cat(image_scores, dim=0)  # (n_objects)
            n_objects = image_scores.size(0)

            # Keep only the top k objects
            if n_objects > top_k:
                image_scores, sort_ind = image_scores.sort(dim=0,
                                                           descending=True)
                image_scores = image_scores[:top_k]  # (top_k)
                image_boxes = image_boxes[sort_ind][:top_k]  # (top_k, 4)
                image_labels = image_labels[sort_ind][:top_k]  # (top_k)

            # Append to lists that store predicted boxes and scores for all images
            all_images_boxes.append(image_boxes)
            all_images_labels.append(image_labels)
            all_images_scores.append(image_scores)

        return all_images_boxes, all_images_labels, all_images_scores  # lists of length batch_size
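These examples variously call find_jaccard_overlap, find_IoU, or IoU for pairwise box overlap. A minimal sketch of such a helper, assuming both sets are in boundary coordinates (x_min, y_min, x_max, y_max):

import torch

def find_jaccard_overlap(set_1, set_2):
    # pairwise intersection via broadcasting: (n1, 1, 2) against (1, n2, 2)
    lower = torch.max(set_1[:, None, :2], set_2[None, :, :2])
    upper = torch.min(set_1[:, None, 2:], set_2[None, :, 2:])
    inter = (upper - lower).clamp(min=0)            # (n1, n2, 2)
    inter_area = inter[:, :, 0] * inter[:, :, 1]    # (n1, n2)

    areas_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1])
    areas_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1])
    union = areas_1[:, None] + areas_2[None, :] - inter_area
    return inter_area / union                       # (n1, n2)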
Example #11
    def my_post_process_deprecated(self, predicted_offsets, predicted_scores, score_threshold, iou_threshold):
        ''' This approach is based on my intuition that the box's class label should be the argmax of the softmax output.
        With this approach, score_threshold is not actually used properly, since the max of the softmax output is usually > 0.3.
        And of course, this doesn't work well, as the model's output is biased toward the background class (because neg_pos_ratio=3).
        So in many cases the background score overwhelms the other classes, for example: (0.55, 0.01, 0.45, 0.04, 0.0, ...).
        The result is that recall is very low (it can't detect all the objects), while precision is quite high,
        probably >95%. But still, the APs and mAP are low in general.
        
        Params:
            predicted_offsets: predicted offsets w.r.t the 8732 prior boxes, (gcxgcy), a tensor of dimensions (N, 8732, 4)
            predicted_scores: class scores for each of the encoded locations/boxes, a tensor of dimensions (N, 8732, n_classes)
            score_threshold: minimum threshold for a box to be considered a match for a certain class
            iou_threshold: maximum overlap two boxes can have so that the one with the lower score is not suppressed via NMS
        Return: 
            detections: (boxes, labels, scores), each a list of N tensors
            boxes: N (n_boxes, 4)
            labels: N (n_boxes,)
            scores: N (n_boxes,)
        '''
        boxes = list()
        labels = list()
        scores = list()
        N, n_priors = predicted_offsets.shape[0:2]
        
        predicted_scores = F.softmax(predicted_scores, dim=2)  # (N, 8732, n_classes)
        
        # for each box, find the largest score and the class_id with respect to it
        class_scores, class_ids = predicted_scores.max(dim=2) # (N, 8732) and (N, 8732)
        
        # for each image in the batch
        for i in range(N):
            boxes_i = list()
            labels_i = list()
            scores_i = list()
            
            # filter out boxes that are not qualified, that were predicted as background or with low confidence score
            qualify_mask = (class_ids[i] != 0) & (class_scores[i] > score_threshold) # (8732)
            qualified_boxes = predicted_offsets[i][qualify_mask]  # (n_qualified_boxes, 4)
            qualified_boxes_class = class_ids[i][qualify_mask]    # (n_qualified_boxes)
            qualified_boxes_score = class_scores[i][qualify_mask] # (n_qualified_boxes)
            
            if len(qualified_boxes) > 0:
                # convert to xy coordinates format
                qualified_boxes = cxcy_to_xy(gcxgcy_to_cxcy(qualified_boxes, self.priors_cxcy[qualify_mask])) # (n_qualified_boxes, 4)

                # Non-max suppression
                for class_i in qualified_boxes_class.unique(sorted=False).tolist():
                    class_mask = qualified_boxes_class == class_i
                    boxes_class_i = qualified_boxes[class_mask]
                    boxes_score_class_i = qualified_boxes_score[class_mask]
                    
                    final_box_ids = nms(boxes_class_i, boxes_score_class_i, iou_threshold)  # (n_final_boxes,)
                    
                    boxes_i.extend(boxes_class_i[final_box_ids].tolist())
                    labels_i.extend([class_i]*len(final_box_ids))
                    scores_i.extend(boxes_score_class_i[final_box_ids].tolist())
        
            boxes.append(torch.FloatTensor(boxes_i).to(device))
            labels.append(torch.LongTensor(labels_i).to(device))
            scores.append(torch.FloatTensor(scores_i).to(device))
        
        return boxes, labels, scores
Example #12
 def detect(self, locs_pred, cls_pred, min_score, max_overlap, top_k):
     '''
         Detect objects, perform NMS on boxes that are above a minimum threshold.
         locs_pred: Pred location, a tensor of dimensions (N, 8732, 4)
         cls_pred: Pred class scores for each of the encoded boxes, a tensor of dimensions (N, 8732, n_classes)
         min_score: min threshold
         max_overlap: maximum overlap two boxes can have
         top_k: if there are a lot of resulting detections across all classes, keep only the top 'k'
         
         Out: detection lists: boxes, labels, scores
     '''
     batch_size = locs_pred.size(0)    #N
     n_default_boxes = self.default_boxes.size(0)    #8732
     cls_pred = F.softmax(cls_pred, dim= 2)    #(N, 8732, n_classes)
     assert n_default_boxes == locs_pred.size(1) == cls_pred.size(1)
     
     all_images_boxes = []
     all_images_labels = []
     all_images_scores = []
     
     for i in range(batch_size):
         #Decode object
         decoded_locs = cxcy_to_xy(decode_bboxes(locs_pred[i], self.default_boxes)) #(8732, 4)
         
         image_boxes = []
         image_labels = []
         image_scores = []
         
         max_scores, best_label = cls_pred[i].max(dim= 1)    #(8732)
         
         #Check for each class
         for c in range(1, self.num_classes):
             class_scores = cls_pred[i][:, c]    #8732
             score_above_min_score = class_scores > min_score
             n_above_min_score = score_above_min_score.sum().item()
             
             if n_above_min_score == 0:
                 continue
             
             class_scores = class_scores[score_above_min_score]    # <=8732
             class_decoded_locs = decoded_locs[score_above_min_score] # <=8732
             
             #Sort pred boxes and scores by scores
             class_scores, sort_id = class_scores.sort(dim= 0, descending= True)
             class_decoded_locs = class_decoded_locs[sort_id]
             
             #Find overlap between pred locs
             overlap = find_IoU(class_decoded_locs, class_decoded_locs)
             
             #Apply NMS
             suppress = torch.zeros((n_above_min_score,), dtype=torch.bool).to(device)
             
             for box_id in range(class_decoded_locs.size(0)):
                 if suppress[box_id]:
                     continue
                 # suppress boxes that overlap this (higher-scoring) box too much
                 suppress = suppress | (overlap[box_id] > max_overlap)
                 
                 suppress[box_id] = False
             
             # Store only unsuppressed boxes for this class
             image_boxes.append(class_decoded_locs[~suppress])
             image_labels.append(torch.LongTensor((~suppress).sum().item() * [c]).to(device))
             image_scores.append(class_scores[~suppress])
         
         if len(image_boxes) == 0:
             image_boxes.append(torch.FloatTensor([[0., 0., 1., 1.]]).to(device))
             image_labels.append(torch.LongTensor([0]).to(device))
             image_scores.append(torch.FloatTensor([0.]).to(device))
         
         #Concat into single tensors
         image_boxes = torch.cat(image_boxes, dim= 0)    #(n_objects, 4)
         image_labels = torch.cat(image_labels, dim=0)  # (n_objects)
         image_scores = torch.cat(image_scores, dim=0)  # (n_objects)
         n_objects = image_scores.size(0)
         
         #Keep only the top k objects
         if n_objects > top_k:
             image_scores, sort_index = image_scores.sort(dim=0, descending=True)
             image_scores = image_scores[:top_k]  # (top_k)
             image_boxes = image_boxes[sort_index][:top_k]  # (top_k, 4)
             image_labels = image_labels[sort_index][:top_k]  # (top_k)
         
         all_images_boxes.append(image_boxes)
         all_images_labels.append(image_labels)
         all_images_scores.append(image_scores)
         
     return all_images_boxes, all_images_labels, all_images_scores        
Example #13
    def forward(self, locs_pred, cls_pred, boxes, labels):
        '''
            Forward propagation
            locs_pred: Pred location, a tensor of dimensions (N, 8732, 4)
            cls_pred:  Pred class scores for each of the encoded boxes, a tensor of dimensions (N, 8732, n_classes)
            boxes: True object bounding boxes, a list of N tensors
            labels: True object labels, a list of N tensors
            
            Out: Multibox loss
        '''
        batch_size = locs_pred.size(0)  #N
        n_default_boxes = self.default_boxes.size(0)  #8732
        num_classes = cls_pred.size(2)  #num_classes

        t_locs = torch.zeros((batch_size, n_default_boxes, 4),
                             dtype=torch.float).to(device)  #(N, 8732, 4)
        t_classes = torch.zeros((batch_size, n_default_boxes),
                                dtype=torch.long).to(device)  #(N, 8732)

        default_boxes_xy = cxcy_to_xy(self.default_boxes)
        for i in range(batch_size):
            n_objects = boxes[i].size(0)

            overlap = find_IoU(boxes[i], default_boxes_xy)  #(n_objects, 8732)

            #for each default box, find the object has maximum overlap
            overlap_each_default_box, object_each_default_box = overlap.max(
                dim=0)  #(8732)

            #find the default box that has maximum overlap with each object
            _, default_boxes_each_object = overlap.max(dim=1)

            object_each_default_box[
                default_boxes_each_object] = torch.LongTensor(
                    range(n_objects)).to(device)

            overlap_each_default_box[default_boxes_each_object] = 1.

            #Labels for each default box
            label_each_default_box = labels[i][
                object_each_default_box]  #(8732)

            label_each_default_box[
                overlap_each_default_box < self.threshold] = 0  #(8732)

            #Save
            t_classes[i] = label_each_default_box

            #Encode pred bboxes
            t_locs[i] = encode_bboxes(
                xy_to_cxcy(boxes[i][object_each_default_box]),
                self.default_boxes)  #(8732, 4)

        # Identify priors that are positive
        pos_default_boxes = t_classes != 0  #(N, 8732)

        #Localization loss
        #Localization loss is computed only over positive default boxes

        smooth_L1_loss = nn.SmoothL1Loss()
        loc_loss = smooth_L1_loss(locs_pred[pos_default_boxes],
                                  t_locs[pos_default_boxes])

        #Confidence loss
        #Apply hard negative mining

        #number of positive and hard-negative default boxes per image
        n_positive = pos_default_boxes.sum(dim=1)
        n_hard_negatives = self.neg_pos * n_positive

        #Find the loss for all priors
        cross_entropy_loss = nn.CrossEntropyLoss(reduction='none')
        confidence_loss_all = cross_entropy_loss(cls_pred.view(
            -1, num_classes), t_classes.view(-1))  #(N*8732)
        confidence_loss_all = confidence_loss_all.view(
            batch_size, n_default_boxes)  #(N, 8732)

        confidence_pos_loss = confidence_loss_all[pos_default_boxes]

        #Find which priors are hard-negative
        confidence_neg_loss = confidence_loss_all.clone()  #(N, 8732)
        confidence_neg_loss[pos_default_boxes] = 0.
        confidence_neg_loss, _ = confidence_neg_loss.sort(dim=1,
                                                          descending=True)

        hardness_ranks = torch.LongTensor(range(n_default_boxes)).unsqueeze(
            0).expand_as(confidence_neg_loss).to(device)  # (N, 8732)

        hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(
            1)  # (N, 8732)

        confidence_hard_neg_loss = confidence_neg_loss[hard_negatives]

        confidence_loss = (
            confidence_hard_neg_loss.sum() +
            confidence_pos_loss.sum()) / n_positive.sum().float()

        return self.alpha * loc_loss + confidence_loss
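The matching loop above encodes ground-truth boxes against the default boxes via encode_bboxes. A minimal sketch, assuming it is the exact inverse of the decoding convention (variance constants 10 and 5) used in the other examples (a hypothetical reimplementation, not the repo's helper):

import torch

def encode_bboxes(cxcy, default_boxes):
    # center-size ground truth -> scaled offsets w.r.t. the default (prior) boxes
    return torch.cat([(cxcy[:, :2] - default_boxes[:, :2]) / (default_boxes[:, 2:] / 10),  # g_cx, g_cy
                      torch.log(cxcy[:, 2:] / default_boxes[:, 2:]) * 5], dim=1)           # g_w, g_h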
Example #14
    def detect_objects(self, predicted_locs, predicted_scores, min_score,
                       max_overlap, top_k):
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        batch_size = predicted_locs.size(0)
        n_priors = self.priors_cxcy.size(0)
        predicted_scores = F.softmax(predicted_scores, dim=2)

        all_images_boxes = list()
        all_images_labels = list()
        all_images_scores = list()

        assert n_priors == predicted_locs.size(1) == predicted_scores.size(1)

        for i in range(batch_size):
            # Decode object coordinates from the form we regressed predicted boxes to
            decoded_locs = cxcy_to_xy(
                gcxgcy_to_cxcy(predicted_locs[i], self.priors_cxcy)
            )  # (n_priors, 4), these are fractional pt. coordinates

            image_boxes = list()
            image_labels = list()
            image_scores = list()

            max_scores, best_label = predicted_scores[i].max(dim=1)  # (8732)

            for c in range(1, self.n_classes):
                # Keep only predicted boxes and scores where scores for this class are above the minimum score
                class_scores = predicted_scores[i][:, c]
                score_above_min_score = class_scores > min_score  # torch.bool tensor, for indexing
                n_above_min_score = score_above_min_score.sum().item()
                if n_above_min_score == 0:
                    continue
                class_scores = class_scores[score_above_min_score]
                class_decoded_locs = decoded_locs[score_above_min_score]

                # Sort predicted boxes and scores by scores
                class_scores, sort_ind = class_scores.sort(dim=0,
                                                           descending=True)
                class_decoded_locs = class_decoded_locs[sort_ind]
                # Find the overlap between predicted boxes
                overlap = find_jaccard_overlap(class_decoded_locs,
                                               class_decoded_locs)

                # Non-Maximum Suppression (NMS)

                # A torch.bool tensor to keep track of which predicted boxes to suppress
                # True implies suppress, False implies don't suppress
                suppress = torch.zeros((n_above_min_score,),
                                       dtype=torch.bool).to(device)  # (n_qualified)

                # Consider each box in order of decreasing scores
                for box in range(class_decoded_locs.size(0)):
                    # If this box is already marked for suppression
                    if suppress[box]:
                        continue

                    # Suppress boxes whose overlaps (with this box) are greater than maximum overlap
                    # Find such boxes and update suppress indices
                    suppress = suppress | (overlap[box] > max_overlap)
                    # The OR retains previously suppressed boxes

                    # Don't suppress this box, even though it has an overlap of 1 with itself
                    suppress[box] = False

                # Store only unsuppressed boxes for this class
                image_boxes.append(class_decoded_locs[~suppress])
                image_labels.append(
                    torch.LongTensor(
                        (~suppress).sum().item() * [c]).to(device))
                image_scores.append(class_scores[~suppress])

            # If no object in any class is found, store a placeholder for 'background'
            if len(image_boxes) == 0:
                image_boxes.append(
                    torch.FloatTensor([[0., 0., 1., 1.]]).to(device))
                image_labels.append(torch.LongTensor([0]).to(device))
                image_scores.append(torch.FloatTensor([0.]).to(device))

            # Concatenate into single tensors
            image_boxes = torch.cat(image_boxes, dim=0)  # (n_objects, 4)
            image_labels = torch.cat(image_labels, dim=0)  # (n_objects)
            image_scores = torch.cat(image_scores, dim=0)  # (n_objects)
            n_objects = image_scores.size(0)

            # Keep only the top k objects
            if n_objects > top_k:
                image_scores, sort_ind = image_scores.sort(dim=0,
                                                           descending=True)
                image_scores = image_scores[:top_k]  # (top_k)
                image_boxes = image_boxes[sort_ind][:top_k]  # (top_k, 4)
                image_labels = image_labels[sort_ind][:top_k]  # (top_k)

            # Append to lists that store predicted boxes and scores for all images
            all_images_boxes.append(image_boxes)
            all_images_labels.append(image_labels)
            all_images_scores.append(image_scores)

        return all_images_boxes, all_images_labels, all_images_scores  # lists of length batch_size
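Every variant here implements top-k by sorting all scores and slicing. Tensor.topk does the same selection in one call; a sketch reusing the names from detect_objects (assuming the tensors from the snippet above):

# equivalent to the sort-then-slice pattern above
k = min(top_k, image_scores.size(0))
image_scores, sort_ind = image_scores.topk(k, dim=0, largest=True, sorted=True)
image_boxes = image_boxes[sort_ind]    # (k, 4)
image_labels = image_labels[sort_ind]  # (k)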