Exemplo n.º 1
0
    def encode(self, boxes, labels):
        '''Build per-anchor regression and classification targets.

        Each anchor is paired with the ground-truth box it overlaps most
        (highest IoU), and the pair is encoded with the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        No IoU threshold is applied: every anchor receives the label of its
        best-overlapping box, shifted by one.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        anchors = self.anchor_boxes

        # For every anchor, the column (object) index with the largest IoU.
        best_obj = box_iou(anchors, boxes).max(1)[1]

        # Labels are shifted by one relative to the input labels.
        cls_targets = labels[best_obj] + 1

        # Switch both sides to (cx, cy, w, h) before computing the offsets.
        matched = change_box_order(boxes[best_obj], 'xyxy2xywh')
        anchors = change_box_order(anchors, 'xyxy2xywh')
        anchor_xy = anchors[:, :2]
        anchor_wh = anchors[:, 2:]

        offset_xy = (matched[:, :2] - anchor_xy) / anchor_wh
        offset_wh = torch.log(matched[:, 2:] / anchor_wh)
        loc_targets = torch.cat([offset_xy, offset_wh], 1)

        # NOTE: background / ignore thresholding is intentionally disabled
        # here. The disabled variant set targets with IoU < 0.5 to 0 and
        # marked IoUs between 0.4 and 0.5 as ignored (-1).
        return loc_targets, cls_targets
Exemplo n.º 2
0
    def encode(self, boxes, labels):
        '''Encode target bounding boxes and class labels.

        SSD coding rules:
          tx = (x - anchor_x) / (variance[0]*anchor_w)
          ty = (y - anchor_y) / (variance[0]*anchor_h)
          tw = log(w / anchor_w) / variance[1]
          th = log(h / anchor_h) / variance[1]

        Matching runs in two passes:
          1. Greedy one-to-one matching: repeatedly take the largest remaining
             IoU and bind that default box to that object.
          2. Every default box still unmatched whose best IoU is >= 0.5 is
             bound to its best-overlapping object.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
            Matched default boxes get ``label + 1``; unmatched ones get 0.

        Reference:
          https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
        '''
        def argmax(x):
            '''Find the (row, col) index of the max value of a 2D tensor.'''
            v, i = x.max(0)
            # v.max(0)[1] is a 0-dim tensor on PyTorch >= 0.4; indexing it
            # with [0] raises, so convert it with .item() instead.
            j = v.max(0)[1].item()
            return (i[j], j)

        default_boxes = self.default_boxes  # xywh
        default_boxes = change_box_order(default_boxes, 'xywh2xyxy')

        ious = box_iou(default_boxes, boxes)  # [#anchors, #obj]
        index = torch.LongTensor(len(default_boxes)).fill_(-1)  # -1 = unmatched
        masked_ious = ious.clone()

        # Pass 1: greedy one-to-one matching. Zeroing the row and column
        # removes the matched default box and object from further rounds.
        while True:
            i, j = argmax(masked_ious)
            if masked_ious[i, j] < 1e-6:
                break
            index[i] = j
            masked_ious[i, :] = 0
            masked_ious[:, j] = 0

        # Pass 2: bind remaining default boxes with a good overlap to their
        # best object. Boolean-mask indexing keeps the selection 2D even when
        # a single row is selected, so the per-row argmax is always valid.
        mask = (index < 0) & (ious.max(1)[0] >= 0.5)
        if mask.any():
            index[mask] = ious[mask].max(1)[1]

        boxes = boxes[index.clamp(min=0)]  # negative index not supported
        boxes = change_box_order(boxes, 'xyxy2xywh')
        default_boxes = change_box_order(default_boxes, 'xyxy2xywh')

        variances = (0.1, 0.2)
        loc_xy = (boxes[:, :2] -
                  default_boxes[:, :2]) / default_boxes[:, 2:] / variances[0]
        loc_wh = torch.log(boxes[:, 2:] / default_boxes[:, 2:]) / variances[1]
        loc_targets = torch.cat([loc_xy, loc_wh], 1)

        # Shift labels by one so that 0 can mark unmatched default boxes.
        cls_targets = 1 + labels[index.clamp(min=0)]
        cls_targets[index < 0] = 0
        return loc_targets, cls_targets
Exemplo n.º 3
0
    def encode(self, boxes, labels):
        '''Match ground-truth boxes to anchors and encode the targets.

        Coding rules applied by this method (no variance scaling is used):
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Matching first binds anchors and objects one-to-one, always taking the
        largest remaining IoU, and then binds every still-unmatched anchor whose
        best IoU is >= 0.5 to its best-overlapping object.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj,4].
          labels: (tensor) object class labels, sized [#obj,].

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,];
            matched anchors get ``label + 1``, unmatched anchors get 0.

        Reference:
          https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
        '''
        anchors = self.anchor_boxes
        overlaps = box_iou(anchors, boxes)  # [#anchors, #obj]

        # matched[a] holds the object index bound to anchor a, or -1 if none.
        matched = torch.full((anchors.size(0),), -1, dtype=torch.long)

        # Greedy one-to-one pass over a scratch copy of the IoU matrix.
        remaining = overlaps.clone()
        while True:
            # Locate the global maximum: best row per column, then best column.
            col_best, col_best_row = remaining.max(0)
            obj = col_best.max(0)[1].item()
            anchor = col_best_row[obj]
            if remaining[anchor, obj] < 1e-6:
                break
            matched[anchor] = obj
            # Take this anchor and this object out of further rounds.
            remaining[anchor, :] = 0
            remaining[:, obj] = 0

        # Second pass: unmatched anchors that still overlap an object well.
        leftover = (matched < 0) & (overlaps.max(1)[0] >= 0.5)
        if leftover.any():
            matched[leftover] = overlaps[leftover].max(1)[1]

        # Unmatched anchors borrow object 0 so the indexing stays valid.
        gather_ids = matched.clamp(min=0)
        gt = change_box_order(boxes[gather_ids], 'xyxy2xywh')
        anchors = change_box_order(anchors, 'xyxy2xywh')
        anchor_xy = anchors[:, :2]
        anchor_wh = anchors[:, 2:]

        offset_xy = (gt[:, :2] - anchor_xy) / anchor_wh
        offset_wh = torch.log(gt[:, 2:] / anchor_wh)
        loc_targets = torch.cat([offset_xy, offset_wh], 1)

        cls_targets = 1 + labels[gather_ids]
        cls_targets[matched < 0] = 0
        return loc_targets, cls_targets
Exemplo n.º 4
0
def random_crop(img, boxes, labels, min_scale=0.3, max_aspect_ratio=2.):
    '''Randomly crop a PIL image.

    For each minimum-IoU constraint in (0, 0.1, 0.3, 0.5, 0.7, 0.9), up to 100
    random crop windows are sampled; the first window whose IoU with every
    box is at least the constraint is kept as a candidate. The full image is
    always a candidate. One candidate is then chosen uniformly at random.

    Boxes whose center falls outside the chosen window are dropped; the rest
    are shifted into crop coordinates and clamped to the crop.

    Args:
      img: (PIL.Image) image.
      boxes: (tensor) bounding boxes, sized [#obj, 4].
      labels: (tensor) bounding box labels, sized [#obj,].
      min_scale: (float) minimal image width/height scale.
      max_aspect_ratio: (float) maximum width/height aspect ratio.

    Returns:
      img: (PIL.Image) cropped image.
      boxes: (tensor) object boxes. If no box center lies inside the crop,
        a single all-zero box is returned.
      labels: (tensor) object labels. If no box center lies inside the crop,
        a single label 0 is returned.
    '''
    img = img.copy()
    boxes = boxes.clone()
    labels = labels.clone()
    imw, imh = img.size
    params = [(0, 0, imw, imh)]  # crop roi (x,y,w,h) out
    for min_iou in (0, 0.1, 0.3, 0.5, 0.7, 0.9):
        for _ in range(100):
            scale = random.uniform(min_scale, 1)
            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            w = int(imw * scale * math.sqrt(aspect_ratio))
            h = int(imh * scale / math.sqrt(aspect_ratio))

            # random.randrange(0) raises ValueError, which happens when the
            # sampled window spans the whole width/height. The only valid
            # offset in that case is 0.
            x = random.randrange(imw - w) if imw > w else 0
            y = random.randrange(imh - h) if imh > h else 0

            roi = torch.tensor([[x, y, x + w, y + h]], dtype=torch.float)
            ious = box_iou(boxes, roi)
            # Accept the window only if every box meets the IoU constraint.
            if ious.min() >= min_iou:
                params.append((x, y, w, h))
                break

    x, y, w, h = random.choice(params)
    img = img.crop((x, y, x + w, y + h))

    # Keep only the boxes whose center lies inside the crop window.
    center = (boxes[:, :2] + boxes[:, 2:]) / 2
    mask = (center[:, 0] >= x) & (center[:, 0] <= x + w) \
        & (center[:, 1] >= y) & (center[:, 1] <= y + h)
    if mask.any():
        # Translate to crop coordinates, then clip to the crop bounds.
        boxes = boxes[mask] - torch.tensor([x, y, x, y], dtype=torch.float)
        boxes = box_clamp(boxes, 0, 0, w, h)
        labels = labels[mask]
    else:
        # No object survived the crop: return a placeholder box and label.
        boxes = torch.tensor([[0, 0, 0, 0]], dtype=torch.float)
        labels = torch.tensor([0], dtype=torch.long)
    return img, boxes, labels
Exemplo n.º 5
0
    def nonlocal_matching(self):
        '''Decode box predictions and greedily match them one-to-one by IoU.

        NOTE(review): as written, this fragment reads several names that are
        not bound inside the method: `anchor_boxes` (the assignment below
        binds `anchors_boxes`), `loc_preds`, `boxes` and `argmax`. Unless they
        exist in an enclosing scope this raises NameError -- confirm against
        the full file. Nothing is returned in the visible code.
        '''

        # Presumably converts the anchors to (cx, cy, w, h), judging by the
        # mode string. NOTE(review): the result is bound to `anchors_boxes`
        # but the lines below read `anchor_boxes`.
        anchors_boxes = change_box_order(self.anchors_boxes, 'xyxy2xywh')
        # Decode predicted offsets: columns :2 are scaled by the anchor's last
        # two columns and shifted by its first two; columns 2: are
        # exponentiated and scaled by the anchor's last two columns.
        xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_preds[:, 2:].exp() * anchor_boxes[:, 2:]
        # Center/size -> corner form (xy - wh/2, xy + wh/2).
        box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

        ious = box_iou(box_preds, boxes)
        # One slot per anchor row, initialised to -1 (no match yet).
        index = torch.empty(anchor_boxes.size(0), dtype=torch.long).fill_(-1)

        # Work on a copy so `ious` itself is left untouched by the loop.
        masked_ious = ious.clone()
        while True:
            # `argmax` is not defined in this fragment; presumably it returns
            # the (row, col) of the largest entry -- TODO confirm.
            i, j = argmax(masked_ious)
            if masked_ious[i, j] < 1e-6:
                break
            index[i] = j
            # Zero the matched row and column so neither is picked again.
            masked_ious[i, :] = 0
            masked_ious[:, j] = 0

        # True for rows that received no match in the loop above.
        # NOTE(review): `mask` is not used or returned in the visible code.
        mask = (index < 0)  # chose the activated bbox
Exemplo n.º 6
0
    def encode(self, boxes, labels):
        '''Encode target bounding boxes and class labels.

        SSD coding rules:
          tx = (x - anchor_x) / (variance[0]*anchor_w)
          ty = (y - anchor_y) / (variance[0]*anchor_h)
          tw = log(w / anchor_w) / variance[1]
          th = log(h / anchor_h) / variance[1]

        In this method "default box" and the docstring's "anchor" refer to
        the same rows of ``self.default_boxes``.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
            Matched default boxes get ``label + 1``; unmatched ones get 0.

        Reference:
          https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
        '''
        def argmax(x):
            '''Find the (row, col) index of the max value of a 2D tensor.'''
            v, i = x.max(0)
            # v.max(0)[1] is a 0-dim tensor on PyTorch >= 0.4; indexing it
            # with [0] raises, so convert it with .item() instead.
            j = v.max(0)[1].item()
            return (i[j], j)

        default_boxes = self.default_boxes  # xywh
        default_boxes = change_box_order(default_boxes, 'xywh2xyxy')

        ious = box_iou(default_boxes, boxes)  # [#anchors, #obj]
        index = torch.LongTensor(len(default_boxes)).fill_(-1)  # -1 = unmatched
        masked_ious = ious.clone()

        # Match ground truth boxes with default boxes one-to-one, always
        # taking the largest remaining IoU. Zeroing the row and column removes
        # the matched pair from further rounds.
        while True:
            i, j = argmax(masked_ious)
            if masked_ious[i, j] < 1e-6:
                break
            index[i] = j
            masked_ious[i, :] = 0
            masked_ious[:, j] = 0

        # Assign ground truth boxes to unmatched default boxes if the overlap
        # is good enough (IoU >= 0.5).
        # Consequence: a ground truth box may be matched with several default
        # boxes, but each default box has at most one ground truth box, and
        # some default boxes stay unmatched.
        # Boolean-mask indexing keeps the selection 2D even when only one
        # default box qualifies; `nonzero().squeeze()` collapsed that case to
        # a single 1-D row, which made `.max(1)` fail.
        mask = (index < 0) & (ious.max(1)[0] >= 0.5)
        if mask.any():
            index[mask] = ious[mask].max(1)[1]

        # Shape: (num_default_boxes, 4)
        # Replace each default box's index with the ground truth box it was
        # matched with. Unmatched default boxes (index -1) are clamped to
        # index 0 and so borrow the first ground truth box; they are marked
        # as negatives through their class target below (presumably the loss
        # excludes them from the location term -- see SSDLoss).
        boxes = boxes[index.clamp(min=0)]  # negative index not supported

        boxes = change_box_order(boxes, 'xyxy2xywh')
        default_boxes = change_box_order(default_boxes, 'xyxy2xywh')

        variances = (0.1, 0.2)
        loc_xy = (boxes[:, :2] -
                  default_boxes[:, :2]) / default_boxes[:, 2:] / variances[0]
        loc_wh = torch.log(boxes[:, 2:] / default_boxes[:, 2:]) / variances[1]
        loc_targets = torch.cat([loc_xy, loc_wh], 1)

        # Shift every label up by one so that class 0 is free to mean
        # "unmatched / background".
        cls_targets = 1 + labels[index.clamp(min=0)]  # Positive examples
        # Unmatched default boxes become class 0, i.e. negative examples.
        cls_targets[index < 0] = 0  # Negative examples

        return loc_targets, cls_targets