Example No. 1
 def encode(self, labels, boxes, input_size=None, test=False):
     '''
     Encode the objects parsed from the XML annotations into the bounding
     box regression format:
     tx = (x - anchor_x) / anchor_w
     ty = (y - anchor_y) / anchor_h
     tw = log(w / anchor_w)
     th = log(h / anchor_h)
     Note that this method takes the objects of a single image, so it must
         be called one image at a time
     args:
         labels: tensor, label of each ground-truth box, sized [#box,]
         boxes: tensor, ground-truth bounding boxes of
             (xmin, ymin, xmax, ymax), sized [#box, 4]
         input_size: int/tuple, size of the input image
         test: used at test time
     returns:
         cls_targets: tensor, label assigned to each anchor, sized
             [#anchors,]; 0 is the background class, 1-k are the k object
             classes, and -1 marks ignored anchors
         loc_targets: tensor, bbr target assigned to each anchor, sized
             [#anchors, 4], where #anchors counts the anchors over all
             feature maps
     '''
     if input_size is None:
         input_size = self.input_size
         anchor_boxes = self.anchor_boxes
     else:
         if len(input_size) != 2:
             raise ValueError('input_size must be a (w, h) sequence of length 2')
         input_size = torch.tensor(input_size, dtype=torch.float)
         anchor_boxes = self._get_anchor_boxes(input_size)
     boxes = change_box_order(boxes, 'xyxy2xywh')
     # compute the IoU between every anchor and every ground-truth box to assign labels
     ious = box_iou(anchor_boxes, boxes, order='xywh')
     max_ious, max_ids = ious.max(1)
     boxes = boxes[max_ids]
     if test:
         _, orders = max_ious.sort(0, True)
         loc_targets = change_box_order(anchor_boxes, 'xywh2xyxy')[orders]
     else:
         # compute the regression offsets, i.e. the bbr targets
         loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
         loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
         loc_targets = torch.cat([loc_xy, loc_wh], 1)
     cls_targets = 1 + labels[max_ids]  # +1 reserves 0 for the background class
     # assign the background class and mark the anchors to ignore
     cls_targets[max_ious < self.iou_thre] = 0
     ignore = (max_ious > self.ignore_thres[0]) & \
         (max_ious < self.ignore_thres[1])
     cls_targets[ignore] = -1  # these anchors are not used
     if test:
         cls_targets = cls_targets[orders]
     return cls_targets, loc_targets
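Example No. 1 only shows the encoder. As a minimal sketch of the inverse transform (assuming anchor boxes in (x, y, w, h) order, as in encode; decode_loc is a hypothetical name, not part of the original code):

import torch

def decode_loc(loc_preds, anchor_boxes):
    # Invert tx = (x - anchor_x) / anchor_w and tw = log(w / anchor_w):
    #   x = tx * anchor_w + anchor_x,  w = exp(tw) * anchor_w
    xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_preds[:, 2:].exp() * anchor_boxes[:, 2:]
    return torch.cat([xy, wh], 1)  # still (x, y, w, h) order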
Example No. 2
    def encode(self, gt_quad_boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        TextBoxes++ quad_box encoder:
          tx_n = (x_n - anchor_x) / anchor_w
          ty_n = (y_n - anchor_y) / anchor_h

        Args:
          gt_quad_boxes: (tensor) bounding boxes of (xyxyxyxy), sized [#obj, 8].
          labels: (tensor) object class labels, sized [#obj, ].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,8].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''

        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
            else torch.Tensor(input_size)

        anchor_rect_boxes = self._get_anchor_boxes(
            input_size)  # (num_anchor, 4)
        anchor_quad_boxes = change_box_order(anchor_rect_boxes,
                                             "xywh2quad")  # (num_anchor, 8)

        gt_rect_boxes = change_box_order(gt_quad_boxes, "quad2xyxy")

        ious = box_iou(anchor_rect_boxes, gt_rect_boxes)
        max_ious, max_ids = ious.max(1)

        # Match each anchor to the ground-truth box with the largest IoU
        gt_quad_boxes = gt_quad_boxes[max_ids]  # (num_anchor, 8)
        gt_rect_boxes = gt_rect_boxes[max_ids]  # (num_anchor, 4)

        # For rectangle boxes -> used in TextBoxes
        #gt_rect_boxes = change_box_order(gt_rect_boxes, "xyxy2xywh")
        #loc_rect_yx = (gt_rect_boxes[:, :2] - anchor_rect_boxes[:, :2]) / anchor_rect_boxes[:, 2:]
        #loc_rect_hw = torch.log(gt_rect_boxes[:, 2:] / anchor_rect_boxes[:, 2:])

        # For quad boxes -> used in TextBoxes++
        anchor_boxes_hw = anchor_rect_boxes[:, 2:4].repeat(1, 4)
        loc_quad_yx = (gt_quad_boxes - anchor_quad_boxes) / anchor_boxes_hw

        # loc_targets = torch.cat([loc_rect_yx, loc_rect_hw, loc_quad_yx], dim=1) # (num_anchor, 12)
        loc_targets = loc_quad_yx
        cls_targets = labels[max_ids]

        cls_targets[max_ious < 0.5] = -1  # ignore (0.4~0.5) : -1
        cls_targets[max_ious < 0.4] = 0  # background (0.0~0.4): 0
        # positive (0.5~1.0) : 1
        return loc_targets, cls_targets
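change_box_order's "xywh2quad" and "quad2xyxy" modes are used above but not shown. A sketch of what they presumably compute (hypothetical helpers; the real implementation may differ):

import torch

def xywh_to_quad(boxes):
    # Expand each (x, y, w, h) rectangle into four corners
    # (x1, y1, x2, y2, x3, y3, x4, y4), clockwise from the top-left.
    x, y, w, h = boxes.unbind(1)
    return torch.stack([x - w / 2, y - h / 2, x + w / 2, y - h / 2,
                        x + w / 2, y + h / 2, x - w / 2, y + h / 2], dim=1)

def quad_to_xyxy(quads):
    # Axis-aligned bounding rectangle of the four corners.
    xs, ys = quads[:, 0::2], quads[:, 1::2]
    return torch.stack([xs.min(1)[0], ys.min(1)[0],
                        xs.max(1)[0], ys.max(1)[0]], dim=1)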
Example No. 3
def find_best_pred(gt_boxes, pred_boxes):
    '''
    Check whether each ground-truth box is matched by a predicted box

    Args:
      gt_boxes: (FloatTensor) [N, 6]  zyxzyx
      pred_boxes: (FloatTensor) [M, 6]   zyxzyx

    Returns:
      count: (ndarray) (tp, fn, fp)
      best_ious: (FloatTensor) best IoU over ground-truth boxes for each prediction
    '''
    tp = 0
    fn = 0
    fp = 0
    distance = box_distance(gt_boxes, pred_boxes)
    iou = box_iou(gt_boxes, pred_boxes)
    min_dists, min_ids = distance.min(1)
    best_ious, best_ids = iou.max(0)  # find the best gt for each prediction
    gt_boxes = change_box_order(gt_boxes, order="zyxzyx2zyxdhw")
    for i in range(gt_boxes.size(0)):
        gt = gt_boxes[i, :]
        diameter = math.sqrt(gt[3]**2 + gt[4]**2 + gt[5]**2)
        radius = diameter / 2 + 10.
        if min_dists[i] <= radius:
            tp += 1
        else:
            fn += 1
    fp = pred_boxes.size(0) - tp

    return np.array([tp, fn, fp]), best_ious
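A hedged usage sketch, assuming gt_boxes and pred_boxes are zyxzyx FloatTensors and the helpers above are importable:

count, best_ious = find_best_pred(gt_boxes, pred_boxes)
tp, fn, fp = count
recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0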
Example No. 4
    def encode(self, boxes, labels, input_size):

        """We obey the Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)
        args:
        boxes: Tensor(xmin, ymin, xmax, ymax) size(boxes_num, 4)
        labels: Tensor size(boxes_num,)
        input_size: int/tuple, model input size
        return:
        target_loc: Tensor(anchor_num, 4)
        target_cls: Tensor(anchor_num,)
        """

        anchor_boxes = self._get_anchor_boxes(input_size)  # [anchor_num, 4]
        boxes = utils.change_box_order(boxes, 'xyxy2xywh')
        ious = utils.box_iou(anchor_boxes, boxes, order='xywh')  # [anchor_num, boxes_num]
        max_ious, max_ids = ious.max(1)  # (anchor_num,)
        boxes = boxes[max_ids]  # (anchor_num, 4), groundtruth
        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        target_loc = torch.cat([loc_xy, loc_wh], 1)
        target_cls = labels[max_ids]
        target_cls[max_ious < 0.5] = 0
        ignore = (max_ious < 0.5) & (max_ious >= 0.4)
        target_cls[ignore] = -1

        return target_loc, target_cls
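This encoder does not guarantee that every ground-truth box receives a positive anchor. A common addition (a sketch, not part of the original code; Example No. 22 below uses the same idea via best_ids) is to force each ground truth's best anchor positive:

best_anchor_ious, best_anchor_ids = ious.max(0)  # (boxes_num,): best anchor per gt
target_cls[best_anchor_ids] = labels             # every gt keeps at least one positive anchor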
Example No. 5
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax) in range [0,1], sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int) model input size.

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#total_anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#total_anchors].
        '''
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')
        boxes = boxes * input_size  # scale to range [0,input_size]

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious < 0.4] = 0
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return loc_targets, cls_targets
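change_box_order is used by nearly every example in this listing. A minimal sketch of the two orders used above (the real helper may differ):

import torch

def change_box_order(boxes, order):
    # Convert between (xmin, ymin, xmax, ymax) and (center_x, center_y, w, h).
    assert order in ('xyxy2xywh', 'xywh2xyxy')
    a = boxes[:, :2]
    b = boxes[:, 2:]
    if order == 'xyxy2xywh':
        return torch.cat([(a + b) / 2, b - a], 1)  # center, size
    return torch.cat([a - b / 2, a + b / 2], 1)    # min corner, max corner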
Example No. 6
    def encode(self, boxes, labels, input_size):
        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)

        anchor_boxes = self.get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')
        boxes = boxes.float()
        ious = box_iou(anchor_boxes, boxes, order='xywh')
        # ious: IoU of every anchor box against every ground-truth box;
        # rows are anchors, columns are ground-truth boxes
        # max_ids: index of the closest ground-truth box for each anchor;
        # max_ious: the IoU score of that match
        max_ious, max_ids = ious.max(1)
        # pick the matched ground-truth box for each anchor
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)

        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious < 0.1] = 0
        cls_targets[(max_ious >= 0.1) & (max_ious < 0.3)] = -1
        return loc_targets, cls_targets
Example No. 7
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (input_height, input_width).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)

        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious < 0.4] = 0
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return loc_targets, cls_targets
Example No. 8
    def __getitem__(self, index):
        img_id = self.images_list[index]
        img_record = self.images[img_id]

        path = img_record["file_name"]
        if self.img_dir is not None:
            path = os.path.join(self.img_dir, path)
        image = self._read_image(path)

        boxes = []  # each element is a tuple of (x1, y1, x2, y2, "class")
        for annotation in img_record["annotations"]:
            xyxy = self._from_pixels_to_pcnt(
                annotation["bbox"],
                img_record["width"],
                img_record["height"],
            )
            assert all(0 <= num <= 1 for num in xyxy), f"All numbers should be in range [0, 1], but got {xyxy}!"
            bbox_class = str(self.cid_to_class[annotation["category_id"]])
            boxes.append(xyxy + [str(bbox_class)])

        if self.transforms is not None:
            transformed = self.transforms(image=image, bboxes=boxes)
            image, boxes = transformed["image"], transformed["bboxes"]

        bboxes = np.zeros((self.num_anchors, 4), dtype=np.float32)
        classes = np.full(self.num_anchors, self.background_cls, dtype=np.int32)
        for idx, (x1, y1, x2, y2, box_cls) in enumerate(boxes):
            bboxes[idx, :] = [x1, y1, x2, y2]
            classes[idx] = int(box_cls)

        bboxes = torch.from_numpy(bboxes)
        bboxes = change_box_order(bboxes, "xyxy2xywh")
        classes = torch.LongTensor(classes)

        return image, bboxes, classes
Example No. 9
    def decode(self, loc_preds, cls_preds, input_size):
        CLS_THRESH = 0.05
        NMS_THRESH = 0.4

        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)

        anchor_boxes = self.get_anchor_boxes(input_size)
        std = Variable(self.std).cuda()
        loc_preds = loc_preds * std
        loc_xy = loc_preds.data.cpu()[:, :2]
        loc_wh = loc_preds.data.cpu()[:, 2:]
        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy, wh], 1)
        boxes = change_box_order(boxes, 'xywh2xyxy')
        cls_preds = F.softmax(cls_preds, 1)
        score, labels = cls_preds.max(1)
        ids = (labels > 0) & (score > CLS_THRESH)
        ids = ids.nonzero().squeeze()
        if len(ids.size()) == 0:
            return None, None, None
        ids = ids.data.cpu()

        keep = box_nms(boxes.cpu()[ids], score.data.cpu()[ids], threshold=NMS_THRESH)
        return boxes.cpu()[ids][keep], labels.data.cpu()[ids][keep], score.data.cpu()[ids][keep]
Example No. 10
def calc_scan_coord(boxes, start_coord):
    '''
    Calculate locations in scans

    Args:
      boxes: (FloatTensor) object locations in cubes [N, 6] zyxzyx
      start_coord: (FloatTensor) cube start location in scans [3]
    
    Returns:
      scan_loc: (FloatTensor) object locations in scans [N, 6] zyxzyx
    '''
    boxes = change_box_order(boxes, order="zyxzyx2zyxdhw")
    loc_zyx = boxes[:, :3]
    loc_dhw = boxes[:, 3:]
    cube_loc = start_coord.unsqueeze(0).expand_as(loc_zyx)
    loc_zyx += cube_loc
    scan_locs = torch.cat([loc_zyx, loc_dhw], 1)
    scan_locs = change_box_order(scan_locs, order="zyxdhw2zyxzyx")

    return scan_locs
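A small usage sketch with made-up values, assuming change_box_order converts between corner (zyxzyx) and center-size (zyxdhw) forms:

import torch

cube_boxes = torch.FloatTensor([[10., 10., 10., 20., 20., 20.]])  # zyxzyx, inside the cube
start_coord = torch.FloatTensor([128., 64., 64.])                 # cube origin in the scan
scan_boxes = calc_scan_coord(cube_boxes, start_coord)
# expected: [[138., 74., 74., 148., 84., 84.]] -- the same box shifted by the cube origin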
Example No. 11
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        '''
        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
            else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')
        # if ((boxes[0][2] * boxes[0][3]).numpy() >32 * 32 / 2) :
        #     # print((boxes[0][2]*boxes[0][3]).numpy(),end='->')
        #     sptj='True'
        # else:
        #     sptj = 'False'
        # print('target locked -> ',sptj)

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious < 0.5] = 0
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return cls_targets, loc_targets
Example No. 12
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.tensor([input_size, input_size], dtype=torch.float32) if isinstance(input_size, int) \
            else torch.tensor(input_size, dtype=torch.float32)
        anchor_boxes = self._get_anchor_boxes(input_size)

        if boxes.numel() == 0:
            # 0 is background class
            cls_targets = torch.zeros(anchor_boxes.size(0), dtype=torch.int64)
            loc_targets = torch.zeros_like(anchor_boxes, dtype=torch.float32)
        else:
            boxes = change_box_order(boxes, 'xyxy2xywh')

            ious = box_iou(anchor_boxes, boxes, order='xywh')
            max_ious, max_ids = ious.max(1)
            boxes = boxes[max_ids]

            loc_xy = (boxes[:, :2]-anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
            loc_wh = torch.log(boxes[:, 2:]/anchor_boxes[:, 2:])
            loc_targets = torch.cat([loc_xy, loc_wh], 1)
            cls_targets = labels[max_ids]

            cls_targets[max_ious < 0.5] = 0 # 0 is background class
            ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
            cls_targets[ignore] = -1  # for now just mark ignored to -1

        return loc_targets, cls_targets
Example No. 13
    def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.

        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 8].
          cls_preds: (tensor) predicted class labels, sized [#anchors, ].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          boxes: (tensor) decode box locations, sized [#obj,8].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''

        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)

        anchor_rect_boxes = self._get_anchor_boxes(input_size).cuda()
        anchor_quad_boxes = change_box_order(anchor_rect_boxes, "xywh2quad")

        quad_boxes = anchor_quad_boxes + anchor_rect_boxes[:, 2:4].repeat(
            1, 4) * loc_preds  # [#anchor, 8]
        quad_boxes = torch.clamp(quad_boxes, 0, input_size[0])

        score, labels = cls_preds.sigmoid().max(1)  # focal loss
        #score, labels = softmax(cls_preds).max(1)          # OHEM+softmax

        # Classification score Threshold
        ids = score > self.cls_thresh
        ids = ids.nonzero().squeeze()  # [#obj,]

        score = score[ids]
        labels = labels[ids]
        quad_boxes = quad_boxes[ids].view(-1, 4, 2)

        quad_boxes = quad_boxes.cpu().data.numpy()
        score = score.cpu().data.numpy()

        if len(score.shape) == 0:
            return quad_boxes, labels, score
        else:
            keep = non_max_suppression_poly(quad_boxes, score, self.nms_thresh)
            return quad_boxes[keep], labels[keep], score[keep]
Example No. 14
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Then we scale [tx,ty,tw,th] by [10,10,5,5] times to make loc_loss larger.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (input_height, input_width).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].

        Reference:
          https://github.com/tensorflow/models/blob/master/object_detection/box_coders/faster_rcnn_box_coder.py
        '''
        scale_factor = torch.Tensor([10,10,5,5])  # scale [tx,ty,tw,th]
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:,:2]-anchor_boxes[:,:2]) / anchor_boxes[:,2:]
        loc_wh = torch.log(boxes[:,2:]/anchor_boxes[:,2:])
        loc_targets = torch.cat([loc_xy,loc_wh], 1) * scale_factor
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious<0.4] = 0
        ignore = (max_ious>0.4) & (max_ious<0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return loc_targets, cls_targets
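The matching decode step is not shown; a sketch of how it would have to undo the scaling before inverting the box coder (loc_preds and anchor_boxes as in the other decode examples):

scale_factor = torch.Tensor([10, 10, 5, 5])
loc_preds = loc_preds / scale_factor  # undo the encode-time scaling first
xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
wh = loc_preds[:, 2:].exp() * anchor_boxes[:, 2:]
boxes = torch.cat([xy, wh], 1)  # (x, y, w, h)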
Example No. 15
    def encode(self, boxes, labels, input_size):
        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)
        
        anchor_boxes = self.get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')
        boxes = boxes.float()
        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)

        cls_targets = 1 + labels[max_ids]
        cls_targets[max_ious < 0.4] = 0
        cls_targets[(max_ious >= 0.4) & (max_ious < 0.5)] = -1
        return loc_targets, cls_targets
Example No. 16
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        #print(anchor_boxes.shape) [49104,4]
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        #print(ious.shape) [num_anchors, obj]
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]
        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]
        #print(cls_targets.shape) torch.Size([49104])
        cls_targets[max_ious < 0.5] = 0
        #print(cls_targets)
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1

        return loc_targets, cls_targets
Example No. 17
    def decode(self, loc_preds, cls_preds, input_size):
        CLS_THRESH = 0.05
        NMS_THRESH = 0.3

        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)

        anchor_boxes = self.get_anchor_boxes(input_size)
        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]
        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy, wh], 1)
        boxes = change_box_order(boxes, 'xywh2xyxy')

        score, labels = cls_preds.max(1)
        ids = (score > CLS_THRESH) & (labels > 0)
        ids = ids.nonzero().squeeze()
        keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
        return boxes[ids][keep], labels[ids][keep]
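box_nms is called by several decoders in this listing but never defined. A plain-PyTorch sketch of greedy IoU-based NMS with the same signature (the actual helper may differ):

import torch

def box_nms(boxes, scores, threshold=0.5):
    # boxes: [N, 4] in (xmin, ymin, xmax, ymax); scores: [N].
    x1, y1, x2, y2 = boxes.unbind(1)
    areas = (x2 - x1) * (y2 - y1)
    order = scores.sort(0, descending=True)[1]
    keep = []
    while order.numel() > 0:
        i = order[0].item()
        keep.append(i)
        if order.numel() == 1:
            break
        rest = order[1:]
        # Intersection of the top-scoring box with the remaining boxes.
        xx1 = x1[rest].clamp(min=x1[i].item())
        yy1 = y1[rest].clamp(min=y1[i].item())
        xx2 = x2[rest].clamp(max=x2[i].item())
        yy2 = y2[rest].clamp(max=y2[i].item())
        inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
        iou = inter / (areas[i] + areas[rest] - inter)
        order = rest[iou <= threshold]  # drop boxes that overlap the kept one
    return torch.LongTensor(keep)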
Example No. 18
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:,:2]-anchor_boxes[:,:2]) / anchor_boxes[:,2:]
        loc_wh = torch.log(boxes[:,2:]/anchor_boxes[:,2:])
        loc_targets = torch.cat([loc_xy,loc_wh], 1)
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious<0.5] = 0
        ignore = (max_ious>0.4) & (max_ious<0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return loc_targets, cls_targets
Example No. 19
    def encode(self, boxes, labels, input_size):
        """
        Encode target bounding boxes and class labels.
        we obey the Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

        :param boxes: (tensor) bounding boxes of (xmin, ymin, xmax, ymax), sized [#obj, 4].
        :param labels: (tensor) object class labels, sized [#obj,].
        :param input_size: (int/tuple) input size of the original image
        :return:
            loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 4].
            cls_targets: (tensor) encoded class labels, sized [#anchors,].
        """
        input_size = torch.Tensor([input_size, input_size]) if isinstance(
            input_size, int) else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        loc_targets = loc_targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])
        cls_targets = labels[max_ids]

        cls_targets[max_ious < 0.4] = 0
        ignore = (max_ious >= 0.4) & (max_ious < 0.5)  # ignore ious between [0.4, 0.5)
        cls_targets[ignore] = -1
        return loc_targets, cls_targets
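The division by [0.1, 0.1, 0.2, 0.2] is SSD-style "variance" encoding; numerically it is the same scaling as Example No. 14's multiplication by [10, 10, 5, 5]:

import torch

variances = torch.Tensor([0.1, 0.1, 0.2, 0.2])
scale_factor = torch.Tensor([10., 10., 5., 5.])
assert torch.allclose(1.0 / variances, scale_factor)  # dividing by variances == multiplying by the scale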
Example No. 20
    def encode(self, boxes, center_points, labels, colls_with, dimensions,
               bins, sines, coses, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          center_points: (tensor) object centers of (x,y,depth), sized [#obj, 3].
          colls_with: (tensor) whether the vehicle collides with the player agent, sized [#obj] (binary)
          dimensions: (tensor), sized [#obj, 3]
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
          center_targets, colls_with, dimensions, bins, sines, coses: the other
            regression targets, gathered at each anchor's matched instance.
        '''
        bins = bins.squeeze(1)
        sines = sines.squeeze(1)
        coses = coses.squeeze(1)
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        try:
            boxes = change_box_order(boxes, 'xyxy2xywh')
        except Exception:
            print(
                "a vehicle-free frame, which should be eliminated in a clean dataset"
            )
            boxes = torch.Tensor([[0., 0., 0., 0.]])
            colls_with = torch.Tensor([0.])
            dimensions = torch.Tensor([[0., 0., 0.]])
            sines = torch.Tensor([0.])
            coses = torch.Tensor([0.])
            bins = torch.Tensor([0.])
            labels = torch.Tensor([0.])
            # orientations = torch.Tensor([0.])

        colls_with = torch.Tensor(colls_with)
        # orientations = orientations.float()

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)

        # select matching instance
        boxes = boxes[max_ids]
        center_points = center_points[max_ids]
        colls_with = colls_with[max_ids]
        dimensions = dimensions[max_ids]
        cls_targets = labels[max_ids]
        bins = bins[max_ids]
        sines = sines[max_ids]
        coses = coses[max_ids]
        # orientations = orientations[max_ids]

        # build offset referring to target anchors
        # print(boxes[0,0], "before more")
        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        # print(loc_xy[0, 0], loc_wh[0,0], "before")
        loc_targets = torch.cat([loc_xy, loc_wh], 1)  # sized [num_anchor, 4]
        center_xy = (center_points[:, :2] -
                     anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        center_depth = center_points[:, 2].unsqueeze(1)
        center_targets = torch.cat([center_xy, center_depth],
                                   1)  # sized [num_anchor, 3]

        # filter invalid or negative instance
        sines[max_ious < 0.5] = 0
        coses[max_ious < 0.5] = 0
        bins[max_ious < 0.5] = 0
        cls_targets[max_ious < 0.5] = 0
        colls_with[max_ious < 0.5] = 0
        dimensions[max_ious < 0.5] = 0
        # orientations[max_ious<0.5] = 0

        # ignore instances that do not overlap enough
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        colls_with[ignore] = -1
        dimensions[ignore] = -1
        bins[ignore] = -1

        # colls_with[ignore] = -1
        # print(loc_targets[0, 0], "in encoder")
        return loc_targets, cls_targets, center_targets, colls_with, dimensions, bins, sines, coses
Example No. 21
def iter_scan(scan,
              scan_array,
              patient_df,
              net,
              cube_size=64,
              stride=50,
              iou=0.01):
    scan_df = pd.DataFrame(columns=["scan_id", "z", "y", "x", "iou"])
    start_time = time.time()
    gt_boxes, gt_labels = annotation(patient_df)
    #print(gt_boxes, gt_labels)
    ais_gt_boxes, mia_gt_boxes = split_class(gt_boxes, gt_labels)
    #print(ais_gt_boxes, mia_gt_boxes)
    ais_locs = torch.FloatTensor(1, 6)
    ais_probs = torch.FloatTensor(1)

    mia_locs = torch.FloatTensor(1, 6)
    mia_probs = torch.FloatTensor(1)

    for z in range(0, scan_array.shape[0], stride):
        for y in range(0, scan_array.shape[1], stride):
            for x in range(0, scan_array.shape[2], stride):
                start_coord = torch.FloatTensor([z, y, x])
                end_coord = start_coord + torch.FloatTensor(
                    [cube_size, cube_size, cube_size])
                zmax = min(z + cube_size, scan_array.shape[0])
                ymax = min(y + cube_size, scan_array.shape[1])
                xmax = min(x + cube_size, scan_array.shape[2])
                cube_sample = np.zeros((cube_size, cube_size, cube_size),
                                       dtype=np.float32)
                cube_sample[:(zmax - z), :(ymax - y), :(xmax - x)] = \
                    scan_array[z:zmax, y:ymax, x:xmax]
                cube_sample = np.expand_dims(cube_sample, 0)
                cube_sample = np.expand_dims(cube_sample, 0)
                input_cube = Variable(torch.from_numpy(cube_sample).cuda())
                locs, clss = net(input_cube)
                locs = locs.data.cpu().squeeze()
                clss = clss.data.cpu().squeeze()
                encoder = DataEncoder()
                ais_boxes, ais_scores, ais_labels, mia_boxes, mia_scores, mia_labels = \
                    encoder.decode(locs, clss, [cube_size, cube_size, cube_size])
                if not isinstance(ais_boxes, int):
                    ais_boxes = calc_scan_coord(ais_boxes, start_coord)
                    ais_locs = torch.cat([ais_locs, ais_boxes], 0)
                    ais_probs = torch.cat([ais_probs, ais_scores], 0)

                if not isinstance(mia_boxes, int):
                    mia_boxes = calc_scan_coord(mia_boxes, start_coord)
                    mia_locs = torch.cat([mia_locs, mia_boxes], 0)
                    mia_probs = torch.cat([mia_probs, mia_scores], 0)

    end_time = time.time()
    run_time = end_time - start_time
    print(run_time)
    if not isinstance(ais_gt_boxes, int):
        ais_locs = ais_locs[1:, :]
        ais_probs = ais_probs[1:]
        ais_keep = box_nms(ais_locs, ais_probs)
        ais_locs = ais_locs[ais_keep]
        ais_probs = ais_probs[ais_keep]
        ais_count, best_ious = find_best_pred(ais_gt_boxes, ais_locs)
        ais_locs = change_box_order(ais_locs, "zyxzyx2zyxdhw")
        for i in range(ais_locs.size(0)):
            insert = {
                "scan_id": scan,
                "z": ais_locs[i, 0],
                "y": ais_locs[i, 1],
                "x": ais_locs[i, 2],
                "iou": best_ious[i]
            }
            la_df = pd.DataFrame(data=insert, index=["0"])
            scan_df = scan_df.append(la_df, ignore_index=True)

    else:
        ais_count = np.zeros(3)

    if not isinstance(mia_gt_boxes, int):
        mia_locs = mia_locs[1:, :]
        mia_probs = mia_probs[1:]
        mia_keep = box_nms(mia_locs, mia_probs)
        mia_locs = mia_locs[mia_keep]
        mia_probs = mia_probs[mia_keep]
        mia_count, best_ious = find_best_pred(mia_gt_boxes, mia_locs)
        for i in range(mia_locs.size(0)):
            insert = {
                "scan_id": scan,
                "z": mia_locs[i, 0],
                "y": mia_locs[i, 1],
                "x": mia_locs[i, 2],
                "iou": best_ious[i]
            }
            la_df = pd.DataFrame(data=insert, index=["0"])
            scan_df = scan_df.append(la_df, ignore_index=True)
    else:
        mia_count = np.zeros(3)

    return ais_count, mia_count, scan_df
Example No. 22
    def encode(self, boxes, labels, input_size):
        '''
        Encode target bounding boxes and class labels.

        Implement the Faster RCNN box coder in 3D images:
          tz = (z - anchor_z) / anchor_d
          ty = (y - anchor_y) / anchor_h
          tx = (x - anchor_x) / anchor_w
          td = log(d / anchor_d)
          th = log(h / anchor_h)
          tw = log(w / anchor_w)

        Args:
          boxes: (tensor) bounding boxes of (zmin, ymin, xmin, zmax, ymax, xmax), sized [#obj, 6]
          labels: (tensor) object class labels, sized [#obj,]
          input_size: (int/tuple) model input size of (d, h, w)

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 6]
          cls_targets: (tensor) encoded class labels, sized [#anchors,]
        '''
        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)
        anchor_boxes = self.get_anchor_boxes(input_size)  # (z, y, x, d, h, w)
        boxes = change_box_order(boxes, 'zyxzyx2zyxdhw')
        #print(boxes.size())
        ious = box_iou(anchor_boxes, boxes, order="zyxdhw")  # [#anchors, #obj]
        # best object for each anchor: IoU value and object index
        max_ious, max_ids = ious.max(1)
        # best anchor for each object: IoU value and anchor index
        best_ious, best_ids = ious.max(0)
        boxes = boxes[max_ids]
        #print(boxes.size())

        loc_zyx = (boxes[:, :3] - anchor_boxes[:, :3]) / anchor_boxes[:, 3:]
        loc_dhw = torch.log(boxes[:, 3:] / anchor_boxes[:, 3:])
        loc_targets = torch.cat([loc_zyx, loc_dhw], 1)

        # the background class is 0, so shift object classes by +1
        cls_targets = 1 + labels[max_ids]
        #print(cls_targets.size())
        cls_targets[max_ious < 0.4] = 0

        for i in range(best_ids.size()[0]):
            cls_targets[best_ids[i]] = 1 + labels[i]

        # keep roughly 100 labelled anchors: randomly mark surplus negatives as ignored
        ig_num = cls_targets.size()[0] - 100
        cls_targets_array = cls_targets.numpy()
        neg_idx = np.where(cls_targets_array == 0)
        if ig_num > len(neg_idx[0]):
            ig_num -= (ig_num - len(neg_idx[0]))
        ig_idx = np.random.choice(neg_idx[0], ig_num, replace=False)
        cls_targets_array[ig_idx] = -1
        cls_targets = torch.from_numpy(cls_targets_array)
        '''
        ignore = (max_ious > 0.15) & (max_ious < 0.4)
        cls_targets[ignore] = -1
         
        for i in range(best_ids.size()[0]):
            cls_targets[best_ids[i]] = 1 + labels[i]	
        '''
        return loc_targets, cls_targets
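The numpy round-trip in the negative down-sampling above can be written with torch ops alone. A sketch (keep_num stands in for the hard-coded 100, an assumption for readability):

import torch

keep_num = 100
neg_idx = (cls_targets == 0).nonzero().squeeze(1)
ig_num = max(min(cls_targets.size(0) - keep_num, neg_idx.numel()), 0)
perm = torch.randperm(neg_idx.numel())[:ig_num]
cls_targets[neg_idx[perm]] = -1  # randomly ignore the surplus negatives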
Example No. 23
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        NEG = 10
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        
        boxes = boxes[max_ids]

        loc_xy = (boxes[:,:2] - anchor_boxes[:,:2]) / anchor_boxes[:,2:]
        loc_wh = torch.log(boxes[:,2:] / anchor_boxes[:,2:])
        loc_targets = torch.cat([loc_xy,loc_wh], 1)
        
        # Here we set the positive-to-negative sampling ratio to 1:3
        cls_targets = 1 + labels[max_ids]  # class is label + 1; everything starts out positive
#         print(cls_targets)
        
        cls_targets[max_ious < 0.1] = 0
        ignore = (max_ious > 0.05) & (max_ious < 0.1)
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        '''
        cls_targets[max_ious < 0.1] = 0
#         print("cls_targets shape:", cls_targets.shape)
        pos = cls_targets > 0 
        n_pos = pos.data.float().sum().item()
#         print(n_pos)
        n_neg = NEG * n_pos if n_pos != 0 else NEG
        n_neg = int(n_neg)
#         print('n_neg',n_neg)
        
#         print(max_ious.shape)
        max_ious = max_ious.numpy().astype(np.float32)
        neg_index = np.where(max_ious < 0.1)[0]
#         print("neg_index shape", neg_index.size)
#         print("neg_index", neg_index)
#         neg_index = neg_index.squeeze(1)
#         neg_index = neg_index.numpy().astype(np.int)
#         print("neg_index numpy shape", neg_index.shape)
        
        if neg_index.shape[0] > n_neg:
            disable_index = np.random.choice(
                neg_index, size=(len(neg_index) - n_neg), replace=False)
#             disable_index = disable_index.unsqueeze(1)
#             print("disable_index",disable_index.shape)
            disable_index = torch.from_numpy(disable_index).long()
            cls_targets[disable_index] = -1
#         print("cls_targets",cls_targets)
#         pos_neg = cls_targets > -1  # exclude ignored anchors
#         print("pos_neg", pos_neg.data.float().sum().item())
# #         ignore = (max_ious > 0.05) & (max_ious<0.01)
# #         cls_targets[ignore] = -1  # for now just mark ignored to -1
        '''
        return loc_targets, cls_targets
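For reference, a cleaned-up sketch of the negative-sampling experiment kept commented out above (variable names follow the example; NEG is the negatives-per-positive ratio):

import numpy as np
import torch

n_pos = int((cls_targets > 0).sum().item())
n_neg = int(NEG * n_pos) if n_pos > 0 else NEG
neg_index = np.where(max_ious.numpy() < 0.1)[0]
if len(neg_index) > n_neg:
    # randomly disable the surplus negatives
    disable_index = np.random.choice(neg_index, size=len(neg_index) - n_neg, replace=False)
    cls_targets[torch.from_numpy(disable_index).long()] = -1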