Exemplo n.º 1
0
    def __getitem__(self, index):
        """Load one sample: the RGB image and its ground-truth box table.

        Returns:
            dict with keys
              'image': the image read from disk, converted BGR -> RGB
                       (and augmented when ``self.augment`` is truthy),
              'boxes': float32 array with six columns; the first five come
                       from ``quad_2_rbox(..., mode='xyxya')`` and the sixth
                       is the class label,
              'path':  the path the image was read from.
        """
        im_path = self.image_list[index]
        bgr = cv2.imread(im_path, cv2.IMREAD_COLOR)
        im = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        roidb = self._load_annotation(self.image_list[index])
        # Class id 0 is skipped; only rows with a non-zero label are kept.
        keep = np.where(roidb['gt_classes'] != 0)[0]
        quads = roidb['boxes'][keep, :]
        labels = roidb['gt_classes'][keep]
        targets = np.zeros((len(keep), 6), dtype=np.float32)

        if self.augment:
            ops = [
                HorizontalFlip(0.5),
                VerticalFlip(0.5),
                Affine(degree=0, translate=0.1, scale=0., p=0.5),
            ]
            augmenter = Augment(ops, box_mode='xyxyxyxy')
            im, quads = augmenter(im, quads)

        # Drop boxes that mask_valid_boxes rejects, keeping the three
        # per-box containers aligned with each other.
        valid = mask_valid_boxes(quad_2_rbox(quads, 'xywha'), return_mask=True)
        quads, targets, labels = quads[valid], targets[valid], labels[valid]

        # Each row of `targets` is a view, so writing into it fills the table.
        for row, quad, label in zip(targets, quads, labels):
            # Four corner points -> xyxya (angle in degrees, per the
            # original author's note).
            row[:5] = quad_2_rbox(np.array(quad), mode='xyxya')
            row[5] = label

        return {'image': im, 'boxes': targets, 'path': im_path}
Exemplo n.º 2
0
    def _load_annotation(self, index):
        """Parse the XML annotation that sits next to an image file.

        Args:
            index: path of the image file. The annotation path is built from
                the same directory and the image file name with its last
                five characters replaced by '.xml'.

        Returns:
            dict with 'boxes' (array of ``quad_2_rbox`` results, one per
            object) and 'gt_classes' (array of mapped class labels).

        Raises:
            AssertionError: if the file has no '<objects>' tag or an object
                chunk is empty (only when assertions are enabled).
        """
        root_dir, img_name = os.path.split(index)
        # Strips exactly five trailing characters (e.g. a 4-letter extension
        # plus the dot) -- assumes all image names end that way; TODO confirm.
        filename = os.path.join(root_dir, img_name[:-5] + '.xml')

        boxes, gt_classes = [], []
        # 'utf-8-sig' transparently drops a leading BOM if the file has one.
        with open(filename, 'r', encoding='utf-8-sig') as f:
            content = f.read()
            assert '<objects>' in content, 'Background picture occurred in %s' % filename
            # The file is split by hand on the '<object>' tag rather than
            # parsed as XML; the first chunk is everything before the first
            # object and is discarded.
            objects = content.split('<object>')
            info = objects.pop(0)
            for obj in objects:
                assert len(obj) != 0, 'No onject found in %s' % filename
                # 8 == len('<points>'): take the text between the tags, then
                # split on '<point>' and drop the (empty) leading piece.
                points = obj[obj.find('<points>') +
                             8:obj.find('</points>')].split('<point>')[1:]
                # Each piece looks like 'x,y</point>...'; keep the 'x,y' part.
                coors = [x.split('<')[0].split(',') for x in points]
                # Only the first four points are used; extras are ignored.
                (x1, y1), (x2, y2), (x3, y3), (x4, y4), *_ = coors
                # NOTE(review): eval() runs arbitrary expressions taken from
                # the annotation file -- only safe for trusted files; consider
                # float() instead.
                x1, y1, x2, y2, x3, y3, x4, y4 = [
                    x for x in map(eval, [x1, y1, x2, y2, x3, y3, x4, y4])
                ]
                quad_box = np.array([(x1, y1), (x2, y2), (x3, y3), (x4, y4)])
                box = quad_2_rbox(quad_box)
                boxes.append(box)
                # NOTE(review): `cls_id` is never assigned in this method, so
                # unless it exists as a module-level name this line raises
                # NameError. The class-id extraction from `obj` appears to be
                # missing -- confirm against the annotation format.
                label = self.class_mapping(cls_id, self.level)
                gt_classes.append(label)
        return {'boxes': np.array(boxes), 'gt_classes': np.array(gt_classes)}
Exemplo n.º 3
0
    def __getitem__(self, index):
        """Load one sample: the RGB image and its ground-truth box table.

        Returns a dict with 'image', 'boxes' (float32, six columns; the
        first four are rewritten below as x1, y1, x2, y2, the last is the
        class label) and 'path'.
        """
        # Original author's note: `index` is the sample index; samples are
        # processed by 8 workers at a time (num_worker), and
        # self.image_list[index] picks the current image name out of all the
        # images listed in trainval.txt.
        im_path = self.image_list[index]  # absolute path of the current image (per the original note)
        im = cv2.cvtColor(cv2.imread(im_path, cv2.IMREAD_COLOR),
                          cv2.COLOR_BGR2RGB)
        # Read the XML info for the current file. Original author's note: the
        # dict holds two key/value pairs, 'boxes' with shape (nt, 8) and
        # 'gt_classes' with shape (1, nt).
        # NOTE(review): the code below assigns the boxes into five columns
        # and passes box_mode='xywha', which does not match an (nt, 8)
        # layout -- confirm the actual shape.
        roidb = self._load_annotation(self.image_list[index])
        gt_inds = np.where(roidb['gt_classes'] != 0)[0]
        nt = len(roidb['boxes'])
        # Layout of gt_boxes (per the original note): xyxyac
        gt_boxes = np.zeros((len(gt_inds), 6), dtype=np.float32)
        # The guard uses the total number of annotated boxes, not the number
        # of boxes that survived the non-zero class filter.
        if nt:
            bboxes = roidb['boxes'][gt_inds, :]
            classes = roidb['gt_classes'][gt_inds]
            if self.augment:
                transform = Augment(
                    [
                        HorizontalFlip(0.5),
                        VerticalFlip(0.5),
                        Affine(degree=0, translate=0.1, scale=0.1, p=0.5),
                        # Grayscale(0.3,p=0.5),
                        # # Contrast(0.15, p=0.3),
                        # Sharpen(0.15, p=0.2),
                        Noise(0.1, p=0.5),
                        # Gamma(0.2, p=0.4),
                        # Blur(1.3, p=0.5),
                    ],
                    box_mode='xywha',
                )
                im, bboxes = transform(im, bboxes)
            # Copy the box values into every column except the last (class).
            gt_boxes[:, :-1] = bboxes

            # NOTE(review): bboxes were handed to the augmenter as 'xywha',
            # yet quad_2_rbox is applied to them here -- confirm intended.
            mask = mask_valid_boxes(quad_2_rbox(bboxes, 'xywha'),
                                    return_mask=True)
            # Apply the same mask to all three per-box containers so they
            # stay aligned.
            bboxes = bboxes[mask]
            gt_boxes = gt_boxes[mask]
            classes = classes[mask]

            for i, bbox in enumerate(bboxes):
                gt_boxes[i, 5] = classes[i]
            gt_boxes = constraint_theta(gt_boxes)
            # Convert centre/size (columns 0-3) to corner form. All four
            # corner arrays are computed before any column is written back;
            # this ordering matters because cx/cy/w/h are column slices of
            # gt_boxes (views, assuming constraint_theta returns an ndarray).
            cx, cy, w, h = [gt_boxes[:, x] for x in range(4)]
            x1 = cx - 0.5 * w
            x2 = cx + 0.5 * w
            y1 = cy - 0.5 * h
            y2 = cy + 0.5 * h
            gt_boxes[:, 0] = x1
            gt_boxes[:, 1] = y1
            gt_boxes[:, 2] = x2
            gt_boxes[:, 3] = y2

            ## test augmentation
            # NOTE(review): these two debug calls run for every sample that
            # has annotations; they look like leftovers from testing the
            # augmentation -- confirm and disable for training.
            print(im.shape)
            plot_gt(im, gt_boxes[:, :5], im_path, mode='xyxya')

        return {'image': im, 'boxes': gt_boxes, 'path': im_path}
Exemplo n.º 4
0
    def __call__(self, img, labels, mode=None):
        """Apply ``random_affine`` to the sample with probability ``self.p``.

        Args:
            img: image passed through to ``random_affine``.
            labels: box labels. When ``mode == 'xywha'`` they are converted
                with ``rbox_2_quad`` before the warp and back with
                ``quad_2_rbox`` afterwards; otherwise they are passed as-is.
            mode: label layout; only the value 'xywha' triggers conversion.

        Returns:
            ``(img, labels)`` -- unchanged when the random draw skips the
            augmentation.
        """
        # NOTE(review): this unpacks exactly two values from random_affine;
        # confirm that matches the random_affine actually in scope.
        if not random.random() < self.p:
            return img, labels

        needs_conversion = mode == 'xywha'
        if needs_conversion:
            labels = rbox_2_quad(labels, mode='xywha')

        img, labels = random_affine(
            img,
            labels,
            degree=self.degree,
            translate=self.translate,
            scale=self.scale,
            shear=self.shear,
        )

        if needs_conversion:
            labels = quad_2_rbox(labels, mode='xywha')
        return img, labels
    def __getitem__(self, index):
        """Load one sample: the RGB image and its ground-truth box table.

        When ``self.random_flip`` is truthy the image is mirrored
        horizontally with probability 0.5 and the x coordinates of the boxes
        (even columns) are mirrored to match.

        Returns:
            dict with 'image' and 'boxes'; 'boxes' is a float32 array with
            six columns: the five values returned by ``quad_2_rbox`` for the
            box, followed by that box's class label.
        """
        im_path = self._image_path_from_index(self.image_list[index])
        im = cv2.cvtColor(cv2.imread(im_path, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
        roidb = self._load_pascal_annotation(self.image_list[index])
        # Class id 0 is skipped; only rows with a non-zero label are kept.
        gt_inds = np.where(roidb['gt_classes'] != 0)[0]
        bboxes = roidb['boxes'][gt_inds, :]
        classes = roidb['gt_classes'][gt_inds]

        if self.random_flip and np.random.rand() >= 0.5:
            im = cv2.flip(im, 1, None)
            # Mirror x coordinates around the image width; the right-hand
            # side is fully evaluated before the assignment happens.
            oldxs = bboxes[:, 0::2].copy()
            bboxes[:, 0::2] = im.shape[1] - oldxs - 1

        gt_boxes = np.empty((len(gt_inds), 6), dtype=np.float32)
        for i, bbox in enumerate(bboxes):
            gt_boxes[i, :5] = quad_2_rbox(np.array(bbox))
            # Write the label into THIS row only. Assigning to the whole
            # column here would leave every box with the last box's class.
            gt_boxes[i, 5] = classes[i]
        return {'image': im, 'boxes': gt_boxes}
    def __getitem__(self, index):
        """Load one sample: the RGB image and its ground-truth box table.

        The annotation is read with ``_load_annotation_txt`` or
        ``_load_annotation_json`` depending on ``self.dformat``.

        Returns:
            dict with 'image' and 'boxes'; 'boxes' is a float32 array with
            six columns: the five values returned by ``quad_2_rbox`` for the
            box, followed by that box's class label.

        Raises:
            IOError: if the image file cannot be read.
            Exception: if ``self.dformat`` is neither 'txt' nor 'json'.
        """
        im_path = self._image_path_from_index(self.image_list[index])
        im = cv2.imread(im_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None. Check before the
        # colour conversion so the error names the offending file and h/w
        # are always defined below.
        if im is None:
            raise IOError('Failed to read image: %s' % im_path)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        h, w = im.shape[:2]

        gt_path = self._annotation_path_from_index(self.annotation_list[index])
        if self.dformat == 'txt':
            roidb = self._load_annotation_txt(gt_path, h, w)
        elif self.dformat == 'json':
            roidb = self._load_annotation_json(gt_path)
        else:
            raise Exception('Data format not supported!')
        # Class id 0 is skipped; only rows with a non-zero label are kept.
        gt_inds = np.where(roidb['gt_classes'] != 0)[0]
        bboxes = roidb['boxes'][gt_inds, :]
        classes = roidb['gt_classes'][gt_inds]

        gt_boxes = np.empty((len(gt_inds), 6), dtype=np.float32)
        for i, bbox in enumerate(bboxes):
            gt_boxes[i, :5] = quad_2_rbox(np.array(bbox))
            # Write the label into THIS row only. Assigning to the whole
            # column here would leave every box with the last box's class.
            gt_boxes[i, 5] = classes[i]

        return {'image': im, 'boxes': gt_boxes}
Exemplo n.º 7
0
def random_affine(img,
                  targets=(),
                  degree=10,
                  translate=.1,
                  scale=.1,
                  shear=10):
    """Randomly rotate, scale and translate an image together with its boxes.

    Args:
        img: image array; ``img.shape[0]`` is used as the height and
            ``img.shape[1]`` as the width.
        targets: array-like with one row per box laid out as
            x1, y1, x2, y2, x3, y3, x4, y4. ``None`` or an empty sequence
            means "no boxes". The input is not modified.
        degree: the rotation angle is drawn uniformly from
            [-degree, degree].
        translate: the shift is drawn uniformly from
            [-translate, translate] and multiplied by the image width (x)
            or height (y).
        scale: the zoom factor is drawn uniformly from
            [1 - scale, 1 + scale].
        shear: accepted for interface compatibility; not used.

    Returns:
        Tuple ``(warped_image, kept_targets, mask)`` where ``mask`` is a
        boolean array over the input rows and ``kept_targets`` are the
        transformed, clipped rows for which ``mask`` is True.
    """
    # torchvision.transforms.RandomAffine(degree=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

    border = 0  # width of added border (optional)
    height = img.shape[0] + border * 2
    width = img.shape[1] + border * 2

    # Rotation and scale about the image centre.
    R = np.eye(3)
    a = random.uniform(-degree, degree)
    s = random.uniform(1 - scale, 1 + scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a,
                                    center=(img.shape[1] / 2,
                                            img.shape[0] / 2),
                                    scale=s)

    # Translation: x is a fraction of the width, y a fraction of the height
    # (the two image dimensions were previously swapped).
    T = np.eye(3)
    T[0, 2] = random.uniform(-translate, translate) * img.shape[1] + border
    T[1, 2] = random.uniform(-translate, translate) * img.shape[0] + border

    M = T @ R  # Combined matrix: rotate/scale first, then translate.
    imw = cv2.warpAffine(img,
                         M[:2],
                         dsize=(width, height),
                         flags=cv2.INTER_AREA,
                         borderValue=(128, 128, 128))

    # Work on a copy so the caller's array is left untouched.
    targets = np.array(() if targets is None else targets)
    if targets.size == 0:
        # Nothing to transform; still return the warped image.
        return imw, targets.reshape(-1, 8), np.zeros(0, dtype=bool)

    x_cols = [0, 2, 4, 6]
    y_cols = [1, 3, 5, 7]
    # Snapshot the ORIGINAL coordinates first: both output coordinates
    # depend on both inputs, so x must not be overwritten before y is
    # computed.
    xs = targets[:, x_cols]
    ys = targets[:, y_cols]
    targets[:, x_cols] = xs * M[0, 0] + ys * M[0, 1] + M[0, 2]
    targets[:, y_cols] = xs * M[1, 0] + ys * M[1, 1] + M[1, 2]

    # Clamp the warped corners to the output image.
    targets[:, x_cols] = targets[:, x_cols].clip(0, width)
    targets[:, y_cols] = targets[:, y_cols].clip(0, height)

    rboxes = quad_2_rbox(targets)
    w = rboxes[:, 2]
    h = rboxes[:, 3]
    ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
    # Keep boxes that are large enough, not too elongated, and whose corners
    # lie strictly inside the image (a clamped corner fails these strict
    # comparisons, so boxes reaching the border are dropped).
    mask = ((w > 4)
            & (h > 4)
            & (ar < 15)
            & (targets > 0).all(axis=1)
            & (targets[:, 0::2] < width).all(axis=1)
            & (targets[:, 1::2] < height).all(axis=1))
    return imw, targets[mask], mask