Example #1
    def __getitem__(self, idx):
        """
        Load the data from list, and match the ground-truth bounding boxes with prior bounding boxes.
        :return bbox_tensor: matched bounding box, dim: (num_priors, 4)
        :return bbox_label: matched classification label, dim: (num_priors)
        """

        # TODO: implement data loading
        # 1. Load image as well as the bounding box with its label
        # 2. Normalize the image with self.mean and self.std
        # 3. Convert the bounding box from corner form (left-top, right-bottom): [(x,y), (x+w, y+h)] to
        #    center form: [(center_x, center_y, w, h)]
        # 4. Normalize the bounding box position value from 0 to 1

        item = self.dataset_list[idx]
        image_path = item['image_path']
        # Keep per-sample state in locals: mutating self inside __getitem__
        # is unsafe once the DataLoader uses multiple workers.
        labels = torch.Tensor(np.asarray(item['labels']))
        bound_boxes = torch.Tensor(np.asarray(item['bound_boxes']))

        img = Image.open(image_path).convert('RGB')

        # Normalize the corner coordinates to [0, 1].
        w, h = img.size
        bound_boxes /= torch.Tensor([w, h, w, h]).expand_as(bound_boxes)

        # resize image (Image.ANTIALIAS was removed in Pillow 10;
        # LANCZOS is the equivalent filter)
        img = img.resize((self.image_size, self.image_size), Image.LANCZOS)
        # normalize the image pixels to (-1, 1)
        img = np.asarray(img, dtype=np.float32)
        img = (img / 255.0) * 2 - 1

        # convert to tensor and reorder HWC -> CHW; view() would only
        # reinterpret memory and scramble the channels, so permute instead
        img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous()

        # 4. Do the augmentation if needed. e.g. random clip the bounding box or flip the bounding box

        # 5. Do the matching prior and generate ground-truth labels as well as the boxes
        bbox_tensor, bbox_label_tensor = match_priors(self.prior_bound_boxes,
                                                      bound_boxes,
                                                      labels,
                                                      iou_threshold=0.5)
        #[DEBUG] check the output.
        assert isinstance(bbox_label_tensor, torch.Tensor)
        assert isinstance(bbox_tensor, torch.Tensor)
        assert bbox_tensor.dim() == 2
        assert bbox_tensor.shape[1] == 4
        assert bbox_label_tensor.dim() == 1
        assert bbox_label_tensor.shape[0] == bbox_tensor.shape[0]

        return img_tensor, bbox_tensor, bbox_label_tensor
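The step-3 comment in example #1 calls for a corner-to-center conversion that the body never actually performs; a minimal sketch of the missing helper, assuming boxes are stored as (x_min, y_min, x_max, y_max) rows as in the later examples:

    import torch

    def corner2center(boxes_corner):
        """Convert corner-form rows (x_min, y_min, x_max, y_max) to
        center-form rows (center_x, center_y, w, h)."""
        lt = boxes_corner[:, :2]   # left-top corners, shape (N, 2)
        rb = boxes_corner[:, 2:]   # right-bottom corners, shape (N, 2)
        wh = rb - lt               # widths and heights
        return torch.cat([lt + wh / 2, wh], dim=1)

Applied after the division by [w, h, w, h] above, the result is already normalized to [0, 1], which is what step 4 asks for.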
Example #2
    def test_priorbb(self):
        prior_layer_cfg = [
            # Example:
            {
                'layer_name': 'Conv5',
                'feature_dim_hw': (38, 38),
                'bbox_size': (30, 30),
                'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, 1.0)
            },
            {
                'layer_name': 'Conv11',
                'feature_dim_hw': (19, 19),
                'bbox_size': (60, 60),
                'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, 1.0)
            },
            {
                'layer_name': 'Conv14_2',
                'feature_dim_hw': (10, 10),
                'bbox_size': (111, 111),
                'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, 1.0)
            },
            {
                'layer_name': 'Conv15_2',
                'feature_dim_hw': (5, 5),
                'bbox_size': (162, 162),
                'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, 1.0)
            },
            {
                'layer_name': 'Conv16_2',
                'feature_dim_hw': (3, 3),
                'bbox_size': (213, 213),
                'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, 1.0)
            },
            {
                'layer_name': 'Conv17_2',
                'feature_dim_hw': (1, 1),
                'bbox_size': (264, 264),
                'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, 1.0)
            }
        ]
        pp = generate_prior_bboxes(prior_layer_cfg)

        print(pp[0:1], pp[39:40])
        temp = iou(pp[0:6], pp[0:1])
        print('iou', temp)
        gt_label = torch.tensor([1])
        print('matching', match_priors(pp[0:38], pp[38:39], gt_label, 0.5))

        self.assertEqual('foo'.upper(), 'FOO')
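`generate_prior_bboxes` itself is not shown in any of these examples. A minimal sketch of the usual SSD-style construction from a config like the one above, assuming one center-form prior per aspect ratio per feature-map cell, normalized to [0, 1] (the SSD paper additionally inserts an extra sqrt(s_k * s_{k+1}) square box, which this sketch omits):

    import math
    import torch

    def generate_prior_bboxes_sketch(prior_layer_cfg, img_size=300.0):
        """Sketch: center-form (cx, cy, w, h) priors in [0, 1]."""
        priors = []
        for layer in prior_layer_cfg:
            fh, fw = layer['feature_dim_hw']
            scale = layer['bbox_size'][0] / img_size  # normalized base size
            for i in range(fh):
                for j in range(fw):
                    cx, cy = (j + 0.5) / fw, (i + 0.5) / fh  # cell center
                    for ar in layer['aspect_ratio']:
                        priors.append([cx, cy,
                                       scale * math.sqrt(ar),
                                       scale / math.sqrt(ar)])
        return torch.tensor(priors, dtype=torch.float32)

With the six-layer config above this yields (38*38 + 19*19 + 10*10 + 5*5 + 3*3 + 1*1) * 6 = 11,640 priors; the exact count depends on how the real helper treats the duplicated 1.0 ratio.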
Example #3

    def __getitem__(self, idx):
        img_dir = self.img_dir_list[idx]
        json_dir = self.json_dir_list[idx]
        # cv2.imread's second argument is an imread flag, not a color
        # conversion code, so convert BGR -> RGB explicitly instead
        sample_img = cv2.cvtColor(cv2.imread(img_dir), cv2.COLOR_BGR2RGB)
        gt_bboxes, gt_labels = get_bbox_label(json_dir)
        gt_bboxes = torch.tensor(gt_bboxes, dtype=torch.float32)
        gt_labels = torch.tensor(gt_labels, dtype=torch.int32)
        # data augmentation
        data_augmentation = SSDAugmentation(mode=self.mode)
        sample_img = np.array(sample_img, dtype=np.float64)
        sample_img, gt_bboxes, gt_labels = data_augmentation(
            sample_img, gt_bboxes, gt_labels)
        # Do the matching prior and generate ground-truth labels as well as the boxes
        bbox_tensor, bbox_label_tensor = match_priors(
            self.prior_bboxes, gt_bboxes, gt_labels)
        return sample_img, bbox_tensor, bbox_label_tensor.long(), self.prior_bboxes
Example #4

    def __getitem__(self, idx):
        """
        Load the data from list, and match the ground-truth bounding boxes with prior bounding boxes.
        :return bbox_tensor: matched bounding box, dim: (num_priors, 4)
        :return bbox_label: matched classification label, dim: (num_priors)
        """

        # data loading
        # 1. Load image as well as the bounding box with its label
        item = self.dataset_list[idx]
        img_path = item['img']
        h = item['h']
        w = item['w']
        sample_labels = item['labels']
        sample_bboxes_corner = item['bboxes']
        img = Image.open(img_path)

        # data augment
        if self.train:
            img, sample_bboxes_corner = self.random_flip(
                img, sample_bboxes_corner)

        # crop
        img, sample_bboxes_corner, sample_labels = self.crop_img(
            img, sample_bboxes_corner, sample_labels, img_path, h)

        img = img.resize((self.img_size, self.img_size))

        # Convert the bounding box from corner form (left-top, right-bottom): [(x,y), (x+w, y+h)] to
        #    center form: [(center_x, center_y, w, h)]
        lt = sample_bboxes_corner[:, 0, :]
        rb = sample_bboxes_corner[:, 1, :]
        wh = rb - lt
        c = (lt + wh / 2)
        sample_bboxes = np.stack(
            (c[:, 0] / h, c[:, 1] / h, wh[:, 0] / h, wh[:, 1] / h),
            axis=1)  # normalize by h (crop_img above is assumed to return an h-by-h square crop)

        # Normalize the image with self.mean and self.std
        # (np.float was removed in NumPy 1.24; use np.float32)
        sample_img = (np.array(img, dtype=np.float32) - self.mean) / self.std
        img_tensor = torch.from_numpy(sample_img).float().permute(2, 0, 1)  # HWC -> CHW
        sample_bboxes = torch.from_numpy(np.asarray(sample_bboxes)).float()
        sample_labels = torch.from_numpy(np.asarray(sample_labels)).float()

        # matching prior, generate ground-truth labels and boxes
        bbox_tensor, bbox_label_tensor, bbox_offset_tensor = match_priors(
            self.prior_bboxes.cpu(),
            sample_bboxes,
            sample_labels,
            iou_threshold=0.5)

        if self.show:
            self.show_bbox(img, sample_bboxes.numpy(),
                           self.prior_bboxes.cpu().numpy(),
                           bbox_label_tensor.numpy())

        # [DEBUG] check the output.
        assert isinstance(bbox_label_tensor, torch.Tensor)
        assert isinstance(bbox_tensor, torch.Tensor)
        assert bbox_tensor.dim() == 2
        assert bbox_tensor.shape[1] == 4
        assert bbox_label_tensor.dim() == 1
        assert bbox_label_tensor.shape[0] == bbox_tensor.shape[0]

        return bbox_offset_tensor, bbox_label_tensor, img_tensor
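Both the `iou` call in example #2's test and every `match_priors` variant need a pairwise IoU, but the repos' own helpers are never shown. A minimal corner-form version, under the assumption that both arguments are (x_min, y_min, x_max, y_max) rows:

    import torch

    def iou(boxes_a, boxes_b):
        """Pairwise IoU of corner-form boxes: (N, 4) x (M, 4) -> (N, M)."""
        lt = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
        rb = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
        wh = (rb - lt).clamp(min=0)          # zero where boxes do not overlap
        inter = wh[..., 0] * wh[..., 1]      # intersection areas, (N, M)
        area_a = ((boxes_a[:, 2] - boxes_a[:, 0]) *
                  (boxes_a[:, 3] - boxes_a[:, 1]))[:, None]
        area_b = ((boxes_b[:, 2] - boxes_b[:, 0]) *
                  (boxes_b[:, 3] - boxes_b[:, 1]))[None, :]
        return inter / (area_a + area_b - inter + 1e-10)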
Example #5

    def __getitem__(self, index):
        """
        Load the data from list, and match the ground-truth bounding boxes with prior bounding boxes. Labels include
        [car, traffic sign, person]; irrelevant objects are set to 0.
        :return bbox_tensor: matched bounding box, dim: (num_priors, 4).
        :return bbox_label: matched classification label, dim: (num_priors).
        """
        # Alert current dataset status.
        digit = str(len(str(len(self.dataset_list))))
        self.prepared_index += self.num_worker
        current_index = (self.prepared_index % len(self.dataset_list))
        n_instance = ('[{:' + digit + 'd}').format(current_index) + '/' + str(
            len(self.dataset_list)) + ']'
        n_percentage = '[{:6.2f}%]'.format(current_index * 100. /
                                           len(self.dataset_list))
        print('\r' +
              ('Preparing dataset at index [{:' + digit + 'd}').format(index) +
              ']' + n_instance + n_percentage,
              end='')

        if self.is_debug:
            pr = cProfile.Profile()
            pr.enable()
        else:
            pr = None

        # Prepare configurations.
        item = self.dataset_list[index]
        self.imgWidth = float(item['imgWidth'])
        self.imgHeight = float(item['imgHeight'])
        self.resize_ratio = min(self.imgHeight / 300., self.imgWidth / 300.)
        image = Image.open(item['file'])
        confidences, locations = self.sanitize(item)

        # Return the case there is no match at all.
        if confidences.nonzero().shape[0] == 0:
            image = self.resize(image)

            if image.shape != torch.Size([self.imgHeight, self.imgWidth, 3]):

                # Filter out broken input image by a 300x300x3 black patch.
                image = torch.zeros([300, 300, 3])
            else:

                # Crop the top left 300x300x3 patch if image is not corrupted.
                image = image[0:300, 0:300, :]

            image = self.normalize(image)
            # Reorder HWC -> CHW; view() would scramble the pixels.
            image = image.permute(2, 0, 1)

            return image, confidences, locations

        # Resize the image and label first.
        image = self.resize(image)
        locations = self.resize(locations)

        # Prepare image array first to update crop.
        image = self.crop(image)
        image = self.brighten(image)
        image = self.normalize(image)

        # Prepare labels second to apply crop.
        locations = self.crop(locations)
        locations = self.normalize(locations)

        # Do the matching prior and generate ground-truth labels as well as the boxes.
        confidences = helper.match_priors(
            self.prior_bboxes,
            locations,
            iou_threshold=self.matching_iou_threshold)

        if self.is_debug:
            pr.disable()
            pr.print_stats(sort='time')

        # Reorder image to CHW (permute transposes; view() would not).
        image = image.permute(2, 0, 1)

        return image, confidences, self.prior_bboxes
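A recurring detail across these loaders: reordering an HWC image tensor to CHW has to use permute (or an explicit numpy transpose); .view() merely reinterprets the underlying memory and scrambles the pixels. A short, runnable illustration:

    import torch

    img_hwc = torch.rand(300, 300, 3)                # height x width x channels
    img_chw = img_hwc.permute(2, 0, 1).contiguous()  # channels x height x width
    assert img_chw.shape == (3, 300, 300)
    assert torch.equal(img_chw[0], img_hwc[..., 0])  # channel planes preserved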
Example #6
    def __getitem__(self, idx):
        """
        Load the data from list, and match the ground-truth bounding boxes with prior bounding boxes.
        :return bbox_tensor: matched bounding box, dim: (num_priors, 4)
        :return bbox_label: matched classification label, dim: (num_priors)
        """

        # TODO: implement data loading
        # 1. Load image as well as the bounding box with its label
        item = self.dataset_list[idx]
        img = Image.open(item['img_path'])
        label = item['label']
        bbox = item['bbox']
        bbox_arr = np.array(bbox).reshape(-1, 4)  # tuple to array

        # 2. Random crop to 1024*1024
        bbox_croped = []
        label_croped = []
        num_box_arr = len(bbox_arr)
        flag = False
        count = 0
        while not flag:
            count += 1
            crop_startX = random.uniform(0, 1) * 1024
            crop_size = 1024
            # if no good crop position is found after 200 tries, fall back
            # to cropping exactly around the first bounding box
            if count == 200:
                crop_startX = bbox_arr[0][0]
                crop_size = bbox_arr[0][2] - bbox_arr[0][0]
            for i in range(num_box_arr):
                # clamp x_max to the image width (2048);
                # bamberg_000000_000441_gtCoarse_polygons.json has a box past it
                if bbox_arr[i][2] > 2048:
                    bbox_arr[i][2] = 2048
                if (bbox_arr[i][0] >= crop_startX
                        and bbox_arr[i][2] <= crop_startX + crop_size):
                    flag = True
                    box = [
                        bbox_arr[i][0] - crop_startX, bbox_arr[i][1],
                        bbox_arr[i][2] - crop_startX, bbox_arr[i][3]
                    ]
                    bbox_croped.append(box)
                    label_croped.append(label[i])

        crop_pos = (crop_startX, 0, crop_startX + crop_size, crop_size)
        img_croped = img.crop(crop_pos)
        resized_size = 300
        img_resized = img_croped.resize((resized_size, resized_size))
        bbox_resized = np.divide(bbox_croped, crop_size / resized_size)

        # 3. Convert the bounding box from corner form (left-top, right-bottom): [(x,y), (x+w, y+h)] to
        #    center form: [(center_x, center_y, w, h)]
        bbox_center_form = bbox_helper.corner2center(
            torch.tensor(bbox_resized))

        # 4. Do the augmentation if needed. e.g. random clip the bounding box or flip the bounding box
        # Random horizontal flip
        if random.uniform(0, 1) > 0.5:
            # mirror the x center coordinates
            bbox_center_form[:, 0] = resized_size - bbox_center_form[:, 0]
            img_resized = img_resized.transpose(Image.FLIP_LEFT_RIGHT)

        # Normalize image (convert the PIL image to an array first so the
        # broadcasting against self.mean / self.std is explicit)
        img_normalized = (np.asarray(img_resized, dtype=np.float32) - self.mean) / self.std

        # 5. Normalize the bounding box position values to [0, 1]
        sample_labels = torch.tensor(label_croped, dtype=torch.float32)
        # bbox_center_form is already a tensor; torch.tensor() on a tensor
        # triggers a copy-construct warning, so cast with .float() instead
        sample_bboxes = (bbox_center_form / resized_size).float()

        img_tensor = torch.from_numpy(img_normalized).permute(2, 0, 1)  # HWC -> CHW

        # 6. Do the matching prior and generate ground-truth labels as well as the boxes
        bbox_tensor, bbox_label_tensor = bbox_helper.match_priors(
            self.prior_bboxes, sample_bboxes, sample_labels, iou_threshold=0.5)

        # [DEBUG] check the output.
        assert isinstance(bbox_label_tensor, torch.Tensor)
        assert isinstance(bbox_tensor, torch.Tensor)
        assert bbox_tensor.dim() == 2
        assert bbox_tensor.shape[1] == 4
        assert bbox_label_tensor.dim() == 1
        assert bbox_label_tensor.shape[0] == bbox_tensor.shape[0]

        return bbox_tensor, bbox_label_tensor, img_tensor
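The examples disagree on what `match_priors` returns (boxes and labels, or additionally offsets). A minimal sketch of the common SSD matching logic, assuming corner-form inputs and the `iou` helper sketched after example #4:

    import torch

    def match_priors_sketch(priors, gt_boxes, gt_labels, iou_threshold=0.5):
        """Sketch: per-prior best ground truth, thresholded to background."""
        ious = iou(priors, gt_boxes)                # (num_priors, num_gt)
        best_gt_iou, best_gt_idx = ious.max(dim=1)  # best GT for each prior

        matched_boxes = gt_boxes[best_gt_idx]       # (num_priors, 4)
        matched_labels = gt_labels[best_gt_idx].clone()
        matched_labels[best_gt_iou < iou_threshold] = 0  # background label

        # Force-match each ground truth to its best prior so that no
        # object is left without a positive example.
        for gt_i, prior_i in enumerate(ious.argmax(dim=0)):
            matched_boxes[prior_i] = gt_boxes[gt_i]
            matched_labels[prior_i] = gt_labels[gt_i]
        return matched_boxes, matched_labels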
Example #7
    def __getitem__(self, idx):
        """
        Load the data from list, and match the ground-truth bounding boxes with prior bounding boxes.
        :return bbox_tensor: matched bounding box, dim: (num_priors, 4)
        :return bbox_label: matched classification label, dim: (num_priors)
        """
        # Map an augmented index back to its source sample ("is not 0"
        # raises a SyntaxWarning on Python 3.8+; compare with != instead)
        sample_idx = idx // self.n_augmented if self.n_augmented != 0 else idx
        sample = self.dataset_list[sample_idx]

        # TODO: implement data loading
        # 1. Load image as well as the bounding box with its label
        image = Image.open(sample['image_path'])
        label = sample['label']
        cls = sample['class']
        bbox = sample['bbox']

        # 2. convert the image and bbox to numpy array and crop to square form
        image = np.asarray(image, dtype=np.uint8)
        bbox_cr = np.asarray(bbox, dtype=np.float32)
        show_list = [{
            'image': image.copy(),
            'bbox_cr': bbox_cr.copy(),
            'label': label.copy(),
            'title': 'Original'
        }]
        # image, bbox_cr, cls, label = self.crop(image, bbox_cr, cls, label, is_random=False)
        # show_list.append({'image': image.copy(), 'bbox_cr': bbox_cr.copy(), 'label': label.copy(), 'title': 'Cropping(Square)'})

        # 3. Do the augmentation if needed. e.g. random clip the bounding box or flip the bounding box
        if self.n_augmented > 0:

            # calling the cropping function
            image, bbox_cr, cls, label = self.crop(image,
                                                   bbox_cr,
                                                   cls,
                                                   label,
                                                   is_random=True)
            show_list.append({
                'image': image.copy(),
                'bbox_cr': bbox_cr.copy(),
                'label': label.copy(),
                'title': 'Cropping(Random)'
            })

            # calling the flip function
            image, bbox_cr = self.flip(image, bbox_cr)
            show_list.append({
                'image': image.copy(),
                'bbox_cr': bbox_cr.copy(),
                'label': label.copy(),
                'title': 'Flipping'
            })

        # 4. resize the image (H, W, C) to net size(300, 300)
        bbox_cr[:, [0, 2]] *= self.net_size[0] / image.shape[1]  # scale x by width ratio
        bbox_cr[:, [1, 3]] *= self.net_size[1] / image.shape[0]  # scale y by height ratio
        image = cv2.resize(image, dsize=self.net_size, interpolation=cv2.INTER_CUBIC)
        show_list.append({
            'image': image.copy(),
            'bbox_cr': bbox_cr.copy(),
            'label': label.copy(),
            'title': 'Resizing'
        })

        # Check intermediate input
        if self.debug:
            self.show_image(show_list)

        # 5. Convert the bounding box from corner form (left-top, right-bottom): [(x,y), (x+w, y+h)] to
        #    center form: [(center_x, center_y, w, h)]
        center_xy = (bbox_cr[:, 2:] + bbox_cr[:, :2]) / 2.
        center_wh = (bbox_cr[:, 2:] - bbox_cr[:, :2])
        bbox_ct = np.concatenate((center_xy, center_wh), axis=1)

        # 6. Normalize the image with self.mean and self.std
        image_norm = np.divide(
            (np.asarray(image, dtype=np.float32) - self.mean), self.std)
        # Normalize the bounding box position value from 0 to 1
        bbox_ct[:, [0, 2]] = bbox_ct[:, [0, 2]] / self.net_size[0]
        bbox_ct[:, [1, 3]] = bbox_ct[:, [1, 3]] / self.net_size[1]

        # 7. Do the matching prior and generate ground-truth labels as well as the boxes
        # Tensors stay on the CPU here; they are only moved below if CUDA
        # is available.
        sample_labels = torch.from_numpy(np.asarray(cls)).type(torch.long)
        sample_bboxes = torch.from_numpy(bbox_ct).type(torch.float32)
        if torch.cuda.is_available():
            sample_labels = sample_labels.cuda()
            sample_bboxes = sample_bboxes.cuda()

        bbox_tensor, bbox_label_tensor = match_priors(self.prior_bboxes,
                                                      sample_bboxes,
                                                      sample_labels,
                                                      iou_threshold=0.5)
        # transpose() without axes reverses all dims to (C, W, H);
        # pass the axes explicitly to get CHW
        sample_img_tensor = torch.from_numpy(
            image_norm.transpose(2, 0, 1)).type(torch.float32)

        if torch.cuda.is_available():
            bbox_tensor = bbox_tensor.cuda()
            bbox_label_tensor = bbox_label_tensor.cuda()
            sample_img_tensor = sample_img_tensor.cuda()

        # Check the final tensor input
        if self.debug:
            self.show_tensor_image(sample_img_tensor.clone(),
                                   bbox_tensor.clone(),
                                   bbox_label_tensor.clone(), label.copy(),
                                   sample_idx)

        # [DEBUG] check the output.
        assert isinstance(sample_img_tensor, torch.Tensor)
        assert isinstance(bbox_label_tensor, torch.Tensor)
        assert isinstance(bbox_tensor, torch.Tensor)
        assert bbox_tensor.dim() == 2
        assert bbox_tensor.shape[1] == 4
        assert bbox_label_tensor.dim() == 1
        assert bbox_label_tensor.shape[0] == bbox_tensor.shape[0]

        return sample_img_tensor, bbox_label_tensor, bbox_tensor, self.prior_bboxes
Example #8

    def __getitem__(self, idx):
        """
        Load the data from list, and match the ground-truth bounding boxes with prior bounding boxes.
        :return bbox_tensor: matched bounding box, dim: (num_priors, 4)
        :return bbox_label: matched classification label, dim: (num_priors)
        """

        # 1. Load image as well as the bounding box with its label
        item = self.dataset_list[idx]
        file_path = item['file_path']
        ground_truth = item['label']
        sample_labels = np.asarray(ground_truth[0], dtype=np.float32)
        sample_bboxes = np.asarray(ground_truth[1], dtype=np.float32)
        sample_img = Image.open(file_path)

        # pick one of four augmentations at random (3 leaves the image as-is)
        augmentation = np.random.randint(0, 4)
        sample_img, sample_bboxes, sample_labels = self.crop(
            sample_img, sample_bboxes, sample_labels)

        # random brightness
        if augmentation == 0:
            sample_img = ImageEnhance.Brightness(sample_img).enhance(
                np.random.randint(5, 25) / 10.0)

        # horizontal flip
        if augmentation == 1:
            sample_img = sample_img.transpose(Image.FLIP_LEFT_RIGHT)
            width = sample_img.size[0]
            flipped_boxes = sample_bboxes.copy()
            # mirror x_min / x_max around the image width
            sample_bboxes[:, 0] = width - flipped_boxes[:, 2]
            sample_bboxes[:, 2] = width - flipped_boxes[:, 0]

        # random blur or sharpen
        if augmentation == 2:
            if random.choice([True, False]):
                sample_img = sample_img.filter(ImageFilter.BLUR)
            else:
                sample_img = sample_img.filter(ImageFilter.SHARPEN)

        # augmentation == 3: keep the image unchanged
        # 2. Normalize the image with self.mean and self.std
        img = sample_img.resize((300, 300))
        img_array = (np.asarray(img) - self.mean) / self.std

        # 3. Normalize the bounding box coordinates to [0, 1] using the
        #    pre-resize image size (the boxes are still in the crop's frame)
        sample_bboxes = torch.Tensor(sample_bboxes) / torch.Tensor(
            [sample_img.size[0], sample_img.size[1],
             sample_img.size[0], sample_img.size[1]])

        # 4. Convert the normalized boxes from corner form (left-top,
        #    right-bottom) to center form (center_x, center_y, w, h)
        sample_bboxes = corner2center(sample_bboxes)

        # 5. Do the matching prior and generate ground-truth labels as well as the boxes
        bbox_tensor, bbox_label_tensor = match_priors(
            self.prior_bboxes.cuda(), sample_bboxes.cuda(),
            torch.Tensor(sample_labels).cuda(), iou_threshold=0.45)

        # Reorder HWC -> CHW (view() would only reinterpret memory)
        img_tensor = torch.Tensor(img_array).permute(2, 0, 1)
        # [DEBUG] check the output.
        assert isinstance(bbox_label_tensor, torch.Tensor)
        assert isinstance(bbox_tensor, torch.Tensor)
        assert bbox_tensor.dim() == 2
        assert bbox_tensor.shape[1] == 4
        assert bbox_label_tensor.dim() == 1
        assert bbox_label_tensor.shape[0] == bbox_tensor.shape[0]
        return img_tensor, bbox_tensor, bbox_label_tensor
Example #9

    def __getitem__(self, idx):
        """
        Load the data from list, and match the ground-truth bounding boxes with prior bounding boxes.
        :return bbox_tensor: matched bounding box, dim: (num_priors, 4)
        :return bbox_label: matched classification label, dim: (num_priors)
        """

        # TODO: implement data loading
        # 1. Load image as well as the bounding box with its label
        # 2. Normalize the image with self.mean and self.std
        # 3. Convert the bounding box from corner form (left-top, right-bottom): [(x,y), (x+w, y+h)] to
        #    center form: [(center_x, center_y, w, h)]
        # 4. Normalize the bounding box position value from 0 to 1
        item = self.dataset_list[idx]
        image_path = item['image_path']
        labels = torch.Tensor(np.asarray(item['labels'])).cuda()
        locations = torch.Tensor(item['bboxes']).cuda()

        image = Image.open(image_path)

        self.imgWidth, self.imgHeight = image.size
        self.resize_ratio = min(self.imgHeight / 300., self.imgWidth / 300.)

        locations = helper.corner2center(locations)

        image = self.resize(image)
        locations = self.resize(locations)

        # Prepare image array first to update crop.
        image = self.crop(image)
        image = self.brighten(image)
        image = self.normalize(image)

        # Prepare labels second to apply crop.
        locations = self.crop(locations)
        locations = self.normalize(locations)

        # reorder HWC -> CHW (permute, not view, to actually transpose)
        img_tensor = image.permute(2, 0, 1)
        img_tensor = img_tensor.cuda()

        labels = labels[self.ios_index]

        # 4. Do the augmentation if needed. e.g. random clip the bounding box or flip the bounding box

        # 5. Do the matching prior and generate ground-truth labels as well as the boxes
        bbox_tensor, bbox_label_tensor = match_priors(
            self.prior_bboxes,
            helper.center2corner(locations),
            labels,
            iou_threshold=0.5)

        # [DEBUG] check the output.
        # assert isinstance(bbox_label_tensor, torch.Tensor)
        # assert isinstance(bbox_tensor, torch.Tensor)
        # assert bbox_tensor.dim() == 2
        # assert bbox_tensor.shape[1] == 4
        # assert bbox_label_tensor.dim() == 1
        # assert bbox_label_tensor.shape[0] == bbox_tensor.shape[0]
        return img_tensor, bbox_tensor, bbox_label_tensor
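Every example returns fixed-size tensors per item (num_priors does not vary with the sample), so the default DataLoader collation batches them without a custom collate_fn. A runnable toy illustration of that contract (8732 is the canonical SSD300 prior count, assumed here for concreteness):

    import torch
    from torch.utils.data import Dataset, DataLoader

    class ToySSDDataset(Dataset):
        """Toy stand-in with the same return contract as the examples above."""
        def __len__(self):
            return 8

        def __getitem__(self, idx):
            img_tensor = torch.rand(3, 300, 300)      # CHW image
            bbox_tensor = torch.rand(8732, 4)         # matched boxes per prior
            bbox_label_tensor = torch.zeros(8732, dtype=torch.long)
            return img_tensor, bbox_tensor, bbox_label_tensor

    loader = DataLoader(ToySSDDataset(), batch_size=4, shuffle=True)
    for img, boxes, labels in loader:
        print(img.shape, boxes.shape, labels.shape)   # all batched along dim 0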