def _get_image_blob(roidb):
    """Builds an input blob from the images in the roidb at the specified
    scales.
    """
    num_images = len(roidb)
    # Sample random scales to use for each image in this batch
    scale_inds = np.random.randint(
        0, high=len(cfg.TRAIN.SCALES), size=num_images)
    processed_ims = []
    im_scales = []
    for i in range(num_images):
        im = cv2.imread(roidb[i]['image'])
        assert im is not None, \
            'Failed to read image \'{}\''.format(roidb[i]['image'])
        # AUG BEGIN--------------------------------
        # With probability 0.5, regenerate the image and its annotations
        # through the augmentation pipeline and rebuild the roidb entry.
        aug_flag = np.random.choice([0, 1], p=[0.5, 0.5])
        if aug_flag:
            img_id = roidb[i]['id']
            ann_ids = coco.getAnnIds(imgIds=img_id)
            anns = coco.loadAnns(ann_ids)
            new_ann, im = get_new_data(anns, im, None, background=None)
            if new_ann and new_ann[0]:
                try:
                    new_roidb, ratio_list, ratio_index = \
                        combined_roidb_for_training(
                            ('coco_2017_train',), cfg.TRAIN.PROPOSAL_FILES,
                            img_id, new_ann, coco)
                    if new_roidb:
                        # combined_roidb_for_training yields the original and
                        # the flipped entry; pick the one matching this image.
                        if roidb[i]['flipped']:
                            roidb[i] = new_roidb[1]
                        else:
                            roidb[i] = new_roidb[0]
                except IndexError:
                    print('IndexError while rebuilding roidb entry '
                          'for image {}'.format(img_id))
        # AUG END----------------------------------
        # If NOT using opencv to read in images, uncomment following lines
        # if len(im.shape) == 2:
        #     im = im[:, :, np.newaxis]
        #     im = np.concatenate((im, im, im), axis=2)
        # # flip the channel, since the original one using cv2
        # # rgb -> bgr
        # im = im[:, :, ::-1]
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        target_size = cfg.TRAIN.SCALES[scale_inds[i]]
        im, im_scale = blob_utils.prep_im_for_blob(
            im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE)
        im_scales.append(im_scale[0])
        processed_ims.append(im[0])
    # Create a blob to hold the input images [n, c, h, w]
    blob = blob_utils.im_list_to_blob(processed_ims)
    return blob, im_scales, roidb
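
# --- Usage sketch (illustrative, not part of the loader) --------------------
# A minimal example of how _get_image_blob might be driven from a minibatch
# builder. `roidb` entries are assumed to be dicts with at least 'image',
# 'id' and 'flipped' keys, matching the accesses above; the function name
# and the im_info layout here are assumptions, not the project's API.
def _example_build_minibatch(roidb):
    blob, im_scales, roidb = _get_image_blob(roidb)
    blobs = {'data': blob}
    # One (height, width, scale) row per image in the [n, c, h, w] blob
    blobs['im_info'] = np.array(
        [[blob.shape[2], blob.shape[3], s] for s in im_scales],
        dtype=np.float32)
    return blobs, roidb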
def prepare_train_img(self, idx):
    img_info = self.img_infos[idx]
    # load image
    img = mmcv.imread(osp.join(self.img_prefix, img_info['filename']))
    # load proposals if necessary
    if self.proposals is not None:
        proposals = self.proposals[idx][:self.num_max_proposals]
        # TODO: Handle empty proposals properly. Currently images with
        # no proposals are simply skipped, but in principle they could
        # still be used for training.
        if len(proposals) == 0:
            return None
        if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
            raise AssertionError(
                'proposals should have shapes (n, 4) or (n, 5), '
                'but found {}'.format(proposals.shape))
        if proposals.shape[1] == 5:
            scores = proposals[:, 4, None]
            proposals = proposals[:, :4]
        else:
            scores = None
    # ann = self.get_ann_info(idx)
    #------------------------expanded from get_ann_info--------------------------------
    img_id = self.img_infos[idx]['id']
    ann_ids = self.coco.getAnnIds(imgIds=[img_id])
    ann_info = self.coco.loadAnns(ann_ids)
    #------------------------AUG BEGIN-------------------------------------------------
    # With probability 0.5, regenerate the image and its annotations through
    # the augmentation pipeline before parsing them.
    aug_flag = np.random.choice([0, 1], p=[0.5, 0.5])
    if aug_flag:
        ann_info, img = get_new_data(ann_info, img, None, background=None)
    #------------------------AUG END---------------------------------------------------
    ann = self._parse_ann_info(ann_info, self.with_mask)
    #------------------------expanded from get_ann_info--------------------------------
    gt_bboxes = ann['bboxes']
    gt_labels = ann['labels']
    if self.with_crowd:
        gt_bboxes_ignore = ann['bboxes_ignore']
    # skip the image if there is no valid gt bbox
    if len(gt_bboxes) == 0:
        return None
    # extra augmentation
    if self.extra_aug is not None:
        img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes, gt_labels)
    # apply transforms
    flip = np.random.rand() < self.flip_ratio
    # randomly sample a scale
    img_scale = random_scale(self.img_scales, self.multiscale_mode)
    img, img_shape, pad_shape, scale_factor = self.img_transform(
        img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
    img = img.copy()
    if self.proposals is not None:
        proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                        flip)
        proposals = np.hstack([proposals, scores
                               ]) if scores is not None else proposals
    gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor, flip)
    if self.with_crowd:
        gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                               scale_factor, flip)
    if self.with_mask:
        img_sz = (img_info['width'], img_info['height'])
        masks = [get_mask(x, img_sz) for x in ann['masks']]
        gt_masks = self.mask_transform(masks, pad_shape, scale_factor, flip)
    ori_shape = (img_info['height'], img_info['width'], 3)
    img_meta = dict(
        ori_shape=ori_shape,
        img_shape=img_shape,
        pad_shape=pad_shape,
        scale_factor=scale_factor,
        flip=flip)
    data = dict(
        img=DC(to_tensor(img), stack=True),
        img_meta=DC(img_meta, cpu_only=True),
        gt_bboxes=DC(to_tensor(gt_bboxes)))
    if self.proposals is not None:
        data['proposals'] = DC(to_tensor(proposals))
    if self.with_label:
        data['gt_labels'] = DC(to_tensor(gt_labels))
    if self.with_crowd:
        data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
    if self.with_mask:
        data['gt_masks'] = DC(gt_masks, cpu_only=True)
    return data
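
# --- Usage sketch (hypothetical) ---------------------------------------------
# prepare_train_img returns None for unusable samples (no proposals, or no
# valid gt boxes after augmentation). A __getitem__ along these lines would
# re-sample until a usable image is found. `self.test_mode` and the
# prepare_test_img counterpart are assumptions about the surrounding dataset
# class, not guaranteed by the code above.
def __getitem__(self, idx):
    if self.test_mode:
        return self.prepare_test_img(idx)
    while True:
        data = self.prepare_train_img(idx)
        if data is not None:
            return data
        # pick a random replacement index and try again
        idx = np.random.choice(len(self.img_infos))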
def pull_item(self, index):
    """
    Args:
        index (int): Index

    Returns:
        tuple: Tuple (image, target, masks, height, width, crowd).
               target is the object returned by ``coco.loadAnns``.
        Note that if no crowd annotations exist, crowd will be None
    """
    img_id = self.ids[index]

    if self.has_gt:
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        # Target has {'segmentation', 'area', 'iscrowd', 'image_id', 'bbox', 'category_id'}
        target = self.coco.loadAnns(ann_ids)
    else:
        target = []

    # The split here is for compatibility with both COCO2014 and 2017 annotations.
    # In 2014, images follow the pattern COCO_{train/val}2014_%012d.jpg, while in
    # 2017 it is %012d.jpg. Our script downloads the images as %012d.jpg, so
    # convert accordingly.
    file_name = self.coco.loadImgs(img_id)[0]['file_name']
    if file_name.startswith('COCO'):
        file_name = file_name.split('_')[-1]
    path = osp.join(self.root, file_name)
    assert osp.exists(path), 'Image path does not exist: {}'.format(path)
    img = cv2.imread(path)

    if self.is_train:
        target, img = get_new_data(target, img, None, background=None)

    # Separate out crowd annotations. These are annotations that signify a large crowd of
    # objects of said class, where there is no annotation for each individual object. Both
    # during testing and training, consider these crowds as neutral.
    crowd = [x for x in target if ('iscrowd' in x and x['iscrowd'])]
    target = [x for x in target if not ('iscrowd' in x and x['iscrowd'])]
    num_crowds = len(crowd)

    # This is so we ensure that all crowd annotations are at the end of the array
    target += crowd

    height, width, _ = img.shape

    if len(target) > 0:
        # Pool all the masks for this image into one [num_objects, height, width] matrix
        masks = [self.coco.annToMask(obj).reshape(-1) for obj in target]
        masks = np.vstack(masks)
        masks = masks.reshape(-1, height, width)

    if self.target_transform is not None and len(target) > 0:
        target = self.target_transform(target, width, height)

    if self.transform is not None:
        if len(target) > 0:
            target = np.array(target)
            img, masks, boxes, labels = self.transform(
                img, masks, target[:, :4], {
                    'num_crowds': num_crowds,
                    'labels': target[:, 4]
                })
            # num_crowds is stashed inside labels so the augmentation code
            # does not have to be modified to carry it through
            num_crowds = labels['num_crowds']
            labels = labels['labels']
            target = np.hstack((boxes, np.expand_dims(labels, axis=1)))
        else:
            img, _, _, _ = self.transform(
                img, np.zeros((1, height, width), dtype=np.float64),
                np.array([[0, 0, 1, 1]]), {
                    'num_crowds': 0,
                    'labels': np.array([0])
                })
            masks = None
            target = None

    return torch.from_numpy(img).permute(
        2, 0, 1), target, masks, height, width, num_crowds
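
# --- Usage sketch (illustrative) ---------------------------------------------
# pull_item returns a (C, H, W) image tensor plus targets, masks, the original
# image size, and the crowd count. A thin __getitem__ wrapper like the one
# below would expose it to a DataLoader; the exact grouping of the returned
# values is an assumption chosen to keep image and annotations together.
def __getitem__(self, index):
    im, gt, masks, h, w, num_crowds = self.pull_item(index)
    # Group all annotation-side outputs so a custom collate_fn can batch
    # variable-length targets separately from the fixed-size image tensor.
    return im, (gt, masks, num_crowds)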