def load_mask(self, image_id):
    """Load instance masks for the given image.

    Different datasets use different ways to store masks. This function
    converts the different mask format to one format in the form of a
    bitmap [height, width, instances].

    Annotations whose category does not map to a class ID (falsy result
    from ``map_source_class_id``) and annotations whose mask is entirely
    below 1 are skipped.

    Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D int32 array of class IDs of the instance masks.
            Crowd annotations carry a negated class ID.

        If no annotation yields a mask, the result of the parent class's
        ``load_mask`` is returned instead.
    """
    image_info = self.image_info[image_id]

    instance_masks = []
    class_ids = []
    # Build mask of shape [height, width, instance_count] and list
    # of class IDs that correspond to each channel of the mask.
    for annotation in image_info["annotations"]:
        class_id = self.map_source_class_id(
            "taco.{}".format(annotation['category_id']))
        if not class_id:
            continue

        m = utils.annToMask(annotation, image_info["height"],
                            image_info["width"])
        # Some objects are so small that they're less than 1 pixel area
        # and end up rounded out. Skip those objects.
        if m.max() < 1:
            continue

        # Is it a crowd? If so, use a negative class ID.
        if annotation['iscrowd']:
            class_id *= -1
            # For crowd masks, annToMask() sometimes returns a mask
            # smaller than the given dimensions. In that case the mask is
            # replaced by an all-ones mask covering the whole image.
            if (m.shape[0] != image_info["height"]
                    or m.shape[1] != image_info["width"]):
                m = np.ones(
                    [image_info["height"], image_info["width"]],
                    dtype=bool)

        instance_masks.append(m)
        class_ids.append(class_id)

    if not class_ids:
        # Call super class to return an empty mask
        return super(Taco, self).load_mask(image_id)

    # Pack instance masks into an array. The builtin ``bool`` is used as
    # the dtype because the ``np.bool`` alias was removed in NumPy 1.24.
    mask = np.stack(instance_masks, axis=2).astype(bool)
    class_ids = np.array(class_ids, dtype=np.int32)
    return mask, class_ids
def data_generator():
    """Endlessly yield randomly sampled training examples from a COCO set.

    The annotation file is read from
    ``{config.DATASET_DIR}/annotations/instances_{config.DATASET_TYPE}.json``
    and images from ``config.DATASET_DIR/config.DATASET_TYPE``. Each
    iteration picks a random image ID, loads its non-crowd annotations and
    builds padded ground-truth arrays plus precomputed RPN targets.
    Images without any non-crowd annotation are skipped.

    Yields:
        A 10-tuple ``(img, bboxs, rpn_labels, rpn_deltas, rpn_mask,
        rpn_positive_range, rpn_positive_mask, cls, masks,
        valid_label_range)`` where

        * ``img`` is the float32 image scaled by 1/255 and passed through
          ``utils.resize_keep_ratio(img, (1024, 1024))``;
        * ``bboxs``, ``cls`` and ``masks`` are trimmed or zero-padded along
          the first axis to ``config.MAX_GT_TRAIN_INSTANCES`` entries, and
          ``valid_label_range`` is the number of real (unpadded) entries;
        * ``cls`` holds ``category_id + 1`` (0 is left for background);
        * ``masks`` are per-instance box crops resized to
          ``config.MASK_OUTPUT_SHAPE`` squared;
        * ``rpn_deltas`` / ``rpn_positive_mask`` are padded to
          ``config.RPN_ANCHORS_TRAIN_PER_IMAGE`` rows and
          ``rpn_positive_range`` is the row count before padding.
    """
    # Function-scope import so this block does not rely on the module
    # header already importing ``os``.
    import os

    ann_file = '{}/annotations/instances_{}.json'.format(
        config.DATASET_DIR, config.DATASET_TYPE)
    coco = COCO(ann_file)
    categories = coco.loadCats(coco.getCatIds())
    nms = [cat['name'] for cat in categories]
    print('COCO categories: \n{}\n'.format(' '.join(nms)))

    img_ids = coco.getImgIds()
    all_anchors = utils.generate_anchors()
    # Portable replacement for the previous hard-coded '\\' separators.
    image_dir = os.path.join(config.DATASET_DIR, config.DATASET_TYPE)
    max_gt = config.MAX_GT_TRAIN_INSTANCES

    while True:
        rand = np.random.randint(0, len(img_ids))
        img_info = coco.loadImgs(img_ids[rand])[0]

        ann_ids = coco.getAnnIds(imgIds=img_info['id'], iscrowd=0)
        anns = coco.loadAnns(ann_ids)
        if not anns:
            # np.vstack() below raises ValueError on an empty list, so an
            # image without non-crowd annotations cannot form a sample.
            continue

        # NOTE(review): scipy.ndimage.imread was removed in SciPy 1.2;
        # this call only works on older SciPy. Confirm which loader
        # (and which channel order / dtype) the pipeline should use.
        img = scipy.ndimage.imread(
            os.path.join(image_dir, img_info['file_name']))
        img = img.astype(np.float32) / 255.
        ratio, img, offset = utils.resize_keep_ratio(img, (1024, 1024))

        bboxs = np.vstack([ann['bbox'] for ann in anns])
        # OFFSET one for background
        cls = np.array([ann['category_id'] + 1 for ann in anns])
        masks = np.array([
            utils.annToMask(ann, img_info['height'], img_info['width'])
            for ann in anns
        ])

        # Crop every full-image mask to its box and resize the crop to the
        # desired shape. The slicing treats a box as [x, y, width, height].
        # The builtin ``int`` replaces the ``np.int`` alias, which was
        # removed in NumPy 1.24.
        bboxs_ind = bboxs.astype(int)
        masks = np.array([
            cv2.resize(
                mask[bboxs_ind[i, 1]:bboxs_ind[i, 1] + bboxs_ind[i, 3],
                     bboxs_ind[i, 0]:bboxs_ind[i, 0] + bboxs_ind[i, 2]],
                (config.MASK_OUTPUT_SHAPE, config.MASK_OUTPUT_SHAPE))
            for i, mask in enumerate(masks)
        ])

        # Move the boxes into the resized image's coordinate frame.
        bboxs = bboxs * ratio
        bboxs[:, :2] += offset
        # RPN targets are computed from the full, untrimmed box list.
        bboxs_rpn = bboxs

        # we pad or trim all labels to MAX_GT_TRAIN_INSTANCES to make it
        # batched
        if bboxs.shape[0] > max_gt:
            valid_label_range = max_gt
            bboxs = bboxs[:max_gt, :]
            cls = cls[:max_gt]
            masks = masks[:max_gt, :, :]
        else:
            valid_label_range = bboxs.shape[0]
            bboxs = np.pad(
                bboxs, ((0, max_gt - bboxs.shape[0]), (0, 0)),
                mode='constant',
                constant_values=((0, 0), (0, 0)))
            cls = np.pad(
                cls, (0, max_gt - cls.shape[0]),
                mode='constant',
                constant_values=(0, 0))
            masks = np.pad(
                masks, ((0, max_gt - masks.shape[0]), (0, 0), (0, 0)),
                mode='constant',
                constant_values=((0, 0), (0, 0), (0, 0)))

        # pre compute rpn targets
        anchor_types, matches = utils.generate_anchor_types(
            all_anchors, bboxs_rpn)
        rpn_positive_mask, rpn_mask = utils.get_mask(anchor_types)
        rpn_labels = utils.generate_rpn_labels(anchor_types, rpn_mask)
        rpn_deltas = utils.generate_rpn_deltas(
            all_anchors, bboxs_rpn, rpn_positive_mask, matches)
        rpn_positive_range = rpn_deltas.shape[0]

        # do some padding
        rpn_pad = config.RPN_ANCHORS_TRAIN_PER_IMAGE - rpn_positive_range
        rpn_deltas = np.pad(rpn_deltas, ((0, rpn_pad), (0, 0)), 'constant')
        rpn_positive_mask = np.pad(
            rpn_positive_mask, (0, rpn_pad), 'constant', constant_values=-1)

        if config.DEBUG:
            # Draw ground-truth boxes (red) and matched anchors (blue).
            fig = plt.figure()
            ax = fig.add_subplot(111)
            plt.imshow(img)
            for bbox in bboxs:
                ax.add_patch(
                    patches.Rectangle(
                        (bbox[0], bbox[1]),
                        bbox[2],
                        bbox[3],
                        edgecolor="red",
                        fill=False  # remove background
                    ))
            for m in matches:
                ax.add_patch(
                    patches.Rectangle(
                        (all_anchors[m][0], all_anchors[m][1]),
                        all_anchors[m][2],
                        all_anchors[m][3],
                        edgecolor="blue",
                        fill=False  # remove background
                    ))
            plt.show()

        # we feed precomputed rpn masks on multi-threaded cpu
        print()
        yield (img, bboxs, rpn_labels, rpn_deltas, rpn_mask,
               rpn_positive_range, rpn_positive_mask, cls, masks,
               valid_label_range)
def load_augment_data(self, image_id):
    """Generate augmented data for the image with the given ID.

    The image gets a random gamma/gain adjustment (both drawn from
    [0.8, 1)) and is then rotated by a random angle in [-5, 5) degrees and
    cropped via ``utils.rotate_and_crop_images`` together with its masks
    (and its coordinate map, when there is one).

    Returns:
        A tuple ``(image, masks, coords, class_ids, scales, domain_label)``.
        ``domain_label`` is 0 for "ShapeNetTOI"/"Real" images (which have a
        coordinate map) and 1 for "coco" images, whose ``coords`` are all
        zeros and whose ``scales`` are all ones. A "coco" image that ends
        up without any instance yields ``masks`` of shape (0, 0, 0) and an
        empty ``class_ids``.

    Raises:
        AssertionError: if the image source is none of the above.
    """
    info = self.image_info[image_id]
    image = self.load_image(image_id)

    # apply random gamma correction to the image
    gamma = np.random.uniform(0.8, 1)
    gain = np.random.uniform(0.8, 1)
    image = exposure.adjust_gamma(image, gamma, gain)

    # generate random rotation degree
    rotate_degree = np.random.uniform(-5, 5)

    if info["source"] in ["ShapeNetTOI", "Real"]:
        domain_label = 0  ## has coordinate map loss

        mask_path = info["path"] + '_mask.png'
        coord_path = info["path"] + '_coord.png'
        inst_dict = info['inst_dict']
        meta_path = info["path"] + '_meta.txt'

        # Only the last channel of the mask image is used.
        mask_im = cv2.imread(mask_path)[:, :, 2]
        coord_map = cv2.imread(coord_path)[:, :, :3]
        # Reverse the channel order of the coordinate map.
        coord_map = coord_map[:, :, ::-1]

        image, mask_im, coord_map = utils.rotate_and_crop_images(
            image, masks=mask_im, coords=coord_map,
            rotate_degree=rotate_degree)
        masks, coords, class_ids, scales = self.process_data(
            mask_im, coord_map, inst_dict, meta_path)

    elif info["source"] == "coco":
        domain_label = 1  ## no coordinate map loss

        instance_masks = []
        class_ids = []
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in info["annotations"]:
            class_id = self.map_source_class_id(
                "coco.{}".format(annotation['category_id']))
            if not class_id:
                continue
            m = utils.annToMask(annotation, info["height"], info["width"])
            # Some objects are so small that they're less than 1 pixel
            # area and end up rounded out. Skip those objects.
            if m.max() < 1:
                continue
            instance_masks.append(m)
            class_ids.append(class_id)

        has_instances = bool(instance_masks)
        if has_instances:
            # Pack instance masks into an array
            masks = np.stack(instance_masks, axis=2)
        else:
            # np.stack() raises ValueError on an empty list. Use a single
            # all-zero channel instead so the image still goes through the
            # same rotation and crop; the channel is discarded afterwards.
            # NOTE(review): uint8 assumes annToMask's dtype -- confirm.
            masks = np.zeros(
                [info["height"], info["width"], 1], dtype=np.uint8)
        class_ids = np.array(class_ids, dtype=np.int32)

        image, masks = utils.rotate_and_crop_images(
            image, masks=masks, coords=None, rotate_degree=rotate_degree)

        # A single-instance result may come back without the channel axis.
        if len(masks.shape) == 2:
            masks = masks[:, :, np.newaxis]

        # Drop instances that the rotation/crop removed from the image.
        final_masks = []
        final_class_ids = []
        if has_instances:
            for i in range(masks.shape[-1]):
                m = masks[:, :, i]
                if m.max() < 1:
                    continue
                final_masks.append(m)
                final_class_ids.append(class_ids[i])

        if final_class_ids:
            masks = np.stack(final_masks, axis=2)
            class_ids = np.array(final_class_ids, dtype=np.int32)
        else:
            # No instance left: return empty arrays
            masks = np.empty([0, 0, 0])
            class_ids = np.empty([0], np.int32)

        # use zero arrays as coord map for COCO images
        coords = np.zeros(masks.shape + (3,), dtype=np.float32)
        scales = np.ones((len(class_ids), 3), dtype=np.float32)

    else:
        assert False

    return image, masks, coords, class_ids, scales, domain_label
def load_mask(self, image_id):
    """Generate instance masks for the objects in the image with the given ID.

    Returns:
        A tuple ``(masks, coords, class_ids, scales, domain_label)``.
        For "ShapeNetTOI"/"Real" images the first four come from
        ``self.process_data`` and ``domain_label`` is 0. For "coco" images
        ``coords`` is all zeros, ``scales`` is all ones and ``domain_label``
        is 1; without any usable annotation ``masks`` has shape (0, 0, 0)
        and ``class_ids`` is empty.

    Raises:
        AssertionError: if a mask/coord file is missing or the image
            source is not one of the above.
    """
    info = self.image_info[image_id]
    source = info["source"]

    if source in ["ShapeNetTOI", "Real"]:
        # These sources come with a coordinate map (coordinate map loss).
        domain_label = 0

        base_path = info["path"]
        mask_path = base_path + '_mask.png'
        coord_path = base_path + '_coord.png'
        assert os.path.exists(mask_path), "{} is missing".format(mask_path)
        assert os.path.exists(coord_path), "{} is missing".format(coord_path)

        inst_dict = info['inst_dict']
        meta_path = base_path + '_meta.txt'

        # Only the last channel of the mask image is used.
        mask_im = cv2.imread(mask_path)[:, :, 2]
        # Take the first three channels, then reverse their order.
        coord_map = cv2.imread(coord_path)[:, :, :3]
        coord_map = coord_map[:, :, (2, 1, 0)]

        masks, coords, class_ids, scales = self.process_data(
            mask_im, coord_map, inst_dict, meta_path)

    elif source == "coco":
        # COCO images have no coordinate map (no coordinate map loss).
        domain_label = 1

        # Collect one (bitmap, class id) pair per usable annotation.
        kept = []
        for ann in info["annotations"]:
            mapped_id = self.map_source_class_id(
                "coco.{}".format(ann['category_id']))
            if not mapped_id:
                continue
            bitmap = utils.annToMask(ann, info["height"], info["width"])
            # Objects smaller than one pixel get rounded away; skip them.
            if bitmap.max() < 1:
                continue
            kept.append((bitmap, mapped_id))

        if kept:
            # Stack the bitmaps into [height, width, instance_count].
            masks = np.stack([bitmap for bitmap, _ in kept], axis=2)
            class_ids = np.array(
                [mapped_id for _, mapped_id in kept], dtype=np.int32)
        else:
            # Nothing usable: hand back empty arrays.
            masks = np.empty([0, 0, 0])
            class_ids = np.empty([0], np.int32)

        # COCO has no coordinate maps, so zeros stand in for them.
        coords = np.zeros(masks.shape + (3,), dtype=np.float32)
        scales = np.ones((len(class_ids), 3), dtype=np.float32)

    else:
        assert False

    return masks, coords, class_ids, scales, domain_label