def preprocess(roidb):
    """
    Turn one roidb record into a dict-style training datapoint.

    Args:
        roidb: dict providing 'file_name', 'boxes' (float32), 'class' and
            'is_crowd'; 'segmentation' is also read when cfg.MODE_MASK is set.

    Returns:
        A dict holding 'image', the RPN anchor targets, 'gt_boxes' and
        'gt_labels' (plus 'gt_masks' when cfg.MODE_MASK is set), or None
        when the input is filtered out because of MalformedData.
    """
    fname, raw_boxes, labels, crowd_flags = (
        roidb[key] for key in ('file_name', 'boxes', 'class', 'is_crowd'))
    gt_boxes = np.copy(raw_boxes)

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # Boxes are expected to arrive as floating point already.
    assert gt_boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    # Augment the image, then push the box corners through the same transform.
    image, aug_params = aug.augment_return_params(image)
    corners = aug.augment_coords(box_to_point8(gt_boxes), aug_params)
    gt_boxes = point8_to_box(corners)
    assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

    datapoint = {'image': image}
    try:
        if cfg.MODE_FPN:
            per_level = get_multilevel_rpn_anchor_input(image, gt_boxes, crowd_flags)
            # Key suffixes start at lvl2.
            for level, (lvl_labels, lvl_boxes) in enumerate(per_level, 2):
                datapoint['anchor_labels_lvl{}'.format(level)] = lvl_labels
                datapoint['anchor_boxes_lvl{}'.format(level)] = lvl_boxes
        else:
            anchor_labels, anchor_boxes = get_rpn_anchor_input(image, gt_boxes, crowd_flags)
            datapoint['anchor_labels'] = anchor_labels
            datapoint['anchor_boxes'] = anchor_boxes

        # Crowd boxes are not used as training targets.
        not_crowd = crowd_flags == 0
        gt_boxes = gt_boxes[not_crowd]
        labels = labels[not_crowd]
        datapoint['gt_boxes'] = gt_boxes
        datapoint['gt_labels'] = labels
        if len(gt_boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
        return None

    if not cfg.MODE_MASK:
        return datapoint

    # Augmentation will modify the polys in-place, so work on a copy.
    all_polys = copy.deepcopy(roidb['segmentation'])
    kept_polys = [polys for k, polys in enumerate(all_polys) if not crowd_flags[k]]
    assert len(kept_polys) == len(gt_boxes)

    # Augment the polygon coordinates, then rasterize one image-sized
    # binary mask per box.
    masks = [
        segmentation_to_mask(
            [aug.augment_coords(p, aug_params) for p in polys],
            image.shape[0], image.shape[1])
        for polys in kept_polys
    ]
    datapoint['gt_masks'] = np.asarray(masks, dtype='uint8')    # values in {0, 1}

    # To inspect the result visually:
    # from viz import draw_annotation, draw_mask
    # viz = draw_annotation(im, boxes, klass)
    # for mask in masks:
    #     viz = draw_mask(viz, mask)
    # tpviz.interactive_imshow(viz)
    return datapoint
def preprocess(img):
    """
    Build the list-style training datapoint for one image record.

    Args:
        img: dict providing 'file_name', 'boxes' (float32), 'class' and
            'is_crowd'; 'segmentation' is also read when `add_mask` is set.

    Returns:
        A list [image, <anchor inputs...>, gt_boxes, gt_labels], with the
        mask array appended when `add_mask` is set, or None when the input
        is filtered out because of MalformedData.
    """
    fname, gt_boxes, labels, crowd_flags = (
        img[key] for key in ('file_name', 'boxes', 'class', 'is_crowd'))
    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # Boxes are expected to arrive as floating point already.
    assert gt_boxes.dtype == np.float32

    # Augment the image, then push the box corners through the same transform.
    image, aug_params = aug.augment_return_params(image)
    corners = aug.augment_coords(box_to_point8(gt_boxes), aug_params)
    gt_boxes = point8_to_box(corners)
    assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

    try:
        if config.MODE_FPN:
            # Flatten the per-level (labels, boxes) pairs into one sequence.
            anchor_inputs = itertools.chain.from_iterable(
                get_multilevel_rpn_anchor_input(image, gt_boxes, crowd_flags))
        else:
            # (anchor_labels, anchor_boxes)
            anchor_inputs = get_rpn_anchor_input(image, gt_boxes, crowd_flags)
            assert len(anchor_inputs) == 2

        # Crowd boxes are not used as training targets.
        not_crowd = crowd_flags == 0
        gt_boxes = gt_boxes[not_crowd]
        labels = labels[not_crowd]
        if len(gt_boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
        return None

    datapoint = [image]
    datapoint.extend(anchor_inputs)
    datapoint += [gt_boxes, labels]

    # TODO pad im when FPN

    if not add_mask:
        return datapoint

    # Augmentation will modify the polys in-place, so work on a copy.
    all_polys = copy.deepcopy(img['segmentation'])
    kept_polys = [polys for k, polys in enumerate(all_polys) if not crowd_flags[k]]
    assert len(kept_polys) == len(gt_boxes)

    # Augment the polygon coordinates, then rasterize one image-sized
    # binary mask per box.
    masks = [
        segmentation_to_mask(
            [aug.augment_coords(p, aug_params) for p in polys],
            image.shape[0], image.shape[1])
        for polys in kept_polys
    ]
    datapoint.append(np.asarray(masks, dtype='uint8'))    # values in {0, 1}

    # To inspect the result visually:
    # from viz import draw_annotation, draw_mask
    # viz = draw_annotation(im, boxes, klass)
    # for mask in masks:
    #     viz = draw_mask(viz, mask)
    # tpviz.interactive_imshow(viz)
    return datapoint
def preprocess(img):
    """
    Build the list-style training datapoint for one image record.

    Args:
        img: dict providing 'file_name', 'boxes' (float32), 'class' and
            'is_crowd', and optionally 'segmentation'.

    Returns:
        [image, fm_labels, fm_boxes, gt_boxes, gt_labels], with a uint8 mask
        array appended when the record has a 'segmentation' entry, or None
        when the input is rejected because of MalformedData.
    """
    # Local import so this function does not rely on a module-level import.
    import copy

    fname, boxes, klass, is_crowd = img['file_name'], img['boxes'], img['class'], img['is_crowd']
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert im is not None, fname
    im = im.astype('float32')
    # assume floatbox as input
    assert boxes.dtype == np.float32

    # augmentation: transform the image, then the box corners with the
    # same parameters
    im, params = aug.augment_return_params(im)
    points = box_to_point8(boxes)
    points = aug.augment_coords(points, params)
    boxes = point8_to_box(points)

    # rpn anchor:
    try:
        fm_labels, fm_boxes = get_rpn_anchor_input(im, boxes, klass, is_crowd)

        not_crowd = is_crowd == 0
        boxes = boxes[not_crowd]    # skip crowd boxes in training target
        klass = klass[not_crowd]

        if not len(boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once("Input {} is invalid for training: {}".format(fname, str(e)), 'warn')
        return None

    ret = [im, fm_labels, fm_boxes, boxes, klass]

    # masks
    segmentation = img.get('segmentation', None)
    if segmentation is not None:
        # Augmentation may modify the polys in-place; deep-copy the kept
        # polygons so the caller's record is not altered between uses.
        segmentation = copy.deepcopy(
            [polys for k, polys in enumerate(segmentation) if not is_crowd[k]])
        assert len(segmentation) == len(boxes)

        # one image-sized binary mask per box
        masks = []
        for polys in segmentation:
            polys = [aug.augment_coords(p, params) for p in polys]
            masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
        masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
        ret.append(masks)

        # To inspect the result visually:
        # from viz import draw_annotation, draw_mask
        # viz = draw_annotation(im, boxes, klass)
        # for mask in masks:
        #     viz = draw_mask(viz, mask)
        # tpviz.interactive_imshow(viz)
    return ret
def preprocess(img):
    """
    Build the list-style training datapoint for one image record.

    Args:
        img: dict providing 'file_name', 'boxes' (float32), 'class' and
            'is_crowd'; 'segmentation' is also read when `add_mask` is set.

    Returns:
        [image, fm_labels, fm_boxes, gt_boxes, gt_labels], with the mask
        array appended when `add_mask` is set, or None when the input is
        filtered out because of MalformedData.
    """
    fname, gt_boxes, labels, crowd_flags = (
        img[key] for key in ('file_name', 'boxes', 'class', 'is_crowd'))
    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype('float32')
    # Boxes are expected to arrive as floating point already.
    assert gt_boxes.dtype == np.float32

    # Augment the image, then push the box corners through the same transform.
    image, aug_params = aug.augment_return_params(image)
    corners = aug.augment_coords(box_to_point8(gt_boxes), aug_params)
    gt_boxes = point8_to_box(corners)
    assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

    try:
        fm_labels, fm_boxes = get_rpn_anchor_input(image, gt_boxes, crowd_flags)

        # Crowd boxes are not used as training targets.
        not_crowd = crowd_flags == 0
        gt_boxes = gt_boxes[not_crowd]
        labels = labels[not_crowd]
        if len(gt_boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
        return None

    datapoint = [image, fm_labels, fm_boxes, gt_boxes, labels]
    if not add_mask:
        return datapoint

    # Augmentation will modify the polys in-place, so work on a copy.
    all_polys = copy.deepcopy(img['segmentation'])
    kept_polys = [polys for k, polys in enumerate(all_polys) if not crowd_flags[k]]
    assert len(kept_polys) == len(gt_boxes)

    # Augment the polygon coordinates, then rasterize one image-sized
    # binary mask per box.
    masks = [
        segmentation_to_mask(
            [aug.augment_coords(p, aug_params) for p in polys],
            image.shape[0], image.shape[1])
        for polys in kept_polys
    ]
    datapoint.append(np.asarray(masks, dtype='uint8'))    # values in {0, 1}

    # To inspect the result visually:
    # from viz import draw_annotation, draw_mask
    # viz = draw_annotation(im, boxes, klass)
    # for mask in masks:
    #     viz = draw_mask(viz, mask)
    # tpviz.interactive_imshow(viz)
    return datapoint
def preprocess(img):
    """
    Preprocess one image record via `preproc_img`, optionally adding masks,
    and print how long it took.

    Args:
        img: dict providing 'file_name', 'boxes', 'class' and 'is_crowd';
            'second_class' is also read when config.USE_SECOND_HEAD is set,
            and 'segmentation' when `add_mask` is set.

    Returns:
        The list returned by `preproc_img` (image at index 0, boxes at
        index 3), with the mask array appended when `add_mask` is set, or
        None when `preproc_img` returns None.
    """
    print("start preproc coco")
    started_at = time.time()

    use_second_head = config.USE_SECOND_HEAD
    fname = img['file_name']
    raw_boxes = img['boxes']
    labels = img['class']
    second_labels = img['second_class'] if use_second_head else None
    crowd_flags = img['is_crowd']

    result = preproc_img(fname, raw_boxes, labels, second_labels, crowd_flags, aug)
    if result is None:
        print("coco: preproc_img returned None on", fname)
        return None

    datapoint, aug_params = result
    image = datapoint[0]
    gt_boxes = datapoint[3]

    if add_mask:
        # Augmentation will modify the polys in-place, so work on a copy.
        all_polys = copy.deepcopy(img.get('segmentation', None))
        kept_polys = [polys for k, polys in enumerate(all_polys) if not crowd_flags[k]]
        assert len(kept_polys) == len(gt_boxes), (len(kept_polys), len(gt_boxes))

        # One image-sized binary mask per box.
        masks = [
            segmentation_to_mask(
                [aug.augment_coords(p, aug_params) for p in polys],
                image.shape[0], image.shape[1])
            for polys in kept_polys
        ]
        datapoint.append(np.asarray(masks, dtype='uint8'))    # values in {0, 1}

        # To inspect the result visually:
        # from viz import draw_annotation, draw_mask
        # viz = draw_annotation(im, boxes, klass)
        # for mask in masks:
        #     viz = draw_mask(viz, mask)
        # tpviz.interactive_imshow(viz)

    print("coco example done, elapsed:", time.time() - started_at)
    return datapoint
def draw_mask(im, mask, alpha=0.5, color=None):
    """
    Overlay a rotated-rectangle mask on top of the image.

    Args:
        im: a 3-channel uint8 image in BGR
        mask: a 5-element sequence (x, y, width, height, angle); it is handed
            to cv2.boxPoints as the rotated rect ((x, y), (width, height), angle)
            and the resulting corner polygon is rasterized at the image size
        alpha: blending weight of the color inside the mask
        color: 3-element color (array, list or tuple).
            If None, one is picked at random from PALETTE_RGB and reversed.

    Returns:
        A uint8 image with the color blended into the masked pixels.
    """
    x, y, width, height, angle = mask
    rect = (x, y), (width, height), angle
    box = cv2.boxPoints(rect)
    mask = segmentation_to_mask([box], im.shape[0], im.shape[1])
    if color is None:
        color = PALETTE_RGB[np.random.choice(len(PALETTE_RGB))][::-1]
    # A plain tuple/list cannot be multiplied by a float; make it an array.
    color = np.asarray(color)
    # Broadcast the 2-D mask over the 3 channels and blend only inside it.
    inside = np.repeat((mask > 0)[:, :, None], 3, axis=2)
    im = np.where(inside, im * (1 - alpha) + color * alpha, im)
    im = im.astype('uint8')
    return im
def __call__(self, roidb):
    """
    Turn one roidb record into a dict-style training datapoint.

    Args:
        roidb: dict providing "file_name", "boxes" (float32), "class" and
            "is_crowd"; "segmentation" is also read when self.cfg.MODE_MASK
            is set.

    Returns:
        A dict holding "image", the RPN anchor targets, "gt_boxes" and
        "gt_labels" (plus bit-packed 'gt_masks_packed' when
        self.cfg.MODE_MASK is set), or None when the input is filtered out
        because of MalformedData.
    """
    fname, raw_boxes, labels, crowd_flags = (
        roidb[key] for key in ("file_name", "boxes", "class", "is_crowd"))
    gt_boxes = np.copy(raw_boxes)

    image = cv2.imread(fname, cv2.IMREAD_COLOR)
    assert image is not None, fname
    image = image.astype("float32")
    # Size of the image before augmentation, used to scale relative coords.
    height, width = image.shape[:2]
    # Boxes are expected to arrive as floating point already.
    assert gt_boxes.dtype == np.float32, "Loader has to return floating point boxes!"

    relative_coords = not self.cfg.DATA.ABSOLUTE_COORD
    if relative_coords:
        # Scale x columns by width and y columns by height.
        gt_boxes[:, 0::2] *= width
        gt_boxes[:, 1::2] *= height

    # Augment the image, then push the box corners through the same transform.
    image, aug_params = self.aug.augment_return_params(image)
    corners = self.aug.augment_coords(box_to_point8(gt_boxes), aug_params)
    gt_boxes = point8_to_box(corners)
    assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

    datapoint = {"image": image}
    # Add rpn data to dataflow:
    try:
        if self.cfg.MODE_FPN:
            per_level = self.get_multilevel_rpn_anchor_input(image, gt_boxes, crowd_flags)
            # Key suffixes start at lvl2.
            for level, (lvl_labels, lvl_boxes) in enumerate(per_level, 2):
                datapoint["anchor_labels_lvl{}".format(level)] = lvl_labels
                datapoint["anchor_boxes_lvl{}".format(level)] = lvl_boxes
        else:
            anchor_labels, anchor_boxes = self.get_rpn_anchor_input(image, gt_boxes, crowd_flags)
            datapoint["anchor_labels"] = anchor_labels
            datapoint["anchor_boxes"] = anchor_boxes

        # Crowd boxes are not used as training targets.
        not_crowd = crowd_flags == 0
        gt_boxes = gt_boxes[not_crowd]
        labels = labels[not_crowd]
        datapoint["gt_boxes"] = gt_boxes
        datapoint["gt_labels"] = labels
        if len(gt_boxes) == 0:
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once("Input {} is filtered for training: {}".format(fname, str(e)), "warn")
        return None

    if not self.cfg.MODE_MASK:
        return datapoint

    # Augmentation will modify the polys in-place, so work on a copy.
    all_polys = copy.deepcopy(roidb["segmentation"])
    kept_polys = [polys for k, polys in enumerate(all_polys) if not crowd_flags[k]]
    assert len(kept_polys) == len(gt_boxes)

    # Augment the polygon coordinates, then rasterize one binary mask per box.
    width_height = np.asarray([width, height], dtype=np.float32)
    # Round the mask width up to a multiple of 8 so each row packs into bits.
    gt_mask_width = int(np.ceil(image.shape[1] / 8.0) * 8)
    masks = []
    for polys in kept_polys:
        if relative_coords:
            polys = [p * width_height for p in polys]
        augmented = [self.aug.augment_coords(p, aug_params) for p in polys]
        masks.append(segmentation_to_mask(augmented, image.shape[0], gt_mask_width))
    masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
    datapoint['gt_masks_packed'] = np.packbits(masks, axis=-1)

    # To inspect the result visually:
    # from viz import draw_annotation, draw_mask
    # viz = draw_annotation(im, boxes, klass)
    # for mask in masks:
    #     viz = draw_mask(viz, mask)
    # tpviz.interactive_imshow(viz)
    return datapoint
def preprocess(roidb_batch):
    """
    Preprocess a batch of roidb records and collate them into one padded
    datapoint.

    Args:
        roidb_batch: iterable of dicts, each providing 'file_name', 'boxes'
            (float32), 'class' and 'is_crowd'; 'segmentation' is also read
            when cfg.MODE_MASK is set.

    Returns:
        A dict with the stacked per-level anchor targets (lvl2..lvl6),
        'images' padded at the bottom/right to a common size together with
        'orig_image_dims', 'gt_labels' (padded with -1) and 'gt_boxes'
        (padded with the np.pad default) together with 'orig_gt_counts',
        the list of 'filenames', and padded 'gt_masks' when cfg.MODE_MASK
        is set. Returns None as soon as any record raises MalformedData.

    Raises:
        NotImplementedError: when cfg.MODE_FPN is not set.
    """
    samples = []
    for roidb in roidb_batch:
        fname, raw_boxes, labels, crowd_flags = (
            roidb[key] for key in ('file_name', 'boxes', 'class', 'is_crowd'))
        gt_boxes = np.copy(raw_boxes)

        image = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert image is not None, fname
        image = image.astype('float32')
        # Boxes are expected to arrive as floating point already.
        assert gt_boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # Augment the image, then push the box corners through the same
        # transform.
        image, aug_params = aug.augment_return_params(image)
        corners = aug.augment_coords(box_to_point8(gt_boxes), aug_params)
        gt_boxes = point8_to_box(corners)
        assert np.min(np_area(gt_boxes)) > 0, "Some boxes have zero area!"

        sample = {'images': image}
        try:
            if not cfg.MODE_FPN:
                raise NotImplementedError("[armand] Batch mode only available for FPN")
            per_level = get_multilevel_rpn_anchor_input(image, gt_boxes, crowd_flags)
            # Key suffixes start at lvl2.
            for level, (lvl_labels, lvl_boxes) in enumerate(per_level, 2):
                sample['anchor_labels_lvl{}'.format(level)] = lvl_labels
                sample['anchor_boxes_lvl{}'.format(level)] = lvl_boxes

            # Crowd boxes are not used as training targets.
            not_crowd = crowd_flags == 0
            gt_boxes = gt_boxes[not_crowd]
            labels = labels[not_crowd]
            sample['gt_boxes'] = gt_boxes
            sample['gt_labels'] = labels
            sample['filename'] = fname
            if len(gt_boxes) == 0:
                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
            # One bad record drops the whole batch.
            return None

        if cfg.MODE_MASK:
            # Augmentation will modify the polys in-place, so work on a copy.
            all_polys = copy.deepcopy(roidb['segmentation'])
            kept_polys = [polys for k, polys in enumerate(all_polys) if not crowd_flags[k]]
            assert len(kept_polys) == len(gt_boxes)

            # Augment the polygon coordinates, then rasterize one
            # image-sized binary mask per box.
            masks = [
                segmentation_to_mask(
                    [aug.augment_coords(p, aug_params) for p in polys],
                    image.shape[0], image.shape[1])
                for polys in kept_polys
            ]
            sample['gt_masks'] = np.asarray(masks, dtype='uint8')    # values in {0, 1}

        samples.append(sample)

    # ------------------------------------------------------------------
    # Batchify the output
    # ------------------------------------------------------------------
    batch = {}

    # Anchor targets are stacked as-is, without any padding.
    stackable_fields = (
        "anchor_labels_lvl2", "anchor_boxes_lvl2",
        "anchor_labels_lvl3", "anchor_boxes_lvl3",
        "anchor_labels_lvl4", "anchor_boxes_lvl4",
        "anchor_labels_lvl5", "anchor_boxes_lvl5",
        "anchor_labels_lvl6", "anchor_boxes_lvl6",
    )
    for field in stackable_fields:
        batch[field] = np.stack([s[field] for s in samples])

    # The remaining fields need padding plus a record of the original sizes:
    # - image (HxWx3)
    # - gt_boxes (?x4)
    # - gt_labels (?)
    # - gt_masks (?xHxW)
    #
    # Find the minimum container size for images (maxW x maxH).
    # Find the maximum number of ground truth boxes.
    # For each image, save original dimension and pad.
    if cfg.PREPROC.PREDEFINED_PADDING:
        container_shapes = [get_padding_shape(*(s["images"].shape[:2])) for s in samples]
    else:
        container_shapes = [s["images"].shape for s in samples]
    max_height = max(shape[0] for shape in container_shapes)
    max_width = max(shape[1] for shape in container_shapes)

    # image
    padded_images = []
    for s in samples:
        image = s["images"]
        pad_h = max_height - image.shape[0]
        pad_w = max_width - image.shape[1]
        padded_images.append(np.pad(image, [[0, pad_h], [0, pad_w], [0, 0]], 'constant'))
    batch["images"] = np.stack(padded_images)
    batch["orig_image_dims"] = np.stack([s["images"].shape for s in samples])

    # gt_boxes, gt_labels and (optionally) gt_masks
    gt_counts = [s["gt_labels"].size for s in samples]
    max_num_gts = max(gt_counts)
    padded_gt_labels = []
    padded_gt_boxes = []
    padded_gt_masks = []
    for s, count in zip(samples, gt_counts):
        extra = max_num_gts - count
        padded_gt_labels.append(
            np.pad(s["gt_labels"], [0, extra], 'constant', constant_values=-1))
        padded_gt_boxes.append(
            np.pad(s["gt_boxes"], [[0, extra], [0, 0]], 'constant'))
        if cfg.MODE_MASK:
            pad_h = max_height - s["images"].shape[0]
            pad_w = max_width - s["images"].shape[1]
            padded_gt_masks.append(
                np.pad(s["gt_masks"], [[0, extra], [0, pad_h], [0, pad_w]], 'constant'))

    batch["orig_gt_counts"] = np.stack(gt_counts)
    batch["gt_labels"] = np.stack(padded_gt_labels)
    batch["gt_boxes"] = np.stack(padded_gt_boxes)
    batch["filenames"] = [s["filename"] for s in samples]
    if cfg.MODE_MASK:
        batch["gt_masks"] = np.stack(padded_gt_masks)

    return batch
def preprocess(roidb):
    """
    Turn one roidb record into a dict-style training datapoint.

    The image returned by `imread` is given a third axis and repeated three
    times along it before augmentation.

    Args:
        roidb: dict providing 'file_name', 'boxes' (float32), 'class' and
            'is_crowd'; 'segmentation' is also read when cfg.MODE_MASK is set.

    Returns:
        A dict holding 'image', the RPN anchor targets, 'gt_boxes' and
        'gt_labels' (plus 'gt_masks' when cfg.MODE_MASK is set), or None
        when the input is filtered out because of MalformedData.
    """
    fname, boxes, klass, is_crowd = roidb['file_name'], roidb['boxes'], roidb['class'], roidb['is_crowd']
    boxes = np.copy(boxes)
    im = imread(fname)
    assert im is not None, fname
    # assumes imread returns a 2-D (single-channel) array -- TODO confirm
    im = np.expand_dims(im, axis=2)
    im = np.repeat(im, 3, axis=2)
    im = im.astype('float32')
    # Size before augmentation. Needed below to scale relative box/polygon
    # coordinates; leaving it undefined raised NameError in those paths.
    height, width = im.shape[:2]
    # assume floatbox as input
    assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"
    if not cfg.DATA.ABSOLUTE_COORD:
        # Scale x columns by width and y columns by height.
        boxes[:, 0::2] *= width
        boxes[:, 1::2] *= height

    # augmentation: transform the image, then the box corners with the
    # same parameters
    im, params = aug.augment_return_params(im)
    points = box_to_point8(boxes)
    points = aug.augment_coords(points, params)
    boxes = point8_to_box(points)
    assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

    ret = {'image': im}
    # rpn anchor:
    try:
        if cfg.MODE_FPN:
            multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
            # Key suffixes start at lvl2.
            for i, (anchor_labels, anchor_boxes) in enumerate(multilevel_anchor_inputs):
                ret['anchor_labels_lvl{}'.format(i + 2)] = anchor_labels
                ret['anchor_boxes_lvl{}'.format(i + 2)] = anchor_boxes
        else:
            # anchor_labels, anchor_boxes
            ret['anchor_labels'], ret['anchor_boxes'] = get_rpn_anchor_input(im, boxes, is_crowd)

        boxes = boxes[is_crowd == 0]    # skip crowd boxes in training target
        klass = klass[is_crowd == 0]
        ret['gt_boxes'] = boxes
        ret['gt_labels'] = klass
        if not len(boxes):
            raise MalformedData("No valid gt_boxes!")
    except MalformedData as e:
        log_once("Input {} is filtered for training: {}".format(fname, str(e)), 'warn')
        return None

    if cfg.MODE_MASK:
        # augmentation will modify the polys in-place
        segmentation = copy.deepcopy(roidb['segmentation'])
        segmentation = [polys for k, polys in enumerate(segmentation) if not is_crowd[k]]
        assert len(segmentation) == len(boxes)

        # Apply augmentation on polygon coordinates.
        # And produce one image-sized binary mask per box.
        masks = []
        width_height = np.asarray([width, height], dtype=np.float32)
        for polys in segmentation:
            if not cfg.DATA.ABSOLUTE_COORD:
                polys = [p * width_height for p in polys]
            polys = [aug.augment_coords(p, params) for p in polys]
            masks.append(segmentation_to_mask(polys, im.shape[0], im.shape[1]))
        masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
        ret['gt_masks'] = masks

        # To inspect the result visually:
        # from viz import draw_annotation, draw_mask
        # viz = draw_annotation(im, boxes, klass)
        # for mask in masks:
        #     viz = draw_mask(viz, mask)
        # tpviz.interactive_imshow(viz)
    return ret