def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): a detection dataset dict

    Returns:
        list[dict]: a list of dataset dicts, which contain augmented versions
            of the input image. The total number of dicts is
            ``(len(min_sizes) + len(extra_sizes)) * (2 if flip else 1)``.
    """
    ret = []
    if "image" not in dataset_dict:
        numpy_image = read_image(dataset_dict["file_name"], self.image_format)
    else:
        numpy_image = dataset_dict["image"].permute(
            1, 2, 0).numpy().astype("uint8")
    image_sizes = [(min_size, self.max_size) for min_size in self.min_sizes]
    image_sizes.extend(self.extra_sizes)
    for min_size, max_size in image_sizes:
        image = np.copy(numpy_image)
        tfm = ResizeShortestEdge(min_size, max_size).get_transform(image)
        resized = tfm.apply_image(image)
        resized = torch.as_tensor(
            resized.transpose(2, 0, 1).astype("float32"))
        dic = copy.deepcopy(dataset_dict)
        dic["horiz_flip"] = False
        dic["image"] = resized
        ret.append(dic)
        if self.flip:
            dic = copy.deepcopy(dataset_dict)
            dic["horiz_flip"] = True
            dic["image"] = torch.flip(resized, dims=[2])
            ret.append(dic)
    return ret
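# Usage sketch (hedged): the method above is a test-time-augmentation style
# mapper. Assuming its class (name hypothetical) is constructed with
# ``min_sizes``, ``max_size``, ``extra_sizes`` and ``flip`` attributes, a
# single dataset dict expands into one copy per (size, flip) combination:
#
#     tta_mapper = MultiScaleFlipMapper(           # hypothetical constructor
#         min_sizes=(400, 500, 600), max_size=1000,
#         extra_sizes=(), flip=True)
#     augmented = tta_mapper({"file_name": "input.jpg"})
#     # (len(min_sizes) + len(extra_sizes)) * 2 == 6 dicts, each carrying an
#     # "image" tensor and a "horiz_flip" flag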
def _read_data(self, file_name):
    return read_image(file_name, format=self.data_format)
    if cfg.INPUT.FORMAT == "BGR":
        img = img[:, :, [2, 1, 0]]
    else:
        img = np.asarray(
            Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB"))
    visualizer = Visualizer(img, metadata=metadata, scale=scale)
    target_fields = per_image["instances"].get_fields()
    labels = [
        metadata.thing_classes[i] for i in target_fields["gt_classes"]
    ]
    vis = visualizer.overlay_instances(
        labels=labels,
        boxes=target_fields.get("gt_boxes", None),
        masks=target_fields.get("gt_masks", None),
        keypoints=target_fields.get("gt_keypoints", None),
    )
    output(vis, str(per_image["image_id"]) + ".jpg")
else:
    dicts = list(
        chain.from_iterable(
            [DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN]))
    if cfg.MODEL.KEYPOINT_ON:
        dicts = filter_images_with_few_keypoints(dicts, 1)
    for dic in tqdm.tqdm(dicts):
        img = utils.read_image(dic["file_name"], "RGB")
        visualizer = Visualizer(img, metadata=metadata, scale=scale)
        vis = visualizer.draw_dataset_dict(dic)
        output(vis, os.path.basename(dic["file_name"]))
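# The ``output`` helper invoked above is defined outside this snippet; a
# minimal sketch of what it is assumed to do (``dirname`` and the save-only
# behavior are assumptions -- the real helper may also display the image):
#
#     def output(vis, fname):
#         filepath = os.path.join(dirname, fname)
#         vis.save(filepath)  # write the rendered visualization to disk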
mp.set_start_method("spawn", force=True)
args = get_parser().parse_args()
logger = setup_logger()
logger.info("Arguments: " + str(args))

cfg = setup_cfg(args)
demo = VisualizationDemo(cfg, parallel=args.parallel)

if args.input:
    if len(args.input) == 1:
        args.input = glob.glob(os.path.expanduser(args.input[0]))
        assert args.input, "The input path(s) was not found"
    for path in tqdm.tqdm(args.input, disable=not args.output):
        # use PIL, to be consistent with evaluation
        img = read_image(path, format="BGR")
        start_time = time.time()
        predictions, visualized_output = demo.run_on_image(img)
        logger.info("{}: detected {} instances in {:.2f}s".format(
            path, len(predictions["instances"]), time.time() - start_time))
        if args.output:
            if os.path.isdir(args.output):
                out_filename = os.path.join(
                    args.output, os.path.basename(path))
            else:
                assert len(args.input) == 1, \
                    "Please specify a directory with args.output"
                out_filename = args.output
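# Example invocation (illustrative script name and paths; only the flags
# referenced above are shown -- the parser likely accepts more, e.g. a
# config file):
#
#     python demo.py --input "images/*.jpg" --output results/ --parallel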
def __getitem__(self, index):
    """Load data, apply transforms, convert to Instances."""
    dataset_dict = copy.deepcopy(self.dataset_dicts[index])

    # read image
    image = read_image(dataset_dict["file_name"], format=self.data_format)
    check_image_size(dataset_dict, image)

    if "annotations" in dataset_dict:
        annotations = dataset_dict.pop("annotations")
        annotations = [
            ann for ann in annotations if ann.get("iscrowd", 0) == 0]
    else:
        annotations = None

    if "sem_seg_file_name" in dataset_dict:
        if annotations is None:
            annotations = []
        with PathManager.open(dataset_dict.get("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        annotations.insert(0, {"sem_seg": sem_seg_gt})

    # apply transform
    image, annotations = self._apply_transforms(
        image, annotations,
        keypoint_hflip_indices=self.keypoint_hflip_indices)

    # mosaic transform
    mosaic_flag = np.random.randint(2)
    if self.is_train and self.mosaic is not None and mosaic_flag == 1:
        min_offset = self.mosaic.get('MIN_OFFSET', 0.2)
        mosaic_width = self.mosaic.get('MOSAIC_WIDTH', 640)
        mosaic_height = self.mosaic.get('MOSAIC_HEIGHT', 640)
        cut_x = np.random.randint(
            int(mosaic_width * min_offset),
            int(mosaic_width * (1 - min_offset)))
        cut_y = np.random.randint(
            int(mosaic_height * min_offset),
            int(mosaic_height * (1 - min_offset)))
        # init out image
        out_image = np.zeros(
            [mosaic_height, mosaic_width, 3], dtype=np.float32)
        out_annotations = []
        # mosaic transform
        for m_idx in range(4):
            if m_idx != 0:
                new_index = np.random.choice(range(len(self.dataset_dicts)))
                dataset_dict = copy.deepcopy(self.dataset_dicts[new_index])
                # read image
                image = read_image(
                    dataset_dict["file_name"], format=self.data_format)
                check_image_size(dataset_dict, image)
                if "annotations" in dataset_dict:
                    annotations = dataset_dict.pop("annotations")
                    annotations = [
                        ann for ann in annotations
                        if ann.get("iscrowd", 0) == 0]
                else:
                    annotations = None
                # apply transform
                image, annotations = self._apply_transforms(
                    image, annotations)
            image_size = image.shape[:2]  # h, w
            # as all meta_infos are the same, we just keep the first one
            meta_infos = [
                annotation.pop("meta_infos")
                for annotation in annotations][0]
            pleft = meta_infos.get('jitter_pad_left', 0)
            pright = meta_infos.get('jitter_pad_right', 0)
            ptop = meta_infos.get('jitter_pad_top', 0)
            pbot = meta_infos.get('jitter_pad_bot', 0)
            swidth = meta_infos.get('jitter_swidth', image_size[1])
            sheight = meta_infos.get('jitter_sheight', image_size[0])
            # get shifts
            left_shift = int(
                min(cut_x, max(0, (-int(pleft) * image_size[1] / swidth))))
            top_shift = int(
                min(cut_y, max(0, (-int(ptop) * image_size[0] / sheight))))
            right_shift = int(
                min(image_size[1] - cut_x,
                    max(0, (-int(pright) * image_size[1] / swidth))))
            bot_shift = int(
                min(image_size[0] - cut_y,
                    max(0, (-int(pbot) * image_size[0] / sheight))))
            out_image, annos = self._blend_moasic(
                cut_x, cut_y, out_image, image,
                copy.deepcopy(annotations),
                (mosaic_height, mosaic_width), m_idx,
                (left_shift, top_shift, right_shift, bot_shift))
            out_annotations.extend(annos)
        # replace image and annotations with out_image and out_annotations
        image = out_image
        annotations = out_annotations

    if "sem_seg_file_name" in dataset_dict:
        dataset_dict.pop("sem_seg_file_name")
        sem_seg_gt = annotations[0].pop("sem_seg")
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt
        annotations = annotations[1:]
        if not annotations:
            annotations = None

    # convert to Instance type
    if annotations is not None:  # got instances in annotations
        image_shape = image.shape[:2]  # h, w
        instances = annotations_to_instances(
            annotations, image_shape, mask_format=self.mask_format)
        # # Create a tight bounding box from masks, useful when image is cropped
        # if self.crop_gen and instances.has("gt_masks"):
        #     instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = filter_empty_instances(instances)

    # PyTorch's dataloader is efficient on torch.Tensor due to shared memory,
    # but not efficient on large generic data structures due to the use of
    # pickle & mp.Queue. Therefore it's important to use torch.Tensor.
    # h, w, c -> c, h, w
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))
    return dataset_dict
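# ``_blend_moasic`` is not shown in this snippet. Below is a simplified,
# self-contained sketch of the four-quadrant paste it is assumed to perform.
# It ignores the annotation shifting/clipping done by the real helper and
# assumes each source image is at least as large as its target quadrant;
# the function name is illustrative.
import numpy as np

def blend_mosaic_sketch(cut_x, cut_y, out_image, image, m_idx):
    """Paste ``image`` into quadrant ``m_idx`` of ``out_image``.

    Quadrants: 0 = top-left, 1 = top-right, 2 = bottom-left,
    3 = bottom-right, split at the point (cut_x, cut_y).
    """
    out_h, out_w = out_image.shape[:2]
    regions = {
        0: (slice(0, cut_y), slice(0, cut_x)),
        1: (slice(0, cut_y), slice(cut_x, out_w)),
        2: (slice(cut_y, out_h), slice(0, cut_x)),
        3: (slice(cut_y, out_h), slice(cut_x, out_w)),
    }
    ys, xs = regions[m_idx]
    h, w = ys.stop - ys.start, xs.stop - xs.start
    # crop the top-left h x w corner of the source image into the quadrant
    out_image[ys, xs] = image[:h, :w]
    return out_image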