def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(self.augmentations)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk is not None:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
        )

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # @ Will Lee fine-grained classification categories: non-standard, basically standard, or standard
        standard_ids = [obj["standard_id"] for obj in annos]
        standard_ids = torch.tensor(standard_ids, dtype=torch.int64)
        instances.gt_standards = standard_ids

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict
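
# --- usage sketch (not from the original sources) ----------------------------
# A mapper whose __call__ looks like the one above is typically plugged into
# detectron2's train loader. "MyDatasetMapper" and "my_dataset_train" are
# hypothetical placeholder names; build_detection_train_loader is the real API.
from detectron2.config import get_cfg
from detectron2.data import build_detection_train_loader

cfg = get_cfg()
cfg.DATASETS.TRAIN = ("my_dataset_train",)  # assumes this dataset was registered

mapper = MyDatasetMapper(cfg, is_train=True)  # placeholder class wrapping the __call__ above
train_loader = build_detection_train_loader(cfg, mapper=mapper)
for batch in train_loader:
    # each element is the dict produced by __call__ ("image", "instances", ...)
    print(batch[0]["image"].shape, len(batch[0]["instances"]))
    break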
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
        )
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    # HACK: keep annotations for test
    # if not self.is_train:
    #     # USER: Modify this if you want to keep them for some reason.
    #     dataset_dict.pop("annotations", None)
    #     dataset_dict.pop("sem_seg_file_name", None)
    #     return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict
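
# --- usage sketch (not from the original sources) ----------------------------
# Because the variant above comments out the `if not self.is_train` early return
# (the HACK note), ground-truth "instances" survive into evaluation batches.
# "KeepAnnotationsMapper" and "my_dataset_val" are hypothetical placeholder names.
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader

cfg = get_cfg()
mapper = KeepAnnotationsMapper(cfg, is_train=False)
test_loader = build_detection_test_loader(cfg, "my_dataset_val", mapper=mapper)
for batch in test_loader:
    assert "instances" in batch[0]  # GT stays available alongside predictions
    break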
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    ### my code ###
    # Stash the segmentation annotations for a moment and add them back afterwards
    seg_bk = [dictwk["segmentation"] for dictwk in dataset_dict["annotations"]]
    for i in range(len(dataset_dict["annotations"])):
        dataset_dict["annotations"][i].pop("segmentation")
    image, dataset_dict = self.aug_handler(image=image, dataset_dict_detectron=dataset_dict)
    for i in range(len(dataset_dict["annotations"])):
        dataset_dict["annotations"][i]["segmentation"] = seg_bk[i]
    ### my code ###

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  self.min_box_side_len, self.proposal_topk)

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    # ############################################################################
    # print("AutoAugDet:", dataset_dict["file_name"])
    # h, w, c = image.shape
    # if h <= 0 or w <= 0:
    #     print("Empty image")
    # if self.autoaugdet and "annotations" in dataset_dict:
    #     from detectron2.structures.boxes import BoxMode
    #     bboxes = []
    #     for label in dataset_dict["annotations"]:
    #         assert label['bbox_mode'] == BoxMode.XYWH_ABS
    #         bboxes.append(label['bbox'])
    #     # import cv2, random
    #     # showimg_in = image.copy()
    #     # for box in bboxes:
    #     #     cv2.rectangle(showimg_in, (int(box[0]), int(box[1])),
    #     #                   (int(box[0] + box[2]), int(box[1] + box[3])),
    #     #                   (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
    #     try:
    #         image, bboxes = autoaugdet.autoaugdet(image, bboxes, self.autoaugdet)
    #     except Exception as e:
    #         print("AutoAug Error:", e)
    #     # showimg_out = image.copy()
    #     # for box in bboxes:
    #     #     cv2.rectangle(showimg_out, (int(box[0]), int(box[1])),
    #     #                   (int(box[0] + box[2]), int(box[1] + box[3])),
    #     #                   (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)))
    #     # cv2.imshow("in", showimg_in)
    #     # cv2.imshow("out", showimg_out)
    #     # cv2.waitKey(0)
    #     for i in range(len(bboxes)):
    #         dataset_dict["annotations"][i]['bbox'] = bboxes[i]
    # ############################################################################

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
        )
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32")
    )  # Can use uint8 if it turns out to be slow some day

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
        )

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # generate heatmaps of keypoints
        # if dataset_dict["instances"].has("gt_keypoints"):

        # For segmentation-based detection, transform the instance-level segmentation
        # mask into semantic segmasks and contour maps:
        # turn the instance-level segmentation map into a semantic segmap,
        # and get the contour map for segmentation-based detection
        dataset_dict["contours"], dataset_dict["semseg"] = utils.annotations_to_segmaps(
            annos, self.num_classes, image_shape
        )

        kpts = [obj.get("keypoints", []) for obj in annos]
        map_shape = (image_shape[0], image_shape[1])
        kp_maps, short_offsets = get_keypoint_maps(None, kpts, map_shape)
        dataset_dict["kp_maps"] = kp_maps.transpose(2, 0, 1)
        dataset_dict["short_offsets"] = short_offsets.transpose(2, 0, 1)

        ################################################################
        # # visualize the keypoints
        # from detectron2.utils.visualizer import Visualizer
        # from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
        # from os import path
        # image_rgb = image[..., ::-1]
        # V = Visualizer(image_rgb, dataset_dict)
        # # draw the foreground mask of each object category
        # binary_masks = kp_maps > 0.1
        # _, fn = path.split(dataset_dict["file_name"])
        # fn_next, ext = path.splitext(fn)
        # print('Mask size: ', binary_masks.shape)
        # print('Image size: ', image_rgb.shape)
        # assert binary_masks.shape[1] == image_rgb.shape[0], (binary_masks.shape[1], image_rgb.shape[0])
        # assert binary_masks.shape[2] == image_rgb.shape[1], (binary_masks.shape[2], image_rgb.shape[1])
        # assert image_rgb.shape[2] == 3, image_rgb.shape[2]
        # bm = binary_masks
        # for i in range(binary_masks.shape[0]):
        #     masked_image = V.draw_binary_mask(
        #         bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10
        #     )  # COCO_CATEGORIES[i]["color"]
        #     # filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png'
        #     # masked_image.save(filepath)
        # filepath = "tmp/" + fn_next + '.png'
        # masked_image.save(filepath)
        ################################################################

        ################################################
        # # visualize the segmentation mask
        # from os import path
        # image_rgb = image[..., ::-1]  # utils.read_image(dataset_dict["file_name"], format="RGB")
        # segmask = dataset_dict["semseg"].tensor.numpy()
        # _, fn = path.split(dataset_dict["file_name"])
        # fn_next, ext = path.splitext(fn)
        # im = Image.fromarray(np.uint8(image_rgb))
        # filepath = "tmp_segmap_sorted/" + fn_next + '_raw.png'
        # im.save(filepath)
        # im2 = Image.fromarray(np.uint8(segmask * 3))
        # filepath2 = "tmp_segmap_sorted/" + fn_next + '_seg.png'
        # im2.save(filepath2)
        ################################################

        ###############
        # # visualize the segmentation map and contours
        # from detectron2.utils.visualizer import Visualizer
        # from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
        # from os import path
        # # V.draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8)
        # image_rgb = image[..., ::-1]  # utils.read_image(dataset_dict["file_name"], format="RGB")
        # V = Visualizer(image_rgb, dataset_dict)
        # # draw the foreground mask of each object category
        # # binary_masks = dataset_dict["contours"].gt_segmasks.tensor
        # binary_masks = dataset_dict["contours"].gt_contours.tensor
        # _, fn = path.split(dataset_dict["file_name"])
        # fn_next, ext = path.splitext(fn)
        # print('Mask size: ', binary_masks.size())
        # print('Image size: ', image_rgb.shape)
        # assert binary_masks.size(1) == image_rgb.shape[0], (binary_masks.size(1), image_rgb.shape[0])
        # assert binary_masks.size(2) == image_rgb.shape[1], (binary_masks.size(2), image_rgb.shape[1])
        # assert image_rgb.shape[2] == 3, image_rgb.shape[2]
        # bm = binary_masks.numpy()
        # # bm_uint8 = bm.astype("uint8")
        # # print(bm)
        # for i in range(binary_masks.size(0)):
        #     masked_image = V.draw_binary_mask(
        #         bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10
        #     )  # COCO_CATEGORIES[i]["color"]
        #     # filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png'
        #     # masked_image.save(filepath)
        # filepath = "tmp/" + fn_next + '.png'
        # masked_image.save(filepath)
        ################################################################

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    try:
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    except Exception as e:
        print(dataset_dict["file_name"])
        print(e)
        raise e
    try:
        utils.check_image_size(dataset_dict, image)
    except SizeMismatchError as e:
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        image_wh = (image.shape[1], image.shape[0])
        if (image_wh[1], image_wh[0]) == expected_wh:
            print("transposing image {}".format(dataset_dict["file_name"]))
            image = image.transpose(1, 0, 2)
        else:
            raise e

    if "annotations" not in dataset_dict or len(dataset_dict["annotations"]) == 0:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                dataset_dict["annotations"],
                crop_box=self.crop_box,
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32")
    )  # Can use uint8 if it turns out to be slow some day

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  self.min_box_side_len, self.proposal_topk)

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        dataset_dict.pop("pano_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances(annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt

    if self.basis_loss_on and self.is_train:
        # load basis supervisions
        if self.ann_set == "coco":
            basis_sem_path = dataset_dict["file_name"].replace(
                'train2017', 'thing_train2017').replace('image/train', 'thing_train')
        else:
            basis_sem_path = dataset_dict["file_name"].replace(
                'coco', 'lvis').replace('train2017', 'thing_train')
        # change extension to npz
        basis_sem_path = basis_sem_path.replace('jpg', 'npz')
        basis_sem_gt = np.load(basis_sem_path)["mask"]
        basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
        basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
        dataset_dict["basis_sem"] = basis_sem_gt
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32")
    )  # Can use uint8 if it turns out to be slow some day

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  self.min_box_side_len, self.proposal_topk)

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for annotation in dataset_dict["annotations"]:
            if not self.mask_on:
                annotation.pop("segmentation", None)
            if not self.keypoint_on:
                annotation.pop("keypoints", None)
            if not self.fiberwidth_on:
                annotation.pop("fiberwidth", None)
            if not self.fiberlength_on:
                annotation.pop("fiberlength", None)

        annotations = [
            obj for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        num_keypoints = self.cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS
        annotations = [
            transformation.interpolate_keypoints(obj, num_keypoints)
            for obj in annotations
        ]
        annotations = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in annotations
        ]
        annotations = [
            transformation.transform_instance_keypoint_order(obj, self.cfg)
            for obj in annotations
        ]
        instances = utils.annotations_to_instances(
            annotations, image_shape, mask_format=self.mask_format)

        if len(annotations) and "fiberwidth" in annotations[0] and self.fiberwidth_on:
            gt_fiberwidth = torch.tensor([obj["fiberwidth"] for obj in annotations])
            instances.gt_fiberwidth = gt_fiberwidth
        if len(annotations) and "fiberlength" in annotations[0] and self.fiberlength_on:
            gt_fiberlength = torch.tensor([obj["fiberlength"] for obj in annotations])
            instances.gt_fiberlength = gt_fiberlength

        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
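
# --- illustration sketch (hypothetical, not the project's implementation) ----
# transformation.interpolate_keypoints above is project-specific. As a rough
# sketch of what resampling a keypoint chain to a fixed count can look like,
# assuming COCO-style flat [x1, y1, v1, x2, y2, v2, ...] keypoints:
import numpy as np

def interpolate_keypoints_sketch(keypoints, num_keypoints):
    """Resample a polyline of keypoints to num_keypoints, evenly by arc length."""
    pts = np.asarray(keypoints, dtype=np.float64).reshape(-1, 3)
    xy = pts[:, :2]
    seg = np.linalg.norm(np.diff(xy, axis=0), axis=1)  # per-segment lengths
    t = np.concatenate([[0.0], np.cumsum(seg)])        # cumulative arc length
    t_new = np.linspace(0.0, t[-1], num_keypoints)
    x = np.interp(t_new, t, xy[:, 0])
    y = np.interp(t_new, t, xy[:, 1])
    v = np.full(num_keypoints, 2.0)  # mark resampled points as visible
    return np.stack([x, y, v], axis=1).reshape(-1).tolist()

# a 3-point chain resampled to 5 evenly spaced points
print(interpolate_keypoints_sketch([0, 0, 2, 10, 0, 2, 10, 10, 2], 5))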
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # print("BELOW IS THE dataset_dict (FOR DEBUGGING)")
    # print(dataset_dict)

    # USER: Write your own image loading if it's not from a file
    try:
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    except Exception as e:
        print(dataset_dict["file_name"])
        print(e)
        raise e
    try:
        utils.check_image_size(dataset_dict, image)
    except SizeMismatchError as e:
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        image_wh = (image.shape[1], image.shape[0])
        if (image_wh[1], image_wh[0]) == expected_wh:
            print("transposing image {}".format(dataset_dict["file_name"]))
            image = image.transpose(1, 0, 2)
        else:
            raise e

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    boxes = np.asarray([
        BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
        for instance in dataset_dict["annotations"]
    ])
    aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(self.augmentation)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        dataset_dict.pop("pano_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices,
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of the original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    if self.basis_loss_on and self.is_train:
        # load basis supervisions
        if self.ann_set == "coco":
            basis_sem_path = (
                dataset_dict["file_name"]
                .replace("train2017", "thing_train2017")
                .replace("image/train", "thing_train")
            )
        else:
            basis_sem_path = (
                dataset_dict["file_name"]
                .replace("coco", "lvis")
                .replace("train2017", "thing_train")
            )
        # change extension to npz
        basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
        basis_sem_gt = np.load(basis_sem_path)["mask"]
        basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
        basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
        dataset_dict["basis_sem"] = basis_sem_gt
    return dataset_dict
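
# --- worked example (not from the original sources) ---------------------------
# The triangle comment above is easy to check numerically with detectron2's own
# BitMasks: after a crop, the tight box of the remaining mask is smaller than
# the original box intersected with the crop window.
import torch
from detectron2.structures import BitMasks

# right triangle with vertices (0,0), (4,0), (0,4) on a 5x5 grid
mask = torch.zeros(1, 5, 5, dtype=torch.bool)
for y in range(5):
    for x in range(5):
        mask[0, y, x] = (x + y) <= 4
mask[:, :, :2] = False  # simulate a crop removing columns 0-1

print(BitMasks(mask).get_bounding_boxes().tensor)  # tight XYXY box of what remains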
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)
    # You can add your own code here to change the input image

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if self.load_proposals:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  self.min_box_side_len, self.proposal_topk)

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)
            if not self.attribute_on:
                anno.pop("attribute_ids", None)

        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances_with_attributes(
            annos, image_shape,
            mask_format=self.mask_format,
            load_attributes=self.attribute_on,
            max_attr_per_ins=self.max_attr_per_ins)
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    try:
        utils.check_image_size(dataset_dict, image)
    except Exception as e:
        print(e)
        import moxing as mox
        mox.file.copy_parallel(
            dataset_dict["file_name"],
            's3://bucket-6756/liangxiwen/result/haitian_semi/unbiased-teacher/wrong_imgs/'
            + dataset_dict["file_name"].split('/')[-1])
        print(image.shape)
        image = np.rot90(image)
        print(image.shape)
        utils.check_image_size(dataset_dict, image)

    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(self.augmentation)
    image_weak_aug, sem_seg_gt = aug_input.image, aug_input.sem_seg
    image_shape = image_weak_aug.shape[:2]  # h, w

    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices,
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )

        if self.compute_tight_boxes and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

        bboxes_d2_format = utils.filter_empty_instances(instances)
        dataset_dict["instances"] = bboxes_d2_format

    # apply strong augmentation
    # We use torchvision augmentation, which is not compatible with detectron2's
    # numpy image format, so we need to convert to PIL format first.
    image_pil = Image.fromarray(image_weak_aug.astype("uint8"), "RGB")
    image_strong_aug = np.array(self.strong_augmentation(image_pil))
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image_strong_aug.transpose(2, 0, 1))
    )

    dataset_dict_key = copy.deepcopy(dataset_dict)
    dataset_dict_key["image"] = torch.as_tensor(
        np.ascontiguousarray(image_weak_aug.transpose(2, 0, 1))
    )
    assert dataset_dict["image"].size(1) == dataset_dict_key["image"].size(1)
    assert dataset_dict["image"].size(2) == dataset_dict_key["image"].size(2)
    return (dataset_dict, dataset_dict_key)
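
# --- plausible definition sketch (assumption, not the repository's values) ----
# self.strong_augmentation above is built elsewhere. A torchvision pipeline in
# the spirit of weak/strong self-training could be photometric-only, so the
# boxes computed from the weakly augmented image stay valid:
from torchvision import transforms as tv_transforms

strong_augmentation = tv_transforms.Compose([
    tv_transforms.RandomApply([tv_transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
    tv_transforms.RandomGrayscale(p=0.2),
    tv_transforms.RandomApply(
        [tv_transforms.GaussianBlur(kernel_size=23, sigma=(0.1, 2.0))], p=0.5),
])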
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    try:
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    except Exception as e:
        print(dataset_dict["file_name"])
        print(e)
        raise e
    try:
        utils.check_image_size(dataset_dict, image)
    except SizeMismatchError as e:
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        image_wh = (image.shape[1], image.shape[0])
        if (image_wh[1], image_wh[0]) == expected_wh:
            print("transposing image {}".format(dataset_dict["file_name"]))
            image = image.transpose(1, 0, 2)
        else:
            raise e

    if "annotations" not in dataset_dict or len(dataset_dict["annotations"]) == 0:
        image, transforms = T.apply_augmentations(
            ([self.crop] if self.crop else []) + self.augmentation, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop:
            crop_tfm = gen_crop_transform_with_instance(
                self.crop.get_crop_size(image.shape[:2]),
                image.shape[:2],
                dataset_dict["annotations"],
                crop_box=self.crop_box,
            )
            image = crop_tfm.apply_image(image)
        try:
            image, transforms = T.apply_augmentations(self.augmentation, image)
        except ValueError as e:
            print(dataset_dict["file_name"])
            raise e
        if self.crop:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        dataset_dict.pop("pano_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances(annos, image_shape, mask_format=self.mask_format)

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.crop and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt

    if self.basis_loss_on and self.is_train:
        # load basis supervisions
        if self.ann_set == "coco":
            basis_sem_path = dataset_dict["file_name"].replace(
                'train2017', 'thing_train2017').replace('image/train', 'thing_train')
        else:
            basis_sem_path = dataset_dict["file_name"].replace(
                'coco', 'lvis').replace('train2017', 'thing_train')
        # change extension to npz
        basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
        basis_sem_gt = np.load(basis_sem_path)["mask"]
        basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
        basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
        dataset_dict["basis_sem"] = basis_sem_gt
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = self.load_image(dataset_dict)

    prob = rand_range()
    if self.is_train:
        # apply at most one of mosaic and mixup
        try:
            if prob <= self.mosaic_prob:
                image, dataset_dict = self.load_mosaic(image, dataset_dict)
            elif prob <= self.mixup_prob:
                image, dataset_dict = self.load_mixup(image, dataset_dict)
        except Exception:
            print("mosaic or mixup augmentation error!!")

    # apply cutout
    # if random.random() < self.cutout_prob:
    #     image, dataset_dict = cutout(image, dataset_dict)

    # apply albumentations transform
    if self.img_format == "BGR":
        image = image[..., ::-1]  # albumentations expects an RGB image as input

    # one of [hsv, brightness_contrast]
    # if self.is_train:
    #     if random.random() < 0.5:
    #         image = augment_hsv(image, 0.014, 0.68, 0.36)  # yolov5 hyp
    #     else:
    #         image = augment_brightness_contrast(image)

    augment_anno = {"image": image, "bboxes": [], "category_id": []}
    if "annotations" in dataset_dict:
        augment_anno["bboxes"] = [x['bbox'] for x in dataset_dict["annotations"]]
        augment_anno["category_id"] = [x['category_id'] for x in dataset_dict["annotations"]]

    # do augmentation (see the sketch after this function for a plausible pipeline)
    augment_anno = self._albumentations_tfm(**augment_anno)

    image = augment_anno["image"]
    if self.img_format == "BGR":
        image = image[..., ::-1]  # convert back to BGR
    if len(augment_anno["bboxes"]) > 0:
        dataset_dict["annotations"] = [{
            "category_id": category_id,
            "bbox": bbox,
            "iscrowd": 0,
            "area": bbox[2] * bbox[3],
            "bbox_mode": BoxMode.XYWH_ABS,
        } for bbox, category_id in zip(augment_anno["bboxes"], augment_anno["category_id"])]
    else:
        dataset_dict["annotations"] = []

    # apply detectron2 transform
    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        # only transform the image
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  self.min_box_side_len, self.proposal_topk)

    # if not self.is_train:
    #     # USER: Modify this if you want to keep them for some reason.
    #     dataset_dict.pop("annotations", None)
    #     dataset_dict.pop("sem_seg_file_name", None)
    #     return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        # transform only the annotations, because the image was transformed above
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
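
# --- plausible definition sketch (assumption, not the repository's pipeline) --
# Passing bboxes/category_id keywords as done above requires that
# self._albumentations_tfm was composed with bbox_params; detectron2's
# XYWH_ABS matches albumentations' "coco" box format. The transforms listed
# here are placeholders:
import albumentations as A

_albumentations_tfm = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
    ],
    bbox_params=A.BboxParams(format="coco", label_fields=["category_id"]),
)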
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in centernet Dataset format.

    Returns:
        dict: a format that builtin models in centernet accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    # draw image
    # for anno in dataset_dict['annotations']:
    #     bbox = anno['bbox']
    #     import cv2
    #     cv2.rectangle(image,
    #                   (int(bbox[0]), int(bbox[1])),
    #                   (int(bbox[2]), int(bbox[3])),
    #                   (0, 255, 0),
    #                   2)
    # cv2.imwrite('result.jpg', image)
    # import pdb
    # pdb.set_trace()

    if self.kd_without_label or "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # apply imgaug
    if self.is_train and self.imgaug_prob < 1.0:
        image = arguementation(image, self.imgaug_prob)

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32")
    )  # Can use uint8 if it turns out to be slow some day

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  self.min_box_side_len, self.proposal_topk)

    if not self.is_train and not self.eval_with_gt:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict and not self.kd_without_label:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(
            instances, box_threshold=self.BOX_MINSIZE)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Transform the dataset_dict according to the configured transformations.

    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a new dict that's going to be processed by the model.
            It currently does the following:
            1. Read the image from "file_name"
            2. Transform the image and annotations
            3. Prepare the annotations to :class:`Instances`
    """
    # get 3D models for each annotation and remove 3D mesh models from the image dict
    mesh_models = []
    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            mesh_models.append(
                [
                    self._all_mesh_models[anno["mesh"]][0].clone(),
                    self._all_mesh_models[anno["mesh"]][1].clone(),
                ]
            )

    dataset_dict = {key: value for key, value in dataset_dict.items() if key != "mesh_models"}
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    if "annotations" in dataset_dict:
        for i, anno in enumerate(dataset_dict["annotations"]):
            anno["mesh"] = mesh_models[i]

    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32")
    )  # Can use uint8 if it turns out to be slow some day

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
        )

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        annos = [
            self.transform_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        # Should not be empty during training
        instances = annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]

    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)
    original_image = image

    if self.crop_gen is None or np.random.rand() > 0.5:
        tfm_gens = self.tfm_gens
    else:
        tfm_gens = self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:]

    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))
    else:
        sem_seg_gt = None

    aug_input = T.StandardAugInput(original_image, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(tfm_gens)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if self.proposal_topk is not None:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  proposal_topk=self.proposal_topk)

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        return dataset_dict

    if type(transforms[0]) is FT.NoOpTransform:
        flip = 0
    elif type(transforms[0]) is FT.HFlipTransform:
        flip = 1
    else:
        flip = 2
    dataset_dict["flip"] = flip

    if sem_seg_gt is not None:
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        if self.sem_seg_unlabeled_region_on:
            sem_seg_gt[sem_seg_gt == self.ignore_value] = self.num_sem_seg_classes
        dataset_dict["sem_seg"] = sem_seg_gt

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
        if self.unseen_label_set is not None:
            dataset_dict["instances"] = filter_unseen_class(
                dataset_dict["instances"], self.unseen_label_set)

    if self.unlabeled_region_on:
        if self.sem_seg_unlabeled_region_on:
            cum_sem_seg = cum_map(dataset_dict["sem_seg"], self.num_sem_seg_classes)
        else:
            cum_sem_seg = cum_map(dataset_dict["sem_seg"], self.ignore_value)
        dataset_dict["integral_sem_seg"] = cum_sem_seg
    return dataset_dict
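
# --- concept sketch (the real cum_map may differ) ------------------------------
# cum_map is defined elsewhere in this project; conceptually it is an integral
# image over the mask of unlabeled pixels, so region sums can be read off in
# O(1). A hedged sketch of that idea:
import torch

def cum_map_sketch(sem_seg, unlabeled_id):
    """out[y, x] = number of pixels labeled `unlabeled_id` in sem_seg[:y+1, :x+1]."""
    mask = (sem_seg == unlabeled_id).to(torch.float32)
    return mask.cumsum(dim=0).cumsum(dim=1)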
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)
    image = transform(image=image)["image"]
    ############################################################################
    '''
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    h, w, _ = image.shape
    utils.check_image_size(dataset_dict, image)
    bboxes = [ann["bbox"] for ann in dataset_dict['annotations']]
    labels = [ann['category_id'] for ann in dataset_dict['annotations']]
    class_labels = [CLASSES[label] for label in labels]
    segmentations = [ann["segmentation"] for ann in dataset_dict['annotations']]
    # cprint("before :", segmentations)
    masks = convert_coco_poly_to_mask(segmentations, h, w)
    masks = [mask.numpy() for mask in masks]
    transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels, masks=masks)
    image = transformed["image"]
    bboxes = transformed["bboxes"]
    class_labels = transformed["class_labels"]
    labels = [CLASSES.index(cl) for cl in class_labels]
    filtered_masks = []
    for mask in transformed["masks"]:
        # if len(np.unique(mask)) > 1:
        filtered_masks.append(mask)
    if len(bboxes) != len(filtered_masks):
        print(len(bboxes), len(filtered_masks), len(labels))
    # print(len(bboxes), len(masks), len(labels))
    seg_masks = [binary_mask_to_polygon(mask, tolerance=2) for mask in masks]
    for idx in range(len(labels)):
        dataset_dict['annotations'][idx]["bbox"] = bboxes[idx]
        dataset_dict['annotations'][idx]["labels"] = labels[idx]
        dataset_dict['annotations'][idx]["segmentation"] = seg_masks[idx]
    dataset_dict['annotations'] = dataset_dict['annotations'][:len(labels)]
    '''

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(self.augmentations)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk is not None:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
        )

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    category = dataset_dict["annotations"][0]["category_id"]
    # USER: Write your own image loading if it's not from a file
    try:
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    except Exception as e:
        print(dataset_dict["file_name"])
        print(e)
        raise e
    try:
        utils.check_image_size(dataset_dict, image)
    except SizeMismatchError as e:
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        image_wh = (image.shape[1], image.shape[0])
        if (image_wh[1], image_wh[0]) == expected_wh:
            print("transposing image {}".format(dataset_dict["file_name"]))
            image = image.transpose(1, 0, 2)
        else:
            raise e

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    if int(category) != 5:
        boxes = np.asarray([
            BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
            for instance in dataset_dict["annotations"]
        ])
        aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentation)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg
    else:
        # Lane images (category 5) skip the detection augmentations.
        boxes = np.asarray([0])

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of
    # pickle & mp.Queue. Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk and int(category) != 5:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    if not self.is_train:
        if int(category) != 5:
            dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        dataset_dict.pop("pano_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        if int(category) != 5:
            annos = [
                transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]

        segment_transform = transf.Compose([
            myTransform.FreeScaleMask((60, 100)),
            myTransform.MaskToTensor(),
        ])
        # NOTE: img_transform is constructed but not passed to LaneClsDataset below.
        img_transform = transf.Compose([
            transf.Resize((288, 800)),
            transf.ToTensor(),
            transf.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        use_ax = self.is_train

        if int(category) != 5:
            instances = annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format
            )
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        else:
            # Lane branch: delegate label generation to the lane dataset.
            cl = LaneClsDataset(
                "/home/ghr/hdd/traffic_sign/only_lane/images/CULANE_288",
                img_path=dataset_dict["file_name"],
                row_anchor=culane_row_anchor,
                seg_path=dataset_dict["annotations"][0]["lanefilepath"],
                segment_transform=segment_transform,
                use_aux=use_ax,
            )
            if use_ax:
                img, cls, seg = cl.get_item()
            else:
                img, cls = cl.get_item()
                seg = 0
            dataset_dict["seg_label"] = seg
            dataset_dict["cls_label"] = cls

    if self.basis_loss_on and self.is_train:  # load basis supervisions
        if self.ann_set == "coco":
            basis_sem_path = (
                dataset_dict["file_name"]
                .replace("train2017", "thing_train2017")
                .replace("image/train", "thing_train")
            )
        else:
            basis_sem_path = (
                dataset_dict["file_name"].replace("coco", "lvis").replace("train2017", "thing_train")
            )
        # change extension to npz
        basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
        basis_sem_gt = np.load(basis_sem_path)["mask"]
        basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
        basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
        dataset_dict["basis_sem"] = basis_sem_gt
    return dataset_dict
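# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original mapper): the BoxMode
# conversion used when building `boxes` above. COCO-style annotations store
# XYWH_ABS; the augmentation input expects XYXY_ABS. Assumes detectron2 is
# installed.
# ---------------------------------------------------------------------------
from detectron2.structures import BoxMode

xywh_box = [10.0, 20.0, 30.0, 40.0]  # x0, y0, width, height
xyxy_box = BoxMode.convert(xywh_box, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
assert xyxy_box == [10.0, 20.0, 40.0, 60.0]  # x0, y0, x1, y1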
def _original_call(self, dataset_dict):
    """
    Modified from detectron2's original __call__ in DatasetMapper
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below

    image = self._read_image(dataset_dict, format=self.img_format)
    if not self.backfill_size:
        utils.check_image_size(dataset_dict, image)

    image, dataset_dict = self._custom_transform(image, dataset_dict)

    inputs = AugInput(image=image)
    if "annotations" not in dataset_dict:
        transforms = AugmentationList(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens
        )(inputs)
        image = inputs.image
    else:
        # pass additional arguments, will only be used when the Augmentation
        # takes `annotations` as input
        inputs.annotations = dataset_dict["annotations"]
        # Crop around an instance if there are instances in the image.
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            inputs.image = crop_tfm.apply_image(image)
        transforms = AugmentationList(self.tfm_gens)(inputs)
        image = inputs.image
        if self.crop_gen:
            transforms = crop_tfm + transforms

    # Cache identical transforms in dataset_dict for subclass mappers
    # TODO T122215878 Find more explicit way to expose transforms used
    dataset_dict["transforms"] = transforms

    image_shape = image.shape[:2]  # h, w
    if image.ndim == 2:
        image = np.expand_dims(image, 2)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # Can use uint8 if it turns out to be slow some day

    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        annos = [
            utils.transform_instance_annotations(
                obj,
                transforms,
                image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices,
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = read_sem_seg_file_with_prefetch(
            dataset_dict.pop("sem_seg_file_name"),
            prefetched=dataset_dict.get(PREFETCHED_SEM_SEG_FILE_NAME, None),
        )
        if len(sem_seg_gt.shape) > 2:
            sem_seg_gt = sem_seg_gt.squeeze(2)
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt

    # extend standard D2 semantic segmentation to support multiple segmentation
    # files, each file can represent a class
    if "multi_sem_seg_file_names" in dataset_dict:
        raise NotImplementedError()

    if "_post_process_" in dataset_dict:
        proc_func = dataset_dict.pop("_post_process_")
        dataset_dict = proc_func(dataset_dict)

    return dataset_dict
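# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original mapper): the AugInput /
# AugmentationList pattern used in `_original_call`. The specific
# augmentations chosen here are assumptions for demonstration only.
# Assumes detectron2 and numpy are installed.
# ---------------------------------------------------------------------------
import numpy as np
from detectron2.data import transforms as T

image = np.zeros((480, 640, 3), dtype=np.uint8)
aug_input = T.AugInput(image)
augs = T.AugmentationList([
    T.ResizeShortestEdge(short_edge_length=640, max_size=1333),
    T.RandomFlip(horizontal=True),
])
transforms = augs(aug_input)  # mutates aug_input in place, returns a TransformList
image = aug_input.image       # the augmented image
# The returned TransformList can be replayed on other data, e.g.:
# sem_seg = transforms.apply_segmentation(sem_seg)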
def __call__(self, dataset_dicts):
    """
    Args:
        dataset_dicts (list[dict]): Metadata of one frame batch, each frame in
            Detectron2 Dataset format.

    Returns:
        list[dict]: batch of N input frames, each item in a format that builtin
            models in detectron2 accept
    """
    output_dicts = []
    transforms = None
    dataset_dicts = copy.deepcopy(dataset_dicts)  # it will be modified by code below
    for frame_dict in dataset_dicts:
        image = read_image(frame_dict["file_name"], format=self.img_format)
        utils.check_image_size(frame_dict, image)

        # First, generate the TransformList for the first image (it has random
        # components!). Then, apply the same transformations to the next images.
        # This way, we are applying the same transformation to the whole batch.
        if transforms is None:
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        else:
            image = transforms.apply_image(image)

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of
        # pickle & mp.Queue. Therefore it's important to use torch.Tensor.
        frame_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(
                frame_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
            )

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            # Drop the labels but still emit the frame, so the return value
            # stays a list[dict] as the docstring promises.
            frame_dict.pop("annotations", None)
            frame_dict.pop("sem_seg_file_name", None)
            output_dicts.append(frame_dict)
            continue

        if "annotations" in frame_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in frame_dict["annotations"]:
                anno.pop("segmentation", None)
                anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(obj, transforms, image_shape)
                for obj in frame_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances(annos, image_shape)
            frame_dict["instances"] = utils.filter_empty_instances(instances)

        output_dicts.append(frame_dict)
    return output_dicts
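# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original mapper): how one random
# TransformList is drawn once and then replayed across a batch of frames, as
# in the batch mapper above. Assumes detectron2 and numpy are installed.
# ---------------------------------------------------------------------------
import numpy as np
from detectron2.data import transforms as T

tfm_gens = [T.RandomFlip(horizontal=True)]
frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(3)]

first, tfms = T.apply_transform_gens(tfm_gens, frames[0])  # random draw happens here
rest = [tfms.apply_image(f) for f in frames[1:]]           # deterministic replay
augmented = [first] + rest                                 # consistently augmented clip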