def test_coco(self):
    """Verify Coco objects built from a dict and from a file path agree."""
    from sahi.utils.coco import Coco

    expected_mapping = {"1": "human", "2": "car"}
    expected_segmentation = [[501, 451, 622, 451, 622, 543, 501, 543]]

    # build the same dataset through both constructors
    coco_path = "tests/data/coco_utils/terrain_all_coco.json"
    coco_from_dict = Coco(load_json(coco_path))
    coco_from_path = Coco.from_coco_path(coco_path)

    # both construction paths must yield identical datasets
    for coco in (coco_from_dict, coco_from_path):
        self.assertEqual(len(coco.images), 3)
        self.assertEqual(coco.images[2].annotations[1].category_name, "human")
        self.assertEqual(
            coco.images[1].annotations[1].segmentation,
            expected_segmentation,
        )
        self.assertEqual(coco.category_mapping, expected_mapping)
def test_split_coco_as_train_val(self):
    """Check a 50/50 train/val split of the combined COCO dataset."""
    from sahi.utils.coco import Coco

    coco_dict_path = "tests/data/coco_utils/combined_coco.json"
    image_dir = "tests/data/coco_utils/"
    coco = Coco.from_coco_dict_or_path(coco_dict_path, image_dir=image_dir)

    result = coco.split_coco_as_train_val(train_split_rate=0.5, numpy_seed=0)

    # expected (annotation count, image height) per split with this seed
    expectations = {"train_coco": (5, 682), "val_coco": (7, 1365)}
    for split_name, (num_annotations, image_height) in expectations.items():
        split = result[split_name]
        self.assertEqual(len(split.json["images"]), 1)
        self.assertEqual(len(split.json["annotations"]), num_annotations)
        self.assertEqual(split.json["images"][0]["height"], image_height)
        self.assertEqual(split.image_dir, image_dir)
        # stats must stay consistent with the split's own contents
        self.assertEqual(split.stats["num_images"], len(split.images))
        self.assertEqual(
            split.stats["num_annotations"],
            len(split.json["annotations"]),
        )
def test_slice_image(self):
    """Slice a terrain image with its COCO annotations and check the slices."""
    # read coco file and build the Coco object
    coco_path = "tests/data/coco_utils/terrain1_coco.json"
    coco = Coco(load_json(coco_path))
    image_path = "tests/data/coco_utils/" + coco.images[0].file_name

    slice_image_result, num_total_invalid_segmentation = slice_image(
        image=image_path,
        coco_annotation_list=coco.images[0].annotations,
        output_file_name=None,
        output_dir=None,
        slice_height=512,
        slice_width=512,
        max_allowed_zeros_ratio=0.2,
        overlap_height_ratio=0.1,
        overlap_width_ratio=0.4,
        slice_sep="|",
        out_ext=".png",
        verbose=False,
    )

    self.assertEqual(len(slice_image_result.images), 21)
    self.assertEqual(len(slice_image_result.coco_images), 21)
    # first slice contains no annotations
    self.assertEqual(slice_image_result.coco_images[0].annotations, [])
    # spot-check a sliced annotation's area and bbox
    annotation = slice_image_result.coco_images[15].annotations[1]
    self.assertEqual(annotation.area, 12483)
    self.assertEqual(annotation.bbox, [341, 204, 73, 171])
def test_coco_merge(self):
    """Merge three terrain COCO files and verify ids are remapped."""
    from sahi.utils.coco import Coco

    image_dir = "tests/data/coco_utils/"
    coco_paths = [
        "tests/data/coco_utils/terrain1_coco.json",
        "tests/data/coco_utils/terrain2_coco.json",
        "tests/data/coco_utils/terrain3_coco.json",
    ]
    coco1, coco2, coco3 = (
        Coco.from_coco_dict_or_path(path, image_dir=image_dir) for path in coco_paths
    )

    # merge the second and third datasets into the first
    coco1.merge(coco2)
    coco1.merge(coco3)

    merged_json = coco1.json
    self.assertEqual(len(merged_json["images"]), 3)
    self.assertEqual(len(merged_json["annotations"]), 22)
    self.assertEqual(len(merged_json["categories"]), 2)
    self.assertEqual(len(coco1.images), 3)
    # annotation/image ids are re-assigned sequentially across the merge
    self.assertEqual(merged_json["annotations"][12]["id"], 13)
    self.assertEqual(merged_json["annotations"][12]["image_id"], 3)
    self.assertEqual(merged_json["annotations"][9]["category_id"], 1)
    self.assertEqual(merged_json["annotations"][9]["image_id"], 2)
    # image_dir and stats stay intact on both source objects
    self.assertEqual(coco1.image_dir, image_dir)
    self.assertEqual(coco2.image_dir, image_dir)
    self.assertEqual(coco2.stats["num_images"], len(coco2.images))
    self.assertEqual(coco2.stats["num_annotations"], len(coco2.json["annotations"]))
def test_coco_update_categories(self):
    """Remap categories and verify annotation category ids follow."""
    from sahi.utils.coco import Coco

    coco_path = "tests/data/coco_utils/terrain2_coco.json"
    image_dir = "tests/data/coco_utils/"
    coco = Coco.from_coco_dict_or_path(coco_path, image_dir=image_dir)

    def make_category(category_id, name):
        # categories in these fixtures use the name as supercategory
        return {"id": category_id, "name": name, "supercategory": name}

    # state before the update: a single "car" category with id 1
    self.assertEqual(len(coco.json["annotations"]), 5)
    self.assertEqual(len(coco.json["images"]), 1)
    self.assertEqual(len(coco.json["categories"]), 1)
    self.assertEqual(coco.json["categories"], [make_category(1, "car")])
    self.assertEqual(coco.json["annotations"][1]["category_id"], 1)
    self.assertEqual(coco.image_dir, image_dir)
    self.assertEqual(coco.stats["num_images"], len(coco.images))
    self.assertEqual(coco.stats["num_annotations"], len(coco.json["annotations"]))

    # remap to a three-category scheme; "car" moves from id 1 to id 2
    coco.update_categories(desired_name2id={"human": 1, "car": 2, "big_vehicle": 3})

    self.assertEqual(len(coco.json["annotations"]), 5)
    self.assertEqual(len(coco.json["images"]), 1)
    self.assertEqual(len(coco.json["categories"]), 3)
    self.assertEqual(
        coco.json["categories"],
        [
            make_category(1, "human"),
            make_category(2, "car"),
            make_category(3, "big_vehicle"),
        ],
    )
    self.assertEqual(coco.json["annotations"][1]["category_id"], 2)
    self.assertEqual(coco.image_dir, image_dir)
    self.assertEqual(coco.stats["num_images"], len(coco.images))
    self.assertEqual(coco.stats["num_annotations"], len(coco.json["annotations"]))
def test_coco2yolo(self):
    """Export a COCO dataset in YOLOv5 layout into a clean output directory."""
    from sahi.utils.coco import Coco

    coco_dict_path = "tests/data/coco_utils/combined_coco.json"
    image_dir = "tests/data/coco_utils/"
    output_dir = "tests/data/coco2yolo/"

    # start from a clean export directory
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)

    coco = Coco.from_coco_dict_or_path(coco_dict_path, image_dir=image_dir)
    coco.export_as_yolov5(output_dir=output_dir, train_split_rate=0.5, numpy_seed=0)
def test_get_subsampled_coco(self):
    """Subsample every 5th image and verify image/annotation alignment.

    The fixture has 50 images; a subsample ratio of 5 keeps 10 of them,
    and source image i*5 maps to subsampled image i.
    """
    from sahi.utils.coco import Coco
    from sahi.utils.file import load_json

    coco_path = "tests/data/coco_utils/visdrone2019-det-train-first50image.json"
    image_dir = "tests/data/coco_utils/"
    coco = Coco.from_coco_dict_or_path(coco_path, image_dir=image_dir)

    subsampled_coco = coco.get_subsampled_coco(subsample_ratio=5)

    self.assertEqual(len(coco.json["images"]), 50)
    self.assertEqual(len(subsampled_coco.json["images"]), 10)
    # source image 5 corresponds to subsampled image 1 (ratio 5)
    # NOTE: the original test repeated this exact assertion twice; the
    # duplicate has been removed.
    self.assertEqual(
        len(coco.images[5].annotations),
        len(subsampled_coco.images[1].annotations),
    )
    self.assertEqual(coco.image_dir, image_dir)
    self.assertEqual(subsampled_coco.image_dir, image_dir)
    # stats must stay consistent with the subsampled contents
    self.assertEqual(subsampled_coco.stats["num_images"], len(subsampled_coco.images))
    self.assertEqual(
        subsampled_coco.stats["num_annotations"],
        len(subsampled_coco.json["annotations"]),
    )
def test_get_area_filtered_coco(self):
    """Filter annotations by area, globally and per category.

    The original test ran the per-category filtering case twice with
    byte-identical code; the duplicate run has been removed and the shared
    assertions factored into a helper.
    """
    from sahi.utils.coco import Coco

    coco_path = "tests/data/coco_utils/visdrone2019-det-train-first50image.json"
    image_dir = "tests/data/coco_utils/"
    coco = Coco.from_coco_dict_or_path(coco_path, image_dir=image_dir)

    def _assert_filtered(filtered, expected_num_images, area_min, area_max):
        # shared checks: source untouched, counts/bounds/stats consistent
        self.assertEqual(len(coco.json["images"]), 50)
        self.assertEqual(len(filtered.json["images"]), expected_num_images)
        self.assertGreater(filtered.stats["min_annotation_area"], area_min)
        self.assertLess(filtered.stats["max_annotation_area"], area_max)
        self.assertEqual(filtered.image_dir, image_dir)
        self.assertEqual(filtered.stats["num_images"], len(filtered.images))
        self.assertEqual(
            filtered.stats["num_annotations"],
            len(filtered.json["annotations"]),
        )

    # global area interval
    min_area = 50
    max_area = 10000
    area_filtered_coco = coco.get_area_filtered_coco(min=min_area, max=max_area)
    _assert_filtered(area_filtered_coco, 15, min_area, max_area)

    # per-category area intervals
    intervals_per_category = {
        "human": {"min": 20, "max": 10000},
        "vehicle": {"min": 50, "max": 15000},
    }
    area_filtered_coco = coco.get_area_filtered_coco(
        intervals_per_category=intervals_per_category
    )
    _assert_filtered(
        area_filtered_coco,
        22,
        min(
            intervals_per_category["human"]["min"],
            intervals_per_category["vehicle"]["min"],
        ),
        max(
            intervals_per_category["human"]["max"],
            intervals_per_category["vehicle"]["max"],
        ),
    )
def test_slice_image(self):
    """Slice the same image given as a file path, a numpy array and a PIL image.

    All three input forms must produce identical slicing results; the
    original test repeated the identical call and assertions three times,
    which is factored into helpers here.
    """
    # read coco file
    coco_path = "tests/data/coco_utils/terrain1_coco.json"
    coco = Coco.from_coco_dict_or_path(coco_path)
    image_path = "tests/data/coco_utils/" + coco.images[0].file_name

    def _slice(image):
        # run slice_image with the shared parameter set
        return slice_image(
            image=image,
            coco_annotation_list=coco.images[0].annotations,
            output_file_name=None,
            output_dir=None,
            slice_height=512,
            slice_width=512,
            overlap_height_ratio=0.1,
            overlap_width_ratio=0.4,
            min_area_ratio=0.1,
            out_ext=".png",
            verbose=False,
        )

    def _assert_result(result):
        # expected slicing outcome is the same for every input form
        self.assertEqual(len(result.images), 18)
        self.assertEqual(len(result.coco_images), 18)
        self.assertEqual(result.coco_images[0].annotations, [])
        self.assertEqual(result.coco_images[15].annotations[1].area, 7296)
        self.assertEqual(
            result.coco_images[15].annotations[1].bbox,
            [17, 186, 48, 152],
        )

    # file path input
    _assert_result(_slice(image_path))
    # numpy array input
    _assert_result(_slice(read_image(image_path)))
    # PIL image input
    _assert_result(_slice(Image.open(image_path)))
def slice_coco(
    coco_annotation_file_path: str,
    image_dir: str,
    output_coco_annotation_file_name: str,
    output_dir: Optional[str] = None,
    ignore_negative_samples: bool = False,
    slice_height: int = 512,
    slice_width: int = 512,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    min_area_ratio: float = 0.1,
    out_ext: Optional[str] = None,
    verbose: bool = False,
) -> "tuple[Dict, str]":
    # BUGFIX: return annotation was List[Union[Dict, str]] but the function
    # returns a (coco_dict, save_path) tuple; string annotation avoids a
    # runtime Tuple import.
    """
    Slice large images given in a directory, into smaller windows. If out_name is given export sliced images and coco file.

    Args:
        coco_annotation_file_path (str): Location of the coco annotation file
        image_dir (str): Base directory for the images
        output_coco_annotation_file_name (str): File name of the exported coco dataset json.
        output_dir (str, optional): Output directory
        ignore_negative_samples (bool): If True, images without annotations are ignored. Defaults to False.
        slice_height (int): Height of each slice. Default 512.
        slice_width (int): Width of each slice. Default 512.
        overlap_height_ratio (float): Fractional overlap in height of each slice (e.g. an overlap of 0.2 for a slice
            of size 100 yields an overlap of 20 pixels). Default 0.2.
        overlap_width_ratio (float): Fractional overlap in width of each slice (e.g. an overlap of 0.2 for a slice
            of size 100 yields an overlap of 20 pixels). Default 0.2.
        min_area_ratio (float): If the cropped annotation area to original annotation ratio is smaller than this
            value, the annotation is filtered out. Default 0.1.
        out_ext (str, optional): Extension of saved images. Default is the original suffix.
        verbose (bool, optional): Switch to print relevant values to screen. Default 'False'.

    Returns:
        coco_dict: dict
            COCO dict for sliced images and annotations
        save_path: str
            Path to the saved coco file ("" when nothing was exported)
    """
    # read coco file
    coco_dict: Dict = load_json(coco_annotation_file_path)
    # create image_id_to_annotation_list mapping
    coco = Coco.from_coco_dict_or_path(coco_dict)
    # init sliced coco_utils.CocoImage list
    sliced_coco_images: List = []

    # iterate over images and slice each together with its annotations
    for coco_image in tqdm(coco.images):
        image_path: str = os.path.join(image_dir, coco_image.file_name)
        slice_image_result = slice_image(
            image=image_path,
            coco_annotation_list=coco_image.annotations,
            output_file_name=Path(coco_image.file_name).stem,
            output_dir=output_dir,
            slice_height=slice_height,
            slice_width=slice_width,
            overlap_height_ratio=overlap_height_ratio,
            overlap_width_ratio=overlap_width_ratio,
            min_area_ratio=min_area_ratio,
            out_ext=out_ext,
            verbose=verbose,
        )
        # append slice outputs
        sliced_coco_images.extend(slice_image_result.coco_images)

    # create and save coco dict
    coco_dict = create_coco_dict(
        sliced_coco_images,
        coco_dict["categories"],
        ignore_negative_samples=ignore_negative_samples,
    )
    save_path = ""
    if output_coco_annotation_file_name and output_dir:
        save_path = os.path.join(output_dir, output_coco_annotation_file_name + "_coco.json")
        save_json(coco_dict, save_path)

    return coco_dict, save_path
default=0.9, help="set the training split ratio", ) parser.add_argument("--project", default="runs/coco2yolov5", help="save results to project/name") parser.add_argument("--name", default="exp", help="save results to project/name") parser.add_argument("--seed", type=int, default=1, help="fix the seed for reproducibility") opt = parser.parse_args() # increment run save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=False)) # load coco dict coco = Coco.from_coco_dict_or_path( coco_dict_or_path=opt.coco_file, image_dir=opt.source, ) # export as yolov5 coco.export_as_yolov5( output_dir=str(save_dir), train_split_rate=opt.train_split, numpy_seed=opt.seed, )
def slice_coco(
    coco_annotation_file_path: str,
    image_dir: str,
    output_coco_annotation_file_name: str = "",
    output_dir: str = "",
    ignore_negative_samples: bool = True,
    slice_height: int = 256,
    slice_width: int = 256,
    max_allowed_zeros_ratio: float = 0.2,
    overlap_height_ratio: float = 0.2,
    overlap_width_ratio: float = 0.2,
    slice_sep: str = "_",
    out_ext: str = ".png",
    verbose: bool = False,
):
    """
    Slice large images given in a directory, into smaller windows. If out_name is given export sliced images and coco file.

    Args:
        coco_annotation_file_path: str
            Location of the coco annotation file
        image_dir: str
            Base directory for the images
        output_coco_annotation_file_name : str
            Root name of the exported coco dataset file
        output_dir: str
            Output directory
        ignore_negative_samples: bool
            If True, images without annotations are ignored. Defaults to True.
        slice_height: int
            Height of each slice. Defaults to ``256``.
        slice_width: int
            Width of each slice. Defaults to ``256``.
        max_allowed_zeros_ratio: float
            Maximum fraction of window that is allowed to be zeros or null. Defaults to ``0.2``.
        overlap_height_ratio: float
            Fractional overlap in height of each window (e.g. an overlap of 0.2 for a window
            of size 256 yields an overlap of 51 pixels). Default to ``0.2``.
        overlap_width_ratio: float
            Fractional overlap in width of each window (e.g. an overlap of 0.2 for a window
            of size 256 yields an overlap of 51 pixels). Default to ``0.2``.
        slice_sep: str
            Character used to separate outname from coordinates in the saved windows.
            Defaults to ``_`` (the docstring previously claimed ``|``, contradicting the signature).
        out_ext: str
            Extension of saved images. Defaults to ``.png``.
        verbose: bool
            Switch to print relevant values to screen.
            Defaults to ``False``

    Returns:
        coco_dict: dict
            COCO dict for sliced images and annotations
        save_path: str
            Path to the saved coco file ("" when nothing was exported)
    """
    # define verboseprint (no-op unless verbose is requested)
    verboseprint = print if verbose else lambda *a, **k: None

    # read coco file
    coco_dict = load_json(coco_annotation_file_path)
    # create coco_utils.Coco object
    coco = Coco(coco_dict)
    # init sliced coco_utils.CocoImage list
    sliced_coco_images = []
    num_total_invalid_segmentation = 0

    # iterate over images and slice each together with its annotations
    for coco_image in tqdm(coco.images):
        image_path = os.path.join(image_dir, coco_image.file_name)
        slice_image_result, num_invalid_segmentation = slice_image(
            image=image_path,
            coco_annotation_list=coco_image.annotations,
            output_file_name=os.path.basename(coco_image.file_name),
            output_dir=output_dir,
            slice_height=slice_height,
            slice_width=slice_width,
            max_allowed_zeros_ratio=max_allowed_zeros_ratio,
            overlap_height_ratio=overlap_height_ratio,
            overlap_width_ratio=overlap_width_ratio,
            slice_sep=slice_sep,  # BUGFIX: was hard-coded to "_", ignoring the parameter
            out_ext=out_ext,  # BUGFIX: was hard-coded to ".png", ignoring the parameter
            verbose=False,
        )
        num_total_invalid_segmentation += num_invalid_segmentation
        # append slice outputs
        sliced_coco_images.extend(slice_image_result.coco_images)

    # create and save coco dict
    coco_dict = create_coco_dict(
        sliced_coco_images,
        coco_dict["categories"],
        ignore_negative_samples=ignore_negative_samples,
    )
    save_path = ""
    if output_coco_annotation_file_name and output_dir:
        save_path = os.path.join(output_dir, output_coco_annotation_file_name + "_coco.json")
        save_json(coco_dict, save_path)

    verboseprint("There are", num_total_invalid_segmentation, "invalid segmentations")
    return coco_dict, save_path