# CLI arguments (`ap` and `group` are defined earlier in the file)
group.add_argument('--json_paths', nargs='+',
                   help='json paths separated by whitespace')
group.add_argument('--annots_folder',
                   help='path of annotation folder containing multiple jsons')
ap.add_argument('--output_json', help='path of output json', required=True)
args = ap.parse_args()

# create Datumaro project
project = Project()


def _add_coco_source(index, path):
    """Register *path* as a COCO-instances source named ``src<index>``."""
    validated = check_json_path(path)
    project.add_source(f'src{index}',
                       {'url': str(validated), 'format': 'coco_instances'})


# add sources
if args.json_paths:
    for idx, raw_path in enumerate(args.json_paths):
        _add_coco_source(idx, raw_path)
elif args.annots_folder:
    # NOTE: does not recurse into subfolders
    for idx, entry in enumerate(Path(args.annots_folder).iterdir()):
        if entry.suffix == '.json':
            _add_coco_source(idx, entry)

# create a dataset
dataset = project.make_dataset()

# print some stats
print(f'num images: {num_img(dataset)}')
print(f'num images with annotations: {num_img_with_annots(dataset)}')
print(f'num annotations: {num_annots(dataset)}')

# export the resulting json in COCO format
export_json(dataset, args.output_json)
# add sources
project.add_source('src1', {'url': args.json_path, 'format': 'coco_instances'})

# create a dataset
dataset = project.make_dataset()
print(f'total images: {num_img(dataset)}')

# normalize the requested ratios so they sum to 1
split_ratio = args.split_ratio
total = sum(split_ratio)  # hoisted: was recomputed for each of the 3 ratios
train_ratio = split_ratio[0] / total
val_ratio = split_ratio[1] / total
test_ratio = split_ratio[2] / total
print(f'train/val/test ratio: {train_ratio} {val_ratio} {test_ratio}')

dataset = dataset.transform(
    'random_split',
    [('train', train_ratio), ('val', val_ratio), ('test', test_ratio)])

for subset_name, subset in dataset.subsets().items():
    # print some stats
    print(f'subset: {subset_name}')
    print(f'num images: {num_img(subset)}')
    print(f'num images with annotations: {num_img_with_annots(subset)}')
    print(f'num annotations: {num_annots(subset)}')

    # export the resulting dataset in COCO format.
    # Bind subset_name as a default argument: a plain closure over the loop
    # variable is late-binding, and if select() evaluates lazily every
    # exported subset could end up filtered by the *last* subset's name.
    subset_to_export = dataset.select(
        lambda item, name=subset_name: item.subset == name)
    output_json_path = Path(args.json_path).parent / (subset_name + '.json')
    export_json(subset_to_export, str(output_json_path))
args = ap.parse_args()

# WRITE YOUR SPLIT HERE
splits = {'train': ['set00', 'set01'], 'val': ['set02'], 'test': ['set03']}

# create Datumaro project
project = Project()

# add sources
project.add_source('src1', {'url': args.json_path, 'format': 'coco_instances'})

# create a dataset
dataset = project.make_dataset()
print(f'total images: {num_img(dataset)}')

for split_name, split_list in splits.items():
    # DEFINE SPLIT FUNCTION HERE.
    # Hoist the tuple conversion out of the predicate (it ran once per item)
    # and bind it as a default argument: a plain closure over the loop
    # variable split_list is late-binding, so if select() evaluates lazily
    # every split could be filtered by the *last* split's prefixes.
    prefixes = tuple(split_list)
    dataset_split = dataset.select(
        lambda item, prefixes=prefixes: item.id.startswith(prefixes))

    # print some stats
    print(f'split: {split_name}')
    print(f'num images: {num_img(dataset_split)}')
    print(f'num images with annotations: {num_img_with_annots(dataset_split)}')
    print(f'num annotations: {num_annots(dataset_split)}')

    # export the resulting dataset in COCO format
    output_json_path = str(
        Path(args.json_path).parent / Path(split_name + '.json'))
    export_json(dataset_split, output_json_path)
# CLI arguments (`ap` is defined earlier in the file)
ap.add_argument('--json_path', help='annotations json path', required=True)
ap.add_argument('--output_json', default='',
                help='path of output json. '
                     'overwrite input json if not specified')
args = ap.parse_args()

# create Datumaro project
project = Project()

# add sources
project.add_source('src1', {'url': args.json_path, 'format': 'coco_instances'})

# create a dataset
dataset = project.make_dataset()


def _print_stats(ds):
    """Print image/annotation counts for *ds*."""
    print(f'num images: {num_img(ds)}')
    print(f'num images with annotations: {num_img_with_annots(ds)}')
    print(f'num annotations: {num_annots(ds)}')


# print original stats
print('original stats')
_print_stats(dataset)

# WRITE YOUR FILTER HERE
filtered = dataset.filter('/item/annotation[w>5]', filter_annotations=True)

# print filtered stats
print('filtered stats')
_print_stats(filtered)

# export the resulting dataset in COCO format
export_json(filtered, args.output_json, args.json_path)