Exemplo n.º 1
0
group.add_argument('--json_paths', nargs="+", help='json paths separated by whitespace')
group.add_argument('--annots_folder', help='path of annotation folder containing multiple jsons')
ap.add_argument('--output_json', help='path of output json', required=True)
args = ap.parse_args()

# Combine several COCO-instances annotation files into one dataset and export
# it. Inputs are either an explicit list of files (--json_paths) or every
# *.json file found directly inside a folder (--annots_folder).

# create Datumaro project
project = Project()

# add sources
if args.json_paths:
    for i, json_path in enumerate(args.json_paths):
        # check_json_path is defined elsewhere in this file; its result is
        # what gets registered as the source url.
        new_json_path = check_json_path(json_path)
        project.add_source(f'src{i}', {'url': str(new_json_path), 'format': 'coco_instances'})
elif args.annots_folder:
    # Only direct children of the folder are considered (no recursion into
    # subfolders). Path.iterdir() yields entries in arbitrary order, so the
    # files are sorted to keep the source numbering and the order in which
    # sources are added reproducible between runs and platforms.
    json_files = sorted(
        entry for entry in Path(args.annots_folder).iterdir()
        if entry.suffix == '.json')
    for i, json_path in enumerate(json_files):
        new_json_path = check_json_path(json_path)
        project.add_source(f'src{i}', {'url': str(new_json_path), 'format': 'coco_instances'})

# create a dataset
dataset = project.make_dataset()

# print some stats
print(f'num images: {num_img(dataset)}')
print(f'num images with annotations: {num_img_with_annots(dataset)}')
print(f'num annotations: {num_annots(dataset)}')

# export the resulting json in COCO format
export_json(dataset, args.output_json)
Exemplo n.º 2
0
# add sources
project.add_source('src1', {'url': args.json_path, 'format': 'coco_instances'})

# create a dataset
dataset = project.make_dataset()
print(f'total images: {num_img(dataset)}')

# Convert the train/val/test weights into fractions of their total.
# The total is computed once and checked up front so that an all-zero ratio
# produces a readable message instead of a bare ZeroDivisionError.
split_ratio = args.split_ratio
ratio_total = sum(split_ratio)
if ratio_total == 0:
    raise SystemExit(f'split ratio must not sum to zero, got: {split_ratio}')
train_ratio = split_ratio[0] / ratio_total
val_ratio = split_ratio[1] / ratio_total
test_ratio = split_ratio[2] / ratio_total
print(f'train/val/test ratio: {train_ratio} {val_ratio} {test_ratio}')

dataset = dataset.transform('random_split', [('train', train_ratio),
                                             ('val', val_ratio),
                                             ('test', test_ratio)])

for subset_name, subset in dataset.subsets().items():
    # print some stats
    print(f'subset: {subset_name}')
    print(f'num images: {num_img(subset)}')
    print(f'num images with annotations: {num_img_with_annots(subset)}')
    print(f'num annotations: {num_annots(subset)}')

    # export the resulting dataset in COCO format
    # The subset name is bound as a default argument so the predicate keeps
    # this iteration's value even if the selection is evaluated later
    # (closures over a loop variable otherwise see its latest value).
    subset_to_export = dataset.select(
        lambda item, name=subset_name: item.subset == name)
    # NOTE: each output is named <subset>.json in the input file's folder, so
    # an input file that is itself called e.g. train.json shares that path.
    output_json_path = Path(args.json_path).parent / f'{subset_name}.json'
    export_json(subset_to_export, str(output_json_path))
Exemplo n.º 3
0
args = ap.parse_args()

# Split one COCO-instances annotation file into several files, assigning each
# item to a split according to the prefix of its id.

# WRITE YOUR SPLIT HERE
# Maps split name -> list of item-id prefixes that belong to that split.
splits = {'train': ['set00', 'set01'], 'val': ['set02'], 'test': ['set03']}

# create Datumaro project
project = Project()

# add sources
project.add_source('src1', {'url': args.json_path, 'format': 'coco_instances'})

# create a dataset
dataset = project.make_dataset()
print(f'total images: {num_img(dataset)}')

for split_name, split_list in splits.items():
    # DEFINE SPLIT FUNCTION HERE
    # str.startswith accepts a tuple of prefixes. The tuple is built once per
    # split (not once per item) and bound as a default argument so the
    # predicate keeps this iteration's prefixes even if it is evaluated later.
    split_prefixes = tuple(split_list)
    dataset_split = dataset.select(
        lambda item, prefixes=split_prefixes: item.id.startswith(prefixes))

    # print some stats
    print(f'split: {split_name}')
    print(f'num images: {num_img(dataset_split)}')
    print(f'num images with annotations: {num_img_with_annots(dataset_split)}')
    print(f'num annotations: {num_annots(dataset_split)}')

    # export the resulting dataset in COCO format
    # Output goes next to the input file, named after the split.
    output_json_path = str(Path(args.json_path).parent / f'{split_name}.json')
    export_json(dataset_split, output_json_path)
Exemplo n.º 4
0
ap.add_argument(
    '--json_path',
    required=True,
    help='annotations json path',
)
ap.add_argument(
    '--output_json',
    default='',
    help='path of output json. overwrite input json if not specified',
)
args = ap.parse_args()

# Load the input annotations as the only source of a fresh Datumaro project
# and materialise it as a dataset.
project = Project()
project.add_source(
    'src1',
    {'url': args.json_path, 'format': 'coco_instances'},
)
dataset = project.make_dataset()

# Report the counts before filtering.
print('original stats')
print(f'num images: {num_img(dataset)}')
print(f'num images with annotations: {num_img_with_annots(dataset)}')
print(f'num annotations: {num_annots(dataset)}')

# WRITE YOUR FILTER HERE
# The expression is applied with filter_annotations=True.
# NOTE(review): presumably this keeps only annotations whose `w` exceeds 5
# and leaves the items themselves in place -- confirm against Datumaro docs.
dataset1 = dataset.filter(
    '/item/annotation[w>5]',
    filter_annotations=True,
)

# Report the same counts after filtering, for comparison.
print('filtered stats')
print(f'num images: {num_img(dataset1)}')
print(f'num images with annotations: {num_img_with_annots(dataset1)}')
print(f'num annotations: {num_annots(dataset1)}')

# Export in COCO format. The input path is passed along with the (possibly
# empty) output path; per the --output_json help text, the input file is
# overwritten when no output path is given.
export_json(dataset1, args.output_json, args.json_path)