def main():
    """Convert Cityscapes polygon annotations to label images and write
    per-split filename lists.

    Reads CLI options via ``parse_args()``; converts every
    ``*_polygons.json`` under ``gt_dir`` (in parallel when ``--nproc`` > 1)
    and writes ``train.txt`` / ``val.txt`` / ``test.txt`` into ``out_dir``.
    """
    args = parse_args()
    cityscapes_path = args.cityscapes_path
    out_dir = args.out_dir if args.out_dir else cityscapes_path
    mmcv.mkdir_or_exist(out_dir)

    gt_dir = osp.join(cityscapes_path, args.gt_dir)

    poly_files = []
    for poly in mmcv.scandir(gt_dir, '_polygons.json', recursive=True):
        poly_file = osp.join(gt_dir, poly)
        poly_files.append(poly_file)
    if args.nproc > 1:
        mmcv.track_parallel_progress(convert_json_to_label, poly_files,
                                     args.nproc)
    else:
        mmcv.track_progress(convert_json_to_label, poly_files)

    split_names = ['train', 'val', 'test']
    for split in split_names:
        filenames = []
        for poly in mmcv.scandir(
                osp.join(gt_dir, split), '_polygons.json', recursive=True):
            filenames.append(poly.replace('_gtFine_polygons.json', ''))
        # FIX: the generator variable previously shadowed the open file
        # handle ``f``; use a distinct name.
        with open(osp.join(out_dir, f'{split}.txt'), 'w') as f:
            f.writelines(name + '\n' for name in filenames)
def convert_annotations(data,
                        path_prefix,
                        num_sample,
                        nproc,
                        start_img_id=0,
                        start_ann_id=0):
    """Rewrite image and annotation entries of a COCO-style dict in place.

    Annotations are processed before images; the category list is reset to
    a single 'text' category. Returns the mutated ``data`` dict.
    """
    image_fn = partial(
        modify_image_info,
        path_prefix=path_prefix,
        start_img_id=start_img_id)
    ann_fn = partial(
        modify_annotation,
        num_sample=num_sample,
        start_img_id=start_img_id,
        start_ann_id=start_ann_id)

    if nproc > 1:
        data['annotations'] = mmcv.track_parallel_progress(
            ann_fn, data['annotations'], nproc=nproc)
        data['images'] = mmcv.track_parallel_progress(
            image_fn, data['images'], nproc=nproc)
    else:
        data['annotations'] = mmcv.track_progress(ann_fn,
                                                  data['annotations'])
        data['images'] = mmcv.track_progress(image_fn, data['images'])

    data['categories'] = [{'id': 1, 'name': 'text'}]
    return data
def main():
    """Validate a dataset described by a config file, logging broken files.

    The log file at ``args.out_path`` is recreated on every run so stale
    results never leak into the current one.
    """
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # Recreate the output file used to record broken files.
    output_path = Path(args.out_path)
    if not output_path.parent.exists():
        raise Exception('log_file parent directory not found.')
    if output_path.exists():
        os.remove(output_path)
    output_path.touch()

    validator = DatasetValidator(cfg, output_path, args.phase)
    indices = list(range(len(validator)))
    if args.num_process > 1:
        # Mirror Pool.map's default chunksize: ceil(len / (nproc * 8)).
        chunksize, remainder = divmod(len(validator), args.num_process * 8)
        if remainder:
            chunksize += 1
        track_parallel_progress(
            validator.valid_idx,
            indices,
            args.num_process,
            chunksize=chunksize,
            keep_order=False)
    else:
        track_progress(validator.valid_idx, indices)

    print_info(output_path)
def convert(self):
    """Convert action."""
    # Imported lazily so the class can be defined without mmcv installed.
    import mmcv

    print('Start converting ...')
    indices = range(len(self))
    mmcv.track_parallel_progress(self.convert_one, indices, self.workers)
    print('\nFinished ...')
def main():
    """Convert COCO-Stuff annotations to trainID label masks.

    Processes the train2014 and test2014 splits, writing trainID masks
    under ``out_dir``. Fixes: use the ``nproc`` local consistently
    (the original re-read ``args.nproc`` in the branch test) and build
    each converter once instead of repeating the ``partial`` call.
    """
    args = parse_args()
    coco_path = args.coco_path
    nproc = args.nproc

    out_dir = args.out_dir or coco_path
    out_img_dir = osp.join(out_dir, 'images')
    out_mask_dir = osp.join(out_dir, 'annotations')

    mmcv.mkdir_or_exist(osp.join(out_img_dir, 'train2014'))
    mmcv.mkdir_or_exist(osp.join(out_img_dir, 'test2014'))
    mmcv.mkdir_or_exist(osp.join(out_mask_dir, 'train2014'))
    mmcv.mkdir_or_exist(osp.join(out_mask_dir, 'test2014'))

    train_list, test_list = generate_coco_list(coco_path)
    assert (len(train_list) +
            len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format(
                len(train_list), len(test_list))

    def _converter(is_train):
        # One picklable worker per split (partial of a top-level function).
        return partial(
            convert_to_trainID,
            in_img_dir=osp.join(coco_path, 'images'),
            in_ann_dir=osp.join(coco_path, 'annotations'),
            out_img_dir=out_img_dir,
            out_mask_dir=out_mask_dir,
            is_train=is_train)

    if nproc > 1:
        mmcv.track_parallel_progress(_converter(True), train_list, nproc=nproc)
        mmcv.track_parallel_progress(_converter(False), test_list, nproc=nproc)
    else:
        mmcv.track_progress(_converter(True), train_list)
        mmcv.track_progress(_converter(False), test_list)
    print('Done!')
def main():
    """Crop images and emit gold labels from the ground-truth annotations."""
    args = parse_args()
    # FIX: corrected typo in the user-facing message ('annoataion').
    print('Loading annotation data...')
    data = load_gt_data(args.anno_path, args.n_proc)
    process_with_outdir = partial(
        process, img_path_prefix=args.img_path, out_dir=args.out_dir)
    print('Creating cropped images and gold labels...')
    mmcv.track_parallel_progress(process_with_outdir, data, nproc=args.n_proc)
    print('Done')
def main():
    """Generate the augmented PASCAL VOC 2012 segmentation split files.

    Converts the SBD ``.mat`` label files in parallel, then writes
    ``trainaug.txt`` and ``aug.txt`` under the devkit's Segmentation
    ImageSets directory.
    """
    args = parse_args()
    devkit_path = args.devkit_path
    aug_path = args.aug_path
    nproc = args.nproc

    if args.out_dir is None:
        out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug')
    else:
        out_dir = args.out_dir
    mmcv.mkdir_or_exist(out_dir)

    in_dir = osp.join(aug_path, 'dataset', 'cls')
    mat_files = list(mmcv.scandir(in_dir, suffix='.mat'))
    mmcv.track_parallel_progress(
        partial(convert_mat, in_dir=in_dir, out_dir=out_dir),
        mat_files,
        nproc=nproc)

    def _read_list(path):
        # One stripped name per line.
        with open(path) as fp:
            return [line.strip() for line in fp]

    full_aug_list = _read_list(osp.join(aug_path, 'dataset', 'train.txt'))
    full_aug_list += _read_list(osp.join(aug_path, 'dataset', 'val.txt'))

    seg_dir = osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation')
    ori_train_list = _read_list(osp.join(seg_dir, 'train.txt'))
    val_list = _read_list(osp.join(seg_dir, 'val.txt'))

    aug_train_list = generate_aug_list(ori_train_list + full_aug_list,
                                       val_list)
    assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format(
        AUG_LEN)
    with open(osp.join(seg_dir, 'trainaug.txt'), 'w') as fp:
        fp.writelines(line + '\n' for line in aug_train_list)

    aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list)
    assert len(aug_list) == AUG_LEN - len(
        ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN -
                                                      len(ori_train_list))
    with open(osp.join(seg_dir, 'aug.txt'), 'w') as fp:
        fp.writelines(line + '\n' for line in aug_list)

    print('Done!')
def convert(self):
    """Convert action."""
    print('Start converting ...')
    mmcv.track_parallel_progress(self.convert_one, range(len(self)),
                                 self.workers)
    print('\nFinished ...')

    # Merge every per-frame .bin produced above into one result file.
    bin_paths = sorted(glob(join(self.waymo_results_save_dir, '*.bin')))
    merged = self.combine(bin_paths)
    with open(self.waymo_results_final_path, 'wb') as f:
        f.write(merged.SerializeToString())
def evaluate(pred_root, gt_root, trimap_root, verbose, nproc):
    """Evaluate test results of Adobe composition-1k dataset.

    The test set has 50 foreground/alpha pairs, each composited with 20
    backgrounds (1000 test images). ``gt_root`` may contain either the 50
    original alpha mattes or 1000 copies named after the composited images
    (e.g. ``alpha_matte1.png`` copied as ``alpha_matte1_0.png`` ...
    ``alpha_matte1_19.png``); both layouts are handled.

    Args:
        pred_root (str): Path to the predicted alpha matte folder.
        gt_root (str): Path to the ground truth alpha matte folder.
        trimap_root (str): Path to the predicted alpha matte folder.
        verbose (bool): Whether print result for each predicted alpha matte.
        nproc (int): number of processers.
    """
    images = sorted(mmcv.scandir(pred_root))
    gt_files_num = len(list(mmcv.scandir(gt_root)))
    # 50 ground-truth files => mattes were NOT copied per background, so the
    # original matte name must be recovered from the composited image name.
    pattern = re.compile(r'(.+)_(?:\d+)(.png)') if gt_files_num == 50 else None

    pairs = []
    for img in images:
        if pattern is not None:
            alpha_name = ''.join(pattern.match(img).groups())
        else:  # gt_files_num == 1000: copied mattes share the image name
            alpha_name = img
        trimap_path = None
        if trimap_root is not None:
            trimap_path = osp.join(trimap_root, img)
        pairs.append((osp.join(pred_root, img),
                      osp.join(gt_root, alpha_name), trimap_path))

    results = mmcv.track_parallel_progress(evaluate_one, pairs, nproc)

    if verbose:
        for img, (sad_result, mse_result, grad_result,
                  conn_result) in zip(images, results):
            print(f'{img} SAD: {sad_result:.6g} MSE: {mse_result:.6g} '
                  f'GRAD: {grad_result:.6g} CONN: {conn_result:.6g}')

    sad_mean, mse_mean, grad_mean, conn_mean = np.mean(results, axis=0)
    print(f'MEAN: SAD: {sad_mean:.6g} MSE: {mse_mean:.6g} '
          f'GRAD: {grad_mean:.6g} CONN: {conn_mean:.6g}')
def collect_annotations(files, dataset, nproc=1):
    """Collect the annotation information.

    Args:
        files(list): The list of tuples (image_file, groundtruth_file)
        dataset(str): The dataset name, icdar2015 or icdar2017
        nproc(int): The number of process to collect annotations

    Returns:
        images(list): The list of image information dicts
    """
    assert isinstance(files, list)
    assert isinstance(dataset, str)
    assert dataset
    assert isinstance(nproc, int)

    loader = partial(load_img_info, dataset=dataset)
    if nproc <= 1:
        return mmcv.track_progress(loader, files)
    return mmcv.track_parallel_progress(loader, files, nproc=nproc)
def collect_annotations(files, nproc=1):
    """Convert annotation images to info dicts, in parallel when nproc > 1."""
    print('Loading annotation images')
    if nproc <= 1:
        return mmcv.track_progress(img2coco, files)
    return mmcv.track_parallel_progress(img2coco, files, nproc=nproc)
def main():
    """Convert COCO-Stuff 164k annotations to trainID label masks.

    Fixes: use the ``nproc`` local consistently (the original re-read
    ``args.nproc`` in the branch test) and construct each split's converter
    once instead of duplicating the ``partial`` call per branch.
    """
    args = parse_args()
    coco_path = args.coco_path
    nproc = args.nproc

    out_dir = args.out_dir or coco_path
    out_img_dir = osp.join(out_dir, 'images')
    out_mask_dir = osp.join(out_dir, 'annotations')

    mmcv.mkdir_or_exist(osp.join(out_mask_dir, 'train2017'))
    mmcv.mkdir_or_exist(osp.join(out_mask_dir, 'val2017'))

    if out_dir != coco_path:
        shutil.copytree(osp.join(coco_path, 'images'), out_img_dir)

    # Skip masks already converted in a previous run.
    train_list = glob(osp.join(coco_path, 'annotations', 'train2017', '*.png'))
    train_list = [file for file in train_list if '_labelTrainIds' not in file]
    test_list = glob(osp.join(coco_path, 'annotations', 'val2017', '*.png'))
    test_list = [file for file in test_list if '_labelTrainIds' not in file]
    assert (len(train_list) +
            len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format(
                len(train_list), len(test_list))

    train_fn = partial(
        convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True)
    test_fn = partial(
        convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False)

    if nproc > 1:
        mmcv.track_parallel_progress(train_fn, train_list, nproc=nproc)
        mmcv.track_parallel_progress(test_fn, test_list, nproc=nproc)
    else:
        mmcv.track_progress(train_fn, train_list)
        mmcv.track_progress(test_fn, test_list)

    print('Done!')
def process(closeset_file, openset_file, merge_bg_others=False, n_proc=10):
    """Convert a close-set annotation file into its open-set counterpart."""
    closeset_lines = list_from_file(closeset_file)
    converter = partial(convert, merge_bg_others=merge_bg_others)
    openset_lines = mmcv.track_parallel_progress(
        converter, closeset_lines, nproc=n_proc)
    list_to_file(openset_file, openset_lines)
def load_annotations(self, ann_file):
    """Load per-image annotation info for every name listed in ``ann_file``.

    Args:
        ann_file (str): Path to a file with one annotation name per line.

    Returns:
        list: Info dicts produced by ``self._load_ann``, one per line.
    """
    self.project = sly.Project(self.img_prefix, sly.OpenMode.READ)
    anno_list = mmcv.list_from_file(ann_file)
    print('data loading ...')
    # FIX: removed the dead ``img_infos = list()`` that was immediately
    # overwritten. NOTE(review): 16 worker processes are hard-coded here.
    img_infos = mmcv.track_parallel_progress(self._load_ann, anno_list, 16)
    print('data loading finished !!!')
    return img_infos
def collect_annotations(files, nproc=1):
    """Gather per-image info dicts, in parallel when ``nproc`` > 1."""
    print("Loading annotation images")
    if nproc <= 1:
        return mmcv.track_progress(load_img_info, files)
    return mmcv.track_parallel_progress(load_img_info, files, nproc=nproc)
def load_gt_data(filename, n_proc):
    """Load SynthText-style .mat ground truth and parse records in parallel.

    Each record passed to ``load_gt_datum`` is a tuple
    (image name, transcriptions, word boxes, char boxes).
    """
    mat_data = loadmat(filename, simplify_cells=True)
    records = list(
        zip(mat_data['imnames'], mat_data['txt'], mat_data['wordBB'],
            mat_data['charBB']))
    return mmcv.track_parallel_progress(load_gt_datum, records, nproc=n_proc)
def test_track_parallel_progress_iterator():
    """track_parallel_progress should accept a (generator, length) pair and
    render the same bar as for a sized sequence."""
    out = StringIO()
    results = mmcv.track_parallel_progress(
        sleep_1s, ((i for i in [1, 2, 3, 4]), 4), 2, bar_width=4, file=out)
    # Exact expected output: initial bar line plus one carriage-returned
    # update per completed task.
    # NOTE(review): internal spacing of these literals may have been altered
    # by whitespace collapsing — confirm against the real test file.
    assert out.getvalue() == (
        '[ ] 0/4, elapsed: 0s, ETA:'
        '\r[> ] 1/4, 1.0 task/s, elapsed: 1s, ETA: 3s'
        '\r[>> ] 2/4, 2.0 task/s, elapsed: 1s, ETA: 1s'
        '\r[>>> ] 3/4, 1.5 task/s, elapsed: 2s, ETA: 1s'
        '\r[>>>>] 4/4, 2.0 task/s, elapsed: 2s, ETA: 0s\n')
    assert results == [1, 2, 3, 4]
def test_track_parallel_progress_list(capsys):
    """With a plain list of tasks, the bar output captured from stdout must
    match exactly and results must preserve task order."""
    results = mmcv.track_parallel_progress(sleep_1s, [1, 2, 3, 4], 2,
                                           bar_width=4)
    out, _ = capsys.readouterr()
    # NOTE(review): internal spacing of these literals may have been altered
    # by whitespace collapsing — confirm against the real test file.
    assert out == ('[ ] 0/4, elapsed: 0s, ETA:'
                   '\r[> ] 1/4, 1.0 task/s, elapsed: 1s, ETA: 3s'
                   '\r[>> ] 2/4, 2.0 task/s, elapsed: 1s, ETA: 1s'
                   '\r[>>> ] 3/4, 1.5 task/s, elapsed: 2s, ETA: 1s'
                   '\r[>>>>] 4/4, 2.0 task/s, elapsed: 2s, ETA: 0s\n')
    assert results == [1, 2, 3, 4]
def test_track_parallel_progress_list():
    """Smoke test: parallel progress over a list returns ordered results.

    The exact bar-output assertion is deliberately kept commented out
    because its timing-dependent text is flaky on GitHub Actions CI.
    """
    out = StringIO()
    results = mmcv.track_parallel_progress(sleep_1s, [1, 2, 3, 4], 2,
                                           bar_width=4, file=out)
    # The following cannot pass CI on Github Action
    # assert out.getvalue() == (
    #     '[ ] 0/4, elapsed: 0s, ETA:'
    #     '\r[> ] 1/4, 1.0 task/s, elapsed: 1s, ETA: 3s'
    #     '\r[>> ] 2/4, 2.0 task/s, elapsed: 1s, ETA: 1s'
    #     '\r[>>> ] 3/4, 1.5 task/s, elapsed: 2s, ETA: 1s'
    #     '\r[>>>>] 4/4, 2.0 task/s, elapsed: 2s, ETA: 0s\n')
    assert results == [1, 2, 3, 4]
def main(
    download_dir,
    username,
    password,
    nproc,
):
    """Download Cityscapes, convert polygon annotations, write split lists.

    Args:
        download_dir: Root directory that will contain ``cityscapes/``.
        username: Cityscapes account user name.
        password: Cityscapes account password.
        nproc: Number of worker processes for the conversion.
    """
    dataset_dir = Path(download_dir) / "cityscapes"
    if username is None or password is None:
        # FIX: corrected typo in the option name (--pasword -> --password).
        raise ValueError(
            "You must indicate your username and password either in the "
            "script variables or by passing options --username and "
            "--password.")
    download_cityscapes(dataset_dir, username, password, overwrite=False)
    install_cityscapes_api()

    gt_dir = dataset_dir / "gtFine"

    poly_files = []
    for poly in mmcv.scandir(str(gt_dir), "_polygons.json", recursive=True):
        poly_files.append(str(gt_dir / poly))
    mmcv.track_parallel_progress(convert_json_to_label, poly_files, nproc)

    split_names = ["train", "val", "test"]
    for split in split_names:
        filenames = []
        for poly in mmcv.scandir(
                str(gt_dir / split), "_polygons.json", recursive=True):
            filenames.append(poly.replace("_gtFine_polygons.json", ""))
        # FIX: the generator variable previously shadowed the open file
        # handle ``f``.
        with open(str(dataset_dir / f"{split}.txt"), "w") as f:
            f.writelines(name + "\n" for name in filenames)
def process(json_dir,
            img_dir,
            out_dir,
            tasks=None,
            nproc=1,
            recog_format='jsonl',
            warp=False):
    """Parse labelme JSON files into detection/recognition label files.

    Args:
        json_dir (str): Directory containing ``*.json`` labelme files.
        img_dir (str): Directory containing the source images.
        out_dir (str): Output directory for label files and crops.
        tasks (list, optional): Subset of {'det', 'recog'}. Defaults to
            ['det']. (FIX: replaced the mutable default argument.)
        nproc (int): Number of worker processes.
        recog_format (str): 'jsonl' or 'txt' for recognition labels.
        warp (bool): Whether warped recognition labels are also written.
    """
    tasks = ['det'] if tasks is None else tasks
    mmcv.mkdir_or_exist(out_dir)
    json_file_list = glob.glob(osp.join(json_dir, '*.json'))

    parse_labelme_json_func = partial(
        parse_labelme_json,
        img_dir=img_dir,
        out_dir=out_dir,
        tasks=tasks,
        recog_format=recog_format,
        warp_flag=warp)

    if nproc <= 1:
        total_results = mmcv.track_progress(parse_labelme_json_func,
                                            json_file_list)
    else:
        total_results = mmcv.track_parallel_progress(
            parse_labelme_json_func,
            json_file_list,
            keep_order=True,
            nproc=nproc)

    total_det_line_json_list = []
    total_recog_crop_line_str = []
    total_recog_warp_line_str = []
    for res in total_results:
        total_det_line_json_list.extend(res[0])
        if 'recog' in tasks:
            total_recog_crop_line_str.extend(res[1])
            total_recog_warp_line_str.extend(res[2])

    # FIX: removed a second redundant mmcv.mkdir_or_exist(out_dir) call.
    det_out_file = osp.join(out_dir, 'instances_training.txt')
    list_to_file(det_out_file, total_det_line_json_list)

    if 'recog' in tasks:
        recog_out_file_crop = osp.join(out_dir, f'train_label.{recog_format}')
        list_to_file(recog_out_file_crop, total_recog_crop_line_str)
        if warp:
            recog_out_file_warp = osp.join(out_dir,
                                           f'warp_train_label.{recog_format}')
            list_to_file(recog_out_file_warp, total_recog_warp_line_str)
def collect_annotations(files, nproc=1):
    """Collect the annotation information.

    Args:
        files(list): The list of tuples (image_file, groundtruth_file)
        nproc(int): The number of process to collect annotations

    Returns:
        images(list): The list of image information dicts
    """
    assert isinstance(files, list)
    assert isinstance(nproc, int)

    if nproc <= 1:
        return mmcv.track_progress(load_img_info, files)
    return mmcv.track_parallel_progress(load_img_info, files, nproc=nproc)
def collect_annotations(files, split, nproc=1):
    """Collect the annotation information.

    Args:
        files(list): The list of tuples (image_file, groundtruth_file)
        split(str): The split of dataset. Namely: training or test
        nproc(int): The number of process to collect annotations

    Returns:
        images(list): The list of image information dicts
    """
    assert isinstance(files, list)
    assert isinstance(split, str)
    assert isinstance(nproc, int)

    loader = partial(load_img_info, split=split)
    if nproc <= 1:
        return mmcv.track_progress(loader, files)
    return mmcv.track_parallel_progress(loader, files, nproc=nproc)
def convert_textocr(root_path,
                    dst_image_path,
                    dst_label_filename,
                    annotation_filename,
                    img_start_idx=0,
                    nproc=1):
    """Crop TextOCR images and write the recognition label file.

    Returns the number of images in the source annotation file so callers
    can chain ``img_start_idx`` across splits.
    """
    annotation_path = osp.join(root_path, annotation_filename)
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} not exists, please check and try again.')

    src_image_root = root_path
    # outputs
    dst_label_file = osp.join(root_path, dst_label_filename)
    dst_image_root = osp.join(root_path, dst_image_path)
    os.makedirs(dst_image_root, exist_ok=True)

    annotation = mmcv.load(annotation_path)

    worker = partial(
        process_img,
        src_image_root=src_image_root,
        dst_image_root=dst_image_root)
    tasks = []
    for img_idx, img_info in enumerate(annotation['imgs'].values()):
        ann_ids = annotation['imgToAnns'][img_info['id']]
        anns = [annotation['anns'][ann_id] for ann_id in ann_ids]
        tasks.append((img_idx + img_start_idx, img_info, anns))

    labels_list = mmcv.track_parallel_progress(
        worker, tasks, keep_order=True, nproc=nproc)

    final_labels = []
    for label_list in labels_list:
        final_labels += label_list
    list_to_file(dst_label_file, final_labels)
    return len(annotation['imgs'])
def main():
    """Extend training foregrounds for FBA matting and dump the data list.

    Creates the ``fg_extended`` output directories, runs ``ExtendFg.extend``
    over every foreground name in parallel, and dumps the resulting info
    list to ``training_list_fba.json`` under ``data_root``.
    """
    args = parse_args()
    if not osp.exists(args.data_root):
        raise FileNotFoundError(f'{args.data_root} does not exist!')
    data_root = args.data_root

    print('preparing training data...')
    dir_prefix = 'Training_set'
    fname_prefix = 'training'
    fg_dirs = [
        'Training_set/Adobe-licensed images/fg', 'Training_set/Other/fg'
    ]
    alpha_dirs = [
        'Training_set/Adobe-licensed images/alpha', 'Training_set/Other/alpha'
    ]
    extended_dirs = [
        'Training_set/Adobe-licensed images/fg_extended',
        'Training_set/Other/fg_extended'
    ]
    for p in extended_dirs:
        os.makedirs(osp.join(data_root, p), exist_ok=True)

    fg_names_path = osp.join(dir_prefix, f'{fname_prefix}_fg_names.txt')
    save_json_path = f'{fname_prefix}_list_fba.json'
    # FIX: close the name-list file (the original leaked the handle from a
    # bare open().readlines()), drop the pointless iter()/list() round-trip,
    # and stop reusing the path variable for the file contents.
    with open(osp.join(data_root, fg_names_path)) as f:
        fg_names = f.readlines()

    extend_fg = ExtendFg(data_root, fg_dirs, alpha_dirs)
    data_infos = mmcv.track_parallel_progress(extend_fg.extend, fg_names,
                                              args.nproc)
    mmcv.dump(data_infos, osp.join(data_root, save_json_path))
    print('train done')
if __name__ == '__main__':
    # Collect PReID dataset2 image paths for each split.
    img_dir = '/data/Dataset/PReID/dataset2/train/'
    train_imgfilenames = glob.glob(img_dir + '*.png')
    img_dir = '/data/Dataset/PReID/dataset2/query_a/'
    query_imgfilenames = glob.glob(img_dir + '*.png')
    img_dir = '/data/Dataset/PReID/dataset2/gallery_a/'
    gallery_imgfilenames = glob.glob(img_dir + '*.png')

    # Keep only the first half of every split.
    # NOTE(review): glob returns files in arbitrary order, so "first half"
    # is not deterministic across runs — confirm this is intended.
    train_imgfilenames = train_imgfilenames[:len(train_imgfilenames) // 2]
    query_imgfilenames = query_imgfilenames[:len(query_imgfilenames) // 2]
    gallery_imgfilenames = gallery_imgfilenames[:len(gallery_imgfilenames) //
                                                2]

    print("predict train hist_label")
    # Predict a histogram-based label for every training image (6 workers).
    train_hist_labels = mmcv.track_parallel_progress(
        simple_hist_predictor_func, train_imgfilenames, 6)
    # Partition the training images into two groups by their two distinct
    # histogram labels (indexing [1] assumes at least two labels exist).
    train_unique_hist_labels = sorted(list(set(train_hist_labels)))
    train_sa_index = [
        i for i, v in enumerate(train_hist_labels)
        if (v == train_unique_hist_labels[0])
    ]
    train_sb_index = [
        i for i, v in enumerate(train_hist_labels)
        if (v == train_unique_hist_labels[1])
    ]
    train_sa_infos = [train_imgfilenames[i] for i in train_sa_index]
    train_sb_infos = [train_imgfilenames[i] for i in train_sb_index]

    print("predict query hist_label")
    # Same histogram prediction for the query split.
    query_hist_labels = mmcv.track_parallel_progress(
        simple_hist_predictor_func, query_imgfilenames, 6)
def generate_json(data_root, source_bg_dir, composite, nproc, mode):
    """Generate training json list or test json list.

    It should be noted except for `source_bg_dir`, other strings are
    incomplete relative path. When using these strings to read from or
    write to disk, a data_root is added to form a complete relative path.

    Args:
        data_root (str): path to Adobe composition-1k directory.
        source_bg_dir (str): source background directory.
        composite (bool): whether composite fg with bg and write to file.
        nproc (int): number of processers.
        mode (str): training or test mode.
    """
    # Per-mode constants: prefixes, backgrounds-per-foreground count, and
    # the candidate fg/alpha directories to search.
    if mode == 'training':
        dir_prefix = 'Training_set'
        fname_prefix = 'training'
        num_bg = 100  # each training fg is composited with 100 bg
        fg_dirs = [
            'Training_set/Adobe-licensed images/fg', 'Training_set/Other/fg'
        ]
        alpha_dirs = [
            'Training_set/Adobe-licensed images/alpha',
            'Training_set/Other/alpha'
        ]
    elif mode == 'test':
        dir_prefix = 'Test_set'
        fname_prefix = 'test'
        num_bg = 20  # each test fg is composited with 20 bg
        fg_dirs = ['Test_set/Adobe-licensed images/fg']
        alpha_dirs = ['Test_set/Adobe-licensed images/alpha']
    else:
        raise KeyError(f'Unknown mode {mode}.')

    fg_names = osp.join(dir_prefix, f'{fname_prefix}_fg_names.txt')
    bg_names = osp.join(dir_prefix, f'{fname_prefix}_bg_names.txt')
    save_json_path = f'{fname_prefix}_list.json'
    # NOTE: fg_names/bg_names are rebound from paths to file contents here.
    fg_names = open(osp.join(data_root, fg_names)).readlines()
    bg_names = open(osp.join(data_root, bg_names)).readlines()
    # Every foreground must appear exactly num_bg times in the bg list.
    assert len(fg_names) * num_bg == len(bg_names)
    repeat_infos = []
    name_with_postfix = []
    # repeat fg and alpha num_bg time
    for fg_name in fg_names:
        fg_name = fg_name.strip()
        alpha_path = join_first_contain(alpha_dirs, fg_name, data_root)
        fg_path = join_first_contain(fg_dirs, fg_name, data_root)
        alpha_full_path = osp.join(data_root, alpha_path)
        fg_full_path = osp.join(data_root, fg_path)
        if not osp.exists(alpha_full_path):
            raise FileNotFoundError(f'{alpha_full_path} does not exist!')
        if not osp.exists(fg_full_path):
            raise FileNotFoundError(f'{fg_full_path} does not exist!')
        # to be consistent with DIM's composition code, use PIL to read images
        fg = Image.open(fg_full_path).convert('RGB')
        alpha = (np.array(Image.open(alpha_full_path).convert('RGB')) / 255.
                 if composite else None)
        repeat_infos.append((alpha, fg, alpha_path, fg_path))
        # '<stem>.<ext>' becomes '<stem>_<bg_idx>.<ext>' for each background.
        for bg_idx in range(num_bg):
            name_with_postfix.append(fg_name[:-4] + '_' + str(bg_idx) +
                                     fg_name[-4:])
    # Lazily repeat each fg/alpha record num_bg times so the sequence lines
    # up one-to-one with the background list built below.
    repeat_infos = chain.from_iterable(
        (repeat(repeat_info, num_bg) for repeat_info in repeat_infos))
    source_bg_paths = []
    for bg_name in bg_names:
        bg_name = bg_name.strip()
        # in coco_2017, image names do not begin with 'COCO_train2014_'
        if '2017' in source_bg_dir:
            bg_name = bg_name[15:]  # get rid of 'COCO_train2014_'
        source_bg_paths.append(osp.join(source_bg_dir, bg_name))
    constants = repeat((data_root, composite, mode), len(bg_names))
    data_infos = mmcv.track_parallel_progress(
        get_data_info,
        list(zip(name_with_postfix, source_bg_paths, repeat_infos,
                 constants)), nproc)
    mmcv.dump(data_infos, osp.join(data_root, save_json_path))
def convert_cocotext(root_path,
                     split,
                     preserve_vertical,
                     format,
                     nproc,
                     img_start_idx=0):
    """Collect the annotation information and crop the images.

    The annotation format is as the following:
    {
        'anns':{
            '45346':{
                'mask': [468.9,286.7,468.9,295.2,493.0,295.8,493.0,287.2],
                'class': 'machine printed',
                'bbox': [468.9, 286.7, 24.1, 9.1], # x, y, w, h
                'image_id': 217925,
                'id': 45346,
                'language': 'english', # 'english' or 'not english'
                'area': 206.06,
                'utf8_string': 'New',
                'legibility': 'legible', # 'legible' or 'illegible'
            },
            ...
        }
        'imgs':{
            '540965':{
                'id': 540965,
                'set': 'train', # 'train' or 'val'
                'width': 640,
                'height': 360,
                'file_name': 'COCO_train2014_000000540965.jpg'
            },
            ...
        }
        'imgToAnns':{
            '540965': [],
            '260932': [63993, 63994, 63995, 63996, 63997, 63998, 63999],
            ...
        }
    }

    Args:
        root_path (str): Root path to the dataset
        split (str): Dataset split, which should be 'train' or 'val'
        preserve_vertical (bool): Whether to preserve vertical texts
        format (str): Annotation format, should be either 'jsonl' or 'txt'
        nproc (int): Number of processes
        img_start_idx (int): Index of start image

    Returns:
        img_info (dict): The dict of the img and annotation information
    """
    annotation_path = osp.join(root_path, 'annotations/cocotext.v2.json')
    if not osp.exists(annotation_path):
        raise Exception(
            f'{annotation_path} not exists, please check and try again.')

    annotation = mmcv.load(annotation_path)
    # outputs
    dst_label_file = osp.join(root_path, f'{split}_label.{format}')
    dst_image_root = osp.join(root_path, 'crops', split)
    ignore_image_root = osp.join(root_path, 'ignores', split)
    src_image_root = osp.join(root_path, 'imgs')
    mmcv.mkdir_or_exist(dst_image_root)
    mmcv.mkdir_or_exist(ignore_image_root)

    process_img_with_path = partial(
        process_img,
        src_image_root=src_image_root,
        dst_image_root=dst_image_root,
        ignore_image_root=ignore_image_root,
        preserve_vertical=preserve_vertical,
        split=split,
        format=format)
    tasks = []
    for img_idx, img_info in enumerate(annotation['imgs'].values()):
        # Only images in the requested split are queued; note img_idx still
        # advances for skipped images.
        if img_info['set'] == split:
            ann_ids = annotation['imgToAnns'][str(img_info['id'])]
            anns = [annotation['anns'][str(ann_id)] for ann_id in ann_ids]
            tasks.append((img_idx + img_start_idx, img_info, anns))
    labels_list = mmcv.track_parallel_progress(
        process_img_with_path, tasks, keep_order=True, nproc=nproc)
    final_labels = []
    for label_list in labels_list:
        final_labels += label_list
    list_to_file(dst_label_file, final_labels)
    # NOTE: returns the TOTAL image count (all splits), which callers use to
    # offset img_start_idx for the next split.
    return len(annotation['imgs'])
def process_directory(path, limit, nproc):
    """Run ``process_scene`` over every entry directly under ``path``."""
    print(f'processing {path}')
    scene_names = os.listdir(path)
    worker = partial(process_scene, path, limit)
    mmcv.track_parallel_progress(func=worker, tasks=scene_names, nproc=nproc)
# -*- coding: utf-8 -* - ''' mmcv创建进度条 参考:https://mmcv.readthedocs.io/en/latest/utils.html#progressbar ''' import mmcv import time def do_task(i): time.sleep(1) return i + 1 ''' 依次调用task,输出进度 ''' tasks = list(range(10)) results = mmcv.track_progress(do_task, tasks) print(results) ''' 多进程执行task,8个进程 ''' results = mmcv.track_parallel_progress(do_task, tasks, 8) print(results)