def merge(cleaned_datasets, output, save_images=False):
    """datum merge -o {output} {project_dirs}

    Merge the datasets of the given cleaned project directories into a
    single dataset saved under *output*.
    """
    print(f"Merging datasets to {output}/")
    destination = Path(output)

    # Load every source project and materialize its dataset.
    loaded_projects = (Project.load(project_dir) for project_dir in cleaned_datasets)
    datasets = [project.make_dataset() for project in loaded_projects]

    # perform the merge
    conf = IntersectMerge.Conf(
        pairwise_dist=0.25,
        groups=[],
        output_conf_thresh=0.0,
        quorum=0,
    )
    merged = IntersectMerge(conf=conf)(datasets)

    # Re-home the merged items into a fresh project: categories must be
    # defined on the target dataset before update() copies items in.
    result = Project().make_dataset()
    result.define_categories(merged.categories())
    merged = result.update(merged)
    merged.save(save_dir=destination, save_images=save_images)
def merge_command(args):
    """Handle the CLI ``merge`` command.

    Merges the datasets of the projects listed in ``args.project`` into a
    single dataset, saves it plus a JSON merge report under the destination
    directory, and returns 0 on success.
    """
    projects = [load_project(p) for p in args.project]

    # Resolve the destination directory: an explicit --dst-dir must be empty
    # unless --overwrite was given; otherwise pick the next free name.
    dst_dir = args.dst_dir
    if not dst_dir:
        dst_dir = generate_next_file_name('merged')
    elif not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
        raise CliException("Directory '%s' already exists "
            "(pass --overwrite to overwrite)" % dst_dir)

    datasets = []
    for project in projects:
        log.debug("Loading project '%s' dataset", project.config.project_name)
        datasets.append(project.make_dataset())

    conf = IntersectMerge.Conf(
        pairwise_dist=args.iou_thresh,
        groups=args.groups,
        output_conf_thresh=args.output_conf_thresh,
        quorum=args.quorum)
    merger = IntersectMerge(conf=conf)
    merged_dataset = merger(datasets)

    # Attach the merged items to a fresh project (categories first) and save.
    output_dataset = Project().make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    merged_dataset.save(save_dir=dst_dir)

    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)

    dst_dir = osp.abspath(dst_dir)
    log.info("Merge results have been saved to '%s'" % dst_dir)
    log.info("Report has been saved to '%s'" % report_path)

    return 0
def mergeDataset(self, import_args: Arg, filter_arg: Arg):
    """Merge all datasets under ``self.datasetPathList`` into one project.

    Steps performed:
    1. Import every dataset path with the importer for ``import_args['format']``.
    2. Where item ids collide across datasets, re-id the colliding dataset's
       items (prefixing the source directory name) and move the image files
       on disk to match the new ids.
    3. Run Datumaro's IntersectMerge over all datasets.
    4. Optionally drop items without annotations
       (``filter_arg['no_anno_filter'] == 'y'``).
    5. Renumber image/annotation ids sequentially and save with images.

    Returns ``self`` to allow call chaining.

    NOTE(review): this method has side effects on disk — it moves image
    files and recreates ``self.projectsPath / self.mergeFolderName``.
    """
    config = setConfig(import_args['format'])
    # Map: dataset root path -> imported Datumaro dataset.
    source_datasets = dict([(path, Environment().make_importer(
        import_args['format'])(str(path)).make_dataset())
        for path in self.datasetPathList])
    # Flat list of (item id, owning dataset path) across all datasets,
    # used below to detect id collisions between datasets.
    itemIdsAndPath = reduce(lambda x, y: x + y, [[(item.id, path) for item in dataset]
        for path, dataset in source_datasets.items()])
    # for itemId, path in itemIdsAndPath:
    for path, dataset in source_datasets.items():
        # Ids owned by this dataset vs ids owned by any other dataset.
        itemIdsInPath = set(
            [itemId for itemId, _path in itemIdsAndPath if _path == path])
        itemIdsOutPath = set(
            [itemId for itemId, _path in itemIdsAndPath if _path != path])
        if itemIdsInPath & itemIdsOutPath:
            # Collision: make this dataset's ids unique by prefixing the
            # dataset directory name, and relocate images accordingly.
            for subsetName, subset in dataset.subsets().items():
                imgDir: Path = path / config.getImgDir(subsetName)
                # Iterate over a deep copy because subset.items is mutated
                # (del + reinsert under the new id) inside the loop.
                _subset = deepcopy(subset.items)
                for item in _subset.values():
                    imgFile = Path(item.image.path)
                    relPath = imgFile.relative_to(imgDir)
                    # New on-disk location: <imgDir>/<dataset name>/<old rel path>.
                    newPath = imgDir / path.name / relPath
                    oldItemId = item.id
                    # New id mirrors the new relative path, posix-style
                    # separators (Datumaro ids use '/').
                    newItemId = item.id = str(path.name / relPath.parent /
                        relPath.stem).replace('\\', '/')
                    # NOTE(review): writes the private Image._path attribute
                    # directly — presumably there is no public setter; verify
                    # against the Datumaro version in use.
                    item.image._path = str(newPath)
                    # Re-key the live subset under the new id.
                    del subset.items[oldItemId]
                    subset.items[newItemId] = item
                    newPath.parent.mkdir(parents=True, exist_ok=True)
                    if item.image.has_data:
                        # Physically move the image file to its new location.
                        move(str(imgFile), str(imgDir / path.name / relPath))
    # Recreate a clean merge output directory.
    mergePath = (self.projectsPath / self.mergeFolderName)
    if mergePath.is_dir():
        rmtree(mergePath, onerror=remove_readonly)
    mergePath.mkdir(exist_ok=True, parents=True)
    dst_dir = str(mergePath)
    # Merge with default IntersectMerge settings.
    merger = IntersectMerge(conf=IntersectMerge.Conf())
    merged_dataset = merger(list(source_datasets.values()))
    # Attach merged items to a fresh project (categories must be defined
    # before update()).
    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    if filter_arg['no_anno_filter'].lower() == 'y':
        # Keep only items that have at least one annotation.
        filtered_dataset = Project().make_dataset()
        filtered_dataset.define_categories(merged_dataset.categories())
        merged_dataset = filtered_dataset.update(
            merged_dataset.select(lambda item: len(item.annotations) != 0))
    # Renumber: image attribute ids are 1-based item positions; annotation
    # ids are globally sequential across the whole merged dataset.
    annoId = 1
    imageIdName = config.imageIdName
    for idx, item in tqdm(enumerate(merged_dataset), desc='datasets'):
        if imageIdName is not None:
            item.attributes[imageIdName] = idx + 1
        for anno in item.annotations:
            anno.id = annoId
            annoId += 1
    merged_dataset.save(save_dir=dst_dir, save_images=True)
    # for subsetName, subset in tqdm(merged_dataset.subsets().items(), desc='datasets'):
    #     for idx, itemId in tqdm(enumerate(itemIds), desc='items'):
    #         if imageIdName is not None:
    #             merged_dataset.get(itemId,subset=subsetName).attributes[imageIdName] = idx+1
    #         for anno in merged_dataset.get(itemId, subset=subsetName).annotations:
    #             anno.id = annoId
    #             annoId += 1
    # merged_dataset.save(save_dir=dst_dir, save_images=True)
    return self
def merge_command(args):
    """Handle the CLI ``merge`` command (revpath-based variant).

    Resolves the target datasets from revpaths, merges them with
    IntersectMerge, exports the result in ``args.format`` to the destination
    directory, and writes a JSON merge report. Returns 0 on success.
    """
    # Workaround. Required positionals consume positionals from the end
    args._positionals += join_cli_args(args, 'targets', 'extra_args')
    # Everything before a literal '--' separator is a target; everything
    # after it is passed through to the format converter.
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if pos == 0:
            raise argparse.ArgumentError(None,
                message="Expected at least 1 target argument")
    else:
        pos = len(args._positionals)
    args.targets = args._positionals[:pos] or [ProjectBuildTargets.MAIN_TARGET]
    # `pos + has_sep` skips the '--' itself when present (bool used as 0/1).
    args.extra_args = args._positionals[pos + has_sep:]

    # If the user asked for converter help, missing project errors are
    # tolerated below so the help can still be shown.
    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    # Resolve the destination directory: an explicit --dst-dir must be empty
    # unless --overwrite was given; otherwise pick the next free name.
    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('merged')
    dst_dir = osp.abspath(dst_dir)

    project = None
    try:
        # scope_add registers the project for cleanup when the CLI scope ends.
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # A missing project is fatal only when it is actually needed:
        # a single (default) target with an explicit project dir.
        if not show_plugin_help and len(args.targets) == 1 and args.project_dir:
            raise

    # Prefer the project's plugin environment when a project is available.
    if project is not None:
        env = project.env
    else:
        env = Environment()

    try:
        converter = env.converters[args.format]
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.format)
    # Parse the converter-specific trailing arguments (after '--').
    export_args = converter.parse_cmdline(args.extra_args)

    source_datasets = []
    try:
        # With a single target, also include the project's own working tree
        # so the target is merged against the current project contents.
        if len(args.targets) == 1:
            source_datasets.append(project.working_tree.make_dataset())

        for t in args.targets:
            target_dataset, target_project = parse_full_revpath(t, project)
            if target_project:
                # Keep implicitly-loaded projects alive until scope exit.
                scope_add(target_project)
            source_datasets.append(target_dataset)
    except Exception as e:
        # Surface any resolution/loading failure as a CLI error.
        raise CliException(str(e))

    merger = IntersectMerge(conf=IntersectMerge.Conf(
        pairwise_dist=args.iou_thresh, groups=args.groups or [],
        output_conf_thresh=args.output_conf_thresh, quorum=args.quorum
    ))
    merged_dataset = merger(source_datasets)

    merged_dataset.export(save_dir=dst_dir, format=converter, **export_args)

    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)

    log.info("Merge results have been saved to '%s'" % dst_dir)
    log.info("Report has been saved to '%s'" % report_path)

    return 0