import itertools
from typing import Any, Callable, Collection, Dict, List, Optional

from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.data.build import (
    filter_images_with_few_keypoints,
    filter_images_with_only_crowd_annotations,
    load_proposals_into_dataset,
    print_instances_class_histogram,
)
from detectron2.data.detection_utils import check_metadata_consistency

# Type aliases used in the signatures below, following the definitions in
# densepose's data/build.py. The private helpers referenced further down
# (_merge_categories, _warn_if_merged_different_categories,
# _add_category_id_to_contiguous_id_maps_to_metadata,
# _maybe_filter_and_map_categories, _map_category_id_to_contiguous_id)
# are assumed to be defined elsewhere in this module.
Instance = Dict[str, Any]
InstancePredicate = Callable[[Instance], bool]


def combine_detection_dataset_dicts(
    dataset_names: Collection[str],
    keep_instance_predicate: Optional[InstancePredicate] = None,
    proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
    """
    Load and prepare dataset dicts for training / testing

    Args:
        dataset_names (Collection[str]): a list of dataset names
        keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
            applied to instance dicts which defines whether to keep the instance
        proposal_files (Collection[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    if proposal_files is None:
        proposal_files = [None] * len(dataset_names)
    assert len(dataset_names) == len(proposal_files)
    # load datasets and metadata
    dataset_name_to_dicts = {}
    for dataset_name in dataset_names:
        dataset_name_to_dicts[dataset_name] = DatasetCatalog.get(dataset_name)
        # check the dataset just loaded, not the accumulating map
        assert len(dataset_name_to_dicts[dataset_name]), f"Dataset '{dataset_name}' is empty!"
    # merge categories, requires category metadata to be loaded
    # cat_id -> [(orig_cat_id, cat_name, dataset_name)]
    merged_categories = _merge_categories(dataset_names)
    _warn_if_merged_different_categories(merged_categories)
    merged_category_names = [
        merged_categories[cat_id][0].mapped_name for cat_id in sorted(merged_categories)
    ]
    # map to contiguous category IDs
    _add_category_id_to_contiguous_id_maps_to_metadata(merged_categories)
    # load annotations and dataset metadata
    for dataset_name, proposal_file in zip(dataset_names, proposal_files):
        dataset_dicts = dataset_name_to_dicts[dataset_name]
        assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
        if proposal_file is not None:
            dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
        dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
        print_instances_class_histogram(dataset_dicts, merged_category_names)
        dataset_name_to_dicts[dataset_name] = dataset_dicts

    if keep_instance_predicate is not None:
        all_datasets_dicts_plain = [
            d
            for d in itertools.chain.from_iterable(dataset_name_to_dicts.values())
            if keep_instance_predicate(d)
        ]
    else:
        all_datasets_dicts_plain = list(
            itertools.chain.from_iterable(dataset_name_to_dicts.values())
        )
    return all_datasets_dicts_plain
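# Illustrative sketch (not part of the original module): a minimal
# keep_instance_predicate that keeps only dataset dicts containing at least one
# non-crowd annotation. The helper name `_keep_non_crowd` is hypothetical.
def _keep_non_crowd(instance: Instance) -> bool:
    # a dataset dict is kept if any of its annotations is not a crowd region
    return any(
        not ann.get("iscrowd", 0) for ann in instance.get("annotations", [])
    )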
def combine_detection_dataset_dicts(
    dataset_names: Collection[str],
    keep_instance_predicate: Optional[InstancePredicate] = None,
    proposal_files: Optional[Collection[str]] = None,
) -> List[Instance]:
    """
    Load and prepare dataset dicts for training / testing

    Args:
        dataset_names (Collection[str]): a list of dataset names
        keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
            applied to instance dicts which defines whether to keep the instance
        proposal_files (Collection[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    if proposal_files is None:
        proposal_files = [None] * len(dataset_names)
    assert len(dataset_names) == len(proposal_files)
    # load annotations and dataset metadata
    dataset_map = {}
    for dataset_name in dataset_names:
        dataset_dicts = DatasetCatalog.get(dataset_name)
        dataset_map[dataset_name] = dataset_dicts
    # initialize category maps
    _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names)
    # apply category maps
    all_datasets_dicts = []
    for dataset_name, proposal_file in zip(dataset_names, proposal_files):
        dataset_dicts = dataset_map[dataset_name]
        assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
        if proposal_file is not None:
            dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
        dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
        _map_category_id_to_contiguous_id(dataset_name, dataset_dicts)
        print_instances_class_histogram(
            dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes
        )
        all_datasets_dicts.append(dataset_dicts)

    if keep_instance_predicate is not None:
        all_datasets_dicts_plain = [
            d
            for d in itertools.chain.from_iterable(all_datasets_dicts)
            if keep_instance_predicate(d)
        ]
    else:
        all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts))
    return all_datasets_dicts_plain
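# Usage sketch for combine_detection_dataset_dicts. The dataset names below are
# placeholders; in practice they must be registered in DatasetCatalog and carry
# the category metadata the helpers above expect.
def _demo_combine():
    combined = combine_detection_dataset_dicts(
        ["dataset_a_train", "dataset_b_train"],
        keep_instance_predicate=_keep_non_crowd,
    )
    # `combined` is a flat list of dataset dicts drawn from both datasets,
    # with category ids remapped to contiguous ids via the metadata maps
    # initialized above
    return combined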
def fsod_get_detection_dataset_dicts(
    dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None
):
    """
    Load and prepare dataset dicts for instance detection/segmentation and
    semantic segmentation.

    Args:
        dataset_names (list[str]): a list of dataset names
        filter_empty (bool): whether to filter out images without instance annotations
        min_keypoints (int): filter out images with fewer keypoints than
            `min_keypoints`. Set to 0 to do nothing.
        proposal_files (list[str]): if given, a list of object proposal files
            that match each dataset in `dataset_names`.
    """
    assert len(dataset_names)
    dataset_dicts_original = [
        DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
    ]
    for dataset_name, dicts in zip(dataset_names, dataset_dicts_original):
        assert len(dicts), "Dataset '{}' is empty!".format(dataset_name)

    if proposal_files is not None:
        assert len(dataset_names) == len(proposal_files)
        # load precomputed proposals from proposal files
        dataset_dicts_original = [
            load_proposals_into_dataset(dataset_i_dicts, proposal_file)
            for dataset_i_dicts, proposal_file in zip(dataset_dicts_original, proposal_files)
        ]

    if 'train' not in dataset_names[0]:
        dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts_original))
    else:
        dataset_dicts_original = list(itertools.chain.from_iterable(dataset_dicts_original))
        dataset_dicts_original = filter_images_with_only_crowd_annotations(
            dataset_dicts_original)

        ###############################################################################
        # split image-based annotations to instance-based annotations
        # for few-shot learning
        dataset_dicts = []
        index_dicts = []
        split_flag = True
        if split_flag:
            for record in dataset_dicts_original:
                file_name = record['file_name']
                height = record['height']
                width = record['width']
                image_id = record['image_id']
                annotations = record['annotations']

                # group this image's annotations by category
                category_dict = {}
                for ann_id, ann in enumerate(annotations):
                    ann.pop("segmentation", None)
                    ann.pop("keypoints", None)
                    category_id = ann['category_id']
                    if category_id not in category_dict.keys():
                        category_dict[category_id] = [ann]
                    else:
                        category_dict[category_id].append(ann)
                # emit one dataset dict per (image, category) pair
                for key, item in category_dict.items():
                    instance_ann = {}
                    instance_ann['file_name'] = file_name
                    instance_ann['height'] = height
                    instance_ann['width'] = width
                    instance_ann['annotations'] = item
                    dataset_dicts.append(instance_ann)

    has_instances = "annotations" in dataset_dicts[0]
    # Keep images without instance-level GT if the dataset has semantic labels.
    if filter_empty and has_instances and "sem_seg_file_name" not in dataset_dicts[0]:
        dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts)

    if min_keypoints > 0 and has_instances:
        dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints)

    if has_instances:
        try:
            class_names = MetadataCatalog.get(dataset_names[0]).thing_classes
            check_metadata_consistency("thing_classes", dataset_names)
            print_instances_class_histogram(dataset_dicts, class_names)
        except AttributeError:  # class names are not available for this dataset
            pass
    return dataset_dicts
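# Illustration (toy record, fabricated for demonstration only) of the
# per-category split performed in the training branch above: an image whose
# annotations have categories {1, 1, 2} becomes two instance-based dataset
# dicts, one holding both category-1 boxes and one holding the category-2 box.
def _demo_split():
    record = {
        "file_name": "img_0001.jpg",
        "height": 480,
        "width": 640,
        "image_id": 1,
        "annotations": [
            {"category_id": 1, "bbox": [0, 0, 10, 10], "iscrowd": 0},
            {"category_id": 1, "bbox": [20, 20, 10, 10], "iscrowd": 0},
            {"category_id": 2, "bbox": [40, 40, 10, 10], "iscrowd": 0},
        ],
    }
    category_dict = {}
    for ann in record["annotations"]:
        category_dict.setdefault(ann["category_id"], []).append(ann)
    # two categories -> two dataset dicts; category 1 keeps both of its boxes
    assert len(category_dict) == 2 and len(category_dict[1]) == 2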