def classes(self, annotation_type: str, release_name: Optional[str] = None):
    """
    Returns the list of classes of the given `annotation_type`

    Parameters
    ----------
    annotation_type: str
        The type of annotation classes, e.g. 'tag' or 'polygon'
    release_name: str
        Version of the dataset

    Returns
    -------
    classes: list
        List of classes in the dataset of type `annotation_type`
    """
    assert self.local_path.exists()
    if release_name in ["latest", None]:
        release = self.get_release("latest")
        release_name = release.name
    return get_classes(self.local_path, release_name=release_name, annotation_type=annotation_type)
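# Usage sketch (not from the original source): assuming this method lives on
# darwin-py's RemoteDataset, it can be reached through a Client once the
# dataset has been pulled locally. The team/dataset slug is a placeholder.
from darwin.client import Client

client = Client.local()  # reads credentials from the local darwin config
dataset = client.get_remote_dataset("my-team/my-dataset")

# With release_name omitted (or "latest"), get_release("latest") resolves the
# concrete release name before classes are read from the local copy.
print(dataset.classes(annotation_type="polygon"))
print(dataset.classes(annotation_type="tag", release_name="v1.0"))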
def classes(self, annotation_type: str):
    """
    Returns the list of classes of the given `annotation_type`

    Parameters
    ----------
    annotation_type: str
        The type of annotation classes, e.g. 'tag' or 'polygon'

    Returns
    -------
    classes: list
        List of classes in the dataset of type `annotation_type`
    """
    assert self.local_path.exists()
    return get_classes(self.local_path, annotation_type=annotation_type)
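# Usage sketch (not from the original source): get_classes can also be called
# directly on a pulled dataset directory, as this method does internally.
# The dataset path below is a placeholder for illustration.
from pathlib import Path

from darwin.dataset.utils import get_classes

dataset_path = Path.home() / ".darwin" / "datasets" / "my-team" / "my-dataset"
print(get_classes(dataset_path, annotation_type="polygon"))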
def __init__(
    self,
    dataset_path: Path,
    annotation_type: str,
    partition: Optional[str] = None,
    split: str = "default",
    split_type: str = "random",
    release_name: Optional[str] = None,
):
    """
    Creates a dataset

    Parameters
    ----------
    dataset_path: Path
        Path to the location of the dataset on the file system
    annotation_type: str
        The type of annotation classes [tag, bounding_box, polygon]
    partition: str
        Selects one of the partitions [train, val, test]
    split: str
        Selects the split that defines the percentages used (use 'default' to select the default split)
    split_type: str
        Heuristic used to do the split [random, stratified]
    release_name: str
        Version of the dataset
    """
    assert dataset_path is not None
    release_path = get_release_path(dataset_path, release_name)
    annotations_dir = release_path / "annotations"
    assert annotations_dir.exists()
    images_dir = dataset_path / "images"
    assert images_dir.exists()

    if partition not in ["train", "val", "test", None]:
        raise ValueError("partition should be either 'train', 'val', or 'test'")
    if split_type not in ["random", "stratified"]:
        raise ValueError("split_type should be either 'random' or 'stratified'")
    if annotation_type not in ["tag", "polygon", "bounding_box"]:
        raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")

    self.dataset_path = dataset_path
    self.annotation_type = annotation_type
    self.images_path: List[Path] = []
    self.annotations_path: List[Path] = []
    self.original_classes = None
    self.original_images_path: Optional[List[Path]] = None
    self.original_annotations_path: Optional[List[Path]] = None

    # Get the list of classes
    self.classes = get_classes(
        self.dataset_path, release_name, annotation_type=self.annotation_type, remove_background=True
    )
    self.num_classes = len(self.classes)

    # Get the list of stems
    if partition:
        # Get the split
        if split_type == "random":
            split_file = f"{split_type}_{partition}.txt"
        elif split_type == "stratified":
            split_file = f"{split_type}_{annotation_type}_{partition}.txt"
        split_path = release_path / "lists" / split / split_file
        if split_path.is_file():
            stems = (e.strip() for e in split_path.open())
        else:
            raise FileNotFoundError(
                "Could not find a dataset partition. "
                "Split the dataset using `split_dataset()` from `darwin.dataset.utils`"
            )
    else:
        # If the partition is not specified, get all the annotations
        stems = [e.stem for e in annotations_dir.glob("*.json")]

    # Find all the annotations and their corresponding images
    for stem in stems:
        annotation_path = annotations_dir / f"{stem}.json"
        images = []
        for ext in SUPPORTED_IMAGE_EXTENSIONS:
            image_path = images_dir / f"{stem}{ext}"
            if image_path.exists():
                images.append(image_path)
        if len(images) < 1:
            raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image")
        if len(images) > 1:
            raise ValueError(f"Image ({stem}) is present with multiple extensions. This is forbidden.")
        assert len(images) == 1
        self.images_path.append(images[0])
        self.annotations_path.append(annotation_path)

    if len(self.images_path) == 0:
        raise ValueError(f"Could not find any {SUPPORTED_IMAGE_EXTENSIONS} file in {images_dir}")

    assert len(self.images_path) == len(self.annotations_path)
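# Usage sketch (not from the original source): assuming this constructor
# belongs to darwin-py's LocalDataset (import path assumed) and the dataset
# has been pulled and split beforehand, the train partition could be loaded
# like this. The dataset path is a placeholder.
from pathlib import Path

from darwin.dataset.local_dataset import LocalDataset  # assumed import path

dataset = LocalDataset(
    dataset_path=Path.home() / ".darwin" / "datasets" / "my-team" / "my-dataset",
    annotation_type="polygon",
    partition="train",
    split_type="stratified",
)
print(f"{len(dataset.images_path)} images across {dataset.num_classes} classes")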
def detectron2_register_dataset(
    dataset: str,
    release_name: Optional[str] = "latest",
    partition: Optional[str] = None,
    split: Optional[str] = "default",
    split_type: Optional[str] = "stratified",
    evaluator_type: Optional[str] = None,
) -> str:
    """Registers a local Darwin-formatted dataset in Detectron2

    Parameters
    ----------
    dataset: str
        Dataset slug
    release_name: str
        Version of the dataset
    partition: str
        Selects one of the partitions [train, val, test]
    split: str
        Selects the split that defines the percentages used (use 'default' to select the default split)
    split_type: str
        Heuristic used to do the split [random, stratified]
    evaluator_type: str
        Evaluator to be used in the val and test sets

    Returns
    -------
    str
        The name under which the dataset was registered in Detectron2's catalogs
    """
    try:
        from detectron2.data import DatasetCatalog, MetadataCatalog
    except ImportError:
        print("Detectron2 not found.")
        sys.exit(1)
    from darwin.dataset.utils import get_annotations, get_classes

    dataset_path: Optional[Path] = None
    if os.path.isdir(dataset):
        dataset_path = Path(dataset)
    else:
        identifier = DatasetIdentifier.parse(dataset)
        if identifier.version:
            release_name = identifier.version

        client = _load_client(offline=True)
        dataset_path = None
        for path in client.list_local_datasets(team_slug=identifier.team_slug):
            if identifier.dataset_slug == path.name:
                dataset_path = path

        if not dataset_path:
            _error(
                f"Dataset '{identifier.dataset_slug}' does not exist locally. "
                f"Use 'darwin dataset remote' to see all the available datasets, "
                f"and 'darwin dataset pull' to pull them."
            )

    catalog_name = f"darwin_{dataset_path.name}"
    if partition:
        catalog_name += f"_{partition}"

    classes = get_classes(dataset_path=dataset_path, release_name=release_name, annotation_type="polygon")

    DatasetCatalog.register(
        catalog_name,
        lambda partition=partition: list(
            get_annotations(
                dataset_path,
                partition=partition,
                split=split,
                split_type=split_type,
                release_name=release_name,
                annotation_type="polygon",
                annotation_format="coco",
                ignore_inconsistent_examples=True,
            )
        ),
    )
    MetadataCatalog.get(catalog_name).set(thing_classes=classes)
    if evaluator_type:
        MetadataCatalog.get(catalog_name).set(evaluator_type=evaluator_type)
    return catalog_name
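# Usage sketch (not from the original source): register the train and val
# partitions and point a Detectron2 config at them. The dataset slug and the
# evaluator type are placeholders.
from detectron2.config import get_cfg

train_name = detectron2_register_dataset("my-team/my-dataset", partition="train")
val_name = detectron2_register_dataset(
    "my-team/my-dataset", partition="val", evaluator_type="coco"
)

cfg = get_cfg()
cfg.DATASETS.TRAIN = (train_name,)
cfg.DATASETS.TEST = (val_name,)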