Exemplo n.º 1
0
    def annotations(
        self,
        partition: str,
        split: str = "split",
        split_type: str = "stratified",
        annotation_type: str = "polygon",
    ):
        """
        Returns all the annotations of a given split and partition in a single dictionary

        Parameters
        ----------
        partition
            Selects one of the partitions [train, val, test]
        split
            Selects the split that defines the percetages used (use 'split' to select the default split
        split_type
            Heuristic used to do the split [random, stratified]
        annotation_type
            The type of annotation classes [tag, polygon]

        Returns
        -------
        dict
            Dictionary containing all the annotations of the dataset
        """
        assert self.local_path.exists()
        return get_annotations(
            self.local_path,
            partition=partition,
            split=split,
            split_type=split_type,
            annotation_type=annotation_type,
        )
Exemplo n.º 2
0
    def annotations(
        self,
        partition: str,
        split: str = "split",
        split_type: str = "stratified",
        annotation_type: str = "polygon",
        release_name: Optional[str] = None,
        annotation_format: Optional[str] = "darwin",
    ):
        """
        Returns all the annotations of a given split and partition in a single dictionary

        Parameters
        ----------
        partition
            Selects one of the partitions [train, val, test]
        split
            Selects the split that defines the percetages used (use 'split' to select the default split
        split_type
            Heuristic used to do the split [random, stratified]
        annotation_type
            The type of annotation classes [tag, polygon]
        release_name: str
            Version of the dataset
        annotation_format: str
            Re-formatting of the annotation when loaded [coco, darwin]

        Returns
        -------
        dict
            Dictionary containing all the annotations of the dataset
        """
        assert self.local_path.exists()
        if release_name in ["latest", None]:
            release = self.get_release("latest")
            release_name = release.name

        for annotation in get_annotations(
                self.local_path,
                partition=partition,
                split=split,
                split_type=split_type,
                annotation_type=annotation_type,
                release_name=release_name,
                annotation_format=annotation_format,
        ):
            yield annotation
Exemplo n.º 3
0
def detectron2_register_dataset(
    dataset: str,
    release_name: Optional[str] = "latest",
    partition: Optional[str] = None,
    split: Optional[str] = "default",
    split_type: Optional[str] = "stratified",
    evaluator_type: Optional[str] = None,
) -> str:
    """Registers a local Darwin-formatted dataset in Detectron2

    Parameters
    ----------
    dataset: str
        Dataset slug
    release_name: str
        Version of the dataset
    partition: str
        Selects one of the partitions [train, val, test]
    split
        Selects the split that defines the percetages used (use 'default' to select the default split)
    split_type: str
        Heuristic used to do the split [random, stratified]
    evaluator_type: str
        Evaluator to be used in the val and test sets
    """
    try:
        from detectron2.data import DatasetCatalog, MetadataCatalog
    except ImportError:
        print("Detectron2 not found.")
        sys.exit(1)
    from darwin.dataset.utils import get_annotations, get_classes

    dataset_path: Optional[Path] = None
    if os.path.isdir(dataset):
        dataset_path = Path(dataset)
    else:
        identifier = DatasetIdentifier.parse(dataset)
        if identifier.version:
            release_name = identifier.version

        client = _load_client(offline=True)
        dataset_path = None
        for path in client.list_local_datasets(team_slug=identifier.team_slug):
            if identifier.dataset_slug == path.name:
                dataset_path = path

        if not dataset_path:
            _error(
                f"Dataset '{identifier.dataset_slug}' does not exist locally. "
                f"Use 'darwin dataset remote' to see all the available datasets, "
                f"and 'darwin dataset pull' to pull them.")

    catalog_name = f"darwin_{dataset_path.name}"
    if partition:
        catalog_name += f"_{partition}"

    classes = get_classes(dataset_path=dataset_path,
                          release_name=release_name,
                          annotation_type="polygon")

    DatasetCatalog.register(
        catalog_name,
        lambda partition=partition: list(
            get_annotations(
                dataset_path,
                partition=partition,
                split=split,
                split_type=split_type,
                release_name=release_name,
                annotation_type="polygon",
                annotation_format="coco",
                ignore_inconsistent_examples=True,
            )),
    )
    MetadataCatalog.get(catalog_name).set(thing_classes=classes)
    if evaluator_type:
        MetadataCatalog.get(catalog_name).set(evaluator_type=evaluator_type)
    return catalog_name