Пример #1
0
    def __init__(self, org_id: str, data_set_name: str) -> None:
        """Fetch the named dataset and cache its metadata on the instance.

        If the lookup raises, the error is reported through ``print_error``
        and construction stops early; in that case only ``org_id``,
        ``data_set_name`` and ``api_client`` are set.
        """
        self.org_id = org_id
        self.data_set_name = data_set_name
        self.api_client = RedBrickApi(cache=False)

        print_info("Retrieving dataset ...")

        # Query the backend; failures are reported rather than propagated.
        try:
            response = self.api_client.get_datapointset(
                self.org_id, self.data_set_name
            )
            info = response["dataPointSet"]
        except Exception as err:
            print_error(err)
            return

        print_info("Dataset successfully retrieved!")

        # Replace the identifiers with the values reported by the backend
        # and keep the remaining metadata fields alongside them.
        self.org_id = info["orgId"]
        self.data_set_name = info["name"]
        self.data_type = info["dataType"]
        self.datapoint_count = info["datapointCount"]
        self.desc = info["desc"]
        self.createdAt = info["createdAt"]
        self.createdBy = info["createdBy"]
        self.status = info["status"]
Пример #2
0
    def __init__(self, org_id: str, label_set_name: str) -> None:
        """Collect datapoint ids, label set metadata and organization members.

        Any exception raised by the API client is reported through
        ``print_error`` and ends construction early, so attributes assigned
        after the failing call are not set.
        """
        self.org_id = org_id
        self.label_set_name = label_set_name
        self.api_client = RedBrickApi(cache=False)

        print_info("Counting available datapoints...")

        # Ids of every datapoint in the label set, plus the group metadata.
        try:
            ids_and_group = self.api_client.get_datapoint_ids(
                self.org_id, self.label_set_name
            )
            self.dp_ids, custom_group = ids_and_group
        except Exception as err:
            print_error(err)
            return

        self.task_type = custom_group.task_type
        self.data_type = custom_group.data_type
        self.taxonomy: Dict[str, int] = custom_group.taxonomy

        datapoint_total = len(self.dp_ids)
        print_info("Number of Datapoints = %s" % datapoint_total)

        # Segmentation label sets get their taxonomy mapping adjusted.
        if self.task_type == "SEGMENTATION":
            self.taxonomy_update_segmentation()

        # Members of the organization.
        try:
            self.users = self.api_client.get_members(self.org_id)
        except Exception as err:
            print_error(err)
Пример #3
0
 def __init__(
     self,
     org_id: str,
     data_set_name: str,
     storage_id: str,
     label_set_name: str = None,
 ):
     """Remember where new datapoints go and create an API client.

     ``label_set_name`` defaults to ``None``; the other three arguments
     are required. All four are stored unchanged on the instance.
     """
     super().__init__()

     # Uncached client used for every backend call.
     self.api_client = RedBrickApi(cache=False)

     # Target identifiers, stored as given.
     self.label_set_name = label_set_name
     self.storage_id = storage_id
     self.data_set_name = data_set_name
     self.org_id = org_id
Пример #4
0
    def __init__(self, org_id: str, label_set_name: str) -> None:
        """Set up paging state and load the label set's group metadata."""
        # Which label set to iterate, and the client used to reach it.
        self.org_id = org_id
        self.label_set_name = label_set_name
        self.api = RedBrickApi()

        # Paging state: nothing has been fetched for iteration yet.
        self.cursor = None
        self.datapointsBatch = None
        self.datapointsBatchIndex = None

        # Task types for which a segmentation taxonomy is derived.
        self.valid_task_types: List = ["SEGMENTATION", "POLYGON", "MULTI"]

        # Group metadata obtained through self._get_custom_group().
        group = self._get_custom_group()
        self.customGroup = group
        self.taxonomy = group["taxonomy"]
        self.datapointCount = group["datapointCount"]
        self.taxonomy_segm = self._get_taxonomy_segmentation()
Пример #5
0
    def __init__(self, org_id: str, project_id: str, stage_name: str) -> None:
        """Store the stage coordinates and load its taxonomy and task type."""
        info_tag = colored("[INFO]:", "blue")
        print(info_tag, "Initializing remote-labeling module...")

        self.org_id = org_id
        self.project_id = project_id
        self.stage_name = stage_name
        self.api_client = RedBrickApi(cache=False)

        # The stage record names the output taxonomy (name + version) and
        # the output type of the stage.
        stage_info = self.api_client.get_stage(
            org_id=org_id, project_id=project_id, stage_name=stage_name
        )
        self.taxonomy: Taxonomy2 = self.api_client.get_taxonomy(
            orgId=org_id,
            name=stage_info["outputTaxonomyName"],
            version=stage_info["outputTaxonomyVersion"],
        )
        self.task_type = stage_info["outputType"]
Пример #6
0
class DatapointCreator:
    """Create datapoints in a fixed dataset / storage / label set."""

    def __init__(
        self,
        org_id: str,
        data_set_name: str,
        storage_id: str,
        label_set_name: str = None,
    ):
        """Remember where new datapoints go and create an API client.

        ``label_set_name`` defaults to ``None``; the other three arguments
        are required. All four are stored unchanged on the instance.
        """
        super().__init__()

        # Uncached client used for every backend call.
        self.api_client = RedBrickApi(cache=False)

        # Target identifiers, stored as given.
        self.label_set_name = label_set_name
        self.storage_id = storage_id
        self.data_set_name = data_set_name
        self.org_id = org_id

    def create_datapoint(self,
                         name: str,
                         items: List[str],
                         labels: List[Dict] = None) -> Dict:
        """Create one datapoint and return the backend's record of it.

        Args:
            name: Name passed to the backend for the new datapoint.
            items: Items passed to the backend for the new datapoint.
            labels: Optional labels forwarded unchanged to the backend.

        Returns:
            The ``"createDatapoint"`` entry of the API response.
        """
        request = dict(
            org_id=self.org_id,
            items=items,
            name=name,
            data_set_name=self.data_set_name,
            storage_id=self.storage_id,
            label_set_name=self.label_set_name,
            labels=labels,
        )
        response = self.api_client.createDatapoint(**request)

        message = "Datapoint successfully created. Datapoint id: {}".format(
            response["createDatapoint"]["dpId"])
        print_info(message)

        return response["createDatapoint"]
Пример #7
0
class DatasetLoader(DatasetBase):
    """Load a dataset's metadata and upload item lists into it."""

    def __init__(self, org_id: str, data_set_name: str) -> None:
        """Fetch the named dataset and cache its metadata on the instance.

        If the lookup raises, the error is reported through ``print_error``
        and construction stops early. In that case only ``org_id``,
        ``data_set_name`` and ``api_client`` are set, so attributes such as
        ``data_type`` (needed by the upload methods) are missing.
        """
        self.org_id = org_id
        self.data_set_name = data_set_name
        self.api_client = RedBrickApi(cache=False)

        print_info("Retrieving dataset ...")

        # Dataset info
        try:
            dataset = self.api_client.get_datapointset(self.org_id, self.data_set_name)[
                "dataPointSet"
            ]
        except Exception as err:
            print_error(err)
            return

        print_info("Dataset successfully retrieved!")

        self.org_id = dataset["orgId"]
        self.data_set_name = dataset["name"]
        self.data_type = dataset["dataType"]
        self.datapoint_count = dataset["datapointCount"]
        self.desc = dataset["desc"]
        self.createdAt = dataset["createdAt"]
        self.createdBy = dataset["createdBy"]
        self.status = dataset["status"]

    def upload_items(self, items: str, storage_id: str) -> Union[Dict, None]:
        """Upload a list of items to the backend.

        Args:
            items: Path of a JSON file describing the items.
            storage_id: Storage identifier sent with the upload record.

        Returns:
            The ``"itemListUploadSuccess"`` payload, or ``None`` when the
            HTTP upload did not succeed.
        """
        return self._upload_item_list(items, storage_id, task_type="ITEMS")

    def upload_items_with_labels(
        self, items: str, storage_id: str, label_set_name: str, task_type: str
    ) -> Union[Dict, None]:
        """Upload a list of items with labels to the backend.

        Args:
            items: Path of a JSON file describing the items and labels.
            storage_id: Storage identifier sent with the upload record.
            label_set_name: Sent to the backend as ``"cstName"``.
            task_type: Sent to the backend as ``"taskType"``.

        Returns:
            The ``"itemListUploadSuccess"`` payload, or ``None`` when the
            HTTP upload did not succeed.
        """
        return self._upload_item_list(
            items,
            storage_id,
            task_type=task_type,
            extra_input={"cstName": label_set_name},
        )

    def _upload_item_list(
        self,
        items: str,
        storage_id: str,
        task_type: str,
        extra_input: Union[Dict, None] = None,
    ) -> Union[Dict, None]:
        """Upload the JSON file at ``items`` and register the upload.

        Shared implementation of both public upload methods: request a
        presigned URL, PUT the file's JSON content to it, then report the
        upload to the backend. ``extra_input`` entries are appended to the
        registration record.
        """
        # Getting item list presign
        upload_info = self.api_client.get_itemListUploadPresign(
            org_id=self.org_id, file_name="upload-sdk.json"
        )["itemListUploadPresign"]
        presigned_url = upload_info["presignedUrl"]
        file_path = upload_info["filePath"]
        file_name = upload_info["fileName"]
        upload_id = upload_info["uploadId"]

        # Uploading items to presigned url. The file is parsed first and
        # closed before the network request starts.
        print_info("Uploading file '{}'".format(items))
        with open(items, "rb") as f:
            json_payload = json.load(f)
        response = requests.put(presigned_url, json=json_payload)

        if not response.ok:
            print_error("Something went wrong uploading your file {}.".format(items))
            return None

        # Call item list upload success
        success_input = {
            "orgId": self.org_id,
            "filePath": file_path,
            "fileName": file_name,
            "uploadId": upload_id,
            "taskType": task_type,
            "dataType": self.data_type,
            "storageId": storage_id,
            "dpsName": self.data_set_name,
        }
        if extra_input:
            success_input.update(extra_input)

        upload_success = self.api_client.itemListUploadSuccess(
            org_id=self.org_id,
            itemsListUploadSuccessInput=success_input,
        )["itemListUploadSuccess"]
        import_id = upload_success["upload"]["importId"]
        print_info(
            "Upload is processing, this is your importId: {}".format(import_id)
        )
        return upload_success
Пример #8
0
class LabelsetLoader(LabelsetBase):
    """Index-based access to, export of and visualization of a label set."""

    def __init__(self, org_id: str, label_set_name: str) -> None:
        """Collect datapoint ids, label set metadata and organization members.

        Any exception raised by the API client is reported through
        ``print_error`` and ends construction early, so attributes assigned
        after the failing call are not set.
        """
        self.org_id = org_id
        self.label_set_name = label_set_name
        self.api_client = RedBrickApi(cache=False)

        print_info("Counting available datapoints...")

        # All datapoints in labelset
        try:
            self.dp_ids, custom_group = self.api_client.get_datapoint_ids(
                self.org_id, self.label_set_name)
        except Exception as err:
            print_error(err)
            return

        self.task_type = custom_group.task_type
        self.data_type = custom_group.data_type
        self.taxonomy: Dict[str, int] = custom_group.taxonomy
        print_info("Number of Datapoints = %s" % len(self.dp_ids))

        # Update taxonomy mapper if segmentation
        if self.task_type == "SEGMENTATION":
            self.taxonomy_update_segmentation()

        # Get all users
        try:
            self.users = self.api_client.get_members(self.org_id)
        except Exception as err:
            print_error(err)
            return

    def __getitem__(self, index: int) -> Union[Image, Video]:
        """Fetch the datapoint at position ``index`` of ``dp_ids``."""
        return self.api_client.get_datapoint(
            self.org_id,
            self.label_set_name,
            self.dp_ids[index],
            self.task_type,
            self.taxonomy,
        )

    def export(self, format: str = "redbrick") -> str:
        """Export the label set and return the exporter's cache directory.

        Only ``"IMAGE"`` and ``"VIDEO"`` data types are handled; for any
        other data type an error is printed and ``""`` is returned.
        """
        if self.data_type == "IMAGE":
            export_img: ExportImage = ExportImage(format=format, labelset=self)
            export_img.export()
            return export_img.cache_dir
        if self.data_type == "VIDEO":
            export_vid: ExportVideo = ExportVideo(format=format, labelset=self)
            export_vid.export()
            return export_vid.cache_dir

        # Adjacent literals instead of a backslash continuation, which put
        # the source indentation into the message.
        err = ValueError("%s data type not supported! Please reach out to "
                         "[email protected]" % self.data_type)
        print_error(err)
        return ""

    def number_of_datapoints(self) -> int:
        """Get number of datapoints."""
        return len(self.dp_ids)

    def show_data(self) -> None:
        """Visualize randomly chosen datapoints.

        For ``"VIDEO"`` data one random datapoint is shown. Otherwise up to
        four random datapoints are drawn in a grid of at most 2x2 subplots.
        Nothing is drawn when the label set has no datapoints.
        """
        num_dps = self.number_of_datapoints()

        if self.data_type == "VIDEO":
            print_info("Visualizing first 20 frames...")
            if not num_dps:
                return

            idx = random.randint(0, num_dps - 1)
            self[idx].show_data()
            return

        # Image data type
        print_info("Visualizing data and labels...")
        if not num_dps:
            # Without this guard the grid computation divides by zero.
            return

        # Prepare figure
        cols = min(2, num_dps)
        rows = min(2, math.ceil(num_dps / cols))
        fig = plt.figure()

        # Pick distinct random datapoints from the whole label set, not
        # just from the first rows * cols indices.
        sample_size = min(rows * cols, num_dps)
        indexes = random.sample(range(num_dps), sample_size)

        # Iterate through axes
        for position, idx in enumerate(indexes, start=1):
            ax = fig.add_subplot(rows, cols, position)
            self[idx].show_data(ax=ax)  # type: ignore

        plt.tight_layout()
        plt.show()

    def taxonomy_update_segmentation(self) -> None:
        """
        Fix the taxonomy mapper object to be 1-indexed for
        segmentation projects.

        Every class id is incremented by one and a ``"background"`` entry
        with id 0 is added. If an incremented id equals 0, an error is
        printed and the process exits with status 1.
        """
        # sys.exit instead of the site-provided exit(), which is meant for
        # interactive sessions.
        import sys

        for key in self.taxonomy:
            self.taxonomy[key] += 1
            if self.taxonomy[key] == 0:
                print_error("Taxonomy class id's must be 0 indexed. "
                            "Please contact [email protected] for help.")
                sys.exit(1)

        # Add a background class for segmentation
        self.taxonomy["background"] = 0
Пример #9
0
class RemoteLabel:
    """An interface to RemoteLabel brick."""

    def __init__(self, org_id: str, project_id: str, stage_name: str) -> None:
        """Store the stage coordinates and load its taxonomy and task type."""
        print(colored("[INFO]:", "blue"),
              "Initializing remote-labeling module...")
        self.org_id = org_id
        self.project_id = project_id
        self.stage_name = stage_name
        self.api_client = RedBrickApi(cache=False)

        # Gather stage information and store
        stage_info = self.api_client.get_stage(org_id=org_id,
                                               project_id=project_id,
                                               stage_name=stage_name)
        taxonomy = self.api_client.get_taxonomy(
            orgId=org_id,
            name=stage_info["outputTaxonomyName"],
            version=stage_info["outputTaxonomyVersion"],
        )
        self.taxonomy: Taxonomy2 = taxonomy
        self.task_type = stage_info["outputType"]

    def get_num_tasks(self) -> int:
        """Get the number of tasks queued."""
        return self.api_client.get_num_remote_tasks(
            org_id=self.org_id,
            project_id=self.project_id,
            stage_name=self.stage_name,
        )

    def get_task(self, num_tasks: int) -> List[Task]:
        """Request up to ``num_tasks`` tasks from the backend.

        Returns whatever the API returns; when that is empty a warning is
        printed instead of the "Done." confirmation.
        """
        print(colored("[INFO]:", "blue"),
              "Retrieving task from backend...",
              end=" ")
        task = self.__get_remote_labeling_task(num_tasks=num_tasks)
        if not task:
            print(colored("\n[WARNING]:", "yellow"),
                  "No more tasks in this stage.")
            return task
        print(colored("Done.", "green"))
        return task

    def submit_task(
        self,
        task: Task,
        labels: Union[ImageBoundingBox, VideoBoundingBox, VideoClassify],
    ) -> None:
        """Validate ``labels`` against the taxonomy and submit them for ``task``.

        Raises:
            ValueError: If ``labels.compare_taxonomy`` reports a category
                outside the stage taxonomy, or if the label object's class
                does not match an ``IMAGE_BBOX`` / ``VIDEO_BBOX`` task.
        """
        print(colored("[INFO]:", "blue"),
              "Submitting task to backend...",
              end=" ")
        new_subname = "remote-labeling"

        # Check that label category matches taxonomy
        check, classname = labels.compare_taxonomy(taxonomy=self.taxonomy)
        if not check:
            raise ValueError("%s is not a valid category for taxonomy %s" %
                             (classname, self.taxonomy.name))

        # Check label type
        if task.task_data_type == "IMAGE_BBOX":
            if not isinstance(labels, ImageBoundingBox):
                raise ValueError("Labels must be of type ImageBoundingBox!")
        elif task.task_data_type == "VIDEO_BBOX":
            if not isinstance(labels, VideoBoundingBox):
                raise ValueError("Labels must be of type VideoBoundingBox!")

        self.__put_task_data(
            dp_id=task.dp_id,
            sub_name=new_subname,
            task_data=labels,
            taxonomy_name=task.taxonomy["name"],
            taxonomy_version=task.taxonomy["version"],
            td_type=task.task_data_type,
        )

        # Put remote labeling task: a copy of the task carrying the new
        # sub name.
        submit_task = Task(
            org_id=task.org_id,
            project_id=task.project_id,
            stage_name=task.stage_name,
            task_id=task.task_id,
            dp_id=task.dp_id,
            sub_name=new_subname,
            taxonomy=task.taxonomy,
            items_list=task.items_list,
            items_list_presigned=task.items_list_presigned,
            task_data_type=task.task_data_type,
        )
        self.__put_remote_labeling_task(submit_task)

        print(colored("Done.", "green"))

    def put_finished_tasks(self, labels_for_tasks: List[Dict]) -> None:
        """Upload a batch of finished tasks and their labels to the backend.

        Raises:
            ValueError: If ``labels_for_tasks`` is empty.
        """
        # Explicit check: an assert would be skipped when Python runs
        # with -O.
        if not labels_for_tasks:
            raise ValueError("labels_for_tasks must not be empty")
        print(
            colored("[INFO]:", "blue"),
            "Submitting batch of tasks to backend...",
            end=" ",
        )
        self.api_client.putRemoteLabelingTasksAndLabels(
            org_id=self.org_id,
            project_id=self.project_id,
            stage_name=self.stage_name,
            finished_tasks=labels_for_tasks,
        )
        print(colored("Done.", "green"))

    def __put_task_data(
        self,
        dp_id: str,
        sub_name: str,
        task_data: Union[ImageBoundingBox, VideoBoundingBox, VideoClassify],
        taxonomy_name: str,
        taxonomy_version: str,
        td_type: str,
    ) -> None:
        """Serialize the label object and submit it as labels for ``dp_id``."""
        # str(task_data) is parsed as JSON, so the label object's string
        # form must be a JSON document.
        task_datas = json.loads(str(task_data))

        self.api_client.putLabels(
            org_id=self.org_id,
            project_id=self.project_id,
            dp_id=dp_id,
            stage_name=self.stage_name,
            sub_name=sub_name,
            labels=task_datas,
            taxonomy_name=taxonomy_name,
            taxonomy_version=taxonomy_version,
            td_type=td_type,
        )

    def __put_remote_labeling_task(self, task: Task) -> None:
        """Put the remote labeling task to the backend."""
        finished_task = {
            "orgId": task.org_id,
            "projectId": task.project_id,
            "stageName": task.stage_name,
            "taskId": task.task_id,
            "newSubName": task.sub_name,
        }
        self.api_client.putRemoteLabelingTask(finished_task)

    def __get_remote_labeling_task(self, num_tasks: int) -> List[Task]:
        """Get the labeling tasks from API."""
        return self.api_client.tasksToLabelRemote(
            orgId=self.org_id,
            projectId=self.project_id,
            stageName=self.stage_name,
            numTasks=num_tasks,
        )
Пример #10
0
class LabelsetIterator:
    """Iterate over the datapoints of a label set, one page at a time."""

    def __init__(self, org_id: str, label_set_name: str) -> None:
        """Set up paging state and load the label set's group metadata."""
        self.api = RedBrickApi()

        # Paging state: cursor of the next page, the current page's
        # entries, and the position of the next unread entry in it.
        self.cursor = None
        self.datapointsBatch = None
        self.datapointsBatchIndex = None

        self.org_id = org_id
        self.label_set_name = label_set_name

        # Task types for which a segmentation taxonomy is derived.
        self.valid_task_types: List = ["SEGMENTATION", "POLYGON", "MULTI"]

        self.customGroup = self._get_custom_group()
        self.taxonomy = self.customGroup["taxonomy"]
        self.datapointCount = self.customGroup["datapointCount"]
        self.taxonomy_segm = self._get_taxonomy_segmentation()

    def _get_custom_group(self) -> Dict[str, Any]:
        """Return the ``"customGroup"`` part of a paged datapoints query."""
        return self.api.get_datapoints_paged(
            org_id=self.org_id,
            label_set_name=self.label_set_name)["customGroup"]

    def _get_taxonomy_segmentation(self) -> Any:
        """Return the segmentation taxonomy map, or None for other task types."""
        if self.customGroup["taskType"] in self.valid_task_types:
            return self._create_taxonomy_segmentation()
        return None

    def _create_taxonomy_segmentation(self) -> Dict[str, int]:
        """Build the class-name -> id map from the first taxonomy category."""
        tax_map: Dict[str, int] = {}
        self._trav_tax(self.taxonomy["categories"][0], tax_map)
        return self._taxonomy_update_segmentation(tax_map)

    def _trav_tax(self, taxonomy: Dict[Any, Any], tax_map: Dict[str,
                                                                int]) -> None:
        """Traverse the taxonomy tree structure, and fill the taxonomy mapper object."""
        # The node passed in is not recorded itself; only its descendants.
        for child in taxonomy["children"]:
            tax_map[child["name"]] = child["classId"]
            self._trav_tax(child, tax_map)

    def _taxonomy_update_segmentation(
            self, tax_map: Dict[str, int]) -> Dict[str, int]:
        """
        Fix the taxonomy mapper object to be 1-indexed for
        segmentation projects.

        ``tax_map`` is modified in place and returned: every id is
        incremented by one and ``"background"`` is added with id 0. If an
        incremented id equals 0, an error is printed and the process exits
        with status 1.
        """
        # sys.exit instead of the site-provided exit(), which is meant for
        # interactive sessions.
        import sys

        for key in tax_map:
            tax_map[key] += 1
            if tax_map[key] == 0:
                print_error("Taxonomy class id's must be 0 indexed. "
                            "Please contact [email protected] for help.")
                sys.exit(1)

        # Add a background class for segmentation
        tax_map["background"] = 0
        return tax_map

    def _trim_labels(self, entry) -> Dict:
        """Remove, in place, every None-valued key from the entry's labels."""
        for label in entry["labelData"]["labels"]:
            for key in [k for k, v in label.items() if v is None]:
                del label[key]
        return entry

    def _fetch_next_batch(self) -> None:
        """Load the page addressed by the current cursor into the paging state."""
        query = {"org_id": self.org_id, "label_set_name": self.label_set_name}
        if self.cursor is not None:
            query["cursor"] = self.cursor
        page = self.api.get_datapoints_paged(
            **query)["customGroup"]["datapointsPaged"]

        self.cursor = page["cursor"]
        self.datapointsBatch = page["entries"]
        self.datapointsBatchIndex = 0

    def __iter__(self) -> "LabelsetIterator":
        return self

    def __next__(self) -> dict:
        """Get next labels / datapoint.

        Raises:
            StopIteration: When the current page is exhausted and the
                backend returned no cursor for a further page.
        """
        # Loop rather than fetch once: a fetched page may contain no
        # entries, in which case indexing it would raise IndexError.
        while (self.datapointsBatch is None
               or self.datapointsBatchIndex >= len(self.datapointsBatch)):
            if self.datapointsBatch is not None and self.cursor is None:
                raise StopIteration
            self._fetch_next_batch()

        # Current entry to return
        entry = self.datapointsBatch[self.datapointsBatchIndex]
        self.datapointsBatchIndex += 1

        return self._trim_labels(entry)