def __init__(self, org_id: str, data_set_name: str) -> None:
    """Initialize the loader and fetch dataset metadata from the backend."""
    self.org_id = org_id
    self.data_set_name = data_set_name
    self.api_client = RedBrickApi(cache=False)

    print_info("Retrieving dataset ...")

    # Fetch the dataset record; on any API failure report it and abort
    # construction (the instance is left partially initialized).
    try:
        response = self.api_client.get_datapointset(self.org_id, self.data_set_name)
        dataset_info = response["dataPointSet"]
    except Exception as error:
        print_error(error)
        return

    print_info("Dataset successfully retrieved!")

    # Cache the server-side view of the dataset on the instance.
    self.org_id = dataset_info["orgId"]
    self.data_set_name = dataset_info["name"]
    self.data_type = dataset_info["dataType"]
    self.datapoint_count = dataset_info["datapointCount"]
    self.desc = dataset_info["desc"]
    self.createdAt = dataset_info["createdAt"]
    self.createdBy = dataset_info["createdBy"]
    self.status = dataset_info["status"]
def __init__(self, org_id: str, label_set_name: str) -> None:
    """Initialize the loader and pull labelset metadata from the backend."""
    self.org_id = org_id
    self.label_set_name = label_set_name
    self.api_client = RedBrickApi(cache=False)

    print_info("Counting available datapoints...")

    # Fetch every datapoint id in the labelset, plus its group metadata;
    # on API failure report the error and abort construction.
    try:
        self.dp_ids, group_info = self.api_client.get_datapoint_ids(
            self.org_id, self.label_set_name)
    except Exception as error:
        print_error(error)
        return

    self.task_type = group_info.task_type
    self.data_type = group_info.data_type
    self.taxonomy: Dict[str, int] = group_info.taxonomy
    print_info("Number of Datapoints = %s" % len(self.dp_ids))

    # Segmentation projects need a 1-indexed taxonomy with a background class.
    if self.task_type == "SEGMENTATION":
        self.taxonomy_update_segmentation()

    # Cache org membership for user lookups.
    try:
        self.users = self.api_client.get_members(self.org_id)
    except Exception as error:
        print_error(error)
        return
def __init__(
    self,
    org_id: str,
    data_set_name: str,
    storage_id: str,
    label_set_name: str = None,
):
    """Initialize the creator with its target dataset and storage method."""
    super().__init__()
    self.api_client = RedBrickApi(cache=False)
    # Destination for every datapoint created through this instance.
    self.org_id = org_id
    self.data_set_name = data_set_name
    self.storage_id = storage_id
    # NOTE(review): defaults to None, so the annotation should arguably be
    # Optional[str] — confirm against the project's typing conventions.
    self.label_set_name = label_set_name
def __init__(self, org_id: str, label_set_name: str) -> None:
    """Set up the iterator and prefetch labelset metadata."""
    # Identity of the labelset being iterated.
    self.org_id = org_id
    self.label_set_name = label_set_name
    self.api = RedBrickApi()

    # Paging state: nothing has been fetched yet.
    self.cursor = None
    self.datapointsBatch = None
    self.datapointsBatchIndex = None

    # Task types that require a segmentation taxonomy mapping.
    self.valid_task_types: List = ["SEGMENTATION", "POLYGON", "MULTI"]

    # Labelset metadata pulled from the backend.
    self.customGroup = self._get_custom_group()
    self.taxonomy = self.customGroup["taxonomy"]
    self.datapointCount = self.customGroup["datapointCount"]
    self.taxonomy_segm = self._get_taxonomy_segmentation()
def __init__(self, org_id: str, project_id: str, stage_name: str) -> None:
    """Initialize a remote-labeling session bound to one project stage."""
    print(colored("[INFO]:", "blue"), "Initializing remote-labeling module...")
    self.org_id = org_id
    self.project_id = project_id
    self.stage_name = stage_name
    self.api_client = RedBrickApi(cache=False)

    # Look up the stage so we know its output taxonomy and task type.
    stage_details = self.api_client.get_stage(
        org_id=org_id, project_id=project_id, stage_name=stage_name)
    stage_taxonomy = self.api_client.get_taxonomy(
        orgId=org_id,
        name=stage_details["outputTaxonomyName"],
        version=stage_details["outputTaxonomyVersion"],
    )
    self.taxonomy: Taxonomy2 = stage_taxonomy
    self.task_type = stage_details["outputType"]
class DatapointCreator:
    """Creates datapoints in a RedBrick dataset via the backend API."""

    def __init__(
        self,
        org_id: str,
        data_set_name: str,
        storage_id: str,
        label_set_name: str = None,
    ):
        """Initialize the creator with its target dataset and storage method."""
        super().__init__()
        self.api_client = RedBrickApi(cache=False)
        # Destination for every datapoint created through this instance.
        self.org_id = org_id
        self.data_set_name = data_set_name
        self.storage_id = storage_id
        # NOTE(review): defaults to None — annotation should arguably be
        # Optional[str]; confirm against the project's typing conventions.
        self.label_set_name = label_set_name

    def create_datapoint(self, name: str, items: List[str],
                         labels: List[Dict] = None) -> Dict:
        """Create one datapoint (optionally pre-labeled) and return it."""
        response = self.api_client.createDatapoint(
            org_id=self.org_id,
            items=items,
            name=name,
            data_set_name=self.data_set_name,
            storage_id=self.storage_id,
            label_set_name=self.label_set_name,
            labels=labels,
        )
        created = response["createDatapoint"]
        print_info("Datapoint successfully created. Datapoint id: {}".format(
            created["dpId"]))
        return created
class DatasetLoader(DatasetBase):
    """Dataset loader class.

    Fetches dataset metadata on construction and supports uploading item
    lists (with or without labels) to the backend.
    """

    def __init__(self, org_id: str, data_set_name: str) -> None:
        """Construct Loader.

        On any API failure the error is printed and construction aborts,
        leaving the instance partially initialized (original behavior kept).
        """
        self.org_id = org_id
        self.data_set_name = data_set_name
        self.api_client = RedBrickApi(cache=False)
        print_info("Retrieving dataset ...")

        # Dataset info
        try:
            dataset = self.api_client.get_datapointset(self.org_id, self.data_set_name)[
                "dataPointSet"
            ]
        except Exception as err:
            print_error(err)
            return

        print_info("Dataset successfully retrieved!")

        # Cache the server-side view of the dataset on the instance.
        self.org_id = dataset["orgId"]
        self.data_set_name = dataset["name"]
        self.data_type = dataset["dataType"]
        self.datapoint_count = dataset["datapointCount"]
        self.desc = dataset["desc"]
        self.createdAt = dataset["createdAt"]
        self.createdBy = dataset["createdBy"]
        self.status = dataset["status"]

    def upload_items(self, items: str, storage_id: str) -> Union[Dict, None]:
        """Upload a list of items to the backend.

        :param items: path to a local JSON file describing the items.
        :param storage_id: storage method the items live in.
        :returns: the itemListUploadSuccess payload, or None on failure.
        """
        return self._upload_items_file(
            items=items, storage_id=storage_id, task_type="ITEMS")

    def upload_items_with_labels(
        self, items: str, storage_id: str, label_set_name: str, task_type: str
    ) -> Union[Dict, None]:
        """Upload a list of items with labels to the backend.

        Identical to :meth:`upload_items` but tags the upload with a
        labelset name and caller-chosen task type.
        """
        return self._upload_items_file(
            items=items,
            storage_id=storage_id,
            task_type=task_type,
            extra_fields={"cstName": label_set_name},
        )

    def _upload_items_file(
        self,
        items: str,
        storage_id: str,
        task_type: str,
        extra_fields: Dict = None,
    ) -> Union[Dict, None]:
        """Shared implementation for both upload entry points.

        Presigns an upload slot, PUTs the local JSON file to it, then
        reports success to the backend. ``extra_fields`` is merged into the
        itemListUploadSuccess input (used to attach ``cstName``).
        """
        # Getting item list presign
        upload_info = self.api_client.get_itemListUploadPresign(
            org_id=self.org_id, file_name="upload-sdk.json"
        )["itemListUploadPresign"]

        # Uploading items to presigned url
        print_info("Uploading file '{}'".format(items))
        with open(items, "rb") as f:
            json_payload = json.load(f)
        response = requests.put(upload_info["presignedUrl"], json=json_payload)

        if not response.ok:
            print_error("Something went wrong uploading your file {}.".format(items))
            return None

        # Call item list upload success
        success_input = {
            "orgId": self.org_id,
            "filePath": upload_info["filePath"],
            "fileName": upload_info["fileName"],
            "uploadId": upload_info["uploadId"],
            "taskType": task_type,
            "dataType": self.data_type,
            "storageId": storage_id,
            "dpsName": self.data_set_name,
        }
        if extra_fields:
            success_input.update(extra_fields)

        success_payload = self.api_client.itemListUploadSuccess(
            org_id=self.org_id,
            itemsListUploadSuccessInput=success_input,
        )["itemListUploadSuccess"]

        import_id = success_payload["upload"]["importId"]
        print_info(
            "Upload is processing, this is your importId: {}".format(import_id)
        )
        return success_payload
class LabelsetLoader(LabelsetBase):
    """Labelset loader class.

    Provides indexed access to datapoints, export to disk, and quick
    visualization of a labelset.
    """

    def __init__(self, org_id: str, label_set_name: str) -> None:
        """Construct Loader.

        On any API failure the error is printed and construction aborts,
        leaving the instance partially initialized (original behavior kept).
        """
        self.org_id = org_id
        self.label_set_name = label_set_name
        self.api_client = RedBrickApi(cache=False)

        print_info("Counting available datapoints...")

        # All datapoints in labelset
        try:
            self.dp_ids, custom_group = self.api_client.get_datapoint_ids(
                self.org_id, self.label_set_name)
        except Exception as err:
            print_error(err)
            return

        self.task_type = custom_group.task_type
        self.data_type = custom_group.data_type
        self.taxonomy: Dict[str, int] = custom_group.taxonomy
        print_info("Number of Datapoints = %s" % len(self.dp_ids))

        # Update taxonomy mapper if segmentation
        if self.task_type == "SEGMENTATION":
            self.taxonomy_update_segmentation()

        # Get all users
        try:
            self.users = self.api_client.get_members(self.org_id)
        except Exception as err:
            print_error(err)
            return

    def __getitem__(self, index: int) -> Union[Image, Video]:
        """Get information needed for a single item."""
        dp = self.api_client.get_datapoint(
            self.org_id,
            self.label_set_name,
            self.dp_ids[index],
            self.task_type,
            self.taxonomy,
        )
        return dp

    def export(self, format: str = "redbrick") -> str:
        """Export the labelset to a local cache directory.

        :returns: the cache directory path, or "" (after printing an
            error) for unsupported data types.
        """
        if self.data_type == "IMAGE":
            export_img: ExportImage = ExportImage(format=format, labelset=self)
            export_img.export()
            return export_img.cache_dir
        elif self.data_type == "VIDEO":
            export_vid: ExportVideo = ExportVideo(format=format, labelset=self)
            export_vid.export()
            return export_vid.cache_dir
        else:
            err = ValueError(
                "%s data type not supported! Please reach out to "
                "[email protected]" % self.data_type)
            print_error(err)
            return ""

    def number_of_datapoints(self) -> int:
        """Get number of datapoints."""
        return len(self.dp_ids)

    def show_data(self) -> None:
        """Visualize a random sample of datapoints with their labels."""
        if self.data_type == "VIDEO":
            print_info("Visualizing first 20 frames...")
            num_dps = self.number_of_datapoints()
            if not num_dps:
                return
            idx = random.randint(0, num_dps - 1)
            self[idx].show_data()
            return

        # Image data type
        print_info("Visualizing data and labels...")

        num_dps = self.number_of_datapoints()
        # BUGFIX: guard against an empty labelset — without this, cols
        # below would be 0 and num_dps / cols raises ZeroDivisionError.
        if not num_dps:
            return

        # Prepare figure
        cols = int(min(2, num_dps))
        rows = int(min(2, math.ceil(num_dps / cols)))
        fig = plt.figure()

        # Generate random index list
        list_len = np.min([rows * cols, num_dps])
        # BUGFIX: sample from the full datapoint range. The original drew
        # from range(0, list_len), which only ever shuffled the first few
        # datapoints instead of picking a random selection.
        indexes = random.sample(range(0, num_dps), list_len)

        # Iterate through axes
        for i, idx in enumerate(indexes):
            ax = fig.add_subplot(rows, cols, i + 1)
            self[idx].show_data(ax=ax)  # type: ignore

        plt.tight_layout()
        plt.show()

    def taxonomy_update_segmentation(self) -> None:
        """Fix the taxonomy mapper object to be 1-indexed for segmentation projects."""
        for key in self.taxonomy.keys():
            self.taxonomy[key] += 1
            if self.taxonomy[key] == 0:
                # A post-increment value of 0 means the class id was -1.
                print_error(
                    "Taxonomy class id's must be 0 indexed. "
                    "Please contact [email protected] for help.")
                exit(1)

        # Add a background class for segmentation
        self.taxonomy["background"] = 0
class RemoteLabel:
    """An interface to RemoteLabel brick.

    Wraps the backend API for pulling queued labeling tasks from a project
    stage and submitting labels back.
    """

    def __init__(self, org_id: str, project_id: str, stage_name: str) -> None:
        """Construct RemoteLabel instance."""
        print(colored("[INFO]:", "blue"), "Initializing remote-labeling module...")
        self.org_id = org_id
        self.project_id = project_id
        self.stage_name = stage_name
        self.api_client = RedBrickApi(cache=False)

        # Gather stage information and store; the stage determines which
        # taxonomy version and output type submitted labels must match.
        stage_info = self.api_client.get_stage(org_id=org_id, project_id=project_id, stage_name=stage_name)
        taxonomy = self.api_client.get_taxonomy(
            orgId=org_id,
            name=stage_info["outputTaxonomyName"],
            version=stage_info["outputTaxonomyVersion"],
        )
        self.taxonomy: Taxonomy2 = taxonomy
        self.task_type = stage_info["outputType"]

    def get_num_tasks(self) -> int:
        """Get the number of tasks queued in this stage."""
        num = self.api_client.get_num_remote_tasks(org_id=self.org_id, project_id=self.project_id, stage_name=self.stage_name)
        return num

    def get_task(self, num_tasks: int) -> List[Task]:
        """User facing function to get task.

        Requests up to num_tasks tasks; warns (and returns the empty list)
        when the stage has no more tasks.
        """
        print(colored("[INFO]:", "blue"), "Retrieving task from backend...", end=" ")
        task = self.__get_remote_labeling_task(num_tasks=num_tasks)
        if len(task) == 0:
            print(colored("\n[WARNING]:", "yellow"), "No more tasks in this stage.")
            return task
        print(colored("Done.", "green"))
        return task

    def submit_task(
        self,
        task: Task,
        labels: Union[ImageBoundingBox, VideoBoundingBox, VideoClassify],
    ) -> None:
        """User facing function to submit a task.

        Validates the labels against the stage taxonomy and the task's data
        type, uploads the label data, then marks the task as labeled.

        Raises ValueError if a label category is not in the taxonomy or the
        label object type does not match the task's data type.
        """
        print(colored("[INFO]:", "blue"), "Submitting task to backend...", end=" ")
        new_subname = "remote-labeling"

        # Check that label category matches taxonomy
        check, classname = labels.compare_taxonomy(taxonomy=self.taxonomy)
        if not check:
            raise ValueError("%s is not a valid category for taxonomy %s" % (classname, self.taxonomy.name))

        # Check label type
        if task.task_data_type == "IMAGE_BBOX":
            if not isinstance(labels, ImageBoundingBox):
                raise ValueError("Labels must be of type ImageBoundingBox!")
        if task.task_data_type == "VIDEO_BBOX":
            if not isinstance(labels, VideoBoundingBox):
                raise ValueError("Labels must be of type VideoBoundingBox!")

        # Upload the label data for this datapoint under the new sub-name.
        self.__put_task_data(
            dp_id=task.dp_id,
            sub_name=new_subname,
            task_data=labels,
            taxonomy_name=task.taxonomy["name"],
            taxonomy_version=task.taxonomy["version"],
            td_type=task.task_data_type,
        )

        # Put remote labeling task — rebuild the Task with the new sub-name
        # so the backend records which submission the labels belong to.
        submit_task = Task(
            org_id=task.org_id,
            project_id=task.project_id,
            stage_name=task.stage_name,
            task_id=task.task_id,
            dp_id=task.dp_id,
            sub_name=new_subname,
            taxonomy=task.taxonomy,
            items_list=task.items_list,
            items_list_presigned=task.items_list_presigned,
            task_data_type=task.task_data_type,
        )
        self.__put_remote_labeling_task(submit_task)

        print(colored("Done.", "green"))

    def put_finished_tasks(self, labels_for_tasks: List[Dict]) -> None:
        """Uploads tasks and labels batches to the backend.

        NOTE(review): asserts on non-empty input — stripped under -O;
        presumably callers always pass a non-empty batch. Verify.
        """
        assert len(labels_for_tasks) > 0
        print(
            colored("[INFO]:", "blue"),
            "Submitting batch of tasks to backend...",
            end=" ",
        )
        self.api_client.putRemoteLabelingTasksAndLabels(
            org_id=self.org_id,
            project_id=self.project_id,
            stage_name=self.stage_name,
            finished_tasks=labels_for_tasks,
        )
        print(colored("Done.", "green"))

    def __put_task_data(
        self,
        dp_id: str,
        sub_name: str,
        task_data: Union[ImageBoundingBox, VideoBoundingBox, VideoClassify],
        taxonomy_name: str,
        taxonomy_version: str,
        td_type: str,
    ) -> None:
        """Serialize the label object and submit it to the backend."""
        task_datas = json.loads(
            str(task_data))  # Convert the object to a dictionary
        self.api_client.putLabels(
            org_id=self.org_id,
            project_id=self.project_id,
            dp_id=dp_id,
            stage_name=self.stage_name,
            sub_name=sub_name,
            labels=task_datas,
            taxonomy_name=taxonomy_name,
            taxonomy_version=taxonomy_version,
            td_type=td_type,
        )

    def __put_remote_labeling_task(self, task: Task) -> None:
        """Put the remote labeling task to the backend."""
        finished_task = {
            "orgId": task.org_id,
            "projectId": task.project_id,
            "stageName": task.stage_name,
            "taskId": task.task_id,
            "newSubName": task.sub_name,
        }
        self.api_client.putRemoteLabelingTask(finished_task)

    def __get_remote_labeling_task(self, num_tasks: int) -> List[Task]:
        """Get the labeling tasks from API."""
        task = self.api_client.tasksToLabelRemote(
            orgId=self.org_id,
            projectId=self.project_id,
            stageName=self.stage_name,
            numTasks=num_tasks,
        )
        return task
class LabelsetIterator:
    """Iterates over all datapoints of a labelset, fetching them in pages."""

    def __init__(self, org_id: str, label_set_name: str) -> None:
        """Construct the iterator and prefetch labelset metadata."""
        self.api = RedBrickApi()
        # Paging state for batched datapoint retrieval.
        self.cursor = None
        self.datapointsBatch = None
        self.datapointsBatchIndex = None

        self.org_id = org_id
        self.label_set_name = label_set_name

        # Task types that need a segmentation taxonomy mapping.
        self.valid_task_types: List = ["SEGMENTATION", "POLYGON", "MULTI"]

        self.customGroup = self._get_custom_group()
        self.taxonomy = self.customGroup["taxonomy"]
        self.datapointCount = self.customGroup["datapointCount"]
        self.taxonomy_segm = self._get_taxonomy_segmentation()

    def _get_custom_group(self) -> Dict:
        """Fetch the custom-group metadata for this labelset.

        BUGFIX: the return annotation was ``-> None`` even though the
        method returns the customGroup dictionary.
        """
        return self.api.get_datapoints_paged(
            org_id=self.org_id, label_set_name=self.label_set_name)["customGroup"]

    def _get_taxonomy_segmentation(self) -> Any:
        """Return a 1-indexed taxonomy map for segmentation-style tasks, else None."""
        if self.customGroup["taskType"] in self.valid_task_types:
            return self._create_taxonomy_segmentation()
        return None

    def _create_taxonomy_segmentation(self) -> Dict[str, int]:
        """Build the name -> class-id map from the taxonomy tree root."""
        tax_map: Dict[str, int] = {}
        self._trav_tax(self.taxonomy["categories"][0], tax_map)
        return self._taxonomy_update_segmentation(tax_map)

    def _trav_tax(self, taxonomy: Dict[Any, Any], tax_map: Dict[str, int]) -> None:
        """Traverse the taxonomy tree structure, and fill the taxonomy mapper object."""
        children = taxonomy["children"]
        if len(children) == 0:
            return

        for child in children:
            tax_map[child["name"]] = child["classId"]
            self._trav_tax(child, tax_map)

    def _taxonomy_update_segmentation(
            self, tax_map: Dict[str, int]) -> Dict[str, int]:
        """Fix the taxonomy mapper object to be 1-indexed for segmentation projects."""
        for key in tax_map.keys():
            tax_map[key] += 1
            if tax_map[key] == 0:
                # A post-increment value of 0 means the class id was -1.
                print_error(
                    "Taxonomy class id's must be 0 indexed. "
                    "Please contact [email protected] for help.")
                exit(1)

        # Add a background class for segmentation
        tax_map["background"] = 0
        return tax_map

    def _trim_labels(self, entry: Dict) -> Dict:
        """Trims None values from labels (in place) and returns the entry."""
        for label in entry["labelData"]["labels"]:
            for k, v in label.copy().items():
                if v is None:
                    del label[k]
        return entry

    def __iter__(self) -> "LabelsetIterator":
        """Return self.

        BUGFIX: annotation was ``Iterable[Dict]``; ``__iter__`` must return
        the iterator object itself.
        """
        return self

    def __next__(self) -> dict:
        """Get next labels / datapoint."""
        # Stop once the final page (no cursor left) is fully consumed.
        if (self.datapointsBatchIndex is not None and self.cursor is None
                and len(self.datapointsBatch) == self.datapointsBatchIndex):
            raise StopIteration

        # Fetch the next page when no batch is loaded yet or the current
        # batch has been fully processed.
        if (self.datapointsBatch is None
                or len(self.datapointsBatch) == self.datapointsBatchIndex):
            if self.cursor is None:
                customGroup = self.api.get_datapoints_paged(
                    org_id=self.org_id, label_set_name=self.label_set_name)
            else:
                customGroup = self.api.get_datapoints_paged(
                    org_id=self.org_id,
                    label_set_name=self.label_set_name,
                    cursor=self.cursor,
                )
            self.cursor = customGroup["customGroup"]["datapointsPaged"][
                "cursor"]
            self.datapointsBatch = customGroup["customGroup"][
                "datapointsPaged"]["entries"]
            self.datapointsBatchIndex = 0

        # Current entry to return
        entry = self.datapointsBatch[self.datapointsBatchIndex]
        self.datapointsBatchIndex += 1

        return self._trim_labels(entry)