class Label(DbObject, Updateable, BulkDeletable):
    """ Label represents an assessment on a DataRow. For example one label could
    contain 100 bounding boxes (annotations).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Server-side filtering is not available on a Label's reviews.
        self.reviews.supports_filtering = False

    label = Field.String("label")
    seconds_to_label = Field.Float("seconds_to_label")
    agreement = Field.Float("agreement")
    benchmark_agreement = Field.Float("benchmark_agreement")
    is_benchmark_reference = Field.Boolean("is_benchmark_reference")

    project = Relationship.ToOne("Project")
    data_row = Relationship.ToOne("DataRow")
    reviews = Relationship.ToMany("Review", False)
    created_by = Relationship.ToOne("User", False, "created_by")

    @staticmethod
    def bulk_delete(labels):
        """ Deletes all the given Labels.

        Args:
            labels (list of Label): The Labels to delete.
        """
        BulkDeletable._bulk_delete(labels, False)

    def create_review(self, **kwargs):
        """ Creates a Review for this label.

        Kwargs:
            Review attributes. At a minimum a `Review.score` field value
            must be provided.
        """
        review_entity = Entity.Review
        # Attach this Label and its Project to the new Review.
        kwargs[review_entity.label.name] = self
        kwargs[review_entity.project.name] = self.project()
        return self.client._create(review_entity, kwargs)

    def create_benchmark(self):
        """ Creates a Benchmark for this Label.

        Returns:
            The newly created Benchmark.
        """
        param = "labelId"
        mutation = """mutation CreateBenchmarkPyApi($%s: ID!) {
            createBenchmark(data: {labelId: $%s}) {%s}} """ % (
            param, param, query.results_query_part(Entity.Benchmark))
        response = self.client.execute(mutation, {param: self.uid})
        return Entity.Benchmark(self.client, response["createBenchmark"])
class Prediction(DbObject):
    """ A prediction created by a PredictionModel. Legacy editor only.

    Refer to BulkImportRequest if using the new Editor.

    Attributes:
        updated_at (datetime)
        created_at (datetime)
        label (str)
        agreement (float)

        organization (Relationship): `ToOne` relationship to Organization
        prediction_model (Relationship): `ToOne` relationship to
            PredictionModel
        data_row (Relationship): `ToOne` relationship to DataRow
        project (Relationship): `ToOne` relationship to Project
    """
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    organization = Relationship.ToOne("Organization", False)

    label = Field.String("label")
    agreement = Field.Float("agreement")

    # NOTE(review): declaration order presumably drives DbObject field
    # registration — keep it unchanged; verify against the metaclass.
    prediction_model = Relationship.ToOne("PredictionModel", False)
    data_row = Relationship.ToOne("DataRow", False)
    project = Relationship.ToOne("Project", False)
class Review(DbObject, Deletable, Updateable):
    """ Reviewing labeled data is a collaborative quality assurance technique.

    A Review object indicates the quality of the assigned Label. The aggregated
    review numbers can be obtained on a Project object.

    Attributes:
        created_at (datetime)
        updated_at (datetime)
        score (float)

        created_by (Relationship): `ToOne` relationship to User
        organization (Relationship): `ToOne` relationship to Organization
        project (Relationship): `ToOne` relationship to Project
        label (Relationship): `ToOne` relationship to Label
    """

    class NetScore(Enum):
        """ Negative, Zero, or Positive. """
        # Member *names* are used verbatim in GraphQL review-metric queries,
        # so do not rename them.
        Negative = auto()
        Zero = auto()
        Positive = auto()

    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    score = Field.Float("score")

    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)
    project = Relationship.ToOne("Project", False)
    label = Relationship.ToOne("Label", False)
class Benchmark(DbObject):
    """ Represents a benchmark label.

    The Benchmarks tool works by interspersing data to be labeled, for
    which there is a benchmark label, to each person labeling. These labeled
    data are compared against their respective benchmark and an accuracy
    score between 0 and 100 percent is calculated.

    Attributes:
        created_at (datetime)
        last_activity (datetime)
        average_agreement (float)
        completed_count (int)

        created_by (Relationship): `ToOne` relationship to User
        reference_label (Relationship): `ToOne` relationship to Label
    """
    created_at = Field.DateTime("created_at")
    created_by = Relationship.ToOne("User", False, "created_by")
    last_activity = Field.DateTime("last_activity")
    average_agreement = Field.Float("average_agreement")
    completed_count = Field.Int("completed_count")

    reference_label = Relationship.ToOne("Label", False, "reference_label")

    def delete(self):
        """ Deletes this Benchmark on the server side. """
        param_name = "labelId"
        mutation = """mutation DeleteBenchmarkPyApi($%s: ID!) {
            deleteBenchmark(where: {labelId: $%s}) {id}} """ % (param_name,
                                                                param_name)
        # The server addresses a Benchmark by the id of its reference Label.
        referenced_uid = self.reference_label().uid
        self.client.execute(mutation, {param_name: referenced_uid})
class Prediction(DbObject):
    """ A prediction created by a PredictionModel.

    Attributes:
        updated_at (datetime)
        created_at (datetime)
        label (str)
        agreement (float)

        organization (Relationship): `ToOne` relationship to Organization
        prediction_model (Relationship): `ToOne` relationship to
            PredictionModel
        data_row (Relationship): `ToOne` relationship to DataRow
        project (Relationship): `ToOne` relationship to Project
    """
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    organization = Relationship.ToOne("Organization", False)
    label = Field.String("label")
    agreement = Field.Float("agreement")
    prediction_model = Relationship.ToOne("PredictionModel", False)
    data_row = Relationship.ToOne("DataRow", False)
    project = Relationship.ToOne("Project", False)
class Task(DbObject):
    """ Represents a server-side process that might take a longer time to
    process. Allows the Task state to be updated and checked on the client
    side.

    Attributes:
        updated_at (datetime)
        created_at (datetime)
        name (str)
        status (str)
        completion_percentage (float)

        created_by (Relationship): `ToOne` relationship to User
        organization (Relationship): `ToOne` relationship to Organization
    """
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    name = Field.String("name")
    status = Field.String("status")
    completion_percentage = Field.Float("completion_percentage")

    # Relationships
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization")

    def refresh(self):
        """ Refreshes Task data from the server.

        Raises:
            ResourceNotFoundError: If the queried task is not found exactly
                once among the creating user's tasks.
        """
        tasks = list(self._user.created_tasks(where=Task.uid == self.uid))
        if len(tasks) != 1:
            raise ResourceNotFoundError(Task, self.uid)
        # Copy every field value from the freshly fetched object onto self.
        for field in self.fields():
            setattr(self, field.name, getattr(tasks[0], field.name))

    def wait_till_done(self, timeout_seconds=60):
        """ Waits until the task is completed. Periodically queries the server
        to update the task attributes.

        Args:
            timeout_seconds (float): Maximum time this method can block, in
                seconds. Defaults to one minute.
        """
        check_frequency = 2  # frequency of checking, in seconds
        while True:
            if self.status != "IN_PROGRESS":
                return
            sleep_time_seconds = min(check_frequency, timeout_seconds)
            # Lazy %-style args: the message is only formatted if DEBUG
            # logging is actually enabled (was eagerly %-formatted before).
            logger.debug("Task.wait_till_done sleeping for %.2f seconds",
                         sleep_time_seconds)
            if sleep_time_seconds <= 0:
                break
            timeout_seconds -= check_frequency
            time.sleep(sleep_time_seconds)
            self.refresh()
class Prediction(DbObject):
    """ A prediction created by a PredictionModel.

    NOTE: This is used for the legacy editor [1], if you wish to import
    annotations, refer to [2]

    [1] https://labelbox.com/docs/legacy/import-model-prediction
    [2] https://labelbox.com/docs/automation/model-assisted-labeling

    Attributes:
        updated_at (datetime)
        created_at (datetime)
        label (str)
        agreement (float)

        organization (Relationship): `ToOne` relationship to Organization
        prediction_model (Relationship): `ToOne` relationship to
            PredictionModel
        data_row (Relationship): `ToOne` relationship to DataRow
        project (Relationship): `ToOne` relationship to Project
    """
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    organization = Relationship.ToOne("Organization", False)
    label = Field.String("label")
    agreement = Field.Float("agreement")
    prediction_model = Relationship.ToOne("PredictionModel", False)
    data_row = Relationship.ToOne("DataRow", False)
    project = Relationship.ToOne("Project", False)
class Benchmark(DbObject):
    """ Benchmarks (also known as Golden Standard) is a quality assurance tool
    for training data. Training data quality is the measure of accuracy and
    consistency of the training data. Benchmarks works by interspersing data
    to be labeled, for which there is a benchmark label, to each person
    labeling. These labeled data are compared against their respective
    benchmark and an accuracy score between 0 and 100 percent is calculated.

    Attributes:
        created_at (datetime)
        last_activity (datetime)
        average_agreement (float)
        completed_count (int)

        created_by (Relationship): `ToOne` relationship to User
        reference_label (Relationship): `ToOne` relationship to Label
    """
    created_at = Field.DateTime("created_at")
    created_by = Relationship.ToOne("User", False, "created_by")
    last_activity = Field.DateTime("last_activity")
    average_agreement = Field.Float("average_agreement")
    completed_count = Field.Int("completed_count")
    reference_label = Relationship.ToOne("Label", False, "reference_label")

    def delete(self):
        """ Deletes this Benchmark, addressed server-side via the id of its
        reference Label.
        """
        label_param = "labelId"
        query_str = """mutation DeleteBenchmarkPyApi($%s: ID!) {
            deleteBenchmark(where: {labelId: $%s}) {id}} """ % (label_param,
                                                                label_param)
        self.client.execute(query_str,
                            {label_param: self.reference_label().uid})
class Project(DbObject, Updateable, Deletable):
    """ A Project is a container that includes a labeling frontend, an ontology,
    datasets and labels.

    Attributes:
        name (str)
        description (str)
        updated_at (datetime)
        created_at (datetime)
        setup_complete (datetime)
        last_activity_time (datetime)
        auto_audit_number_of_labels (int)
        auto_audit_percentage (float)

        datasets (Relationship): `ToMany` relationship to Dataset
        created_by (Relationship): `ToOne` relationship to User
        organization (Relationship): `ToOne` relationship to Organization
        reviews (Relationship): `ToMany` relationship to Review
        labeling_frontend (Relationship): `ToOne` relationship to
            LabelingFrontend
        labeling_frontend_options (Relationship): `ToMany` relationship to
            LabelingFrontendOptions
        labeling_parameter_overrides (Relationship): `ToMany` relationship to
            LabelingParameterOverride
        webhooks (Relationship): `ToMany` relationship to Webhook
        benchmarks (Relationship): `ToMany` relationship to Benchmark
        active_prediction_model (Relationship): `ToOne` relationship to
            PredictionModel
        predictions (Relationship): `ToMany` relationship to Prediction
        ontology (Relationship): `ToOne` relationship to Ontology
    """
    name = Field.String("name")
    description = Field.String("description")
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    setup_complete = Field.DateTime("setup_complete")
    last_activity_time = Field.DateTime("last_activity_time")
    auto_audit_number_of_labels = Field.Int("auto_audit_number_of_labels")
    auto_audit_percentage = Field.Float("auto_audit_percentage")

    # Relationships
    datasets = Relationship.ToMany("Dataset", True)
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)
    reviews = Relationship.ToMany("Review", True)
    labeling_frontend = Relationship.ToOne("LabelingFrontend")
    labeling_frontend_options = Relationship.ToMany(
        "LabelingFrontendOptions", False, "labeling_frontend_options")
    labeling_parameter_overrides = Relationship.ToMany(
        "LabelingParameterOverride", False, "labeling_parameter_overrides")
    webhooks = Relationship.ToMany("Webhook", False)
    benchmarks = Relationship.ToMany("Benchmark", False)
    active_prediction_model = Relationship.ToOne("PredictionModel", False,
                                                 "active_prediction_model")
    predictions = Relationship.ToMany("Prediction", False)
    ontology = Relationship.ToOne("Ontology", True)

    def members(self):
        """ Fetch all current members for this project.

        Returns:
            A `PaginatedCollection` of `ProjectMember`s.
        """
        id_param = "projectId"
        query_str = """query ProjectMemberOverviewPyApi($%s: ID!) {
             project(where: {id : $%s}) { id members(skip: %%d first: %%d){
               id user { %s } role { id name } } }
           }""" % (id_param, id_param, query.results_query_part(Entity.User))
        return PaginatedCollection(self.client, query_str,
                                   {id_param: str(self.uid)},
                                   ["project", "members"], ProjectMember)

    def create_label(self, **kwargs):
        """ Creates a label on a Legacy Editor project. Not supported in the
        new Editor.

        Args:
            **kwargs: Label attributes. At minimum, the label `DataRow`.
        """
        # Copy-paste of Client._create code so we can inject
        # a connection to Type. Type objects are on their way to being
        # deprecated and we don't want the Py client lib user to know
        # about them. At the same time they're connected to a Label at
        # label creation in a non-standard way (connect via name).
        logger.warning(
            "`create_label` is deprecated and is not compatible with the new editor."
        )
        Label = Entity.Label
        kwargs[Label.project] = self
        kwargs[Label.seconds_to_label] = kwargs.get(
            Label.seconds_to_label.name, 0.0)
        # Convert attribute names to Field objects and DbObject values to
        # their uids before query generation.
        data = {
            Label.attribute(attr) if isinstance(attr, str) else attr:
            value.uid if isinstance(value, DbObject) else value
            for attr, value in kwargs.items()
        }
        query_str, params = query.create(Label, data)
        # Inject connection to Type
        query_str = query_str.replace(
            "data: {", "data: {type: {connect: {name: \"Any\"}} ")
        res = self.client.execute(query_str, params)
        return Label(self.client, res["createLabel"])

    def labels(self, datasets=None, order_by=None):
        """ Custom relationship expansion method to support limited filtering.

        Args:
            datasets (iterable of Dataset): Optional collection of Datasets
                whose Labels are sought. If not provided, all Labels in
                this Project are returned.
            order_by (None or (Field, Field.Order)): Ordering clause.
        """
        Label = Entity.Label

        if datasets is not None:
            where = " where:{dataRow: {dataset: {id_in: [%s]}}}" % ", ".join(
                '"%s"' % dataset.uid for dataset in datasets)
        else:
            where = ""

        if order_by is not None:
            query.check_order_by_clause(Label, order_by)
            order_by_str = "orderBy: %s_%s" % (order_by[0].graphql_name,
                                               order_by[1].name.upper())
        else:
            order_by_str = ""

        id_param = "projectId"
        query_str = """query GetProjectLabelsPyApi($%s: ID!)
            {project (where: {id: $%s})
                {labels (skip: %%d first: %%d %s %s) {%s}}}""" % (
            id_param, id_param, where, order_by_str,
            query.results_query_part(Label))

        return PaginatedCollection(self.client, query_str,
                                   {id_param: self.uid},
                                   ["project", "labels"], Label)

    def export_labels(self, timeout_seconds=60):
        """ Calls the server-side Label exporting that generates a JSON
        payload, and returns the URL to that payload.

        Will only generate a new URL at a max frequency of 30 min.

        Args:
            timeout_seconds (float): Max waiting time, in seconds.
        Returns:
            URL of the data file with this Project's labels. If the server
            didn't generate during the `timeout_seconds` period, None
            is returned.
        """
        sleep_time = 2
        id_param = "projectId"
        query_str = """mutation GetLabelExportUrlPyApi($%s: ID!)
            {exportLabels(data:{projectId: $%s }) {downloadUrl createdAt shouldPoll} }
        """ % (id_param, id_param)

        while True:
            res = self.client.execute(query_str, {id_param: self.uid})
            res = res["exportLabels"]
            if not res["shouldPoll"]:
                return res["downloadUrl"]

            timeout_seconds -= sleep_time
            if timeout_seconds <= 0:
                return None

            logger.debug("Project '%s' label export, waiting for server...",
                         self.uid)
            time.sleep(sleep_time)

    def export_issues(self, status=None):
        """ Calls the server-side Issues exporting that returns the URL to
        that payload.

        Args:
            status (string): valid values: Open, Resolved
        Returns:
            URL of the data file with this Project's issues.
        """
        id_param = "projectId"
        status_param = "status"
        query_str = """query GetProjectIssuesExportPyApi($%s: ID!, $%s: IssueStatus) {
            project(where: { id: $%s }) {
                issueExportUrl(where: { status: $%s })
            }
        }""" % (id_param, status_param, id_param, status_param)

        valid_statuses = {None, "Open", "Resolved"}
        if status not in valid_statuses:
            raise ValueError("status must be in {}. Found {}".format(
                valid_statuses, status))

        res = self.client.execute(query_str, {
            id_param: self.uid,
            status_param: status
        })
        res = res['project']
        logger.debug("Project '%s' issues export, link generated", self.uid)
        return res.get('issueExportUrl')

    def upsert_instructions(self, instructions_file: str):
        """
        * Uploads instructions to the UI. Running more than once will replace
          the instructions.

        Args:
            instructions_file (str): Path to a local file.
                * Must be either a pdf, text, or html file.

        Raises:
            ValueError:
                * project must be setup
                * instructions file must end with one of ".text", ".txt",
                  ".pdf", ".html"
        """
        if self.setup_complete is None:
            raise ValueError(
                "Cannot attach instructions to a project that has not been set up."
            )

        frontend = self.labeling_frontend()
        frontendId = frontend.uid

        if frontend.name != "Editor":
            # Spurious `f` prefix removed: the message is a %-style template
            # with a lazy logging argument, not an f-string.
            logger.warning(
                "This function has only been tested to work with the Editor front end. Found %s",
                frontend.name)

        supported_instruction_formats = (".text", ".txt", ".pdf", ".html")
        if not instructions_file.endswith(supported_instruction_formats):
            raise ValueError(
                f"instructions_file must end with one of {supported_instruction_formats}. Found {instructions_file}"
            )

        lfo = list(self.labeling_frontend_options())[-1]
        instructions_url = self.client.upload_file(instructions_file)
        customization_options = json.loads(lfo.customization_options)
        customization_options['projectInstructions'] = instructions_url
        option_id = lfo.uid

        self.client.execute(
            """mutation UpdateFrontendWithExistingOptionsPyApi (
                    $frontendId: ID!,
                    $optionsId: ID!,
                    $name: String!,
                    $description: String!,
                    $customizationOptions: String!
                ) {
                    updateLabelingFrontend(
                        where: {id: $frontendId},
                        data: {name: $name, description: $description}
                    ) {id}
                    updateLabelingFrontendOptions(
                        where: {id: $optionsId},
                        data: {customizationOptions: $customizationOptions}
                    ) {id}
                }""", {
                "frontendId": frontendId,
                "optionsId": option_id,
                "name": frontend.name,
                "description": "Video, image, and text annotation",
                "customizationOptions": json.dumps(customization_options)
            })

    def labeler_performance(self):
        """ Returns the labeler performances for this Project.

        Returns:
            A PaginatedCollection of LabelerPerformance objects.
        """
        id_param = "projectId"
        query_str = """query LabelerPerformancePyApi($%s: ID!) {
            project(where: {id: $%s}) {
                labelerPerformance(skip: %%d first: %%d) {
                    count user {%s} secondsPerLabel totalTimeLabeling consensus
                    averageBenchmarkAgreement lastActivityTime}
            }}""" % (id_param, id_param, query.results_query_part(Entity.User))

        def create_labeler_performance(client, result):
            result["user"] = Entity.User(client, result["user"])
            # python isoformat doesn't accept Z as utc timezone
            result["lastActivityTime"] = datetime.fromisoformat(
                result["lastActivityTime"].replace('Z', '+00:00'))
            return LabelerPerformance(**{
                utils.snake_case(key): value for key, value in result.items()
            })

        return PaginatedCollection(self.client, query_str,
                                   {id_param: self.uid},
                                   ["project", "labelerPerformance"],
                                   create_labeler_performance)

    def review_metrics(self, net_score):
        """ Returns this Project's review metrics.

        Args:
            net_score (None or Review.NetScore): Indicates desired metric.
        Returns:
            int, aggregation count of reviews for given `net_score`.
        """
        if net_score not in (None, ) + tuple(Entity.Review.NetScore):
            raise InvalidQueryError(
                "Review metrics net score must be either None "
                "or one of Review.NetScore values")
        id_param = "projectId"
        net_score_literal = "None" if net_score is None else net_score.name
        query_str = """query ProjectReviewMetricsPyApi($%s: ID!){
            project(where: {id:$%s})
            {reviewMetrics {labelAggregate(netScore: %s) {count}}}
        }""" % (id_param, id_param, net_score_literal)
        res = self.client.execute(query_str, {id_param: self.uid})
        return res["project"]["reviewMetrics"]["labelAggregate"]["count"]

    def setup(self, labeling_frontend, labeling_frontend_options):
        """ Finalizes the Project setup.

        Args:
            labeling_frontend (LabelingFrontend): Which UI to use to label the
                data.
            labeling_frontend_options (dict or str): Labeling frontend options,
                a.k.a. project ontology. If given a `dict` it will be converted
                to `str` using `json.dumps`.
        """
        organization = self.client.get_organization()
        if not isinstance(labeling_frontend_options, str):
            labeling_frontend_options = json.dumps(labeling_frontend_options)

        self.labeling_frontend.connect(labeling_frontend)

        LFO = Entity.LabelingFrontendOptions
        labeling_frontend_options = self.client._create(
            LFO, {
                LFO.project: self,
                LFO.labeling_frontend: labeling_frontend,
                LFO.customization_options: labeling_frontend_options,
                LFO.organization: organization
            })

        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        self.update(setup_complete=timestamp)

    def validate_labeling_parameter_overrides(self, data):
        """ Validates a labeling-parameter-override payload.

        Each row must be a 3-tuple of (DataRow, priority (int >= 1),
        num_labels (int >= 1)).

        Raises:
            TypeError / ValueError: describing the first offending row.
        """
        for idx, row in enumerate(data):
            if len(row) != 3:
                raise TypeError(
                    f"Data must be a list of tuples containing a DataRow, priority (int), num_labels (int). Found {len(row)} items. Index: {idx}"
                )
            data_row, priority, num_labels = row
            if not isinstance(data_row, DataRow):
                raise TypeError(
                    f"data_row should be be of type DataRow. Found {type(data_row)}. Index: {idx}"
                )

            for name, value in [["Priority", priority],
                                ["Number of labels", num_labels]]:
                if not isinstance(value, int):
                    raise TypeError(
                        f"{name} must be an int. Found {type(value)} for data_row {data_row}. Index: {idx}"
                    )
                if value < 1:
                    raise ValueError(
                        f"{name} must be greater than 0 for data_row {data_row}. Index: {idx}"
                    )

    def set_labeling_parameter_overrides(self, data):
        """ Adds labeling parameter overrides to this project.

        See information on priority here:
            https://docs.labelbox.com/en/configure-editor/queue-system#reservation-system

            >>> project.set_labeling_parameter_overrides([
            >>>     (data_row_1, 2, 3), (data_row_2, 1, 4)])

        Args:
            data (iterable): An iterable of tuples. Each tuple must contain
                (DataRow, priority<int>, number_of_labels<int>) for the new
                override.

                Priority:
                    * Data will be labeled in priority order.
                        - A lower number priority is labeled first.
                        - Minimum priority is 1.
                    * Priority is not the queue position.
                        - The position is determined by the relative priority.
                        - E.g. [(data_row_1, 5,1), (data_row_2, 2,1),
                          (data_row_3, 10,1)] will be assigned in the
                          following order: [data_row_2, data_row_1, data_row_3]
                    * Datarows with parameter overrides will appear before
                      datarows without overrides.
                    * The priority only effects items in the queue.
                        - Assigning a priority will not automatically add the
                          item back into the queue.
                Number of labels:
                    * The number of times a data row should be labeled.
                        - Creates duplicate data rows in a project (one for
                          each number of labels).
                    * New duplicated data rows will be added to the queue.
                        - Already labeled duplicates will not be sent back to
                          the queue.
                    * The queue will never assign the same datarow to a single
                      labeler more than once.
                        - If the number of labels is greater than the number
                          of labelers working on a project then the extra
                          items will remain in the queue (this can be fixed by
                          removing the override at any time).
                    * Setting this to 1 will result in the default behavior
                      (no duplicates).
        Returns:
            bool, indicates if the operation was a success.
        """
        self.validate_labeling_parameter_overrides(data)
        data_str = ",\n".join(
            "{dataRow: {id: \"%s\"}, priority: %d, numLabels: %d }" %
            (data_row.uid, priority, num_labels)
            for data_row, priority, num_labels in data)
        id_param = "projectId"
        query_str = """mutation SetLabelingParameterOverridesPyApi($%s: ID!){
            project(where: { id: $%s }) {setLabelingParameterOverrides
            (data: [%s]) {success}}} """ % (id_param, id_param, data_str)
        res = self.client.execute(query_str, {id_param: self.uid})
        return res["project"]["setLabelingParameterOverrides"]["success"]

    def unset_labeling_parameter_overrides(self, data_rows):
        """ Removes labeling parameter overrides to this project.

        * This will remove unlabeled duplicates in the queue.

        Args:
            data_rows (iterable): An iterable of DataRows.
        Returns:
            bool, indicates if the operation was a success.
        """
        id_param = "projectId"
        query_str = """mutation UnsetLabelingParameterOverridesPyApi($%s: ID!){
            project(where: { id: $%s}) {
            unsetLabelingParameterOverrides(data: [%s]) { success }}}""" % (
            id_param, id_param,
            ",\n".join("{dataRowId: \"%s\"}" % row.uid for row in data_rows))
        res = self.client.execute(query_str, {id_param: self.uid})
        return res["project"]["unsetLabelingParameterOverrides"]["success"]

    def upsert_review_queue(self, quota_factor):
        """ Sets the the proportion of total assets in a project to review.

        More information can be found here:
            https://docs.labelbox.com/en/quality-assurance/review-labels#configure-review-percentage

        Args:
            quota_factor (float): Which part (percentage) of the queue
                to reinitiate. Strictly between 0 and 1 (exclusive).
        """
        # Exclusive bounds; the message now matches the check (it previously
        # claimed the inclusive range "[0,1]").
        if not 0. < quota_factor < 1.:
            raise ValueError("Quota factor must be in the range of (0,1)")
        id_param = "projectId"
        quota_param = "quotaFactor"
        query_str = """mutation UpsertReviewQueuePyApi($%s: ID!, $%s: Float!){
            upsertReviewQueue(where:{project: {id: $%s}}
                            data:{quotaFactor: $%s}) {id}}""" % (
            id_param, quota_param, id_param, quota_param)
        res = self.client.execute(query_str, {
            id_param: self.uid,
            quota_param: quota_factor
        })

    def extend_reservations(self, queue_type):
        """ Extends all the current reservations for the current user on the
        given queue type.

        Args:
            queue_type (str): Either "LabelingQueue" or "ReviewQueue"
        Returns:
            int, the number of reservations that were extended.
        """
        if queue_type not in ("LabelingQueue", "ReviewQueue"):
            raise InvalidQueryError("Unsupported queue type: %s" % queue_type)

        id_param = "projectId"
        query_str = """mutation ExtendReservationsPyApi($%s: ID!){
            extendReservations(projectId:$%s queueType:%s)}""" % (
            id_param, id_param, queue_type)
        res = self.client.execute(query_str, {id_param: self.uid})
        return res["extendReservations"]

    def create_prediction_model(self, name, version):
        """ Creates a PredictionModel connected to a Legacy Editor Project.

        Args:
            name (str): The new PredictionModel's name.
            version (int): The new PredictionModel's version.
        Returns:
            A newly created PredictionModel.
        """
        logger.warning(
            "`create_prediction_model` is deprecated and is not compatible with the new editor."
        )

        PM = Entity.PredictionModel
        model = self.client._create(PM, {
            PM.name.name: name,
            PM.version.name: version
        })
        self.active_prediction_model.connect(model)
        return model

    def create_prediction(self, label, data_row, prediction_model=None):
        """ Creates a Prediction within a Legacy Editor Project. Not supported
        in the new Editor.

        Args:
            label (str): The `label` field of the new Prediction.
            data_row (DataRow): The DataRow for which the Prediction is
                created.
            prediction_model (PredictionModel or None): The PredictionModel
                within which the new Prediction is created. If None then this
                Project's active_prediction_model is used.
        Return:
            A newly created Prediction.
        Raises:
            labelbox.excepions.InvalidQueryError: if given `prediction_model`
                is None and this Project's active_prediction_model is also
                None.
        """
        logger.warning(
            "`create_prediction` is deprecated and is not compatible with the new editor."
        )

        if prediction_model is None:
            prediction_model = self.active_prediction_model()
            if prediction_model is None:
                raise InvalidQueryError(
                    "Project '%s' has no active prediction model" % self.name)

        label_param = "label"
        model_param = "prediction_model_id"
        project_param = "project_id"
        data_row_param = "data_row_id"

        Prediction = Entity.Prediction
        query_str = """mutation CreatePredictionPyApi(
            $%s: String!, $%s: ID!, $%s: ID!, $%s: ID!) {createPrediction(
            data: {label: $%s, predictionModelId: $%s, projectId: $%s,
                   dataRowId: $%s})
            {%s}}""" % (label_param, model_param, project_param,
                        data_row_param, label_param, model_param,
                        project_param, data_row_param,
                        query.results_query_part(Prediction))
        params = {
            label_param: label,
            model_param: prediction_model.uid,
            data_row_param: data_row.uid,
            project_param: self.uid
        }
        res = self.client.execute(query_str, params)
        return Prediction(self.client, res["createPrediction"])

    def enable_model_assisted_labeling(self, toggle: bool = True) -> bool:
        """ Turns model assisted labeling either on or off based on input

        Args:
            toggle (bool): True or False boolean
        Returns:
            True if toggled on or False if toggled off
        """
        project_param = "project_id"
        show_param = "show"

        query_str = """mutation toggle_model_assisted_labelingPyApi($%s: ID!, $%s: Boolean!) {
            project(where: {id: $%s }) {
                showPredictionsToLabelers(show: $%s) {
                    id, showingPredictionsToLabelers
                }
            }
        }""" % (project_param, show_param, project_param, show_param)

        params = {project_param: self.uid, show_param: toggle}

        res = self.client.execute(query_str, params)
        return res["project"]["showPredictionsToLabelers"][
            "showingPredictionsToLabelers"]

    def upload_annotations(
            self,
            name: str,
            annotations: Union[str, Path, Iterable[Dict]],
            validate: bool = True) -> 'BulkImportRequest':  # type: ignore
        """ Uploads annotations to a new Editor project.

        Args:
            name (str): name of the BulkImportRequest job
            annotations (str or Path or Iterable): url that is publicly
                accessible by Labelbox containing an ndjson file OR local path
                to an ndjson file OR iterable of annotation rows
            validate (bool): Whether or not to validate the payload before
                uploading.
        Returns:
            BulkImportRequest
        """

        if isinstance(annotations, str) or isinstance(annotations, Path):

            def _is_url_valid(url: Union[str, Path]) -> bool:
                """ Verifies that the given string is a valid url.

                Args:
                    url: string to be checked
                Returns:
                    True if the given url is valid otherwise False
                """
                if isinstance(url, Path):
                    return False
                parsed = urlparse(url)
                return bool(parsed.scheme) and bool(parsed.netloc)

            if _is_url_valid(annotations):
                return BulkImportRequest.create_from_url(client=self.client,
                                                         project_id=self.uid,
                                                         name=name,
                                                         url=str(annotations),
                                                         validate=validate)
            else:
                path = Path(annotations)
                if not path.exists():
                    raise FileNotFoundError(
                        f'{annotations} is not a valid url nor existing local file'
                    )
                return BulkImportRequest.create_from_local_file(
                    client=self.client,
                    project_id=self.uid,
                    name=name,
                    file=path,
                    validate_file=validate,
                )
        elif isinstance(annotations, Iterable):
            return BulkImportRequest.create_from_objects(
                client=self.client,
                project_id=self.uid,
                name=name,
                predictions=annotations,  # type: ignore
                validate=validate)
        else:
            raise ValueError(
                f'Invalid annotations given of type: {type(annotations)}')
class Project(DbObject, Updateable, Deletable): """ A Project is a container that includes a labeling frontend, an ontology, datasets and labels. """ name = Field.String("name") description = Field.String("description") updated_at = Field.DateTime("updated_at") created_at = Field.DateTime("created_at") setup_complete = Field.DateTime("setup_complete") last_activity_time = Field.DateTime("last_activity_time") auto_audit_number_of_labels = Field.Int("auto_audit_number_of_labels") auto_audit_percentage = Field.Float("auto_audit_percentage") # Relationships datasets = Relationship.ToMany("Dataset", True) created_by = Relationship.ToOne("User", False, "created_by") organization = Relationship.ToOne("Organization", False) reviews = Relationship.ToMany("Review", True) labeling_frontend = Relationship.ToOne("LabelingFrontend") labeling_frontend_options = Relationship.ToMany( "LabelingFrontendOptions", False, "labeling_frontend_options") labeling_parameter_overrides = Relationship.ToMany( "LabelingParameterOverride", False, "labeling_parameter_overrides") webhooks = Relationship.ToMany("Webhook", False) benchmarks = Relationship.ToMany("Benchmark", False) active_prediction_model = Relationship.ToOne("PredictionModel", False, "active_prediction_model") predictions = Relationship.ToMany("Prediction", False) def create_label(self, **kwargs): """ Creates a label on this Project. Kwargs: Label attributes. At the minimum the label `DataRow`. """ # Copy-paste of Client._create code so we can inject # a connection to Type. Type objects are on their way to being # deprecated and we don't want the Py client lib user to know # about them. At the same time they're connected to a Label at # label creation in a non-standard way (connect via name). 
Label = Entity.Label kwargs[Label.project] = self kwargs[Label.seconds_to_label] = kwargs.get( Label.seconds_to_label.name, 0.0) data = { Label.attribute(attr) if isinstance(attr, str) else attr: value.uid if isinstance(value, DbObject) else value for attr, value in kwargs.items() } query_str, params = query.create(Label, data) # Inject connection to Type query_str = query_str.replace( "data: {", "data: {type: {connect: {name: \"Any\"}} ") res = self.client.execute(query_str, params) return Label(self.client, res["createLabel"]) def labels(self, datasets=None, order_by=None): """ Custom relationship expansion method to support limited filtering. Args: datasets (iterable of Dataset): Optional collection of Datasets whose Labels are sought. If not provided, all Labels in this Project are returned. order_by (None or (Field, Field.Order)): Ordering clause. """ Label = Entity.Label if datasets is not None: where = " where:{dataRow: {dataset: {id_in: [%s]}}}" % ", ".join( '"%s"' % dataset.uid for dataset in datasets) else: where = "" if order_by is not None: query.check_order_by_clause(Label, order_by) order_by_str = "orderBy: %s_%s" % (order_by[0].graphql_name, order_by[1].name.upper()) else: order_by_str = "" id_param = "projectId" query_str = """query GetProjectLabelsPyApi($%s: ID!) {project (where: {id: $%s}) {labels (skip: %%d first: %%d%s%s) {%s}}}""" % ( id_param, id_param, where, order_by_str, query.results_query_part(Label)) return PaginatedCollection(self.client, query_str, {id_param: self.uid}, ["project", "labels"], Label) def export_labels(self, timeout_seconds=60): """ Calls the server-side Label exporting that generates a JSON payload, and returns the URL to that payload. Will only generate a new URL at a max frequency of 30 min. Args: timeout_seconds (float): Max waiting time, in seconds. Returns: URL of the data file with this Project's labels. If the server didn't generate during the `timeout_seconds` period, None is returned. 
""" sleep_time = 2 id_param = "projectId" query_str = """mutation GetLabelExportUrlPyApi($%s: ID!) {exportLabels(data:{projectId: $%s }) {downloadUrl createdAt shouldPoll} } """ % (id_param, id_param) while True: res = self.client.execute(query_str, {id_param: self.uid}) res = res["exportLabels"] if not res["shouldPoll"]: return res["downloadUrl"] timeout_seconds -= sleep_time if timeout_seconds <= 0: return None logger.debug("Project '%s' label export, waiting for server...", self.uid) time.sleep(sleep_time) def labeler_performance(self): """ Returns the labeler performances for this Project. Returns: A PaginatedCollection of LabelerPerformance objects. """ id_param = "projectId" query_str = """query LabelerPerformancePyApi($%s: ID!) { project(where: {id: $%s}) { labelerPerformance(skip: %%d first: %%d) { count user {%s} secondsPerLabel totalTimeLabeling consensus averageBenchmarkAgreement lastActivityTime} }}""" % (id_param, id_param, query.results_query_part(Entity.User)) def create_labeler_performance(client, result): result["user"] = Entity.User(client, result["user"]) result["lastActivityTime"] = datetime.fromtimestamp( result["lastActivityTime"] / 1000, timezone.utc) return LabelerPerformance(**{ utils.snake_case(key): value for key, value in result.items() }) return PaginatedCollection(self.client, query_str, {id_param: self.uid}, ["project", "labelerPerformance"], create_labeler_performance) def review_metrics(self, net_score): """ Returns this Project's review metrics. Args: net_score (None or Review.NetScore): Indicates desired metric. Returns: int, aggregation count of reviews for given net_score. 
""" if net_score not in (None, ) + tuple(Entity.Review.NetScore): raise InvalidQueryError( "Review metrics net score must be either None " "or one of Review.NetScore values") id_param = "projectId" net_score_literal = "None" if net_score is None else net_score.name query_str = """query ProjectReviewMetricsPyApi($%s: ID!){ project(where: {id:$%s}) {reviewMetrics {labelAggregate(netScore: %s) {count}}} }""" % (id_param, id_param, net_score_literal) res = self.client.execute(query_str, {id_param: self.uid}) return res["project"]["reviewMetrics"]["labelAggregate"]["count"] def setup(self, labeling_frontend, labeling_frontend_options): """ Finalizes the Project setup. Args: labeling_frontend (LabelingFrontend): Which UI to use to label the data. labeling_frontend_options (dict or str): Labeling frontend options, a.k.a. project ontology. If given a `dict` it will be converted to `str` using `json.dumps`. """ organization = self.client.get_organization() if not isinstance(labeling_frontend_options, str): labeling_frontend_options = json.dumps(labeling_frontend_options) self.labeling_frontend.connect(labeling_frontend) LFO = Entity.LabelingFrontendOptions labeling_frontend_options = self.client._create( LFO, { LFO.project: self, LFO.labeling_frontend: labeling_frontend, LFO.customization_options: labeling_frontend_options, LFO.organization: organization }) timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") self.update(setup_complete=timestamp) def set_labeling_parameter_overrides(self, data): """ Adds labeling parameter overrides to this project. Example: >>> project.set_labeling_parameter_overrides([ >>> (data_row_1, 2, 3), (data_row_2, 1, 4)]) Args: data (iterable): An iterable of tuples. Each tuple must contain (DataRow, priority, numberOfLabels) for the new override. Returns: bool, indicates if the operation was a success. 
""" data_str = ",\n".join( "{dataRow: {id: \"%s\"}, priority: %d, numLabels: %d }" % (data_row.uid, priority, num_labels) for data_row, priority, num_labels in data) id_param = "projectId" query_str = """mutation SetLabelingParameterOverridesPyApi($%s: ID!){ project(where: { id: $%s }) {setLabelingParameterOverrides (data: [%s]) {success}}} """ % (id_param, id_param, data_str) res = self.client.execute(query_str, {id_param: self.uid}) return res["project"]["setLabelingParameterOverrides"]["success"] def unset_labeling_parameter_overrides(self, data_rows): """ Removes labeling parameter overrides to this project. Args: data_rows (iterable): An iterable of DataRows. Returns: bool, indicates if the operation was a success. """ id_param = "projectId" query_str = """mutation UnsetLabelingParameterOverridesPyApi($%s: ID!){ project(where: { id: $%s}) { unsetLabelingParameterOverrides(data: [%s]) { success }}}""" % ( id_param, id_param, ",\n".join("{dataRowId: \"%s\"}" % row.uid for row in data_rows)) res = self.client.execute(query_str, {id_param: self.uid}) return res["project"]["unsetLabelingParameterOverrides"]["success"] def upsert_review_queue(self, quota_factor): """ Reinitiates the review queue for this project. Args: quota_factor (float): Which part (percentage) of the queue to reinitiate. Between 0 and 1. """ id_param = "projectId" quota_param = "quotaFactor" query_str = """mutation UpsertReviewQueuePyApi($%s: ID!, $%s: Float!){ upsertReviewQueue(where:{project: {id: $%s}} data:{quotaFactor: $%s}) {id}}""" % ( id_param, quota_param, id_param, quota_param) res = self.client.execute(query_str, { id_param: self.uid, quota_param: quota_factor }) def extend_reservations(self, queue_type): """ Extends all the current reservations for the current user on the given queue type. Args: queue_type (str): Either "LabelingQueue" or "ReviewQueue" Returns: int, the number of reservations that were extended. 
""" if queue_type not in ("LabelingQueue", "ReviewQueue"): raise InvalidQueryError("Unsupported queue type: %s" % queue_type) id_param = "projectId" query_str = """mutation ExtendReservationsPyApi($%s: ID!){ extendReservations(projectId:$%s queueType:%s)}""" % ( id_param, id_param, queue_type) res = self.client.execute(query_str, {id_param: self.uid}) return res["extendReservations"] def create_prediction_model(self, name, version): """ Creates a PredictionModel connected to this Project. Args: name (str): The new PredictionModel's name. version (int): The new PredictionModel's version. Return: A newly created PredictionModel. """ PM = Entity.PredictionModel model = self.client._create(PM, { PM.name.name: name, PM.version.name: version }) self.active_prediction_model.connect(model) return model def create_prediction(self, label, data_row, prediction_model=None): """ Creates a Prediction within this Project. Args: label (str): The `label` field of the new Prediction. data_row (DataRow): The DataRow for which the Prediction is created. prediction_model (PredictionModel or None): The PredictionModel within which the new Prediction is created. If None then this Project's active_prediction_model is used. Return: A newly created Prediction. Raises: labelbox.excepions.InvalidQueryError: if given `prediction_model` is None and this Project's active_prediction_model is also None. """ if prediction_model is None: prediction_model = self.active_prediction_model() if prediction_model is None: raise InvalidQueryError( "Project '%s' has no active prediction model" % self.name) label_param = "label" model_param = "prediction_model_id" project_param = "project_id" data_row_param = "data_row_id" Prediction = Entity.Prediction query_str = """mutation CreatePredictionPyApi( $%s: String!, $%s: ID!, $%s: ID!, $%s: ID!) 
{createPrediction( data: {label: $%s, predictionModelId: $%s, projectId: $%s, dataRowId: $%s}) {%s}}""" % (label_param, model_param, project_param, data_row_param, label_param, model_param, project_param, data_row_param, query.results_query_part(Prediction)) params = { label_param: label, model_param: prediction_model.uid, data_row_param: data_row.uid, project_param: self.uid } res = self.client.execute(query_str, params) return Prediction(self.client, res["createPrediction"]) def upload_annotations( self, name: str, annotations: Union[str, Union[str, Path], Iterable[dict]], ) -> 'BulkImportRequest': # type: ignore """ Uploads annotations to a project. Args: name: name of the BulkImportRequest job annotations: url that is publicly accessible by Labelbox containing an ndjson file OR local path to an ndjson file OR iterable of annotation rows Returns: BulkImportRequest """ if isinstance(annotations, str) or isinstance(annotations, Path): def _is_url_valid(url: Union[str, Path]) -> bool: """ Verifies that the given string is a valid url. Args: url: string to be checked Returns: True if the given url is valid otherwise False """ if isinstance(url, Path): return False parsed = urlparse(url) return bool(parsed.scheme) and bool(parsed.netloc) if _is_url_valid(annotations): return BulkImportRequest.create_from_url( client=self.client, project_id=self.uid, name=name, url=str(annotations), ) else: path = Path(annotations) if not path.exists(): raise FileNotFoundError( f'{annotations} is not a valid url nor existing local file' ) return BulkImportRequest.create_from_local_file( client=self.client, project_id=self.uid, name=name, file=path, validate_file=True, ) elif isinstance(annotations, Iterable): return BulkImportRequest.create_from_objects( client=self.client, project_id=self.uid, name=name, predictions=annotations, # type: ignore ) else: raise ValueError( f'Invalid annotations given of type: {type(annotations)}')