def create_instruction(project_id, data_type, instruction_gcs_uri):
    """Create a PDF labeling instruction in the given Google Cloud project.

    The PDF file should already be uploaded to Google Cloud Storage.

    Args:
        project_id: ID of the Google Cloud project.
        data_type: Value stored in the instruction's ``data_type`` field.
        instruction_gcs_uri: Cloud Storage URI of the instruction PDF.

    Returns:
        The result of the create-instruction operation.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_create_instruction_beta]
    # If provided, use a provided test endpoint - this will prevent tests on
    # this snippet from triggering any action by a real human
    if 'DATALABELING_ENDPOINT' in os.environ:
        client = datalabeling.DataLabelingServiceClient(
            client_options=ClientOptions(
                api_endpoint=os.getenv('DATALABELING_ENDPOINT')))
    # [START datalabeling_create_instruction_beta]

    parent = client.project_path(project_id)

    pdf = datalabeling.types.PdfInstruction(gcs_file_uri=instruction_gcs_uri)
    new_instruction = datalabeling.types.Instruction(
        display_name='YOUR_INSTRUCTION_DISPLAY_NAME',
        description='YOUR_DESCRIPTION',
        data_type=data_type,
        pdf_instruction=pdf,
    )

    # Block until the operation finishes.
    result = client.create_instruction(parent, new_instruction).result()

    # The format of the resource name:
    # project_id/{project_id}/instruction/{instruction_id}
    print('The instruction resource name: {}'.format(result.name))
    print('Display name: {}'.format(result.display_name))
    print('Description: {}'.format(result.description))
    print('Create time:')
    print('\tseconds: {}'.format(result.create_time.seconds))
    print('\tnanos: {}'.format(result.create_time.nanos))
    data_type_name = datalabeling.enums.DataType(result.data_type).name
    print('Data type: {}'.format(data_type_name))
    print('Pdf instruction:')
    print('\tGcs file uri: {}\n'.format(result.pdf_instruction.gcs_file_uri))

    return result
def test_create_instruction(capsys):
    """Check that create_instruction prints the new resource name.

    The created instruction is deleted in a ``finally`` clause so that a
    failing assertion does not leave the resource behind in the project.
    """
    result = create_instruction.create_instruction(
        PROJECT_ID, 'IMAGE', INSTRUCTION_GCS_URI)
    try:
        out, _ = capsys.readouterr()
        assert 'The instruction resource name: ' in out
    finally:
        # Delete the created instruction, whether or not the check passed.
        client = datalabeling.DataLabelingServiceClient()
        client.delete_instruction(result.name)
def label_image(dataset_resource_name, instruction_resource_name,
                annotation_spec_set_resource_name):
    """Start an image classification labeling request on a dataset.

    Args:
        dataset_resource_name: Resource name used as the request ``parent``.
        instruction_resource_name: Resource name of the instruction to use.
        annotation_spec_set_resource_name: Resource name of an existing
            annotation spec set.

    Returns:
        The object returned by ``client.label_image``.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling

    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_label_image_beta]
    # If provided, use a provided test endpoint - this will prevent tests on
    # this snippet from triggering any action by a real human
    if "DATALABELING_ENDPOINT" in os.environ:
        client = datalabeling.DataLabelingServiceClient(
            client_options=ClientOptions(
                api_endpoint=os.getenv("DATALABELING_ENDPOINT")))
    # [START datalabeling_label_image_beta]

    human_config = datalabeling.HumanAnnotationConfig(
        instruction=instruction_resource_name,
        annotated_dataset_display_name="YOUR_ANNOTATED_DATASET_DISPLAY_NAME",
        label_group="YOUR_LABEL_GROUP",
        replica_count=1,
    )

    feature = datalabeling.LabelImageRequest.Feature.CLASSIFICATION

    # annotation_spec_set_resource_name needs to be created beforehand.
    # See the examples in the following:
    # https://cloud.google.com/ai-platform/data-labeling/docs/label-sets
    classification_config = datalabeling.ImageClassificationConfig(
        annotation_spec_set=annotation_spec_set_resource_name,
        allow_multi_label=False,
        answer_aggregation_type=datalabeling.StringAggregationType.MAJORITY_VOTE,
    )

    request = {
        "parent": dataset_resource_name,
        "basic_config": human_config,
        "feature": feature,
        "image_classification_config": classification_config,
    }
    response = client.label_image(request=request)

    print("Label_image operation name: {}".format(response.operation.name))
    return response
def annotation_spec_set():
    """Yield a newly created annotation spec set, then delete it."""
    # Set up: create a temporary annotation_spec_set.
    created = create_annotation_spec_set.create_annotation_spec_set(PROJECT_ID)

    yield created

    # Tear down: remove the temporary annotation_spec_set.
    cleanup_client = datalabeling.DataLabelingServiceClient()
    cleanup_client.delete_annotation_spec_set(created.name)
def test_delete_annotated_dataset_exception(self):
    """Expect CustomException from delete_annotated_dataset on a stubbed channel."""
    # Stub channel whose only queued response is a CustomException.
    stub_channel = ChannelStub(responses=[CustomException()])
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    with pytest.raises(CustomException):
        client.delete_annotated_dataset()
def test_create_annotation_spec_set(capsys):
    """Check that create_annotation_spec_set prints the new resource name.

    The created annotation spec set is deleted in a ``finally`` clause so
    that a failing check does not leave the resource behind in the project.
    """
    response = create_annotation_spec_set.create_annotation_spec_set(
        PROJECT_ID)
    try:
        out, _ = capsys.readouterr()
        if 'The annotation_spec_set resource name:' not in out:
            raise AssertionError
    finally:
        # Delete the created annotation spec set, whether or not the check
        # passed.
        client = datalabeling.DataLabelingServiceClient()
        client.delete_annotation_spec_set(response.name)
def instruction():
    """Yield a newly created 'TEXT' instruction, then delete it."""
    # Set up: create a temporary instruction.
    created = create_instruction.create_instruction(
        PROJECT_ID,
        'TEXT',
        'gs://cloud-samples-data/datalabeling/instruction/test.pdf',
    )

    yield created

    # Tear down: remove the temporary instruction.
    cleanup_client = datalabeling.DataLabelingServiceClient()
    cleanup_client.delete_instruction(created.name)
def get_dataset(dataset_resource_name):
    """Fetch a dataset by resource name and print its basic fields.

    Args:
        dataset_resource_name: Resource name passed to ``client.get_dataset``.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_get_dataset_beta]
    # If provided, use a provided test endpoint - this will prevent tests on
    # this snippet from triggering any action by a real human
    if 'DATALABELING_ENDPOINT' in os.environ:
        client = datalabeling.DataLabelingServiceClient(
            client_options=ClientOptions(
                api_endpoint=os.getenv('DATALABELING_ENDPOINT')))
    # [START datalabeling_get_dataset_beta]

    dataset = client.get_dataset(dataset_resource_name)

    print('The dataset resource name: {}\n'.format(dataset.name))
    print('Display name: {}'.format(dataset.display_name))
    print('Description: {}'.format(dataset.description))
    print('Create time:')
    print('\tseconds: {}'.format(dataset.create_time.seconds))
    print('\tnanos: {}'.format(dataset.create_time.nanos))
def create_annotation_spec_set(project_id):
    """Create an annotation spec set with two sample labels.

    Args:
        project_id: ID of the Google Cloud project.

    Returns:
        The response from ``client.create_annotation_spec_set``.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_create_annotation_spec_set_beta]
    # If provided, use a provided test endpoint - this will prevent tests on
    # this snippet from triggering any action by a real human
    if 'DATALABELING_ENDPOINT' in os.environ:
        client = datalabeling.DataLabelingServiceClient(
            client_options=ClientOptions(
                api_endpoint=os.getenv('DATALABELING_ENDPOINT')))
    # [START datalabeling_create_annotation_spec_set_beta]

    parent = client.project_path(project_id)

    first_spec = datalabeling.types.AnnotationSpec(
        display_name='label_1',
        description='label_description_1',
    )
    second_spec = datalabeling.types.AnnotationSpec(
        display_name='label_2',
        description='label_description_2',
    )
    spec_set = datalabeling.types.AnnotationSpecSet(
        display_name='YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME',
        description='YOUR_DESCRIPTION',
        annotation_specs=[first_spec, second_spec],
    )

    response = client.create_annotation_spec_set(parent, spec_set)

    # The format of the resource name:
    # project_id/{project_id}/annotationSpecSets/{annotationSpecSets_id}
    print('The annotation_spec_set resource name: {}'.format(response.name))
    print('Display name: {}'.format(response.display_name))
    print('Description: {}'.format(response.description))
    print('Annotation specs:')
    for spec in response.annotation_specs:
        print('\tDisplay name: {}'.format(spec.display_name))
        print('\tDescription: {}\n'.format(spec.description))

    return response
def test_get_data_item_exception(self):
    """Expect CustomException from get_data_item on a stubbed channel."""
    # Stub channel whose only queued response is a CustomException.
    stub_channel = ChannelStub(responses=[CustomException()])
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    # Build the request argument.
    item_name = client.data_item_path("[PROJECT]", "[DATASET]", "[DATA_ITEM]")

    with pytest.raises(CustomException):
        client.get_data_item(item_name)
def get_dataset(dataset_resource_name):
    """Fetch a dataset by resource name and print its basic fields.

    Args:
        dataset_resource_name: Resource name passed to ``client.get_dataset``.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling

    dataset = datalabeling.DataLabelingServiceClient().get_dataset(
        dataset_resource_name)

    print('The dataset resource name: {}\n'.format(dataset.name))
    print('Display name: {}'.format(dataset.display_name))
    print('Description: {}'.format(dataset.description))
    print('Create time:')
    created = dataset.create_time
    print('\tseconds: {}'.format(created.seconds))
    print('\tnanos: {}'.format(created.nanos))
def label_video(
    dataset_resource_name, instruction_resource_name, annotation_spec_set_resource_name
):
    """Start an object-tracking labeling request on a video dataset.

    Args:
        dataset_resource_name: Resource name used as the request ``parent``.
        instruction_resource_name: Resource name of the instruction to use.
        annotation_spec_set_resource_name: Resource name of the annotation
            spec set placed in the object tracking config.

    Returns:
        The object returned by ``client.label_video``.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling

    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_label_video_beta]
    # If provided, use a provided test endpoint - this will prevent tests on
    # this snippet from triggering any action by a real human
    if "DATALABELING_ENDPOINT" in os.environ:
        client = datalabeling.DataLabelingServiceClient(
            client_options=ClientOptions(
                api_endpoint=os.getenv("DATALABELING_ENDPOINT")))
    # [START datalabeling_label_video_beta]

    human_config = datalabeling.HumanAnnotationConfig(
        instruction=instruction_resource_name,
        annotated_dataset_display_name="YOUR_ANNOTATED_DATASET_DISPLAY_NAME",
        label_group="YOUR_LABEL_GROUP",
        replica_count=1,
    )

    feature = datalabeling.LabelVideoRequest.Feature.OBJECT_TRACKING

    tracking_config = datalabeling.ObjectTrackingConfig(
        annotation_spec_set=annotation_spec_set_resource_name
    )

    request = {
        "parent": dataset_resource_name,
        "basic_config": human_config,
        "feature": feature,
        "object_tracking_config": tracking_config,
    }
    response = client.label_video(request=request)

    print("Label_video operation name: {}".format(response.operation.name))
    return response
def test_label_image(capsys, annotation_spec_set, instruction, dataset):
    """Start an image labeling operation, check its output, then cancel it."""
    # Start labeling.
    response = label_image.label_image(
        dataset.name,
        instruction.name,
        annotation_spec_set.name,
    )
    captured, _ = capsys.readouterr()
    assert 'Label_image operation name: ' in captured
    operation_name = response.operation.name

    # Cancels the labeling operation.
    response.cancel()
    assert response.cancelled() is True

    client = datalabeling.DataLabelingServiceClient()

    # If provided, use a provided test endpoint - this will prevent tests on
    # this snippet from triggering any action by a real human
    if 'DATALABELING_ENDPOINT' in os.environ:
        client = datalabeling.DataLabelingServiceClient(
            client_options=ClientOptions(
                api_endpoint=os.getenv('DATALABELING_ENDPOINT')))

    client.transport._operations_client.cancel_operation(operation_name)
def test_delete_instruction_exception(self):
    """Expect CustomException from delete_instruction on a stubbed channel."""
    # Stub channel whose only queued response is a CustomException.
    stub_channel = ChannelStub(responses=[CustomException()])
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    # Build the request argument.
    instruction_name = client.instruction_path("[PROJECT]", "[INSTRUCTION]")

    with pytest.raises(CustomException):
        client.delete_instruction(instruction_name)
def test_list_annotated_datasets_exception(self):
    """Expect CustomException when the list_annotated_datasets result is iterated."""
    stub_channel = ChannelStub(responses=[CustomException()])
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    # Build the request argument.
    dataset_parent = client.dataset_path("[PROJECT]", "[DATASET]")

    # The call itself sits outside pytest.raises; only consuming the result
    # is expected to raise.
    pager = client.list_annotated_datasets(dataset_parent)
    with pytest.raises(CustomException):
        list(pager)
def test_get_annotation_spec_set_exception(self):
    """Expect CustomException from get_annotation_spec_set on a stubbed channel."""
    # Stub channel whose only queued response is a CustomException.
    stub_channel = ChannelStub(responses=[CustomException()])
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    # Build the request argument.
    spec_set_name = client.annotation_spec_set_path(
        "[PROJECT]", "[ANNOTATION_SPEC_SET]")

    with pytest.raises(CustomException):
        client.get_annotation_spec_set(spec_set_name)
def test_create_annotation_spec_set_exception(self):
    """Expect CustomException from create_annotation_spec_set on a stubbed channel."""
    # Stub channel whose only queued response is a CustomException.
    stub_channel = ChannelStub(responses=[CustomException()])
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    # Build the request arguments.
    project_parent = client.project_path("[PROJECT]")
    empty_spec_set = {}

    with pytest.raises(CustomException):
        client.create_annotation_spec_set(project_parent, empty_spec_set)
def test_delete_annotated_dataset(self):
    """Check that delete_annotated_dataset sends one default request."""
    stub_channel = ChannelStub()
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    client.delete_annotated_dataset()

    # Exactly one request should have been recorded by the stub.
    recorded = stub_channel.requests
    assert len(recorded) == 1
    expected_request = data_labeling_service_pb2.DeleteAnnotatedDatasetRequest()
    actual_request = recorded[0][1]
    assert expected_request == actual_request
def test_label_text(capsys, annotation_spec_set, instruction, dataset):
    """Start a text labeling operation, check its output, then cancel it."""
    # Start labeling.
    response = label_text.label_text(
        dataset.name,
        instruction.name,
        annotation_spec_set.name,
    )
    captured, _ = capsys.readouterr()
    assert 'Label_text operation name: ' in captured
    operation_name = response.operation.name

    # Cancels the labeling operation.
    response.cancel()
    assert response.cancelled() is True

    operations_client = (
        datalabeling.DataLabelingServiceClient().transport._operations_client)
    operations_client.cancel_operation(operation_name)
def list_datasets(project_id):
    """Print basic fields of every dataset in the given Google Cloud project.

    Args:
        project_id: ID of the Google Cloud project.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()
    # [END datalabeling_list_datasets_beta]
    # If provided, use a provided test endpoint - this will prevent tests on
    # this snippet from triggering any action by a real human
    if 'DATALABELING_ENDPOINT' in os.environ:
        client = datalabeling.DataLabelingServiceClient(
            client_options=ClientOptions(
                api_endpoint=os.getenv('DATALABELING_ENDPOINT')))
    # [START datalabeling_list_datasets_beta]

    parent = client.project_path(project_id)

    for dataset in client.list_datasets(parent):
        # The format of resource name:
        # project_id/{project_id}/datasets/{dataset_id}
        print('The dataset resource name: {}\n'.format(dataset.name))
        print('Display name: {}'.format(dataset.display_name))
        print('Description: {}'.format(dataset.description))
        print('Create time:')
        print('\tseconds: {}'.format(dataset.create_time.seconds))
        print('\tnanos: {}'.format(dataset.create_time.nanos))
def list_datasets(project_id):
    """Print basic fields of every dataset in the given Google Cloud project.

    Args:
        project_id: ID of the Google Cloud project.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling

    client = datalabeling.DataLabelingServiceClient()
    parent = client.project_path(project_id)

    for dataset in client.list_datasets(parent):
        # The format of resource name:
        # project_id/{project_id}/datasets/{dataset_id}
        print('The dataset resource name: {}\n'.format(dataset.name))
        print('Display name: {}'.format(dataset.display_name))
        print('Description: {}'.format(dataset.description))
        print('Create time:')
        print('\tseconds: {}'.format(dataset.create_time.seconds))
        print('\tnanos: {}'.format(dataset.create_time.nanos))
def test_delete_dataset(self):
    """Check that delete_dataset sends one request carrying the dataset name."""
    stub_channel = ChannelStub()
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    # Build the request argument.
    name = client.dataset_path("[PROJECT]", "[DATASET]")

    client.delete_dataset(name)

    # Exactly one request should have been recorded by the stub.
    recorded = stub_channel.requests
    assert len(recorded) == 1
    expected_request = data_labeling_service_pb2.DeleteDatasetRequest(name=name)
    actual_request = recorded[0][1]
    assert expected_request == actual_request
def import_data(dataset_resource_name, data_type, input_gcs_uri):
    """Import CSV-described data from Cloud Storage into a dataset.

    Args:
        dataset_resource_name: Resource name of the target dataset.
        data_type: Value stored in the input config's ``data_type`` field.
        input_gcs_uri: Cloud Storage URI used as the source ``input_uri``.

    Returns:
        The result of the import operation.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    input_config = datalabeling.types.InputConfig(
        data_type=data_type,
        gcs_source=datalabeling.types.GcsSource(
            input_uri=input_gcs_uri,
            mime_type='text/csv',
        ),
    )

    operation = client.import_data(dataset_resource_name, input_config)
    # Block until the import finishes.
    result = operation.result()

    # The format of resource name:
    # project_id/{project_id}/datasets/{dataset_id}
    print('Dataset resource name: {}\n'.format(result.dataset))

    return result
def export_data(dataset_resource_name, annotated_dataset_resource_name,
                export_gcs_uri):
    """Export an annotated dataset to Cloud Storage as CSV and print a summary.

    Args:
        dataset_resource_name: Resource name of the dataset to export from.
        annotated_dataset_resource_name: Resource name of the annotated
            dataset to export.
        export_gcs_uri: Cloud Storage URI used as the destination
            ``output_uri``.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    gcs_destination = datalabeling.types.GcsDestination(
        output_uri=export_gcs_uri, mime_type='text/csv')

    output_config = datalabeling.types.OutputConfig(
        gcs_destination=gcs_destination)

    response = client.export_data(
        dataset_resource_name,
        annotated_dataset_resource_name,
        output_config,
    )

    # Resolve the operation once and reuse the result for every print
    # instead of calling result() repeatedly.
    result = response.result()

    print('Dataset ID: {}\n'.format(result.dataset))
    print('Output config:')
    print('\tGcs destination:')
    print('\t\tOutput URI: {}\n'.format(
        result.output_config.gcs_destination.output_uri))
def test_import_data_exception(self):
    """Check that import_data exposes the error carried by a finished operation."""
    # Completed operation carrying an (empty) error status.
    error = status_pb2.Status()
    failed_operation = longrunning_operations_pb2.Operation(
        name="operations/test_import_data_exception", done=True)
    failed_operation.error.CopyFrom(error)

    # Stub channel that answers with the failed operation.
    stub_channel = ChannelStub(responses=[failed_operation])
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    # Build the request arguments.
    name = client.dataset_path("[PROJECT]", "[DATASET]")
    input_config = {}

    response = client.import_data(name, input_config)
    raised = response.exception()
    assert raised.errors[0] == error
def test_label_audio(self):
    """Check label_audio's result and the request it sends."""
    # Expected response packed into a completed operation.
    expected_response = dataset_pb2.AnnotatedDataset(
        name="name3373707",
        display_name="displayName1615086568",
        description="description-1724546052",
        example_count=1517063674,
        completed_example_count=612567290,
    )
    finished_operation = longrunning_operations_pb2.Operation(
        name="operations/test_label_audio", done=True)
    finished_operation.response.Pack(expected_response)

    # Stub channel that answers with the finished operation.
    stub_channel = ChannelStub(responses=[finished_operation])
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    # Build the request arguments.
    parent = client.dataset_path("[PROJECT]", "[DATASET]")
    basic_config = {}
    feature = enums.LabelAudioRequest.Feature.FEATURE_UNSPECIFIED

    response = client.label_audio(parent, basic_config, feature)
    result = response.result()
    assert expected_response == result

    # Exactly one request should have been recorded by the stub.
    recorded = stub_channel.requests
    assert len(recorded) == 1
    expected_request = data_labeling_service_pb2.LabelAudioRequest(
        parent=parent,
        basic_config=basic_config,
        feature=feature,
    )
    actual_request = recorded[0][1]
    assert expected_request == actual_request
def test_label_audio_exception(self):
    """Check that label_audio exposes the error carried by a finished operation."""
    # Completed operation carrying an (empty) error status.
    error = status_pb2.Status()
    failed_operation = longrunning_operations_pb2.Operation(
        name="operations/test_label_audio_exception", done=True)
    failed_operation.error.CopyFrom(error)

    # Stub channel that answers with the failed operation.
    stub_channel = ChannelStub(responses=[failed_operation])
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    # Build the request arguments.
    parent = client.dataset_path("[PROJECT]", "[DATASET]")
    basic_config = {}
    feature = enums.LabelAudioRequest.Feature.FEATURE_UNSPECIFIED

    response = client.label_audio(parent, basic_config, feature)
    raised = response.exception()
    assert raised.errors[0] == error
def test_export_data(self):
    """Check export_data's result and the request it sends."""
    # Expected response packed into a completed operation.
    expected_response = proto_operations_pb2.ExportDataOperationResponse(
        dataset="dataset1443214456",
        total_count=407761836,
        export_count=529256252,
    )
    finished_operation = longrunning_operations_pb2.Operation(
        name="operations/test_export_data", done=True)
    finished_operation.response.Pack(expected_response)

    # Stub channel that answers with the finished operation.
    stub_channel = ChannelStub(responses=[finished_operation])
    with mock.patch(
            "google.api_core.grpc_helpers.create_channel") as channel_factory:
        channel_factory.return_value = stub_channel
        client = datalabeling_v1beta1.DataLabelingServiceClient()

    # Build the request arguments.
    name = client.dataset_path("[PROJECT]", "[DATASET]")
    annotated_dataset = "annotatedDataset-1407812655"
    output_config = {}

    response = client.export_data(name, annotated_dataset, output_config)
    result = response.result()
    assert expected_response == result

    # Exactly one request should have been recorded by the stub.
    recorded = stub_channel.requests
    assert len(recorded) == 1
    expected_request = data_labeling_service_pb2.ExportDataRequest(
        name=name,
        annotated_dataset=annotated_dataset,
        output_config=output_config,
    )
    actual_request = recorded[0][1]
    assert expected_request == actual_request
def create_annotation_spec_set(project_id):
    """Create the 'UCF-101 Full Label Set' annotation spec set in a project.

    Each label in the embedded JSON list becomes one annotation spec whose
    display name and description are both the label text.

    Args:
        project_id: ID of the Google Cloud project.

    Returns:
        The response from ``client.create_annotation_spec_set``.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    parent = client.project_path(project_id)

    labels = json.loads(
        '["Apply Eye Makeup", "Apply Lipstick", "Archery", "Baby Crawling", "Balance Beam", "Band Marching", "Baseball Pitch", "Basketball Shooting", "Basketball Dunk", "Bench Press", "Biking", "Billiards Shot", "Blow Dry Hair", "Blowing Candles", "Body Weight Squats", "Bowling", "Boxing Punching Bag", "Boxing Speed Bag", "Breaststroke", "Brushing Teeth", "Clean and Jerk", "Cliff Diving", "Cricket Bowling", "Cricket Shot", "Cutting In Kitchen", "Diving", "Drumming", "Fencing", "Field Hockey Penalty", "Floor Gymnastics", "Frisbee Catch", "Front Crawl", "Golf Swing", "Haircut", "Hammer Throw", "Hammering", "Handstand Pushups", "Handstand Walking", "Head Massage", "High Jump", "Horse Race", "Horse Riding", "Hula Hoop", "Ice Dancing", "Javelin Throw", "Juggling Balls", "Jump Rope", "Jumping Jack", "Kayaking", "Knitting", "Long Jump", "Lunges", "Military Parade", "Mixing Batter", "Mopping Floor", "Nun chucks", "Parallel Bars", "Pizza Tossing", "Playing Guitar", "Playing Piano", "Playing Tabla", "Playing Violin", "Playing Cello", "Playing Daf", "Playing Dhol", "Playing Flute", "Playing Sitar", "Pole Vault", "Pommel Horse", "Pull Ups", "Punch", "Push Ups", "Rafting", "Rock Climbing Indoor", "Rope Climbing", "Rowing", "Salsa Spins", "Shaving Beard", "Shotput", "Skate Boarding", "Skiing", "Skijet", "Sky Diving", "Soccer Juggling", "Soccer Penalty", "Still Rings", "Sumo Wrestling", "Surfing", "Swing", "Table Tennis Shot", "Tai Chi", "Tennis Swing", "Throw Discus", "Trampoline Jumping", "Typing", "Uneven Bars", "Volleyball Spiking", "Walking with a dog", "Wall Pushups", "Writing On Board", "Yo Yo"]'
    )

    specs = [
        datalabeling.types.AnnotationSpec(display_name=label, description=label)
        for label in labels
    ]

    spec_set = datalabeling.types.AnnotationSpecSet(
        display_name='UCF-101 Full Label Set',
        description='Labels for the UCF-101 dataset',
        annotation_specs=specs,
    )

    response = client.create_annotation_spec_set(parent, spec_set)

    # The format of the resource name:
    # project_id/{project_id}/annotationSpecSets/{annotationSpecSets_id}
    print('The annotation_spec_set resource name: {}'.format(response.name))
    print('Display name: {}'.format(response.display_name))
    print('Description: {}'.format(response.description))
    print('Annotation specs:')
    for spec in response.annotation_specs:
        print('\tDisplay name: {}'.format(spec.display_name))
        print('\tDescription: {}\n'.format(spec.description))

    return response
def create_dataset(project_id):
    """Create a dataset with placeholder metadata in the given project.

    Args:
        project_id: ID of the Google Cloud project.

    Returns:
        The response from ``client.create_dataset``.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    parent = client.project_path(project_id)

    new_dataset = datalabeling.types.Dataset(
        display_name='YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME',
        description='YOUR_DESCRIPTION',
    )

    response = client.create_dataset(parent, new_dataset)

    # The format of resource name:
    # project_id/{project_id}/datasets/{dataset_id}
    print('The dataset resource name: {}\n'.format(response.name))
    print('Display name: {}'.format(response.display_name))
    print('Description: {}'.format(response.description))
    print('Create time:')
    created = response.create_time
    print('\tseconds: {}'.format(created.seconds))
    print('\tnanos: {}'.format(created.nanos))

    return response