Python Dataset示例，google.cloud.automl.Dataset Python示例

示例#1

0

显示文件

def create_dataset(project_id, display_name):
    """Create an AutoML image object detection dataset and print its identifiers.

    Args:
        project_id: Project id interpolated into the parent resource path.
        display_name: Display name assigned to the new dataset.

    The request is sent to the hard-coded ``us-central1`` location. The
    function calls ``result()`` on the object returned by ``create_dataset``
    and prints the created dataset's resource name and its trailing path
    segment (the dataset id). Nothing is returned.
    """
    # [START automl_vision_object_detection_create_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # display_name = "your_datasets_display_name"

    automl_client = automl.AutoMlClient()

    # Parent resource: the project scoped to a Google Cloud Platform location.
    parent = f"projects/{project_id}/locations/us-central1"

    # Default (empty) object detection metadata marks the dataset's type.
    detection_dataset = automl.Dataset(
        display_name=display_name,
        image_object_detection_dataset_metadata=(
            automl.ImageObjectDetectionDatasetMetadata()
        ),
    )

    # Submit the creation request, then fetch the finished dataset.
    operation = automl_client.create_dataset(
        parent=parent, dataset=detection_dataset
    )
    created = operation.result()

    # The dataset id is the last "/"-separated segment of the resource name.
    resource_name = created.name
    print("Dataset name: {}".format(resource_name))
    print("Dataset id: {}".format(resource_name.split("/")[-1]))

示例#2

0

显示文件

文件： language_text_classification_create_dataset.py 项目： renovate-bot/python-automl

def create_dataset(project_id, display_name):
    """Create an AutoML text classification dataset and print its identifiers.

    Args:
        project_id: Project id interpolated into the parent resource path.
        display_name: Display name assigned to the new dataset.

    The dataset is created as MULTICLASS in the hard-coded ``us-central1``
    location. The function calls ``result()`` on the object returned by
    ``create_dataset`` and prints the created dataset's resource name and
    its trailing path segment (the dataset id). Nothing is returned.
    """
    # [START automl_language_text_classification_create_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # display_name = "YOUR_DATASET_NAME"

    automl_client = automl.AutoMlClient()

    # Parent resource: the project scoped to a Google Cloud Platform location.
    parent = f"projects/{project_id}/locations/us-central1"

    # Classification type options:
    #   MultiLabel: Multiple labels are allowed for one example.
    #   MultiClass: At most one label is allowed per example.
    classification_metadata = automl.TextClassificationDatasetMetadata(
        classification_type=automl.ClassificationType.MULTICLASS,
    )
    text_dataset = automl.Dataset(
        display_name=display_name,
        text_classification_dataset_metadata=classification_metadata,
    )

    # Submit the creation request, then fetch the finished dataset.
    operation = automl_client.create_dataset(parent=parent, dataset=text_dataset)
    created = operation.result()

    # The dataset id is the last "/"-separated segment of the resource name.
    resource_name = created.name
    print("Dataset name: {}".format(resource_name))
    print("Dataset id: {}".format(resource_name.split("/")[-1]))

示例#3

0

显示文件

def create_dataset(project_id, display_name):
    """Create an AutoML translation dataset and print its identifiers.

    Args:
        project_id: Project id interpolated into the parent resource path.
        display_name: Display name assigned to the new dataset.

    The dataset is configured for the language pair "en" -> "ja" in the
    hard-coded ``us-central1`` location. The function calls ``result()`` on
    the object returned by ``create_dataset`` and prints the created
    dataset's resource name and its trailing path segment (the dataset id).
    Nothing is returned.
    """
    # [START automl_translate_create_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # display_name = "YOUR_DATASET_NAME"

    automl_client = automl.AutoMlClient()

    # Parent resource: the project scoped to a Google Cloud Platform location.
    parent = f"projects/{project_id}/locations/us-central1"

    # Supported language codes are listed at:
    # https://cloud.google.com/translate/automl/docs/languages
    translation_metadata = automl.TranslationDatasetMetadata(
        source_language_code="en",
        target_language_code="ja",
    )
    translation_dataset = automl.Dataset(
        display_name=display_name,
        translation_dataset_metadata=translation_metadata,
    )

    # Submit the creation request, then fetch the finished dataset.
    operation = automl_client.create_dataset(
        parent=parent, dataset=translation_dataset
    )
    created = operation.result()

    # The dataset id is the last "/"-separated segment of the resource name.
    resource_name = created.name
    print("Dataset name: {}".format(resource_name))
    print("Dataset id: {}".format(resource_name.split("/")[-1]))

示例#4

0

显示文件

文件： language_sentiment_analysis_create_dataset.py 项目： renovate-bot/python-automl

def create_dataset(project_id, display_name):
    """Create an AutoML text sentiment dataset and print its identifiers.

    Args:
        project_id: Project id interpolated into the parent resource path.
        display_name: Display name assigned to the new dataset.

    The dataset is created with ``sentiment_max=4`` in the hard-coded
    ``us-central1`` location. The function calls ``result()`` on the object
    returned by ``create_dataset`` and prints the created dataset's resource
    name and its trailing path segment (the dataset id). Nothing is returned.
    """
    # [START automl_language_sentiment_analysis_create_dataset]
    from google.cloud import automl

    # TODO(developer): Uncomment and set the following variables
    # project_id = "YOUR_PROJECT_ID"
    # display_name = "YOUR_DATASET_NAME"

    automl_client = automl.AutoMlClient()

    # Parent resource: the project scoped to a Google Cloud Platform location.
    parent = f"projects/{project_id}/locations/us-central1"

    # A sentiment dataset must declare its maximum sentiment score up front.
    # Background on TextSentimentDatasetMetadata:
    # https://cloud.google.com/natural-language/automl/docs/prepare#sentiment-analysis
    # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#textsentimentdatasetmetadata
    sentiment_metadata = automl.TextSentimentDatasetMetadata(
        sentiment_max=4,  # Possible max sentiment score: 1-10
    )
    sentiment_dataset = automl.Dataset(
        display_name=display_name,
        text_sentiment_dataset_metadata=sentiment_metadata,
    )

    # Submit the creation request, then fetch the finished dataset.
    operation = automl_client.create_dataset(
        parent=parent, dataset=sentiment_dataset
    )
    created = operation.result()

    # The dataset id is the last "/"-separated segment of the resource name.
    resource_name = created.name
    print("Dataset name: {}".format(resource_name))
    print("Dataset id: {}".format(resource_name.split("/")[-1]))

示例#5

0

显示文件

    def train_text_extraction_model(
        self,
        display_name: (str, 'the display name for the dataset and model'),
        input_paths: (
            str,
            'the paths to csv files describing the input data for a new dataset'
        ) = '',
        dataset_id: (str, 'the id of an existing dataset to reuse') = '',
        train_budget_milli_node_hours: int = 24000
    ) -> Operation:
        """Build a text extraction model spec and pass it to train_automl_model.

        Args:
            display_name: Used as the display name of both the model and,
                when one is described here, the new dataset.
            input_paths: Forwarded unchanged to ``self.train_automl_model``.
            dataset_id: Id of an existing dataset. When it is empty, a new
                text extraction ``automl.Dataset`` description is built and
                forwarded; otherwise ``None`` is forwarded as the dataset.
            train_budget_milli_node_hours: Accepted but not used by this
                method.

        Returns:
            Whatever ``self.train_automl_model`` returns (annotated as
            ``Operation``).
        """
        # Only describe a new dataset when no existing dataset id was given.
        if len(dataset_id) == 0:
            new_dataset = automl.Dataset(
                display_name=display_name,
                text_extraction_dataset_metadata={},
            )
        else:
            new_dataset = None

        extraction_model = automl.Model(
            display_name=display_name,
            dataset_id=dataset_id,
            text_extraction_model_metadata=automl.TextExtractionModelMetadata(),
        )

        return self.train_automl_model(
            model=extraction_model,
            dataset=new_dataset,
            dataset_id=dataset_id,
            input_paths=input_paths,
        )

示例#6

0

显示文件

def dataset_id():
    """Create a text extraction dataset and yield its id.

    The dataset is created under ``PROJECT_ID`` in ``us-central1`` with a
    display name of ``"test_"`` followed by the current timestamp. The
    yielded value is the last "/"-separated segment of the created
    dataset's resource name. No statement follows the ``yield``, so this
    generator does not delete the dataset itself.

    NOTE(review): presumably a pytest fixture whose decorator lies outside
    this view -- confirm against the full file.
    """
    automl_client = automl.AutoMlClient()
    parent = f"projects/{PROJECT_ID}/locations/us-central1"

    # Timestamped name keeps repeated runs from colliding on display name.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    extraction_dataset = automl.Dataset(
        display_name="test_" + timestamp,
        text_extraction_dataset_metadata=automl.TextExtractionDatasetMetadata(),
    )

    operation = automl_client.create_dataset(
        parent=parent, dataset=extraction_dataset
    )
    created = operation.result()

    # The id is the trailing component of the dataset's resource name.
    yield created.name.split("/")[-1]

示例#7

0

显示文件

    def train_image_classification_model(
        self,
        display_name: (str, 'the display name for the dataset and model'),
        input_paths: (
            str,
            'the paths to csv files describing the input data for a new dataset'
        ) = '',
        dataset_id: (str, 'the id of an existing dataset to reuse') = '',
        classification_type: (
            automl.ClassificationType,
            'MULTICLASS or MULTILABEL'
        ) = automl.ClassificationType.MULTICLASS,
        train_budget_milli_node_hours: int = 24000
    ) -> Operation:
        """Build an image classification model spec and pass it to train_automl_model.

        Args:
            display_name: Used as the display name of both the model and,
                when one is described here, the new dataset.
            input_paths: Forwarded unchanged to ``self.train_automl_model``.
            dataset_id: Id of an existing dataset. When it is empty, a new
                image classification ``automl.Dataset`` description is built
                and forwarded; otherwise ``None`` is forwarded as the dataset.
            classification_type: Classification type placed in the new
                dataset's metadata; only consulted when ``dataset_id`` is
                empty.
            train_budget_milli_node_hours: Training budget stored in the
                model metadata.

        Returns:
            Whatever ``self.train_automl_model`` returns (annotated as
            ``Operation``).
        """
        new_dataset = None
        if len(dataset_id) == 0:
            # No dataset to reuse, so describe a fresh one.
            # Classification type options:
            #   MultiLabel: Multiple labels are allowed for one example.
            #   MultiClass: At most one label is allowed per example.
            # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#classificationtype
            dataset_metadata = automl.ImageClassificationDatasetMetadata(
                classification_type=classification_type,
            )
            new_dataset = automl.Dataset(
                display_name=display_name,
                image_classification_dataset_metadata=dataset_metadata,
            )

        # The base model is left unset so Google's default base model is used.
        # train_budget_milli_node_hours: the actual train_cost will be equal
        # to or less than this value.
        # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#imageclassificationmodelmetadata
        model_metadata = automl.ImageClassificationModelMetadata(
            train_budget_milli_node_hours=train_budget_milli_node_hours,
        )
        classification_model = automl.Model(
            display_name=display_name,
            dataset_id=dataset_id,
            image_classification_model_metadata=model_metadata,
        )

        return self.train_automl_model(
            model=classification_model,
            dataset=new_dataset,
            dataset_id=dataset_id,
            input_paths=input_paths,
        )

示例#8

0

显示文件

    def train_text_classification_model(
        self,
        display_name: (str, 'the display name for the dataset and model'),
        input_paths: (
            str,
            'the paths to csv files describing the input data for a new dataset'
        ) = '',
        dataset_id: (str, 'the id of an existing dataset to reuse') = '',
        classification_type: (
            automl.ClassificationType,
            'MULTICLASS or MULTILABEL') = automl.ClassificationType.MULTICLASS
    ) -> Operation:
        """Build a text classification model spec and pass it to train_automl_model.

        Args:
            display_name: Used as the display name of both the model and,
                when one is described here, the new dataset.
            input_paths: Forwarded unchanged to ``self.train_automl_model``.
            dataset_id: Id of an existing dataset. When it is empty, a new
                text classification ``automl.Dataset`` description is built
                and forwarded; otherwise ``None`` is forwarded as the dataset.
            classification_type: Classification type placed in the new
                dataset's metadata; only consulted when ``dataset_id`` is
                empty.

        Returns:
            Whatever ``self.train_automl_model`` returns (annotated as
            ``Operation``).
        """
        new_dataset = None
        if len(dataset_id) == 0:
            # No dataset to reuse, so describe a fresh one.
            # Classification type options:
            #   MultiLabel: Multiple labels are allowed for one example.
            #   MultiClass: At most one label is allowed per example.
            # https://cloud.google.com/automl/docs/reference/rpc/google.cloud.automl.v1#classificationtype
            dataset_metadata = automl.TextClassificationDatasetMetadata(
                classification_type=classification_type,
            )
            new_dataset = automl.Dataset(
                display_name=display_name,
                text_classification_dataset_metadata=dataset_metadata,
            )

        # Empty model metadata marks the model as a text classification model.
        classification_model = automl.Model(
            display_name=display_name,
            dataset_id=dataset_id,
            text_classification_model_metadata={},
        )

        return self.train_automl_model(
            model=classification_model,
            dataset=new_dataset,
            dataset_id=dataset_id,
            input_paths=input_paths,
        )

示例#9

0

显示文件

    
    display_name = "%s_%d_%d_%d"%(data_config['dataset_name'],
                                  int(data_config['noise_ratio']*100),
                                  data_config['pos_limit'], data_config['neg_limit'])

    start_time = time.time()
    
    client = automl.AutoMlClient()

    project_location = f"projects/{project_id}/locations/us-central1"

    metadata = automl.ImageClassificationDatasetMetadata(
        classification_type=automl.ClassificationType.MULTICLASS
    )
    dataset = automl.Dataset(
        display_name=display_name,
        image_classification_dataset_metadata=metadata,
    )

    # Create a dataset with the dataset metadata in the region.
    response = client.create_dataset(parent=project_location, dataset=dataset)

    created_dataset = response.result()

    # Display the dataset information
    print("Dataset name: {}".format(created_dataset.name))
    print("Dataset id: {}".format(created_dataset.name.split("/")[-1]))

    storage_client = storage.Client()
    bucket = storage_client.get_bucket(f"{project_id}-vcm")

    pos_train_images_gc = add_images_to_gc(data['pos_train_images'])