Example #1
def train_automl_model(data_set, timestamp, develop_mode):
    # train
    model_display_name = '{}-{}'.format(ENDPOINT_NAME, timestamp)
    job = aiplatform.AutoMLTabularTrainingJob(
        display_name='train-{}'.format(model_display_name),
        optimization_prediction_type='classification')
    model = job.run(
        dataset=data_set,
        # See https://googleapis.dev/python/aiplatform/latest/aiplatform.html#
        predefined_split_column_name='data_split',
        target_column='ontime',
        model_display_name=model_display_name,
        budget_milli_node_hours=(300 if develop_mode else 2000),
        disable_early_stopping=False,
        export_evaluated_data_items=True,
        export_evaluated_data_items_bigquery_destination_uri=(
            '{}:dsongcp.ch9_automl_evaluated'.format(PROJECT)),
        export_evaluated_data_items_override_destination=True,
        sync=develop_mode)
    return model
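With develop_mode=True the budget is small and sync=True, so job.run() blocks until training completes. A minimal driver for the function above might look like this sketch; the project, region, and dataset ID are placeholders, not values from the original example.

# Hypothetical driver; every constant below is an assumption for illustration.
from datetime import datetime

from google.cloud import aiplatform

PROJECT = 'my-gcp-project'   # assumed project ID
ENDPOINT_NAME = 'flights'    # assumed name prefix used by train_automl_model

aiplatform.init(project=PROJECT, location='us-central1')   # assumed region
data_set = aiplatform.TabularDataset('1234567890')          # assumed dataset ID
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
model = train_automl_model(data_set, timestamp, develop_mode=True)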
Example #2
from google.cloud import aiplatform


def create_training_pipeline_tabular_regression_sample(
    project: str,
    display_name: str,
    dataset_id: str,
    location: str = "us-central1",
    model_display_name: str = None,
    target_column: str = "target_column",
    training_fraction_split: float = 0.8,
    validation_fraction_split: float = 0.1,
    test_fraction_split: float = 0.1,
    budget_milli_node_hours: int = 8000,
    disable_early_stopping: bool = False,
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    tabular_regression_job = aiplatform.AutoMLTabularTrainingJob(
        display_name=display_name,
        optimization_prediction_type="regression",
    )

    my_tabular_dataset = aiplatform.TabularDataset(dataset_id)

    model = tabular_regression_job.run(
        dataset=my_tabular_dataset,
        target_column=target_column,
        training_fraction_split=training_fraction_split,
        validation_fraction_split=validation_fraction_split,
        test_fraction_split=test_fraction_split,
        budget_milli_node_hours=budget_milli_node_hours,
        model_display_name=model_display_name,
        disable_early_stopping=disable_early_stopping,
        sync=sync,
    )

    model.wait()

    print(model.display_name)
    print(model.resource_name)
    print(model.uri)
    return model
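A sketch of calling this sample with placeholder arguments; every value below is an assumption. Because sync defaults to True, run() blocks and the model.wait() call returns immediately afterwards.

# Placeholder arguments for illustration only; substitute real values.
model = create_training_pipeline_tabular_regression_sample(
    project='my-gcp-project',               # assumed
    display_name='housing-regression',      # assumed
    dataset_id='1234567890123456789',       # assumed TabularDataset ID
    target_column='median_house_value',     # assumed target column
)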
Example #3
    def test_end_to_end_tabular(self, shared_state):
        """Build dataset, train a custom and AutoML model, deploy, and get predictions"""

        assert shared_state["bucket"]
        bucket = shared_state["bucket"]

        blob = bucket.blob(_BLOB_PATH)

        # Download the CSV file into memory and save it directly to the staging bucket
        with request.urlopen(_DATASET_SRC) as response:
            data = response.read()
            blob.upload_from_string(data)

        # Collection of resources generated by this test, to be deleted during teardown
        shared_state["resources"] = []

        aiplatform.init(
            project=e2e_base._PROJECT,
            location=e2e_base._LOCATION,
            staging_bucket=shared_state["staging_bucket_name"],
        )

        # Create and import a single managed dataset for both training jobs

        ds = aiplatform.TabularDataset.create(
            display_name=f"{self._temp_prefix}-dataset-{uuid.uuid4()}",
            gcs_source=[
                f'gs://{shared_state["staging_bucket_name"]}/{_BLOB_PATH}'
            ],
            sync=False,
        )

        shared_state["resources"].extend([ds])

        # Define both training jobs

        custom_job = aiplatform.CustomTrainingJob(
            display_name=
            f"{self._temp_prefix}-train-housing-custom-{uuid.uuid4()}",
            script_path=_LOCAL_TRAINING_SCRIPT_PATH,
            container_uri="gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest",
            requirements=["gcsfs==0.7.1"],
            model_serving_container_image_uri=
            "gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-2:latest",
        )

        automl_job = aiplatform.AutoMLTabularTrainingJob(
            display_name=
            f"{self._temp_prefix}-train-housing-automl-{uuid.uuid4()}",
            optimization_prediction_type="regression",
            optimization_objective="minimize-rmse",
        )

        # Kick off both training jobs; the AutoML job takes approximately one hour to run

        custom_model = custom_job.run(
            ds,
            replica_count=1,
            model_display_name=
            f"{self._temp_prefix}-custom-housing-model-{uuid.uuid4()}",
            sync=False,
        )

        automl_model = automl_job.run(
            dataset=ds,
            target_column="median_house_value",
            model_display_name=
            f"{self._temp_prefix}-automl-housing-model-{uuid.uuid4()}",
            sync=False,
        )

        shared_state["resources"].extend(
            [automl_job, automl_model, custom_job, custom_model])

        # Deploy both models after training completes
        custom_endpoint = custom_model.deploy(machine_type="n1-standard-4",
                                              sync=False)
        automl_endpoint = automl_model.deploy(machine_type="n1-standard-4",
                                              sync=False)
        shared_state["resources"].extend([automl_endpoint, custom_endpoint])

        # Send online prediction with same instance to both deployed models
        # This sample is taken from an observation where median_house_value = 94600
        custom_endpoint.wait()
        custom_prediction = custom_endpoint.predict([
            {
                "longitude": -124.35,
                "latitude": 40.54,
                "housing_median_age": 52.0,
                "total_rooms": 1820.0,
                "total_bedrooms": 300.0,
                "population": 806,
                "households": 270.0,
                "median_income": 3.014700,
            },
        ])
        automl_endpoint.wait()
        automl_prediction = automl_endpoint.predict([
            {
                "longitude": "-124.35",
                "latitude": "40.54",
                "housing_median_age": "52.0",
                "total_rooms": "1820.0",
                "total_bedrooms": "300.0",
                "population": "806",
                "households": "270.0",
                "median_income": "3.014700",
            },
        ])

        # Ensure a single prediction was returned
        assert len(custom_prediction.predictions) == 1
        assert len(automl_prediction.predictions) == 1

        # Ensure the models are remotely accurate
        try:
            automl_result = automl_prediction.predictions[0]["value"]
            custom_result = custom_prediction.predictions[0][0]
            assert 200000 > automl_result > 50000
            assert 200000 > custom_result > 50000
        except KeyError as e:
            raise RuntimeError("Unexpected prediction response structure") from e
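One detail worth noticing: the custom TensorFlow endpoint is sent numeric feature values, while the AutoML tabular endpoint is sent the same instance with every value cast to a string. A small helper, sketched here under a hypothetical name, makes that cast explicit:

def to_automl_instance(instance: dict) -> dict:
    # AutoML tabular endpoints expect each feature value as a string.
    return {key: str(value) for key, value in instance.items()}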
Example #4
    def _create_automl_tabular_training_job(self):
        vertexai_model_name = self._params['vertexai_model_name']
        prediction_type = self._params['prediction_type']
        return aiplatform.AutoMLTabularTrainingJob(
            display_name=f'{vertexai_model_name}',
            optimization_prediction_type=f'{prediction_type}')
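The factory reads exactly two keys from the instance's _params dict. Inlined, it is equivalent to the sketch below; the dict values are assumptions for illustration.

from google.cloud import aiplatform

# Hypothetical params; these are the only two keys the factory reads.
params = {
    'vertexai_model_name': 'my-model',      # assumed display name
    'prediction_type': 'classification',    # 'classification' or 'regression'
}
job = aiplatform.AutoMLTabularTrainingJob(
    display_name=params['vertexai_model_name'],
    optimization_prediction_type=params['prediction_type'])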
Example #5
    def test_end_to_end_tabular(self, shared_state):
        """Build dataset, train a custom and AutoML model, deploy, and get predictions"""

        assert shared_state["bucket"]
        bucket = shared_state["bucket"]

        blob = bucket.blob(_BLOB_PATH)

        # Download the CSV file into memory and save it directly to the staging bucket
        with request.urlopen(_DATASET_SRC) as response:
            data = response.read()
            blob.upload_from_string(data)

        # Collection of resources generated by this test, to be deleted during teardown
        shared_state["resources"] = []

        aiplatform.init(
            project=e2e_base._PROJECT,
            location=e2e_base._LOCATION,
            staging_bucket=shared_state["staging_bucket_name"],
        )

        # Create and import a single managed dataset for both training jobs

        dataset_gcs_source = f'gs://{shared_state["staging_bucket_name"]}/{_BLOB_PATH}'

        ds = aiplatform.TabularDataset.create(
            display_name=self._make_display_name("dataset"),
            gcs_source=[dataset_gcs_source],
            sync=False,
            create_request_timeout=180.0,
        )

        shared_state["resources"].extend([ds])

        # Define both training jobs

        custom_job = aiplatform.CustomTrainingJob(
            display_name=self._make_display_name("train-housing-custom"),
            script_path=_LOCAL_TRAINING_SCRIPT_PATH,
            container_uri="gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest",
            requirements=["gcsfs==0.7.1"],
            model_serving_container_image_uri=
            "gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-2:latest",
        )

        automl_job = aiplatform.AutoMLTabularTrainingJob(
            display_name=self._make_display_name("train-housing-automl"),
            optimization_prediction_type="regression",
            optimization_objective="minimize-rmse",
        )

        # Kick off both training jobs; the AutoML job takes approximately one hour to run

        custom_model = custom_job.run(
            ds,
            replica_count=1,
            model_display_name=self._make_display_name("custom-housing-model"),
            timeout=1234,
            restart_job_on_worker_restart=True,
            enable_web_access=True,
            sync=False,
            create_request_timeout=None,
        )

        automl_model = automl_job.run(
            dataset=ds,
            target_column="median_house_value",
            model_display_name=self._make_display_name("automl-housing-model"),
            sync=False,
        )

        shared_state["resources"].extend(
            [automl_job, automl_model, custom_job, custom_model])

        # Deploy both models after training completes
        custom_endpoint = custom_model.deploy(machine_type="n1-standard-4",
                                              sync=False)
        automl_endpoint = automl_model.deploy(machine_type="n1-standard-4",
                                              sync=False)
        shared_state["resources"].extend([automl_endpoint, custom_endpoint])

        custom_batch_prediction_job = custom_model.batch_predict(
            job_display_name=self._make_display_name("automl-housing-model"),
            instances_format="csv",
            machine_type="n1-standard-4",
            gcs_source=dataset_gcs_source,
            gcs_destination_prefix=
            f'gs://{shared_state["staging_bucket_name"]}/bp_results/',
            sync=False,
        )

        shared_state["resources"].append(custom_batch_prediction_job)

        in_progress_done_check = custom_job.done()
        custom_job.wait_for_resource_creation()

        automl_job.wait_for_resource_creation()
        custom_batch_prediction_job.wait_for_resource_creation()

        # Send online prediction with same instance to both deployed models
        # This sample is taken from an observation where median_house_value = 94600
        custom_endpoint.wait()

        # Check scheduling is correctly set
        assert (custom_job._gca_resource.training_task_inputs["scheduling"]
                ["timeout"] == "1234s")
        assert (custom_job._gca_resource.training_task_inputs["scheduling"]
                ["restartJobOnWorkerRestart"] is True)

        custom_prediction = custom_endpoint.predict([_INSTANCE], timeout=180.0)

        custom_batch_prediction_job.wait()

        automl_endpoint.wait()
        automl_prediction = automl_endpoint.predict(
            [{k: str(v)
              for k, v in _INSTANCE.items()}],  # Cast int values to strings
            timeout=180.0,
        )

        # Test lazy loading of Endpoint, check getter was never called after predict()
        custom_endpoint = aiplatform.Endpoint(custom_endpoint.resource_name)
        custom_endpoint.predict([_INSTANCE])

        completion_done_check = custom_job.done()
        assert custom_endpoint._skipped_getter_call()

        assert (custom_job.state ==
                gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED)
        assert (automl_job.state ==
                gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED)
        assert (custom_batch_prediction_job.state ==
                gca_job_state.JobState.JOB_STATE_SUCCEEDED)

        # Ensure a single prediction was returned
        assert len(custom_prediction.predictions) == 1
        assert len(automl_prediction.predictions) == 1

        # Ensure the models are remotely accurate
        try:
            automl_result = automl_prediction.predictions[0]["value"]
            custom_result = custom_prediction.predictions[0][0]
            assert 200000 > automl_result > 50000
            assert 200000 > custom_result > 50000
        except KeyError as e:
            raise RuntimeError("Unexpected prediction response structure") from e

        # Check done() method works correctly
        assert in_progress_done_check is False
        assert completion_done_check is True
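The two done() snapshots at the end work because both jobs were started with sync=False: done() returns False while the pipeline is still running and True once it reaches a terminal state. A minimal polling sketch, with an arbitrary interval:

import time

# Hypothetical polling loop; the 60-second interval is an assumption.
while not custom_job.done():
    time.sleep(60)
print(custom_job.state)  # e.g. PIPELINE_STATE_SUCCEEDED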