def train_automl_model(data_set, timestamp, develop_mode):
    """Launch an AutoML Tabular classification job on Vertex AI.

    Args:
        data_set: a managed TabularDataset to train on.
        timestamp: string used to make the model display name unique.
        develop_mode: when True, trains with a small budget and blocks
            until the job finishes (sync=True); when False, trains with
            the full budget asynchronously.

    Returns:
        The Vertex AI Model produced by the training job.
    """
    display_name = '{}-{}'.format(ENDPOINT_NAME, timestamp)

    training_job = aiplatform.AutoMLTabularTrainingJob(
        display_name='train-{}'.format(display_name),
        optimization_prediction_type='classification',
    )

    # Shrink the node-hour budget for quick development iterations.
    budget = 300 if develop_mode else 2000
    eval_table = '{}:dsongcp.ch9_automl_evaluated'.format(PROJECT)

    # See https://googleapis.dev/python/aiplatform/latest/aiplatform.html#
    return training_job.run(
        dataset=data_set,
        predefined_split_column_name='data_split',
        target_column='ontime',
        model_display_name=display_name,
        budget_milli_node_hours=budget,
        disable_early_stopping=False,
        export_evaluated_data_items=True,
        export_evaluated_data_items_bigquery_destination_uri=eval_table,
        export_evaluated_data_items_override_destination=True,
        sync=develop_mode,
    )
def create_training_pipeline_tabular_regression_sample(
    project: str,
    display_name: str,
    dataset_id: int,
    location: str = "us-central1",
    model_display_name: str = None,
    training_fraction_split: float = 0.8,
    validation_fraction_split: float = 0.1,
    test_fraction_split: float = 0.1,
    budget_milli_node_hours: int = 8000,
    disable_early_stopping: bool = False,
    sync: bool = True,
):
    """Train an AutoML tabular regression model and return it.

    Initializes the SDK for the given project/location, wraps the managed
    dataset identified by ``dataset_id``, runs an AutoML tabular training
    job with the requested split fractions and budget, waits for the model,
    and prints its display name, resource name, and URI.
    """
    aiplatform.init(project=project, location=location)

    # Handles for the training job and the existing managed dataset.
    job = aiplatform.AutoMLTabularTrainingJob(display_name=display_name)
    dataset = aiplatform.TabularDataset(dataset_id)

    model = job.run(
        dataset=dataset,
        training_fraction_split=training_fraction_split,
        validation_fraction_split=validation_fraction_split,
        test_fraction_split=test_fraction_split,
        budget_milli_node_hours=budget_milli_node_hours,
        model_display_name=model_display_name,
        disable_early_stopping=disable_early_stopping,
        sync=sync,
    )

    # Block until training finishes, then report the trained model.
    model.wait()
    for attr in (model.display_name, model.resource_name, model.uri):
        print(attr)
    return model
def test_end_to_end_tabular(self, shared_state):
    """Build dataset, train a custom and AutoML model, deploy, and get predictions"""
    assert shared_state["bucket"]
    bucket = shared_state["bucket"]
    blob = bucket.blob(_BLOB_PATH)

    # Download the CSV file into memory and save it directly to staging bucket
    with request.urlopen(_DATASET_SRC) as response:
        data = response.read()

    blob.upload_from_string(data)

    # Collection of resources generated by this test, to be deleted during teardown
    shared_state["resources"] = []

    aiplatform.init(
        project=e2e_base._PROJECT,
        location=e2e_base._LOCATION,
        staging_bucket=shared_state["staging_bucket_name"],
    )

    # Create and import to single managed dataset for both training jobs
    ds = aiplatform.TabularDataset.create(
        display_name=f"{self._temp_prefix}-dataset-{uuid.uuid4()}",
        gcs_source=[f'gs://{shared_state["staging_bucket_name"]}/{_BLOB_PATH}'],
        sync=False,
    )

    shared_state["resources"].extend([ds])

    # Define both training jobs
    custom_job = aiplatform.CustomTrainingJob(
        display_name=f"{self._temp_prefix}-train-housing-custom-{uuid.uuid4()}",
        script_path=_LOCAL_TRAINING_SCRIPT_PATH,
        container_uri="gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest",
        requirements=["gcsfs==0.7.1"],
        model_serving_container_image_uri="gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-2:latest",
    )
    automl_job = aiplatform.AutoMLTabularTrainingJob(
        display_name=f"{self._temp_prefix}-train-housing-automl-{uuid.uuid4()}",
        optimization_prediction_type="regression",
        optimization_objective="minimize-rmse",
    )

    # Kick off both training jobs, AutoML job will take approx one hour to run
    custom_model = custom_job.run(
        ds,
        replica_count=1,
        model_display_name=f"{self._temp_prefix}-custom-housing-model-{uuid.uuid4()}",
        sync=False,
    )
    automl_model = automl_job.run(
        dataset=ds,
        target_column="median_house_value",
        model_display_name=f"{self._temp_prefix}-automl-housing-model-{uuid.uuid4()}",
        sync=False,
    )

    shared_state["resources"].extend(
        [automl_job, automl_model, custom_job, custom_model])

    # Deploy both models after training completes
    custom_endpoint = custom_model.deploy(machine_type="n1-standard-4", sync=False)
    automl_endpoint = automl_model.deploy(machine_type="n1-standard-4", sync=False)
    shared_state["resources"].extend([automl_endpoint, custom_endpoint])

    # Send online prediction with same instance to both deployed models
    # This sample is taken from an observation where median_house_value = 94600
    custom_endpoint.wait()
    custom_prediction = custom_endpoint.predict([
        {
            "longitude": -124.35,
            "latitude": 40.54,
            "housing_median_age": 52.0,
            "total_rooms": 1820.0,
            "total_bedrooms": 300.0,
            "population": 806,
            "households": 270.0,
            "median_income": 3.014700,
        },
    ])

    automl_endpoint.wait()
    # AutoML tabular endpoints expect string-typed feature values
    automl_prediction = automl_endpoint.predict([
        {
            "longitude": "-124.35",
            "latitude": "40.54",
            "housing_median_age": "52.0",
            "total_rooms": "1820.0",
            "total_bedrooms": "300.0",
            "population": "806",
            "households": "270.0",
            "median_income": "3.014700",
        },
    ])

    # Ensure a single prediction was returned
    assert len(custom_prediction.predictions) == 1
    assert len(automl_prediction.predictions) == 1

    # Ensure the models are remotely accurate
    try:
        automl_result = automl_prediction.predictions[0]["value"]
        custom_result = custom_prediction.predictions[0][0]
        assert 200000 > automl_result > 50000
        assert 200000 > custom_result > 50000
    except (KeyError, IndexError) as e:
        # Fix: chain with `from e` instead of stuffing the exception into the
        # RuntimeError args tuple, so the underlying lookup failure and its
        # traceback are preserved. Also catch IndexError, which the
        # `predictions[0][0]` lookup can raise.
        raise RuntimeError("Unexpected prediction response structure") from e
def _create_automl_tabular_training_job(self):
    """Build (but do not run) an AutoMLTabularTrainingJob from self._params.

    Reads 'vertexai_model_name' and 'prediction_type' from the params dict.

    Returns:
        An aiplatform.AutoMLTabularTrainingJob configured with the given
        display name and prediction type.
    """
    # Fix: the original wrapped each value in a redundant single-variable
    # f-string (f'{x}'); pass the values straight through instead.
    # NOTE(review): assumes both params are already strings — confirm
    # against the config that populates self._params.
    return aiplatform.AutoMLTabularTrainingJob(
        display_name=self._params['vertexai_model_name'],
        optimization_prediction_type=self._params['prediction_type'],
    )
def test_end_to_end_tabular(self, shared_state):
    """Build dataset, train a custom and AutoML model, deploy, and get predictions"""
    assert shared_state["bucket"]
    bucket = shared_state["bucket"]
    blob = bucket.blob(_BLOB_PATH)

    # Download the CSV file into memory and save it directly to staging bucket
    with request.urlopen(_DATASET_SRC) as response:
        data = response.read()

    blob.upload_from_string(data)

    # Collection of resources generated by this test, to be deleted during teardown
    shared_state["resources"] = []

    aiplatform.init(
        project=e2e_base._PROJECT,
        location=e2e_base._LOCATION,
        staging_bucket=shared_state["staging_bucket_name"],
    )

    # Create and import to single managed dataset for both training jobs
    dataset_gcs_source = f'gs://{shared_state["staging_bucket_name"]}/{_BLOB_PATH}'

    ds = aiplatform.TabularDataset.create(
        display_name=self._make_display_name("dataset"),
        gcs_source=[dataset_gcs_source],
        sync=False,
        create_request_timeout=180.0,
    )

    shared_state["resources"].extend([ds])

    # Define both training jobs
    custom_job = aiplatform.CustomTrainingJob(
        display_name=self._make_display_name("train-housing-custom"),
        script_path=_LOCAL_TRAINING_SCRIPT_PATH,
        container_uri="gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest",
        requirements=["gcsfs==0.7.1"],
        model_serving_container_image_uri="gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-2:latest",
    )
    automl_job = aiplatform.AutoMLTabularTrainingJob(
        display_name=self._make_display_name("train-housing-automl"),
        optimization_prediction_type="regression",
        optimization_objective="minimize-rmse",
    )

    # Kick off both training jobs, AutoML job will take approx one hour to run
    custom_model = custom_job.run(
        ds,
        replica_count=1,
        model_display_name=self._make_display_name("custom-housing-model"),
        timeout=1234,
        restart_job_on_worker_restart=True,
        enable_web_access=True,
        sync=False,
        create_request_timeout=None,
    )
    automl_model = automl_job.run(
        dataset=ds,
        target_column="median_house_value",
        model_display_name=self._make_display_name("automl-housing-model"),
        sync=False,
    )

    shared_state["resources"].extend(
        [automl_job, automl_model, custom_job, custom_model])

    # Deploy both models after training completes
    custom_endpoint = custom_model.deploy(machine_type="n1-standard-4", sync=False)
    automl_endpoint = automl_model.deploy(machine_type="n1-standard-4", sync=False)
    shared_state["resources"].extend([automl_endpoint, custom_endpoint])

    # NOTE(review): this batch-predicts with the *custom* model but labels the
    # job "automl-housing-model" — looks like a copy/paste slip; confirm the
    # intended display name before renaming (label only, no functional effect).
    custom_batch_prediction_job = custom_model.batch_predict(
        job_display_name=self._make_display_name("automl-housing-model"),
        instances_format="csv",
        machine_type="n1-standard-4",
        gcs_source=dataset_gcs_source,
        gcs_destination_prefix=f'gs://{shared_state["staging_bucket_name"]}/bp_results/',
        sync=False,
    )

    shared_state["resources"].append(custom_batch_prediction_job)

    # Sampled while the job is still running; re-checked after completion below
    in_progress_done_check = custom_job.done()
    custom_job.wait_for_resource_creation()
    automl_job.wait_for_resource_creation()
    custom_batch_prediction_job.wait_for_resource_creation()

    # Send online prediction with same instance to both deployed models
    # This sample is taken from an observation where median_house_value = 94600
    custom_endpoint.wait()

    # Check scheduling is correctly set
    assert (custom_job._gca_resource.training_task_inputs["scheduling"]
            ["timeout"] == "1234s")
    assert (custom_job._gca_resource.training_task_inputs["scheduling"]
            ["restartJobOnWorkerRestart"] is True)

    custom_prediction = custom_endpoint.predict([_INSTANCE], timeout=180.0)

    custom_batch_prediction_job.wait()

    automl_endpoint.wait()
    automl_prediction = automl_endpoint.predict(
        [{k: str(v) for k, v in _INSTANCE.items()}],  # Cast int values to strings
        timeout=180.0,
    )

    # Test lazy loading of Endpoint, check getter was never called after predict()
    custom_endpoint = aiplatform.Endpoint(custom_endpoint.resource_name)
    custom_endpoint.predict([_INSTANCE])

    completion_done_check = custom_job.done()
    assert custom_endpoint._skipped_getter_call()

    assert (custom_job.state ==
            gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED)
    assert (automl_job.state ==
            gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED)
    assert (custom_batch_prediction_job.state ==
            gca_job_state.JobState.JOB_STATE_SUCCEEDED)

    # Ensure a single prediction was returned
    assert len(custom_prediction.predictions) == 1
    assert len(automl_prediction.predictions) == 1

    # Ensure the models are remotely accurate
    try:
        automl_result = automl_prediction.predictions[0]["value"]
        custom_result = custom_prediction.predictions[0][0]
        assert 200000 > automl_result > 50000
        assert 200000 > custom_result > 50000
    except (KeyError, IndexError) as e:
        # Fix: chain with `from e` instead of stuffing the exception into the
        # RuntimeError args tuple, so the underlying lookup failure and its
        # traceback are preserved. Also catch IndexError, which the
        # `predictions[0][0]` lookup can raise.
        raise RuntimeError("Unexpected prediction response structure") from e

    # Check done() method works correctly
    assert in_progress_done_check is False
    assert completion_done_check is True