def test_run_with_two_split_raises(
    self,
    mock_dataset_text,
    sync,
):
    """Passing both fraction and filter splits to run() must raise ValueError."""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type="classification",
        multi_label=True,
    )

    # Fraction splits and filter splits are mutually exclusive; supplying
    # both families of arguments is rejected before any pipeline is created.
    with pytest.raises(ValueError):
        model_from_job = job.run(
            dataset=mock_dataset_text,
            model_display_name=_TEST_MODEL_DISPLAY_NAME,
            training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING,
            validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION,
            test_fraction_split=_TEST_FRACTION_SPLIT_TEST,
            training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
            validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
            test_filter_split=_TEST_FILTER_SPLIT_TEST,
            sync=sync,
        )
        if not sync:
            model_from_job.wait()
def test_run_raises_if_pipeline_fails(
    self, mock_pipeline_service_create_and_get_with_fail, mock_dataset_text, sync
):
    """A failing training pipeline surfaces as RuntimeError from both run() and get_model()."""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION,
        multi_label=_TEST_CLASSIFICATION_MULTILABEL,
    )

    with pytest.raises(RuntimeError):
        job.run(
            model_display_name=_TEST_MODEL_DISPLAY_NAME,
            dataset=mock_dataset_text,
            training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING,
            validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION,
            test_fraction_split=_TEST_FRACTION_SPLIT_TEST,
            sync=sync,
        )
        if not sync:
            job.wait()

    # After the pipeline fails there is no trained model to fetch.
    with pytest.raises(RuntimeError):
        job.get_model()
def test_run_called_twice_raises(self, mock_dataset_text, sync):
    """Calling run() on a job that has already been run must raise RuntimeError."""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type="classification",
        multi_label=True,
    )

    # Every invocation uses the same arguments; only the second should fail.
    run_kwargs = dict(
        dataset=mock_dataset_text,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING,
        validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION,
        test_fraction_split=_TEST_FRACTION_SPLIT_TEST,
        sync=sync,
    )

    job.run(**run_kwargs)

    with pytest.raises(RuntimeError):
        job.run(**run_kwargs)
def test_splits_default(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_text,
    mock_model_service_get,
    mock_model,
    sync,
):
    """Run an AutoML Text Classification job with no split arguments.

    Initializes aiplatform with an encryption key name, runs the job
    without any fraction/filter split parameters, and verifies that the
    created training pipeline carries an InputDataConfig with no split
    (service-side defaults) plus the default encryption spec.
    """
    # NOTE: the original docstring said "AutoML Video Classification",
    # but this test exercises AutoMLTextTrainingJob — corrected here.
    aiplatform.init(
        project=_TEST_PROJECT,
        encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
    )

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION,
        multi_label=_TEST_CLASSIFICATION_MULTILABEL,
    )

    model_from_job = job.run(
        dataset=mock_dataset_text,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        sync=sync,
        create_request_timeout=None,
    )

    if not sync:
        model_from_job.wait()

    true_managed_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        description=mock_model._gca_resource.description,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    # No split arguments were passed, so the config only names the dataset.
    true_input_data_config = gca_training_pipeline.InputDataConfig(
        dataset_id=mock_dataset_text.name,
    )

    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_text_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
        encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
        timeout=None,
    )
def test_run_call_pipeline_if_no_model_display_name(
    self,
    mock_pipeline_service_create,
    mock_dataset_text,
    mock_model_service_get,
    mock_model,
    sync,
):
    """When model_display_name is None, the job display name is used for the model."""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type="classification",
        multi_label=True,
    )

    model_from_job = job.run(
        dataset=mock_dataset_text,
        training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING,
        validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION,
        test_fraction_split=_TEST_FRACTION_SPLIT_TEST,
        model_display_name=None,  # Omit model_display_name
        sync=sync,
    )

    if not sync:
        model_from_job.wait()

    expected_fraction_split = gca_training_pipeline.FractionSplit(
        training_fraction=_TEST_FRACTION_SPLIT_TRAINING,
        validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION,
        test_fraction=_TEST_FRACTION_SPLIT_TEST,
    )

    # The uploaded model falls back to the job's display name.
    expected_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME)

    expected_input_config = gca_training_pipeline.InputDataConfig(
        fraction_split=expected_fraction_split,
        dataset_id=mock_dataset_text.name,
    )

    expected_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=schema.training_job.definition.automl_text_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION,
        model_to_upload=expected_model,
        input_data_config=expected_input_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=expected_pipeline,
    )
def test_init_all_parameters_extraction(self):
    """Constructor for the extraction task populates all private members correctly."""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type=_TEST_PREDICTION_TYPE_EXTRACTION,
    )

    expected_inputs = training_job_inputs.AutoMlTextExtractionInputs()

    assert job._display_name == _TEST_DISPLAY_NAME
    assert (
        job._training_task_definition
        == schema.training_job.definition.automl_text_extraction
    )
    assert job._training_task_inputs_dict == expected_inputs
def test_init_all_parameters_sentiment(self):
    """Constructor for the sentiment task populates all private members correctly."""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type=_TEST_PREDICTION_TYPE_SENTIMENT,
        sentiment_max=_TEST_SENTIMENT_MAX,
    )

    expected_inputs = training_job_inputs.AutoMlTextSentimentInputs(
        sentiment_max=_TEST_SENTIMENT_MAX
    )

    assert job._display_name == _TEST_DISPLAY_NAME
    assert (
        job._training_task_definition
        == schema.training_job.definition.automl_text_sentiment
    )
    assert job._training_task_inputs_dict == expected_inputs
def test_init_all_parameters_classification(self):
    """Constructor for the classification task populates all private members correctly."""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION,
        multi_label=_TEST_CLASSIFICATION_MULTILABEL,
    )

    expected_inputs = training_job_inputs.AutoMlTextClassificationInputs(
        multi_label=_TEST_CLASSIFICATION_MULTILABEL
    )

    assert job._display_name == _TEST_DISPLAY_NAME
    assert (
        job._training_task_definition
        == schema.training_job.definition.automl_text_classification
    )
    assert job._training_task_inputs_dict == expected_inputs
def test_run_call_pipeline_service_create_sentiment(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_text,
    mock_model_service_get,
    sync,
):
    """Run an AutoML Text Sentiment job and verify the pipeline call and returned model."""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        prediction_type=_TEST_PREDICTION_TYPE_SENTIMENT,
        sentiment_max=10,
    )

    model_from_job = job.run(
        dataset=mock_dataset_text,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        model_labels=_TEST_MODEL_LABELS,
        training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter_split=_TEST_FILTER_SPLIT_TEST,
        sync=sync,
        create_request_timeout=None,
    )

    if not sync:
        model_from_job.wait()

    expected_filter_split = gca_training_pipeline.FilterSplit(
        training_filter=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter=_TEST_FILTER_SPLIT_TEST,
    )

    expected_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS
    )

    expected_input_config = gca_training_pipeline.InputDataConfig(
        filter_split=expected_filter_split,
        dataset_id=mock_dataset_text.name,
    )

    expected_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        training_task_definition=schema.training_job.definition.automl_text_sentiment,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_SENTIMENT,
        model_to_upload=expected_model,
        input_data_config=expected_input_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=expected_pipeline,
        timeout=None,
    )

    mock_model_service_get.assert_called_once_with(
        name=_TEST_MODEL_NAME, retry=base._DEFAULT_RETRY
    )

    # Both the job and the returned model must wrap the mocked resources.
    assert job._gca_resource is mock_pipeline_service_get.return_value
    assert model_from_job._gca_resource is mock_model_service_get.return_value
    assert job.get_model()._gca_resource is mock_model_service_get.return_value
    assert not job.has_failed
    assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
def test_run_call_pipeline_service_create_classification_with_timeout(
    self,
    mock_pipeline_service_create,
    mock_pipeline_service_get,
    mock_dataset_text,
    mock_model_service_get,
    sync,
):
    """Run an AutoML Text Classification job with an explicit create timeout."""
    aiplatform.init(project=_TEST_PROJECT)

    job = training_jobs.AutoMLTextTrainingJob(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION,
        multi_label=_TEST_CLASSIFICATION_MULTILABEL,
        training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME,
        model_encryption_spec_key_name=_TEST_MODEL_ENCRYPTION_KEY_NAME,
    )

    model_from_job = job.run(
        dataset=mock_dataset_text,
        model_display_name=_TEST_MODEL_DISPLAY_NAME,
        model_labels=_TEST_MODEL_LABELS,
        training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter_split=_TEST_FILTER_SPLIT_TEST,
        sync=sync,
        create_request_timeout=180.0,
    )

    if not sync:
        model_from_job.wait()

    expected_filter_split = gca_training_pipeline.FilterSplit(
        training_filter=_TEST_FILTER_SPLIT_TRAINING,
        validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
        test_filter=_TEST_FILTER_SPLIT_TEST,
    )

    expected_model = gca_model.Model(
        display_name=_TEST_MODEL_DISPLAY_NAME,
        labels=_TEST_MODEL_LABELS,
        encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC,
    )

    expected_input_config = gca_training_pipeline.InputDataConfig(
        filter_split=expected_filter_split,
        dataset_id=mock_dataset_text.name,
    )

    expected_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        labels=_TEST_LABELS,
        training_task_definition=schema.training_job.definition.automl_text_classification,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION,
        model_to_upload=expected_model,
        input_data_config=expected_input_config,
        encryption_spec=_TEST_PIPELINE_ENCRYPTION_SPEC,
    )

    # The explicit create_request_timeout must be forwarded to the service call.
    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=expected_pipeline,
        timeout=180.0,
    )