def test_deploy_with_traffic_percent(self, deploy_model_mock, sync):
    """Check the traffic split sent when deploying with ``traffic_percentage=70``.

    ``get_endpoint`` is patched to report an endpoint whose current split is
    ``{"model1": 100}``; the deploy request is then expected to carry
    ``{"model1": 30, "0": 70}``.

    Args:
        deploy_model_mock: Mock whose ``assert_called_once_with`` is checked
            (presumably a fixture patching ``deploy_model``; it is defined
            outside this block -- confirm).
        sync: Forwarded to ``Endpoint.deploy``; when falsy the test calls
            ``wait()`` on the endpoint before asserting.
    """
    # The endpoint state the patched ``get_endpoint`` will hand back.
    existing_endpoint = gca_endpoint.Endpoint(
        display_name=_TEST_DISPLAY_NAME,
        name=_TEST_ENDPOINT_NAME,
        traffic_split={"model1": 100},
    )

    with mock.patch.object(
        endpoint_service_client.EndpointServiceClient, "get_endpoint"
    ) as patched_get_endpoint:
        patched_get_endpoint.return_value = existing_endpoint

        endpoint_under_test = models.Endpoint(_TEST_ENDPOINT_NAME)
        model_to_deploy = models.Model(_TEST_ID)
        endpoint_under_test.deploy(
            model=model_to_deploy, traffic_percentage=70, sync=sync
        )
        if not sync:
            endpoint_under_test.wait()

        expected_deployed_model = gca_endpoint.DeployedModel(
            automatic_resources=gca_machine_resources.AutomaticResources(
                min_replica_count=1,
                max_replica_count=1,
            ),
            model=model_to_deploy.resource_name,
            display_name=None,
        )
        expected_call = dict(
            endpoint=endpoint_under_test.resource_name,
            deployed_model=expected_deployed_model,
            traffic_split={"model1": 30, "0": 70},
            metadata=(),
        )
        deploy_model_mock.assert_called_once_with(**expected_call)
def test_deploy_with_display_name(self, deploy_model_mock, sync):
    """Check that ``deployed_model_display_name`` reaches the deploy request.

    The expected ``DeployedModel`` carries ``_TEST_DISPLAY_NAME`` as its
    display name, automatic resources with one min/max replica, and the
    request is expected to use the traffic split ``{"0": 100}``.

    Args:
        deploy_model_mock: Mock whose ``assert_called_once_with`` is checked
            (presumably a fixture patching ``deploy_model``; it is defined
            outside this block -- confirm).
        sync: Forwarded to ``Endpoint.deploy``; when falsy the test calls
            ``wait()`` on the endpoint before asserting.
    """
    endpoint_under_test = models.Endpoint(_TEST_ENDPOINT_NAME)
    model_to_deploy = models.Model(_TEST_ID)

    endpoint_under_test.deploy(
        model=model_to_deploy,
        deployed_model_display_name=_TEST_DISPLAY_NAME,
        sync=sync,
    )
    if not sync:
        endpoint_under_test.wait()

    expected_deployed_model = gca_endpoint.DeployedModel(
        automatic_resources=gca_machine_resources.AutomaticResources(
            min_replica_count=1,
            max_replica_count=1,
        ),
        model=model_to_deploy.resource_name,
        display_name=_TEST_DISPLAY_NAME,
    )
    expected_call = dict(
        endpoint=endpoint_under_test.resource_name,
        deployed_model=expected_deployed_model,
        traffic_split={"0": 100},
        metadata=(),
    )
    deploy_model_mock.assert_called_once_with(**expected_call)
def deploy_model_with_explanations_mock():
    """Patch ``EndpointServiceClient.deploy_model`` and yield the patch mock.

    While the patch is active, calling ``deploy_model`` returns a mock
    operation (spec'd on ``ga_operation.Operation``) whose ``result()`` is a
    ``DeployModelResponse`` wrapping a ``DeployedModel`` built from
    ``_TEST_MODEL_NAME`` and ``_TEST_DISPLAY_NAME``.

    Yields:
        The mock that replaces ``deploy_model`` for the duration of the patch.
    """
    canned_response = gca_endpoint_service.DeployModelResponse(
        deployed_model=gca_endpoint.DeployedModel(
            model=_TEST_MODEL_NAME,
            display_name=_TEST_DISPLAY_NAME,
        ),
    )

    # Stand-in for the long-running operation returned by deploy_model.
    operation_stub = mock.Mock(ga_operation.Operation)
    operation_stub.result.return_value = canned_response

    with mock.patch.object(
        endpoint_service_client.EndpointServiceClient, "deploy_model"
    ) as patched_deploy_model:
        patched_deploy_model.return_value = operation_stub
        yield patched_deploy_model
def _setUpDeleteVertexModelMocks(self):
    """Install the mocks and fixtures used by the Vertex model-deletion tests.

    Steps, in order:
      * reload ``initializer`` and ``aiplatform``;
      * replace ``initializer.global_config.create_client`` with a mock that
        returns a ``Mock`` spec'd on ``EndpointServiceClient``;
      * replace ``EndpointServiceClient.get_endpoint`` with a mock returning
        an ``endpoint.Endpoint`` named after ``self._endpoint_name``;
      * call ``aiplatform.init`` and build ``self._mock_endpoint``;
      * replace ``aiplatform.Endpoint.list`` and, on the endpoint instance,
        ``undeploy`` and ``list_models`` with mocks;
      * store the serving args dict in
        ``self._ai_platform_serving_args_vertex``.

    Reads ``self._project_id`` and ``self._model_name``, which are not
    assigned here (presumably set by the test's setUp -- confirm).

    NOTE: the class-level replacements are plain attribute assignments, not
    ``mock.patch`` contexts, so nothing in this method restores them.
    """
    importlib.reload(initializer)
    importlib.reload(aiplatform)

    self._endpoint_name = 'endpoint_name'
    self._deployed_model_id = 'model_id'

    # Client factory: every created client is an EndpointServiceClient-shaped mock.
    client_stub = mock.Mock(spec=endpoint_service_client.EndpointServiceClient)
    self._mock_create_client = mock.Mock(return_value=client_stub)
    initializer.global_config.create_client = self._mock_create_client

    # get_endpoint is replaced on the class itself.
    self._mock_get_endpoint = mock.Mock(
        return_value=endpoint.Endpoint(display_name=self._endpoint_name))
    endpoint_service_client.EndpointServiceClient.get_endpoint = (
        self._mock_get_endpoint)

    anonymous_credentials = mock.Mock(
        spec=auth_credentials.AnonymousCredentials())
    aiplatform.init(
        project=self._project_id,
        location=None,
        credentials=anonymous_credentials)

    endpoint_path = (
        'projects/{}/locations/us-central1/endpoints/1234'.format(
            self._project_id))
    self._mock_endpoint = aiplatform.Endpoint(endpoint_name=endpoint_path)

    # Endpoint.list always reports the single endpoint built above.
    self._mock_endpoint_list = mock.Mock(return_value=[self._mock_endpoint])
    aiplatform.Endpoint.list = self._mock_endpoint_list

    self._mock_model_delete = mock.Mock()
    self._mock_endpoint.undeploy = self._mock_model_delete

    deployed_model = endpoint.DeployedModel(
        display_name=self._model_name, id=self._deployed_model_id)
    self._mock_list_models = mock.Mock(return_value=[deployed_model])
    self._mock_endpoint.list_models = self._mock_list_models

    self._ai_platform_serving_args_vertex = {
        'endpoint_name': self._endpoint_name,
        'project_id': self._project_id,
    }
def test_deploy_with_explanations(self, deploy_model_with_explanations_mock, sync):
    """Check that explanation metadata/parameters reach the deploy request.

    Deploys with a machine type, accelerator settings and explanation
    arguments, then expects a ``DeployedModel`` with dedicated resources
    (one min/max replica) and an ``ExplanationSpec`` built from
    ``_TEST_EXPLANATION_METADATA`` and ``_TEST_EXPLANATION_PARAMETERS``,
    with the traffic split ``{"0": 100}``.

    Args:
        deploy_model_with_explanations_mock: Mock whose
            ``assert_called_once_with`` is checked.
        sync: Forwarded to ``Endpoint.deploy``; when falsy the test calls
            ``wait()`` on the endpoint before asserting.
    """
    endpoint_under_test = models.Endpoint(_TEST_ENDPOINT_NAME)
    model_to_deploy = models.Model(_TEST_ID)

    deploy_kwargs = dict(
        model=model_to_deploy,
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
        explanation_metadata=_TEST_EXPLANATION_METADATA,
        explanation_parameters=_TEST_EXPLANATION_PARAMETERS,
        sync=sync,
    )
    endpoint_under_test.deploy(**deploy_kwargs)
    if not sync:
        endpoint_under_test.wait()

    machine_spec = gca_machine_resources.MachineSpec(
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
    )
    dedicated_resources = gca_machine_resources.DedicatedResources(
        machine_spec=machine_spec,
        min_replica_count=1,
        max_replica_count=1,
    )
    explanation_spec = gca_endpoint.explanation.ExplanationSpec(
        metadata=_TEST_EXPLANATION_METADATA,
        parameters=_TEST_EXPLANATION_PARAMETERS,
    )
    deployed_model_expectation = gca_endpoint.DeployedModel(
        dedicated_resources=dedicated_resources,
        model=model_to_deploy.resource_name,
        display_name=None,
        explanation_spec=explanation_spec,
    )

    expected_call = dict(
        endpoint=endpoint_under_test.resource_name,
        deployed_model=deployed_model_expectation,
        traffic_split={"0": 100},
        metadata=(),
    )
    deploy_model_with_explanations_mock.assert_called_once_with(**expected_call)
def test_deploy_with_dedicated_resources(self, deploy_model_mock, sync):
    """Check the deploy request built when machine/accelerator args are given.

    Deploys with a machine type, accelerator settings and a service account,
    then expects a ``DeployedModel`` with dedicated resources (one min/max
    replica), the same service account, and the traffic split ``{"0": 100}``.

    Args:
        deploy_model_mock: Mock whose ``assert_called_once_with`` is checked
            (presumably a fixture patching ``deploy_model``; it is defined
            outside this block -- confirm).
        sync: Forwarded to ``Endpoint.deploy``; when falsy the test calls
            ``wait()`` on the endpoint before asserting.
    """
    endpoint_under_test = models.Endpoint(_TEST_ENDPOINT_NAME)
    model_to_deploy = models.Model(_TEST_ID)

    deploy_kwargs = dict(
        model=model_to_deploy,
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
        service_account=_TEST_SERVICE_ACCOUNT,
        sync=sync,
    )
    endpoint_under_test.deploy(**deploy_kwargs)
    if not sync:
        endpoint_under_test.wait()

    machine_spec = gca_machine_resources.MachineSpec(
        machine_type=_TEST_MACHINE_TYPE,
        accelerator_type=_TEST_ACCELERATOR_TYPE,
        accelerator_count=_TEST_ACCELERATOR_COUNT,
    )
    dedicated_resources = gca_machine_resources.DedicatedResources(
        machine_spec=machine_spec,
        min_replica_count=1,
        max_replica_count=1,
    )
    deployed_model_expectation = gca_endpoint.DeployedModel(
        dedicated_resources=dedicated_resources,
        model=model_to_deploy.resource_name,
        display_name=None,
        service_account=_TEST_SERVICE_ACCOUNT,
    )

    expected_call = dict(
        endpoint=endpoint_under_test.resource_name,
        deployed_model=deployed_model_expectation,
        traffic_split={"0": 100},
        metadata=(),
    )
    deploy_model_mock.assert_called_once_with(**expected_call)
"input_tensor_name": "dense_input", "encoding": "BAG_OF_FEATURES", "modality": "numeric", "index_feature_mapping": ["abc", "def", "ghj"], } }, outputs={"medv": { "output_tensor_name": "dense_2" }}, ) _TEST_EXPLANATION_PARAMETERS = aiplatform.explain.ExplanationParameters( {"sampled_shapley_attribution": { "path_count": 10 }}) _TEST_DEPLOYED_MODELS = [ gca_endpoint.DeployedModel(id=_TEST_ID, display_name=_TEST_DISPLAY_NAME), gca_endpoint.DeployedModel(id=_TEST_ID_2, display_name=_TEST_DISPLAY_NAME_2), gca_endpoint.DeployedModel(id=_TEST_ID_3, display_name=_TEST_DISPLAY_NAME_3), ] _TEST_DEPLOYED_MODELS_WITH_EXPLANATION = [ gca_endpoint.DeployedModel( id=_TEST_ID, display_name=_TEST_DISPLAY_NAME, explanation_spec=gca_explanation.ExplanationSpec( metadata=_TEST_EXPLANATION_METADATA, parameters=_TEST_EXPLANATION_PARAMETERS, ), ), gca_endpoint.DeployedModel(
) _TEST_ENDPOINT_NAME_ALT_LOCATION = ( f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION_2}/endpoints/{_TEST_ID}" ) _TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}" _TEST_MODEL_NAME = ( f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/models/{_TEST_ID}") _TEST_MODEL_ID = "1028944691210842416" _TEST_PREDICTION = [[1.0, 2.0, 3.0], [3.0, 3.0, 1.0]] _TEST_INSTANCES = [[1.0, 2.0, 3.0], [1.0, 3.0, 4.0]] _TEST_CREDENTIALS = mock.Mock(spec=auth_credentials.AnonymousCredentials()) _TEST_SERVICE_ACCOUNT = "*****@*****.**" _TEST_DEPLOYED_MODELS = [ gca_endpoint.DeployedModel(id=_TEST_ID, display_name=_TEST_DISPLAY_NAME), gca_endpoint.DeployedModel(id=_TEST_ID_2, display_name=_TEST_DISPLAY_NAME_2), gca_endpoint.DeployedModel(id=_TEST_ID_3, display_name=_TEST_DISPLAY_NAME_3), ] _TEST_TRAFFIC_SPLIT = {_TEST_ID: 0, _TEST_ID_2: 100, _TEST_ID_3: 0} _TEST_LONG_TRAFFIC_SPLIT = { "m1": 40, "m2": 10, "m3": 30, "m4": 0, "m5": 5, "m6": 8,
) _TEST_ENDPOINT_NAME_ALT_LOCATION = ( f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION_2}/endpoints/{_TEST_ID}" ) _TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}" _TEST_MODEL_NAME = ( f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/models/{_TEST_ID}") _TEST_MODEL_ID = "1028944691210842416" _TEST_PREDICTION = [[1.0, 2.0, 3.0], [3.0, 3.0, 1.0]] _TEST_INSTANCES = [[1.0, 2.0, 3.0], [1.0, 3.0, 4.0]] _TEST_CREDENTIALS = mock.Mock(spec=auth_credentials.AnonymousCredentials()) _TEST_SERVICE_ACCOUNT = "*****@*****.**" _TEST_DEPLOYED_MODELS = [ gca_endpoint.DeployedModel(id=_TEST_ID, display_name=_TEST_DISPLAY_NAME), gca_endpoint.DeployedModel(id=_TEST_ID_2, display_name=_TEST_DISPLAY_NAME_2), ] _TEST_MACHINE_TYPE = "n1-standard-32" _TEST_ACCELERATOR_TYPE = "NVIDIA_TESLA_P100" _TEST_ACCELERATOR_COUNT = 2 _TEST_EXPLANATIONS = [ gca_prediction_service.explanation.Explanation(attributions=[]) ] _TEST_ATTRIBUTIONS = [ gca_prediction_service.explanation.Attribution( baseline_output_value=1.0,