def test_deploy_with_display_name(self, deploy_model_mock, sync):
        """Deploy with an explicit display name and verify the deploy request.

        Args:
            deploy_model_mock: Mock whose single call is asserted at the end.
            sync: Forwarded to ``Endpoint.deploy``; when falsy the test calls
                ``wait()`` on the endpoint before checking the mock.
        """
        endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        model = models.Model(_TEST_ID)

        endpoint.deploy(
            model=model,
            deployed_model_display_name=_TEST_DISPLAY_NAME,
            sync=sync,
        )
        if not sync:
            endpoint.wait()

        # The expected request carries the display name passed to deploy()
        # and automatic resources with exactly one replica.
        expected_deployed_model = gca_endpoint.DeployedModel(
            automatic_resources=gca_machine_resources.AutomaticResources(
                min_replica_count=1,
                max_replica_count=1,
            ),
            model=model.resource_name,
            display_name=_TEST_DISPLAY_NAME,
        )
        deploy_model_mock.assert_called_once_with(
            endpoint=endpoint.resource_name,
            deployed_model=expected_deployed_model,
            traffic_split={"0": 100},
            metadata=(),
        )
    def test_deploy_with_traffic_percent(self, deploy_model_mock, sync):
        """Deploy with ``traffic_percentage=70`` onto an endpoint with traffic.

        ``get_endpoint`` is patched to return an endpoint whose traffic split
        is ``{"model1": 100}``; the test then expects the deploy request to
        carry ``{"model1": 30, "0": 70}``.

        Args:
            deploy_model_mock: Mock whose single call is asserted at the end.
            sync: Forwarded to ``Endpoint.deploy``; when falsy the test calls
                ``wait()`` on the endpoint before checking the mock.
        """
        existing_endpoint = gca_endpoint.Endpoint(
            display_name=_TEST_DISPLAY_NAME,
            name=_TEST_ENDPOINT_NAME,
            traffic_split={"model1": 100},
        )

        # The whole scenario, assertion included, runs while get_endpoint is
        # patched.
        with mock.patch.object(
            endpoint_service_client.EndpointServiceClient,
            "get_endpoint",
            return_value=existing_endpoint,
        ):
            endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
            model = models.Model(_TEST_ID)

            endpoint.deploy(model=model, traffic_percentage=70, sync=sync)
            if not sync:
                endpoint.wait()

            expected_deployed_model = gca_endpoint.DeployedModel(
                automatic_resources=gca_machine_resources.AutomaticResources(
                    min_replica_count=1,
                    max_replica_count=1,
                ),
                model=model.resource_name,
                display_name=None,
            )
            expected_traffic_split = {"model1": 30, "0": 70}
            deploy_model_mock.assert_called_once_with(
                endpoint=endpoint.resource_name,
                deployed_model=expected_deployed_model,
                traffic_split=expected_traffic_split,
                metadata=(),
            )
# Example no. 3
# 0
    def test_deploy_no_endpoint_dedicated_resources(self, deploy_model_mock, sync):
        """Deploy via ``Model.deploy`` with a machine spec and no endpoint arg.

        The endpoint under test is whatever ``Model.deploy`` returns; the
        request is expected to use dedicated resources built from the machine
        type and accelerator settings passed in.

        Args:
            deploy_model_mock: Mock whose single call is asserted at the end.
            sync: Forwarded to ``Model.deploy``; when falsy the test calls
                ``wait()`` on the returned endpoint before checking the mock.
        """
        model = models.Model(_TEST_ID)
        endpoint = model.deploy(
            machine_type=_TEST_MACHINE_TYPE,
            accelerator_type=_TEST_ACCELERATOR_TYPE,
            accelerator_count=_TEST_ACCELERATOR_COUNT,
            sync=sync,
        )
        if not sync:
            endpoint.wait()

        dedicated_resources = gca_machine_resources.DedicatedResources(
            machine_spec=gca_machine_resources.MachineSpec(
                machine_type=_TEST_MACHINE_TYPE,
                accelerator_type=_TEST_ACCELERATOR_TYPE,
                accelerator_count=_TEST_ACCELERATOR_COUNT,
            ),
            min_replica_count=1,
            max_replica_count=1,
        )
        deploy_model_mock.assert_called_once_with(
            endpoint=endpoint.resource_name,
            deployed_model=gca_endpoint.DeployedModel(
                dedicated_resources=dedicated_resources,
                model=model.resource_name,
                display_name=None,
            ),
            traffic_split={"0": 100},
            metadata=(),
        )
def deploy_model_mock():
    """Patch ``EndpointServiceClient.deploy_model`` and yield the mock.

    The patched method returns a mock operation whose ``result()`` gives a
    ``DeployModelResponse`` wrapping a deployed model named after
    ``_TEST_MODEL_NAME`` / ``_TEST_DISPLAY_NAME``. The patch stays active
    until the generator is resumed after the ``yield``.

    NOTE(review): presumably registered as a pytest fixture by a decorator
    that is outside this view -- confirm.
    """
    deploy_response = gca_endpoint_service.DeployModelResponse(
        deployed_model=gca_endpoint.DeployedModel(
            model=_TEST_MODEL_NAME,
            display_name=_TEST_DISPLAY_NAME,
        ),
    )
    with mock.patch.object(
        endpoint_service_client.EndpointServiceClient, "deploy_model"
    ) as patched_deploy_model:
        # Stand-in for the long-running operation returned by deploy_model.
        operation = mock.Mock(ga_operation.Operation)
        operation.result.return_value = deploy_response
        patched_deploy_model.return_value = operation
        yield patched_deploy_model
 def test_deploy_with_max_replica_count(self, deploy_model_mock, sync):
     """Deploy with ``max_replica_count=2`` and verify the deploy request.

     Args:
         deploy_model_mock: Mock whose single call is asserted at the end.
         sync: Forwarded to ``Endpoint.deploy``; when falsy the test calls
             ``wait()`` on the endpoint before checking the mock.
     """
     aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
     endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
     model = models.Model(_TEST_ID)

     endpoint.deploy(model=model, max_replica_count=2, sync=sync)
     if not sync:
         endpoint.wait()

     # Only the maximum was overridden; the minimum is expected to stay at 1.
     expected_deployed_model = gca_endpoint.DeployedModel(
         automatic_resources=gca_machine_resources.AutomaticResources(
             min_replica_count=1,
             max_replica_count=2,
         ),
         model=model.resource_name,
         display_name=None,
     )
     deploy_model_mock.assert_called_once_with(
         endpoint=endpoint.resource_name,
         deployed_model=expected_deployed_model,
         traffic_split={"0": 100},
         metadata=(),
     )
# Example no. 6
# 0
    def test_deploy_with_dedicated_resources(self, deploy_model_mock, sync):
        """Deploy with a machine spec and service account; verify the request.

        Args:
            deploy_model_mock: Mock whose single call is asserted at the end.
            sync: Forwarded to ``Endpoint.deploy``; when falsy the test calls
                ``wait()`` on the endpoint before checking the mock.
        """
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
        endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        model = models.Model(_TEST_ID)

        endpoint.deploy(
            model=model,
            machine_type=_TEST_MACHINE_TYPE,
            accelerator_type=_TEST_ACCELERATOR_TYPE,
            accelerator_count=_TEST_ACCELERATOR_COUNT,
            service_account=_TEST_SERVICE_ACCOUNT,
            sync=sync,
        )
        if not sync:
            endpoint.wait()

        dedicated_resources = gca_machine_resources.DedicatedResources(
            machine_spec=gca_machine_resources.MachineSpec(
                machine_type=_TEST_MACHINE_TYPE,
                accelerator_type=_TEST_ACCELERATOR_TYPE,
                accelerator_count=_TEST_ACCELERATOR_COUNT,
            ),
            min_replica_count=1,
            max_replica_count=1,
        )
        # The service account passed to deploy() is expected on the deployed
        # model itself.
        deploy_model_mock.assert_called_once_with(
            endpoint=endpoint.resource_name,
            deployed_model=gca_endpoint.DeployedModel(
                dedicated_resources=dedicated_resources,
                model=model.resource_name,
                display_name=None,
                service_account=_TEST_SERVICE_ACCOUNT,
            ),
            traffic_split={"0": 100},
            metadata=(),
        )
)
# Endpoint resource name that uses _TEST_LOCATION_2 as its location segment.
_TEST_ENDPOINT_NAME_ALT_LOCATION = (
    f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION_2}/endpoints/{_TEST_ID}"
)
# Parent resource path: project plus location.
_TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}"
# Model resource name under _TEST_PROJECT / _TEST_LOCATION; used by the
# deploy_model mock for the deployed model it returns.
_TEST_MODEL_NAME = (
    f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/models/{_TEST_ID}")

_TEST_MODEL_ID = "1028944691210842416"
# Two rows of three floats each.
# NOTE(review): presumably a canned prediction response and its request
# instances -- their use is not visible in this part of the file.
_TEST_PREDICTION = [[1.0, 2.0, 3.0], [3.0, 3.0, 1.0]]
_TEST_INSTANCES = [[1.0, 2.0, 3.0], [1.0, 3.0, 4.0]]
# Mock constrained to the attributes of an AnonymousCredentials instance.
_TEST_CREDENTIALS = mock.Mock(spec=auth_credentials.AnonymousCredentials())
# Passed as ``service_account`` by the dedicated-resources deploy test.
# NOTE(review): the value looks masked/redacted -- confirm the intended
# placeholder address.
_TEST_SERVICE_ACCOUNT = "*****@*****.**"

# Two deployed models with distinct id / display-name pairs.
_TEST_DEPLOYED_MODELS = [
    gca_endpoint.DeployedModel(id=_TEST_ID, display_name=_TEST_DISPLAY_NAME),
    gca_endpoint.DeployedModel(id=_TEST_ID_2,
                               display_name=_TEST_DISPLAY_NAME_2),
]

# Machine configuration passed to deploy() and expected back in the
# MachineSpec by the dedicated-resources tests.
_TEST_MACHINE_TYPE = "n1-standard-32"
_TEST_ACCELERATOR_TYPE = "NVIDIA_TESLA_P100"
_TEST_ACCELERATOR_COUNT = 2

# A single explanation with an empty attributions list.
_TEST_EXPLANATIONS = [
    gca_prediction_service_v1beta1.explanation.Explanation(attributions=[])
]

_TEST_ATTRIBUTIONS = [
    gca_prediction_service_v1beta1.explanation.Attribution(
        baseline_output_value=1.0,