示例#1
0
def test_deploy_async_inference(production_variant, name_from_base,
                                sagemaker_session):
    """deploy() with an AsyncInferenceConfig passes its request dict through
    to endpoint_from_production_variants as async_inference_config_dict."""
    model = Model(MODEL_IMAGE,
                  MODEL_DATA,
                  role=ROLE,
                  name=MODEL_NAME,
                  sagemaker_session=sagemaker_session)

    async_inference_config = AsyncInferenceConfig(output_path="s3://some-path")
    # Expected serialized form of the config above.
    async_inference_config_dict = {
        "OutputConfig": {
            "S3OutputPath": "s3://some-path",
        },
    }

    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        async_inference_config=async_inference_config,
    )

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=ENDPOINT_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
        async_inference_config_dict=async_inference_config_dict,
    )
示例#2
0
def test_deploy_accelerator_type(production_variant, create_sagemaker_model,
                                 sagemaker_session):
    """deploy() with accelerator_type forwards it to model creation and to
    the production variant used for the endpoint."""
    model = Model(MODEL_DATA,
                  MODEL_IMAGE,
                  role=ROLE,
                  name=MODEL_NAME,
                  sagemaker_session=sagemaker_session)

    # Simulate the variant the SDK would build when an accelerator is attached.
    production_variant_result = copy.deepcopy(BASE_PRODUCTION_VARIANT)
    production_variant_result["AcceleratorType"] = ACCELERATOR_TYPE
    production_variant.return_value = production_variant_result

    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        accelerator_type=ACCELERATOR_TYPE,
    )

    create_sagemaker_model.assert_called_with(INSTANCE_TYPE, ACCELERATOR_TYPE,
                                              None)
    production_variant.assert_called_with(MODEL_NAME,
                                          INSTANCE_TYPE,
                                          INSTANCE_COUNT,
                                          accelerator_type=ACCELERATOR_TYPE)

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=MODEL_NAME,
        production_variants=[production_variant_result],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
    )
示例#3
0
def test_deploy_update_endpoint(sagemaker_session):
    """deploy(update_endpoint=True) creates an endpoint config and updates the
    existing endpoint instead of creating a new one."""
    model = Model(MODEL_DATA,
                  MODEL_IMAGE,
                  role=ROLE,
                  sagemaker_session=sagemaker_session)
    model.deploy(instance_type=INSTANCE_TYPE,
                 initial_instance_count=INSTANCE_COUNT,
                 update_endpoint=True)
    sagemaker_session.create_endpoint_config.assert_called_with(
        name=model.name,
        model_name=model.name,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        accelerator_type=None,
        tags=None,
        kms_key=None,
        data_capture_config_dict=None,
    )
    # Calling the mock again just retrieves the same return value deploy()
    # received, so we can assert update_endpoint was called with it.
    config_name = sagemaker_session.create_endpoint_config(
        name=model.name,
        model_name=model.name,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        accelerator_type=ACCELERATOR_TYPE,
    )
    sagemaker_session.update_endpoint.assert_called_with(model.name,
                                                         config_name,
                                                         wait=True)
    sagemaker_session.create_endpoint.assert_not_called()
示例#4
0
def test_deploy_predictor_cls(production_variant, sagemaker_session):
    """deploy() returns an instance of predictor_cls, and wraps it in an
    AsyncPredictor when an async_inference_config is supplied."""
    model = Model(
        MODEL_IMAGE,
        MODEL_DATA,
        role=ROLE,
        name=MODEL_NAME,
        predictor_cls=sagemaker.predictor.Predictor,
        sagemaker_session=sagemaker_session,
    )

    endpoint_name = "foo"
    predictor = model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        endpoint_name=endpoint_name,
    )

    assert isinstance(predictor, sagemaker.predictor.Predictor)
    assert predictor.endpoint_name == endpoint_name
    assert predictor.sagemaker_session == sagemaker_session

    # Same model deployed asynchronously should yield an AsyncPredictor.
    endpoint_name_async = "foo-async"
    predictor_async = model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        endpoint_name=endpoint_name_async,
        async_inference_config=AsyncInferenceConfig(),
    )

    assert isinstance(predictor_async,
                      sagemaker.predictor_async.AsyncPredictor)
    assert predictor_async.name == model.name
    assert predictor_async.endpoint_name == endpoint_name_async
    assert predictor_async.sagemaker_session == sagemaker_session
示例#5
0
def test_deploy_no_role(sagemaker_session):
    """deploy() on a Model constructed without a role raises ValueError."""
    model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session)

    with pytest.raises(ValueError,
                       match="Role can not be null for deploying a model"):
        model.deploy(instance_type=INSTANCE_TYPE,
                     initial_instance_count=INSTANCE_COUNT)
def endpoint_name(sagemaker_session):
    """Fixture: deploy an XGBoost model with data capture enabled and yield
    the endpoint name; the endpoint is deleted when the timeout context exits.

    NOTE(review): presumably registered as a pytest fixture via a decorator
    outside this chunk — confirm at the definition site.
    """
    endpoint_name = unique_name_from_base("model-quality-monitor-integ")
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(XGBOOST_DATA_PATH, "xgb_model.tar.gz"),
        key_prefix="integ-test-data/xgboost/model",
    )

    xgb_image = image_uris.retrieve("xgboost",
                                    sagemaker_session.boto_region_name,
                                    version="1",
                                    image_scope="inference")

    # Guarantees endpoint cleanup even if the test using the fixture fails.
    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
            endpoint_name=endpoint_name,
            sagemaker_session=sagemaker_session,
            hours=2):
        xgb_model = Model(
            model_data=xgb_model_data,
            image_uri=xgb_image,
            name=endpoint_name,  # model name
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )
        xgb_model.deploy(
            INSTANCE_COUNT,
            INSTANCE_TYPE,
            endpoint_name=endpoint_name,
            data_capture_config=DataCaptureConfig(
                True, sagemaker_session=sagemaker_session),
        )
        yield endpoint_name
示例#7
0
def test_deploy_data_capture_config(production_variant, name_from_base,
                                    sagemaker_session):
    """deploy() serializes the DataCaptureConfig via _to_request_dict and
    forwards the result to endpoint_from_production_variants."""
    model = Model(MODEL_IMAGE,
                  MODEL_DATA,
                  role=ROLE,
                  name=MODEL_NAME,
                  sagemaker_session=sagemaker_session)

    data_capture_config = Mock()
    data_capture_config_dict = {"EnableCapture": True}
    data_capture_config._to_request_dict.return_value = data_capture_config_dict
    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        data_capture_config=data_capture_config,
    )

    data_capture_config._to_request_dict.assert_called_with()
    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=ENDPOINT_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=data_capture_config_dict,
        async_inference_config_dict=None,
    )
示例#8
0
def test_deploy_wrong_serverless_config(sagemaker_session):
    """deploy() rejects a serverless_inference_config that is not a
    ServerlessInferenceConfig instance."""
    model = Model(MODEL_IMAGE, MODEL_DATA, role=ROLE)
    with pytest.raises(
            ValueError,
            match=
            "serverless_inference_config needs to be a ServerlessInferenceConfig object",
    ):
        model.deploy(serverless_inference_config={})
def test_jumpstart_inference_model_class(setup):
    """End-to-end: deploy a JumpStart CatBoost classification model via the
    Model class and verify the tabular endpoint returns a response."""
    model_id, model_version = "catboost-classification-model", "1.0.0"
    instance_type, instance_count = "ml.m5.xlarge", 1

    print("Starting inference...")

    # Resolve the JumpStart artifacts: container image, inference script,
    # and pre-trained model data for this model id/version.
    image_uri = image_uris.retrieve(
        region=None,
        framework=None,
        image_scope="inference",
        model_id=model_id,
        model_version=model_version,
        instance_type=instance_type,
    )

    script_uri = script_uris.retrieve(model_id=model_id,
                                      model_version=model_version,
                                      script_scope="inference")

    model_uri = model_uris.retrieve(model_id=model_id,
                                    model_version=model_version,
                                    model_scope="inference")

    model = Model(
        image_uri=image_uri,
        model_data=model_uri,
        source_dir=script_uri,
        entry_point=INFERENCE_ENTRY_POINT_SCRIPT_NAME,
        role=get_sm_session().get_caller_identity_arn(),
        sagemaker_session=get_sm_session(),
        enable_network_isolation=True,
    )

    # Tag the endpoint so the test-suite teardown can find and delete it.
    model.deploy(
        initial_instance_count=instance_count,
        instance_type=instance_type,
        tags=[{
            "Key": JUMPSTART_TAG,
            "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]
        }],
    )

    endpoint_invoker = EndpointInvoker(endpoint_name=model.endpoint_name, )

    download_inference_assets()
    ground_truth_label, features = get_tabular_data(
        InferenceTabularDataname.MULTICLASS)

    response = endpoint_invoker.invoke_tabular_endpoint(features)

    assert response is not None
示例#10
0
def test_deploy_creates_correct_session(local_session, session):
    """deploy() picks a LocalSession for 'local' instance types and a regular
    Session for real instance types."""
    # We expect a LocalSession when deploying to instance_type = 'local'
    model = Model(MODEL_IMAGE, MODEL_DATA, role=ROLE)
    model.deploy(endpoint_name="blah",
                 instance_type="local",
                 initial_instance_count=1)
    assert model.sagemaker_session == local_session.return_value

    # We expect a real Session when deploying to instance_type != local/local_gpu
    model = Model(MODEL_IMAGE, MODEL_DATA, role=ROLE)
    model.deploy(endpoint_name="remote_endpoint",
                 instance_type="ml.m4.4xlarge",
                 initial_instance_count=2)
    assert model.sagemaker_session == session.return_value
def test_script_mode_model_tags_jumpstart_models(repack_model,
                                                 sagemaker_session):
    """Models whose source_dir lives in a JumpStart bucket are auto-tagged
    with the inference-script URI; models with other source dirs are not.

    Fixes two defects in the original test:
    * the last assertion was a verbatim duplicate of the previous one, so
      endpoint_from_production_variants was never checked for the
      non-JumpStart model;
    * the mocks were not reset between deploys, so ``call_args_list[0]``
      still referred to the first (JumpStart) deploy.
    """
    jumpstart_source_dir = f"s3://{list(JUMPSTART_BUCKET_NAME_SET)[0]}/source_dirs/source.tar.gz"
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=jumpstart_source_dir,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE,
             initial_instance_count=INSTANCE_COUNT)

    # JumpStart source dir: both the model and the endpoint get the tag.
    assert sagemaker_session.create_model.call_args_list[0][1]["tags"] == [
        {
            "Key": JumpStartTag.INFERENCE_SCRIPT_URI.value,
            "Value": jumpstart_source_dir,
        },
    ]
    assert sagemaker_session.endpoint_from_production_variants.call_args_list[
        0][1]["tags"] == [
            {
                "Key": JumpStartTag.INFERENCE_SCRIPT_URI.value,
                "Value": jumpstart_source_dir,
            },
        ]

    # Reset so call_args_list[0] below refers to the second deploy.
    sagemaker_session.create_model.reset_mock()
    sagemaker_session.endpoint_from_production_variants.reset_mock()

    non_jumpstart_source_dir = "s3://blah/blah/blah"
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=non_jumpstart_source_dir,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE,
             initial_instance_count=INSTANCE_COUNT)

    # Non-JumpStart source dir: neither call should carry the tag.
    assert {
        "Key": JumpStartTag.INFERENCE_SCRIPT_URI.value,
        "Value": non_jumpstart_source_dir,
    } not in sagemaker_session.create_model.call_args_list[0][1]["tags"]

    assert {
        "Key": JumpStartTag.INFERENCE_SCRIPT_URI.value,
        "Value": non_jumpstart_source_dir,
    } not in sagemaker_session.endpoint_from_production_variants.call_args_list[
        0][1]["tags"]
示例#12
0
def test_deploy(name_from_base, prepare_container_def, production_variant,
                sagemaker_session):
    """Happy-path deploy(): generates names from the image, prepares the
    container def, creates the model, and creates the endpoint."""
    production_variant.return_value = BASE_PRODUCTION_VARIANT

    container_def = {
        "Image": MODEL_IMAGE,
        "Environment": {},
        "ModelDataUrl": MODEL_DATA
    }
    prepare_container_def.return_value = container_def

    model = Model(MODEL_IMAGE,
                  MODEL_DATA,
                  role=ROLE,
                  sagemaker_session=sagemaker_session)
    model.deploy(instance_type=INSTANCE_TYPE,
                 initial_instance_count=INSTANCE_COUNT)

    # Called once for the model name and once for the endpoint name.
    name_from_base.assert_called_with(MODEL_IMAGE)
    assert 2 == name_from_base.call_count

    prepare_container_def.assert_called_with(INSTANCE_TYPE,
                                             accelerator_type=None,
                                             serverless_inference_config=None)
    production_variant.assert_called_with(
        MODEL_NAME,
        INSTANCE_TYPE,
        INSTANCE_COUNT,
        accelerator_type=None,
        serverless_inference_config=None,
    )

    sagemaker_session.create_model.assert_called_with(
        MODEL_NAME,
        ROLE,
        container_def,
        vpc_config=None,
        enable_network_isolation=False,
        tags=None)

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=MODEL_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
        async_inference_config_dict=None,
    )
示例#13
0
def _test_hub_model(sagemaker_session, framework_version, ecr_image, instance_type, model_dir, accelerator_type=None):
    """Deploy a HuggingFace Hub model (resolved at runtime via HF_MODEL_ID env
    vars) and assert the text-classification endpoint returns a score."""
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-huggingface-serving-hub-model")

    # The serving container downloads this model from the Hub at startup.
    env = {
        "HF_MODEL_ID": "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
        "HF_TASK": "text-classification",
    }

    hf_model = Model(
        env=env,
        role="SageMakerRole",
        image_uri=ecr_image,
        sagemaker_session=sagemaker_session,
        predictor_cls=Predictor,
    )

    # Endpoint is torn down automatically when the context exits.
    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        predictor = hf_model.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            endpoint_name=endpoint_name,
        )

        data = {
            "inputs": "Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."
        }
        predictor.serializer = JSONSerializer()
        predictor.deserializer = JSONDeserializer()

        output = predictor.predict(data)

        assert "score" in output[0]
示例#14
0
def test_deploy_wrong_async_inferenc_config(sagemaker_session):
    """deploy() rejects an async_inference_config that is not an
    AsyncInferenceConfig instance."""
    # NOTE(review): "inferenc" typo is in the published test id; renaming
    # would change how the test is selected, so it is left as-is.
    model = Model(MODEL_IMAGE,
                  MODEL_DATA,
                  sagemaker_session=sagemaker_session,
                  role=ROLE)

    with pytest.raises(
            ValueError,
            match=
            "async_inference_config needs to be a AsyncInferenceConfig object"
    ):
        model.deploy(
            instance_type=INSTANCE_TYPE,
            initial_instance_count=INSTANCE_COUNT,
            async_inference_config={},
        )
示例#15
0
def test_script_mode_model_uses_proper_sagemaker_submit_dir(
        repack_model, sagemaker_session):
    """Script-mode models must point SAGEMAKER_SUBMIT_DIRECTORY at the
    repacked code location inside the model artifact."""
    source_dir = "s3://blah/blah/blah"
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=source_dir,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE,
             initial_instance_count=INSTANCE_COUNT)

    # create_model positional arg 2 is the container definition dict.
    assert (
        sagemaker_session.create_model.call_args_list[0][0][2]["Environment"]
        ["SAGEMAKER_SUBMIT_DIRECTORY"] == "/opt/ml/model/code")
示例#16
0
def test_deploy_async(production_variant, sagemaker_session):
    """deploy(wait=False) propagates wait=False to endpoint creation."""
    model = Model(MODEL_DATA,
                  MODEL_IMAGE,
                  role=ROLE,
                  name=MODEL_NAME,
                  sagemaker_session=sagemaker_session)

    model.deploy(instance_type=INSTANCE_TYPE,
                 initial_instance_count=INSTANCE_COUNT,
                 wait=False)

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=MODEL_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=None,
        wait=False,
        data_capture_config_dict=None,
    )
示例#17
0
def test_deploy_update_endpoint_optional_args(sagemaker_session):
    """deploy(update_endpoint=True) forwards all optional args (tags, kms_key,
    accelerator, data capture, wait) to the endpoint-config/update calls."""
    endpoint_name = "endpoint-name"
    tags = [{"Key": "Value"}]
    kms_key = "foo"
    data_capture_config = Mock()

    model = Model(MODEL_DATA,
                  MODEL_IMAGE,
                  role=ROLE,
                  sagemaker_session=sagemaker_session)
    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        update_endpoint=True,
        endpoint_name=endpoint_name,
        accelerator_type=ACCELERATOR_TYPE,
        tags=tags,
        kms_key=kms_key,
        wait=False,
        data_capture_config=data_capture_config,
    )
    sagemaker_session.create_endpoint_config.assert_called_with(
        name=model.name,
        model_name=model.name,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        accelerator_type=ACCELERATOR_TYPE,
        tags=tags,
        kms_key=kms_key,
        data_capture_config_dict=data_capture_config._to_request_dict(),
    )
    # Calling the mock again just retrieves the same return value deploy()
    # received, so we can assert update_endpoint was called with it.
    config_name = sagemaker_session.create_endpoint_config(
        name=model.name,
        model_name=model.name,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        accelerator_type=ACCELERATOR_TYPE,
        wait=False,
    )
    sagemaker_session.update_endpoint.assert_called_with(endpoint_name,
                                                         config_name,
                                                         wait=False)
    sagemaker_session.create_endpoint.assert_not_called()
def test_script_mode_model_same_calls_as_framework(repack_model,
                                                   sagemaker_session):
    """A script-mode generic Model must issue exactly the same create_model /
    endpoint / repack calls as an equivalent FrameworkModel subclass."""
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=SCRIPT_URI,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE,
             initial_instance_count=INSTANCE_COUNT)

    assert len(sagemaker_session.create_model.call_args_list) == 1
    assert len(sagemaker_session.endpoint_from_production_variants.
               call_args_list) == 1
    assert len(repack_model.call_args_list) == 1

    # Snapshot the generic model's calls before resetting the mocks.
    generic_model_create_model_args = sagemaker_session.create_model.call_args_list
    generic_model_endpoint_from_production_variants_args = (
        sagemaker_session.endpoint_from_production_variants.call_args_list)
    generic_model_repack_model_args = repack_model.call_args_list

    sagemaker_session.create_model.reset_mock()
    sagemaker_session.endpoint_from_production_variants.reset_mock()
    repack_model.reset_mock()

    t = DummyFrameworkModel(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=SCRIPT_URI,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE,
             initial_instance_count=INSTANCE_COUNT)

    # The framework model's calls must match the snapshot exactly.
    assert generic_model_create_model_args == sagemaker_session.create_model.call_args_list
    assert (generic_model_endpoint_from_production_variants_args ==
            sagemaker_session.endpoint_from_production_variants.call_args_list)
    assert generic_model_repack_model_args == repack_model.call_args_list
示例#19
0
def test_deploy_generates_endpoint_name_each_time_from_model_name(
        production_variant, base_from_name, name_from_base, sagemaker_session):
    """Each deploy() derives a fresh endpoint name from the model name, so
    repeated deploys do not collide."""
    model = Model(MODEL_IMAGE,
                  MODEL_DATA,
                  name=MODEL_NAME,
                  role=ROLE,
                  sagemaker_session=sagemaker_session)

    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
    )
    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
    )

    base_from_name.assert_called_with(MODEL_NAME)
    name_from_base.assert_called_with(base_from_name.return_value)
    # One unique name generated per deploy call.
    assert 2 == name_from_base.call_count
def test_deploy_kms_key(production_variant, name_from_base, sagemaker_session):
    """deploy(kms_key=...) forwards the key ARN to endpoint creation."""
    model = Model(MODEL_IMAGE,
                  MODEL_DATA,
                  role=ROLE,
                  name=MODEL_NAME,
                  sagemaker_session=sagemaker_session)

    key = "some-key-arn"
    model.deploy(instance_type=INSTANCE_TYPE,
                 initial_instance_count=INSTANCE_COUNT,
                 kms_key=key)

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=ENDPOINT_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=key,
        wait=True,
        data_capture_config_dict=None,
    )
示例#21
0
def test_script_mode_model_uses_jumpstart_base_name(repack_model,
                                                    sagemaker_session):
    """Models sourced from a JumpStart bucket get JumpStart-prefixed model and
    endpoint names; other models do not.

    Fixes the original endpoint-name assertion, which called ``.startswith``
    on the mock ``Call`` object (``call_args_list[0]``) instead of the
    ``name`` keyword argument, mirroring the correct form used for the
    non-JumpStart case below.
    """
    jumpstart_source_dir = f"s3://{list(JUMPSTART_BUCKET_NAME_SET)[0]}/source_dirs/source.tar.gz"
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=jumpstart_source_dir,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE,
             initial_instance_count=INSTANCE_COUNT)

    # create_model positional arg 0 is the model name.
    assert sagemaker_session.create_model.call_args_list[0][0][0].startswith(
        JUMPSTART_RESOURCE_BASE_NAME)

    assert sagemaker_session.endpoint_from_production_variants.call_args_list[
        0][1]["name"].startswith(JUMPSTART_RESOURCE_BASE_NAME)

    # Reset so call_args_list[0] below refers to the second deploy.
    sagemaker_session.create_model.reset_mock()
    sagemaker_session.endpoint_from_production_variants.reset_mock()

    non_jumpstart_source_dir = "s3://blah/blah/blah"
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=non_jumpstart_source_dir,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE,
             initial_instance_count=INSTANCE_COUNT)

    assert not sagemaker_session.create_model.call_args_list[0][0][
        0].startswith(JUMPSTART_RESOURCE_BASE_NAME)

    assert not sagemaker_session.endpoint_from_production_variants.call_args_list[
        0][1]["name"].startswith(JUMPSTART_RESOURCE_BASE_NAME)
def test_deploy_endpoint_name(sagemaker_session):
    """An explicit endpoint_name is used verbatim and recorded on the model."""
    model = Model(MODEL_IMAGE,
                  MODEL_DATA,
                  role=ROLE,
                  sagemaker_session=sagemaker_session)

    endpoint_name = "blah"
    model.deploy(
        endpoint_name=endpoint_name,
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
    )

    assert endpoint_name == model.endpoint_name
    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=endpoint_name,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
    )
示例#23
0
def test_deploy_serverless_inference(production_variant,
                                     create_sagemaker_model,
                                     sagemaker_session):
    """Serverless deploy(): no instance type/count is required, and the
    serverless config dict is passed to the production variant."""
    model = Model(MODEL_IMAGE,
                  MODEL_DATA,
                  role=ROLE,
                  name=MODEL_NAME,
                  sagemaker_session=sagemaker_session)

    production_variant_result = copy.deepcopy(BASE_PRODUCTION_VARIANT)
    production_variant.return_value = production_variant_result

    serverless_inference_config = ServerlessInferenceConfig()
    # Expected serialized form of the default ServerlessInferenceConfig.
    serverless_inference_config_dict = {
        "MemorySizeInMB": 2048,
        "MaxConcurrency": 5,
    }

    model.deploy(serverless_inference_config=serverless_inference_config, )

    # instance_type/accelerator are None for serverless deployments.
    create_sagemaker_model.assert_called_with(None, None, None,
                                              serverless_inference_config)
    production_variant.assert_called_with(
        MODEL_NAME,
        None,
        None,
        accelerator_type=None,
        serverless_inference_config=serverless_inference_config_dict,
    )

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=ENDPOINT_NAME,
        production_variants=[production_variant_result],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
        async_inference_config_dict=None,
    )
示例#24
0
def test_deploy_tags(create_sagemaker_model, production_variant,
                     sagemaker_session):
    """deploy(tags=...) applies the tags to both the model and the endpoint."""
    model = Model(MODEL_DATA,
                  MODEL_IMAGE,
                  role=ROLE,
                  name=MODEL_NAME,
                  sagemaker_session=sagemaker_session)

    tags = [{"Key": "ModelName", "Value": "TestModel"}]
    model.deploy(instance_type=INSTANCE_TYPE,
                 initial_instance_count=INSTANCE_COUNT,
                 tags=tags)

    create_sagemaker_model.assert_called_with(INSTANCE_TYPE, None, tags)
    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=MODEL_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=tags,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
    )
示例#25
0
def test_deploy_wrong_inference_type(sagemaker_session):
    """deploy() without a serverless config requires BOTH instance_type and
    initial_instance_count; any partial/missing combination raises ValueError.

    Fixes the original call ``model.deploy(args)``, which passed the kwargs
    dict positionally (as ``instance_type``) instead of unpacking it, so the
    intended argument combinations were never actually exercised.
    """
    model = Model(MODEL_IMAGE, MODEL_DATA, role=ROLE)

    # Each entry is an invalid keyword-argument combination for deploy().
    bad_args = (
        {
            "instance_type": INSTANCE_TYPE
        },
        {
            "initial_instance_count": INSTANCE_COUNT
        },
        {
            "instance_type": None,
            "initial_instance_count": None
        },
    )
    for args in bad_args:
        with pytest.raises(
                ValueError,
                match=
                "Must specify instance type and instance count unless using serverless inference",
        ):
            model.deploy(**args)
示例#26
0
def _predictor(model_dir, image, framework_version, sagemaker_local_session,
               instance_type):
    """Generator helper: deploy a local-mode model and yield its predictor,
    deleting the endpoint on exit.

    Fixes the original cleanup path: if ``model.deploy`` raised, ``predictor``
    was unbound and the ``finally`` block raised NameError, masking the real
    deployment error.
    """
    # Pick the artifact matching the serving framework baked into the image.
    model_file = pt_model if "pytorch" in image else tf_model

    model = Model(
        model_data=f"file://{model_dir}/{model_file}",
        role=ROLE,
        image_uri=image,
        sagemaker_session=sagemaker_local_session,
        predictor_cls=Predictor,
    )
    # Lock serializes local-mode containers, which share host ports.
    with local_mode_utils.lock():
        predictor = None
        try:
            predictor = model.deploy(1, instance_type)
            yield predictor
        finally:
            # Only clean up if deployment actually succeeded.
            if predictor is not None:
                predictor.delete_endpoint()
示例#27
0
def _test_sm_trained_model(sagemaker_session,
                           framework_version,
                           ecr_image,
                           instance_type,
                           model_dir,
                           accelerator_type=None):
    """Upload a locally trained HuggingFace model, deploy it, and assert the
    text-classification endpoint returns a score."""
    endpoint_name = sagemaker.utils.unique_name_from_base(
        "sagemaker-huggingface-serving-trained-model")

    model_data = sagemaker_session.upload_data(
        path=model_dir,
        key_prefix="sagemaker-huggingface-serving-trained-model/models",
    )

    # Pick the artifact matching the serving framework baked into the image.
    model_file = pt_model if "pytorch" in ecr_image else tf_model

    hf_model = Model(
        model_data=f"{model_data}/{model_file}",
        role="SageMakerRole",
        image_uri=ecr_image,
        sagemaker_session=sagemaker_session,
        predictor_cls=Predictor,
    )

    # Endpoint is torn down automatically when the context exits.
    with timeout_and_delete_endpoint(endpoint_name,
                                     sagemaker_session,
                                     minutes=30):
        predictor = hf_model.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            endpoint_name=endpoint_name,
        )

        data = {
            "inputs":
            "Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."
        }
        predictor.serializer = JSONSerializer()
        predictor.deserializer = JSONDeserializer()

        output = predictor.predict(data)

        assert "score" in output[0]
示例#28
0
def test_deploy_predictor_cls(production_variant, sagemaker_session):
    """deploy() returns an instance of the legacy RealTimePredictor class
    bound to the requested endpoint (pre-2.x SageMaker SDK API)."""
    model = Model(
        MODEL_DATA,
        MODEL_IMAGE,
        role=ROLE,
        name=MODEL_NAME,
        predictor_cls=sagemaker.predictor.RealTimePredictor,
        sagemaker_session=sagemaker_session,
    )

    endpoint_name = "foo"
    predictor = model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        endpoint_name=endpoint_name,
    )

    assert isinstance(predictor, sagemaker.predictor.RealTimePredictor)
    assert predictor.endpoint == endpoint_name
    assert predictor.sagemaker_session == sagemaker_session
# -*- coding: utf-8 -*-
"""Deploy an XGBoost model (artifact location and role read from config.yaml)
to a SageMaker real-time endpoint."""
import boto3
import yaml

import sagemaker
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.model import Model

# Load deployment settings. safe_load: yaml.load without a Loader is
# deprecated and can execute arbitrary objects from an untrusted file.
with open('config.yaml', "r") as f:
    config = yaml.safe_load(f)

sess = sagemaker.Session()
role = config['role']
model_data = config['model_data']

if __name__ == '__main__':
    # Resolve the region-specific XGBoost serving container image.
    container = get_image_uri(boto3.Session().region_name, 'xgboost')

    xgb = Model(model_data=model_data,
                image=container,
                role=role,
                sagemaker_session=sess)

    xgb_predictor = xgb.deploy(initial_instance_count=1,
                               instance_type='ml.m4.xlarge')
示例#30
0
args = parser.parse_args()

# Deployment settings come from an external config map.
role = config.configmap["role"]
image = config.configmap["image"]
model_data = config.configmap["model"]
sm_model_name = config.configmap["model_name"]
endpoint_name = config.configmap["endpoint_name"]
file_name = "0.png"

if args.deploy:
    torchserve_model = Model(
            model_data=model_data,
            image_uri=image,
            role=role,
            predictor_cls=RealTimePredictor,
            name=sm_model_name)

    torchserve_model.deploy(
        instance_type='ml.m4.xlarge',
        initial_instance_count=1,
        endpoint_name=endpoint_name)

# Read the sample image as raw bytes for inference.
# (Removed a redundant `payload = payload` no-op from the original.)
with open(file_name, 'rb') as f:
    payload = f.read()

predictor = RealTimePredictor(endpoint_name)
response = predictor.predict(data=payload)
print("Model prediction: {}".format(json.loads(response)))