def test_deploy_async_inference(production_variant, name_from_base, sagemaker_session):
    model = Model(
        MODEL_IMAGE, MODEL_DATA, role=ROLE, name=MODEL_NAME, sagemaker_session=sagemaker_session
    )

    async_inference_config = AsyncInferenceConfig(output_path="s3://some-path")
    async_inference_config_dict = {
        "OutputConfig": {
            "S3OutputPath": "s3://some-path",
        },
    }

    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        async_inference_config=async_inference_config,
    )

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=ENDPOINT_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
        async_inference_config_dict=async_inference_config_dict,
    )

def test_deploy_accelerator_type(production_variant, create_sagemaker_model, sagemaker_session):
    model = Model(
        MODEL_DATA, MODEL_IMAGE, role=ROLE, name=MODEL_NAME, sagemaker_session=sagemaker_session
    )

    production_variant_result = copy.deepcopy(BASE_PRODUCTION_VARIANT)
    production_variant_result["AcceleratorType"] = ACCELERATOR_TYPE
    production_variant.return_value = production_variant_result

    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        accelerator_type=ACCELERATOR_TYPE,
    )

    create_sagemaker_model.assert_called_with(INSTANCE_TYPE, ACCELERATOR_TYPE, None)
    production_variant.assert_called_with(
        MODEL_NAME, INSTANCE_TYPE, INSTANCE_COUNT, accelerator_type=ACCELERATOR_TYPE
    )

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=MODEL_NAME,
        production_variants=[production_variant_result],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
    )

def test_deploy_update_endpoint(sagemaker_session):
    model = Model(MODEL_DATA, MODEL_IMAGE, role=ROLE, sagemaker_session=sagemaker_session)
    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        update_endpoint=True,
    )

    sagemaker_session.create_endpoint_config.assert_called_with(
        name=model.name,
        model_name=model.name,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        accelerator_type=None,
        tags=None,
        kms_key=None,
        data_capture_config_dict=None,
    )
    config_name = sagemaker_session.create_endpoint_config(
        name=model.name,
        model_name=model.name,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        accelerator_type=ACCELERATOR_TYPE,
    )

    sagemaker_session.update_endpoint.assert_called_with(model.name, config_name, wait=True)
    sagemaker_session.create_endpoint.assert_not_called()

def test_deploy_predictor_cls(production_variant, sagemaker_session):
    model = Model(
        MODEL_IMAGE,
        MODEL_DATA,
        role=ROLE,
        name=MODEL_NAME,
        predictor_cls=sagemaker.predictor.Predictor,
        sagemaker_session=sagemaker_session,
    )

    endpoint_name = "foo"

    predictor = model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        endpoint_name=endpoint_name,
    )

    assert isinstance(predictor, sagemaker.predictor.Predictor)
    assert predictor.endpoint_name == endpoint_name
    assert predictor.sagemaker_session == sagemaker_session

    endpoint_name_async = "foo-async"

    predictor_async = model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        endpoint_name=endpoint_name_async,
        async_inference_config=AsyncInferenceConfig(),
    )

    assert isinstance(predictor_async, sagemaker.predictor_async.AsyncPredictor)
    assert predictor_async.name == model.name
    assert predictor_async.endpoint_name == endpoint_name_async
    assert predictor_async.sagemaker_session == sagemaker_session

def test_deploy_no_role(sagemaker_session):
    model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session)

    with pytest.raises(ValueError, match="Role can not be null for deploying a model"):
        model.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT)

def endpoint_name(sagemaker_session):
    endpoint_name = unique_name_from_base("model-quality-monitor-integ")
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(XGBOOST_DATA_PATH, "xgb_model.tar.gz"),
        key_prefix="integ-test-data/xgboost/model",
    )
    xgb_image = image_uris.retrieve(
        "xgboost",
        sagemaker_session.boto_region_name,
        version="1",
        image_scope="inference",
    )

    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
        endpoint_name=endpoint_name, sagemaker_session=sagemaker_session, hours=2
    ):
        xgb_model = Model(
            model_data=xgb_model_data,
            image_uri=xgb_image,
            name=endpoint_name,  # model name
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )
        xgb_model.deploy(
            INSTANCE_COUNT,
            INSTANCE_TYPE,
            endpoint_name=endpoint_name,
            data_capture_config=DataCaptureConfig(True, sagemaker_session=sagemaker_session),
        )
        yield endpoint_name

def test_deploy_data_capture_config(production_variant, name_from_base, sagemaker_session):
    model = Model(
        MODEL_IMAGE, MODEL_DATA, role=ROLE, name=MODEL_NAME, sagemaker_session=sagemaker_session
    )

    data_capture_config = Mock()
    data_capture_config_dict = {"EnableCapture": True}
    data_capture_config._to_request_dict.return_value = data_capture_config_dict

    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        data_capture_config=data_capture_config,
    )

    data_capture_config._to_request_dict.assert_called_with()
    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=ENDPOINT_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=data_capture_config_dict,
        async_inference_config_dict=None,
    )

def test_deploy_wrong_serverless_config(sagemaker_session):
    model = Model(MODEL_IMAGE, MODEL_DATA, role=ROLE)

    with pytest.raises(
        ValueError,
        match="serverless_inference_config needs to be a ServerlessInferenceConfig object",
    ):
        model.deploy(serverless_inference_config={})

def test_jumpstart_inference_model_class(setup):
    model_id, model_version = "catboost-classification-model", "1.0.0"
    instance_type, instance_count = "ml.m5.xlarge", 1

    print("Starting inference...")

    image_uri = image_uris.retrieve(
        region=None,
        framework=None,
        image_scope="inference",
        model_id=model_id,
        model_version=model_version,
        instance_type=instance_type,
    )
    script_uri = script_uris.retrieve(
        model_id=model_id, model_version=model_version, script_scope="inference"
    )
    model_uri = model_uris.retrieve(
        model_id=model_id, model_version=model_version, model_scope="inference"
    )

    model = Model(
        image_uri=image_uri,
        model_data=model_uri,
        source_dir=script_uri,
        entry_point=INFERENCE_ENTRY_POINT_SCRIPT_NAME,
        role=get_sm_session().get_caller_identity_arn(),
        sagemaker_session=get_sm_session(),
        enable_network_isolation=True,
    )

    model.deploy(
        initial_instance_count=instance_count,
        instance_type=instance_type,
        tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
    )

    endpoint_invoker = EndpointInvoker(endpoint_name=model.endpoint_name)

    download_inference_assets()
    ground_truth_label, features = get_tabular_data(InferenceTabularDataname.MULTICLASS)

    response = endpoint_invoker.invoke_tabular_endpoint(features)

    assert response is not None

def test_deploy_creates_correct_session(local_session, session):
    # We expect a LocalSession when deploying to instance_type = 'local'
    model = Model(MODEL_IMAGE, MODEL_DATA, role=ROLE)
    model.deploy(endpoint_name="blah", instance_type="local", initial_instance_count=1)
    assert model.sagemaker_session == local_session.return_value

    # We expect a real Session when deploying to instance_type != local/local_gpu
    model = Model(MODEL_IMAGE, MODEL_DATA, role=ROLE)
    model.deploy(
        endpoint_name="remote_endpoint",
        instance_type="ml.m4.4xlarge",
        initial_instance_count=2,
    )
    assert model.sagemaker_session == session.return_value

def test_script_mode_model_tags_jumpstart_models(repack_model, sagemaker_session):
    jumpstart_source_dir = f"s3://{list(JUMPSTART_BUCKET_NAME_SET)[0]}/source_dirs/source.tar.gz"
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=jumpstart_source_dir,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT)

    assert sagemaker_session.create_model.call_args_list[0][1]["tags"] == [
        {
            "Key": JumpStartTag.INFERENCE_SCRIPT_URI.value,
            "Value": jumpstart_source_dir,
        },
    ]
    assert sagemaker_session.endpoint_from_production_variants.call_args_list[0][1]["tags"] == [
        {
            "Key": JumpStartTag.INFERENCE_SCRIPT_URI.value,
            "Value": jumpstart_source_dir,
        },
    ]

    non_jumpstart_source_dir = "s3://blah/blah/blah"
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=non_jumpstart_source_dir,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT)

    assert {
        "Key": JumpStartTag.INFERENCE_SCRIPT_URI.value,
        "Value": non_jumpstart_source_dir,
    } not in sagemaker_session.create_model.call_args_list[0][1]["tags"]
    assert {
        "Key": JumpStartTag.INFERENCE_SCRIPT_URI.value,
        "Value": non_jumpstart_source_dir,
    } not in sagemaker_session.endpoint_from_production_variants.call_args_list[0][1]["tags"]

def test_deploy(name_from_base, prepare_container_def, production_variant, sagemaker_session):
    production_variant.return_value = BASE_PRODUCTION_VARIANT

    container_def = {"Image": MODEL_IMAGE, "Environment": {}, "ModelDataUrl": MODEL_DATA}
    prepare_container_def.return_value = container_def

    model = Model(MODEL_IMAGE, MODEL_DATA, role=ROLE, sagemaker_session=sagemaker_session)
    model.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT)

    name_from_base.assert_called_with(MODEL_IMAGE)
    assert 2 == name_from_base.call_count

    prepare_container_def.assert_called_with(
        INSTANCE_TYPE, accelerator_type=None, serverless_inference_config=None
    )
    production_variant.assert_called_with(
        MODEL_NAME,
        INSTANCE_TYPE,
        INSTANCE_COUNT,
        accelerator_type=None,
        serverless_inference_config=None,
    )
    sagemaker_session.create_model.assert_called_with(
        MODEL_NAME,
        ROLE,
        container_def,
        vpc_config=None,
        enable_network_isolation=False,
        tags=None,
    )
    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=MODEL_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
        async_inference_config_dict=None,
    )

def _test_hub_model(sagemaker_session, framework_version, ecr_image, instance_type, model_dir, accelerator_type=None):
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-huggingface-serving-hub-model")

    env = {
        "HF_MODEL_ID": "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
        "HF_TASK": "text-classification",
    }

    hf_model = Model(
        env=env,
        role="SageMakerRole",
        image_uri=ecr_image,
        sagemaker_session=sagemaker_session,
        predictor_cls=Predictor,
    )

    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        predictor = hf_model.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            endpoint_name=endpoint_name,
        )

        data = {
            "inputs": "Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."
        }
        predictor.serializer = JSONSerializer()
        predictor.deserializer = JSONDeserializer()
        output = predictor.predict(data)

        assert "score" in output[0]

def test_deploy_wrong_async_inference_config(sagemaker_session):
    model = Model(MODEL_IMAGE, MODEL_DATA, sagemaker_session=sagemaker_session, role=ROLE)

    with pytest.raises(
        ValueError,
        match="async_inference_config needs to be a AsyncInferenceConfig object",
    ):
        model.deploy(
            instance_type=INSTANCE_TYPE,
            initial_instance_count=INSTANCE_COUNT,
            async_inference_config={},
        )

def test_script_mode_model_uses_proper_sagemaker_submit_dir(repack_model, sagemaker_session):
    source_dir = "s3://blah/blah/blah"
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=source_dir,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT)

    assert (
        sagemaker_session.create_model.call_args_list[0][0][2]["Environment"][
            "SAGEMAKER_SUBMIT_DIRECTORY"
        ]
        == "/opt/ml/model/code"
    )

def test_deploy_async(production_variant, sagemaker_session):
    model = Model(
        MODEL_DATA, MODEL_IMAGE, role=ROLE, name=MODEL_NAME, sagemaker_session=sagemaker_session
    )
    model.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT, wait=False)

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=MODEL_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=None,
        wait=False,
        data_capture_config_dict=None,
    )

def test_deploy_update_endpoint_optional_args(sagemaker_session):
    endpoint_name = "endpoint-name"
    tags = [{"Key": "Value"}]
    kms_key = "foo"
    data_capture_config = Mock()

    model = Model(MODEL_DATA, MODEL_IMAGE, role=ROLE, sagemaker_session=sagemaker_session)
    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        update_endpoint=True,
        endpoint_name=endpoint_name,
        accelerator_type=ACCELERATOR_TYPE,
        tags=tags,
        kms_key=kms_key,
        wait=False,
        data_capture_config=data_capture_config,
    )

    sagemaker_session.create_endpoint_config.assert_called_with(
        name=model.name,
        model_name=model.name,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        accelerator_type=ACCELERATOR_TYPE,
        tags=tags,
        kms_key=kms_key,
        data_capture_config_dict=data_capture_config._to_request_dict(),
    )
    config_name = sagemaker_session.create_endpoint_config(
        name=model.name,
        model_name=model.name,
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        accelerator_type=ACCELERATOR_TYPE,
        wait=False,
    )

    sagemaker_session.update_endpoint.assert_called_with(endpoint_name, config_name, wait=False)
    sagemaker_session.create_endpoint.assert_not_called()

def test_script_mode_model_same_calls_as_framework(repack_model, sagemaker_session):
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=SCRIPT_URI,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT)

    assert len(sagemaker_session.create_model.call_args_list) == 1
    assert len(sagemaker_session.endpoint_from_production_variants.call_args_list) == 1
    assert len(repack_model.call_args_list) == 1

    generic_model_create_model_args = sagemaker_session.create_model.call_args_list
    generic_model_endpoint_from_production_variants_args = (
        sagemaker_session.endpoint_from_production_variants.call_args_list
    )
    generic_model_repack_model_args = repack_model.call_args_list

    sagemaker_session.create_model.reset_mock()
    sagemaker_session.endpoint_from_production_variants.reset_mock()
    repack_model.reset_mock()

    t = DummyFrameworkModel(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=SCRIPT_URI,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT)

    assert generic_model_create_model_args == sagemaker_session.create_model.call_args_list
    assert (
        generic_model_endpoint_from_production_variants_args
        == sagemaker_session.endpoint_from_production_variants.call_args_list
    )
    assert generic_model_repack_model_args == repack_model.call_args_list

def test_deploy_generates_endpoint_name_each_time_from_model_name(
    production_variant, base_from_name, name_from_base, sagemaker_session
):
    model = Model(
        MODEL_IMAGE, MODEL_DATA, name=MODEL_NAME, role=ROLE, sagemaker_session=sagemaker_session
    )

    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
    )
    model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
    )

    base_from_name.assert_called_with(MODEL_NAME)
    name_from_base.assert_called_with(base_from_name.return_value)
    assert 2 == name_from_base.call_count

def test_deploy_kms_key(production_variant, name_from_base, sagemaker_session):
    model = Model(
        MODEL_IMAGE, MODEL_DATA, role=ROLE, name=MODEL_NAME, sagemaker_session=sagemaker_session
    )

    key = "some-key-arn"
    model.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT, kms_key=key)

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=ENDPOINT_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=key,
        wait=True,
        data_capture_config_dict=None,
    )

def test_script_mode_model_uses_jumpstart_base_name(repack_model, sagemaker_session):
    jumpstart_source_dir = f"s3://{list(JUMPSTART_BUCKET_NAME_SET)[0]}/source_dirs/source.tar.gz"
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=jumpstart_source_dir,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT)

    assert sagemaker_session.create_model.call_args_list[0][0][0].startswith(
        JUMPSTART_RESOURCE_BASE_NAME
    )
    assert sagemaker_session.endpoint_from_production_variants.call_args_list[0][1][
        "name"
    ].startswith(JUMPSTART_RESOURCE_BASE_NAME)

    sagemaker_session.create_model.reset_mock()
    sagemaker_session.endpoint_from_production_variants.reset_mock()

    non_jumpstart_source_dir = "s3://blah/blah/blah"
    t = Model(
        entry_point=ENTRY_POINT_INFERENCE,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        source_dir=non_jumpstart_source_dir,
        image_uri=IMAGE_URI,
        model_data=MODEL_DATA,
    )
    t.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT)

    assert not sagemaker_session.create_model.call_args_list[0][0][0].startswith(
        JUMPSTART_RESOURCE_BASE_NAME
    )
    assert not sagemaker_session.endpoint_from_production_variants.call_args_list[0][1][
        "name"
    ].startswith(JUMPSTART_RESOURCE_BASE_NAME)

def test_deploy_endpoint_name(sagemaker_session):
    model = Model(MODEL_IMAGE, MODEL_DATA, role=ROLE, sagemaker_session=sagemaker_session)

    endpoint_name = "blah"
    model.deploy(
        endpoint_name=endpoint_name,
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
    )

    assert endpoint_name == model.endpoint_name
    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=endpoint_name,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
    )

def test_deploy_serverless_inference(production_variant, create_sagemaker_model, sagemaker_session):
    model = Model(
        MODEL_IMAGE, MODEL_DATA, role=ROLE, name=MODEL_NAME, sagemaker_session=sagemaker_session
    )

    production_variant_result = copy.deepcopy(BASE_PRODUCTION_VARIANT)
    production_variant.return_value = production_variant_result

    serverless_inference_config = ServerlessInferenceConfig()
    serverless_inference_config_dict = {
        "MemorySizeInMB": 2048,
        "MaxConcurrency": 5,
    }

    model.deploy(serverless_inference_config=serverless_inference_config)

    create_sagemaker_model.assert_called_with(None, None, None, serverless_inference_config)
    production_variant.assert_called_with(
        MODEL_NAME,
        None,
        None,
        accelerator_type=None,
        serverless_inference_config=serverless_inference_config_dict,
    )

    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=ENDPOINT_NAME,
        production_variants=[production_variant_result],
        tags=None,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
        async_inference_config_dict=None,
    )

def test_deploy_tags(create_sagemaker_model, production_variant, sagemaker_session):
    model = Model(
        MODEL_DATA, MODEL_IMAGE, role=ROLE, name=MODEL_NAME, sagemaker_session=sagemaker_session
    )

    tags = [{"Key": "ModelName", "Value": "TestModel"}]
    model.deploy(instance_type=INSTANCE_TYPE, initial_instance_count=INSTANCE_COUNT, tags=tags)

    create_sagemaker_model.assert_called_with(INSTANCE_TYPE, None, tags)
    sagemaker_session.endpoint_from_production_variants.assert_called_with(
        name=MODEL_NAME,
        production_variants=[BASE_PRODUCTION_VARIANT],
        tags=tags,
        kms_key=None,
        wait=True,
        data_capture_config_dict=None,
    )

def test_deploy_wrong_inference_type(sagemaker_session):
    model = Model(MODEL_IMAGE, MODEL_DATA, role=ROLE)

    bad_args = (
        {"instance_type": INSTANCE_TYPE},
        {"initial_instance_count": INSTANCE_COUNT},
        {"instance_type": None, "initial_instance_count": None},
    )
    for args in bad_args:
        with pytest.raises(
            ValueError,
            match="Must specify instance type and instance count unless using serverless inference",
        ):
            model.deploy(**args)

def _predictor(model_dir, image, framework_version, sagemaker_local_session, instance_type):
    model_file = pt_model if "pytorch" in image else tf_model
    model = Model(
        model_data=f"file://{model_dir}/{model_file}",
        role=ROLE,
        image_uri=image,
        sagemaker_session=sagemaker_local_session,
        predictor_cls=Predictor,
    )

    with local_mode_utils.lock():
        try:
            predictor = model.deploy(1, instance_type)
            yield predictor
        finally:
            predictor.delete_endpoint()

def _test_sm_trained_model(sagemaker_session, framework_version, ecr_image, instance_type, model_dir, accelerator_type=None):
    endpoint_name = sagemaker.utils.unique_name_from_base(
        "sagemaker-huggingface-serving-trained-model"
    )

    model_data = sagemaker_session.upload_data(
        path=model_dir,
        key_prefix="sagemaker-huggingface-serving-trained-model/models",
    )

    model_file = pt_model if "pytorch" in ecr_image else tf_model

    hf_model = Model(
        model_data=f"{model_data}/{model_file}",
        role="SageMakerRole",
        image_uri=ecr_image,
        sagemaker_session=sagemaker_session,
        predictor_cls=Predictor,
    )

    with timeout_and_delete_endpoint(endpoint_name, sagemaker_session, minutes=30):
        predictor = hf_model.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            endpoint_name=endpoint_name,
        )

        data = {
            "inputs": "Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."
        }
        predictor.serializer = JSONSerializer()
        predictor.deserializer = JSONDeserializer()
        output = predictor.predict(data)

        assert "score" in output[0]

# Legacy (SDK 1.x) variant of the predictor_cls test, using RealTimePredictor.
def test_deploy_predictor_cls(production_variant, sagemaker_session):
    model = Model(
        MODEL_DATA,
        MODEL_IMAGE,
        role=ROLE,
        name=MODEL_NAME,
        predictor_cls=sagemaker.predictor.RealTimePredictor,
        sagemaker_session=sagemaker_session,
    )

    endpoint_name = "foo"

    predictor = model.deploy(
        instance_type=INSTANCE_TYPE,
        initial_instance_count=INSTANCE_COUNT,
        endpoint_name=endpoint_name,
    )

    assert isinstance(predictor, sagemaker.predictor.RealTimePredictor)
    assert predictor.endpoint == endpoint_name
    assert predictor.sagemaker_session == sagemaker_session

# -*- coding: utf-8 -*-
import boto3
import yaml

import sagemaker
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.model import Model

with open("config.yaml", "r") as f:
    config = yaml.safe_load(f)  # safe_load avoids constructing arbitrary objects

sess = sagemaker.Session()
role = config["role"]
model_data = config["model_data"]

if __name__ == "__main__":
    container = get_image_uri(boto3.Session().region_name, "xgboost")
    xgb = Model(model_data=model_data, image=container, role=role, sagemaker_session=sess)
    xgb_predictor = xgb.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")
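    # Hedged follow-up sketch (assumption, not in the original script): since
    # no predictor_cls was passed to Model, deploy() returns no predictor
    # object here, so we invoke the endpoint through the low-level runtime
    # client instead. The CSV feature values are placeholders.
    runtime = sess.boto_session.client("sagemaker-runtime")
    response = runtime.invoke_endpoint(
        EndpointName=xgb.endpoint_name,
        ContentType="text/csv",
        Body="0.1,0.2,0.3",
    )
    print(response["Body"].read().decode("utf-8"))
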
args = parser.parse_args()

role = config.configmap["role"]
image = config.configmap["image"]
model_data = config.configmap["model"]
sm_model_name = config.configmap["model_name"]
endpoint_name = config.configmap["endpoint_name"]
file_name = "0.png"

if args.deploy:
    torchserve_model = Model(
        model_data=model_data,
        image_uri=image,
        role=role,
        predictor_cls=RealTimePredictor,
        name=sm_model_name,
    )
    torchserve_model.deploy(
        instance_type="ml.m4.xlarge",
        initial_instance_count=1,
        endpoint_name=endpoint_name,
    )

with open(file_name, "rb") as f:
    payload = f.read()

predictor = RealTimePredictor(endpoint_name)
response = predictor.predict(data=payload)
print("Model prediction: {}".format(json.loads(response)))
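# Hedged cleanup sketch (assumption, not part of the original snippet): look up
# the endpoint's config name via describe_endpoint, then delete the endpoint
# and its config with the low-level boto3 SageMaker client once done.
import boto3

sm_client = boto3.client("sagemaker")
config_name = sm_client.describe_endpoint(EndpointName=endpoint_name)["EndpointConfigName"]
sm_client.delete_endpoint(EndpointName=endpoint_name)
sm_client.delete_endpoint_config(EndpointConfigName=config_name)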