class Predictor(object):
    """Thin client for a SageMaker hosting endpoint serving an RL model.

    Wraps a ``RealTimePredictor`` configured for JSON request/response
    bodies and exposes the request types understood by the hosted server
    (``observation`` and ``model_id``).
    """

    def __init__(self, endpoint_name, sagemaker_session=None):
        """
        Args:
            endpoint_name (str): name of the Sagemaker endpoint
            sagemaker_session (sagemaker.session.Session): Manage interactions
                with the Amazon SageMaker APIs and any other AWS services needed.
        """
        self.endpoint_name = endpoint_name
        self._realtime_predictor = RealTimePredictor(
            endpoint_name,
            serializer=sagemaker.predictor.json_serializer,
            deserializer=sagemaker.predictor.json_deserializer,
            sagemaker_session=sagemaker_session,
        )

    def get_action(self, obs=None):
        """Get a prediction from the endpoint.

        Args:
            obs (list/str): observation of the environment

        Returns:
            tuple: ``(action, event_id, model_id, action_prob, sample_prob)``
                where ``action`` is the action to take, ``event_id`` identifies
                this prediction, ``model_id`` is the hosted model's id,
                ``action_prob`` is the action probability distribution and
                ``sample_prob`` is the sample probability distribution used
                for data split.
        """
        request = {"request_type": "observation", "observation": obs}
        response = self._realtime_predictor.predict(request)
        return (
            response["action"],
            response["event_id"],
            response["model_id"],
            response["action_prob"],
            response["sample_prob"],
        )

    def get_hosted_model_id(self):
        """Return the id of the model currently hosted on the endpoint.

        Returns:
            str: model id of the model being hosted
        """
        request = {"request_type": "model_id", "observation": None}
        return self._realtime_predictor.predict(request)["model_id"]

    def delete_endpoint(self):
        """Delete the SageMaker hosting endpoint."""
        logger.warning(f"Deleting hosting endpoint '{self.endpoint_name}'...")
        self._realtime_predictor.delete_endpoint()
def test_multi_data_model_deploy_pretrained_models_local_mode(container_image, sagemaker_session):
    """Serve two copies of a pretrained model from one local-mode
    multi-data-model endpoint, invoke both by ``target_model``, then clean up
    and verify the model and endpoint config are gone.
    """
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)
    # Define pretrained model local path
    pretrained_model_data_local_path = os.path.join(DATA_DIR, "sparkml_model", "mleap_model.tar.gz")
    with timeout(minutes=30):
        model_data_prefix = os.path.join(
            "s3://", sagemaker_session.default_bucket(), "multimodel-{}/".format(timestamp)
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            image=container_image,
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )
        # Add model before deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, "local", endpoint_name=endpoint_name)
        # Add models after deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_2)
        endpoint_models = []
        for model_path in multi_data_model.list_models():
            endpoint_models.append(model_path)
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models
        predictor = RealTimePredictor(
            endpoint=endpoint_name,
            sagemaker_session=multi_data_model.sagemaker_session,
            serializer=npy_serializer,
            deserializer=string_deserializer,
        )
        data = numpy.zeros(shape=(1, 1, 28, 28))
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)
        # Cleanup
        multi_data_model.sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=endpoint_name
        )
        multi_data_model.sagemaker_session.delete_endpoint(endpoint_name)
        multi_data_model.delete_model()
    # BUG FIX: previously both describe_* calls shared a single pytest.raises
    # block, so the asserts (and the second call) never executed once the first
    # call raised; also describe_endpoint_config takes EndpointConfigName, not
    # `name`. Each check now gets its own raises block with the assert outside.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=multi_data_model.name)
    assert "Could not find model" in str(exception.value)
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_name
        )
    assert "Could not find endpoint" in str(exception.value)
def test_inference_pipeline_model_deploy(sagemaker_session, cpu_instance_type):
    """Integration test: deploy a two-stage SparkML -> XGBoost PipelineModel
    behind a real endpoint and verify predictions for valid and invalid CSV
    payloads, then delete the model and verify it is gone.
    """
    sparkml_data_path = os.path.join(DATA_DIR, "sparkml_model")
    xgboost_data_path = os.path.join(DATA_DIR, "xgboost_model")
    endpoint_name = "test-inference-pipeline-deploy-{}".format(sagemaker_timestamp())
    # Upload both pretrained model artifacts to the session's default bucket.
    sparkml_model_data = sagemaker_session.upload_data(
        path=os.path.join(sparkml_data_path, "mleap_model.tar.gz"),
        key_prefix="integ-test-data/sparkml/model",
    )
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(xgboost_data_path, "xgb_model.tar.gz"),
        key_prefix="integ-test-data/xgboost/model",
    )
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        sparkml_model = SparkMLModel(
            model_data=sparkml_model_data,
            env={"SAGEMAKER_SPARKML_SCHEMA": SCHEMA},
            sagemaker_session=sagemaker_session,
        )
        xgb_image = get_image_uri(sagemaker_session.boto_region_name, "xgboost")
        xgb_model = Model(model_data=xgb_model_data, image=xgb_image, sagemaker_session=sagemaker_session)
        # Models run in order: SparkML feature transform feeds XGBoost.
        model = PipelineModel(
            models=[sparkml_model, xgb_model],
            role="SageMakerRole",
            sagemaker_session=sagemaker_session,
            name=endpoint_name,
        )
        model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        # JSON serializer on the client, CSV content type on the wire.
        predictor = RealTimePredictor(
            endpoint=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=json_serializer,
            content_type=CONTENT_TYPE_CSV,
            accept=CONTENT_TYPE_CSV,
        )
        with open(VALID_DATA_PATH, "r") as f:
            valid_data = f.read()
            assert predictor.predict(valid_data) == "0.714013934135"
        with open(INVALID_DATA_PATH, "r") as f:
            invalid_data = f.read()
            # Malformed input yields no prediction from the pipeline.
            assert predictor.predict(invalid_data) is None
    model.delete_model()
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
        # NOTE(review): this assert sits inside the raises block, so it only
        # runs if describe_model does NOT raise — likely meant to be dedented.
        assert "Could not find model" in str(exception.value)
def test_predict_invocation_with_target_variant(sagemaker_session, multi_variant_endpoint):
    """Smoke test: predict() accepts a target_variant without raising."""
    predictor = RealTimePredictor(
        endpoint=multi_variant_endpoint.endpoint_name,
        sagemaker_session=sagemaker_session,
        serializer=csv_serializer,
        content_type=CONTENT_TYPE_CSV,
        accept=CONTENT_TYPE_CSV,
    )
    # Validate that no exception is raised for either production variant.
    for variant in (TEST_VARIANT_1, TEST_VARIANT_2):
        predictor.predict(TEST_CSV_DATA, target_variant=variant)
def test_classification_request_pb(sagemaker_session):
    """Round-trip a TF-Serving classification request/response as protobuf.

    NOTE(review): this snippet appears truncated — the expected-string literal
    in the Python 3 branch is cut off mid-literal.
    """
    # Build a ClassificationRequest protobuf with a single 4-feature example.
    request = classification_pb2.ClassificationRequest()
    request.model_spec.name = "generic_model"
    request.model_spec.signature_name = DEFAULT_SERVING_SIGNATURE_DEF_KEY
    example = request.input.example_list.examples.add()
    example.features.feature[PREDICT_INPUTS].float_list.value.extend([6.4, 3.2, 4.5, 1.5])
    predictor = RealTimePredictor(sagemaker_session=sagemaker_session,
                                  endpoint=ENDPOINT,
                                  deserializer=tf_deserializer,
                                  serializer=tf_serializer)
    # Build the protobuf response the mocked endpoint will return: one
    # classification with three scored class labels.
    expected_response = classification_pb2.ClassificationResponse()
    classes = expected_response.result.classifications.add().classes
    class_0 = classes.add()
    class_0.label = "0"
    class_0.score = 0.00128903763834
    class_1 = classes.add()
    class_1.label = "1"
    class_1.score = 0.981432199478
    class_2 = classes.add()
    class_2.label = "2"
    class_2.score = 0.0172787327319
    mock_response(expected_response.SerializeToString(), sagemaker_session, PROTO_CONTENT_TYPE)
    result = predictor.predict(request)
    # The request body must be the serialized protobuf with proto content type.
    sagemaker_session.sagemaker_runtime_client.invoke_endpoint.assert_called_once_with(
        Accept=PROTO_CONTENT_TYPE,
        Body=request.SerializeToString(),
        ContentType=PROTO_CONTENT_TYPE,
        EndpointName='myendpoint'
    )
    # python 2 and 3 protobuf serialization has different precision so I'm checking
    # the version here
    if sys.version_info < (3, 0):
        assert str(result) == """result { classifications { classes { label: "0" score: 0.00128903763834 } classes { label: "1" score: 0.981432199478 } classes { label: "2" score: 0.0172787327319 } } } """
    else:
        assert str(result) == """result {
def test_predict_call_with_headers_and_json():
    """Explicit content_type/accept override the JSON serializer's defaults."""
    session = json_sagemaker_session()
    predictor = RealTimePredictor(
        ENDPOINT,
        session,
        content_type="not/json",
        accept="also/not-json",
        serializer=json_serializer,
    )
    payload = [1, 2]
    response = predictor.predict(payload)
    runtime = session.sagemaker_runtime_client
    assert runtime.invoke_endpoint.called
    _, invoke_kwargs = runtime.invoke_endpoint.call_args
    assert invoke_kwargs == {
        "Accept": "also/not-json",
        "Body": json.dumps(payload),
        "ContentType": "not/json",
        "EndpointName": ENDPOINT,
    }
    assert response == json.dumps([RETURN_VALUE])
def test_predict_tensor_request_csv(sagemaker_session):
    """A TensorProto request is flattened to CSV; the JSON reply is decoded."""
    values = [6.4, 3.2, 0.5, 1.5]
    tensor_proto = tf.make_tensor_proto(
        values=np.asarray(values), shape=[1, len(values)], dtype=tf.float32
    )
    predictor = RealTimePredictor(
        endpoint=ENDPOINT,
        sagemaker_session=sagemaker_session,
        serializer=tf_csv_serializer,
        deserializer=tf_json_deserializer,
    )
    mock_response(
        json.dumps(CLASSIFICATION_RESPONSE).encode("utf-8"), sagemaker_session, JSON_CONTENT_TYPE
    )
    outcome = predictor.predict(tensor_proto)
    sagemaker_session.sagemaker_runtime_client.invoke_endpoint.assert_called_once_with(
        Accept=JSON_CONTENT_TYPE,
        Body="6.4,3.2,0.5,1.5",
        ContentType=CSV_CONTENT_TYPE,
        EndpointName="myendpoint",
    )
    assert outcome == CLASSIFICATION_RESPONSE
def execute_inference(image):
    """Send an image tensor to the SageMaker endpoint and return the result.

    The tensor is JSON-encoded for the request and the JSON response is
    decoded back into a Python dict.
    """
    boto_session = boto3.Session(
        aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
        aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
    )
    predictor = RealTimePredictor(
        endpoint='clothes-30-model',
        sagemaker_session=sagemaker.Session(boto_session=boto_session),
        content_type='application/json',
        accept='application/json',
    )
    # Convert the tensor to a JSON list before sending.
    request_body = json.dumps(image.tolist())
    # Invoke the endpoint and decode the JSON result.
    return json.loads(predictor.predict(request_body))
def test_inference_pipeline_model_deploy(sagemaker_session):
    """Integration test: deploy a SparkML -> XGBoost PipelineModel on an
    ml.m4.xlarge endpoint and verify predictions for valid and invalid CSV
    payloads, then delete the model and verify it is gone.
    """
    sparkml_data_path = os.path.join(DATA_DIR, 'sparkml_model')
    xgboost_data_path = os.path.join(DATA_DIR, 'xgboost_model')
    endpoint_name = 'test-inference-pipeline-deploy-{}'.format(sagemaker_timestamp())
    # Upload both pretrained model artifacts to the session's default bucket.
    sparkml_model_data = sagemaker_session.upload_data(
        path=os.path.join(sparkml_data_path, 'mleap_model.tar.gz'),
        key_prefix='integ-test-data/sparkml/model')
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(xgboost_data_path, 'xgb_model.tar.gz'),
        key_prefix='integ-test-data/xgboost/model')
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        sparkml_model = SparkMLModel(model_data=sparkml_model_data,
                                     env={'SAGEMAKER_SPARKML_SCHEMA': SCHEMA},
                                     sagemaker_session=sagemaker_session)
        xgb_image = get_image_uri(sagemaker_session.boto_region_name, 'xgboost')
        xgb_model = Model(model_data=xgb_model_data, image=xgb_image,
                          sagemaker_session=sagemaker_session)
        # Models run in order: SparkML feature transform feeds XGBoost.
        model = PipelineModel(models=[sparkml_model, xgb_model],
                              role='SageMakerRole',
                              sagemaker_session=sagemaker_session,
                              name=endpoint_name)
        model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        # JSON serializer on the client, CSV content type on the wire.
        predictor = RealTimePredictor(endpoint=endpoint_name,
                                      sagemaker_session=sagemaker_session,
                                      serializer=json_serializer,
                                      content_type=CONTENT_TYPE_CSV,
                                      accept=CONTENT_TYPE_CSV)
        with open(VALID_DATA_PATH, 'r') as f:
            valid_data = f.read()
            assert predictor.predict(valid_data) == '0.714013934135'
        with open(INVALID_DATA_PATH, 'r') as f:
            invalid_data = f.read()
            # Malformed input yields no prediction from the pipeline.
            assert (predictor.predict(invalid_data) is None)
    model.delete_model()
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
        # NOTE(review): this assert sits inside the raises block, so it only
        # runs if describe_model does NOT raise — likely meant to be dedented.
        assert 'Could not find model' in str(exception.value)
def test_predict_call_pass_through():
    """Without a serializer or content type, predict() forwards the payload
    untouched and returns the raw response."""
    session = empty_sagemaker_session()
    payload = "untouched"
    response = RealTimePredictor(ENDPOINT, session).predict(payload)
    runtime = session.sagemaker_runtime_client
    assert runtime.invoke_endpoint.called
    _, invoke_kwargs = runtime.invoke_endpoint.call_args
    assert invoke_kwargs == {"Body": payload, "EndpointName": ENDPOINT}
    assert response == RETURN_VALUE
def test_predict_call_pass_through():
    """predict() with no serializer: request body and return value pass
    through unchanged."""
    session = empty_sagemaker_session()
    runtime = session.sagemaker_runtime_client
    body = "untouched"
    outcome = RealTimePredictor(ENDPOINT, session).predict(body)
    assert runtime.invoke_endpoint.called
    # call_args[1] holds the keyword arguments of the invoke_endpoint call.
    assert runtime.invoke_endpoint.call_args[1] == {
        'Body': body,
        'EndpointName': ENDPOINT,
    }
    assert outcome == RETURN_VALUE
def main():
    """Describe the endpoint, then send the given file as a prediction payload
    and print the decoded response."""
    args = parse_args()
    sm = boto3.client('sagemaker')
    print(sm.describe_endpoint(EndpointName=args.endpoint))
    print('----------')
    predictor = RealTimePredictor(endpoint=args.endpoint)
    # Read the request payload as raw bytes.
    with open(args.file, 'rb') as f:
        payload = f.read()
    print(predictor.predict(data=payload).decode('utf-8'))
def lambda_handler(event, context):
    """Score a sparse factorization-machines payload on the endpoint named in
    the event and return the post-processed result."""
    fm_endpoint = event["sm_fm_endpoint"]
    attribute = event["attribute"]
    predictor = RealTimePredictor(endpoint=fm_endpoint, sagemaker_session=sm_sess)
    # Configure JSON request/response handling on the predictor.
    predictor.content_type = 'application/json'
    predictor.serializer = fm_serializer
    predictor.deserializer = json_deserializer
    raw_result = predictor.predict(build_spare_matrix_payload(attribute))
    return {'statusCode': 200, 'body': post_process(raw_result)}
def predict(payload, endpoint='brain-model-ep--2020-06-16-14-46-14'):
    """Run inference against a brain-model SageMaker endpoint.

    Args:
        payload: request body forwarded verbatim to the endpoint
            (``serializer=None`` means no client-side encoding).
        endpoint (str): name of the SageMaker endpoint. Defaults to the
            previously hard-coded brain-model endpoint, so existing callers
            are unaffected.

    Returns:
        str: the highest-scoring class label.
    """
    # AWS credentials and region are taken from the environment.
    region = os.environ['REGION']
    access_id = os.environ['ACCESS_ID']
    secret_key = os.environ['SECRET_KEY']
    boto_session = boto3.Session(
        region_name=region,
        aws_access_key_id=access_id,
        aws_secret_access_key=secret_key,
    )
    session = sagemaker.Session(boto_session=boto_session)
    predictor = RealTimePredictor(
        endpoint=endpoint, sagemaker_session=session, serializer=None
    )
    class_mappings = ['Intracranial', 'Mass Effect', 'Midline Shift']
    # The endpoint returns a JSON list of per-class scores.
    prediction = json.loads(predictor.predict(payload))
    # Highest-probability class wins.
    return class_mappings[np.argmax(prediction)]
def test_classification_request_csv(sagemaker_session):
    """CSV-serialized classification request with a protobuf-deserialized
    response.

    NOTE(review): this snippet appears truncated — the expected-string literal
    in the Python 3 branch is cut off mid-literal.
    """
    data = [1, 2, 3]
    predictor = RealTimePredictor(serializer=tf_csv_serializer,
                                  deserializer=tf_deserializer,
                                  sagemaker_session=sagemaker_session,
                                  endpoint=ENDPOINT)
    # Build the serialized protobuf the mocked endpoint will return.
    expected_response = json_format.Parse(
        json.dumps(CLASSIFICATION_RESPONSE), classification_pb2.ClassificationResponse()
    ).SerializeToString()
    mock_response(expected_response, sagemaker_session, PROTO_CONTENT_TYPE)
    result = predictor.predict(data)
    # The list payload must have been serialized as a CSV row.
    sagemaker_session.sagemaker_runtime_client.invoke_endpoint.assert_called_once_with(
        Accept=PROTO_CONTENT_TYPE,
        Body='1,2,3',
        ContentType=CSV_CONTENT_TYPE,
        EndpointName='myendpoint'
    )
    # python 2 and 3 protobuf serialization has different precision so I'm checking
    # the version here
    if sys.version_info < (3, 0):
        assert str(result) == """result { classifications { classes { label: "0" score: 0.00128903763834 } classes { label: "1" score: 0.981432199478 } classes { label: "2" score: 0.0172787327319 } } } """
    else:
        assert str(result) == """result {
def test_classification_request_json(sagemaker_session):
    """JSON in, JSON out: request body and response both use the JSON
    content type."""
    predictor = RealTimePredictor(endpoint=ENDPOINT,
                                  sagemaker_session=sagemaker_session,
                                  serializer=tf_json_serializer,
                                  deserializer=tf_json_deserializer)
    mock_response(json.dumps(CLASSIFICATION_RESPONSE).encode('utf-8'),
                  sagemaker_session, JSON_CONTENT_TYPE)
    outcome = predictor.predict([1, 2, 3])
    sagemaker_session.sagemaker_runtime_client.invoke_endpoint.assert_called_once_with(
        Accept=JSON_CONTENT_TYPE,
        Body='[1, 2, 3]',
        ContentType=JSON_CONTENT_TYPE,
        EndpointName='myendpoint')
    assert outcome == CLASSIFICATION_RESPONSE
def lambda_handler(event, context):
    """Score a credit-default payload on an explainer endpoint and report
    which SHAP feature contributes more.

    Returns a string naming the larger contributor, either
    ``credit_balance`` or ``credit_amount``.
    """
    data = json.loads(json.dumps(event))
    payload = data['data']
    # Overwriting value
    payload["contact__has_telephone"] = False
    # Response entities requested from the explainer container.
    entities = [
        'data',
        'features',
        'descriptions',
        'prediction',
        'explanation_shap_values',
        'explanation_shap_interaction_values',
    ]
    explainer = RealTimePredictor(
        endpoint=ENDPOINT_NAME,
        sagemaker_session=sagemaker_session,
        serializer=json_serializer,
        deserializer=json_deserializer,
        content_type="application/json; entities={}".format(",".join(entities)),
        accept=CONTENT_TYPE_JSON,
    )
    output = explainer.predict(payload)
    shap_values = output['explanation']['shap_values']
    prediction = output['prediction']
    print("Credit default risk: {:.2%}".format(prediction))
    credit_balance = abs(shap_values['finance__accounts__checking__balance__negative'])
    credit_amount = abs(shap_values['credit__amount'])
    print(credit_balance, credit_amount)
    result = (
        "credit_balance: {}".format(credit_balance)
        if credit_balance > credit_amount
        else "credit_amount: {}".format(credit_amount)
    )
    print(result)
    return result
def test_predict_tensor_request_csv(sagemaker_session):
    """A TensorProto payload is serialized to CSV; the JSON response is
    decoded back to a dict."""
    values = [6.4, 3.2, 0.5, 1.5]
    tensor_proto = tf.make_tensor_proto(values=np.asarray(values),
                                        shape=[1, len(values)],
                                        dtype=tf.float32)
    predictor = RealTimePredictor(endpoint=ENDPOINT,
                                  sagemaker_session=sagemaker_session,
                                  serializer=tf_csv_serializer,
                                  deserializer=tf_json_deserializer)
    mock_response(json.dumps(CLASSIFICATION_RESPONSE).encode('utf-8'),
                  sagemaker_session, JSON_CONTENT_TYPE)
    outcome = predictor.predict(tensor_proto)
    sagemaker_session.sagemaker_runtime_client.invoke_endpoint.assert_called_once_with(
        Accept=JSON_CONTENT_TYPE,
        Body='6.4,3.2,0.5,1.5',
        ContentType=CSV_CONTENT_TYPE,
        EndpointName='myendpoint')
    assert outcome == CLASSIFICATION_RESPONSE
def test_predict_call_with_headers_and_csv():
    """The CSV serializer sets the CSV content type and encodes the list."""
    session = ret_csv_sagemaker_session()
    predictor = RealTimePredictor(
        ENDPOINT, session, accept=CSV_CONTENT_TYPE, serializer=csv_serializer
    )
    outcome = predictor.predict([1, 2])
    runtime = session.sagemaker_runtime_client
    assert runtime.invoke_endpoint.called
    _, invoke_kwargs = runtime.invoke_endpoint.call_args
    assert invoke_kwargs == {
        'Accept': CSV_CONTENT_TYPE,
        'Body': '1,2',
        'ContentType': CSV_CONTENT_TYPE,
        'EndpointName': ENDPOINT,
    }
    assert outcome == CSV_RETURN_VALUE
def test_predict_call_with_headers():
    """Explicit content_type/accept are forwarded as request headers."""
    session = empty_sagemaker_session()
    predictor = RealTimePredictor(ENDPOINT, session,
                                  content_type=DEFAULT_CONTENT_TYPE,
                                  accept=DEFAULT_CONTENT_TYPE)
    payload = "untouched"
    outcome = predictor.predict(payload)
    runtime = session.sagemaker_runtime_client
    assert runtime.invoke_endpoint.called
    _, invoke_kwargs = runtime.invoke_endpoint.call_args
    assert invoke_kwargs == {
        'Accept': DEFAULT_CONTENT_TYPE,
        'Body': payload,
        'ContentType': DEFAULT_CONTENT_TYPE,
        'EndpointName': ENDPOINT,
    }
    assert outcome == RETURN_VALUE
def test_predict_call_with_headers_and_json():
    """content_type/accept kwargs win over the JSON serializer's defaults."""
    session = json_sagemaker_session()
    payload = [1, 2]
    outcome = RealTimePredictor(
        ENDPOINT,
        session,
        content_type='not/json',
        accept='also/not-json',
        serializer=json_serializer,
    ).predict(payload)
    runtime = session.sagemaker_runtime_client
    assert runtime.invoke_endpoint.called
    expected = {
        'Accept': 'also/not-json',
        'Body': json.dumps(payload),
        'ContentType': 'not/json',
        'EndpointName': ENDPOINT,
    }
    assert runtime.invoke_endpoint.call_args[1] == expected
    assert outcome == json.dumps([RETURN_VALUE])
def test_multi_model_predict_call_with_headers():
    """target_model is forwarded to invoke_endpoint as TargetModel."""
    session = empty_sagemaker_session()
    predictor = RealTimePredictor(
        ENDPOINT, session, content_type=DEFAULT_CONTENT_TYPE, accept=DEFAULT_CONTENT_TYPE
    )
    payload = "untouched"
    outcome = predictor.predict(payload, target_model="model.tar.gz")
    runtime = session.sagemaker_runtime_client
    assert runtime.invoke_endpoint.called
    _, invoke_kwargs = runtime.invoke_endpoint.call_args
    assert invoke_kwargs == {
        "Accept": DEFAULT_CONTENT_TYPE,
        "Body": payload,
        "ContentType": DEFAULT_CONTENT_TYPE,
        "EndpointName": ENDPOINT,
        "TargetModel": "model.tar.gz",
    }
    assert outcome == RETURN_VALUE
def test_predict_call_with_target_variant():
    """target_variant is forwarded to invoke_endpoint as TargetVariant."""
    session = empty_sagemaker_session()
    predictor = RealTimePredictor(
        ENDPOINT, session, content_type=DEFAULT_CONTENT_TYPE, accept=DEFAULT_CONTENT_TYPE
    )
    payload = "untouched"
    outcome = predictor.predict(payload, target_variant=PRODUCTION_VARIANT_1)
    runtime = session.sagemaker_runtime_client
    assert runtime.invoke_endpoint.called
    _, invoke_kwargs = runtime.invoke_endpoint.call_args
    assert invoke_kwargs == {
        "Accept": DEFAULT_CONTENT_TYPE,
        "Body": payload,
        "ContentType": DEFAULT_CONTENT_TYPE,
        "EndpointName": ENDPOINT,
        "TargetVariant": PRODUCTION_VARIANT_1,
    }
    assert outcome == RETURN_VALUE
from sagemaker.amazon.amazon_estimator import get_image_uri

endpoint_name = 'creditcardfraudlogistic'

# Deploy the pretrained linear-learner artifact behind a real-time endpoint.
Model(
    model_data='s3://creditcardfraud123/logistic/output/linear-learner-191112-2119-002-ac3cc459/output/model.tar.gz',
    image=get_image_uri(region_name='us-east-1', repo_name='linear-learner',
                        repo_version='latest'),
    role='AmazonSageMaker-ExecutionRole-20191005T164168',
).deploy(initial_instance_count=1, instance_type='ml.t2.2xlarge',
         endpoint_name=endpoint_name)

# CSV requests in, JSON responses out.
predictor = RealTimePredictor(endpoint_name)
predictor.content_type = 'text/csv'
predictor.serializer = csv_serializer
predictor.deserializer = json_deserializer

# One transaction record (feature vector) to score.
data = '83916.0,-0.46612620502545604,1.05888696127596,1.6867741713450801,-0.10791713399150099,-0.0534658672545062,-0.67078459643593,0.657296448523877,0.0267747128155009,-0.777065639315537,-0.16451379457928,1.6033857689344901,1.08437897734507,0.621801289885425,0.209210718774203,0.054395914001364995,0.30196805090530604,-0.610384355760504,-0.0111685840197793,0.22161067607904003,0.14522945875429,-0.155193422608588,-0.386047830532794,-0.019162727901044996,0.53588061095157,-0.22766218008636102,0.0387309462886897,0.266651773221212,0.114305983146032,2.58'
print(' ')
label = predictor.predict(data)['predictions'][0]['predicted_label']
print('Not a Fraudulent Transaction' if label == 0 else 'Fraudulent Transaction')

# Tear down the endpoint and model.
predictor.delete_endpoint()
predictor.delete_model()
#!/usr/local/bin/python3.7
"""Poll a BlazingText endpoint once per second and print the first vector value.

Fixes: removed the duplicate ``import boto3`` statements and the
``from boto3.session import Session`` import that was immediately shadowed by
``from sagemaker.session import Session`` (only the SageMaker Session is used),
plus the unused ``io``/``csv``/``get_execution_role`` imports.
"""
import json
import time

import boto3
from sagemaker.predictor import RealTimePredictor
from sagemaker.session import Session

boto3_session = boto3.Session(profile_name='Test')
sagemaker_sess = Session(boto_session=boto3_session)
bt_endpoint = RealTimePredictor("blazingtext-2018-10-08-14-55-37-874", sagemaker_sess)

words = ["awesome"]
payload = {"instances": words}

# Poll forever, printing the first component of the returned word vector.
i = 0
while True:
    response = bt_endpoint.predict(json.dumps(payload))
    vecs = json.loads(response)
    i = i + 1
    print("First vec value : " + str(vecs[0]['vector'][0]) + " & " + str(i))
    time.sleep(1)
args = parser.parse_args()

# Pull deployment settings from the shared config map.
role = config.configmap["role"]
image = config.configmap["image"]
model_data = config.configmap["model"]
sm_model_name = config.configmap["model_name"]
endpoint_name = config.configmap["endpoint_name"]
file_name = "0.png"

if args.deploy:
    # Register and deploy the TorchServe model behind a real-time endpoint.
    torchserve_model = Model(
        model_data=model_data,
        image_uri=image,
        role=role,
        predictor_cls=RealTimePredictor,
        name=sm_model_name,
    )
    torchserve_model.deploy(
        instance_type='ml.m4.xlarge',
        initial_instance_count=1,
        endpoint_name=endpoint_name,
    )

# Send the sample image as raw bytes and print the decoded prediction.
with open(file_name, 'rb') as f:
    payload = f.read()
predictor = RealTimePredictor(endpoint_name)
response = predictor.predict(data=payload)
print("Model prediction: {}".format(json.loads(response)))
def test_inference_pipeline_model_deploy(sagemaker_session):
    """Integration test: SparkML -> XGBoost pipeline with an inline SparkML
    serving schema, exercised with one valid and one malformed CSV row.
    """
    sparkml_data_path = os.path.join(DATA_DIR, 'sparkml_model')
    xgboost_data_path = os.path.join(DATA_DIR, 'xgboost_model')
    endpoint_name = 'test-inference-pipeline-deploy-{}'.format(sagemaker_timestamp())
    sparkml_model_data = sagemaker_session.upload_data(
        path=os.path.join(sparkml_data_path, 'mleap_model.tar.gz'),
        key_prefix='integ-test-data/sparkml/model')
    xgb_model_data = sagemaker_session.upload_data(
        path=os.path.join(xgboost_data_path, 'xgb_model.tar.gz'),
        key_prefix='integ-test-data/xgboost/model')
    # SparkML serving schema: expected input columns and the output vector.
    schema = json.dumps({
        "input": [
            {
                "name": "Pclass",
                "type": "float"
            },
            {
                "name": "Embarked",
                "type": "string"
            },
            {
                "name": "Age",
                "type": "float"
            },
            {
                "name": "Fare",
                "type": "float"
            },
            {
                "name": "SibSp",
                "type": "float"
            },
            {
                "name": "Sex",
                "type": "string"
            }
        ],
        "output": {
            "name": "features",
            "struct": "vector",
            "type": "double"
        }
    })
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        sparkml_model = SparkMLModel(model_data=sparkml_model_data,
                                     env={'SAGEMAKER_SPARKML_SCHEMA': schema},
                                     sagemaker_session=sagemaker_session)
        xgb_image = get_image_uri(sagemaker_session.boto_region_name, 'xgboost')
        xgb_model = Model(model_data=xgb_model_data, image=xgb_image,
                          sagemaker_session=sagemaker_session)
        model = PipelineModel(models=[sparkml_model, xgb_model],
                              role='SageMakerRole',
                              sagemaker_session=sagemaker_session,
                              name=endpoint_name)
        model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor = RealTimePredictor(endpoint=endpoint_name,
                                      sagemaker_session=sagemaker_session,
                                      serializer=json_serializer,
                                      content_type=CONTENT_TYPE_CSV,
                                      accept=CONTENT_TYPE_CSV)
        valid_data = '1.0,C,38.0,71.5,1.0,female'
        assert predictor.predict(valid_data) == "0.714013934135"
        # Row that does not match the declared schema — presumably the column
        # order/types are wrong, so the pipeline returns no prediction.
        invalid_data = "1.0,28.0,C,38.0,71.5,1.0"
        assert (predictor.predict(invalid_data) is None)
        model.delete_model()
        with pytest.raises(Exception) as exception:
            sagemaker_session.sagemaker_client.describe_model(ModelName=model.name)
            # NOTE(review): this assert sits inside the raises block, so it
            # only runs if describe_model does NOT raise — likely meant to be
            # dedented.
            assert 'Could not find model' in str(exception.value)
def test_multi_data_model_deploy_train_model_from_amazon_first_party_estimator(
        container_image, sagemaker_session, cpu_instance_type):
    """Train two RandomCutForest models, serve both from one multi-data-model
    endpoint, invoke each by ``target_model``, then clean up and verify the
    model and endpoint config are gone.
    """
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        rcf_model_v1 = __rcf_training_job(
            sagemaker_session, container_image, cpu_instance_type, 50, 20
        )
        model_data_prefix = os.path.join(
            "s3://", sagemaker_session.default_bucket(), "multimodel-{}/".format(timestamp)
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            model=rcf_model_v1,
            sagemaker_session=sagemaker_session,
        )
        # Add model before deploy
        multi_data_model.add_model(rcf_model_v1.model_data, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        # Train another model
        rcf_model_v2 = __rcf_training_job(
            sagemaker_session, container_image, cpu_instance_type, 70, 20
        )
        # Deploy newly trained model
        multi_data_model.add_model(rcf_model_v2.model_data, PRETRAINED_MODEL_PATH_2)
        # List model assertions
        endpoint_models = []
        for model_path in multi_data_model.list_models():
            endpoint_models.append(model_path)
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models
        # Define a predictor to set `serializer` parameter with npy_serializer
        # instead of `json_serializer` in the default predictor returned by `MXNetPredictor`
        # Since we are using a placeholder container image the prediction results are not accurate.
        predictor = RealTimePredictor(
            endpoint=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=npy_serializer,
            deserializer=string_deserializer,
        )
        data = numpy.random.rand(1, 14)
        # Prediction result for the first model
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)
        # Prediction result for the second model
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)
        # Cleanup
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=endpoint_name
        )
        multi_data_model.delete_model()
    # BUG FIX: the describe_* checks previously shared one pytest.raises block,
    # so the asserts (and the second call) were dead code after the first call
    # raised; describe_endpoint_config also takes EndpointConfigName, not `name`.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model_name)
    assert "Could not find model" in str(exception.value)
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_name
        )
    assert "Could not find endpoint" in str(exception.value)
# In[ ]:

from sagemaker.sklearn.model import SKLearnModel

endpoint_name = 'auroraml-churn-endpoint'

# Deploy the scikit-learn artifact behind a real-time endpoint.
model = SKLearnModel(
    model_data=artifact,
    role=get_execution_role(),
    entry_point='script.py',
)
model.deploy(
    instance_type='ml.c5.large',
    initial_instance_count=1,
    endpoint_name=endpoint_name,
)

# In[ ]:

import sagemaker.sklearn
from sagemaker.predictor import json_serializer, csv_serializer, json_deserializer, RealTimePredictor
from sagemaker.content_types import CONTENT_TYPE_CSV, CONTENT_TYPE_JSON

predictor = RealTimePredictor(
    endpoint=endpoint_name,
    sagemaker_session=sess,
    content_type=CONTENT_TYPE_CSV,
    accept=CONTENT_TYPE_CSV,
)

# Drop the label column before sending rows for scoring.
test_data = test_data.drop("churn", axis=1)
train_data = train_data.drop("churn", axis=1)

# Send each dataset as a headerless CSV body, ten times apiece.
for _ in range(10):
    predictor.predict(test_data.to_csv(sep=',', header=False, index=False))
    predictor.predict(train_data.to_csv(sep=',', header=False, index=False))
def test_multi_data_model_deploy_pretrained_models_update_endpoint(
        container_image, sagemaker_session, cpu_instance_type, alternative_cpu_instance_type):
    """Deploy a multi-data-model endpoint, update it to a new instance type,
    verify the endpoint config was replaced, then clean up and verify the
    model and both endpoint configs are gone.
    """
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)
    # Define pretrained model local path
    pretrained_model_data_local_path = os.path.join(DATA_DIR, "sparkml_model", "mleap_model.tar.gz")
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model_data_prefix = os.path.join(
            "s3://", sagemaker_session.default_bucket(), "multimodel-{}/".format(timestamp)
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            image=container_image,
            role=ROLE,
            sagemaker_session=sagemaker_session,
        )
        # Add model before deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_1)
        # Deploy model to an endpoint
        multi_data_model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)
        # Add model after deploy
        multi_data_model.add_model(pretrained_model_data_local_path, PRETRAINED_MODEL_PATH_2)
        # List model assertions
        endpoint_models = []
        for model_path in multi_data_model.list_models():
            endpoint_models.append(model_path)
        assert PRETRAINED_MODEL_PATH_1 in endpoint_models
        assert PRETRAINED_MODEL_PATH_2 in endpoint_models
        predictor = RealTimePredictor(
            endpoint=endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=npy_serializer,
            deserializer=string_deserializer,
        )
        data = numpy.zeros(shape=(1, 1, 28, 28))
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_1)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_1)
        result = predictor.predict(data, target_model=PRETRAINED_MODEL_PATH_2)
        assert result == "Invoked model: {}".format(PRETRAINED_MODEL_PATH_2)
        old_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=endpoint_name
        )
        old_config_name = old_endpoint["EndpointConfigName"]
        # Update endpoint
        multi_data_model.deploy(
            1, alternative_cpu_instance_type, endpoint_name=endpoint_name, update_endpoint=True
        )
        # Wait for endpoint to finish updating
        for _ in retries(40, "Waiting for 'InService' endpoint status", seconds_to_sleep=30):
            new_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
                EndpointName=endpoint_name
            )
            if new_endpoint["EndpointStatus"] == "InService":
                break
        new_config_name = new_endpoint["EndpointConfigName"]
        new_config = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=new_config_name
        )
        # The update must have produced a fresh config on the new instance type.
        assert old_config_name != new_config_name
        assert new_config["ProductionVariants"][0]["InstanceType"] == alternative_cpu_instance_type
        assert new_config["ProductionVariants"][0]["InitialInstanceCount"] == 1
        # Cleanup
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=old_config_name
        )
        sagemaker_session.sagemaker_client.delete_endpoint_config(
            EndpointConfigName=new_config_name
        )
        multi_data_model.delete_model()
    # BUG FIX: the describe_* checks previously shared one pytest.raises block,
    # so the asserts (and later calls) were dead code after the first call
    # raised; describe_endpoint_config also takes EndpointConfigName, not `name`.
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_model(ModelName=model_name)
    assert "Could not find model" in str(exception.value)
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=old_config_name
        )
    assert "Could not find endpoint" in str(exception.value)
    with pytest.raises(Exception) as exception:
        sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=new_config_name
        )
    assert "Could not find endpoint" in str(exception.value)
""" Invoke deployed endpoint """ import sagemaker from sagemaker.predictor import csv_serializer, RealTimePredictor from sagemaker.content_types import CONTENT_TYPE_CSV, CONTENT_TYPE_JSON from ...sm_utils import parse_invoke_args if __name__ == "__main__": args = parse_invoke_args() ep_name = args.end_point sm_sess = sagemaker.Session() if (args.delete_ep): print(f'Deleting EndPoint {ep_name}') sm_client = sm_sess.boto_session.client('sagemaker') sm_client.delete_endpoint(EndpointName=ep_name) else: print(f'Invoking EndPoint {ep_name}') payload = 'M, 0.44, 0.365, 0.125, 0.516, 0.2155, 0.114, 0.155' actual_rings = 10 predictor = RealTimePredictor(endpoint=ep_name, sagemaker_session=sm_sess, serializer=csv_serializer, content_type=CONTENT_TYPE_CSV, accept=CONTENT_TYPE_JSON) print(predictor.predict(payload))