import argparse
import os

from sagemaker.tensorflow.serving import Model


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model_data', help='S3 path to model.tar.gz')
    parser.add_argument('--instance_type', default='ml.m5.large')
    # Use .get() so a missing SAGEMAKER_ROLE doesn't raise KeyError at parse
    # time even when --role is passed explicitly.
    parser.add_argument('--role', default=os.environ.get('SAGEMAKER_ROLE'))
    args = parser.parse_args()

    model = Model(args.model_data, args.role, framework_version='1.12.0')
    model.deploy(initial_instance_count=1, instance_type=args.instance_type)
def deploy(model_data, endpoint_name):
    """
    Deploys a SageMaker endpoint for a trained model.

    :param model_data: S3 location of the model artifacts.
    :param endpoint_name: Name to assign to the SageMaker endpoint.
    """
    # Create the model and deploy it behind the named endpoint.
    model = Model(model_data=model_data,
                  role=os.getenv("SAGEMAKER_ROLE"),
                  image=os.getenv("SAGEMAKER_IMAGE_URI"))
    model.deploy(1, 'ml.t2.medium', endpoint_name=endpoint_name)
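A minimal usage sketch for the helper above; the role ARN, image URI, bucket path, and endpoint name are hypothetical placeholders, not values from the original:

# Hypothetical values for illustration only.
os.environ["SAGEMAKER_ROLE"] = "arn:aws:iam::123456789012:role/MySageMakerRole"
os.environ["SAGEMAKER_IMAGE_URI"] = "123456789012.dkr.ecr.us-east-1.amazonaws.com/tf-serving:latest"

deploy("s3://mybucket/output/model.tar.gz", endpoint_name="my-tf-endpoint")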
def tfs_predictor_with_model_and_entry_point_and_dependencies(
    sagemaker_local_session, tf_full_version
):
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving")

    entry_point = os.path.join(
        tests.integ.DATA_DIR, "tfs/tfs-test-entrypoint-and-dependencies/inference.py"
    )
    dependencies = [
        os.path.join(tests.integ.DATA_DIR, "tfs/tfs-test-entrypoint-and-dependencies/dependency.py")
    ]
    model_data = "file://" + os.path.join(
        tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"
    )

    model = Model(
        entry_point=entry_point,
        model_data=model_data,
        role="SageMakerRole",
        dependencies=dependencies,
        framework_version=tf_full_version,
        sagemaker_session=sagemaker_local_session,
    )

    predictor = model.deploy(1, "local", endpoint_name=endpoint_name)
    try:
        yield predictor
    finally:
        predictor.delete_endpoint()
def tfs_predictor_with_model_and_entry_point_and_dependencies(
        instance_type, sagemaker_session, tf_full_version):
    endpoint_name = sagemaker.utils.unique_name_from_base(
        'sagemaker-tensorflow-serving')
    model_data = sagemaker_session.upload_data(
        path=os.path.join(tests.integ.DATA_DIR,
                          'tensorflow-serving-test-model.tar.gz'),
        key_prefix='tensorflow-serving/models')

    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
            endpoint_name, sagemaker_session):
        entry_point = os.path.join(
            tests.integ.DATA_DIR,
            'tfs/tfs-test-entrypoint-and-dependencies/inference.py')
        dependencies = [os.path.join(
            tests.integ.DATA_DIR,
            'tfs/tfs-test-entrypoint-and-dependencies/dependency.py')]

        model = Model(entry_point=entry_point,
                      model_data=model_data,
                      role='SageMakerRole',
                      dependencies=dependencies,
                      framework_version=tf_full_version,
                      sagemaker_session=sagemaker_session)

        predictor = model.deploy(1, instance_type, endpoint_name=endpoint_name)
        yield predictor
def test_tfs_model(sagemaker_session, tf_version):
    model = Model("s3://some/data.tar.gz",
                  role=ROLE,
                  framework_version=tf_version,
                  sagemaker_session=sagemaker_session)
    cdef = model.prepare_container_def(INSTANCE_TYPE)
    assert cdef['Image'].endswith('sagemaker-tensorflow-serving:{}-cpu'.format(tf_version))
    assert cdef['Environment'] == {}

    predictor = model.deploy(INSTANCE_COUNT, INSTANCE_TYPE)
    assert isinstance(predictor, Predictor)
def test_enabling_data_capture_on_endpoint_shows_correct_data_capture_status(
    sagemaker_session, tf_full_version
):
    endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
    model_data = sagemaker_session.upload_data(
        path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"),
        key_prefix="tensorflow-serving/models",
    )
    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = Model(
            model_data=model_data,
            role=ROLE,
            framework_version=tf_full_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            initial_instance_count=INSTANCE_COUNT,
            instance_type=INSTANCE_TYPE,
            endpoint_name=endpoint_name,
        )

        endpoint_desc = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=predictor.endpoint
        )
        endpoint_config_desc = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_desc["EndpointConfigName"]
        )
        assert endpoint_config_desc.get("DataCaptureConfig") is None

        predictor.enable_data_capture()

        # Wait for the endpoint to finish updating.
        # An endpoint update takes ~7 min; 40 retries * 30 s sleeps = 20 min timeout.
        for _ in retries(
            max_retry_count=40,
            exception_message_prefix="Waiting for 'InService' endpoint status",
            seconds_to_sleep=30,
        ):
            new_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
                EndpointName=predictor.endpoint
            )
            if new_endpoint["EndpointStatus"] == "InService":
                break

        endpoint_desc = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=predictor.endpoint
        )
        endpoint_config_desc = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_desc["EndpointConfigName"]
        )
        assert endpoint_config_desc["DataCaptureConfig"]["EnableCapture"]
def test_tfs_model_image_accelerator(sagemaker_session, tf_version):
    model = Model(
        "s3://some/data.tar.gz",
        role=ROLE,
        framework_version=tf_version,
        sagemaker_session=sagemaker_session,
    )
    cdef = model.prepare_container_def(INSTANCE_TYPE, accelerator_type=ACCELERATOR_TYPE)
    assert cdef["Image"].endswith("sagemaker-tensorflow-serving-eia:{}-cpu".format(tf_version))

    predictor = model.deploy(INSTANCE_COUNT, INSTANCE_TYPE)
    assert isinstance(predictor, Predictor)
def test_tfs_model_image_accelerator_not_supported(sagemaker_session):
    model = Model(
        "s3://some/data.tar.gz",
        role=ROLE,
        framework_version="1.13.1",
        sagemaker_session=sagemaker_session,
    )

    # Assert no error is raised: EIA is supported for TF 1.13.1.
    model.deploy(
        instance_type="ml.c4.xlarge",
        initial_instance_count=1,
        accelerator_type="ml.eia1.medium",
    )

    model = Model(
        "s3://some/data.tar.gz",
        role=ROLE,
        framework_version="1.14",
        sagemaker_session=sagemaker_session,
    )

    # Assert no error is raised when deploying without an accelerator.
    model.deploy(instance_type="ml.c4.xlarge", initial_instance_count=1)

    with pytest.raises(AttributeError) as e:
        model.deploy(
            instance_type="ml.c4.xlarge",
            accelerator_type="ml.eia1.medium",
            initial_instance_count=1,
        )

    assert str(e.value) == "The TensorFlow version 1.14 doesn't support EIA."
def tfs_predictor(instance_type, sagemaker_session, tf_full_version):
    endpoint_name = sagemaker.utils.name_from_base('sagemaker-tensorflow-serving')
    model_data = sagemaker_session.upload_data(
        path='tests/data/tensorflow-serving-test-model.tar.gz',
        key_prefix='tensorflow-serving/models')

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = Model(model_data=model_data,
                      role='SageMakerRole',
                      framework_version=tf_full_version,
                      sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, instance_type, endpoint_name=endpoint_name)
        yield predictor
def tfs_predictor(sagemaker_session, tf_full_version):
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving")
    model_data = sagemaker_session.upload_data(
        path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"),
        key_prefix="tensorflow-serving/models",
    )
    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = Model(
            model_data=model_data,
            role="SageMakerRole",
            framework_version=tf_full_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, "ml.c5.xlarge", endpoint_name=endpoint_name)
        yield predictor
def tfs_predictor_with_accelerator(sagemaker_session, ei_tf_full_version, cpu_instance_type):
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving")
    model_data = sagemaker_session.upload_data(
        path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"),
        key_prefix="tensorflow-serving/models",
    )
    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = Model(
            model_data=model_data,
            role="SageMakerRole",
            framework_version=ei_tf_full_version,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(
            1, cpu_instance_type, endpoint_name=endpoint_name, accelerator_type="ml.eia1.medium"
        )
        yield predictor
def tfs_predictor_with_accelerator(sagemaker_session, tf_full_version):
    endpoint_name = sagemaker.utils.unique_name_from_base(
        "sagemaker-tensorflow-serving")
    instance_type = 'ml.c4.large'
    accelerator_type = 'ml.eia1.medium'
    model_data = sagemaker_session.upload_data(
        path='tests/data/tensorflow-serving-test-model.tar.gz',
        key_prefix='tensorflow-serving/models')

    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
            endpoint_name, sagemaker_session):
        model = Model(model_data=model_data,
                      role='SageMakerRole',
                      framework_version=tf_full_version,
                      sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, instance_type,
                                 endpoint_name=endpoint_name,
                                 accelerator_type=accelerator_type)
        yield predictor
def tfs_predictor_with_model_and_entry_point_same_tar(
    sagemaker_local_session, tf_full_version, tmpdir
):
    endpoint_name = sagemaker.utils.unique_name_from_base("sagemaker-tensorflow-serving")
    model_tar = tar_dir(
        os.path.join(tests.integ.DATA_DIR, "tfs/tfs-test-model-with-inference"), tmpdir
    )

    model = Model(
        model_data="file://" + model_tar,
        role="SageMakerRole",
        framework_version=tf_full_version,
        sagemaker_session=sagemaker_local_session,
    )
    predictor = model.deploy(1, "local", endpoint_name=endpoint_name)
    try:
        yield predictor
    finally:
        predictor.delete_endpoint()
def tfs_predictor_with_model_and_entry_point_same_tar(
        instance_type, sagemaker_session, tf_full_version, tmpdir):
    endpoint_name = sagemaker.utils.unique_name_from_base(
        'sagemaker-tensorflow-serving')
    model_tar = tar_dir(
        os.path.join(tests.integ.DATA_DIR, 'tfs/tfs-test-model-with-inference'),
        tmpdir)
    model_data = sagemaker_session.upload_data(
        path=model_tar,
        key_prefix='tensorflow-serving/models')

    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(
            endpoint_name, sagemaker_session):
        model = Model(model_data=model_data,
                      role='SageMakerRole',
                      framework_version=tf_full_version,
                      sagemaker_session=sagemaker_session)
        predictor = model.deploy(1, instance_type, endpoint_name=endpoint_name)
        yield predictor
def Deploy_Model(payload, update=False):
    '''
    SERVICE NUMBER 2.
    The function deploys the model as an endpoint.

    payload = {model_data: model_data}
    sample model_data:
    's3://fypcementbucket/models/model_2021_2_19/sagemaker-tensorflow-scriptmode-2021-02-21-13-37-05-805/output/model.tar.gz'
    '''
    from sagemaker.tensorflow.serving import Model
    import boto3

    client = boto3.client('sagemaker')

    # Delete the previous endpoint configuration if one exists.
    try:
        client.delete_endpoint_config(
            EndpointConfigName='sagemaker-tensorflow-serving-2021-02-21-fypmodel-endpoint')
    except client.exceptions.ClientError:
        # No existing configuration to delete.
        pass

    # NOTE: model_data is hard-coded here; payload['model_data'] is ignored.
    model = Model(
        model_data='s3://fypcementbucket/models/model_3_16_14_52_30/sagemaker-tensorflow-scriptmode-2021-03-16-14-52-31-100/output/model.tar.gz',
        role="arn:aws:iam::968710761052:role/service-role/AmazonSageMaker-ExecutionRole-20210205T194406",
        framework_version='1.12.0')

    newThread = threading.Thread(target=Thread_Handler)
    newThread.start()

    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='ml.c5.xlarge',
        endpoint_name='sagemaker-tensorflow-serving-2021-02-21-fypmodel-endpoint',
        update_endpoint=update)
    Get_ModelStatus()
def Deploy_Model(payload, update=False):
    '''
    SERVICE NUMBER 2.
    The function deploys the model as an endpoint.

    payload = {model_data: model_data}
    sample model_data:
    's3://fypcementbucket/models/model_2021_2_19/sagemaker-tensorflow-scriptmode-2021-02-21-13-37-05-805/output/model.tar.gz'
    '''
    from sagemaker.tensorflow.serving import Model

    model = Model(
        model_data=payload['model_data'],
        role="arn:aws:iam::968710761052:role/service-role/AmazonSageMaker-ExecutionRole-20210205T194406",
        framework_version='1.12.0'
    )

    newThread = threading.Thread(target=Thread_Handler)
    newThread.start()

    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='ml.c5.xlarge',
        endpoint_name='sagemaker-tensorflow-serving-2021-02-21-fypmodel-endpoint',
        update_endpoint=update)
    Get_ModelStatus()
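A minimal call sketch for the service above, using the sample model_data from its docstring (Thread_Handler and Get_ModelStatus are assumed to be defined elsewhere in the module):

payload = {
    'model_data': 's3://fypcementbucket/models/model_2021_2_19/'
                  'sagemaker-tensorflow-scriptmode-2021-02-21-13-37-05-805/output/model.tar.gz'
}
Deploy_Model(payload)               # first call creates the endpoint
Deploy_Model(payload, update=True)  # update=True replaces the model behind the existing endpoint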
cf_configuration = json.load(cf_configuration_file)
commit_id = cf_configuration["Parameters"]["CommitID"]
timestamp = cf_configuration["Parameters"]["Timestamp"]
model_data = cf_configuration["Parameters"]["ModelData"]
stage = cf_configuration["Parameters"]["Environment"]

sagemaker_session = sagemaker.Session()
role = get_execution_role()

tensorflow_serving_model = Model(model_data=model_data,
                                 role=role,
                                 framework_version='1.13',
                                 sagemaker_session=sagemaker_session)
predictor = tensorflow_serving_model.deploy(initial_instance_count=1,
                                            instance_type='ml.t2.medium')

result = predictor.predict(json_str)
result = result['predictions']
pred = np.array(result)
pred = pred.argmax(axis=1).tolist()
y_test_pred = y_test.argmax(axis=1).tolist()

# Tear down the validation endpoint in qa/prod stages.
if stage in ('qa', 'prod'):
    predictor.delete_endpoint()

# Accuracy: fraction of predictions that match the labels.
match = 0
for i, p in enumerate(pred):
    if y_test_pred[i] == p:
        match += 1
acc = match / len(pred)
def test_disabling_data_capture_on_endpoint_shows_correct_data_capture_status(
    sagemaker_session, tf_full_version
):
    endpoint_name = unique_name_from_base("sagemaker-tensorflow-serving")
    model_data = sagemaker_session.upload_data(
        path=os.path.join(tests.integ.DATA_DIR, "tensorflow-serving-test-model.tar.gz"),
        key_prefix="tensorflow-serving/models",
    )
    with tests.integ.timeout.timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model = Model(
            model_data=model_data,
            role=ROLE,
            framework_version=tf_full_version,
            sagemaker_session=sagemaker_session,
        )
        destination_s3_uri = os.path.join(
            "s3://", sagemaker_session.default_bucket(), endpoint_name, "custom"
        )
        predictor = model.deploy(
            initial_instance_count=INSTANCE_COUNT,
            instance_type=INSTANCE_TYPE,
            endpoint_name=endpoint_name,
            data_capture_config=DataCaptureConfig(
                enable_capture=True,
                sampling_percentage=CUSTOM_SAMPLING_PERCENTAGE,
                destination_s3_uri=destination_s3_uri,
                capture_options=CUSTOM_CAPTURE_OPTIONS,
                csv_content_types=CUSTOM_CSV_CONTENT_TYPES,
                json_content_types=CUSTOM_JSON_CONTENT_TYPES,
                sagemaker_session=sagemaker_session,
            ),
        )

        endpoint_desc = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=predictor.endpoint
        )
        endpoint_config_desc = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_desc["EndpointConfigName"]
        )
        assert endpoint_config_desc["DataCaptureConfig"]["EnableCapture"]
        assert (
            endpoint_config_desc["DataCaptureConfig"]["InitialSamplingPercentage"]
            == CUSTOM_SAMPLING_PERCENTAGE
        )
        assert endpoint_config_desc["DataCaptureConfig"]["CaptureOptions"] == [
            {"CaptureMode": "Input"}
        ]
        assert (
            endpoint_config_desc["DataCaptureConfig"]["CaptureContentTypeHeader"]["CsvContentTypes"]
            == CUSTOM_CSV_CONTENT_TYPES
        )
        assert (
            endpoint_config_desc["DataCaptureConfig"]["CaptureContentTypeHeader"]["JsonContentTypes"]
            == CUSTOM_JSON_CONTENT_TYPES
        )

        predictor.disable_data_capture()

        # Wait for the endpoint to finish updating.
        # An endpoint update takes ~7 min; 25 retries * 60 s sleeps = 25 min timeout.
        for _ in retries(
            max_retry_count=25,
            exception_message_prefix="Waiting for 'InService' endpoint status",
            seconds_to_sleep=60,
        ):
            new_endpoint = sagemaker_session.sagemaker_client.describe_endpoint(
                EndpointName=predictor.endpoint
            )
            if new_endpoint["EndpointStatus"] == "InService":
                break

        endpoint_desc = sagemaker_session.sagemaker_client.describe_endpoint(
            EndpointName=predictor.endpoint
        )
        endpoint_config_desc = sagemaker_session.sagemaker_client.describe_endpoint_config(
            EndpointConfigName=endpoint_desc["EndpointConfigName"]
        )
        assert not endpoint_config_desc["DataCaptureConfig"]["EnableCapture"]
An exported SavedModel must have a tag-set corresponding to each graph; the tag-set has to be specified when the SavedModel is exported or saved. To inspect the tag-sets of a SavedModel, use the SavedModel CLI.
!!! https://www.tensorflow.org/programmers_guide/saved_model_cli !!! (404 error) -> review:
C. SavedModel CLI
https://www.tensorflow.org/guide/saved_model#details_of_the_savedmodel_command_line_interface

####################
[Stack Overflow answer]

There are two APIs for deploying TensorFlow models: tensorflow.Model and tensorflow.serving.Model. It isn't clear from the code snippet which one you're using, but the SageMaker docs recommend the latter for deploying from pre-existing S3 artifacts:

from sagemaker.tensorflow.serving import Model

model = Model(model_data='s3://mybucket/model.tar.gz', role='MySageMakerRole')
predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge')

# Reference: https://github.com/aws/sagemaker-python-sdk/blob/c919e4dee3a00243f0b736af93fb156d17b04796/src/sagemaker/tensorflow/deploying_tensorflow_serving.rst#deploying-directly-from-model-artifacts

If you haven't specified an image argument for tensorflow.Model, SageMaker should be using the default TensorFlow Serving image (seems like "../tensorflow-inference").

image (str) – A Docker image URI (default: None). If not specified, a default image for TensorFlow Serving will be used.

If all of this seems needlessly complex to you, I'm working on a platform that makes this setup a single line of code -- I'd love for you to try it; DM me at https://twitter.com/yoavz_.
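For completeness, a minimal sketch of invoking the predictor returned by the answer's deploy() call; the input values are hypothetical and must match the SavedModel's serving signature:

# Hypothetical request; the TFS container accepts TensorFlow Serving's
# REST 'instances' format and returns a dict with a 'predictions' key.
result = predictor.predict({'instances': [[1.0, 2.0, 3.0]]})
print(result['predictions'])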
model_data = "s3://{}/battlesnake-aws/pretrainedmodels/model.tar.gz".format( s3_bucket) print("Make an endpoint with {}".format(model_data)) model = Model( model_data=model_data, role=role, entry_point="inference.py", source_dir='RLlibEnv/inference/inference_src', framework_version='2.1.0', name="battlesnake-rllib", ) # Deploy an inference endpoint predictor = model.deploy(initial_instance_count=1, instance_type=endpoint_instance_type, endpoint_name='battlesnake-endpoint') state = np.zeros(shape=(1, 21, 21, 6), dtype=np.float32).tolist() health_dict = {0: 50, 1: 50} json = { "turn": 4, "board": { "height": 11, "width": 11, "food": [], "snakes": [] }, "you": { "id": "snake-id-string",