def __init__(self, state_id, model, model_name=None, instance_type=None, tags=None, **kwargs):
    """
    Set up a state that calls SageMaker `CreateModel` for the given model.

    Args:
        state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
        model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
        model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
        instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
        tags (list[dict] or Placeholders, optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
        parameters(dict, optional): The value of this field is merged with other arguments to become the request payload for SageMaker `CreateModel <https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateModel.html>`_. (Default: None)
            You can use `parameters` to override the value provided by other arguments and specify any field's value dynamically using `Placeholders <https://aws-step-functions-data-science-sdk.readthedocs.io/en/stable/placeholders.html?highlight=placeholder#stepfunctions.inputs.Placeholder>`_.

    Raises:
        ValueError: If `model` is neither a `FrameworkModel` nor a `Model`.
    """
    if isinstance(model, FrameworkModel):
        request = model_config(
            model=model,
            instance_type=instance_type,
            role=model.role,
            image_uri=model.image_uri,
        )
        # An explicit name wins over whatever model_config generated.
        if model_name:
            request['ModelName'] = model_name
    elif isinstance(model, Model):
        request = {
            'ExecutionRoleArn': model.role,
            'ModelName': model_name or model.name,
            'PrimaryContainer': {
                'Environment': model.env,
                'Image': model.image_uri,
                'ModelDataUrl': model.model_data,
            },
        }
    else:
        message = "Expected 'model' parameter to be of type 'sagemaker.model.Model', but received type '{}'"
        raise ValueError(message.format(type(model).__name__))

    # Any 'S3Operations' entry is stripped before the payload is handed to the state.
    request.pop('S3Operations', None)

    if tags:
        if isinstance(tags, Placeholder):
            # Placeholders are passed through untouched.
            request['Tags'] = tags
        else:
            request['Tags'] = tags_dict_to_kv_list(tags)

    parameters_key = Field.Parameters.value
    overrides = kwargs.get(parameters_key)
    if isinstance(overrides, dict):
        # Update model parameters with input parameters
        merge_dicts(request, overrides)
    kwargs[parameters_key] = request

    # Example resource arn: arn:aws:states:::sagemaker:createModel
    resource_arn = get_service_integration_arn(SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateModel)
    kwargs[Field.Resource.value] = resource_arn

    super(ModelStep, self).__init__(state_id, **kwargs)
def test_byo_model_config(sagemaker_session):
    """Check the config ``airflow.model_config`` produces for a plain ``model.Model``."""
    model_kwargs = dict(
        model_data="{{ model_data }}",
        image="{{ image }}",
        role="{{ role }}",
        env={"{{ key }}": "{{ value }}"},
        name="model",
        sagemaker_session=sagemaker_session,
    )
    custom_model = model.Model(**model_kwargs)

    actual = airflow.model_config(instance_type="ml.c4.xlarge", model=custom_model)

    # The container section mirrors the image, env and model data given above.
    container = {
        "Image": "{{ image }}",
        "Environment": {"{{ key }}": "{{ value }}"},
        "ModelDataUrl": "{{ model_data }}",
    }
    expected = {
        "ModelName": "model",
        "PrimaryContainer": container,
        "ExecutionRoleArn": "{{ role }}",
    }

    assert actual == expected
def __init__(self, state_id, model, model_name=None, instance_type=None, **kwargs):
    """
    Set up a state that calls SageMaker `CreateModel` for the given model.

    Args:
        state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
        model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
        model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
        instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. This parameter is typically required when the estimator used is not an `Amazon built-in algorithm <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.

    Raises:
        ValueError: If `model` is neither a `FrameworkModel` nor a `Model`.
    """
    if isinstance(model, FrameworkModel):
        parameters = model_config(model=model, instance_type=instance_type, role=model.role, image=model.image)
        # An explicit name wins over whatever model_config generated.
        if model_name:
            parameters['ModelName'] = model_name
    elif isinstance(model, Model):
        parameters = {
            'ExecutionRoleArn': model.role,
            'ModelName': model_name or model.name,
            'PrimaryContainer': {
                # Pass the model's own environment variables through instead of
                # always sending an empty mapping (falls back to {} when unset).
                'Environment': model.env or {},
                'Image': model.image,
                'ModelDataUrl': model.model_data
            }
        }
    else:
        raise ValueError("Expected 'model' parameter to be of type 'sagemaker.model.Model', but received type '{}'".format(type(model).__name__))

    # Any 'S3Operations' entry is stripped before the payload is handed to the state.
    if 'S3Operations' in parameters:
        del parameters['S3Operations']

    kwargs[Field.Parameters.value] = parameters
    kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createModel'

    super(ModelStep, self).__init__(state_id, **kwargs)
def test_framework_model_config(sagemaker_session):
    """Check the config ``airflow.model_config`` produces for a ``ChainerModel``."""
    framework_model = chainer.ChainerModel(
        model_data="{{ model_data }}",
        role="{{ role }}",
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        image=None,
        py_version="py3",
        framework_version="5.0.0",
        model_server_workers="{{ model_server_worker }}",
        sagemaker_session=sagemaker_session,
    )

    actual = airflow.model_config(instance_type="ml.c4.xlarge", model=framework_model)

    # Values that embed the timestamp are named up front to keep the literal readable.
    expected_name = "sagemaker-chainer-%s" % TIME_STAMP
    expected_submit_dir = "s3://output/sagemaker-chainer-%s/source/sourcedir.tar.gz" % TIME_STAMP
    expected_upload_key = "sagemaker-chainer-%s/source/sourcedir.tar.gz" % TIME_STAMP

    expected_environment = {
        "SAGEMAKER_PROGRAM": "{{ entry_point }}",
        "SAGEMAKER_SUBMIT_DIRECTORY": expected_submit_dir,
        "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
        "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        "SAGEMAKER_REGION": "us-west-2",
        "SAGEMAKER_MODEL_SERVER_WORKERS": "{{ model_server_worker }}",
    }
    expected_upload = {
        "Path": "{{ source_dir }}",
        "Bucket": "output",
        "Key": expected_upload_key,
        "Tar": True,
    }
    expected = {
        "ModelName": expected_name,
        "PrimaryContainer": {
            "Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-chainer:5.0.0-cpu-py3",
            "Environment": expected_environment,
            "ModelDataUrl": "{{ model_data }}",
        },
        "ExecutionRoleArn": "{{ role }}",
        "S3Operations": {"S3Upload": [expected_upload]},
    }

    assert actual == expected
def __init__(self, state_id, model, model_name=None, instance_type=None, tags=None, **kwargs):
    """
    Set up a state that calls SageMaker `CreateModel` for the given model.

    Args:
        state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
        model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
        model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
        instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'.
        tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.

    Raises:
        ValueError: If `model` is neither a `FrameworkModel` nor a `Model`.
    """
    if isinstance(model, FrameworkModel):
        request = model_config(
            model=model,
            instance_type=instance_type,
            role=model.role,
            image_uri=model.image_uri,
        )
        # An explicit name wins over whatever model_config generated.
        if model_name:
            request['ModelName'] = model_name
    elif isinstance(model, Model):
        request = {
            'ExecutionRoleArn': model.role,
            'ModelName': model_name or model.name,
            'PrimaryContainer': {
                'Environment': model.env,
                'Image': model.image_uri,
                'ModelDataUrl': model.model_data,
            },
        }
    else:
        message = "Expected 'model' parameter to be of type 'sagemaker.model.Model', but received type '{}'"
        raise ValueError(message.format(type(model).__name__))

    # Any 'S3Operations' entry is stripped before the payload is handed to the state.
    request.pop('S3Operations', None)

    if tags:
        request['Tags'] = tags_dict_to_kv_list(tags)

    kwargs[Field.Parameters.value] = request

    # Example resource arn: arn:aws:states:::sagemaker:createModel
    resource_arn = get_service_integration_arn(SAGEMAKER_SERVICE_NAME, SageMakerApi.CreateModel)
    kwargs[Field.Resource.value] = resource_arn

    super(ModelStep, self).__init__(state_id, **kwargs)
def test_framework_model_config(sagemaker_session):
    """Verify ``airflow.model_config`` output for a Chainer framework model."""
    model_under_test = chainer.ChainerModel(
        model_data="{{ model_data }}",
        role="{{ role }}",
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        image=None,
        py_version='py3',
        framework_version='5.0.0',
        model_server_workers="{{ model_server_worker }}",
        sagemaker_session=sagemaker_session,
    )
    result = airflow.model_config(instance_type='ml.c4.xlarge', model=model_under_test)

    # Assemble the expectation piece by piece: container first, then the upload step.
    primary_container = {
        'Image': '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-chainer:5.0.0-cpu-py3',
        'Environment': {
            'SAGEMAKER_PROGRAM': '{{ entry_point }}',
            'SAGEMAKER_SUBMIT_DIRECTORY':
                "s3://output/sagemaker-chainer-%s/source/sourcedir.tar.gz" % TIME_STAMP,
            'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
            'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
            'SAGEMAKER_REGION': 'us-west-2',
            'SAGEMAKER_MODEL_SERVER_WORKERS': '{{ model_server_worker }}',
        },
        'ModelDataUrl': '{{ model_data }}',
    }
    source_upload = {
        'Path': '{{ source_dir }}',
        'Bucket': 'output',
        'Key': "sagemaker-chainer-%s/source/sourcedir.tar.gz" % TIME_STAMP,
        'Tar': True,
    }

    wanted = {}
    wanted['ModelName'] = "sagemaker-chainer-%s" % TIME_STAMP
    wanted['PrimaryContainer'] = primary_container
    wanted['ExecutionRoleArn'] = '{{ role }}'
    wanted['S3Operations'] = {'S3Upload': [source_upload]}

    assert result == wanted
def _build_airflow_workflow(estimator, instance_type, inputs=None, mini_batch_size=None):
    """Build training, model and transform configs plus a two-task DAG for an estimator.

    The training config is created first, then a model and its config (both
    asserted to be non-None), then a transform config derived from the same
    estimator. A training operator and a transform operator are attached to a
    DAG named "tensorflow_example", with training set upstream of transform.

    Args:
        estimator: Estimator passed to the ``sm_airflow`` config builders.
        instance_type: Instance type passed to ``sm_airflow.model_config``.
        inputs: Forwarded to the training config and used as transform ``data``.
        mini_batch_size: Forwarded to the training config.

    Returns:
        The training config produced by ``sm_airflow.training_config``.
    """
    # The builders are called in the original order on purpose; they all work
    # on the same estimator object.
    train_cfg = sm_airflow.training_config(
        estimator=estimator,
        inputs=inputs,
        mini_batch_size=mini_batch_size,
    )

    created_model = estimator.create_model()
    assert created_model is not None

    created_model_cfg = sm_airflow.model_config(instance_type, created_model)
    assert created_model_cfg is not None

    transform_cfg = sm_airflow.transform_config_from_estimator(
        estimator=estimator,
        task_id="transform_config",
        task_type="training",
        instance_count=SINGLE_INSTANCE_COUNT,
        instance_type=estimator.train_instance_type,
        data=inputs,
        content_type="text/csv",
        input_filter="$",
        output_filter="$",
    )

    dag_defaults = {
        "owner": "airflow",
        "start_date": airflow.utils.dates.days_ago(2),
        "provide_context": True,
    }
    workflow = DAG("tensorflow_example", default_args=dag_defaults, schedule_interval="@once")

    training_task = SageMakerTrainingOperator(
        task_id="tf_training",
        config=train_cfg,
        wait_for_completion=True,
        dag=workflow,
    )
    transform_task = SageMakerTransformOperator(
        task_id="transform_operator",
        config=transform_cfg,
        wait_for_completion=True,
        dag=workflow,
    )
    # Transform must run only after training.
    transform_task.set_upstream(training_task)

    return train_cfg
def test_amazon_alg_model_config(sagemaker_session):
    """Check the config ``airflow.model_config`` produces for a ``PCAModel``."""
    algorithm_model = pca.PCAModel(
        model_data="{{ model_data }}",
        role="{{ role }}",
        sagemaker_session=sagemaker_session,
    )

    actual = airflow.model_config(instance_type="ml.c4.xlarge", model=algorithm_model)

    # The expected container carries an empty environment for this model.
    container = {
        "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/pca:1",
        "Environment": {},
        "ModelDataUrl": "{{ model_data }}",
    }
    expected = {
        "ModelName": "pca-%s" % TIME_STAMP,
        "PrimaryContainer": container,
        "ExecutionRoleArn": "{{ role }}",
    }

    assert actual == expected
def test_amazon_alg_model_config(sagemaker_session):
    """Verify ``airflow.model_config`` output for the PCA model."""
    model_under_test = pca.PCAModel(
        model_data="{{ model_data }}",
        role="{{ role }}",
        sagemaker_session=sagemaker_session,
    )
    result = airflow.model_config(instance_type='ml.c4.xlarge', model=model_under_test)

    wanted = {}
    wanted['ModelName'] = "pca-%s" % TIME_STAMP
    wanted['PrimaryContainer'] = {
        'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/pca:1',
        'Environment': {},
        'ModelDataUrl': '{{ model_data }}',
    }
    wanted['ExecutionRoleArn'] = '{{ role }}'

    assert result == wanted
def test_byo_framework_model_config(sagemaker_session):
    """Check the config ``airflow.model_config`` produces for a custom ``FrameworkModel``."""
    custom_model = model.FrameworkModel(
        model_data="{{ model_data }}",
        image="{{ image }}",
        role="{{ role }}",
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        env={"{{ key }}": "{{ value }}"},
        name="model",
        sagemaker_session=sagemaker_session,
    )

    actual = airflow.model_config(instance_type="ml.c4.xlarge", model=custom_model)

    # The user-supplied env entry is expected alongside the SAGEMAKER_* variables.
    expected_environment = {
        "{{ key }}": "{{ value }}",
        "SAGEMAKER_PROGRAM": "{{ entry_point }}",
        "SAGEMAKER_SUBMIT_DIRECTORY": "s3://output/model/source/sourcedir.tar.gz",
        "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
        "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        "SAGEMAKER_REGION": "us-west-2",
    }
    expected_upload = {
        "Path": "{{ source_dir }}",
        "Bucket": "output",
        "Key": "model/source/sourcedir.tar.gz",
        "Tar": True,
    }
    expected = {
        "ModelName": "model",
        "PrimaryContainer": {
            "Image": "{{ image }}",
            "Environment": expected_environment,
            "ModelDataUrl": "{{ model_data }}",
        },
        "ExecutionRoleArn": "{{ role }}",
        "S3Operations": {"S3Upload": [expected_upload]},
    }

    assert actual == expected
def test_byo_model_config(sagemaker_session):
    """Verify ``airflow.model_config`` output for a bring-your-own ``model.Model``."""
    model_under_test = model.Model(
        model_data="{{ model_data }}",
        image="{{ image }}",
        role="{{ role }}",
        env={"{{ key }}": "{{ value }}"},
        name='model',
        sagemaker_session=sagemaker_session,
    )
    result = airflow.model_config(instance_type='ml.c4.xlarge', model=model_under_test)

    wanted = {}
    wanted['ModelName'] = 'model'
    wanted['PrimaryContainer'] = {
        'Image': '{{ image }}',
        'Environment': {'{{ key }}': '{{ value }}'},
        'ModelDataUrl': '{{ model_data }}',
    }
    wanted['ExecutionRoleArn'] = '{{ role }}'

    assert result == wanted
def test_byo_framework_model_config(sagemaker_session):
    """Verify ``airflow.model_config`` output for a bring-your-own ``FrameworkModel``."""
    model_under_test = model.FrameworkModel(
        model_data="{{ model_data }}",
        image="{{ image }}",
        role="{{ role }}",
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        env={"{{ key }}": "{{ value }}"},
        name='model',
        sagemaker_session=sagemaker_session,
    )
    result = airflow.model_config(instance_type='ml.c4.xlarge', model=model_under_test)

    # Assemble the expectation piece by piece: container first, then the upload step.
    primary_container = {
        'Image': '{{ image }}',
        'Environment': {
            '{{ key }}': '{{ value }}',
            'SAGEMAKER_PROGRAM': '{{ entry_point }}',
            'SAGEMAKER_SUBMIT_DIRECTORY': 's3://output/model/source/sourcedir.tar.gz',
            'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
            'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
            'SAGEMAKER_REGION': 'us-west-2',
        },
        'ModelDataUrl': '{{ model_data }}',
    }
    source_upload = {
        'Path': '{{ source_dir }}',
        'Bucket': 'output',
        'Key': 'model/source/sourcedir.tar.gz',
        'Tar': True,
    }

    wanted = {}
    wanted['ModelName'] = 'model'
    wanted['PrimaryContainer'] = primary_container
    wanted['ExecutionRoleArn'] = '{{ role }}'
    wanted['S3Operations'] = {'S3Upload': [source_upload]}

    assert result == wanted
def __init__(
    self,
    state_id,
    model,
    model_data_url=None,
    sagemaker_submit_directory=None,
    model_name=None,
    instance_type=None,
    tags=None,
    **kwargs,
):
    """
    Set up a state that calls SageMaker `CreateModel` for the given model.

    Args:
        state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
        model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here.
        model_data_url (optional): When truthy and `model` is a `FrameworkModel`, replaces `PrimaryContainer.ModelDataUrl` in the request. Ignored for a plain `Model`. Presumably a str or Placeholder -- confirm against callers.
        sagemaker_submit_directory (optional): When truthy and `model` is a `FrameworkModel`, replaces the `SAGEMAKER_SUBMIT_DIRECTORY` entry of `PrimaryContainer.Environment`. Ignored for a plain `Model`. Presumably a str or Placeholder -- confirm against callers.
        model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
        instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. This parameter is typically required when the estimator used is not an `Amazon built-in algorithm <https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html>`_.
        tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.

    Raises:
        ValueError: If `model` is neither a `FrameworkModel` nor a `Model`.
    """
    if isinstance(model, FrameworkModel):
        parameters = model_config(
            model=model,
            instance_type=instance_type,
            role=model.role,
            image=model.image,
        )
        # An explicit name wins over whatever model_config generated.
        if model_name:
            parameters["ModelName"] = model_name
        # placeholder for model data url
        if model_data_url:
            parameters["PrimaryContainer"]["ModelDataUrl"] = model_data_url
        # placeholder for sagemaker script
        if sagemaker_submit_directory:
            parameters["PrimaryContainer"]["Environment"][
                "SAGEMAKER_SUBMIT_DIRECTORY"
            ] = sagemaker_submit_directory
    elif isinstance(model, Model):
        parameters = {
            "ExecutionRoleArn": model.role,
            "ModelName": model_name or model.name,
            "PrimaryContainer": {
                # Pass the model's own environment variables through instead of
                # always sending an empty mapping (falls back to {} when unset).
                "Environment": model.env or {},
                "Image": model.image,
                "ModelDataUrl": model.model_data,
            },
        }
    else:
        raise ValueError(
            "Expected 'model' parameter to be of type 'sagemaker.model.Model'"
            f", but received type '{type(model).__name__}'"
        )

    # Any "S3Operations" entry is stripped before the payload is handed to the state.
    if "S3Operations" in parameters:
        del parameters["S3Operations"]

    if tags:
        parameters["Tags"] = tags_dict_to_kv_list(tags)

    kwargs[Field.Parameters.value] = parameters
    kwargs[Field.Resource.value] = "arn:aws:states:::sagemaker:createModel"

    super(MLMaxModelStep, self).__init__(state_id, **kwargs)