def __init__(self,
                 state_id,
                 tuner,
                 job_name,
                 data,
                 wait_for_completion=True,
                 tags=None,
                 **kwargs):
        """
        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            tuner (sagemaker.tuner.HyperparameterTuner): The tuner to use in the TuningStep.
            job_name (str or Placeholder): Specify a tuning job name.  We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator in the tuner, as this can take any of the following forms:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                    channels for training data, you can specify a dict mapping channel names to
                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                    provide additional information about the training dataset. See
                    :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
            wait_for_completion(bool, optional): Boolean value set to `True` if the Task state should wait for the tuning job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the tuning job and proceed to the next step. (default: True)
            tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
        """
        # The ".sync" service-integration ARN makes the Task state block until
        # the tuning job finishes; the plain ARN returns right after submission.
        if wait_for_completion:
            kwargs[
                Field.Resource.
                value] = 'arn:aws:states:::sagemaker:createHyperParameterTuningJob.sync'
        else:
            kwargs[
                Field.Resource.
                value] = 'arn:aws:states:::sagemaker:createHyperParameterTuningJob'

        # Base CreateHyperParameterTuningJob payload derived from the tuner.
        parameters = tuning_config(tuner=tuner, inputs=data,
                                   job_name=job_name).copy()

        # An explicit job_name overrides whatever name tuning_config generated.
        if job_name is not None:
            parameters['HyperParameterTuningJobName'] = job_name

        # Strip 'S3Operations' before submission — presumably an Airflow-side
        # artifact of tuning_config, not a SageMaker API field. TODO confirm.
        if 'S3Operations' in parameters:
            del parameters['S3Operations']

        if tags:
            # Convert {key: value} tags into SageMaker's [{'Key':..., 'Value':...}] form.
            parameters['Tags'] = tags_dict_to_kv_list(tags)

        kwargs[Field.Parameters.value] = parameters

        super(TuningStep, self).__init__(state_id, **kwargs)
# create estimator
# NOTE(review): `container`, `role`, `sess`, `config` and `hpo_enabled` must be
# defined earlier in this file — confirm before running this fragment.
fm_estimator = Estimator(image_name=container,
                         role=role,
                         sagemaker_session=sagemaker.session.Session(sess),
                         **config["train_model"]["estimator_config"])

# train_config specifies SageMaker training configuration
train_config = training_config(estimator=fm_estimator,
                               inputs=config["train_model"]["inputs"])

# create tuner wrapping the estimator; tuner settings come from config
fm_tuner = HyperparameterTuner(estimator=fm_estimator,
                               **config["tune_model"]["tuner_config"])

# create tuning config (CreateHyperParameterTuningJob request payload)
tuner_config = tuning_config(tuner=fm_tuner,
                             inputs=config["tune_model"]["inputs"])

# create transform config; the upstream task is the tuning task when HPO is
# enabled, otherwise the plain training task
transform_config = transform_config_from_estimator(
    estimator=fm_estimator,
    task_id="model_tuning" if hpo_enabled else "model_training",
    task_type="tuning" if hpo_enabled else "training",
    **config["batch_transform"]["transform_config"])

# =============================================================================
# define airflow DAG and tasks
# =============================================================================

# define airflow DAG

# default task arguments; start_date two days ago so the DAG is schedulable
args = {'owner': 'airflow', 'start_date': airflow.utils.dates.days_ago(2)}
    def __init__(self,
                 state_id,
                 tuner,
                 job_name,
                 data,
                 wait_for_completion=True,
                 tags=None,
                 **kwargs):
        """
        Args:
            state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine.
            tuner (sagemaker.tuner.HyperparameterTuner): The tuner to use in the TuningStep.
            job_name (str or Placeholder): Specify a tuning job name.  We recommend to use :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution.
            data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator in the tuner, as this can take any of the following forms:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple
                    channels for training data, you can specify a dict mapping channel names to
                    strings or :func:`~sagemaker.inputs.TrainingInput` objects.
                * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can
                    provide additional information about the training dataset. See
                    :func:`sagemaker.inputs.TrainingInput` for full details.
                * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of
                    Amazon :class:`Record` objects serialized and stored in S3.
                    For use with an estimator for an Amazon algorithm.
                * (list[sagemaker.amazon.amazon_estimator.RecordSet]) - A list of
                    :class:`sagemaker.amazon.amazon_estimator.RecordSet` objects,
                    where each instance is a different channel of training data.
            wait_for_completion(bool, optional): Boolean value set to `True` if the Task state should wait for the tuning job to complete before proceeding to the next step in the workflow. Set to `False` if the Task state should submit the tuning job and proceed to the next step. (default: True)
            tags (list[dict] or Placeholder, optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
            parameters(dict, optional): The value of this field is merged with other arguments to become the request payload for SageMaker `CreateHyperParameterTuningJob <https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_CreateHyperParameterTuningJob.html>`_.
                You can use `parameters` to override the value provided by other arguments and specify any field's value dynamically using `Placeholders <https://aws-step-functions-data-science-sdk.readthedocs.io/en/stable/placeholders.html?highlight=placeholder#stepfunctions.inputs.Placeholder>`_.

        """
        # Pick the service-integration ARN. The WaitForCompletion (".sync")
        # pattern makes the Task state block until the tuning job finishes;
        # the plain pattern returns as soon as the job has been submitted.
        # (The branch markers below were bare triple-quoted string literals —
        # no-op expression statements, not comments — now real comments.)
        if wait_for_completion:
            # e.g. arn:aws:states:::sagemaker:createHyperParameterTuningJob.sync
            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME,
                SageMakerApi.CreateHyperParameterTuningJob,
                IntegrationPattern.WaitForCompletion)
        else:
            # e.g. arn:aws:states:::sagemaker:createHyperParameterTuningJob
            kwargs[Field.Resource.value] = get_service_integration_arn(
                SAGEMAKER_SERVICE_NAME,
                SageMakerApi.CreateHyperParameterTuningJob)

        # Base CreateHyperParameterTuningJob payload derived from the tuner.
        tuning_parameters = tuning_config(tuner=tuner,
                                          inputs=data,
                                          job_name=job_name).copy()

        # An explicit job_name overrides whatever name tuning_config generated.
        if job_name is not None:
            tuning_parameters['HyperParameterTuningJobName'] = job_name

        # Strip 'S3Operations' before submission — presumably an Airflow-side
        # artifact of tuning_config, not a SageMaker API field. TODO confirm.
        if 'S3Operations' in tuning_parameters:
            del tuning_parameters['S3Operations']

        if tags:
            # A Placeholder is passed through untouched (resolved at execution
            # time); a plain dict is converted to SageMaker's Key/Value list.
            tuning_parameters['Tags'] = tags if isinstance(
                tags, Placeholder) else tags_dict_to_kv_list(tags)

        # Caller-supplied `parameters` override the generated payload
        # field-by-field, so individual fields can use Placeholders.
        if Field.Parameters.value in kwargs and isinstance(
                kwargs[Field.Parameters.value], dict):
            merge_dicts(tuning_parameters, kwargs[Field.Parameters.value])

        kwargs[Field.Parameters.value] = tuning_parameters
        super(TuningStep, self).__init__(state_id, **kwargs)
# Example 4
def test_framework_tuning_config(sagemaker_session):
    """tuning_config should expand an MXNet HyperparameterTuner into the full
    CreateHyperParameterTuningJob request dict, preserving the Jinja template
    placeholders (``{{ ... }}``) for Airflow to render at execution time."""
    mxnet_estimator = mxnet.MXNet(entry_point="{{ entry_point }}",
                                  source_dir="{{ source_dir }}",
                                  py_version='py3',
                                  framework_version='1.3.0',
                                  role="{{ role }}",
                                  train_instance_count=1,
                                  train_instance_type='ml.m4.xlarge',
                                  sagemaker_session=sagemaker_session,
                                  base_job_name="{{ base_job_name }}",
                                  hyperparameters={'batch_size': 100})

    # One range of each supported parameter kind: categorical, continuous, integer.
    hyperparameter_ranges = {
        'optimizer': tuner.CategoricalParameter(['sgd', 'Adam']),
        'learning_rate': tuner.ContinuousParameter(0.01, 0.2),
        'num_epoch': tuner.IntegerParameter(10, 50)
    }
    objective_metric_name = 'Validation-accuracy'
    metric_definitions = [{
        'Name': 'Validation-accuracy',
        'Regex': 'Validation-accuracy=([0-9\\.]+)'
    }]

    mxnet_tuner = tuner.HyperparameterTuner(
        estimator=mxnet_estimator,
        objective_metric_name=objective_metric_name,
        hyperparameter_ranges=hyperparameter_ranges,
        metric_definitions=metric_definitions,
        strategy='Bayesian',
        objective_type='Maximize',
        max_jobs="{{ max_job }}",
        max_parallel_jobs="{{ max_parallel_job }}",
        tags=[{
            '{{ key }}': '{{ value }}'
        }],
        base_tuning_job_name="{{ base_job_name }}")

    data = "{{ training_data }}"

    config = airflow.tuning_config(mxnet_tuner, data)
    # Full expected request payload. Note the job name embeds an Airflow
    # `execution_date` template, and 'S3Operations' describes the source-dir
    # upload performed outside the SageMaker API call.
    expected_config = {
        'HyperParameterTuningJobName':
        "{{ base_job_name }}-{{ execution_date.strftime('%y%m%d-%H%M') }}",
        'HyperParameterTuningJobConfig': {
            'Strategy': 'Bayesian',
            'HyperParameterTuningJobObjective': {
                'Type': 'Maximize',
                'MetricName': 'Validation-accuracy'
            },
            'ResourceLimits': {
                'MaxNumberOfTrainingJobs': '{{ max_job }}',
                'MaxParallelTrainingJobs': '{{ max_parallel_job }}'
            },
            'ParameterRanges': {
                'ContinuousParameterRanges': [{
                    'Name': 'learning_rate',
                    'MinValue': '0.01',
                    'MaxValue': '0.2'
                }],
                'CategoricalParameterRanges': [{
                    'Name': 'optimizer',
                    'Values': ['"sgd"', '"Adam"']
                }],
                'IntegerParameterRanges': [{
                    'Name': 'num_epoch',
                    'MinValue': '10',
                    'MaxValue': '50'
                }]
            }
        },
        'TrainingJobDefinition': {
            'AlgorithmSpecification': {
                'TrainingImage':
                '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3',
                'TrainingInputMode':
                'File',
                'MetricDefinitions': [{
                    'Name':
                    'Validation-accuracy',
                    'Regex':
                    'Validation-accuracy=([0-9\\.]+)'
                }]
            },
            'OutputDataConfig': {
                'S3OutputPath': 's3://output/'
            },
            'StoppingCondition': {
                'MaxRuntimeInSeconds': 86400
            },
            'ResourceConfig': {
                'InstanceCount': 1,
                'InstanceType': 'ml.m4.xlarge',
                'VolumeSizeInGB': 30
            },
            'RoleArn':
            '{{ role }}',
            'InputDataConfig': [{
                'DataSource': {
                    'S3DataSource': {
                        'S3DataDistributionType': 'FullyReplicated',
                        'S3DataType': 'S3Prefix',
                        'S3Uri': '{{ training_data }}'
                    }
                },
                'ChannelName': 'training'
            }],
            'StaticHyperParameters': {
                'batch_size':
                '100',
                'sagemaker_submit_directory':
                '"s3://output/{{ base_job_name }}'
                '-{{ execution_date.strftime(\'%Y-%m-%d-%H-%M-%S\') }}'
                '/source/sourcedir.tar.gz"',
                'sagemaker_program':
                '"{{ entry_point }}"',
                'sagemaker_enable_cloudwatch_metrics':
                'false',
                'sagemaker_container_log_level':
                '20',
                'sagemaker_job_name':
                '"{{ base_job_name }}-'
                '{{ execution_date.strftime(\'%Y-%m-%d-%H-%M-%S\') }}"',
                'sagemaker_region':
                '"us-west-2"'
            }
        },
        'Tags': [{
            '{{ key }}': '{{ value }}'
        }],
        'S3Operations': {
            'S3Upload': [{
                'Path': '{{ source_dir }}',
                'Bucket': 'output',
                'Key': "{{ base_job_name }}-"
                "{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}/source/sourcedir.tar.gz",
                'Tar': True
            }]
        }
    }

    assert config == expected_config
def test_framework_tuning_config(sagemaker_session):
    """tuning_config should expand an MXNet HyperparameterTuner into the full
    CreateHyperParameterTuningJob request dict.

    NOTE(review): this redefines ``test_framework_tuning_config`` declared
    earlier in this file — under pytest only this later definition runs. The
    expectations differ from the earlier copy (adds ``ScalingType`` in the
    parameter ranges and uses a ``TIME_STAMP`` constant for job names).
    """
    mxnet_estimator = mxnet.MXNet(
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        py_version="py3",
        framework_version="1.3.0",
        role="{{ role }}",
        train_instance_count=1,
        train_instance_type="ml.m4.xlarge",
        sagemaker_session=sagemaker_session,
        base_job_name="{{ base_job_name }}",
        hyperparameters={"batch_size": 100},
    )

    # One range of each supported parameter kind: categorical, continuous, integer.
    hyperparameter_ranges = {
        "optimizer": tuner.CategoricalParameter(["sgd", "Adam"]),
        "learning_rate": tuner.ContinuousParameter(0.01, 0.2),
        "num_epoch": tuner.IntegerParameter(10, 50),
    }
    objective_metric_name = "Validation-accuracy"
    metric_definitions = [{
        "Name": "Validation-accuracy",
        "Regex": "Validation-accuracy=([0-9\\.]+)"
    }]

    mxnet_tuner = tuner.HyperparameterTuner(
        estimator=mxnet_estimator,
        objective_metric_name=objective_metric_name,
        hyperparameter_ranges=hyperparameter_ranges,
        metric_definitions=metric_definitions,
        strategy="Bayesian",
        objective_type="Maximize",
        max_jobs="{{ max_job }}",
        max_parallel_jobs="{{ max_parallel_job }}",
        tags=[{
            "{{ key }}": "{{ value }}"
        }],
        base_tuning_job_name="{{ base_job_name }}",
    )

    data = "{{ training_data }}"

    config = airflow.tuning_config(mxnet_tuner, data)
    # Full expected request payload; job names embed the module-level
    # TIME_STAMP, and 'S3Operations' describes the source-dir upload
    # performed outside the SageMaker API call.
    expected_config = {
        "HyperParameterTuningJobName": "{{ base_job_name }}-%s" % TIME_STAMP,
        "HyperParameterTuningJobConfig": {
            "Strategy": "Bayesian",
            "HyperParameterTuningJobObjective": {
                "Type": "Maximize",
                "MetricName": "Validation-accuracy",
            },
            "ResourceLimits": {
                "MaxNumberOfTrainingJobs": "{{ max_job }}",
                "MaxParallelTrainingJobs": "{{ max_parallel_job }}",
            },
            "ParameterRanges": {
                "ContinuousParameterRanges": [{
                    "Name": "learning_rate",
                    "MinValue": "0.01",
                    "MaxValue": "0.2",
                    "ScalingType": "Auto",
                }],
                "CategoricalParameterRanges": [{
                    "Name": "optimizer",
                    "Values": ['"sgd"', '"Adam"']
                }],
                "IntegerParameterRanges": [{
                    "Name": "num_epoch",
                    "MinValue": "10",
                    "MaxValue": "50",
                    "ScalingType": "Auto"
                }],
            },
        },
        "TrainingJobDefinition": {
            "AlgorithmSpecification": {
                "TrainingImage":
                "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3",
                "TrainingInputMode":
                "File",
                "MetricDefinitions": [{
                    "Name":
                    "Validation-accuracy",
                    "Regex":
                    "Validation-accuracy=([0-9\\.]+)"
                }],
            },
            "OutputDataConfig": {
                "S3OutputPath": "s3://output/"
            },
            "StoppingCondition": {
                "MaxRuntimeInSeconds": 86400
            },
            "ResourceConfig": {
                "InstanceCount": 1,
                "InstanceType": "ml.m4.xlarge",
                "VolumeSizeInGB": 30,
            },
            "RoleArn":
            "{{ role }}",
            "InputDataConfig": [{
                "DataSource": {
                    "S3DataSource": {
                        "S3DataDistributionType": "FullyReplicated",
                        "S3DataType": "S3Prefix",
                        "S3Uri": "{{ training_data }}",
                    }
                },
                "ChannelName": "training",
            }],
            "StaticHyperParameters": {
                "batch_size":
                "100",
                "sagemaker_submit_directory":
                '"s3://output/{{ base_job_name }}-%s/source/sourcedir.tar.gz"'
                % TIME_STAMP,
                "sagemaker_program":
                '"{{ entry_point }}"',
                "sagemaker_enable_cloudwatch_metrics":
                "false",
                "sagemaker_container_log_level":
                "20",
                "sagemaker_job_name":
                '"{{ base_job_name }}-%s"' % TIME_STAMP,
                "sagemaker_region":
                '"us-west-2"',
            },
        },
        "Tags": [{
            "{{ key }}": "{{ value }}"
        }],
        "S3Operations": {
            "S3Upload": [{
                "Path":
                "{{ source_dir }}",
                "Bucket":
                "output",
                "Key":
                "{{ base_job_name }}-%s/source/sourcedir.tar.gz" % TIME_STAMP,
                "Tar":
                True,
            }]
        },
    }

    assert config == expected_config