def test_byo_training_config_required_args(sagemaker_session):
    """Verify ``airflow.training_config`` for a minimally configured BYO estimator.

    The estimator receives only an image name, role, instance count, instance
    type and session. The role, instance count and training data location are
    passed as ``{{ ... }}`` placeholder strings and are expected to show up
    unchanged in the generated config. Hyperparameters are set as integers
    and are expected back as strings.
    """
    estimator_kwargs = dict(
        image_name="byo",
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        sagemaker_session=sagemaker_session,
    )
    byo_estimator = estimator.Estimator(**estimator_kwargs)

    hyperparameters = dict(epochs=32, feature_dim=1024, mini_batch_size=256)
    byo_estimator.set_hyperparameters(**hyperparameters)

    channels = {"train": "{{ training_data }}"}
    actual = airflow.training_config(byo_estimator, channels)

    # The single input channel expected for the "train" entry above.
    train_channel = {
        "ChannelName": "train",
        "DataSource": {
            "S3DataSource": {
                "S3DataDistributionType": "FullyReplicated",
                "S3DataType": "S3Prefix",
                "S3Uri": "{{ training_data }}",
            },
        },
    }

    # Job name is the image name joined to the module-level TIME_STAMP.
    job_name = "byo-%s" % TIME_STAMP

    expected = {
        "TrainingJobName": job_name,
        "RoleArn": "{{ role }}",
        "AlgorithmSpecification": {
            "TrainingImage": "byo",
            "TrainingInputMode": "File",
        },
        "InputDataConfig": [train_channel],
        "OutputDataConfig": {"S3OutputPath": "s3://output/"},
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": 30,
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
        # Integer hyperparameters are expected in string form.
        "HyperParameters": {
            "epochs": "32",
            "feature_dim": "1024",
            "mini_batch_size": "256",
        },
    }
    assert actual == expected
Пример #2
0
def test_byo_training_config_required_args(sagemaker_session):
    """Verify ``airflow.training_config`` for a minimally configured BYO estimator.

    The expected job name is obtained from ``get_job_name('byo')`` before the
    estimator is created. The role, instance count and training data location
    are passed as ``{{ ... }}`` placeholder strings and are expected to show
    up unchanged in the generated config. Hyperparameters are set as integers
    and are expected back as strings.
    """
    expected_job_name = get_job_name('byo')

    byo_estimator = estimator.Estimator(
        image_name="byo",
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        sagemaker_session=sagemaker_session,
    )
    byo_estimator.set_hyperparameters(
        epochs=32,
        feature_dim=1024,
        mini_batch_size=256,
    )

    actual = airflow.training_config(
        byo_estimator, {'train': "{{ training_data }}"}
    )

    # Assemble the expected config section by section.
    expected = {}
    expected['TrainingJobName'] = expected_job_name
    expected['RoleArn'] = '{{ role }}'
    expected['AlgorithmSpecification'] = {
        'TrainingImage': 'byo',
        'TrainingInputMode': 'File',
    }
    expected['OutputDataConfig'] = {'S3OutputPath': 's3://output/'}
    expected['StoppingCondition'] = {'MaxRuntimeInSeconds': 86400}
    expected['ResourceConfig'] = {
        'InstanceCount': '{{ instance_count }}',
        'InstanceType': 'ml.c4.2xlarge',
        'VolumeSizeInGB': 30,
    }
    expected['InputDataConfig'] = [
        {
            'ChannelName': 'train',
            'DataSource': {
                'S3DataSource': {
                    'S3DataDistributionType': 'FullyReplicated',
                    'S3DataType': 'S3Prefix',
                    'S3Uri': '{{ training_data }}',
                },
            },
        },
    ]
    # Integer hyperparameters are expected in string form.
    expected['HyperParameters'] = {
        'epochs': '32',
        'feature_dim': '1024',
        'mini_batch_size': '256',
    }

    assert actual == expected
Пример #3
0
def test_byo_training_config_all_args(sagemaker_session):
    """Verify ``airflow.training_config`` for a BYO estimator with many arguments set.

    Besides the basic image/role/instance settings, the estimator is given
    volume size and KMS key, max run time, input mode, output path and KMS
    key, base job name, tags, subnets, security groups, and a model URI with
    its channel name. Almost all of these are ``{{ ... }}`` placeholder
    strings that are expected to appear unchanged in the generated config.
    """

    def s3_prefix_source(uri):
        # Both expected channels share this S3 data source layout.
        return {
            'S3DataSource': {
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3Uri': uri,
            }
        }

    estimator_kwargs = {
        'image_name': "byo",
        'role': "{{ role }}",
        'train_instance_count': "{{ instance_count }}",
        'train_instance_type': "ml.c4.2xlarge",
        'train_volume_size': "{{ train_volume_size }}",
        'train_volume_kms_key': "{{ train_volume_kms_key }}",
        'train_max_run': "{{ train_max_run }}",
        'input_mode': 'Pipe',
        'output_path': "{{ output_path }}",
        'output_kms_key': "{{ output_volume_kms_key }}",
        'base_job_name': "{{ base_job_name }}",
        'tags': [{"{{ key }}": "{{ value }}"}],
        'subnets': ["{{ subnet }}"],
        'security_group_ids': ["{{ security_group_ids }}"],
        'model_uri': "{{ model_uri }}",
        # Spelling of the placeholder matches the expected channel name below.
        'model_channel_name': "{{ model_chanel }}",
        'sagemaker_session': sagemaker_session,
    }
    byo_estimator = estimator.Estimator(**estimator_kwargs)
    byo_estimator.set_hyperparameters(
        epochs=32, feature_dim=1024, mini_batch_size=256
    )

    actual = airflow.training_config(
        byo_estimator, {'train': "{{ training_data }}"}
    )

    train_channel = {
        'DataSource': s3_prefix_source('{{ training_data }}'),
        'ChannelName': 'train',
    }
    # The model URI is expected as a second channel, always in File mode.
    model_channel = {
        'DataSource': s3_prefix_source('{{ model_uri }}'),
        'ContentType': 'application/x-sagemaker-model',
        'InputMode': 'File',
        'ChannelName': '{{ model_chanel }}',
    }

    expected = {
        # Base job name followed by a templated execution-date suffix.
        'TrainingJobName': "{{ base_job_name }}-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}",
        'RoleArn': '{{ role }}',
        'AlgorithmSpecification': {
            'TrainingImage': 'byo',
            'TrainingInputMode': 'Pipe',
        },
        'InputDataConfig': [train_channel, model_channel],
        'OutputDataConfig': {
            'S3OutputPath': '{{ output_path }}',
            'KmsKeyId': '{{ output_volume_kms_key }}',
        },
        'ResourceConfig': {
            'InstanceCount': '{{ instance_count }}',
            'InstanceType': 'ml.c4.2xlarge',
            'VolumeSizeInGB': '{{ train_volume_size }}',
            'VolumeKmsKeyId': '{{ train_volume_kms_key }}',
        },
        'StoppingCondition': {'MaxRuntimeInSeconds': '{{ train_max_run }}'},
        'VpcConfig': {
            'Subnets': ['{{ subnet }}'],
            'SecurityGroupIds': ['{{ security_group_ids }}'],
        },
        # Integer hyperparameters are expected in string form.
        'HyperParameters': {
            'epochs': '32',
            'feature_dim': '1024',
            'mini_batch_size': '256',
        },
        'Tags': [{'{{ key }}': '{{ value }}'}],
    }
    assert actual == expected
def test_byo_training_config_all_args(sagemaker_session):
    """Verify ``airflow.training_config`` for a BYO estimator with many arguments set.

    The estimator is configured with volume, KMS, run-time, input-mode,
    output, job-name, tag, networking and model-channel arguments, most of
    them as ``{{ ... }}`` placeholder strings that are expected to appear
    unchanged in the generated config. The expected job name is the base job
    name placeholder joined to the module-level ``TIME_STAMP``.
    """
    tag_list = [{"{{ key }}": "{{ value }}"}]
    subnet_ids = ["{{ subnet }}"]
    security_groups = ["{{ security_group_ids }}"]

    byo_estimator = estimator.Estimator(
        image_name="byo",
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        train_volume_size="{{ train_volume_size }}",
        train_volume_kms_key="{{ train_volume_kms_key }}",
        train_max_run="{{ train_max_run }}",
        input_mode="Pipe",
        output_path="{{ output_path }}",
        output_kms_key="{{ output_volume_kms_key }}",
        base_job_name="{{ base_job_name }}",
        tags=tag_list,
        subnets=subnet_ids,
        security_group_ids=security_groups,
        model_uri="{{ model_uri }}",
        # Spelling of the placeholder matches the expected channel name below.
        model_channel_name="{{ model_chanel }}",
        sagemaker_session=sagemaker_session,
    )

    hyperparameters = {"epochs": 32, "feature_dim": 1024, "mini_batch_size": 256}
    byo_estimator.set_hyperparameters(**hyperparameters)

    channels = {"train": "{{ training_data }}"}
    actual = airflow.training_config(byo_estimator, channels)

    expected_job_name = "{{ base_job_name }}-%s" % TIME_STAMP

    expected_train_channel = {
        "ChannelName": "train",
        "DataSource": {
            "S3DataSource": {
                "S3DataDistributionType": "FullyReplicated",
                "S3DataType": "S3Prefix",
                "S3Uri": "{{ training_data }}",
            },
        },
    }
    # The model URI is expected as a second channel, always in File mode.
    expected_model_channel = {
        "ChannelName": "{{ model_chanel }}",
        "ContentType": "application/x-sagemaker-model",
        "InputMode": "File",
        "DataSource": {
            "S3DataSource": {
                "S3DataDistributionType": "FullyReplicated",
                "S3DataType": "S3Prefix",
                "S3Uri": "{{ model_uri }}",
            },
        },
    }

    expected = {
        "TrainingJobName": expected_job_name,
        "RoleArn": "{{ role }}",
        "AlgorithmSpecification": {
            "TrainingImage": "byo",
            "TrainingInputMode": "Pipe",
        },
        "InputDataConfig": [expected_train_channel, expected_model_channel],
        "OutputDataConfig": {
            "S3OutputPath": "{{ output_path }}",
            "KmsKeyId": "{{ output_volume_kms_key }}",
        },
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": "{{ train_volume_size }}",
            "VolumeKmsKeyId": "{{ train_volume_kms_key }}",
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": "{{ train_max_run }}"},
        "VpcConfig": {
            "Subnets": ["{{ subnet }}"],
            "SecurityGroupIds": ["{{ security_group_ids }}"],
        },
        # Integer hyperparameters are expected in string form.
        "HyperParameters": {
            "epochs": "32",
            "feature_dim": "1024",
            "mini_batch_size": "256",
        },
        "Tags": [{"{{ key }}": "{{ value }}"}],
    }
    assert actual == expected