def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Check the model config built from a KNN estimator for a tuning task.

    A KNN estimator is created with templated ``role`` and instance count,
    ``airflow.training_config`` is invoked on it to simulate training, and the
    dictionary returned by ``airflow.model_config_from_estimator`` (called with
    ``task_type="tuning"``) is compared against the expected model config.
    """
    estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )
    training_records = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    # Simulate training; only the call matters here, its return value is unused.
    airflow.training_config(estimator, training_records, mini_batch_size=256)

    actual = airflow.model_config_from_estimator(
        instance_type="ml.c4.xlarge",
        estimator=estimator,
        task_id="task_id",
        task_type="tuning",
    )

    # The expected model artifact location is a template that pulls the job
    # name from the XCom entry of task "task_id".
    model_data_url = (
        "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']"
        "['TrainingJobName'] }}/output/model.tar.gz"
    )
    primary_container = {
        "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
        "Environment": {},
        "ModelDataUrl": model_data_url,
    }
    expected = {
        "ModelName": "knn-%s" % TIME_STAMP,
        "PrimaryContainer": primary_container,
        "ExecutionRoleArn": "{{ role }}",
    }

    assert actual == expected
# Example #2
0
def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Verify the tuning-task model config produced for a KNN estimator.

    The estimator is constructed with templated role / instance count, a
    training config is generated to simulate training, and the result of
    ``airflow.model_config_from_estimator`` with ``task_type='tuning'`` is
    asserted to equal the expected dictionary.
    """
    estimator_kwargs = {
        'role': "{{ role }}",
        'train_instance_count': "{{ instance_count }}",
        'train_instance_type': 'ml.m4.xlarge',
        'k': 16,
        'sample_size': 128,
        'predictor_type': 'regressor',
        'sagemaker_session': sagemaker_session,
    }
    knn_est = knn.KNN(**estimator_kwargs)

    record_set = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    # Simulate training before asking for the model config.
    airflow.training_config(knn_est, record_set, mini_batch_size=256)

    model_config = airflow.model_config_from_estimator(
        instance_type='ml.c4.xlarge',
        estimator=knn_est,
        task_id='task_id',
        task_type='tuning',
    )

    expected_container = {
        'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
        'Environment': {},
        # Templated S3 path; the job name is pulled from task 'task_id' via XCom.
        'ModelDataUrl': (
            "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']"
            "['TrainingJobName'] }}/output/model.tar.gz"
        ),
    }

    assert model_config == {
        'ModelName': "knn-%s" % TIME_STAMP,
        'PrimaryContainer': expected_container,
        'ExecutionRoleArn': '{{ role }}',
    }
# Example #3
0
def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    """Verify the model config produced for a KNN estimator after training.

    The expected job name is obtained from ``get_job_name('knn')`` up front,
    a training config is generated to simulate training, and the output of
    ``airflow.model_config_from_estimator`` is asserted to use that job name
    both as the model name and inside the model data URL.
    """
    # Obtained first, before any estimator work, as in the original ordering.
    expected_job_name = get_job_name('knn')

    estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type='ml.m4.xlarge',
        k=16,
        sample_size=128,
        predictor_type='regressor',
        sagemaker_session=sagemaker_session,
    )
    record_set = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix')

    # Simulate training; the returned config is intentionally discarded.
    airflow.training_config(estimator, record_set, mini_batch_size=256)

    actual = airflow.model_config_from_estimator(
        instance_type='ml.c4.xlarge',
        estimator=estimator,
    )

    model_data_url = "s3://output/{}/output/model.tar.gz".format(expected_job_name)
    expected = {
        'ModelName': expected_job_name,
        'PrimaryContainer': {
            'Image': '174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1',
            'Environment': {},
            'ModelDataUrl': model_data_url,
        },
        'ExecutionRoleArn': '{{ role }}',
    }

    assert actual == expected
def test_model_config_from_framework_estimator(sagemaker_session):
    """Check the model config built from an MXNet estimator for a training task.

    An MXNet estimator is created with templated entry point, source dir, role
    and base job name; ``airflow.training_config`` is invoked to simulate
    training; and the dictionary returned by
    ``airflow.model_config_from_estimator`` (called with
    ``task_type="training"``) is compared against the expected model config.
    """
    estimator = mxnet.MXNet(
        entry_point="{{ entry_point }}",
        source_dir="{{ source_dir }}",
        py_version="py3",
        framework_version="1.3.0",
        role="{{ role }}",
        train_instance_count=1,
        train_instance_type="ml.m4.xlarge",
        sagemaker_session=sagemaker_session,
        base_job_name="{{ base_job_name }}",
        hyperparameters={"batch_size": 100},
    )
    training_data = "{{ training_data }}"

    # Simulate training; only the call matters here, its return value is unused.
    airflow.training_config(estimator, training_data)

    actual = airflow.model_config_from_estimator(
        instance_type="ml.c4.xlarge",
        estimator=estimator,
        task_id="task_id",
        task_type="training",
    )

    # Both S3 locations are templates that pull the training job name from the
    # XCom entry of task "task_id".
    submit_directory = (
        "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']"
        "['TrainingJobName'] }}/source/sourcedir.tar.gz"
    )
    model_data_url = (
        "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']['TrainingJobName'] }}"
        "/output/model.tar.gz"
    )
    environment = {
        "SAGEMAKER_PROGRAM": "{{ entry_point }}",
        "SAGEMAKER_SUBMIT_DIRECTORY": submit_directory,
        "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS": "false",
        "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
        "SAGEMAKER_REGION": "us-west-2",
    }
    expected = {
        "ModelName": "sagemaker-mxnet-%s" % TIME_STAMP,
        "PrimaryContainer": {
            "Image": "520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3",
            "Environment": environment,
            "ModelDataUrl": model_data_url,
        },
        "ExecutionRoleArn": "{{ role }}",
    }

    assert actual == expected
# Example #5
0
def test_model_config_from_framework_estimator(sagemaker_session):
    """Verify the training-task model config produced for an MXNet estimator.

    The estimator is constructed with templated entry point, source dir, role
    and base job name; a training config is generated to simulate training;
    and the result of ``airflow.model_config_from_estimator`` with
    ``task_type='training'`` is asserted to equal the expected dictionary.
    """
    estimator_kwargs = {
        'entry_point': "{{ entry_point }}",
        'source_dir': "{{ source_dir }}",
        'py_version': 'py3',
        'framework_version': '1.3.0',
        'role': "{{ role }}",
        'train_instance_count': 1,
        'train_instance_type': 'ml.m4.xlarge',
        'sagemaker_session': sagemaker_session,
        'base_job_name': "{{ base_job_name }}",
        'hyperparameters': {'batch_size': 100},
    }
    mx_est = mxnet.MXNet(**estimator_kwargs)

    inputs = "{{ training_data }}"

    # Simulate training before asking for the model config.
    airflow.training_config(mx_est, inputs)

    model_config = airflow.model_config_from_estimator(
        instance_type='ml.c4.xlarge',
        estimator=mx_est,
        task_id='task_id',
        task_type='training',
    )

    # Shared templated prefix: the training job name is pulled from the XCom
    # entry of task 'task_id'; the two expected S3 paths differ only in suffix.
    job_prefix = "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']['TrainingJobName'] }}"

    expected_env = {
        'SAGEMAKER_PROGRAM': '{{ entry_point }}',
        'SAGEMAKER_SUBMIT_DIRECTORY': job_prefix + "/source/sourcedir.tar.gz",
        'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false',
        'SAGEMAKER_CONTAINER_LOG_LEVEL': '20',
        'SAGEMAKER_REGION': 'us-west-2',
    }
    expected_container = {
        'Image': '520713654638.dkr.ecr.us-west-2.amazonaws.com/sagemaker-mxnet:1.3.0-cpu-py3',
        'Environment': expected_env,
        'ModelDataUrl': job_prefix + "/output/model.tar.gz",
    }

    assert model_config == {
        'ModelName': "sagemaker-mxnet-%s" % TIME_STAMP,
        'PrimaryContainer': expected_container,
        'ExecutionRoleArn': '{{ role }}',
    }