def test_transform_config_from_amazon_alg_estimator(sagemaker_session):
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")
    transform_data = "{{ transform_data }}"

    # simulate training
    airflow.training_config(knn_estimator, record, mini_batch_size=256)

    config = airflow.transform_config_from_estimator(
        estimator=knn_estimator,
        task_id="task_id",
        task_type="training",
        instance_count="{{ instance_count }}",
        instance_type="ml.p2.xlarge",
        data=transform_data,
    )
    expected_config = {
        "Model": {
            "ModelName": "knn-%s" % TIME_STAMP,
            "PrimaryContainer": {
                "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
                "Environment": {},
                "ModelDataUrl": "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Training']['TrainingJobName'] }}"
                "/output/model.tar.gz",
            },
            "ExecutionRoleArn": "{{ role }}",
        },
        "Transform": {
            "TransformJobName": "knn-%s" % TIME_STAMP,
            "ModelName": "knn-%s" % TIME_STAMP,
            "TransformInput": {
                "DataSource": {
                    "S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": "{{ transform_data }}"}
                }
            },
            "TransformOutput": {"S3OutputPath": "s3://output/knn-%s" % TIME_STAMP},
            "TransformResources": {
                "InstanceCount": "{{ instance_count }}",
                "InstanceType": "ml.p2.xlarge",
            },
        },
    }

    assert config == expected_config
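
# Illustrative sketch (not part of the test suite): the dict produced by
# airflow.transform_config_from_estimator carries both a "Model" and a
# "Transform" section, so a single Airflow operator can register the model and
# then launch the batch transform job. This assumes Airflow 1.10's contrib
# operators are installed; the task id and `dag` argument are hypothetical.
def _example_transform_operator_usage(dag, transform_config):
    from airflow.contrib.operators.sagemaker_transform_operator import SageMakerTransformOperator

    # `transform_config` is a dict in the shape asserted above.
    return SageMakerTransformOperator(
        task_id="sm_batch_transform",
        config=transform_config,
        wait_for_completion=True,
        dag=dag,
    )
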

def test_model_config_from_amazon_alg_estimator(sagemaker_session):
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    # simulate training
    airflow.training_config(knn_estimator, record, mini_batch_size=256)

    config = airflow.model_config_from_estimator(
        instance_type="ml.c4.xlarge",
        estimator=knn_estimator,
        task_id="task_id",
        task_type="tuning",
    )
    expected_config = {
        "ModelName": "knn-%s" % TIME_STAMP,
        "PrimaryContainer": {
            "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
            "Environment": {},
            "ModelDataUrl": "s3://output/{{ ti.xcom_pull(task_ids='task_id')['Tuning']['BestTrainingJob']"
            "['TrainingJobName'] }}/output/model.tar.gz",
        },
        "ExecutionRoleArn": "{{ role }}",
    }

    assert config == expected_config
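
# Illustrative sketch (not part of the test suite): with task_type="tuning",
# the ModelDataUrl above is a Jinja expression that Airflow renders at run time
# from the XCom of the upstream tuning task ("task_id"), so the model picks up
# the best training job's artifacts. A minimal wiring sketch, assuming Airflow
# 1.10's contrib operators; the task id and `dag` argument are hypothetical.
def _example_model_operator_usage(dag, model_config):
    from airflow.contrib.operators.sagemaker_model_operator import SageMakerModelOperator

    # `model_config` is a dict in the shape asserted above.
    return SageMakerModelOperator(task_id="sm_create_model", config=model_config, dag=dag)
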

def test_amazon_alg_training_config_required_args(sagemaker_session):
    ntm_estimator = ntm.NTM(
        role="{{ role }}",
        num_topics=10,
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        sagemaker_session=sagemaker_session,
    )

    ntm_estimator.epochs = 32

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    config = airflow.training_config(ntm_estimator, record, mini_batch_size=256)
    expected_config = {
        "AlgorithmSpecification": {
            "TrainingImage": "174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:1",
            "TrainingInputMode": "File",
        },
        "OutputDataConfig": {"S3OutputPath": "s3://output/"},
        "TrainingJobName": "ntm-%s" % TIME_STAMP,
        "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": 30,
        },
        "RoleArn": "{{ role }}",
        "InputDataConfig": [
            {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataDistributionType": "ShardedByS3Key",
                        "S3DataType": "S3Prefix",
                        "S3Uri": "{{ record }}",
                    }
                },
                "ChannelName": "train",
            }
        ],
        "HyperParameters": {
            "num_topics": "10",
            "epochs": "32",
            "mini_batch_size": "256",
            "feature_dim": "100",
        },
    }

    assert config == expected_config
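
# Illustrative sketch (not part of the test suite): airflow.training_config
# returns the CreateTrainingJob request body, with Jinja placeholders such as
# "{{ instance_count }}" and "{{ record }}" left for Airflow to render. A
# minimal sketch of handing it to Airflow 1.10's contrib training operator;
# the task id and `dag` argument are hypothetical.
def _example_training_operator_usage(dag, training_config):
    from airflow.contrib.operators.sagemaker_training_operator import SageMakerTrainingOperator

    # `training_config` is a dict in the shape asserted above.
    return SageMakerTrainingOperator(
        task_id="sm_training",
        config=training_config,
        wait_for_completion=True,
        dag=dag,
    )
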

def test_deploy_config_from_amazon_alg_estimator(sagemaker_session):
    knn_estimator = knn.KNN(
        role="{{ role }}",
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.m4.xlarge",
        k=16,
        sample_size=128,
        predictor_type="regressor",
        sagemaker_session=sagemaker_session,
    )

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    # simulate training
    airflow.training_config(knn_estimator, record, mini_batch_size=256)

    config = airflow.deploy_config_from_estimator(
        estimator=knn_estimator,
        initial_instance_count="{{ instance_count }}",
        instance_type="ml.p2.xlarge",
    )
    expected_config = {
        "Model": {
            "ModelName": "knn-%s" % TIME_STAMP,
            "PrimaryContainer": {
                "Image": "174872318107.dkr.ecr.us-west-2.amazonaws.com/knn:1",
                "Environment": {},
                "ModelDataUrl": "s3://output/knn-%s/output/model.tar.gz" % TIME_STAMP,
            },
            "ExecutionRoleArn": "{{ role }}",
        },
        "EndpointConfig": {
            "EndpointConfigName": "knn-%s" % TIME_STAMP,
            "ProductionVariants": [
                {
                    "InstanceType": "ml.p2.xlarge",
                    "InitialInstanceCount": "{{ instance_count }}",
                    "ModelName": "knn-%s" % TIME_STAMP,
                    "VariantName": "AllTraffic",
                    "InitialVariantWeight": 1,
                }
            ],
        },
        "Endpoint": {
            "EndpointName": "knn-%s" % TIME_STAMP,
            "EndpointConfigName": "knn-%s" % TIME_STAMP,
        },
    }

    assert config == expected_config
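
# Illustrative sketch (not part of the test suite): the deploy config bundles
# "Model", "EndpointConfig", and "Endpoint" sections, matching the shape
# consumed by Airflow 1.10's contrib endpoint operator, which creates all three
# resources when operation="create". The task id and `dag` argument are
# hypothetical.
def _example_endpoint_operator_usage(dag, deploy_config):
    from airflow.contrib.operators.sagemaker_endpoint_operator import SageMakerEndpointOperator

    # `deploy_config` is a dict in the shape asserted above.
    return SageMakerEndpointOperator(
        task_id="sm_deploy",
        config=deploy_config,
        operation="create",
        wait_for_completion=True,
        dag=dag,
    )
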
def test_amazon_alg_training_config_all_args(sagemaker_session): ntm_estimator = ntm.NTM( role="{{ role }}", num_topics=10, train_instance_count="{{ instance_count }}", train_instance_type="ml.c4.2xlarge", train_volume_size="{{ train_volume_size }}", train_volume_kms_key="{{ train_volume_kms_key }}", train_max_run="{{ train_max_run }}", input_mode='Pipe', output_path="{{ output_path }}", output_kms_key="{{ output_volume_kms_key }}", base_job_name="{{ base_job_name }}", tags=[{"{{ key }}": "{{ value }}"}], subnets=["{{ subnet }}"], security_group_ids=["{{ security_group_ids }}"], sagemaker_session=sagemaker_session) ntm_estimator.epochs = 32 ntm_estimator.mini_batch_size = 256 record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, 'S3Prefix') config = airflow.training_config(ntm_estimator, record) expected_config = { 'AlgorithmSpecification': { 'TrainingImage': '174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:1', 'TrainingInputMode': 'Pipe' }, 'OutputDataConfig': { 'S3OutputPath': '{{ output_path }}', 'KmsKeyId': '{{ output_volume_kms_key }}' }, 'TrainingJobName': "{{ base_job_name }}-{{ execution_date.strftime('%Y-%m-%d-%H-%M-%S') }}", 'StoppingCondition': { 'MaxRuntimeInSeconds': '{{ train_max_run }}' }, 'ResourceConfig': { 'InstanceCount': '{{ instance_count }}', 'InstanceType': 'ml.c4.2xlarge', 'VolumeSizeInGB': '{{ train_volume_size }}', 'VolumeKmsKeyId': '{{ train_volume_kms_key }}' }, 'RoleArn': '{{ role }}', 'InputDataConfig': [{ 'DataSource': { 'S3DataSource': { 'S3DataDistributionType': 'ShardedByS3Key', 'S3DataType': 'S3Prefix', 'S3Uri': '{{ record }}' } }, 'ChannelName': 'train' }], 'VpcConfig': { 'Subnets': ['{{ subnet }}'], 'SecurityGroupIds': ['{{ security_group_ids }}'] }, 'HyperParameters': { 'num_topics': '10', 'epochs': '32', 'mini_batch_size': '256', 'feature_dim': '100' }, 'Tags': [{'{{ key }}': '{{ value }}'}] } assert config == expected_config

def test_amazon_alg_training_config_all_args(sagemaker_session):
    ntm_estimator = ntm.NTM(
        role="{{ role }}",
        num_topics=10,
        train_instance_count="{{ instance_count }}",
        train_instance_type="ml.c4.2xlarge",
        train_volume_size="{{ train_volume_size }}",
        train_volume_kms_key="{{ train_volume_kms_key }}",
        train_max_run="{{ train_max_run }}",
        input_mode="Pipe",
        output_path="{{ output_path }}",
        output_kms_key="{{ output_volume_kms_key }}",
        base_job_name="{{ base_job_name }}",
        tags=[{"{{ key }}": "{{ value }}"}],
        subnets=["{{ subnet }}"],
        security_group_ids=["{{ security_group_ids }}"],
        sagemaker_session=sagemaker_session,
    )

    ntm_estimator.epochs = 32

    record = amazon_estimator.RecordSet("{{ record }}", 10000, 100, "S3Prefix")

    config = airflow.training_config(ntm_estimator, record, mini_batch_size=256)
    expected_config = {
        "AlgorithmSpecification": {
            "TrainingImage": "174872318107.dkr.ecr.us-west-2.amazonaws.com/ntm:1",
            "TrainingInputMode": "Pipe",
        },
        "OutputDataConfig": {
            "S3OutputPath": "{{ output_path }}",
            "KmsKeyId": "{{ output_volume_kms_key }}",
        },
        "TrainingJobName": "{{ base_job_name }}-%s" % TIME_STAMP,
        "StoppingCondition": {"MaxRuntimeInSeconds": "{{ train_max_run }}"},
        "ResourceConfig": {
            "InstanceCount": "{{ instance_count }}",
            "InstanceType": "ml.c4.2xlarge",
            "VolumeSizeInGB": "{{ train_volume_size }}",
            "VolumeKmsKeyId": "{{ train_volume_kms_key }}",
        },
        "RoleArn": "{{ role }}",
        "InputDataConfig": [
            {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataDistributionType": "ShardedByS3Key",
                        "S3DataType": "S3Prefix",
                        "S3Uri": "{{ record }}",
                    }
                },
                "ChannelName": "train",
            }
        ],
        "VpcConfig": {
            "Subnets": ["{{ subnet }}"],
            "SecurityGroupIds": ["{{ security_group_ids }}"],
        },
        "HyperParameters": {
            "num_topics": "10",
            "epochs": "32",
            "mini_batch_size": "256",
            "feature_dim": "100",
        },
        "Tags": [{"{{ key }}": "{{ value }}"}],
    }

    assert config == expected_config