def test_sagemaker_container_hosts_should_have_lowercase_names():
    """Generated container host names must be entirely lowercase for every cluster size."""
    random.seed(a=42)  # host names carry a random component; pin it for determinism
    for count in (2, 10, 1):
        container = _SageMakerContainer("local", count, "my-image", sagemaker_session=Mock())
        assert all(host == host.lower() for host in container.hosts)
def test_train(popen, get_data_source_instance, tmpdir, sagemaker_session):
    """Local-mode train: docker-compose file is generated and docker-compose is invoked."""
    data_source = Mock()
    data_source.get_root_dir.return_value = 'foo'
    get_data_source_instance.return_value = data_source

    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image,
                                                  sagemaker_session=sagemaker_session)
        sagemaker_container.train(
            INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG, HYPERPARAMETERS, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        call_args = popen.call_args[0][0]
        assert call_args is not None

        expected = ['docker-compose', '-f', docker_compose_file, 'up', '--build',
                    '--abort-on-container-exit']
        for i, v in enumerate(expected):
            assert call_args[i] == v

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without an explicit Loader is deprecated and can
            # execute arbitrary constructors; safe_load is sufficient here.
            config = yaml.safe_load(f)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                assert 'AWS_REGION={}'.format(REGION) in config['services'][h]['environment']
                assert 'TRAINING_JOB_NAME={}'.format(TRAINING_JOB_NAME) \
                    in config['services'][h]['environment']

        # assert that expected by sagemaker container output directories exist
        assert os.path.exists(os.path.join(sagemaker_container.container_root, 'output'))
        assert os.path.exists(os.path.join(sagemaker_container.container_root, 'output/data'))
def test_train_with_hyperparameters_without_job_name(get_data_source_instance, tmpdir,
                                                     sagemaker_session):
    """TRAINING_JOB_NAME is injected into every service's environment."""
    data_source = Mock()
    data_source.get_root_dir.return_value = "foo"
    get_data_source_instance.return_value = data_source

    directories = [
        str(tmpdir.mkdir("container-root")),
        str(tmpdir.mkdir("data"))
    ]
    with patch("sagemaker.local.image._SageMakerContainer._create_tmp_folder",
               side_effect=directories):
        instance_count = 2
        image = "my-image"
        sagemaker_container = _SageMakerContainer(
            "local", instance_count, image, sagemaker_session=sagemaker_session)
        sagemaker_container.train(INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG,
                                  HYPERPARAMETERS, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           "docker-compose.yaml")

        with open(docker_compose_file, "r") as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use SafeLoader.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            for h in sagemaker_container.hosts:
                assert ("TRAINING_JOB_NAME={}".format(TRAINING_JOB_NAME)
                        in config["services"][h]["environment"])
def test_serve(tmpdir, sagemaker_session, caplog):
    """Serving builds a compose file per host and masks sensitive env values in logs."""
    caplog.set_level(logging.INFO)
    root = str(tmpdir.mkdir("container-root"))
    with patch(
        "sagemaker.local.image._SageMakerContainer._create_tmp_folder",
        return_value=root,
    ):
        image = "my-image"
        container = _SageMakerContainer("local", 1, image, sagemaker_session=sagemaker_session)
        container.serve(
            "/some/model/path",
            {"env1": 1, "env2": "b", "SAGEMAKER_SUBMIT_DIRECTORY": "s3://some/path"},
        )

        compose_path = os.path.join(container.container_root, "docker-compose.yaml")
        with open(compose_path, "r") as f:
            services = yaml.load(f, Loader=yaml.SafeLoader)["services"]

        for host in container.hosts:
            assert services[host]["image"] == image
            assert services[host]["command"] == "serve"
        # the S3 submit directory value must not appear verbatim in the logs
        assert "[Masked]" in caplog.text
def test_train(
    popen, get_data_source_instance, retrieve_artifacts, cleanup, tmpdir, sagemaker_session, caplog
):
    """End-to-end local train: compose file contents, docker-compose invocation,
    expected output directories, artifact retrieval, cleanup, and log masking."""
    source = Mock()
    source.get_root_dir.return_value = "foo"
    get_data_source_instance.return_value = source
    caplog.set_level(logging.INFO)

    tmp_dirs = [str(tmpdir.mkdir("container-root")), str(tmpdir.mkdir("data"))]
    with patch(
        "sagemaker.local.image._SageMakerContainer._create_tmp_folder", side_effect=tmp_dirs
    ):
        host_count = 2
        image = "my-image"
        container = _SageMakerContainer(
            "local", host_count, image, sagemaker_session=sagemaker_session
        )
        container.train(
            INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG, HYPERPARAMETERS, TRAINING_JOB_NAME
        )

        compose_path = os.path.join(container.container_root, "docker-compose.yaml")

        invoked = popen.call_args[0][0]
        assert invoked is not None
        wanted = [
            "docker-compose",
            "-f",
            compose_path,
            "up",
            "--build",
            "--abort-on-container-exit",
        ]
        assert list(invoked[: len(wanted)]) == wanted

        with open(compose_path, "r") as f:
            services = yaml.load(f, Loader=yaml.SafeLoader)["services"]

        assert len(services) == host_count
        for host in container.hosts:
            assert services[host]["image"] == image
            assert services[host]["command"] == "train"
            # TODO-reinvent-2019 [akarpur]: uncomment the below assert statement
            # assert "AWS_REGION={}".format(REGION) in services[host]["environment"]
            assert (
                "TRAINING_JOB_NAME={}".format(TRAINING_JOB_NAME)
                in services[host]["environment"]
            )

        # directories the SageMaker container contract expects must exist
        assert os.path.exists(os.path.join(container.container_root, "output"))
        assert os.path.exists(os.path.join(container.container_root, "output/data"))

        retrieve_artifacts.assert_called_once()
        cleanup.assert_called_once()
        assert "[Masked]" in caplog.text
def test_train_local_code(_download_folder, _cleanup, popen, _stream_output,
                          _local_session, tmpdir, sagemaker_session):
    """Local-code training mounts /tmp/code and the shared folder into every host."""
    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image,
                                                  sagemaker_session=sagemaker_session)
        sagemaker_container.train(INPUT_DATA_CONFIG, LOCAL_CODE_HYPERPARAMETERS)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')
        shared_folder_path = os.path.join(sagemaker_container.container_root, 'shared')

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/code' % '/tmp/code' in volumes
                assert '%s:/opt/ml/shared' % shared_folder_path in volumes
def test_serve_local_code(up, copy, copytree, tmpdir, sagemaker_session):
    """Serving with a file:// submit directory mounts /tmp/code into the container."""
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               return_value=str(tmpdir.mkdir('container-root'))):
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', 1, image,
                                                  sagemaker_session=sagemaker_session)
        primary_container = {'ModelDataUrl': '/some/model/path',
                             'Environment': {'env1': 1,
                                             'env2': 'b',
                                             'SAGEMAKER_SUBMIT_DIRECTORY': 'file:///tmp/code'}}
        sagemaker_container.serve(primary_container)
        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)

            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'serve'

                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/code' % '/tmp/code' in volumes
def test_train(_download_folder, _cleanup, popen, _stream_output, LocalSession,
               tmpdir, sagemaker_session):
    """Training downloads the S3 channel and launches docker-compose with the right args."""
    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image,
                                                  sagemaker_session=sagemaker_session)
        sagemaker_container.train(INPUT_DATA_CONFIG, HYPERPARAMETERS)

        channel_dir = os.path.join(directories[1], 'b')
        download_folder_calls = [call('my-own-bucket', 'prefix', channel_dir)]
        _download_folder.assert_has_calls(download_folder_calls)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        call_args = popen.call_args[0][0]
        assert call_args is not None

        expected = ['docker-compose', '-f', docker_compose_file, 'up', '--build',
                    '--abort-on-container-exit']
        for i, v in enumerate(expected):
            assert call_args[i] == v

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
def test_train_with_hyperparameters_without_job_name(get_data_source_instance, tmpdir,
                                                     sagemaker_session):
    """TRAINING_JOB_NAME is injected into every service's environment."""
    data_source = Mock()
    data_source.get_root_dir.return_value = 'foo'
    get_data_source_instance.return_value = data_source

    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', instance_count, image, sagemaker_session=sagemaker_session)
        sagemaker_container.train(INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG,
                                  HYPERPARAMETERS, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)
            for h in sagemaker_container.hosts:
                assert 'TRAINING_JOB_NAME={}'.format(
                    TRAINING_JOB_NAME) in config['services'][h]['environment']
def test_train_error(get_data_source_instance, retrieve_artifacts, cleanup,
                     _stream_output, tmpdir, sagemaker_session):
    """A failed training run raises RuntimeError but still retrieves artifacts and cleans up."""
    data_source = Mock()
    data_source.get_root_dir.return_value = 'foo'
    get_data_source_instance.return_value = data_source

    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', instance_count, image, sagemaker_session=sagemaker_session)

        with pytest.raises(RuntimeError) as e:
            sagemaker_container.train(INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG,
                                      HYPERPARAMETERS, TRAINING_JOB_NAME)

        # FIX: str(e) on ExceptionInfo includes traceback location text and is
        # deprecated; check the exception message itself via e.value.
        assert 'this is expected' in str(e.value)
        retrieve_artifacts.assert_called_once()
        cleanup.assert_called_once()
def test_serve_local_code(tmpdir, sagemaker_session):
    """file:// submit dir mounts /tmp/code and rewrites SAGEMAKER_SUBMIT_DIRECTORY."""
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               return_value=str(tmpdir.mkdir('container-root'))):
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', 1, image, sagemaker_session=sagemaker_session)
        environment = {
            'env1': 1,
            'env2': 'b',
            'SAGEMAKER_SUBMIT_DIRECTORY': 'file:///tmp/code'
        }

        sagemaker_container.serve('/some/model/path', environment)
        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)

            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'serve'

                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/code' % '/tmp/code' in volumes
                assert 'SAGEMAKER_SUBMIT_DIRECTORY=/opt/ml/code' in config[
                    'services'][h]['environment']
def create_endpoint(self, EndpointName, EndpointConfigName):
    """Start a local serving container and poll its /ping route until healthy.

    Args:
        EndpointName (str): name used in the failure message if the endpoint
            never comes up.
        EndpointConfigName (str): unused here; accepted for API parity with
            the real SageMaker client.

    Raises:
        RuntimeError: if the container does not answer /ping with HTTP 200
            within 10 attempts (1 second apart).
    """
    instance_type = self.variants[0]['InstanceType']
    instance_count = self.variants[0]['InitialInstanceCount']
    self.serve_container = _SageMakerContainer(instance_type, instance_count,
                                               self.primary_container['Image'],
                                               self.sagemaker_session)
    self.serve_container.serve(self.primary_container)
    self.created_endpoint = True

    i = 0
    http = urllib3.PoolManager()
    serving_port = 8080
    if self.sagemaker_session.config and 'local' in self.sagemaker_session.config:
        serving_port = self.sagemaker_session.config['local'].get(
            'serving_port', 8080)
    endpoint_url = "http://localhost:%s/ping" % serving_port
    while True:
        i += 1
        if i >= 10:
            raise RuntimeError(
                "Giving up, endpoint: %s didn't launch correctly" % EndpointName)

        # FIX: pass lazy %-style arguments to the logger instead of eagerly
        # formatting with '%'; formatting is skipped when INFO is disabled.
        logger.info("Checking if endpoint is up, attempt: %s", i)
        try:
            r = http.request('GET', endpoint_url)
            if r.status != 200:
                logger.info("Container still not up, got: %s", r.status)
            else:
                return
        except urllib3.exceptions.RequestError:
            logger.info("Container still not up")

        time.sleep(1)
def test_train_local_code(tmpdir, sagemaker_session):
    """Local-code training mounts /tmp/code and the shared folder into every host."""
    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', instance_count, image, sagemaker_session=sagemaker_session)

        sagemaker_container.train(INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG,
                                  LOCAL_CODE_HYPERPARAMETERS, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')
        shared_folder_path = os.path.join(sagemaker_container.container_root,
                                          'shared')

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/code' % '/tmp/code' in volumes
                assert '%s:/opt/ml/shared' % shared_folder_path in volumes
def test_serve_local_code(tmpdir, sagemaker_session):
    """file:// submit dir mounts /tmp/code and rewrites SAGEMAKER_SUBMIT_DIRECTORY."""
    with patch(
        "sagemaker.local.image._SageMakerContainer._create_tmp_folder",
        return_value=str(tmpdir.mkdir("container-root")),
    ):
        image = "my-image"
        sagemaker_container = _SageMakerContainer(
            "local", 1, image, sagemaker_session=sagemaker_session)
        environment = {
            "env1": 1,
            "env2": "b",
            "SAGEMAKER_SUBMIT_DIRECTORY": "file:///tmp/code"
        }

        sagemaker_container.serve("/some/model/path", environment)
        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           "docker-compose.yaml")

        with open(docker_compose_file, "r") as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use SafeLoader.
            config = yaml.load(f, Loader=yaml.SafeLoader)

            for h in sagemaker_container.hosts:
                assert config["services"][h]["image"] == image
                assert config["services"][h]["command"] == "serve"

                volumes = config["services"][h]["volumes"]
                assert "%s:/opt/ml/code" % "/tmp/code" in volumes
                assert ("SAGEMAKER_SUBMIT_DIRECTORY=/opt/ml/code"
                        in config["services"][h]["environment"])
def test_train_local_code(get_data_source_instance, tmpdir, sagemaker_session):
    """Local-code training mounts code/shared volumes and rewrites the submit dir."""
    data_source = Mock()
    data_source.get_root_dir.return_value = 'foo'
    get_data_source_instance.return_value = data_source

    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image,
                                                  sagemaker_session=sagemaker_session)

        sagemaker_container.train(
            INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG, LOCAL_CODE_HYPERPARAMETERS,
            TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')
        shared_folder_path = os.path.join(sagemaker_container.container_root, 'shared')

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/code' % '/tmp/code' in volumes
                assert '%s:/opt/ml/shared' % shared_folder_path in volumes

                config_file_root = os.path.join(sagemaker_container.container_root,
                                                h, 'input', 'config')
                hyperparameters_file = os.path.join(config_file_root,
                                                    'hyperparameters.json')
                # FIX: json.load(open(...)) leaks the file handle; use a context manager.
                with open(hyperparameters_file) as hf:
                    hyperparameters_data = json.load(hf)
                assert hyperparameters_data['sagemaker_submit_directory'] == \
                    json.dumps('/opt/ml/code')
def create_training_job(self, TrainingJobName, AlgorithmSpecification, OutputDataConfig,
                        ResourceConfig, InputDataConfig=None, **kwargs):
    """
    Create a training job in Local Mode

    Args:
        TrainingJobName (str): local training job name.
        AlgorithmSpecification (dict): Identifies the training algorithm to use.
        InputDataConfig (dict): Describes the training dataset and the location where
            it is stored.
        OutputDataConfig (dict): Identifies the location where you want to save the
            results of model training.
        ResourceConfig (dict): Identifies the resources to use for local model
            training.
        HyperParameters (dict) [optional]: Specifies these algorithm-specific
            parameters to influence the quality of the final model.
    """
    InputDataConfig = InputDataConfig or {}
    container = _SageMakerContainer(
        ResourceConfig["InstanceType"],
        ResourceConfig["InstanceCount"],
        AlgorithmSpecification["TrainingImage"],
        self.sagemaker_session,
    )
    training_job = _LocalTrainingJob(container)
    # FIX: idiomatic dict.get() replaces the '"HyperParameters" in kwargs' conditional.
    hyperparameters = kwargs.get("HyperParameters", {})
    training_job.start(InputDataConfig, OutputDataConfig, hyperparameters,
                       TrainingJobName)

    # Register the job so describe/list calls can find it later.
    LocalSagemakerClient._training_jobs[TrainingJobName] = training_job
def test_train_local_intermediate_output(tmpdir, sagemaker_session):
    """The customer's intermediate-output folder is mounted into each container."""
    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', instance_count, image, sagemaker_session=sagemaker_session)

        output_path = str(tmpdir.mkdir('customer_intermediate_output'))
        output_data_config = {'S3OutputPath': 'file://%s' % output_path}
        hyperparameters = {'sagemaker_s3_output': output_path}

        sagemaker_container.train(INPUT_DATA_CONFIG, output_data_config,
                                  hyperparameters, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')
        intermediate_folder_path = os.path.join(output_path, 'output/intermediate')

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/output/intermediate' % intermediate_folder_path \
                    in volumes
def test_serve(up, copy, copytree, tmpdir, sagemaker_session):
    """Serving writes a compose file whose services run 'serve' with the given image."""
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               return_value=str(tmpdir.mkdir('container-root'))):
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', 1, image, sagemaker_session=sagemaker_session)
        primary_container = {
            'ModelDataUrl': '/some/model/path',
            'Environment': {
                'env1': 1,
                'env2': 'b'
            }
        }

        sagemaker_container.serve(primary_container)
        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'serve'
def test_train_with_hyperparameters_without_job_name(download_folder, _cleanup,
                                                     _stream_output, LocalSession,
                                                     tmpdir):
    """TRAINING_JOB_NAME is injected into every service's environment."""
    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', instance_count, image, sagemaker_session=LocalSession)
        sagemaker_container.train(INPUT_DATA_CONFIG, HYPERPARAMETERS,
                                  TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)
            for h in sagemaker_container.hosts:
                assert 'TRAINING_JOB_NAME={}'.format(
                    TRAINING_JOB_NAME) in config['services'][h]['environment']
def create_training_job(self, TrainingJobName, AlgorithmSpecification, RoleArn,
                        InputDataConfig, OutputDataConfig, ResourceConfig,
                        StoppingCondition, HyperParameters, Tags=None):
    """Run a training job locally after validating the channel distribution types.

    Local Mode cannot shard data across hosts, so every input channel must be
    FullyReplicated; anything else raises RuntimeError before training starts.
    """
    self.train_container = _SageMakerContainer(
        ResourceConfig['InstanceType'],
        ResourceConfig['InstanceCount'],
        AlgorithmSpecification['TrainingImage'],
        self.sagemaker_session)

    for channel in InputDataConfig:
        distribution = channel['DataSource']['S3DataSource']['S3DataDistributionType']
        if distribution != 'FullyReplicated':
            raise RuntimeError(
                "DataDistribution: %s is not currently supported in Local Mode"
                % distribution)

    self.s3_model_artifacts = self.train_container.train(InputDataConfig,
                                                         HyperParameters)
def test_container_does_not_enable_nvidia_docker_for_cpu_containers(sagemaker_session):
    """CPU ('local') containers must not request the nvidia docker runtime."""
    container = _SageMakerContainer('local', 1, 'my-image',
                                    sagemaker_session=sagemaker_session)
    host_config = container._create_docker_host('host-1', {}, set(), 'train', [])
    assert 'runtime' not in host_config
def test_container_has_gpu_support(tmpdir, sagemaker_session):
    """GPU ('local_gpu') containers request the nvidia docker runtime."""
    container = _SageMakerContainer('local_gpu', 1, 'my-image',
                                    sagemaker_session=sagemaker_session)
    host_config = container._create_docker_host('host-1', {}, set(), 'train', [])
    assert host_config.get('runtime') == 'nvidia'
def test_container_does_not_enable_nvidia_docker_for_cpu_containers(sagemaker_session):
    """CPU ('local') containers must not request the nvidia docker runtime."""
    container = _SageMakerContainer(
        "local", 1, "my-image", sagemaker_session=sagemaker_session
    )
    host_config = container._create_docker_host("host-1", {}, set(), "train", [])
    assert "runtime" not in host_config
def test_container_has_gpu_support(tmpdir, sagemaker_session):
    """GPU ('local_gpu') containers request the nvidia docker runtime."""
    container = _SageMakerContainer(
        "local_gpu", 1, "my-image", sagemaker_session=sagemaker_session
    )
    host_config = container._create_docker_host("host-1", {}, set(), "train", [])
    assert host_config.get("runtime") == "nvidia"
def test_prepare_serving_volumes_with_local_model(sagemaker_session):
    """A plain local path becomes the single /opt/ml/model volume."""
    container = _SageMakerContainer('local', 1, 'some-image',
                                    sagemaker_session=sagemaker_session)
    container.container_root = '/tmp/container_root'

    volumes = container._prepare_serving_volumes('/path/to/my_model')

    assert len(volumes) == 1
    model_volume = volumes[0]
    assert model_volume.container_dir == '/opt/ml/model'
    assert model_volume.host_dir == '/path/to/my_model'
def test_train(download_folder, _cleanup, popen, _stream_output, LocalSession,
               tmpdir, sagemaker_session):
    """Training downloads the S3 channel, launches docker-compose, and creates output dirs."""
    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', instance_count, image, sagemaker_session=sagemaker_session)
        sagemaker_container.train(INPUT_DATA_CONFIG, HYPERPARAMETERS,
                                  TRAINING_JOB_NAME)

        channel_dir = os.path.join(directories[1], 'b')
        download_folder_calls = [
            call('my-own-bucket', 'prefix', channel_dir, sagemaker_session)
        ]
        download_folder.assert_has_calls(download_folder_calls)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        call_args = popen.call_args[0][0]
        assert call_args is not None

        expected = [
            'docker-compose', '-f', docker_compose_file, 'up', '--build',
            '--abort-on-container-exit'
        ]
        for i, v in enumerate(expected):
            assert call_args[i] == v

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load() without a Loader is deprecated/unsafe; use safe_load.
            config = yaml.safe_load(f)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                assert 'AWS_REGION={}'.format(
                    REGION) in config['services'][h]['environment']
                assert 'TRAINING_JOB_NAME={}'.format(
                    TRAINING_JOB_NAME) in config['services'][h]['environment']

        # assert that expected by sagemaker container output directories exist
        assert os.path.exists(
            os.path.join(sagemaker_container.container_root, 'output'))
        assert os.path.exists(
            os.path.join(sagemaker_container.container_root, 'output/data'))
def test_download_file():
    """_download_file strips the leading '/' before calling Bucket.download_file."""
    boto_mock = Mock(name='boto_session')
    boto_mock.client('sts').get_caller_identity.return_value = {'Account': '123'}
    bucket_mock = Mock()
    boto_mock.resource('s3').Bucket.return_value = bucket_mock
    session = sagemaker.Session(boto_session=boto_mock, sagemaker_client=Mock())

    container = _SageMakerContainer('local', 2, 'my-image', sagemaker_session=session)
    container._download_file(BUCKET_NAME, '/prefix/path/file.tar.gz',
                             '/tmp/file.tar.gz')

    bucket_mock.download_file.assert_called_with('prefix/path/file.tar.gz',
                                                 '/tmp/file.tar.gz')
def test_prepare_serving_volumes_with_local_model(sagemaker_session):
    """A plain local path becomes the single /opt/ml/model volume."""
    container = _SageMakerContainer(
        'local', 1, 'some-image', sagemaker_session=sagemaker_session)
    container.container_root = '/tmp/container_root'

    volumes = container._prepare_serving_volumes('/path/to/my_model')

    assert len(volumes) == 1
    model_volume = volumes[0]
    assert model_volume.container_dir == '/opt/ml/model'
    assert model_volume.host_dir == '/path/to/my_model'
def test_retrieve_artifacts(LocalSession, tmpdir):
    """Artifacts from every host's model volume are merged into one output tree."""
    container = _SageMakerContainer('local', 2, 'my-image')
    container.hosts = ['algo-1', 'algo-2']  # avoid any randomness
    container.container_root = str(tmpdir.mkdir('container-root'))

    volume1 = os.path.join(container.container_root, 'algo-1/output/')
    volume2 = os.path.join(container.container_root, 'algo-2/output/')
    os.makedirs(volume1)
    os.makedirs(volume2)

    compose_data = {
        'services': {
            'algo-1': {'volumes': ['%s:/opt/ml/model' % volume1]},
            'algo-2': {'volumes': ['%s:/opt/ml/model' % volume2]},
        }
    }

    # Lay out a different artifact tree under each host's volume.
    layout = {
        volume1: (['model', 'model/data'],
                  ['model/data/model.json', 'model/data/variables.csv']),
        volume2: (['model', 'model/data', 'model/tmp'],
                  ['model/data/model.json', 'model/data/variables2.csv',
                   'model/tmp/something-else.json']),
    }
    for volume, (dirs, files) in layout.items():
        for d in dirs:
            os.mkdir(os.path.join(volume, d))
        for name in files:
            open(os.path.join(volume, name), 'a').close()

    merged_root = container.retrieve_model_artifacts(compose_data)

    # The merged tree must contain the union of both hosts' artifacts.
    expected = [
        'model', 'model/data/', 'model/data/model.json', 'model/data/variables.csv',
        'model/data/variables2.csv', 'model/tmp/something-else.json'
    ]
    for name in expected:
        assert os.path.exists(os.path.join(merged_root, name))
def start(self, input_data, output_data, transform_resources, **kwargs):
    """Start the Local Transform Job

    Args:
        input_data (dict): Describes the dataset to be transformed and the
            location where it is stored.
        output_data (dict): Identifies the location where to save the results
            from the transform job
        transform_resources (dict): compute instances for the transform job.
            Currently only supports local or local_gpu
        **kwargs: additional arguments coming from the boto request object
    """
    self.transform_resources = transform_resources
    self.input_data = input_data
    self.output_data = output_data

    image = self.primary_container["Image"]
    instance_type = transform_resources["InstanceType"]
    # Local transform always runs on a single instance regardless of the request.
    instance_count = 1

    environment = self._get_container_environment(**kwargs)

    # Start the container, pass the environment and wait for it to start up
    self.container = _SageMakerContainer(instance_type, instance_count, image,
                                         self.local_session)
    self.container.serve(self.primary_container["ModelDataUrl"], environment)

    serving_port = get_config_value("local.serving_port",
                                    self.local_session.config) or 8080
    _wait_for_serving_container(serving_port)

    # Get capabilities from Container if needed: the serving container may
    # advertise preferred batch settings on /execution-parameters.
    endpoint_url = "http://localhost:%s/execution-parameters" % serving_port
    response, code = _perform_request(endpoint_url)
    if code == 200:
        execution_parameters = json.loads(response.read())
        # MaxConcurrentTransforms is ignored because we currently only support 1
        # Container-advertised values only fill in settings the caller omitted.
        for setting in ("BatchStrategy", "MaxPayloadInMB"):
            if setting not in kwargs and setting in execution_parameters:
                kwargs[setting] = execution_parameters[setting]

    # Apply Defaults if none was provided
    kwargs.update(self._get_required_defaults(**kwargs))

    self.start_time = datetime.datetime.now()
    self.batch_strategy = kwargs["BatchStrategy"]
    if "Environment" in kwargs:
        self.environment = kwargs["Environment"]

    # run the batch inference requests
    self._perform_batch_inference(input_data, output_data, **kwargs)
    self.end_time = datetime.datetime.now()
    self.state = self._COMPLETED
def test_write_config_files_input_content_type(LocalSession, tmpdir):
    """inputdataconfig.json preserves ContentType only for channels that declare it."""
    sagemaker_container = _SageMakerContainer("local", 1, "my-image")
    sagemaker_container.container_root = str(tmpdir.mkdir("container-root"))
    host = "algo-1"
    sagemaker.local.image._create_config_file_directories(
        sagemaker_container.container_root, host)
    container_root = sagemaker_container.container_root
    config_file_root = os.path.join(container_root, host, "input", "config")

    input_data_config_file = os.path.join(config_file_root, "inputdataconfig.json")

    # write the config files, and then lets check they exist and have the right content.
    input_data_config = [
        {
            "ChannelName": "channel_a",
            "DataUri": "file:///tmp/source1",
            "ContentType": "text/csv",
            "DataSource": {
                "FileDataSource": {
                    "FileDataDistributionType": "FullyReplicated",
                    "FileUri": "file:///tmp/source1",
                }
            },
        },
        {
            "ChannelName": "channel_b",
            "DataUri": "s3://my-own-bucket/prefix",
            "DataSource": {
                "S3DataSource": {
                    "S3DataDistributionType": "FullyReplicated",
                    "S3DataType": "S3Prefix",
                    "S3Uri": "s3://my-own-bucket/prefix",
                }
            },
        },
    ]
    sagemaker_container.write_config_files(host, HYPERPARAMETERS, input_data_config)

    assert os.path.exists(input_data_config_file)
    # FIX: json.load(open(...)) leaks the file handle; use a context manager.
    with open(input_data_config_file) as f:
        parsed_input_config = json.load(f)
    # Validate Input Data Config
    for channel in input_data_config:
        assert channel["ChannelName"] in parsed_input_config

    # Channel A has a content type
    assert "ContentType" in parsed_input_config["channel_a"]
    assert parsed_input_config["channel_a"]["ContentType"] == "text/csv"

    # Channel B does not have content type
    assert "ContentType" not in parsed_input_config["channel_b"]
def test_train_error(_download_folder, _cleanup, popen, _stream_output, LocalSession,
                     tmpdir, sagemaker_session):
    """A failed training run surfaces the underlying RuntimeError message."""
    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image,
                                                  sagemaker_session=sagemaker_session)

        with pytest.raises(RuntimeError) as e:
            sagemaker_container.train(INPUT_DATA_CONFIG, HYPERPARAMETERS)

        # FIX: str(e) on ExceptionInfo includes traceback location text and is
        # deprecated; check the exception message itself via e.value.
        assert 'this is expected' in str(e.value)
def test_write_config_files_input_content_type(LocalSession, tmpdir):
    """inputdataconfig.json preserves ContentType only for channels that declare it."""
    sagemaker_container = _SageMakerContainer('local', 1, 'my-image')
    sagemaker_container.container_root = str(tmpdir.mkdir('container-root'))
    host = 'algo-1'
    sagemaker.local.image._create_config_file_directories(
        sagemaker_container.container_root, host)
    container_root = sagemaker_container.container_root
    config_file_root = os.path.join(container_root, host, 'input', 'config')

    input_data_config_file = os.path.join(config_file_root, 'inputdataconfig.json')

    # write the config files, and then lets check they exist and have the right content.
    input_data_config = [{
        'ChannelName': 'channel_a',
        'DataUri': 'file:///tmp/source1',
        'ContentType': 'text/csv',
        'DataSource': {
            'FileDataSource': {
                'FileDataDistributionType': 'FullyReplicated',
                'FileUri': 'file:///tmp/source1'
            }
        }
    }, {
        'ChannelName': 'channel_b',
        'DataUri': 's3://my-own-bucket/prefix',
        'DataSource': {
            'S3DataSource': {
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3Uri': 's3://my-own-bucket/prefix'
            }
        }
    }]
    sagemaker_container.write_config_files(host, HYPERPARAMETERS,
                                           input_data_config)

    assert os.path.exists(input_data_config_file)
    # FIX: json.load(open(...)) leaks the file handle; use a context manager.
    with open(input_data_config_file) as f:
        parsed_input_config = json.load(f)
    # Validate Input Data Config
    for channel in input_data_config:
        assert channel['ChannelName'] in parsed_input_config

    # Channel A has a content type
    assert 'ContentType' in parsed_input_config['channel_a']
    assert parsed_input_config['channel_a']['ContentType'] == 'text/csv'

    # Channel B does not have content type
    assert 'ContentType' not in parsed_input_config['channel_b']
def test_train_local_code(get_data_source_instance, tmpdir, sagemaker_session):
    """Local-code training mounts /tmp/code and the shared folder into every
    container and records the in-container submit directory in hyperparameters."""
    data_source = Mock()
    data_source.get_root_dir.return_value = "foo"
    get_data_source_instance.return_value = data_source

    directories = [
        str(tmpdir.mkdir("container-root")),
        str(tmpdir.mkdir("data"))
    ]
    with patch("sagemaker.local.image._SageMakerContainer._create_tmp_folder",
               side_effect=directories):
        instance_count = 2
        image = "my-image"
        sagemaker_container = _SageMakerContainer(
            "local", instance_count, image, sagemaker_session=sagemaker_session)

        sagemaker_container.train(
            INPUT_DATA_CONFIG,
            OUTPUT_DATA_CONFIG,
            LOCAL_CODE_HYPERPARAMETERS,
            ENVIRONMENT,
            TRAINING_JOB_NAME,
        )

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           "docker-compose.yaml")
        shared_folder_path = os.path.join(sagemaker_container.container_root, "shared")

        with open(docker_compose_file, "r") as f:
            config = yaml.load(f, Loader=yaml.SafeLoader)

        assert len(config["services"]) == instance_count
        for h in sagemaker_container.hosts:
            assert config["services"][h]["image"] == image
            assert config["services"][h]["command"] == "train"

            volumes = config["services"][h]["volumes"]
            assert "%s:/opt/ml/code" % "/tmp/code" in volumes
            assert "%s:/opt/ml/shared" % shared_folder_path in volumes

            config_file_root = os.path.join(sagemaker_container.container_root,
                                            h, "input", "config")
            hyperparameters_file = os.path.join(config_file_root, "hyperparameters.json")
            # Context manager closes the handle; json.load(open(...)) leaks it.
            with open(hyperparameters_file) as hp:
                hyperparameters_data = json.load(hp)
            assert hyperparameters_data["sagemaker_submit_directory"] == \
                json.dumps("/opt/ml/code")
def test_prepare_serving_volumes_with_local_model(get_data_source_instance, sagemaker_session):
    """A file:// model URI is mounted straight from its local root into /opt/ml/model."""
    container = _SageMakerContainer('local', 1, 'some-image',
                                    sagemaker_session=sagemaker_session)
    container.container_root = '/tmp/container_root'

    file_source = Mock()
    file_source.get_root_dir.return_value = '/path/to/my_model'
    file_source.get_file_list.return_value = ['/path/to/my_model/model']
    get_data_source_instance.return_value = file_source

    serving_volumes = container._prepare_serving_volumes('file:///path/to/my_model')

    # Exactly one volume, mapping the local model root to the serving dir.
    assert len(serving_volumes) == 1
    only_volume = serving_volumes[0]
    assert only_volume.container_dir == '/opt/ml/model'
    assert only_volume.host_dir == '/path/to/my_model'
def test_serve_local_code_no_env(tmpdir, sagemaker_session):
    """serve() with an empty environment still writes a compose file whose
    services all run the expected image with the 'serve' command."""
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               return_value=str(tmpdir.mkdir('container-root'))):
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', 1, image,
                                                  sagemaker_session=sagemaker_session)
        sagemaker_container.serve('/some/model/path', {})
        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # Explicit SafeLoader: yaml.load() without a Loader argument is
            # deprecated (and unsafe on untrusted input); this also matches
            # how the other tests in this file load compose files.
            config = yaml.load(f, Loader=yaml.SafeLoader)

            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'serve'
def test_prepare_serving_volumes_with_s3_model(is_tarfile, _download_file, sagemaker_session):
    """An s3:// model tarball is downloaded under the first host's model dir
    and that directory is mounted at /opt/ml/model."""
    container = _SageMakerContainer('local', 1, 'some-image',
                                    sagemaker_session=sagemaker_session)
    container.container_root = '/tmp/container_root'
    model_dir = os.path.join('/tmp/container_root/', container.hosts[0], 'model')

    is_tarfile.return_value = True

    serving_volumes = container._prepare_serving_volumes('s3://bucket/my_model.tar.gz')

    # The tarball lands inside the per-host model directory.
    expected_tar_path = os.path.join(model_dir, 'my_model.tar.gz')
    _download_file.assert_called_with('bucket', '/my_model.tar.gz', expected_tar_path)
    is_tarfile.assert_called_with(expected_tar_path)

    assert len(serving_volumes) == 1
    assert serving_volumes[0].container_dir == '/opt/ml/model'
    assert serving_volumes[0].host_dir == model_dir
def test_write_config_file(LocalSession, tmpdir):
    """write_config_files emits hyperparameters.json, resourceconfig.json and
    inputdataconfig.json under <root>/<host>/input/config with the right content."""
    sagemaker_container = _SageMakerContainer('local', 2, 'my-image')
    sagemaker_container.container_root = str(tmpdir.mkdir('container-root'))
    host = 'algo-1'

    sagemaker.local.image._create_config_file_directories(
        sagemaker_container.container_root, host)

    container_root = sagemaker_container.container_root
    config_file_root = os.path.join(container_root, host, 'input', 'config')

    hyperparameters_file = os.path.join(config_file_root, 'hyperparameters.json')
    resource_config_file = os.path.join(config_file_root, 'resourceconfig.json')
    input_data_config_file = os.path.join(config_file_root, 'inputdataconfig.json')

    # write the config files, and then lets check they exist and have the right content.
    sagemaker_container.write_config_files(host, HYPERPARAMETERS, INPUT_DATA_CONFIG)

    assert os.path.exists(hyperparameters_file)
    assert os.path.exists(resource_config_file)
    assert os.path.exists(input_data_config_file)

    # Context managers close the handles; json.load(open(...)) leaks the file
    # descriptors and triggers ResourceWarning under pytest.
    with open(hyperparameters_file) as f:
        hyperparameters_data = json.load(f)
    with open(resource_config_file) as f:
        resource_config_data = json.load(f)
    with open(input_data_config_file) as f:
        input_data_config_data = json.load(f)

    # Validate HyperParameters
    for k, v in HYPERPARAMETERS.items():
        assert k in hyperparameters_data
        assert hyperparameters_data[k] == v

    # Validate Resource Config
    assert resource_config_data['current_host'] == host
    assert resource_config_data['hosts'] == sagemaker_container.hosts

    # Validate Input Data Config
    for channel in INPUT_DATA_CONFIG:
        assert channel['ChannelName'] in input_data_config_data
def test_download_folder(makedirs):
    """_download_folder mirrors every object under an S3 prefix into the local
    directory; a trailing slash on the prefix must behave identically."""
    boto_mock = Mock(name='boto_session')
    boto_mock.client('sts').get_caller_identity.return_value = {'Account': '123'}
    session = sagemaker.Session(boto_session=boto_mock, sagemaker_client=Mock())

    train_data = Mock()
    train_data.bucket_name.return_value = BUCKET_NAME
    train_data.key = '/prefix/train/train_data.csv'

    validation_data = Mock()
    validation_data.bucket_name.return_value = BUCKET_NAME
    validation_data.key = '/prefix/train/validation_data.csv'

    # The bucket listing yields both objects; Object() always returns obj_mock
    # so we can observe each download_file call.
    boto_mock.resource('s3').Bucket(BUCKET_NAME).objects.filter.return_value = [
        train_data, validation_data]
    obj_mock = Mock()
    boto_mock.resource('s3').Object.return_value = obj_mock

    container = _SageMakerContainer('local', 2, 'my-image', sagemaker_session=session)

    expected_calls = [call(os.path.join('/tmp', 'train/train_data.csv')),
                      call(os.path.join('/tmp', 'train/validation_data.csv'))]

    container._download_folder(BUCKET_NAME, '/prefix', '/tmp')
    obj_mock.download_file.assert_called()
    obj_mock.download_file.assert_has_calls(expected_calls)

    obj_mock.reset_mock()

    # Testing with a trailing slash for the prefix.
    container._download_folder(BUCKET_NAME, '/prefix/', '/tmp')
    obj_mock.download_file.assert_called()
    obj_mock.download_file.assert_has_calls(expected_calls)
def test_retrieve_artifacts(LocalSession, tmpdir):
    """retrieve_artifacts merges the model/ and output/ trees from every
    container volume into a single artifacts directory."""
    sagemaker_container = _SageMakerContainer('local', 2, 'my-image')
    sagemaker_container.hosts = ['algo-1', 'algo-2']  # avoid any randomness
    sagemaker_container.container_root = str(tmpdir.mkdir('container-root'))

    volume1 = os.path.join(sagemaker_container.container_root, 'algo-1')
    volume2 = os.path.join(sagemaker_container.container_root, 'algo-2')
    os.mkdir(volume1)
    os.mkdir(volume2)

    compose_data = {
        'services': {
            'algo-1': {
                'volumes': ['%s:/opt/ml/model' % os.path.join(volume1, 'model'),
                            '%s:/opt/ml/output' % os.path.join(volume1, 'output')]
            },
            'algo-2': {
                'volumes': ['%s:/opt/ml/model' % os.path.join(volume2, 'model'),
                            '%s:/opt/ml/output' % os.path.join(volume2, 'output')]
            }
        }
    }

    dirs1 = ['model', 'model/data']
    dirs2 = ['model', 'model/data', 'model/tmp']
    dirs3 = ['output', 'output/data']
    dirs4 = ['output', 'output/data', 'output/log']

    files1 = ['model/data/model.json', 'model/data/variables.csv']
    files2 = ['model/data/model.json', 'model/data/variables2.csv',
              'model/tmp/something-else.json']
    files3 = ['output/data/loss.json', 'output/data/accuracy.json']
    files4 = ['output/data/loss.json', 'output/data/accuracy2.json',
              'output/log/warnings.txt']

    expected = ['model', 'model/data/', 'model/data/model.json',
                'model/data/variables.csv', 'model/data/variables2.csv',
                'model/tmp/something-else.json', 'output', 'output/data',
                'output/log', 'output/data/loss.json', 'output/data/accuracy.json',
                'output/data/accuracy2.json', 'output/log/warnings.txt']

    for d in dirs1:
        os.mkdir(os.path.join(volume1, d))
    for d in dirs2:
        os.mkdir(os.path.join(volume2, d))
    for d in dirs3:
        os.mkdir(os.path.join(volume1, d))
    for d in dirs4:
        os.mkdir(os.path.join(volume2, d))

    # create all the files
    for f in files1:
        open(os.path.join(volume1, f), 'a').close()
    for f in files2:
        open(os.path.join(volume2, f), 'a').close()
    for f in files3:
        open(os.path.join(volume1, f), 'a').close()
    for f in files4:
        open(os.path.join(volume2, f), 'a').close()

    s3_model_artifacts = sagemaker_container.retrieve_artifacts(compose_data)
    s3_artifacts = os.path.dirname(s3_model_artifacts)

    # Loop-invariant assertion hoisted out of the loop: the artifacts root
    # holds exactly the merged 'model' and 'output' trees. The original
    # re-evaluated this identical check once per expected file.
    assert set(os.listdir(s3_artifacts)) == set(['model', 'output'])
    for f in expected:
        assert os.path.exists(os.path.join(s3_artifacts, f))