Example #1
0
def test_sagemaker_container_hosts_should_have_lowercase_names():
    """Host names generated by _SageMakerContainer must be entirely lowercase."""
    random.seed(a=42)

    # Check containers of several sizes; the seed above keeps host-name
    # generation deterministic across runs.
    for count in (2, 10, 1):
        container = _SageMakerContainer("local", count, "my-image", sagemaker_session=Mock())
        assert all(host == host.lower() for host in container.hosts)
def test_train(popen, get_data_source_instance, tmpdir, sagemaker_session):
    """Run local-mode training and verify the docker-compose command, the generated
    compose config for every host, and that the output directories were created."""
    data_source = Mock()
    data_source.get_root_dir.return_value = 'foo'
    get_data_source_instance.return_value = data_source

    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):

        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image, sagemaker_session=sagemaker_session)
        sagemaker_container.train(
            INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG, HYPERPARAMETERS, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root, 'docker-compose.yaml')
        call_args = popen.call_args[0][0]
        assert call_args is not None

        expected = ['docker-compose', '-f', docker_compose_file, 'up', '--build', '--abort-on-container-exit']
        for i, v in enumerate(expected):
            assert call_args[i] == v

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe;
            # SafeLoader matches the other tests in this file.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                assert 'AWS_REGION={}'.format(REGION) in config['services'][h]['environment']
                assert 'TRAINING_JOB_NAME={}'.format(TRAINING_JOB_NAME) in config['services'][h]['environment']

        # assert that expected by sagemaker container output directories exist
        assert os.path.exists(os.path.join(sagemaker_container.container_root, 'output'))
        assert os.path.exists(os.path.join(sagemaker_container.container_root, 'output/data'))
Example #3
0
def test_train_with_hyperparameters_without_job_name(get_data_source_instance,
                                                     tmpdir,
                                                     sagemaker_session):
    """Training must expose TRAINING_JOB_NAME in every host's environment."""
    data_source = Mock()
    data_source.get_root_dir.return_value = "foo"
    get_data_source_instance.return_value = data_source

    directories = [
        str(tmpdir.mkdir("container-root")),
        str(tmpdir.mkdir("data"))
    ]
    with patch("sagemaker.local.image._SageMakerContainer._create_tmp_folder",
               side_effect=directories):
        instance_count = 2
        image = "my-image"
        sagemaker_container = _SageMakerContainer(
            "local",
            instance_count,
            image,
            sagemaker_session=sagemaker_session)
        sagemaker_container.train(INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG,
                                  HYPERPARAMETERS, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           "docker-compose.yaml")

        with open(docker_compose_file, "r") as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            for h in sagemaker_container.hosts:
                assert ("TRAINING_JOB_NAME={}".format(TRAINING_JOB_NAME)
                        in config["services"][h]["environment"])
Example #4
0
def test_serve(tmpdir, sagemaker_session, caplog):
    """Serving a model writes a compose file whose services run `serve` on the image."""
    caplog.set_level(logging.INFO)
    root_dir = str(tmpdir.mkdir("container-root"))
    with patch(
            "sagemaker.local.image._SageMakerContainer._create_tmp_folder",
            return_value=root_dir,
    ):
        image_name = "my-image"
        container = _SageMakerContainer(
            "local", 1, image_name, sagemaker_session=sagemaker_session)
        env = {"env1": 1, "env2": "b", "SAGEMAKER_SUBMIT_DIRECTORY": "s3://some/path"}

        container.serve("/some/model/path", env)
        compose_path = os.path.join(container.container_root,
                                    "docker-compose.yaml")

        with open(compose_path, "r") as f:
            compose_config = yaml.load(f, Loader=yaml.SafeLoader)

        for host in container.hosts:
            assert compose_config["services"][host]["image"] == image_name
            assert compose_config["services"][host]["command"] == "serve"
    # sensitive environment values are expected to be masked in the logs
    assert "[Masked]" in caplog.text
Example #5
0
def test_train(
    popen, get_data_source_instance, retrieve_artifacts, cleanup, tmpdir, sagemaker_session, caplog
):
    """Local-mode training: verify the compose invocation, per-host config,
    created output dirs, artifact retrieval, cleanup and masked logging."""
    source_mock = Mock()
    source_mock.get_root_dir.return_value = "foo"
    get_data_source_instance.return_value = source_mock

    caplog.set_level(logging.INFO)

    tmp_folders = [str(tmpdir.mkdir("container-root")), str(tmpdir.mkdir("data"))]
    with patch(
        "sagemaker.local.image._SageMakerContainer._create_tmp_folder", side_effect=tmp_folders
    ):
        n_instances = 2
        image_name = "my-image"
        container = _SageMakerContainer(
            "local", n_instances, image_name, sagemaker_session=sagemaker_session
        )
        container.train(
            INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG, HYPERPARAMETERS, TRAINING_JOB_NAME
        )

        compose_path = os.path.join(container.container_root, "docker-compose.yaml")
        call_args = popen.call_args[0][0]
        assert call_args is not None

        expected_cmd = [
            "docker-compose",
            "-f",
            compose_path,
            "up",
            "--build",
            "--abort-on-container-exit",
        ]
        for got, want in zip(call_args, expected_cmd):
            assert got == want

        with open(compose_path, "r") as f:
            compose_config = yaml.load(f, Loader=yaml.SafeLoader)
        assert len(compose_config["services"]) == n_instances
        for host in container.hosts:
            service = compose_config["services"][host]
            assert service["image"] == image_name
            assert service["command"] == "train"
            # TODO-reinvent-2019 [akarpur]: uncomment the below assert statement
            # assert "AWS_REGION={}".format(REGION) in service["environment"]
            assert (
                "TRAINING_JOB_NAME={}".format(TRAINING_JOB_NAME)
                in service["environment"]
            )

        # assert that expected by sagemaker container output directories exist
        assert os.path.exists(os.path.join(container.container_root, "output"))
        assert os.path.exists(os.path.join(container.container_root, "output/data"))

    retrieve_artifacts.assert_called_once()
    cleanup.assert_called_once()
    assert "[Masked]" in caplog.text
def test_train_local_code(_download_folder, _cleanup, popen, _stream_output,
                          _local_session, tmpdir, sagemaker_session):
    """Training with local code mounts /tmp/code and the shared folder into each host."""
    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image,
                                                  sagemaker_session=sagemaker_session)

        sagemaker_container.train(INPUT_DATA_CONFIG, LOCAL_CODE_HYPERPARAMETERS)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')
        shared_folder_path = os.path.join(sagemaker_container.container_root, 'shared')

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/code' % '/tmp/code' in volumes
                assert '%s:/opt/ml/shared' % shared_folder_path in volumes
def test_serve_local_code(up, copy, copytree, tmpdir, sagemaker_session):
    """Serving with a file:// submit directory mounts the local code into the container."""
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               return_value=str(tmpdir.mkdir('container-root'))):

        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', 1, image, sagemaker_session=sagemaker_session)
        primary_container = {'ModelDataUrl': '/some/model/path',
                             'Environment': {'env1': 1,
                                             'env2': 'b',
                                             'SAGEMAKER_SUBMIT_DIRECTORY': 'file:///tmp/code'
                                             }
                             }

        sagemaker_container.serve(primary_container)
        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)

            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'serve'

                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/code' % '/tmp/code' in volumes
def test_train(_download_folder, _cleanup, popen, _stream_output, LocalSession,
               tmpdir, sagemaker_session):
    """Training downloads S3 channel data and launches docker-compose with the
    expected arguments and per-host service config."""
    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):

        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image, sagemaker_session=sagemaker_session)
        sagemaker_container.train(INPUT_DATA_CONFIG, HYPERPARAMETERS)

        channel_dir = os.path.join(directories[1], 'b')
        download_folder_calls = [call('my-own-bucket', 'prefix', channel_dir)]
        _download_folder.assert_has_calls(download_folder_calls)

        docker_compose_file = os.path.join(sagemaker_container.container_root, 'docker-compose.yaml')

        call_args = popen.call_args[0][0]
        assert call_args is not None

        expected = ['docker-compose', '-f', docker_compose_file, 'up', '--build', '--abort-on-container-exit']
        for i, v in enumerate(expected):
            assert call_args[i] == v

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
def test_train_with_hyperparameters_without_job_name(get_data_source_instance,
                                                     tmpdir,
                                                     sagemaker_session):
    """Training must expose TRAINING_JOB_NAME in every host's environment."""
    data_source = Mock()
    data_source.get_root_dir.return_value = 'foo'
    get_data_source_instance.return_value = data_source

    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local',
            instance_count,
            image,
            sagemaker_session=sagemaker_session)
        sagemaker_container.train(INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG,
                                  HYPERPARAMETERS, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            for h in sagemaker_container.hosts:
                assert 'TRAINING_JOB_NAME={}'.format(
                    TRAINING_JOB_NAME) in config['services'][h]['environment']
def test_train_error(get_data_source_instance, retrieve_artifacts, cleanup,
                     _stream_output, tmpdir, sagemaker_session):
    """A failing training run surfaces a RuntimeError but still retrieves
    artifacts and cleans up."""
    data_source = Mock()
    data_source.get_root_dir.return_value = 'foo'
    get_data_source_instance.return_value = data_source

    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local',
            instance_count,
            image,
            sagemaker_session=sagemaker_session)

        with pytest.raises(RuntimeError) as e:
            sagemaker_container.train(INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG,
                                      HYPERPARAMETERS, TRAINING_JOB_NAME)

        # Check the raised exception itself (e.value), not the ExceptionInfo
        # wrapper, whose str() is not guaranteed to contain the message.
        assert 'this is expected' in str(e.value)

    retrieve_artifacts.assert_called_once()
    cleanup.assert_called_once()
def test_serve_local_code(tmpdir, sagemaker_session):
    """Serving with a file:// submit directory mounts the code and rewrites
    SAGEMAKER_SUBMIT_DIRECTORY to the in-container path."""
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               return_value=str(tmpdir.mkdir('container-root'))):
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', 1, image, sagemaker_session=sagemaker_session)
        environment = {
            'env1': 1,
            'env2': 'b',
            'SAGEMAKER_SUBMIT_DIRECTORY': 'file:///tmp/code'
        }

        sagemaker_container.serve('/some/model/path', environment)
        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)

            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'serve'

                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/code' % '/tmp/code' in volumes
                assert 'SAGEMAKER_SUBMIT_DIRECTORY=/opt/ml/code' in config[
                    'services'][h]['environment']
Example #12
0
    def create_endpoint(self, EndpointName, EndpointConfigName):
        """Start a local serving container and block until its /ping endpoint
        answers with HTTP 200.

        Args:
            EndpointName (str): name used in the failure message.
            EndpointConfigName (str): accepted for API compatibility; not read here.

        Raises:
            RuntimeError: if the container has not responded successfully
                after 10 attempts (polled once per second).
        """
        instance_type = self.variants[0]['InstanceType']
        instance_count = self.variants[0]['InitialInstanceCount']
        self.serve_container = _SageMakerContainer(
            instance_type, instance_count, self.primary_container['Image'],
            self.sagemaker_session)
        self.serve_container.serve(self.primary_container)
        self.created_endpoint = True

        i = 0
        http = urllib3.PoolManager()
        # Default port, overridable through the session's 'local' config.
        serving_port = 8080
        if self.sagemaker_session.config and 'local' in self.sagemaker_session.config:
            serving_port = self.sagemaker_session.config['local'].get(
                'serving_port', 8080)
        endpoint_url = "http://localhost:%s/ping" % serving_port
        while True:
            i += 1
            if i >= 10:
                raise RuntimeError(
                    "Giving up, endpoint: %s didn't launch correctly" %
                    EndpointName)

            # Lazy %-style logging args: the message is only formatted if emitted.
            logger.info("Checking if endpoint is up, attempt: %s", i)
            try:
                r = http.request('GET', endpoint_url)
                if r.status != 200:
                    logger.info("Container still not up, got: %s", r.status)
                else:
                    return
            except urllib3.exceptions.RequestError:
                logger.info("Container still not up")

            time.sleep(1)
def test_train_local_code(tmpdir, sagemaker_session):
    """Training with local code mounts /tmp/code and the shared folder into each host."""
    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local',
            instance_count,
            image,
            sagemaker_session=sagemaker_session)

        sagemaker_container.train(INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG,
                                  LOCAL_CODE_HYPERPARAMETERS,
                                  TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')
        shared_folder_path = os.path.join(sagemaker_container.container_root,
                                          'shared')

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/code' % '/tmp/code' in volumes
                assert '%s:/opt/ml/shared' % shared_folder_path in volumes
Example #14
0
def test_serve_local_code(tmpdir, sagemaker_session):
    """Serving with a file:// submit directory mounts the code and rewrites
    SAGEMAKER_SUBMIT_DIRECTORY to the in-container path."""
    with patch(
            "sagemaker.local.image._SageMakerContainer._create_tmp_folder",
            return_value=str(tmpdir.mkdir("container-root")),
    ):
        image = "my-image"
        sagemaker_container = _SageMakerContainer(
            "local", 1, image, sagemaker_session=sagemaker_session)
        environment = {
            "env1": 1,
            "env2": "b",
            "SAGEMAKER_SUBMIT_DIRECTORY": "file:///tmp/code"
        }

        sagemaker_container.serve("/some/model/path", environment)
        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           "docker-compose.yaml")

        with open(docker_compose_file, "r") as f:
            # yaml.load without an explicit Loader is deprecated and unsafe;
            # SafeLoader matches the other tests in this file.
            config = yaml.load(f, Loader=yaml.SafeLoader)

            for h in sagemaker_container.hosts:
                assert config["services"][h]["image"] == image
                assert config["services"][h]["command"] == "serve"

                volumes = config["services"][h]["volumes"]
                assert "%s:/opt/ml/code" % "/tmp/code" in volumes
                assert ("SAGEMAKER_SUBMIT_DIRECTORY=/opt/ml/code"
                        in config["services"][h]["environment"])
Example #15
0
def test_train_local_code(get_data_source_instance, tmpdir, sagemaker_session):
    """Training with local code mounts the code/shared folders and rewrites the
    sagemaker_submit_directory hyperparameter to the in-container path."""
    data_source = Mock()
    data_source.get_root_dir.return_value = 'foo'
    get_data_source_instance.return_value = data_source

    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image,
                                                  sagemaker_session=sagemaker_session)

        sagemaker_container.train(
            INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG, LOCAL_CODE_HYPERPARAMETERS, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')
        shared_folder_path = os.path.join(sagemaker_container.container_root, 'shared')

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            assert len(config['services']) == instance_count

        for h in sagemaker_container.hosts:
            assert config['services'][h]['image'] == image
            assert config['services'][h]['command'] == 'train'
            volumes = config['services'][h]['volumes']
            assert '%s:/opt/ml/code' % '/tmp/code' in volumes
            assert '%s:/opt/ml/shared' % shared_folder_path in volumes

            config_file_root = os.path.join(sagemaker_container.container_root, h, 'input', 'config')
            hyperparameters_file = os.path.join(config_file_root, 'hyperparameters.json')
            # Use a context manager so the file handle is closed deterministically.
            with open(hyperparameters_file) as hp_file:
                hyperparameters_data = json.load(hp_file)
            assert hyperparameters_data['sagemaker_submit_directory'] == json.dumps('/opt/ml/code')
Example #16
0
    def create_training_job(self,
                            TrainingJobName,
                            AlgorithmSpecification,
                            OutputDataConfig,
                            ResourceConfig,
                            InputDataConfig=None,
                            **kwargs):
        """
        Create a training job in Local Mode
        Args:
            TrainingJobName (str): local training job name.
            AlgorithmSpecification (dict): Identifies the training algorithm to use.
            InputDataConfig (dict): Describes the training dataset and the location where it is stored.
            OutputDataConfig (dict): Identifies the location where you want to save the results of model training.
            ResourceConfig (dict): Identifies the resources to use for local model training.
            HyperParameters (dict) [optional]: Specifies these algorithm-specific parameters to influence the quality of
                the final model.
        """
        InputDataConfig = InputDataConfig or {}
        container = _SageMakerContainer(
            ResourceConfig["InstanceType"],
            ResourceConfig["InstanceCount"],
            AlgorithmSpecification["TrainingImage"],
            self.sagemaker_session,
        )
        training_job = _LocalTrainingJob(container)
        # dict.get is the idiomatic form of the test-membership-then-index pattern.
        hyperparameters = kwargs.get("HyperParameters", {})
        training_job.start(InputDataConfig, OutputDataConfig, hyperparameters,
                           TrainingJobName)

        # Register the job so it can be looked up by name later.
        LocalSagemakerClient._training_jobs[TrainingJobName] = training_job
Example #17
0
def test_train_local_intermediate_output(tmpdir, sagemaker_session):
    """A file:// S3OutputPath mounts the customer's intermediate-output folder
    into every training container."""
    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local',
            instance_count,
            image,
            sagemaker_session=sagemaker_session)

        output_path = str(tmpdir.mkdir('customer_intermediate_output'))
        output_data_config = {'S3OutputPath': 'file://%s' % output_path}
        hyperparameters = {'sagemaker_s3_output': output_path}

        sagemaker_container.train(INPUT_DATA_CONFIG, output_data_config,
                                  hyperparameters, TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')
        intermediate_folder_path = os.path.join(output_path,
                                                'output/intermediate')

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                volumes = config['services'][h]['volumes']
                assert '%s:/opt/ml/output/intermediate' % intermediate_folder_path in volumes
Example #18
0
def test_serve(up, copy, copytree, tmpdir, sagemaker_session):
    """Serving a model writes a compose file whose services run `serve` on the image."""
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               return_value=str(tmpdir.mkdir('container-root'))):

        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', 1, image, sagemaker_session=sagemaker_session)
        primary_container = {
            'ModelDataUrl': '/some/model/path',
            'Environment': {
                'env1': 1,
                'env2': 'b'
            }
        }

        sagemaker_container.serve(primary_container)
        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)

            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'serve'
def test_train_with_hyperparameters_without_job_name(download_folder, _cleanup,
                                                     _stream_output,
                                                     LocalSession, tmpdir):
    """Training must expose TRAINING_JOB_NAME in every host's environment."""
    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):

        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local', instance_count, image, sagemaker_session=LocalSession)
        sagemaker_container.train(INPUT_DATA_CONFIG, HYPERPARAMETERS,
                                  TRAINING_JOB_NAME)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            for h in sagemaker_container.hosts:
                assert 'TRAINING_JOB_NAME={}'.format(
                    TRAINING_JOB_NAME) in config['services'][h]['environment']
    def create_training_job(self,
                            TrainingJobName,
                            AlgorithmSpecification,
                            RoleArn,
                            InputDataConfig,
                            OutputDataConfig,
                            ResourceConfig,
                            StoppingCondition,
                            HyperParameters,
                            Tags=None):
        """Validate the input channels, then run a local training container
        and record the resulting model artifacts on self."""
        self.train_container = _SageMakerContainer(
            ResourceConfig['InstanceType'], ResourceConfig['InstanceCount'],
            AlgorithmSpecification['TrainingImage'], self.sagemaker_session)

        # Local Mode only supports fully replicated data channels.
        for channel in InputDataConfig:
            distribution = channel['DataSource']['S3DataSource']['S3DataDistributionType']
            if distribution != 'FullyReplicated':
                raise RuntimeError(
                    "DataDistribution: %s is not currently supported in Local Mode"
                    % distribution)

        self.s3_model_artifacts = self.train_container.train(
            InputDataConfig, HyperParameters)
Example #21
0
def test_container_does_not_enable_nvidia_docker_for_cpu_containers(sagemaker_session):
    """A CPU ('local') container must not request any docker runtime."""
    container = _SageMakerContainer('local', 1, 'my-image',
                                    sagemaker_session=sagemaker_session)

    host_config = container._create_docker_host('host-1', {}, set(), 'train', [])
    assert 'runtime' not in host_config
Example #22
0
def test_container_has_gpu_support(tmpdir, sagemaker_session):
    """A 'local_gpu' container must request the nvidia docker runtime."""
    container = _SageMakerContainer('local_gpu', 1, 'my-image',
                                    sagemaker_session=sagemaker_session)

    host_config = container._create_docker_host('host-1', {}, set(), 'train', [])
    # .get covers both presence and value of the 'runtime' key.
    assert host_config.get('runtime') == 'nvidia'
Example #23
0
def test_container_does_not_enable_nvidia_docker_for_cpu_containers(sagemaker_session):
    """A CPU ("local") container must not request any docker runtime."""
    container = _SageMakerContainer(
        "local", 1, "my-image", sagemaker_session=sagemaker_session
    )

    host_config = container._create_docker_host("host-1", {}, set(), "train", [])
    assert "runtime" not in host_config
Example #24
0
def test_container_has_gpu_support(tmpdir, sagemaker_session):
    """A "local_gpu" container must request the nvidia docker runtime."""
    container = _SageMakerContainer(
        "local_gpu", 1, "my-image", sagemaker_session=sagemaker_session
    )

    host_config = container._create_docker_host("host-1", {}, set(), "train", [])
    # .get covers both presence and value of the "runtime" key.
    assert host_config.get("runtime") == "nvidia"
def test_prepare_serving_volumes_with_local_model(sagemaker_session):
    """A local model path is mounted directly at /opt/ml/model."""
    container = _SageMakerContainer('local', 1, 'some-image', sagemaker_session=sagemaker_session)
    container.container_root = '/tmp/container_root'

    volumes = container._prepare_serving_volumes('/path/to/my_model')

    # Exactly one volume, mapping the host model path to the container model dir.
    assert [(v.host_dir, v.container_dir) for v in volumes] == [
        ('/path/to/my_model', '/opt/ml/model')]
def test_train(download_folder, _cleanup, popen, _stream_output, LocalSession,
               tmpdir, sagemaker_session):
    """Training downloads S3 channel data, launches docker-compose with the
    expected arguments, configures every host, and creates the output dirs."""
    directories = [
        str(tmpdir.mkdir('container-root')),
        str(tmpdir.mkdir('data'))
    ]
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               side_effect=directories):

        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer(
            'local',
            instance_count,
            image,
            sagemaker_session=sagemaker_session)
        sagemaker_container.train(INPUT_DATA_CONFIG, HYPERPARAMETERS,
                                  TRAINING_JOB_NAME)

        channel_dir = os.path.join(directories[1], 'b')
        download_folder_calls = [
            call('my-own-bucket', 'prefix', channel_dir, sagemaker_session)
        ]
        download_folder.assert_has_calls(download_folder_calls)

        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        call_args = popen.call_args[0][0]
        assert call_args is not None

        expected = [
            'docker-compose', '-f', docker_compose_file, 'up', '--build',
            '--abort-on-container-exit'
        ]
        for i, v in enumerate(expected):
            assert call_args[i] == v

        with open(docker_compose_file, 'r') as f:
            # yaml.load without an explicit Loader is deprecated and unsafe.
            config = yaml.load(f, Loader=yaml.SafeLoader)
            assert len(config['services']) == instance_count
            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'train'
                assert 'AWS_REGION={}'.format(
                    REGION) in config['services'][h]['environment']
                assert 'TRAINING_JOB_NAME={}'.format(
                    TRAINING_JOB_NAME) in config['services'][h]['environment']

        # assert that expected by sagemaker container output directories exist
        assert os.path.exists(
            os.path.join(sagemaker_container.container_root, 'output'))
        assert os.path.exists(
            os.path.join(sagemaker_container.container_root, 'output/data'))
def test_download_file():
    """The container must strip the leading '/' from an S3 key before downloading."""
    boto_session = Mock(name='boto_session')
    boto_session.client('sts').get_caller_identity.return_value = {'Account': '123'}

    s3_bucket = Mock()
    boto_session.resource('s3').Bucket.return_value = s3_bucket
    session = sagemaker.Session(boto_session=boto_session, sagemaker_client=Mock())

    container = _SageMakerContainer('local', 2, 'my-image', sagemaker_session=session)
    container._download_file(BUCKET_NAME, '/prefix/path/file.tar.gz', '/tmp/file.tar.gz')

    # key is passed to boto without the leading slash
    s3_bucket.download_file.assert_called_with('prefix/path/file.tar.gz', '/tmp/file.tar.gz')
def test_prepare_serving_volumes_with_local_model(sagemaker_session):
    """A plain local path should be mounted directly as the model volume."""
    container = _SageMakerContainer(
        'local', 1, 'some-image', sagemaker_session=sagemaker_session)
    container.container_root = '/tmp/container_root'

    volumes = container._prepare_serving_volumes('/path/to/my_model')

    assert len(volumes) == 1
    volume = volumes[0]
    assert volume.container_dir == '/opt/ml/model'
    assert volume.host_dir == '/path/to/my_model'
示例#29
0
def test_retrieve_artifacts(LocalSession, tmpdir):
    """retrieve_model_artifacts should merge every host's model output into one tree."""
    container = _SageMakerContainer('local', 2, 'my-image')
    container.hosts = ['algo-1', 'algo-2']  # pin hosts to avoid any randomness
    container.container_root = str(tmpdir.mkdir('container-root'))

    # one output volume per host, exactly as docker-compose would mount them
    volumes = {
        host: os.path.join(container.container_root, '%s/output/' % host)
        for host in container.hosts
    }
    for volume in volumes.values():
        os.makedirs(volume)

    compose_data = {
        'services': {
            host: {'volumes': ['%s:/opt/ml/model' % volume]}
            for host, volume in volumes.items()
        }
    }

    # per-host directory layout and files to create inside each volume
    layout = {
        'algo-1': (['model', 'model/data'],
                   ['model/data/model.json', 'model/data/variables.csv']),
        'algo-2': (['model', 'model/data', 'model/tmp'],
                   ['model/data/model.json', 'model/data/variables2.csv',
                    'model/tmp/something-else.json']),
    }
    for host, (dirs, files) in layout.items():
        for d in dirs:
            os.mkdir(os.path.join(volumes[host], d))
        for f in files:
            open(os.path.join(volumes[host], f), 'a').close()

    merged_root = container.retrieve_model_artifacts(compose_data)

    # union of both hosts' files must exist in the merged artifact tree
    expected = [
        'model', 'model/data/', 'model/data/model.json',
        'model/data/variables.csv', 'model/data/variables2.csv',
        'model/tmp/something-else.json'
    ]
    for path in expected:
        assert os.path.exists(os.path.join(merged_root, path))
示例#30
0
    def start(self, input_data, output_data, transform_resources, **kwargs):
        """Start the Local Transform Job

        Args:
            input_data (dict): Describes the dataset to be transformed and the
                location where it is stored.
            output_data (dict): Identifies the location where to save the
                results from the transform job
            transform_resources (dict): compute instances for the transform job.
                Currently only supports local or local_gpu
            **kwargs: additional arguments coming from the boto request object
        """
        self.transform_resources = transform_resources
        self.input_data = input_data
        self.output_data = output_data

        image = self.primary_container["Image"]
        instance_type = transform_resources["InstanceType"]
        # local batch transform always runs on a single container
        instance_count = 1

        environment = self._get_container_environment(**kwargs)

        # Start the container, pass the environment and wait for it to start up
        self.container = _SageMakerContainer(instance_type, instance_count,
                                             image, self.local_session)
        self.container.serve(self.primary_container["ModelDataUrl"],
                             environment)

        serving_port = get_config_value("local.serving_port",
                                        self.local_session.config) or 8080
        _wait_for_serving_container(serving_port)

        # Get capabilities from Container if needed. The execution-parameters
        # endpoint is optional; a non-200 response simply means the container
        # does not implement it and the request/defaults are used as-is.
        endpoint_url = "http://localhost:%s/execution-parameters" % serving_port
        response, code = _perform_request(endpoint_url)
        if code == 200:
            execution_parameters = json.loads(response.read())
            # MaxConcurrentTransforms is ignored because we currently only support 1
            for setting in ("BatchStrategy", "MaxPayloadInMB"):
                # container-advertised values only fill in settings the
                # caller did not explicitly provide
                if setting not in kwargs and setting in execution_parameters:
                    kwargs[setting] = execution_parameters[setting]

        # Apply Defaults if none was provided
        kwargs.update(self._get_required_defaults(**kwargs))

        self.start_time = datetime.datetime.now()
        self.batch_strategy = kwargs["BatchStrategy"]
        if "Environment" in kwargs:
            self.environment = kwargs["Environment"]

        # run the batch inference requests
        self._perform_batch_inference(input_data, output_data, **kwargs)
        self.end_time = datetime.datetime.now()
        self.state = self._COMPLETED
示例#31
0
def test_write_config_files_input_content_type(LocalSession, tmpdir):
    """ContentType must be written per channel only when the channel defines it."""
    container = _SageMakerContainer("local", 1, "my-image")
    container.container_root = str(tmpdir.mkdir("container-root"))
    host = "algo-1"

    sagemaker.local.image._create_config_file_directories(
        container.container_root, host)

    config_path = os.path.join(container.container_root, host, "input",
                               "config", "inputdataconfig.json")

    # channel_a carries a ContentType; channel_b deliberately does not
    channel_a = {
        "ChannelName": "channel_a",
        "DataUri": "file:///tmp/source1",
        "ContentType": "text/csv",
        "DataSource": {
            "FileDataSource": {
                "FileDataDistributionType": "FullyReplicated",
                "FileUri": "file:///tmp/source1",
            }
        },
    }
    channel_b = {
        "ChannelName": "channel_b",
        "DataUri": "s3://my-own-bucket/prefix",
        "DataSource": {
            "S3DataSource": {
                "S3DataDistributionType": "FullyReplicated",
                "S3DataType": "S3Prefix",
                "S3Uri": "s3://my-own-bucket/prefix",
            }
        },
    }
    container.write_config_files(host, HYPERPARAMETERS, [channel_a, channel_b])

    assert os.path.exists(config_path)
    with open(config_path) as f:
        parsed = json.load(f)

    # every channel must be present in the parsed config
    for channel in (channel_a, channel_b):
        assert channel["ChannelName"] in parsed

    # only channel_a declared a content type
    assert "ContentType" in parsed["channel_a"]
    assert parsed["channel_a"]["ContentType"] == "text/csv"
    assert "ContentType" not in parsed["channel_b"]
def test_train_error(_download_folder, _cleanup, popen, _stream_output, LocalSession, tmpdir, sagemaker_session):
    """train() should surface a container failure as a RuntimeError with its message."""
    directories = [str(tmpdir.mkdir('container-root')), str(tmpdir.mkdir('data'))]

    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder', side_effect=directories):
        instance_count = 2
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', instance_count, image, sagemaker_session=sagemaker_session)

        with pytest.raises(RuntimeError) as e:
            sagemaker_container.train(INPUT_DATA_CONFIG, HYPERPARAMETERS)

        # BUG FIX: str(e) on pytest's ExceptionInfo yields file/line context,
        # not the exception message; the raised exception is e.value.
        assert 'this is expected' in str(e.value)
def test_write_config_files_input_content_type(LocalSession, tmpdir):
    """Per-channel ContentType should appear in inputdataconfig.json only when set."""
    container = _SageMakerContainer('local', 1, 'my-image')
    container.container_root = str(tmpdir.mkdir('container-root'))
    host = 'algo-1'

    sagemaker.local.image._create_config_file_directories(
        container.container_root, host)

    input_data_config_file = os.path.join(
        container.container_root, host, 'input', 'config', 'inputdataconfig.json')

    # first channel declares a ContentType, second does not
    channels = [{
        'ChannelName': 'channel_a',
        'DataUri': 'file:///tmp/source1',
        'ContentType': 'text/csv',
        'DataSource': {
            'FileDataSource': {
                'FileDataDistributionType': 'FullyReplicated',
                'FileUri': 'file:///tmp/source1'
            }
        }
    }, {
        'ChannelName': 'channel_b',
        'DataUri': 's3://my-own-bucket/prefix',
        'DataSource': {
            'S3DataSource': {
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3Uri': 's3://my-own-bucket/prefix'
            }
        }
    }]
    container.write_config_files(host, HYPERPARAMETERS, channels)

    assert os.path.exists(input_data_config_file)
    with open(input_data_config_file) as f:
        parsed = json.load(f)

    # every channel name is present
    assert all(channel['ChannelName'] in parsed for channel in channels)

    # only channel_a declared a content type
    assert 'ContentType' in parsed['channel_a']
    assert parsed['channel_a']['ContentType'] == 'text/csv'
    assert 'ContentType' not in parsed['channel_b']
示例#34
0
def test_train_local_code(get_data_source_instance, tmpdir, sagemaker_session):
    """Local-code training should mount the code and shared folders into each host."""
    data_source = Mock()
    data_source.get_root_dir.return_value = "foo"
    get_data_source_instance.return_value = data_source

    tmp_folders = [str(tmpdir.mkdir("container-root")), str(tmpdir.mkdir("data"))]
    with patch("sagemaker.local.image._SageMakerContainer._create_tmp_folder",
               side_effect=tmp_folders):
        instance_count = 2
        image = "my-image"
        container = _SageMakerContainer(
            "local", instance_count, image, sagemaker_session=sagemaker_session)

        container.train(INPUT_DATA_CONFIG, OUTPUT_DATA_CONFIG,
                        LOCAL_CODE_HYPERPARAMETERS, ENVIRONMENT,
                        TRAINING_JOB_NAME)

        compose_path = os.path.join(container.container_root, "docker-compose.yaml")
        shared_folder = os.path.join(container.container_root, "shared")

        with open(compose_path, "r") as f:
            config = yaml.load(f, Loader=yaml.SafeLoader)
        assert len(config["services"]) == instance_count

        for host in container.hosts:
            service = config["services"][host]
            assert service["image"] == image
            assert service["command"] == "train"

            # local code dir and the shared folder are both mounted
            assert "%s:/opt/ml/code" % "/tmp/code" in service["volumes"]
            assert "%s:/opt/ml/shared" % shared_folder in service["volumes"]

            # submit dir hyperparameter is rewritten to the in-container path
            hyperparameters_file = os.path.join(
                container.container_root, host, "input", "config",
                "hyperparameters.json")
            hyperparameters = json.load(open(hyperparameters_file))
            assert hyperparameters["sagemaker_submit_directory"] == json.dumps("/opt/ml/code")
示例#35
0
def test_prepare_serving_volumes_with_local_model(get_data_source_instance, sagemaker_session):
    """A file:// model URI should be mounted straight from its local root dir."""
    container = _SageMakerContainer('local', 1, 'some-image',
                                    sagemaker_session=sagemaker_session)
    container.container_root = '/tmp/container_root'

    data_source = Mock()
    data_source.get_root_dir.return_value = '/path/to/my_model'
    data_source.get_file_list.return_value = ['/path/to/my_model/model']
    get_data_source_instance.return_value = data_source

    volumes = container._prepare_serving_volumes('file:///path/to/my_model')

    assert len(volumes) == 1
    assert volumes[0].host_dir == '/path/to/my_model'
    assert volumes[0].container_dir == '/opt/ml/model'
示例#36
0
def test_serve_local_code_no_env(tmpdir, sagemaker_session):
    """serve() with an empty environment should still emit a valid compose file."""
    with patch('sagemaker.local.image._SageMakerContainer._create_tmp_folder',
               return_value=str(tmpdir.mkdir('container-root'))):
        image = 'my-image'
        sagemaker_container = _SageMakerContainer('local', 1, image, sagemaker_session=sagemaker_session)
        sagemaker_container.serve('/some/model/path', {})
        docker_compose_file = os.path.join(sagemaker_container.container_root,
                                           'docker-compose.yaml')

        with open(docker_compose_file, 'r') as f:
            # FIX: yaml.load without an explicit Loader is deprecated and
            # unsafe; use SafeLoader, matching the other tests in this file.
            config = yaml.load(f, Loader=yaml.SafeLoader)

            for h in sagemaker_container.hosts:
                assert config['services'][h]['image'] == image
                assert config['services'][h]['command'] == 'serve'
def test_prepare_serving_volumes_with_s3_model(is_tarfile, _download_file, sagemaker_session):
    """An s3:// tarball should be downloaded into the host model dir and mounted."""
    container = _SageMakerContainer('local', 1, 'some-image',
                                    sagemaker_session=sagemaker_session)
    container.container_root = '/tmp/container_root'
    model_dir = os.path.join('/tmp/container_root/', container.hosts[0], 'model')

    is_tarfile.return_value = True

    volumes = container._prepare_serving_volumes('s3://bucket/my_model.tar.gz')

    # tarball is fetched into the host model directory and probed as a tar
    expected_tar = os.path.join(model_dir, 'my_model.tar.gz')
    _download_file.assert_called_with('bucket', '/my_model.tar.gz', expected_tar)
    is_tarfile.assert_called_with(expected_tar)

    assert len(volumes) == 1
    assert volumes[0].container_dir == '/opt/ml/model'
    assert volumes[0].host_dir == model_dir
def test_write_config_file(LocalSession, tmpdir):
    """write_config_files should emit all three JSON configs with the right content."""
    container = _SageMakerContainer('local', 2, 'my-image')
    container.container_root = str(tmpdir.mkdir('container-root'))
    host = "algo-1"

    sagemaker.local.image._create_config_file_directories(container.container_root, host)

    config_root = os.path.join(container.container_root, host, 'input', 'config')
    hyperparameters_file = os.path.join(config_root, 'hyperparameters.json')
    resource_config_file = os.path.join(config_root, 'resourceconfig.json')
    input_data_config_file = os.path.join(config_root, 'inputdataconfig.json')

    # write the config files, then verify existence and content
    container.write_config_files(host, HYPERPARAMETERS, INPUT_DATA_CONFIG)

    for path in (hyperparameters_file, resource_config_file, input_data_config_file):
        assert os.path.exists(path)

    hyperparameters_data = json.load(open(hyperparameters_file))
    resource_config_data = json.load(open(resource_config_file))
    input_data_config_data = json.load(open(input_data_config_file))

    # hyperparameters are passed through verbatim
    for key, value in HYPERPARAMETERS.items():
        assert key in hyperparameters_data
        assert hyperparameters_data[key] == value

    # resource config identifies this host and the full cluster
    assert resource_config_data['current_host'] == host
    assert resource_config_data['hosts'] == container.hosts

    # every input channel is present
    for channel in INPUT_DATA_CONFIG:
        assert channel['ChannelName'] in input_data_config_data
def test_download_folder(makedirs):
    """_download_folder should place S3 objects under dest, with or without a trailing '/'."""
    boto_session = Mock(name='boto_session')
    boto_session.client('sts').get_caller_identity.return_value = {'Account': '123'}

    session = sagemaker.Session(boto_session=boto_session, sagemaker_client=Mock())

    train_data = Mock()
    train_data.bucket_name.return_value = BUCKET_NAME
    train_data.key = '/prefix/train/train_data.csv'

    validation_data = Mock()
    validation_data.bucket_name.return_value = BUCKET_NAME
    validation_data.key = '/prefix/train/validation_data.csv'

    boto_session.resource('s3').Bucket(BUCKET_NAME).objects.filter.return_value = [
        train_data, validation_data]

    obj_mock = Mock()
    boto_session.resource('s3').Object.return_value = obj_mock

    container = _SageMakerContainer('local', 2, 'my-image', sagemaker_session=session)

    expected_calls = [call(os.path.join('/tmp', 'train/train_data.csv')),
                      call(os.path.join('/tmp', 'train/validation_data.csv'))]

    container._download_folder(BUCKET_NAME, '/prefix', '/tmp')
    obj_mock.download_file.assert_called()
    obj_mock.download_file.assert_has_calls(expected_calls)
    obj_mock.reset_mock()

    # a trailing slash on the prefix must not change the destination layout
    container._download_folder(BUCKET_NAME, '/prefix/', '/tmp')
    obj_mock.download_file.assert_called()
    obj_mock.download_file.assert_has_calls(expected_calls)
def test_retrieve_artifacts(LocalSession, tmpdir):
    """retrieve_artifacts should merge both model and output volumes from every host."""
    sagemaker_container = _SageMakerContainer('local', 2, 'my-image')
    sagemaker_container.hosts = ['algo-1', 'algo-2']  # avoid any randomness
    sagemaker_container.container_root = str(tmpdir.mkdir('container-root'))

    volume1 = os.path.join(sagemaker_container.container_root, 'algo-1')
    volume2 = os.path.join(sagemaker_container.container_root, 'algo-2')
    os.mkdir(volume1)
    os.mkdir(volume2)

    compose_data = {
        'services': {
            'algo-1': {
                'volumes': ['%s:/opt/ml/model' % os.path.join(volume1, 'model'),
                            '%s:/opt/ml/output' % os.path.join(volume1, 'output')]
            },
            'algo-2': {
                'volumes': ['%s:/opt/ml/model' % os.path.join(volume2, 'model'),
                            '%s:/opt/ml/output' % os.path.join(volume2, 'output')]
            }
        }
    }

    # per-host directory layouts (model + output trees for each volume)
    dirs1 = ['model', 'model/data', 'output', 'output/data']
    dirs2 = ['model', 'model/data', 'model/tmp', 'output', 'output/data', 'output/log']

    files1 = ['model/data/model.json', 'model/data/variables.csv',
              'output/data/loss.json', 'output/data/accuracy.json']
    files2 = ['model/data/model.json', 'model/data/variables2.csv',
              'model/tmp/something-else.json', 'output/data/loss.json',
              'output/data/accuracy2.json', 'output/log/warnings.txt']

    expected = ['model', 'model/data/', 'model/data/model.json', 'model/data/variables.csv',
                'model/data/variables2.csv', 'model/tmp/something-else.json', 'output', 'output/data', 'output/log',
                'output/data/loss.json', 'output/data/accuracy.json', 'output/data/accuracy2.json',
                'output/log/warnings.txt']

    for d in dirs1:
        os.mkdir(os.path.join(volume1, d))
    for d in dirs2:
        os.mkdir(os.path.join(volume2, d))

    # create all the files
    for f in files1:
        open(os.path.join(volume1, f), 'a').close()
    for f in files2:
        open(os.path.join(volume2, f), 'a').close()

    s3_model_artifacts = sagemaker_container.retrieve_artifacts(compose_data)
    s3_artifacts = os.path.dirname(s3_model_artifacts)

    # FIX: the top-level listing check is loop-invariant — assert it once
    # instead of re-evaluating it for every expected file.
    assert set(os.listdir(s3_artifacts)) == {'model', 'output'}
    for f in expected:
        assert os.path.exists(os.path.join(s3_artifacts, f))