示例#1
0
def test_get_runner_by_mpi_with_extra_args(training_env):
    """MPI runners built from explicit arguments must not consult the training env.

    The master runner is checked first; ``is_master`` is then flipped to
    ``False`` and the worker runner is checked the same way.
    """
    training_env().num_gpus = 0

    master = _runner.get(
        _runner.MPIRunnerType, USER_SCRIPT, CMD_ARGS, ENV_VARS, MPI_OPTS
    )

    assert isinstance(master, _mpi.MasterRunner)

    assert master._user_entry_point == USER_SCRIPT
    assert master._args == CMD_ARGS
    assert master._env_vars == ENV_VARS
    assert master._process_per_host == 2
    assert master._num_processes == 4
    assert master._custom_mpi_options == NCCL_DEBUG_MPI_OPT

    # Everything was supplied explicitly, so none of these may have been called.
    for fallback in ("to_cmd_args", "to_env_vars", "user_entry_point",
                     "additional_framework_parameters"):
        getattr(training_env(), fallback).assert_not_called()

    # Same request again, this time from a non-master host.
    training_env().is_master = False
    worker = _runner.get(
        _runner.MPIRunnerType, USER_SCRIPT, CMD_ARGS, ENV_VARS
    )

    assert isinstance(worker, _mpi.WorkerRunner)

    assert worker._user_entry_point == USER_SCRIPT
    assert worker._args == CMD_ARGS
    assert worker._env_vars == ENV_VARS

    for fallback in ("to_cmd_args", "to_env_vars", "user_entry_point"):
        getattr(training_env(), fallback).assert_not_called()
示例#2
0
def test_get_runner_by_mpi_returns_runnner(training_env):
    """Without explicit arguments the MPI runner is built from the training env.

    Verified for the master host first and, after ``is_master`` is set to
    ``False``, for a worker host.
    """
    master = _runner.get(_runner.MPIRunnerType)

    assert isinstance(master, _mpi.MasterRunner)
    for accessor in ("to_cmd_args", "to_env_vars"):
        getattr(training_env(), accessor).assert_called()

    training_env().is_master = False
    worker = _runner.get(_runner.MPIRunnerType)

    assert isinstance(worker, _mpi.WorkerRunner)
    for accessor in ("to_cmd_args", "to_env_vars"):
        getattr(training_env(), accessor).assert_called()
示例#3
0
def test_runnner_with_default_gpu_processes_per_host(training_env):
    """With no framework parameters and two GPUs, the master runner uses 2 processes per host."""
    training_env().additional_framework_parameters = {}
    training_env().num_gpus = 2

    master = _runner.get(_runner.MPIRunnerType)

    assert isinstance(master, _mpi.MasterRunner)

    processes_per_host = master._process_per_host
    assert processes_per_host == 2
def run(uri,
        user_entry_point,
        args,
        env_vars=None,
        wait=True,
        capture_error=False,
        runner=_runner.ProcessRunnerType):
    # type: (str, str, List[str], Dict[str, str], bool, bool, _runner.RunnerType) -> None
    # NOTE(review): the type comment above declares ``None`` but the function returns the
    # result of the runner's ``run`` call -- confirm the intended return type and update it.
    """Download, prepare and execute a compressed tar file from S3 or a provided directory as
    a user entry point. Runs the user entry point, passing env_vars as environment variables
    and args as command arguments.

    If the entry point is:
        - A Python package: executes the packages as >>> env_vars python -m module_name + args
        - A Python script: executes the script as >>> env_vars python module_name + args
        - Any other: executes the command as >>> env_vars /bin/sh -c ./module_name + args

    Example:
         >>>import sagemaker_containers
         >>>from sagemaker_containers.beta.framework import entry_point

         >>>env = sagemaker_containers.training_env()
         {'channel-input-dirs': {'training': '/opt/ml/input/training'}, 'model_dir': '/opt/ml/model', ...}


         >>>hyperparameters = env.hyperparameters
         {'batch-size': 128, 'model_dir': '/opt/ml/model'}

         >>>args = mapping.to_cmd_args(hyperparameters)
         ['--batch-size', '128', '--model_dir', '/opt/ml/model']

         >>>env_vars = mapping.to_env_vars()
         {'SAGEMAKER_CHANNELS':'training', 'SAGEMAKER_CHANNEL_TRAINING':'/opt/ml/input/training',
         'MODEL_DIR':'/opt/ml/model', ...}

         >>>entry_point.run('s3://bucket/code.tar.gz', 'user_script', args, env_vars)
         SAGEMAKER_CHANNELS=training SAGEMAKER_CHANNEL_TRAINING=/opt/ml/input/training
         SAGEMAKER_MODEL_DIR=/opt/ml/model python -m user_script --batch-size 128 --model_dir /opt/ml/model

    Args:
        uri (str): the location of the module.
        user_entry_point (str): name of the user provided entry point
        args (list):  A list of program arguments.
        env_vars (dict): A map containing the environment variables to be written
            (default: None, treated as an empty map).
        wait (bool): forwarded to the runner's ``run`` call (default: True).
        capture_error (bool): Default false. If True, the running process captures the
            stderr, and appends it to the returned Exception message in case of errors.
        runner: the runner type, or runner object, handed to ``_runner.get``
            (default: ``_runner.ProcessRunnerType``).

    Returns:
        Whatever the selected runner's ``run(wait, capture_error)`` call returns.
    """
    # Operate on a private copy so the caller's own dict is never handed to the helpers below.
    env_vars = (env_vars or {}).copy()

    _files.download_and_extract(uri, user_entry_point, _env.code_dir)

    install(user_entry_point, _env.code_dir, capture_error)

    _env.write_env_vars(env_vars)

    # Hand the explicit entry point, arguments and environment to the runner; previously
    # ``args`` was accepted but never used and the runner was created without them.
    selected_runner = _runner.get(runner, user_entry_point, args, env_vars)
    return selected_runner.run(wait, capture_error)
示例#5
0
def test_get_runner_by_process_with_extra_args(training_env):
    """A process runner built from explicit arguments must not consult the training env."""
    process_runner = _runner.get(
        _runner.ProcessRunnerType, USER_SCRIPT, CMD_ARGS, ENV_VARS
    )

    assert isinstance(process_runner, _process.ProcessRunner)

    assert process_runner._user_entry_point == USER_SCRIPT
    assert process_runner._args == CMD_ARGS
    assert process_runner._env_vars == ENV_VARS

    # Everything was supplied explicitly, so none of these may have been called.
    for fallback in ("to_cmd_args", "to_env_vars", "user_entry_point"):
        getattr(training_env(), fallback).assert_not_called()
def run(
    uri,
    user_entry_point,
    args,
    env_vars=None,
    wait=True,
    capture_error=False,
    runner=_runner.ProcessRunnerType,
    extra_opts=None,
):
    # type: (str, str, List[str], Dict[str, str], bool, bool, _runner.RunnerType,Dict[str, str]) -> None  # pylint: disable=line-too-long # noqa ignore=E501
    """Fetch the user code, install it, and launch the user entry point through a runner.

    The code at ``uri`` is downloaded and extracted into the code directory, the entry
    point is installed, the environment variables are written, and host name resolution
    is awaited. A runner is then obtained for ``runner`` and started with ``wait`` and
    ``capture_error``.

    How the entry point is executed depends on what it is:
        - A Python package: executes the packages as >>> env_vars python -m module_name + args
        - A Python script: executes the script as >>> env_vars python module_name + args
        - Any other: executes the command as >>> env_vars /bin/sh -c ./module_name + args

    Example:
         >>>import sagemaker_containers
         >>>from sagemaker_containers.beta.framework import entry_point

         >>>env = sagemaker_containers.training_env()
         {'channel-input-dirs': {'training': '/opt/ml/input/training'},
          'model_dir': '/opt/ml/model', ...}

         >>>hyperparameters = env.hyperparameters
         {'batch-size': 128, 'model_dir': '/opt/ml/model'}

         >>>args = mapping.to_cmd_args(hyperparameters)
         ['--batch-size', '128', '--model_dir', '/opt/ml/model']

         >>>env_vars = mapping.to_env_vars()
         {'SAGEMAKER_CHANNELS':'training', 'SAGEMAKER_CHANNEL_TRAINING':'/opt/ml/input/training',
         'MODEL_DIR':'/opt/ml/model', ...}

         >>>entry_point.run('s3://bucket/code.tar.gz', 'user_script', args, env_vars)
         SAGEMAKER_CHANNELS=training SAGEMAKER_CHANNEL_TRAINING=/opt/ml/input/training
         SAGEMAKER_MODEL_DIR=/opt/ml/model python -m user_script --batch-size 128
                             --model_dir /opt/ml/model

    Args:
        uri (str): the location of the module.
        user_entry_point (str): name of the user provided entry point.
        args (list): program arguments for the entry point.
        env_vars (dict): environment variables to be written (default: None, treated as
            an empty map). The caller's dict is copied, never used directly.
        wait (bool): If the user entry point should be run to completion before this method
            returns (default: True).
        capture_error (bool): Default false. If True, the running process captures the
            stderr, and appends it to the returned Exception message in case of errors.
        runner (sagemaker_containers.beta.framework.runner.RunnerType): the type of runner
            object to be created
            (default: sagemaker_containers.beta.framework.runner.ProcessRunnerType).
        extra_opts (dict): Additional options for running the entry point (default: None).
            Currently, this only applies for MPI.

    Returns:
        The value returned by the selected runner's ``run(wait, capture_error)`` call.
    """
    # Private copy of the environment; ``None`` (or any empty mapping) becomes ``{}``.
    environment = (env_vars or {}).copy()

    # Preparation steps, in order: fetch, install, write environment, wait for host names.
    _files.download_and_extract(uri, _env.code_dir)
    install(user_entry_point, _env.code_dir, capture_error)
    _env.write_env_vars(environment)
    _wait_hostname_resolution()

    entry_point_runner = _runner.get(
        runner, user_entry_point, args, environment, extra_opts
    )
    return entry_point_runner.run(wait, capture_error)
示例#7
0
def test_get_runner_by_process_returns_runnner(training_env):
    """Without explicit arguments the process runner is built from the training env."""
    process_runner = _runner.get(_runner.ProcessRunnerType)

    assert isinstance(process_runner, _process.ProcessRunner)
    for accessor in ("to_cmd_args", "to_env_vars"):
        getattr(training_env(), accessor).assert_called()
示例#8
0
def test_get_runner_returns_runnner_itself(runner_class):
    """Passing an object that already satisfies a runner class yields that same object."""
    existing_runner = MagicMock(spec=runner_class)

    resolved = _runner.get(existing_runner)

    assert resolved == existing_runner
示例#9
0
def test_get_runner_invalid_identifier():
    """An identifier that is neither a runner nor a known runner type raises ``ValueError``."""
    unknown_identifier = 42

    with pytest.raises(ValueError):
        _runner.get(unknown_identifier)