Exemplo n.º 1
0
def local_experiment(args: Namespace) -> None:
    """Run a one-batch local test of the experiment described by ``args``.

    Reads ``args.test_mode``, ``args.config_file``, ``args.config`` and
    ``args.model_def``.

    Raises:
        NotImplementedError: if ``args.test_mode`` is falsy, or if the
            configured entrypoint is not matched by
            ``det.util.match_legacy_trial_class``.
    """
    # Guard: only the combination --local --test is implemented.
    if not args.test_mode:
        raise NotImplementedError(
            "Local training mode (--local mode without --test mode) is not yet supported. Please "
            "try local test mode by adding the --test flag or cluster training mode by removing "
            "the --local flag.")

    experiment_config = _parse_config_file_or_exit(args.config_file, args.config)
    entrypoint = experiment_config["entrypoint"]

    # --local --test mode only makes sense for the legacy trial entrypoints.  Otherwise the user
    # would just run their training script directly.
    is_legacy_trial = det.util.match_legacy_trial_class(entrypoint)
    if not is_legacy_trial:
        raise NotImplementedError(
            "Local test mode (--local --test) is only supported for Trial-like entrypoints. "
            "Script-like entrypoints are not supported, but maybe you can just invoke your script "
            "directly?")

    debug_enabled = bool(experiment_config.get("debug", False))
    set_logger(debug_enabled)

    model_dir = args.model_def.resolve()
    with _local_execution_manager(model_dir):
        trial_cls = determined.load.trial_class_from_entrypoint(entrypoint)
        determined.experimental.test_one_batch(
            trial_class=trial_cls,
            config=experiment_config,
        )
Exemplo n.º 2
0
def _load_trial_on_local(
    context_dir: pathlib.Path,
    training: bool,
    config: Dict[str, Any],
    hparams: Dict[str, Any],
) -> Tuple[Type[det.Trial], det.TrialContext]:
    """Load the trial class named by ``config["entrypoint"]`` and build its context.

    All loading happens inside ``det._local_execution_manager(context_dir)``.

    Returns:
        A ``(trial_class, trial_context)`` pair, where the context is created
        through ``trial_class.trial_context_class``.
    """
    with det._local_execution_manager(context_dir):
        loaded_cls = load.load_trial_implementation(config["entrypoint"])
        local_env = det._make_local_execution_env(training, config, hparams)
        # The environment helper yields three values; the rendezvous info is
        # not needed to construct the context here.
        env, _rendezvous_info, hvd_config = local_env
        context = loaded_cls.trial_context_class(env, hvd_config)
    return loaded_cls, context
Exemplo n.º 3
0
def _load_trial_on_local(
    context_dir: pathlib.Path,
    managed_training: bool,
    config: Dict[str, Any],
    hparams: Dict[str, Any],
) -> Tuple[Type[det.Trial], det.TrialContext]:
    """Load the trial class named by ``config["entrypoint"]`` and build its context.

    All loading happens inside ``det._local_execution_manager(context_dir)``.
    The local execution environment is always created with ``test_mode=False``.

    Returns:
        A ``(trial_class, trial_context)`` pair, where the context is created
        through ``trial_class.trial_context_class``.
    """
    with det._local_execution_manager(context_dir):
        loaded_cls = load.trial_class_from_entrypoint(config["entrypoint"])
        local_env = det._make_local_execution_env(
            managed_training=managed_training,
            test_mode=False,
            config=config,
            hparams=hparams,
        )
        env, rendezvous_info, hvd_config = local_env
        context = loaded_cls.trial_context_class(env, hvd_config, rendezvous_info)
    return loaded_cls, context
Exemplo n.º 4
0
def test_test_one_batch() -> None:
    """Run ``experimental.test_one_batch`` on the XOR trial with a small config."""
    # Use the directory that contains the XOR model module as the context.
    model_dir = pathlib.Path(pytorch_xor_model.__file__).parent
    hyperparameters = {
        "hidden_size": 2,
        "learning_rate": 0.5,
        "global_batch_size": 4,
    }
    with det._local_execution_manager(model_dir):
        experimental.test_one_batch(
            trial_class=pytorch_xor_model.XORTrial,
            config={"hyperparameters": hyperparameters},
        )
Exemplo n.º 5
0
def local_experiment(args: Namespace) -> None:
    """Run a one-batch local test of the experiment described by ``args``.

    Reads ``args.test_mode``, ``args.config_file``, ``args.config`` and
    ``args.model_def``.

    Raises:
        NotImplementedError: if ``args.test_mode`` is falsy.
    """
    # Guard: only the combination --local --test is implemented.
    if not args.test_mode:
        raise NotImplementedError(
            "Local training mode (--local mode without --test mode) is not yet supported. Please "
            "try local test mode by adding the --test flag or cluster training mode by removing "
            "the --local flag.")

    experiment_config = _parse_config_file_or_exit(args.config_file, args.config)

    debug_enabled = bool(experiment_config.get("debug", False))
    set_logger(debug_enabled)

    model_dir = args.model_def.resolve()
    with _local_execution_manager(model_dir):
        entrypoint = experiment_config["entrypoint"]
        trial_cls = determined.load.trial_class_from_entrypoint(entrypoint)
        determined.experimental.test_one_batch(
            trial_class=trial_cls,
            config=experiment_config,
        )
Exemplo n.º 6
0
def local_experiment(args: Namespace) -> None:
    """Run a one-batch local test of the experiment described by ``args``.

    Reads ``args.test_mode``, ``args.config_file`` and ``args.model_def``.

    Raises:
        ImportError: re-raised, after printing a hint, when the ``determined``
            package cannot be imported.
        NotImplementedError: if ``args.test_mode`` is falsy.
    """
    # Import inside the function so a missing package yields a readable hint
    # before the ImportError propagates.
    try:
        import determined as det
        from determined import experimental, load
    except ImportError as err:
        print("--local requires that the `determined` package is installed.")
        raise err

    # Guard: only the combination --local --test is implemented.
    if not args.test_mode:
        raise NotImplementedError(
            "Local training mode (--local mode without --test mode) is not yet supported. Please "
            "try local test mode by adding the --test flag or cluster training mode by removing "
            "the --local flag."
        )

    experiment_config = _parse_config_file_or_exit(args.config_file)

    # NOTE(review): `determined` is not bound by the local imports above; this
    # presumably relies on a module-level import -- confirm.
    debug_enabled = bool(experiment_config.get("debug", False))
    determined.common.set_logger(debug_enabled)

    model_dir = args.model_def.resolve()
    with det._local_execution_manager(model_dir):
        trial_cls = load.trial_class_from_entrypoint(experiment_config["entrypoint"])
        experimental.test_one_batch(trial_class=trial_cls, config=experiment_config)
Exemplo n.º 7
0
def init_native(
    trial_def: Optional[Type[det.Trial]] = None,
    controller_cls: Optional[Type[det.TrialController]] = None,
    native_context_cls: Optional[Type[det.NativeContext]] = None,
    config: Optional[Dict[str, Any]] = None,
    local: bool = False,
    test: bool = False,
    context_dir: str = "",
    command: Optional[List[str]] = None,
    master_url: Optional[str] = None,
) -> Any:
    """Dispatch to a local one-batch test or to cluster-mode initialization.

    When ``local`` is true, ``test_one_batch`` is run inside
    ``det._local_execution_manager`` for the resolved ``context_dir``; a
    warning is logged first if ``test`` is false. Otherwise every argument
    except ``local`` is forwarded to ``_init_cluster_mode``.

    Returns:
        Whatever the selected helper returns.
    """
    debug_enabled = util.debug_mode() or det.ExperimentConfig(config or {}).debug_enabled()
    determined.common.set_logger(debug_enabled)

    # Cluster path first, so the local path reads without nesting.
    if not local:
        return _init_cluster_mode(
            trial_def=trial_def,
            controller_cls=controller_cls,
            native_context_cls=native_context_cls,
            config=config,
            test=test,
            context_dir=context_dir,
            command=command,
            master_url=master_url,
        )

    if not test:
        logging.warning("local training is not supported, testing instead")

    resolved_context = pathlib.Path(context_dir).resolve()
    with det._local_execution_manager(resolved_context):
        return test_one_batch(
            controller_cls=controller_cls,
            native_context_cls=native_context_cls,
            trial_class=trial_def,
            config=config,
        )
Exemplo n.º 8
0
def create(
    trial_def: Type[det.Trial],
    config: Optional[Dict[str, Any]] = None,
    local: bool = False,
    test: bool = False,
    context_dir: str = "",
    command: Optional[List[str]] = None,
    master_url: Optional[str] = None,
) -> Any:
    # TODO: Add a reference to the local development tutorial.
    """
    Create an experiment.

    Arguments:
        trial_def:
            A class definition that implements the :class:`determined.Trial`
            interface.

        config:
            A dictionary holding the experiment configuration to associate
            with the experiment.

        local:
            Whether training should happen locally. When ``False``, the
            experiment is submitted to the Determined cluster. Defaults to
            ``False``.

        test:
            Whether the experiment should be shortened to a minimal loop:
            training on a small amount of data, performing validation, and
            checkpointing. ``test=True`` is useful for quick iteration while
            porting or debugging a model, because common errors surface
            sooner. Defaults to ``False``.

        context_dir:
            A string filepath naming the context directory. All model code
            is executed with this as the current working directory.

            When ``local=False``, this argument is required. Every file in
            the directory is uploaded to the Determined cluster, and the
            directory's total size must be under 96 MB.

            When ``local=True``, this argument is optional and defaults to
            the current working directory.

        command:
            A list of strings used as the entrypoint of the training script
            in the Determined task environment. When this function is run
            from a Python script, the argument is inferred to be
            ``sys.argv`` by default. When it is run from IPython or a
            Jupyter notebook, the argument is required.

            Example: creating an experiment with ``python train.py --flag
            value`` infers the default command ``["train.py", "--flag",
            "value"]``.

        master_url:
            An optional string used as the Determined master URL when
            ``local=False``. If omitted, it is inferred from the environment
            variable ``DET_MASTER``.

    Raises:
        NotImplementedError: if ``local=True`` and ``test=False``.
        determined.errors.StopLoadingImplementation: when
            ``load.RunpyGlobals`` is already initialized, after ``trial_def``
            has been recorded as the runpy trial result.
    """

    local_training_requested = local and not test
    if local_training_requested:
        raise NotImplementedError(
            "det.create(local=True, test=False) is not yet implemented. Please set local=False "
            "or test=True.")

    debug_enabled = util.debug_mode() or det.ExperimentConfig(config or {}).debug_enabled()
    determined.common.set_logger(debug_enabled)

    if local:
        # Local test mode.
        resolved_context = pathlib.Path(context_dir).resolve()
        with det._local_execution_manager(resolved_context):
            return test_one_batch(trial_class=trial_def, config=config)

    if load.RunpyGlobals.is_initialized():
        # Cluster mode, now on the cluster; actually train.
        load.RunpyGlobals.set_runpy_trial_result(trial_def)
        raise det.errors.StopLoadingImplementation()

    # Cluster mode, but still running locally; submit the experiment.
    _submit_experiment(
        config=config,
        test=test,
        context_dir=context_dir,
        command=command,
        master_url=master_url,
    )
    return None