Exemplo n.º 1
0
def test_base_model_init(tmpdir):
    """
    Check the on-disk artifacts produced by constructing a model, and that a
    populated data directory can only be reused via ``load_existing=True``.
    """
    model_dir = Path(tmpdir)
    expected_params = {"param": "a"}

    # Build a fresh model inside the (empty) temporary directory
    created = MockModel(data_dir=model_dir, **expected_params)

    # Construction must have written the metadata file ...
    assert created.metadata_path.exists()

    # ... as well as the info file
    assert created.info_path.exists()

    # The info file records the model class name and the gobbli version
    stored_info = read_metadata(created.info_path)
    assert stored_info["class"] == created.__class__.__name__
    assert stored_info["gobbli_version"] == gobbli_version()

    # The directory is now populated, so creating another model there
    # without load_existing must be rejected
    with pytest.raises(ValueError):
        MockModel(data_dir=model_dir, load_existing=False, **expected_params)

    # Loading the existing model restores the original params even though
    # none are passed explicitly
    reloaded = MockModel(data_dir=model_dir, load_existing=True)
    assert reloaded.params == expected_params
Exemplo n.º 2
0
    def __init__(
        self,
        data_dir: Optional[Path] = None,
        load_existing: bool = False,
        use_gpu: bool = False,
        nvidia_visible_devices: str = "all",
        logger: Optional[logging.Logger] = None,
        **kwargs,
    ):
        """
        Set up a model backed by a data directory on disk.

        Args:
          data_dir: Directory used to store model data.  When omitted, a uniquely
            named directory beneath ``model_class_dir()`` is used.  The directory
            is created if it does not exist.
          load_existing: When True and a metadata file is already present in
            ``data_dir``, the parameters stored there are used and any parameters
            passed by the caller are ignored (with a warning).  Otherwise the
            directory must be empty and the passed parameters are written to it.
          use_gpu: If True, use the nvidia-docker runtime
            (https://github.com/NVIDIA/nvidia-docker) to expose NVIDIA GPU(s) to
            the container.  This fails on machines without an NVIDIA GPU and/or
            without the nvidia-docker runtime installed.
          nvidia_visible_devices: GPUs to make available to the container; ignored
            when ``use_gpu`` is False.  Either 'all' or a comma-separated string,
            ex. ``1,2``.
          logger: Logger to use in place of the default module-level logger.
          **kwargs: Model-specific parameters forwarded to the model's :meth:`init` method.

        Raises:
          ValueError: If a new model would be created in a non-empty directory.
        """
        if data_dir is not None:
            self._data_dir = data_dir
        else:
            self._data_dir = self.model_class_dir() / generate_uuid()
        self._data_dir.mkdir(parents=True, exist_ok=True)

        reuse_saved = load_existing and self.metadata_path.exists()
        if not reuse_saved:
            # Creating a new model: never write into a directory that already holds data
            if not is_dir_empty(self._data_dir):
                raise ValueError(
                    f"data_dir '{self._data_dir}' is non-empty;"
                    " it must be empty to avoid overwriting data.")
            model_params = kwargs
            write_metadata(model_params, self.metadata_path)
        else:
            # Reusing a saved model: the stored params win over anything the caller passed
            model_params = read_metadata(self.metadata_path)
            if kwargs:
                warnings.warn(
                    "User-passed params ignored due to existing model being "
                    f"loaded: {kwargs}")

        self.use_gpu = use_gpu
        self.nvidia_visible_devices = nvidia_visible_devices

        self._logger = logger if logger is not None else LOGGER

        self.docker_client = docker.from_env()

        self.init(model_params)
Exemplo n.º 3
0
def test_base_model_init_warnings_errors(tmpdir):
    """
    Check the warnings and errors raised when loading an existing model with
    ignored params or with a tampered info file.
    """
    model_dir = Path(tmpdir)

    # Seed the directory with a model so the metadata/info files exist
    seed_model = MockModel(data_dir=model_dir, param="a")

    # Params passed while loading an existing model are ignored, with a warning
    with pytest.warns(UserWarning):
        MockModel(data_dir=model_dir, load_existing=True, param="b")

    original_info = read_metadata(seed_model.info_path)

    def overwrite_info_field(field, value):
        # Rewrite the info file as the original contents with one field replaced
        altered_info = original_info.copy()
        altered_info[field] = value
        write_metadata(altered_info, seed_model.info_path)

    # A changed gobbli version in the info file should only produce a warning
    overwrite_info_field("gobbli_version", "not a real version")
    with pytest.warns(UserWarning):
        MockModel(data_dir=model_dir, load_existing=True)

    # A changed class in the info file should be a hard error
    overwrite_info_field("class", "not a real model")
    with pytest.raises(ValueError):
        MockModel(data_dir=model_dir, load_existing=True)
Exemplo n.º 4
0
def st_select_model_checkpoint(
    model_data_path: Path, use_gpu: bool, nvidia_visible_devices: str
) -> Tuple[Any, Dict[str, Any], Dict[str, Any]]:
    """
    Generate widgets allowing for users to select a checkpoint from a given model directory.

    Args:
      model_data_path: Path to the model data directory to search for checkpoints.
      use_gpu: If True, initialize the model using a GPU.
      nvidia_visible_devices: The list of devices to make available to the model container.
       Should be either "all" or a comma-separated list of device IDs (ex "1,2").

    Returns:
      A 3-tuple: the class of model corresponding to the checkpoint, the kwargs to initialize
      the model with, and the metadata for the checkpoint.  If no task metadata containing a
      checkpoint is found, an error is shown via ``st.error`` and ``None`` is returned instead.

    Raises:
      ValueError: If the info file is missing from ``model_data_path``, or if the class
        named in the info file is not an attribute of ``gobbli.model``.
    """
    try:
        model_info = read_metadata(model_data_path / BaseModel._INFO_FILENAME)
    except FileNotFoundError as err:
        # Chain explicitly so the original missing-file error stays attached as the cause
        raise ValueError(
            "The passed model data directory does not appear to contain a saved gobbli model. "
            "Did you pass the right directory?"
        ) from err

    model_cls_name = model_info["class"]
    if not hasattr(gobbli.model, model_cls_name):
        raise ValueError(f"Unknown model type: {model_cls_name}")
    model_cls = getattr(gobbli.model, model_cls_name)

    model_kwargs = {
        "data_dir": model_data_path,
        "load_existing": True,
        "use_gpu": use_gpu,
        "nvidia_visible_devices": nvidia_visible_devices,
    }
    model = model_cls(**model_kwargs)

    # Maps each formatted task name to the metadata of a task that produced a checkpoint
    task_metadata = {}
    if isinstance(model, TrainMixin):
        # The model can be trained, so it may have some trained weights
        model_train_dir = model.train_dir()

        # See if any checkpoints are available for the given model
        for task_dir in model_train_dir.iterdir():
            task_context = ContainerTaskContext(task_dir)
            output_dir = task_context.host_output_dir
            if not output_dir.exists():
                continue

            metadata_path = output_dir / TaskIO._METADATA_FILENAME
            if not metadata_path.exists():
                continue

            # Only hold the file open for as long as it takes to parse it
            with open(metadata_path, "r") as f:
                metadata = json.load(f)

            if "checkpoint" in metadata:
                task_metadata[format_task(task_dir)] = metadata

    if not task_metadata:
        st.error("No trained checkpoints found for the given model.")
        return None

    model_checkpoint = st.sidebar.selectbox(
        "Model Checkpoint", list(task_metadata.keys())
    )
    return model_cls, model_kwargs, task_metadata[model_checkpoint]
Exemplo n.º 5
0
    def __init__(
        self,
        data_dir: Optional[Path] = None,
        load_existing: bool = False,
        use_gpu: bool = False,
        nvidia_visible_devices: str = "all",
        logger: Optional[logging.Logger] = None,
        **kwargs,
    ):
        """
        Create a model.

        Args:
          data_dir: Optional path to a directory used to store model data.  If not given,
            a uniquely named directory beneath ``model_class_dir()`` will be created and used.
            The path is resolved to an absolute path before use.
          load_existing: If True, ``data_dir`` should be a directory that was previously used
            to create a model.  Parameters will be loaded to match the original model, and
            user-specified model parameters will be ignored (with a warning).  If False, or
            if no metadata file exists in the directory, the data_dir must be empty.
          use_gpu: If True, use the
            nvidia-docker runtime (https://github.com/NVIDIA/nvidia-docker) to expose
            NVIDIA GPU(s) to the container.  Will cause an error if the computer you're running
            on doesn't have an NVIDIA GPU and/or doesn't have the nvidia-docker runtime installed.
          nvidia_visible_devices: Which GPUs to make available to the container; ignored if
            ``use_gpu`` is False.  If not 'all', should be a comma-separated string: ex. ``1,2``.
          logger: If passed, use this logger for logging instead of the default module-level logger.
          **kwargs: Additional model-specific parameters to be passed to the model's :meth:`init` method.

        Raises:
          ValueError: If an info file in the data directory names a different model class,
            or if a new model would be created in a non-empty directory.
        """
        self._logger = LOGGER
        if logger is not None:
            self._logger = logger

        if data_dir is None:
            self._data_dir = self.model_class_dir() / generate_uuid()
        else:
            self._data_dir = data_dir
        # Ensure we have an absolute data dir so any derived paths used in metadata files, etc
        # aren't ambiguous
        self._data_dir = self._data_dir.resolve()
        self._data_dir.mkdir(parents=True, exist_ok=True)

        class_name = self.__class__.__name__
        cur_gobbli_version = gobbli_version()

        # Validate any existing info file before deciding whether to load or create.
        # Messages report the resolved directory actually in use, not the raw argument
        # (which may be None or a relative path).
        if self.info_path.exists():
            info = read_metadata(self.info_path)
            if info["class"] != class_name:
                raise ValueError(
                    f"Model class mismatch: the model stored in {self._data_dir} is of "
                    f"class '{info['class']}'.  Expected '{class_name}'.")
            if info["gobbli_version"] != cur_gobbli_version:
                warnings.warn(
                    f"The model stored in {self._data_dir} was created with gobbli version "
                    f"{info['gobbli_version']}, but you're running version {cur_gobbli_version}. "
                    "You may encounter compatibility issues.")

        if load_existing and self.metadata_path.exists():
            # Stored params take precedence over anything the caller passed
            params = read_metadata(self.metadata_path)
            if kwargs:
                warnings.warn(
                    "User-passed params ignored due to existing model being "
                    f"loaded: {kwargs}")

        else:
            if not is_dir_empty(self._data_dir):
                raise ValueError(
                    f"data_dir '{self._data_dir}' is non-empty;"
                    " it must be empty to avoid overwriting data.")
            params = kwargs
            write_metadata(params, self.metadata_path)
            # Record the class and version that created this directory for later checks
            write_metadata(
                {
                    "class": class_name,
                    "gobbli_version": cur_gobbli_version
                },
                self.info_path,
            )

        self.use_gpu = use_gpu
        self.nvidia_visible_devices = nvidia_visible_devices

        self.docker_client = docker.from_env()

        self.init(params)

        self._logger.info(
            f"{class_name} initialized with data directory '{self._data_dir}'")