def test_base_model_init(tmpdir):
    """Creating a model persists its files, and the model can later be reloaded."""
    model_dir = Path(tmpdir)
    expected_params = {"param": "a"}

    # A freshly created model must write both its metadata and info files
    created = MockModel(data_dir=model_dir, **expected_params)
    assert created.metadata_path.exists()
    assert created.info_path.exists()

    # The info file records the model class name and the library version
    stored_info = read_metadata(created.info_path)
    assert stored_info["class"] == created.__class__.__name__
    assert stored_info["gobbli_version"] == gobbli_version()

    # Creating a second model in the occupied directory is rejected
    # when load_existing=False
    with pytest.raises(ValueError):
        MockModel(data_dir=model_dir, load_existing=False, **expected_params)

    # Loading the existing model restores the saved params even though
    # none are passed explicitly
    reloaded = MockModel(data_dir=model_dir, load_existing=True)
    assert reloaded.params == expected_params
def __init__(
    self,
    data_dir: Optional[Path] = None,
    load_existing: bool = False,
    use_gpu: bool = False,
    nvidia_visible_devices: str = "all",
    logger: Optional[logging.Logger] = None,
    **kwargs,
):
    """
    Create a model.

    Args:
      data_dir: Optional path to a directory used to store model data.  If not given,
        a unique directory under GOBBLI_DIR will be created and used.
      load_existing: If True, ``data_dir`` should be a directory that was previously used
        to create a model.  The saved parameters are loaded, and any model parameters
        passed by the user are ignored (with a warning).  If False, the data_dir must
        be empty if it already exists.
      use_gpu: If True, use the nvidia-docker runtime
        (https://github.com/NVIDIA/nvidia-docker) to expose NVIDIA GPU(s) to the
        container.  Will cause an error if the computer you're running on doesn't
        have an NVIDIA GPU and/or doesn't have the nvidia-docker runtime installed.
      nvidia_visible_devices: Which GPUs to make available to the container; ignored if
        ``use_gpu`` is False.  If not 'all', should be a comma-separated string: ex. ``1,2``.
      logger: If passed, use this logger for logging instead of the default module-level logger.
      **kwargs: Additional model-specific parameters to be passed to the model's
        :meth:`init` method.

    Raises:
      ValueError: If a new model would be created in a non-empty directory.
    """
    self._data_dir = (
        self.model_class_dir() / generate_uuid() if data_dir is None else data_dir
    )
    self._data_dir.mkdir(parents=True, exist_ok=True)

    # Saved params are only reused when the caller asked for it AND a
    # metadata file is actually present; otherwise a new model is created.
    reuse_saved_params = load_existing and self.metadata_path.exists()
    if not reuse_saved_params:
        if not is_dir_empty(self._data_dir):
            raise ValueError(
                f"data_dir '{self._data_dir}' is non-empty;"
                " it must be empty to avoid overwriting data."
            )
        params = kwargs
        write_metadata(params, self.metadata_path)
    else:
        params = read_metadata(self.metadata_path)
        if kwargs:
            warnings.warn(
                "User-passed params ignored due to existing model being "
                f"loaded: {kwargs}"
            )

    self.use_gpu = use_gpu
    self.nvidia_visible_devices = nvidia_visible_devices
    self._logger = logger if logger is not None else LOGGER
    self.docker_client = docker.from_env()

    # Hand the resolved params to the subclass-specific initializer
    self.init(params)
def test_base_model_init_warnings_errors(tmpdir):
    """Loading an existing model warns or errors on ignored params and bad info files."""
    model_dir = Path(tmpdir)

    # Bootstrap the metadata/info files with an initial model
    seed_model = MockModel(data_dir=model_dir, param="a")

    # Params passed while loading an existing model are ignored, with a warning
    with pytest.warns(UserWarning):
        MockModel(data_dir=model_dir, load_existing=True, param="b")

    original_info = read_metadata(seed_model.info_path)

    def overwrite_info_field(field, value):
        # Write a copy of the original info with a single field replaced
        tampered = original_info.copy()
        tampered[field] = value
        write_metadata(tampered, seed_model.info_path)

    # A changed gobbli version in the info file produces a warning
    overwrite_info_field("gobbli_version", "not a real version")
    with pytest.warns(UserWarning):
        MockModel(data_dir=model_dir, load_existing=True)

    # A changed model class in the info file is an error
    overwrite_info_field("class", "not a real model")
    with pytest.raises(ValueError):
        MockModel(data_dir=model_dir, load_existing=True)
def st_select_model_checkpoint(
    model_data_path: Path, use_gpu: bool, nvidia_visible_devices: str
) -> Tuple[Any, Dict[str, Any], Dict[str, Any]]:
    """
    Generate widgets allowing for users to select a checkpoint from a given model directory.

    Args:
      model_data_path: Path to the model data directory to search for checkpoints.
      use_gpu: If True, initialize the model using a GPU.
      nvidia_visible_devices: The list of devices to make available to the model container.
        Should be either "all" or a comma-separated list of device IDs (ex "1,2").

    Returns:
      A 3-tuple: the class of model corresponding to the checkpoint,
      the kwargs to initialize the model with, and the metadata for the checkpoint.
      If no trained checkpoint is found, an error is displayed via ``st.error``
      and ``None`` is returned instead.

    Raises:
      ValueError: If the directory has no model info file, or the info file names
        a class that is not an attribute of ``gobbli.model``.
    """
    try:
        model_info = read_metadata(model_data_path / BaseModel._INFO_FILENAME)
    except FileNotFoundError as err:
        raise ValueError(
            "The passed model data directory does not appear to contain a saved gobbli model. "
            "Did you pass the right directory?"
        ) from err

    model_cls_name = model_info["class"]
    if not hasattr(gobbli.model, model_cls_name):
        raise ValueError(f"Unknown model type: {model_cls_name}")
    model_cls = getattr(gobbli.model, model_cls_name)

    model_kwargs = {
        "data_dir": model_data_path,
        "load_existing": True,
        "use_gpu": use_gpu,
        "nvidia_visible_devices": nvidia_visible_devices,
    }
    model = model_cls(**model_kwargs)

    # Maps a formatted task name to the metadata of a task that has a checkpoint
    task_metadata = {}
    if isinstance(model, TrainMixin):
        # The model can be trained, so it may have some trained weights
        model_train_dir = model.train_dir()

        # The train directory may be missing (e.g. the model was never trained);
        # treat that as "no checkpoints" rather than letting iterdir() raise.
        task_dirs = model_train_dir.iterdir() if model_train_dir.is_dir() else ()

        # See if any checkpoints are available for the given model
        for task_dir in task_dirs:
            task_context = ContainerTaskContext(task_dir)
            output_dir = task_context.host_output_dir
            if not output_dir.exists():
                continue

            metadata_path = output_dir / TaskIO._METADATA_FILENAME
            if not metadata_path.exists():
                continue

            with open(metadata_path, "r") as f:
                metadata = json.load(f)

            # Only tasks that recorded a checkpoint are selectable
            if "checkpoint" in metadata:
                task_metadata[format_task(task_dir)] = metadata

    if not task_metadata:
        st.error("No trained checkpoints found for the given model.")
        return None

    model_checkpoint = st.sidebar.selectbox(
        "Model Checkpoint", list(task_metadata.keys())
    )
    return model_cls, model_kwargs, task_metadata[model_checkpoint]
def __init__(
    self,
    data_dir: Optional[Path] = None,
    load_existing: bool = False,
    use_gpu: bool = False,
    nvidia_visible_devices: str = "all",
    logger: Optional[logging.Logger] = None,
    **kwargs,
):
    """
    Create a model.

    Args:
      data_dir: Optional path to a directory used to store model data.  If not given,
        a unique directory under GOBBLI_DIR will be created and used.
      load_existing: If True, ``data_dir`` should be a directory that was previously used
        to create a model.  Parameters will be loaded to match the original model, and
        user-specified model parameters will be ignored.  If False, the data_dir must
        be empty if it already exists.
      use_gpu: If True, use the nvidia-docker runtime
        (https://github.com/NVIDIA/nvidia-docker) to expose NVIDIA GPU(s) to the
        container.  Will cause an error if the computer you're running on doesn't
        have an NVIDIA GPU and/or doesn't have the nvidia-docker runtime installed.
      nvidia_visible_devices: Which GPUs to make available to the container; ignored if
        ``use_gpu`` is False.  If not 'all', should be a comma-separated string: ex. ``1,2``.
      logger: If passed, use this logger for logging instead of the default module-level logger.
      **kwargs: Additional model-specific parameters to be passed to the model's
        :meth:`init` method.

    Raises:
      ValueError: If the info file found in the data directory names a different
        model class, or if a new model would be created in a non-empty directory.
    """
    self._logger = LOGGER
    if logger is not None:
        self._logger = logger

    if data_dir is None:
        self._data_dir = self.model_class_dir() / generate_uuid()
    else:
        self._data_dir = data_dir

    # Ensure we have an absolute data dir so any derived paths used in metadata files, etc
    # aren't ambiguous
    self._data_dir = self._data_dir.resolve()
    self._data_dir.mkdir(parents=True, exist_ok=True)

    class_name = self.__class__.__name__
    cur_gobbli_version = gobbli_version()

    # Validate an existing info file before touching anything else.  Messages
    # report the resolved directory that was actually inspected, not the raw
    # (possibly relative) argument.
    if self.info_path.exists():
        info = read_metadata(self.info_path)
        if info["class"] != class_name:
            raise ValueError(
                f"Model class mismatch: the model stored in {self._data_dir} is of "
                f"class '{info['class']}'. Expected '{class_name}'."
            )
        if info["gobbli_version"] != cur_gobbli_version:
            warnings.warn(
                f"The model stored in {self._data_dir} was created with gobbli version "
                f"{info['gobbli_version']}, but you're running version {cur_gobbli_version}. "
                "You may encounter compatibility issues."
            )

    if load_existing and self.metadata_path.exists():
        # Reuse the saved params; anything the caller passed is dropped
        params = read_metadata(self.metadata_path)
        if kwargs:
            warnings.warn(
                "User-passed params ignored due to existing model being "
                f"loaded: {kwargs}"
            )
    else:
        # Creating a new model: refuse to write into a directory with content
        if not is_dir_empty(self._data_dir):
            raise ValueError(
                f"data_dir '{self._data_dir}' is non-empty;"
                " it must be empty to avoid overwriting data."
            )
        params = kwargs
        write_metadata(params, self.metadata_path)
        write_metadata(
            {"class": class_name, "gobbli_version": cur_gobbli_version},
            self.info_path,
        )

    self.use_gpu = use_gpu
    self.nvidia_visible_devices = nvidia_visible_devices
    self.docker_client = docker.from_env()

    # Hand the resolved params to the subclass-specific initializer
    self.init(params)

    self._logger.info(
        f"{class_name} initialized with data directory '{self._data_dir}'"
    )