def __init__(self, ds: Dataset, section: str, **kwargs):

    # guard clause: only datasets stored by the framework can be mounted
    # (an assert here would vanish under `python -O` and raise the wrong exception type)
    if not ds.stored:
        raise NhaStorageError(
            "Dataset '{}' is not stored by the framework, so it cannot be mounted in a container"
            .format(ds.show())
        )

    subdir = ds.get_dir_name()
    dyr = OnBoard.SHARED_DATA_DIR

    super().__init__(
        alias='dataset-{}'.format(ds.get_pk()),
        mount_to=os.path.join(dyr, subdir),
        mode='ro',  # datasets are always mounted read-only
        barrel=DatasetBarrel(ds, **kwargs),
        section=section,
        lightweight=ds.lightweight
    )
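# A minimal usage sketch (not part of the module). It assumes the enclosing
# class is a dataset volume (called `DatasetVolume` here purely for
# illustration) and that a dataset 'my-dataset' exists for model 'my-model':
#
#     ds = Dataset.find_one(name='my-dataset', model='my-model')
#     vol = DatasetVolume(ds=ds, section='train')
#     # the dataset's files become available read-only under
#     # OnBoard.SHARED_DATA_DIR/<dataset directory>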
def new(self, name: str = None, model: str = None, train: str = None, ds: str = None,
        path: str = None, pretrained: str = None, skip_upload=False, lightweight=False,
        **kwargs):

    if path is None:
        raise NhaAPIError("Cannot publish model version if path to model files is not provided")

    model = Model.find_one(name=model)

    if lightweight:
        model.assert_movers_can_be_lightweight()

    if ds is not None:
        kwargs['ds'] = Dataset.find_one(name=ds, model=model).to_embedded()

    if train is not None:
        if self.proj is None:
            raise NhaAPIError("Cannot determine parent training if no working project is set")
        else:
            kwargs['train'] = Training.find_one(name=train, proj=self.proj.name).to_embedded()

    if pretrained is not None:
        kwargs['pretrained'] = ModelVersion.find_by_pk(pretrained).to_embedded()
        LOG.info("Model version uses pre-trained model '{}'".format(pretrained))

    mv: ModelVersion = super().new(
        name=name,
        model=model,
        lightweight=lightweight,
        **kwargs,
        _duplicate_filter=dict(name=name, model=model)
    )

    barrel = None

    try:
        if not skip_upload:
            barrel = self._store(mv, path)
    except Exception as e:
        # roll back: remove the metadata record and purge any partial upload
        LOG.warn("Reverting creation of model version '{}'".format(mv.name))
        mv.delete()

        if barrel is not None:
            barrel.purge(ignore=True)

        raise e

    return mv
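# A usage sketch (illustrative, not part of the module), assuming an instance
# of this API class named `mv_api`; the model, dataset and path are examples:
#
#     mv = mv_api.new(
#         name='v1',
#         model='my-model',
#         ds='training-data',       # linked as embedded metadata
#         path='/tmp/model-files',  # uploaded via _store unless skip_upload=True
#     )
#
# If the upload fails, the version record is deleted and any partial upload is
# purged, so no half-published model version is left behind.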
def lyst(self, _filter: dict = None, model: str = None, train: str = None, ds: str = None,
         **kwargs):

    if model is not None:
        kwargs['model'] = Model.find_one(name=model).name

    _filter = _filter or {}

    if train is not None:
        if self.proj is None:
            raise NhaAPIError("Cannot filter by training name if no working project is set")
        else:
            # resolve the training once, then filter by its name and parent project
            train = Training.find_one(name=train, proj=self.proj.name)
            _filter['train.name'] = train.name
            _filter['train.bvers.proj.name'] = train.bvers.proj.name

    if ds is not None:
        if model is None:
            raise NhaAPIError("Cannot filter by dataset name if no model was specified")
        else:
            ds = Dataset.find_one(name=ds, model=model)
            _filter['ds.name'] = ds.name
            _filter['ds.model'] = ds.model.name

    return super().lyst(_filter=_filter, **kwargs)
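# A usage sketch (illustrative names): listing the versions of 'my-model' that
# were generated from the dataset 'training-data':
#
#     mv_api.lyst(model='my-model', ds='training-data')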
def update(self, name, model, train: str = None, ds: str = None, path: str = None, **kwargs):

    if ds is not None:
        kwargs['ds'] = Dataset.find_one(name=ds, model=model).to_embedded()

    if train is not None:
        if self.proj is None:
            raise NhaAPIError("Cannot determine parent training if no working project is set")
        else:
            kwargs['train'] = Training.find_one(name=train, proj=self.proj.name).to_embedded()

    mv = super().update(
        filter_kwargs=dict(name=name, model=model),
        update_kwargs=kwargs
    )

    if path is not None:
        self._store(mv, path)

    return mv
def new(self, name: str = None, tag=DockerConst.LATEST, notebook: str = None,
        params: dict = None, details: dict = None, datasets: list = None, movers: list = None,
        _replace: bool = None, target_deploy: str = None, **kwargs):

    self.set_logger(name)
    bv = BuildVersion.find_one_or_none(tag=tag, proj=self.proj)
    movers = [ModelVersion.find_by_pk(mv).to_embedded() for mv in movers or []]
    datasets = [Dataset.find_by_pk(ds) for ds in datasets or []]

    if name is None:
        # if all referenced datasets share one name, adopt it as the training name
        all_names = {ds.name for ds in datasets}

        if len(all_names) == 1:
            name = all_names.pop()

    for mv in movers:
        self.LOG.info("Pre-trained model '{}' will be available in this training".format(mv.show()))

    train: Training = super().new(
        name=name,
        proj=self.proj,
        bvers=None if bv is None else bv.to_embedded(),
        notebook=assert_extension(notebook, Extension.IPYNB),
        details=join_dicts(details or {}, dict(params=params or {}), allow_overwrite=False),
        _duplicate_filter=dict(name=name, proj=self.proj)
    )

    exp = TrainingExp(
        train=train,
        tag=tag,
        datasets=datasets,
        movers=movers,
        resource_profile=kwargs.pop('resource_profile', None),
        log=self.LOG
    )

    exp.launch(**kwargs)
    train.reload()

    if target_deploy is not None and train.task.state == Task.State.FINISHED:
        self._update_deploy(target_deploy, train)

    self.reset_logger()
    return train.reload()
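# A usage sketch (illustrative, assuming an instance `train_api` of this API
# class and that the referenced build tag, notebook and datasets exist; the
# deploy name and parameter values are assumptions):
#
#     train = train_api.new(
#         name='exp-01',
#         notebook='notebooks/train',  # '.ipynb' extension enforced by assert_extension
#         params={'epochs': 10},
#         datasets=[...],              # dataset primary keys, as accepted by Dataset.find_by_pk
#         target_deploy='my-deploy',   # only updated if the training task finishes successfully
#     )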
def __init__(self, ds: Dataset, **kwargs):

    self.ds_name = ds.name
    self.model_name = ds.model.name
    self.subject = "dataset '{}'".format(ds.show())

    super().__init__(
        schema=ds.model.data_files,
        compress_to=ds.name if ds.compressed else None,
        lightweight=ds.lightweight,
        **kwargs
    )
def _store(self, ds: Dataset, path: str = None, files: dict = None):

    if path or files:  # at least one source of files was provided
        barrel = DatasetBarrel(ds)

        if barrel.schema is None:
            LOG.warn("Publishing dataset '{}' without a strict file definition".format(ds.get_pk()))

        if path:
            barrel.store_from_path(path)
        elif files:
            barrel.store_from_dict(files)
        else:
            raise NotImplementedError()

        return barrel
    else:
        LOG.warn(
            "Dataset '{}' for model '{}' is not being stored by the framework"
            .format(ds.name, ds.model.name)
        )
        ds.update(stored=False)
        return None
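# A usage sketch (illustrative) of the three storage outcomes; `ds_api` and
# the argument values are assumptions for the example:
#
#     ds_api._store(ds, path='/tmp/dataset-files')    # upload from a local path
#     ds_api._store(ds, files={'data.csv': content})  # upload from a name->content mapping
#     ds_api._store(ds)                               # no files: dataset flagged as not stored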
def __call__(self, tag: str = DockerConst.LATEST, port: int = NoteConst.HOST_PORT,
             movers: list = None, datasets: list = None, **kwargs):

    LOG.info("Notebook IDE will be mapped to port {}".format(port))

    return NotebookExp(
        port=port,
        proj=self.proj,
        tag=tag,
        movers=[ModelVersion.find_by_pk(mv).to_embedded() for mv in movers or []],
        datasets=[Dataset.find_by_pk(ds) for ds in datasets or []],
        resource_profile=kwargs.pop('resource_profile', None)
    ).launch(**kwargs)
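# A usage sketch (illustrative, assuming an instance `note_api` of this class);
# the port value is an example, not a recommended default:
#
#     note_api(
#         port=30088,
#         movers=[...],    # model version primary keys, as accepted by ModelVersion.find_by_pk
#         datasets=[...],  # dataset primary keys, as accepted by Dataset.find_by_pk
#     )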