Example #1
0
    def __init__(self, ds: Dataset, section: str, **kwargs):
        """Mount a framework-stored dataset into a container, read-only.

        :param ds: the dataset whose files will be mounted.
        :param section: configuration section this mount belongs to.
        :param kwargs: forwarded to the DatasetBarrel that handles the files.
        :raises NhaStorageError: if the dataset is not stored by the framework.
        """

        # Validate with an explicit raise instead of `assert`: assertions are
        # stripped when Python runs with -O, and the intended exception here is
        # NhaStorageError, not an AssertionError wrapping it.
        if not ds.stored:
            raise NhaStorageError(
                """Dataset '{}' is not stored by the framework, so it cannot be mounted in a container"""
                .format(ds.show()))

        subdir = ds.get_dir_name()
        dyr = OnBoard.SHARED_DATA_DIR  # shared data root inside the container
        super().__init__(alias='dataset-{}'.format(ds.get_pk()),
                         mount_to=os.path.join(dyr, subdir),
                         mode='ro',
                         barrel=DatasetBarrel(ds, **kwargs),
                         section=section,
                         lightweight=ds.lightweight)
Example #2
0
    def new(self,
            name: str = None,
            model: str = None,
            train: str = None,
            ds: str = None,
            path: str = None,
            pretrained: str = None,
            skip_upload=False,
            lightweight=False,
            **kwargs):
        """Create a new model version and optionally upload its files.

        :param name: name for the new model version.
        :param model: name of the parent model.
        :param train: name of the parent training (requires a working project).
        :param ds: name of the dataset used to produce this version.
        :param path: path to the model files to be stored (mandatory).
        :param pretrained: primary key of a pre-trained model version.
        :param skip_upload: if True, the files at `path` are not uploaded.
        :param lightweight: whether this version is stored in lightweight mode.
        :param kwargs: extra fields forwarded to the model version document.
        :return: the created ModelVersion.
        :raises NhaAPIError: if `path` is missing, or `train` is given while
            no working project is set.
        """

        if path is None:
            raise NhaAPIError(
                "Cannot publish model version if path to model files is not provided"
            )

        # resolve the parent model by name (shadowing the name argument)
        model = Model.find_one(name=model)

        if lightweight:
            model.assert_movers_can_be_lightweight()

        if ds is not None:
            kwargs['ds'] = Dataset.find_one(name=ds, model=model).to_embedded()

        if train is not None:
            if self.proj is None:
                raise NhaAPIError(
                    "Cannot determine parent training if no working project is set"
                )
            else:
                kwargs['train'] = Training.find_one(
                    name=train, proj=self.proj.name).to_embedded()

        if pretrained is not None:
            kwargs['pretrained'] = ModelVersion.find_by_pk(
                pretrained).to_embedded()
            LOG.info(
                "Model version used pre-trained model '{}'".format(pretrained))

        mv: ModelVersion = super().new(name=name,
                                       model=model,
                                       lightweight=lightweight,
                                       **kwargs,
                                       _duplicate_filter=dict(name=name,
                                                              model=model))

        barrel = None

        try:
            if not skip_upload:
                barrel = self._store(mv, path)
        except Exception:
            # roll back: remove the document and purge any partial upload
            LOG.warn("Reverting creation of model version '{}'".format(
                mv.name))
            mv.delete()
            if barrel is not None:
                barrel.purge(ignore=True)
            # bare raise preserves the original traceback (`raise e` would
            # restart it from this line)
            raise

        return mv
Example #3
0
    def lyst(self,
             _filter: dict = None,
             model: str = None,
             train: str = None,
             ds: str = None,
             **kwargs):
        """List model versions, optionally filtered by model, training or dataset.

        :param _filter: base Mongo-style filter dict (extended in place here).
        :param model: name of the parent model to filter by.
        :param train: name of the parent training (requires a working project).
        :param ds: name of the source dataset (requires `model`).
        :param kwargs: extra equality filters forwarded to the parent lister.
        :return: whatever the parent `lyst` returns for the combined filter.
        :raises NhaAPIError: if `train` is given without a working project, or
            `ds` is given without `model`.
        """

        if model is not None:
            # call find_one on the class for consistency with the other
            # methods (no need to instantiate an empty document first)
            kwargs['model'] = Model.find_one(name=model).name

        _filter = _filter or {}

        if train is not None:
            if self.proj is None:
                raise NhaAPIError(
                    "Cannot filter by training name if no working project is set"
                )
            else:
                train = Training.find_one(name=train, proj=self.proj.name)
                _filter['train.name'] = train.name
                _filter['train.bvers.proj.name'] = train.bvers.proj.name

        if ds is not None:
            if model is None:
                raise NhaAPIError(
                    "Cannot filter by dataset name if no model was specified")
            else:
                ds = Dataset.find_one(name=ds, model=model)
                _filter['ds.name'] = ds.name
                _filter['ds.model'] = ds.model.name

        return super().lyst(_filter=_filter, **kwargs)
Example #4
0
    def update(self,
               name,
               model,
               train: str = None,
               ds: str = None,
               path: str = None,
               **kwargs):
        """Update an existing model version, optionally re-storing its files.

        :param name: name of the model version to update.
        :param model: name of the parent model.
        :param train: name of the parent training (requires a working project).
        :param ds: name of the source dataset.
        :param path: if given, the files at this path are stored for the
            updated version.
        :param kwargs: fields to update on the model version document.
        :return: the updated model version.
        :raises NhaAPIError: if `train` is given without a working project.
        """

        if ds is not None:
            # class-level find_one, consistent with the `new` method above
            kwargs['ds'] = Dataset.find_one(name=ds,
                                            model=model).to_embedded()

        if train is not None:
            if self.proj is None:
                raise NhaAPIError(
                    "Cannot determine parent training if no working project is set"
                )
            else:
                kwargs['train'] = Training.find_one(
                    name=train, proj=self.proj.name).to_embedded()

        mv = super().update(filter_kwargs=dict(name=name, model=model),
                            update_kwargs=kwargs)

        if path is not None:
            self._store(mv, path)

        return mv
Example #5
0
    def new(self,
            name: str = None,
            tag=DockerConst.LATEST,
            notebook: str = None,
            params: dict = None,
            details: dict = None,
            datasets: list = None,
            movers: list = None,
            _replace: bool = None,
            target_deploy: str = None,
            **kwargs):
        """Create and launch a new training experiment.

        :param name: training name; if omitted and all datasets share one
            name, that name is used.
        :param tag: Docker tag of the build version to run on.
        :param notebook: notebook file to execute (".ipynb" enforced).
        :param params: parameters recorded under the training's details.
        :param details: extra details merged with `params` (no overwrites).
        :param datasets: primary keys of datasets to make available.
        :param movers: primary keys of pre-trained model versions to mount.
        :param _replace: kept for interface compatibility (unused here).
        :param target_deploy: if given and the training finishes successfully,
            this deploy is updated with the result.
        :param kwargs: extra options for the experiment launch
            (`resource_profile` is extracted for the experiment itself).
        :return: the created Training, reloaded after launch.
        """

        self.set_logger(name)
        bv = BuildVersion.find_one_or_none(tag=tag, proj=self.proj)
        movers = [
            ModelVersion.find_by_pk(mv).to_embedded() for mv in movers or []
        ]
        datasets = [Dataset.find_by_pk(ds) for ds in datasets or []]

        if name is None:
            # set comprehension instead of set([...]) — same result, idiomatic
            all_names = {ds.name for ds in datasets}

            # only infer the name when it is unambiguous
            if len(all_names) == 1:
                name = all_names.pop()

        for mv in movers:
            self.LOG.info(
                "Pre-trained model '{}' will be available in this training".
                format(mv.show()))

        train: Training = super().new(
            name=name,
            proj=self.proj,
            bvers=None if bv is None else bv.to_embedded(),
            notebook=assert_extension(notebook, Extension.IPYNB),
            details=join_dicts(details or {},
                               dict(params=params or {}),
                               allow_overwrite=False),
            _duplicate_filter=dict(name=name, proj=self.proj))

        exp = TrainingExp(train=train,
                          tag=tag,
                          datasets=datasets,
                          movers=movers,
                          resource_profile=kwargs.pop('resource_profile',
                                                      None),
                          log=self.LOG)
        exp.launch(**kwargs)
        train.reload()

        if target_deploy is not None and train.task.state == Task.State.FINISHED:
            self._update_deploy(target_deploy, train)

        self.reset_logger()
        return train.reload()
Example #6
0
 def __init__(self, ds: Dataset, **kwargs):
     """Prepare a barrel for transferring the given dataset's files.

     :param ds: the dataset whose files this barrel will handle.
     :param kwargs: forwarded to the parent barrel constructor.
     """

     self.ds_name = ds.name
     self.model_name = ds.model.name
     self.subject = "dataset '{}'".format(ds.show())

     # compressed datasets are archived under the dataset's own name
     compress_target = ds.name if ds.compressed else None
     super().__init__(
         schema=ds.model.data_files,
         compress_to=compress_target,
         lightweight=ds.lightweight,
         **kwargs
     )
Example #7
0
    def _store(self, ds: Dataset, path: str = None, files: dict = None):
        """Store a dataset's files through a DatasetBarrel, if a source is given.

        :param ds: the dataset being published.
        :param path: directory to read the files from.
        :param files: mapping of file names to contents.
        :return: the barrel that stored the files, or None when neither
            `path` nor `files` was provided.
        """

        # guard clause: nothing to upload, mark the dataset as unmanaged
        if not (path or files):
            LOG.warn(
                "Dataset '{}' for model '{}' is not being stored by the framework"
                .format(ds.name, ds.model.name))
            ds.update(stored=False)
            return None

        barrel = DatasetBarrel(ds)

        if barrel.schema is None:
            LOG.warn(
                "Publishing dataset '{}' without a strict file definition".
                format(ds.get_pk()))

        if path:
            barrel.store_from_path(path)
        elif files:
            barrel.store_from_dict(files)
        else:
            # unreachable given the guard above; kept as a safety net
            raise NotImplementedError()

        return barrel
Example #8
0
    def __call__(self,
                 tag: str = DockerConst.LATEST,
                 port: int = NoteConst.HOST_PORT,
                 movers: list = None,
                 datasets: list = None,
                 **kwargs):
        """Launch a notebook experiment with the given models and datasets.

        :param tag: Docker tag of the build version to run on.
        :param port: host port the notebook IDE is mapped to.
        :param movers: primary keys of model versions to mount.
        :param datasets: primary keys of datasets to mount.
        :param kwargs: extra launch options (`resource_profile` is
            extracted for the experiment itself).
        :return: the result of launching the notebook experiment.
        """

        LOG.info("Notebook IDE will be mapped to port {}".format(port))

        embedded_movers = [
            ModelVersion.find_by_pk(mv).to_embedded()
            for mv in movers or []
        ]
        loaded_datasets = [Dataset.find_by_pk(ds) for ds in datasets or []]
        profile = kwargs.pop('resource_profile', None)

        exp = NotebookExp(port=port,
                          proj=self.proj,
                          tag=tag,
                          movers=embedded_movers,
                          datasets=loaded_datasets,
                          resource_profile=profile)
        return exp.launch(**kwargs)