def to_disk(self, path: Path, **kwargs):
    """Serialize CandidateGenerator to disk.

    Each piece of state is written by its own writer, keyed by name:
    the index settings and both alias collections as JSON, the ANN index
    via its own `saveIndex`, the vectorizer via joblib, and the TF-IDF
    matrix as a sparse `.npz` file.

    path (Path): Directory to serialize to
    **kwargs: Accepted but not used by this method.
    """
    setting_names = ("k", "m_parameter", "ef_search", "ef_construction", "n_threads")
    # Snapshot the settings now; the other writers read `self` when invoked.
    cfg = {name: getattr(self, name) for name in setting_names}

    def save_cfg(p):
        return srsly.write_json(p, cfg)

    def save_aliases(p):
        return srsly.write_json(p.with_suffix(".json"), self.aliases)

    def save_short_aliases(p):
        return srsly.write_json(p.with_suffix(".json"), self.short_aliases)

    def save_ann_index(p):
        # saveIndex is handed a plain string path rather than a Path.
        return self.ann_index.saveIndex(str(p.with_suffix(".bin")))

    def save_vectorizer(p):
        return joblib.dump(self.vectorizer, p.with_suffix(".joblib"))

    def save_tfidf_vectors(p):
        target = p.with_suffix(".npz")
        # The matrix is cast to float16 before it is written.
        half_precision = self.alias_tfidfs.astype(np.float16)
        return scipy.sparse.save_npz(target, half_precision)

    serializers = {
        "cg_cfg": save_cfg,
        "aliases": save_aliases,
        "short_aliases": save_short_aliases,
        "ann_index": save_ann_index,
        "tfidf_vectorizer": save_vectorizer,
        "tfidf_vectors_sparse": save_tfidf_vectors,
    }
    # An empty exclude collection is passed, so no writer is named for skipping.
    to_disk(path, serializers, {})
def to_disk(
    self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
) -> None:
    """Serialize the pipe to disk.

    Three fields are written: "cfg" (JSON dump of `self.cfg`), "vocab"
    (delegated to `self.vocab.to_disk`) and "model" (tokenizer files,
    transformer weights and transformer config in one directory).

    path (str / Path): Path to a directory.
    exclude (Iterable[str]): String names of serialization fields to exclude.

    DOCS: https://nightly.spacy.io/api/transformer#to_disk
    """

    def save_model(p):
        model_dir = Path(p).absolute()
        # Only create the directory when nothing exists at that location yet.
        if not model_dir.exists():
            model_dir.mkdir()
        tokenizer = self.model.attrs["tokenizer"]
        tokenizer.save_pretrained(str(model_dir))
        # The wrapped transformer is held by the first shim of the first layer.
        wrapped = self.model.layers[0].shims[0]._model
        torch.save(wrapped.state_dict(), model_dir / WEIGHTS_NAME)
        wrapped.config.to_json_file(model_dir / CONFIG_NAME)

    serialize = {
        "cfg": lambda p: srsly.write_json(p, self.cfg),
        "vocab": lambda p: self.vocab.to_disk(p),
        "model": save_model,
    }
    util.to_disk(path, serialize, exclude)
def to_disk(self, path: Path, exclude: Tuple = tuple(), **kwargs):
    """Serialize RemoteAnnLinker to disk.

    Only the component config (`self.cfg`) is written, as JSON, under the
    "cfg" key.

    path (Path): directory to serialize to
    exclude (Tuple, optional): names of serialization fields to skip.
        Defaults to tuple().
    **kwargs: Accepted but not used by this method.
    """
    path = ensure_path(path)
    serializers = {"cfg": lambda p: srsly.write_json(p, self.cfg)}
    # Forward the caller's `exclude`; previously a hard-coded empty dict was
    # passed here, so the documented parameter was silently ignored.
    to_disk(path, serializers, exclude)
def to_disk(
    self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
) -> None:
    """Serialize the pipe to disk.

    Writes "cfg" as JSON and delegates "vocab" and "model" to the
    `to_disk` methods of `self.vocab` and `self.model`.

    path (str / Path): Path to a directory.
    exclude (Iterable[str]): String names of serialization fields to exclude.

    DOCS: https://spacy.io/api/transformer#to_disk
    """
    # Writers read from `self` only when they are invoked.
    writers = {
        "cfg": lambda p: srsly.write_json(p, self.cfg),
        "vocab": lambda p: self.vocab.to_disk(p),
        "model": lambda p: self.model.to_disk(p),
    }
    util.to_disk(path, writers, exclude)
def to_disk(self, path, exclude=tuple(), **kwargs):
    """Serialize the pipe and its model to disk.

    Three fields are written: "cfg" (JSON dump of `self.cfg`), "vocab"
    (delegated to `self.vocab.to_disk`) and "model" (tokenizer files,
    transformer weights and transformer config in one directory).

    path: Directory to serialize to.
    exclude: Names of serialization fields to skip.
    **kwargs: Forwarded to `util.get_serialization_exclude` together with
        `exclude` to compute the final exclude list.
    """

    def save_model(p):
        trf_dir = Path(p).absolute()
        # Tolerate an existing directory so that saving over a previous
        # export does not fail with FileExistsError.
        trf_dir.mkdir(exist_ok=True)
        self.model.attrs["tokenizer"].save_pretrained(str(trf_dir))
        # The wrapped transformer is held by the first shim of the first layer.
        transformer = self.model.layers[0].shims[0]._model
        torch.save(transformer.state_dict(), trf_dir / WEIGHTS_NAME)
        transformer.config.to_json_file(trf_dir / CONFIG_NAME)

    serialize = {
        "cfg": lambda p: srsly.write_json(p, self.cfg),
        "vocab": lambda p: self.vocab.to_disk(p),
        "model": save_model,
    }
    exclude = util.get_serialization_exclude(serialize, exclude, kwargs)
    util.to_disk(path, serialize, exclude)
def to_disk(self, path, **_kwargs):
    """Serialize the component's config to disk.

    The value of `self._get_config()` is written as JSON under the "cfg"
    key; nothing else is serialized.

    path: Location to serialize to; normalized with `util.ensure_path`.
    **_kwargs: Accepted but not used by this method.
    RETURNS: Whatever `util.to_disk` returns.
    """
    target = util.ensure_path(path)

    def write_cfg(p):
        # The config is fetched when the writer runs, not when it is defined.
        return srsly.write_json(p, self._get_config())

    serializers = OrderedDict()
    serializers["cfg"] = write_cfg
    # An empty exclude list is passed, so no writer is named for skipping.
    return util.to_disk(target, serializers, [])
def to_disk(
    self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
):
    """Serialize the pipe to disk.

    Only `self.lookups` is written, under the "lookups" key, via its own
    `to_disk` method.

    path (str / Path): Path to serialize to.
    exclude (Iterable[str]): String names of serialization fields to exclude.
    """
    writers = {}
    # `self.lookups` is read when the writer is invoked.
    writers["lookups"] = lambda p: self.lookups.to_disk(p)
    util.to_disk(path, writers, exclude)