def load_norms(file: Union[str, bytes, int, PathLike]) -> Norms:
    """
    Load Norms from a finalfusion file.

    Loads the first Norms chunk from a finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to finalfusion file containing a Norms chunk.

    Returns
    -------
    norms : Norms
        First finalfusion Norms in the file.

    Raises
    ------
    ValueError
        If the file did not contain norms, or if the chunk search
        reported an identifier other than ``NdNorms``.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.NdNorms])
        if chunk is None:
            raise ValueError('File did not contain norms.')
        if chunk == ChunkIdentifier.NdNorms:
            # The chunk must be read while the file is still open.
            return Norms.read_chunk(inf)
        # Defensive fallback: only NdNorms was requested, so any other
        # identifier coming back from the search is unexpected.
        raise ValueError(f"Unexpected chunk: {str(chunk)}")
def load_ndarray(file: Union[str, bytes, int, PathLike],
                 mmap: bool = False) -> NdArray:
    """
    Load an array chunk from the given file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Finalfusion file with a ndarray chunk.
    mmap : bool
        Toggles memory mapping the array buffer as read only.

    Returns
    -------
    storage : NdArray
        The NdArray storage from the file.

    Raises
    ------
    ValueError
        If the file did not contain an NdArray chunk, or if the chunk
        search reported an identifier other than ``NdArray``.
    """
    with open(file, "rb") as inf:
        found = find_chunk(inf, [ChunkIdentifier.NdArray])
        if found is None:
            raise ValueError("File did not contain a NdArray chunk")
        if found != ChunkIdentifier.NdArray:
            # Only NdArray was requested; anything else is rejected.
            raise ValueError(f"unknown storage type: {found}")
        # Pick the reader once; both take the open file positioned by
        # the chunk search above.
        reader = NdArray.mmap_chunk if mmap else NdArray.read_chunk
        return reader(inf)
def load_storage(file: Union[str, bytes, int, PathLike],
                 mmap: bool = False) -> Storage:
    """
    Load any storage from a finalfusion file.

    Searches the file for an ``NdArray`` or ``QuantizedArray`` chunk.
    Only ``NdArray`` chunks can currently be read.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to finalfusion file containing a storage chunk.
    mmap : bool
        Toggles memory mapping the storage buffer as read-only.

    Returns
    -------
    storage : Storage
        First finalfusion Storage in the file.

    Raises
    ------
    ValueError
        If the file did not contain a storage.
    NotImplementedError
        If the chunk that was found is not an ``NdArray``.
    """
    with open(file, "rb") as inf:
        found = find_chunk(
            inf, [ChunkIdentifier.NdArray, ChunkIdentifier.QuantizedArray])
        if found is None:
            raise ValueError('File did not contain a storage')
        if found != ChunkIdentifier.NdArray:
            # A storage chunk was located, but there is no reader for it.
            raise NotImplementedError('Storage type is not yet supported.')
        return NdArray.mmap_chunk(inf) if mmap else NdArray.read_chunk(inf)
def load_metadata(file: Union[str, bytes, int, PathLike]) -> Metadata:
    """
    Load a Metadata chunk from the given file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Finalfusion file with a metadata chunk.

    Returns
    -------
    metadata : Metadata
        The Metadata from the file.

    Raises
    ------
    ValueError
        If the file did not contain a Metadata chunk, or if the chunk
        search reported an identifier other than ``Metadata``.
    """
    with open(file, 'rb') as inf:
        found = find_chunk(inf, [ChunkIdentifier.Metadata])
        if found is None:
            raise ValueError("File did not contain a Metadata chunk")
        if found != ChunkIdentifier.Metadata:
            # Only Metadata was requested; anything else is rejected.
            raise ValueError(f"unexpected chunk: {str(found)}")
        return Metadata.read_chunk(inf)
def load_vocab(file: Union[str, bytes, int, PathLike]) -> Vocab:
    """
    Load any vocabulary from a finalfusion file.

    Loads the first known vocabulary from a finalfusion file.

    One of:
        * :class:`~finalfusion.vocab.simple_vocab.SimpleVocab`,
        * :class:`~finalfusion.vocab.subword.FinalfusionBucketVocab`
        * :class:`~finalfusion.vocab.subword.FastTextVocab`
        * :class:`~finalfusion.vocab.subword.ExplicitVocab`

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a finalfusion vocab chunk.

    Returns
    -------
    vocab : Vocab
        First vocabulary in the file.

    Raises
    ------
    ValueError
        If the file did not contain a vocabulary, or if the chunk
        search reported an identifier with no matching vocabulary type.
    """
    with open(file, "rb") as inf:
        found = find_chunk(inf, [
            ChunkIdentifier.SimpleVocab,
            ChunkIdentifier.FastTextSubwordVocab,
            ChunkIdentifier.ExplicitSubwordVocab,
            ChunkIdentifier.BucketSubwordVocab,
        ])
        if found is None:
            raise ValueError('File did not contain a vocabulary')
        # Chunk identifier -> vocabulary class able to read that chunk.
        readers = (
            (ChunkIdentifier.SimpleVocab, SimpleVocab),
            (ChunkIdentifier.BucketSubwordVocab, FinalfusionBucketVocab),
            (ChunkIdentifier.FastTextSubwordVocab, FastTextVocab),
            (ChunkIdentifier.ExplicitSubwordVocab, ExplicitVocab),
        )
        for identifier, vocab_cls in readers:
            if found == identifier:
                return vocab_cls.read_chunk(inf)
        raise ValueError(f'Unexpected chunk type {found}.')
def load_simple_vocab(file: Union[str, bytes, int, PathLike]) -> SimpleVocab:
    """
    Load a SimpleVocab from the given finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a SimpleVocab chunk.

    Returns
    -------
    vocab : SimpleVocab
        Returns the first SimpleVocab in the file.

    Raises
    ------
    ValueError
        If the file did not contain a SimpleVocab chunk.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.SimpleVocab])
        if chunk is None:
            # The message previously ended with a stray '}'.
            raise ValueError('File did not contain a SimpleVocab')
        # The chunk must be read while the file is still open.
        return SimpleVocab.read_chunk(inf)
def load_explicit_vocab(
        file: Union[str, bytes, int, PathLike]) -> ExplicitVocab:
    """
    Load an ExplicitVocab from the given finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing an ExplicitVocab chunk.

    Returns
    -------
    vocab : ExplicitVocab
        Returns the first ExplicitVocab in the file.

    Raises
    ------
    ValueError
        If the file did not contain an ExplicitVocab chunk.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.ExplicitSubwordVocab])
        if chunk is None:
            # The message previously named FastTextVocab (a copy-paste
            # slip) and ended with a stray '}'.
            raise ValueError('File did not contain an ExplicitVocab')
        # The chunk must be read while the file is still open.
        return ExplicitVocab.read_chunk(inf)