Пример #1
0
def load_norms(file: Union[str, bytes, int, PathLike]) -> Norms:
    """
    Load Norms from a finalfusion file.

    Opens ``file`` in binary mode, asks ``find_chunk`` for a Norms chunk
    and reads it with ``Norms.read_chunk``.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to finalfusion file containing a Norms chunk.

    Returns
    -------
    norms : Norms
        First finalfusion Norms in the file.

    Raises
    ------
    ValueError
        If the file did not contain norms, or if ``find_chunk`` reported a
        chunk identifier other than the requested one.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.NdNorms])
        if chunk is None:
            raise ValueError('File did not contain norms.')
        # Defensive check: only NdNorms was requested, so any other
        # identifier coming back is unexpected.
        if chunk != ChunkIdentifier.NdNorms:
            raise ValueError(f"Unexpected chunk: {str(chunk)}")
        return Norms.read_chunk(inf)
Пример #2
0
def load_ndarray(file: Union[str, bytes, int, PathLike],
                 mmap: bool = False) -> NdArray:
    """
    Load an NdArray chunk from the given finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Finalfusion file with an NdArray chunk.
    mmap : bool
        If true, the chunk is obtained through ``NdArray.mmap_chunk``
        (memory mapping the array buffer as read only) instead of
        ``NdArray.read_chunk``.

    Returns
    -------
    storage : NdArray
        The NdArray storage from the file.

    Raises
    ------
    ValueError
        If the file did not contain an NdArray chunk.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.NdArray])
        if chunk is None:
            raise ValueError("File did not contain a NdArray chunk")
        # Only NdArray was requested; reject anything else defensively.
        if chunk != ChunkIdentifier.NdArray:
            raise ValueError(f"unknown storage type: {chunk}")
        reader = NdArray.mmap_chunk if mmap else NdArray.read_chunk
        return reader(inf)
Пример #3
0
def load_storage(file: Union[str, bytes, int, PathLike],
                 mmap: bool = False) -> Storage:
    """
    Load a storage from a finalfusion file.

    Looks for an NdArray or QuantizedArray chunk in the file. Only
    NdArray chunks can currently be loaded by this function.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to finalfusion file containing a storage chunk.
    mmap : bool
        Toggles memory mapping the storage buffer as read-only.

    Returns
    -------
    storage : Storage
        First finalfusion Storage in the file.

    Raises
    ------
    ValueError
        If the file did not contain a storage.
    NotImplementedError
        If the storage chunk that was found is not an NdArray.
    """
    storage_ids = [ChunkIdentifier.NdArray, ChunkIdentifier.QuantizedArray]
    with open(file, "rb") as inf:
        found = find_chunk(inf, storage_ids)
        if found is None:
            raise ValueError('File did not contain a storage')
        # Anything other than NdArray (i.e. a quantized array) has no
        # reader wired up here yet.
        if found != ChunkIdentifier.NdArray:
            raise NotImplementedError('Storage type is not yet supported.')
        reader = NdArray.mmap_chunk if mmap else NdArray.read_chunk
        return reader(inf)
Пример #4
0
def load_metadata(file: Union[str, bytes, int, PathLike]) -> Metadata:
    """
    Load a Metadata chunk from the given finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Finalfusion file with a metadata chunk.

    Returns
    -------
    metadata : Metadata
        The Metadata from the file.

    Raises
    ------
    ValueError
        If the file did not contain a Metadata chunk.
    """
    with open(file, 'rb') as inf:
        found = find_chunk(inf, [ChunkIdentifier.Metadata])
        if found is None:
            raise ValueError("File did not contain a Metadata chunk")
        # Only Metadata was requested; reject anything else defensively.
        if found != ChunkIdentifier.Metadata:
            raise ValueError(f"unexpected chunk: {str(found)}")
        return Metadata.read_chunk(inf)
Пример #5
0
def load_vocab(file: Union[str, bytes, int, PathLike]) -> Vocab:
    """
    Load any vocabulary from a finalfusion file.

    Loads the first known vocabulary from a finalfusion file. The result
    is one of:

        * :class:`~finalfusion.vocab.simple_vocab.SimpleVocab`,
        * :class:`~finalfusion.vocab.subword.FinalfusionBucketVocab`
        * :class:`~finalfusion.vocab.subword.FastTextVocab`
        * :class:`~finalfusion.vocab.subword.ExplicitVocab`

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a finalfusion vocab chunk.

    Returns
    -------
    vocab : Vocab
        First vocabulary in the file.

    Raises
    ------
    ValueError
        If the file did not contain a vocabulary.
    """
    # Each chunk identifier paired with the class whose ``read_chunk``
    # knows how to parse that chunk.
    readers = (
        (ChunkIdentifier.SimpleVocab, SimpleVocab),
        (ChunkIdentifier.BucketSubwordVocab, FinalfusionBucketVocab),
        (ChunkIdentifier.FastTextSubwordVocab, FastTextVocab),
        (ChunkIdentifier.ExplicitSubwordVocab, ExplicitVocab),
    )
    with open(file, "rb") as inf:
        found = find_chunk(inf, [
            ChunkIdentifier.SimpleVocab, ChunkIdentifier.FastTextSubwordVocab,
            ChunkIdentifier.ExplicitSubwordVocab,
            ChunkIdentifier.BucketSubwordVocab
        ])
        if found is None:
            raise ValueError('File did not contain a vocabulary')
        for identifier, vocab_cls in readers:
            if found == identifier:
                return vocab_cls.read_chunk(inf)
        raise ValueError(f'Unexpected chunk type {found}.')
Пример #6
0
def load_simple_vocab(file: Union[str, bytes, int, PathLike]) -> SimpleVocab:
    """
    Load a SimpleVocab from the given finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing a SimpleVocab chunk.

    Returns
    -------
    vocab : SimpleVocab
        Returns the first SimpleVocab in the file.

    Raises
    ------
    ValueError
        If the file did not contain a SimpleVocab chunk.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.SimpleVocab])
        if chunk is None:
            # The message previously ended in a stray '}'.
            raise ValueError('File did not contain a SimpleVocab')
        return SimpleVocab.read_chunk(inf)
Пример #7
0
def load_explicit_vocab(
        file: Union[str, bytes, int, PathLike]) -> ExplicitVocab:
    """
    Load an ExplicitVocab from the given finalfusion file.

    Parameters
    ----------
    file : str, bytes, int, PathLike
        Path to file containing an ExplicitVocab chunk.

    Returns
    -------
    vocab : ExplicitVocab
        Returns the first ExplicitVocab in the file.

    Raises
    ------
    ValueError
        If the file did not contain an ExplicitVocab chunk.
    """
    with open(file, "rb") as inf:
        chunk = find_chunk(inf, [ChunkIdentifier.ExplicitSubwordVocab])
        if chunk is None:
            # The message previously named FastTextVocab (copy-paste slip)
            # and ended in a stray '}'.
            raise ValueError('File did not contain an ExplicitVocab')
        return ExplicitVocab.read_chunk(inf)