Exemplo n.º 1
0
def load_scp(fname, endian='<', separator=None, as_bytes=False, segments=None):
    """Lazy loader for kaldi scp file.

    Without ``segments``, every line of ``fname`` is split once on
    ``separator`` into a token and an ark name, and the pairs are stored
    in a ``LazyLoader`` built around ``load_mat``. With ``segments``, the
    work is handed over to ``SegmentsExtractor`` instead.

    Args:
        fname (str or file(text mode)): The scp file to read.
        endian (str): Either '<' or '>'; forwarded to ``load_mat``.
        separator (str): Field separator handed to ``str.split``; ``None``
            splits on runs of whitespace.
        as_bytes (bool): Read as raw bytes string; forwarded to
            ``load_mat``.
        segments (str): The path of segments. When given, ``endian`` and
            ``as_bytes`` are not passed on to ``SegmentsExtractor``.

    Returns:
        A ``LazyLoader`` mapping each token to its right-stripped ark name,
        or a ``SegmentsExtractor`` when ``segments`` is not None.

    Raises:
        AssertionError: If ``endian`` is neither '<' nor '>'.
        ValueError: If a line does not split into exactly two fields.
    """
    assert endian in ('<', '>'), endian

    # Segments mode is handled entirely by SegmentsExtractor.
    if segments is not None:
        return SegmentsExtractor(fname, separator=separator, segments=segments)

    table = LazyLoader(partial(load_mat, endian=endian, as_bytes=as_bytes))
    with open_or_fd(fname, 'r') as scp:
        for entry in scp:
            # Split once only, so the ark name may itself contain separators.
            fields = entry.split(separator, 1)
            if len(fields) != 2:
                raise ValueError(
                    'Invalid line is found:\n>   {}'.format(entry))
            key, location = fields
            table[key] = location.rstrip()
    return table
Exemplo n.º 2
0
def load_scp(fname, endian="<", separator=None, segments=None, max_cache_fd=0):
    """Lazy loader for kaldi scp file.

    Without ``segments``, every line of ``fname`` is split once on
    ``separator`` into a token and an ark name, and the pairs are stored
    in a ``LazyLoader`` built around ``load_mat``. With ``segments``, the
    work is handed over to ``SegmentsExtractor`` instead.

    Args:
        fname (str or file(text mode)): The scp file to read.
        endian (str): Either "<" or ">"; forwarded to ``load_mat``.
        separator (str): Field separator handed to ``str.split``; ``None``
            splits on runs of whitespace.
        segments (str): The path of segments. When given, ``endian`` is
            not passed on to ``SegmentsExtractor``.
        max_cache_fd (int): When non-zero, a ``LimitedSizeDict`` of this
            size is created and given to ``load_mat`` as ``fd_dict``
            (presumably a bounded cache of open file descriptors -- confirm
            against ``load_mat``). Cannot be combined with ``segments``.

    Returns:
        A ``LazyLoader`` mapping each token to its right-stripped ark name,
        or a ``SegmentsExtractor`` when ``segments`` is not None.

    Raises:
        AssertionError: If ``endian`` is neither "<" nor ">".
        ValueError: If ``max_cache_fd`` is non-zero while ``segments`` is
            given, or if a line does not split into exactly two fields.
    """
    assert endian in ("<", ">"), endian

    use_fd_cache = max_cache_fd != 0
    if use_fd_cache and segments is not None:
        raise ValueError("max_cache_fd is not supported for segments mode")
    fd_cache = LimitedSizeDict(max_cache_fd) if use_fd_cache else None

    # Segments mode is handled entirely by SegmentsExtractor.
    if segments is not None:
        return SegmentsExtractor(fname, separator=separator, segments=segments)

    table = LazyLoader(partial(load_mat, endian=endian, fd_dict=fd_cache))
    with open_like_kaldi(fname, "r") as scp:
        for entry in scp:
            # Split once only, so the ark name may itself contain separators.
            fields = entry.split(separator, 1)
            if len(fields) != 2:
                raise ValueError(
                    "Invalid line is found:\n>   {}".format(entry))
            key, location = fields
            table[key] = location.rstrip()
    return table