def load_scp(fname, endian='<', separator=None, as_bytes=False,
             segments=None):
    """Lazy loader for kaldi scp file.

    Args:
        fname (str or file(text mode)): The scp file to read.
        endian (str): Either '<' or '>'; forwarded to load_mat.
        separator (str): Delimiter between the key and the ark path on
            each line. None splits on runs of whitespace.
        as_bytes (bool): Read as raw bytes string
        segments (str): The path of segments
    Returns:
        A LazyLoader filled with ``key -> ark path`` entries when segments
        is None, otherwise a SegmentsExtractor built from fname and
        segments.
    Raises:
        ValueError: If a line cannot be split into a key and an ark path.
    """
    assert endian in ('<', '>'), endian

    # With a segments file the whole job is delegated to SegmentsExtractor.
    if segments is not None:
        return SegmentsExtractor(fname, separator=separator,
                                 segments=segments)

    lazy = LazyLoader(partial(load_mat, endian=endian, as_bytes=as_bytes))
    with open_or_fd(fname, 'r') as scp_fd:
        for entry in scp_fd:
            fields = entry.split(separator, 1)
            if len(fields) != 2:
                raise ValueError(
                    'Invalid line is found:\n> {}'.format(entry))
            key, location = fields
            # Only the path is stored here; the LazyLoader is handed
            # load_mat to resolve it.
            lazy[key] = location.rstrip()
    return lazy
def load_ark(fname, endian="<"):
    """Iterate over the entries stored in an ark file.

    Args:
        fname (str or fd): The ark to read; opened in binary read mode.
        endian (str): Either "<" or ">"; forwarded to read_kaldi.
    Yields:
        ``(token, array)`` pairs, until read_token returns None.
    """
    assert endian in ("<", ">"), endian
    with open_or_fd(fname, "rb") as ark_fd:
        token = read_token(ark_fd)
        while token is not None:
            yield token, read_kaldi(ark_fd, endian)
            token = read_token(ark_fd)
def load_ark(fname, return_position=False, endian='<'):
    """Iterate over the entries stored in an ark file.

    Args:
        fname (str or fd): The ark to read; opened in binary read mode.
        return_position (bool): If True, also yield the running byte count
            at which the entry's array data starts.
        endian (str): Either '<' or '>'; forwarded to read_kaldi.
    Yields:
        ``(token, array)`` pairs, or ``(token, array, position)`` triples
        when return_position is True, until read_token returns None.
    """
    assert endian in ('<', '>'), endian
    consumed = 0
    with open_or_fd(fname, 'rb') as ark_fd:
        while True:
            token = read_token(ark_fd)
            if token is None:
                return
            # The extra 1 presumably accounts for the separator that
            # follows the token -- TODO confirm against read_token.
            position = consumed + len(token) + 1
            array, n_read = read_kaldi(ark_fd, endian, return_size=True)
            consumed = position + n_read
            if return_position:
                yield token, array, position
            else:
                yield token, array
def __init__(self, fname, segments=None, separator=None):
    """Index a segments file against the recordings listed in a wav scp.

    Args:
        fname: The wav scp; loaded through load_scp.
        segments: The segments file. Every line must hold exactly four
            fields: utterance id, recording id, start and end.
        separator: Field delimiter used for both files. None splits on
            runs of whitespace.
    Raises:
        RuntimeError: If a line does not have four fields, or if its
            recording id is not found in the wav scp.
    """
    self.wav_scp = fname
    self.wav_loader = load_scp(self.wav_scp, separator=separator)
    self.segments = segments
    self._segments_dict = {}

    with open_or_fd(self.segments, 'r') as seg_fd:
        for raw_line in seg_fd:
            fields = raw_line.rstrip().split(separator)
            if len(fields) != 4:
                raise RuntimeError(
                    'Format is invalid: {}'.format(raw_line))
            utt_id, rec_id, start, end = fields
            self._segments_dict[utt_id] = (
                rec_id, float(start), float(end))
            if rec_id not in self.wav_loader:
                raise RuntimeError('Not found "{}" in {}'.format(
                    rec_id, self.wav_scp))
def load_scp_sequential(fname, endian='<', separator=None, as_bytes=False,
                        segments=None):
    """Sequentially yield the entries referenced by a kaldi scp file.

    Consecutive scp lines that point into the same ark reuse one open file
    descriptor instead of reopening the ark for every entry.

    Args:
        fname (str or file(text mode)): The scp file to read.
        endian (str): Either '<' or '>'; forwarded to _load_mat.
        separator (str): Delimiter between the key and the ark path on
            each line. None splits on runs of whitespace.
        as_bytes (bool): Read as raw bytes string
        segments (str): The path of segments
    Yields:
        ``(token, mat)`` pairs when segments is None, otherwise whatever
        ``SegmentsExtractor(...).generator()`` yields.
    Raises:
        ValueError: If a line cannot be split into a key and an ark path.
    """
    assert endian in ('<', '>'), endian

    if segments is not None:
        for data in SegmentsExtractor(fname, separator=separator,
                                      segments=segments).generator():
            yield data
        return

    with open_or_fd(fname, 'r') as fd:
        prev_ark = None
        arkfd = None
        # try/finally rather than "except Exception": the descriptor must
        # also be released when the scp is exhausted normally and when the
        # consumer stops early (GeneratorExit is not an Exception).
        try:
            for line in fd:
                seps = line.split(separator, 1)
                if len(seps) != 2:
                    raise ValueError(
                        'Invalid line is found:\n> {}'.format(line))
                token, arkname = seps
                ark, offset, slices = _parse_arkpath(arkname.rstrip())

                if arkfd is None or prev_ark != ark:
                    if arkfd is not None:
                        arkfd.close()
                        # Forget it so that a failing open below cannot
                        # lead to closing the same descriptor twice.
                        arkfd = None
                    arkfd = open_like_kaldi(ark, 'rb')
                    prev_ark = ark

                mat = _load_mat(arkfd, offset, slices,
                                endian=endian, as_bytes=as_bytes)
                yield token, mat
        finally:
            if arkfd is not None:
                arkfd.close()
def save_mat(fname, array, endian='<', compression_method=None):
    """Write a single array to fname.

    Args:
        fname (str or fd): Destination; opened in binary write mode.
        array: The data handed to write_array.
        endian (str): Forwarded to write_array.
        compression_method (int): Forwarded to write_array.
    Returns:
        Whatever write_array returns.
    """
    with open_or_fd(fname, 'wb') as out_fd:
        written = write_array(out_fd, array, endian, compression_method)
        return written
def save_ark(ark, array_dict, scp=None, append=False, text=False,
             as_bytes=False, endian='<', compression_method=None):
    """Write ark

    Args:
        ark (str or fd): Destination ark.
        array_dict (dict): Maps each key to the value to store. A list or
            tuple value is unpacked as ``(rate, array)`` and written with
            write_wav.
        scp (str or fd): If given, an scp line ``key ark:position`` is
            written for every entry.
        append (bool): If True is specified, open the file with appendable
            mode
        text (bool): If True, saving in text ark format.
        as_bytes (bool): Save the value of the input array_dict as just a
            bytes string.
        endian (str): Forwarded to the array writers.
        compression_method (int): Forwarded to write_array.
    Raises:
        TypeError: If scp is requested but ark is a descriptor whose
            position cannot be queried.
    """
    ark_is_path = isinstance(ark, string_types)
    if ark_is_path:
        seekable = True
    elif hasattr(ark, 'tell'):
        # Having tell() is not enough: it may still fail, e.g. on a pipe.
        try:
            ark.tell()
        except Exception:
            seekable = False
        else:
            seekable = True
    else:
        seekable = False

    if scp is not None and not ark_is_path and not seekable:
        raise TypeError('scp file can be created only '
                        'if the output ark file is a file or '
                        'a seekable file descriptor.')

    # Write ark
    positions = []
    with open_or_fd(ark, 'ab' if append else 'wb') as ark_fd:
        # Positions are tracked relative to where writing starts; the
        # starting offset is added when the scp lines are produced.
        offset = ark_fd.tell() if seekable else 0
        written = 0
        for key in array_dict:
            header = (key + ' ').encode(encoding=default_encoding)
            ark_fd.write(header)
            written += len(header)
            positions.append(written)

            value = array_dict[key]
            if as_bytes:
                payload = bytes(value)
                written += len(payload)
                ark_fd.write(payload)
            elif isinstance(value, (list, tuple)):
                rate, array = value
                written += write_wav(ark_fd, rate, array)
            elif text:
                written += write_array_ascii(ark_fd, value, endian)
            else:
                written += write_array(ark_fd, value, endian,
                                       compression_method)

    # Write scp
    if scp is not None:
        name = ark if ark_is_path else ark.name
        with open_or_fd(scp, 'a' if append else 'w') as scp_fd:
            for key, position in zip(array_dict, positions):
                scp_fd.write(key + u' ' + name + ':' +
                             str(position + offset) + '\n')
def save_ark(
    ark,
    array_dict,
    scp=None,
    append=False,
    text=False,
    endian="<",
    compression_method=None,
    write_function=None,
):
    """Write ark

    Args:
        ark (str or fd): Destination ark.
        array_dict (dict): Maps each key to the value to store. Without a
            write_function, a list or tuple value is unpacked as
            ``(rate, array)`` and written with write_wav.
        scp (str or fd): If given, an scp line ``key ark:position`` is
            written for every entry.
        append (bool): If True is specified, open the file with appendable
            mode
        text (bool): If True, saving in text ark format.
        endian (str): Forwarded to the array writers.
        compression_method (int): Forwarded to write_array.
        write_function (str): Case-insensitive name of an alternative
            writer: a name starting with "soundfile" (FLAC if it contains
            "flac", WAV otherwise), "pickle" or "numpy".
    Raises:
        RuntimeError: If write_function is not one of the supported names.
            This is raised before the ark is opened, so a bad name never
            truncates or partially writes the output.
        TypeError: If scp is requested but ark is a descriptor whose
            position cannot be queried.
    """
    # Resolve the writer once, up front: it does not depend on the key, and
    # failing here keeps an invalid name from clobbering an existing ark.
    custom_writer = None
    if write_function is not None:
        custom_writer = _resolve_write_function(write_function)

    if isinstance(ark, string_types):
        seekable = True
    # Maybe, never match with this
    elif not hasattr(ark, "tell"):
        seekable = False
    else:
        try:
            ark.tell()
            seekable = True
        except Exception:
            seekable = False

    if scp is not None and not isinstance(ark, string_types):
        if not seekable:
            raise TypeError("scp file can be created only "
                            "if the output ark file is a file or "
                            "a seekable file descriptor.")

    # Write ark
    mode = "ab" if append else "wb"
    pos_list = []
    with open_or_fd(ark, mode) as fd:
        if seekable:
            offset = fd.tell()
        else:
            offset = 0
        size = 0
        for key in array_dict:
            encode_key = (key + " ").encode(encoding=default_encoding)
            fd.write(encode_key)
            size += len(encode_key)
            pos_list.append(size)
            data = array_dict[key]

            if custom_writer is not None:
                size += custom_writer(fd, data)
            elif isinstance(data, (list, tuple)):
                rate, array = data
                size += write_wav(fd, rate, array)
            elif text:
                size += write_array_ascii(fd, data, endian)
            else:
                size += write_array(fd, data, endian, compression_method)

    # Write scp
    mode = "a" if append else "w"
    if scp is not None:
        name = ark if isinstance(ark, string_types) else ark.name
        with open_or_fd(scp, mode) as fd:
            for key, position in zip(array_dict, pos_list):
                fd.write(key + " " + name + ":" +
                         str(position + offset) + "\n")


def _resolve_write_function(write_function):
    """Map a write_function name to a ``(fd, data) -> bytes written`` writer."""
    # Ignore case
    name = write_function.lower()

    if name.startswith("soundfile"):
        # Imported lazily so soundfile stays an optional dependency.
        import soundfile

        audio_format = "flac" if "flac" in name else "wav"

        def _write_soundfile(fd, data):
            if not isinstance(data, (list, tuple)):
                raise TypeError(
                    "Expected list or tuple type, but got {}".format(
                        type(data)))
            if len(data) != 2:
                raise ValueError(
                    "Expected length=2, but got {}".format(len(data)))

            # Both (array, rate) and (rate, array) orderings are accepted.
            first, second = data
            if isinstance(first, np.ndarray) and isinstance(second, int):
                samples, rate = first, second
            elif isinstance(second, np.ndarray) and isinstance(first, int):
                samples, rate = second, first
            else:
                raise ValueError(
                    "Expected Tuple[int, np.ndarray] or "
                    "Tuple[np.ndarray, int]: "
                    "but got Tuple[{}, {}]".format(
                        type(first), type(second)))

            # Encode in memory first: the length header has to be written
            # before the payload.
            _fd = BytesIO()
            soundfile.write(_fd, samples, rate, format=audio_format)
            fd.write(b"AUDIO")
            buf = _fd.getbuffer()
            # Write the information for the length
            bytes_length = _write_length_header(fd, len(buf))
            fd.write(buf)
            return len(buf) + len(b"AUDIO") + bytes_length

        return _write_soundfile

    if name == "pickle":
        return _write_pickle_entry

    if name == "numpy":
        return _write_numpy_entry

    raise RuntimeError(
        "Not supported: write_function={}".format(name))


def _write_pickle_entry(fd, data):
    """Write data as a "PKL"-tagged pickle and return the bytes written."""
    # Note that we don't need size information for pickle!
    fd.write(b"PKL")
    _fd = BytesIO()
    pickle.dump(data, _fd)
    buf = _fd.getbuffer()
    fd.write(buf)
    return len(buf) + len(b"PKL")


def _write_numpy_entry(fd, data):
    """Write data as an "NPY"-tagged, length-prefixed .npy blob."""
    # Write numpy file in BytesIO
    _fd = BytesIO()
    np.save(_fd, data)
    fd.write(b"NPY")
    buf = _fd.getbuffer()
    # Write the information for the length
    bytes_length = _write_length_header(fd, len(buf))
    # Write numpy to real file object
    fd.write(buf)
    return len(buf) + len(b"NPY") + bytes_length