def test_write_read_int32_vector(tmpdir, endian):
    path = tmpdir.mkdir('test')

    a = np.random.randint(1, 128, 10, dtype=np.int32)
    b = np.random.randint(1, 128, 10, dtype=np.int32)
    origin = {u'Ï,é,à': a, u'あいうえお': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath, endian=endian)

    d2 = {k: v
          for k, v in kaldiio.load_ark(path.join('a.ark').strpath,
                                       endian=endian)}
    d5 = {k: v
          for k, v in kaldiio.load_scp(path.join('b.scp').strpath,
                                       endian=endian).items()}
    with io.open(path.join('a.ark').strpath, 'rb') as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd, endian=endian)}
    _compare(d2, origin)
    _compare(d5, origin)
    _compare(d6, origin)
def test_append_mode(tmpdir):
    path = tmpdir.mkdir("test")
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(path.join("a.ark").strpath, origin,
                     scp=path.join("b.scp").strpath)

    kaldiio.save_ark(
        path.join("a2.ark").strpath,
        {"Ï,é,à": a},
        scp=path.join("b2.scp").strpath,
        append=True,
    )
    kaldiio.save_ark(
        path.join("a2.ark").strpath,
        {"あいうえお": b},
        scp=path.join("b2.scp").strpath,
        append=True,
    )
    d1 = {k: v for k, v in kaldiio.load_ark(path.join("a.ark").strpath)}
    d2 = {k: v for k, v in kaldiio.load_scp(path.join("b.scp").strpath).items()}
    d3 = {k: v for k, v in kaldiio.load_ark(path.join("a2.ark").strpath)}
    d4 = {k: v for k, v in kaldiio.load_scp(path.join("b2.scp").strpath).items()}
    _compare(d1, origin)
    _compare(d2, origin)
    _compare(d3, origin)
    _compare(d4, origin)
def test_write_read(tmpdir, shape1, shape2, endian, dtype, max_cache_fd):
    path = tmpdir.mkdir("test")
    a = np.random.rand(*shape1).astype(dtype)
    b = np.random.rand(*shape2).astype(dtype)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath,
        origin,
        scp=path.join("b.scp").strpath,
        endian=endian,
    )

    d2 = {k: v
          for k, v in kaldiio.load_ark(path.join("a.ark").strpath,
                                       endian=endian)}
    d5 = {
        k: v
        for k, v in kaldiio.load_scp(
            path.join("b.scp").strpath, endian=endian, max_cache_fd=max_cache_fd
        ).items()
    }
    with io.open(path.join("a.ark").strpath, "rb") as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd, endian=endian)}
    _compare(d2, origin)
    _compare(d5, origin)
    _compare(d6, origin)
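# A minimal sketch of the round trip exercised by the tests above, using only
# the public kaldiio API (save_ark / load_ark / load_scp). load_ark streams
# (key, array) pairs sequentially, while load_scp gives lazy random access
# through the scp index; both should return the same data. File names here
# are made up for illustration.
import numpy as np
import kaldiio

feats = {'utt1': np.random.rand(100, 40).astype(np.float32)}
kaldiio.save_ark('feats.ark', feats, scp='feats.scp')

streamed = dict(kaldiio.load_ark('feats.ark'))
indexed = {k: v for k, v in kaldiio.load_scp('feats.scp').items()}
assert np.allclose(streamed['utt1'], indexed['utt1'])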
def test_write_read_compress(tmpdir, compression_method, endian):
    path = tmpdir.mkdir('test')
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {u'Ï,é,à': a, u'あいうえお': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath,
                     compression_method=compression_method,
                     endian=endian)

    d2 = {k: v
          for k, v in kaldiio.load_ark(path.join('a.ark').strpath,
                                       endian=endian)}
    d5 = {k: v
          for k, v in kaldiio.load_scp(path.join('b.scp').strpath,
                                       endian=endian).items()}
    with io.open(path.join('a.ark').strpath, 'rb') as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd, endian=endian)}
    # Compression is lossy, so compare with a tolerance instead of exact equality
    _compare_allclose(d2, origin, atol=1e-1)
    _compare_allclose(d5, origin, atol=1e-1)
    _compare_allclose(d6, origin, atol=1e-1)
def test_write_read(tmpdir, shape1, shape2, endian, dtype):
    path = tmpdir.mkdir('test')
    a = np.random.rand(*shape1).astype(dtype)
    b = np.random.rand(*shape2).astype(dtype)
    origin = {u'Ï,é,à': a, u'あいうえお': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath, endian=endian)

    d2 = {k: v
          for k, v in kaldiio.load_ark(path.join('a.ark').strpath,
                                       endian=endian)}
    d5 = {k: v
          for k, v in kaldiio.load_scp(path.join('b.scp').strpath,
                                       endian=endian).items()}
    with io.open(path.join('a.ark').strpath, 'rb') as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd, endian=endian)}
    _compare(d2, origin)
    _compare(d5, origin)
    _compare(d6, origin)
def test_append_mode(tmpdir):
    path = tmpdir.mkdir('test')
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {'a': a, 'b': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath)

    kaldiio.save_ark(path.join('a2.ark').strpath, {'a': a},
                     scp=path.join('b2.scp').strpath, append=True)
    kaldiio.save_ark(path.join('a2.ark').strpath, {'b': b},
                     scp=path.join('b2.scp').strpath, append=True)
    d1 = {k: v for k, v in kaldiio.load_ark(path.join('a.ark').strpath)}
    d2 = {k: v
          for k, v in kaldiio.load_scp(path.join('b.scp').strpath).items()}
    d3 = {k: v for k, v in kaldiio.load_ark(path.join('a2.ark').strpath)}
    d4 = {k: v
          for k, v in kaldiio.load_scp(path.join('b2.scp').strpath).items()}
    _compare(d1, origin)
    _compare(d2, origin)
    _compare(d3, origin)
    _compare(d4, origin)
def test_read_arks(fname):
    # Assumes the "arks" dir exists in the same directory as this file
    ark0 = dict(kaldiio.load_ark(
        os.path.join(os.path.dirname(__file__), 'arks', 'test.ark')))
    ark = dict(kaldiio.load_ark(fname))
    _compare_allclose(ark, ark0, atol=1e-1)
def test_write_compressed_arks(tmpdir, compression_method):
    # Assumes the "arks" dir exists in the same directory as this file
    ark0 = dict(kaldiio.load_ark(
        os.path.join(os.path.dirname(__file__), 'arks', 'test.ark')))
    path = tmpdir.mkdir('test').join('c.ark').strpath
    kaldiio.save_ark(path, ark0, compression_method=compression_method)
    arkc = dict(kaldiio.load_ark(path))
    arkc_valid = dict(kaldiio.load_ark(
        os.path.join(os.path.dirname(__file__), 'arks',
                     'test.cm{}.ark'.format(compression_method))))
    _compare_allclose(arkc, arkc_valid, atol=1e-4)
def _tpl_command(wave: numpy.ndarray,
                 cmd: Sequence[str],
                 sample_rate: float,
                 buffering: int = -1,
                 verbose: int = -1) -> numpy.ndarray:
    if wave.dtype not in (numpy.int8, numpy.int16, numpy.int32):
        raise ValueError(
            'Can read only PCM data. Input as int8, int16 or int32 type '
            '(int24 is not supported by numpy).')
    if shutil.which(cmd[0]) is None:
        raise RuntimeError(f'Command not found: {cmd[0]}')

    with Popen(cmd, stdin=PIPE, stdout=PIPE,
               stderr=None if verbose > -1 else PIPE,
               bufsize=buffering) as p:
        with BytesIO() as fin:
            scipy.io.wavfile.write(fin, int(sample_rate), wave)
            stdin = DUMMY_KEY + fin.getvalue()
        stdout, stderr = p.communicate(input=stdin)
        if p.returncode != 0:
            if stderr is not None:
                ms = stderr.decode()
            else:
                ms = f'Fail: {" ".join(cmd)}'
            raise RuntimeError(ms)

    fout = BytesIO(stdout)
    return next(kaldiio.load_ark(fout))[1]
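# Hypothetical usage sketch for _tpl_command above. It assumes DUMMY_KEY is
# defined elsewhere in this module as the utterance key prepended to the wav
# bytes, and that a Kaldi feature binary reading "ark:-" and writing "ark:-"
# (e.g. compute-fbank-feats) is on PATH.
import numpy
wave = (numpy.random.rand(16000) * 32767).astype(numpy.int16)
feats = _tpl_command(wave,
                     ['compute-fbank-feats', 'ark:-', 'ark:-'],
                     sample_rate=16000)
print(feats.shape)  # (frames, feature-dim)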
def KaldiInfer(self, audio):
    wav_id = PackZero(self.utt_id, size=6)
    self.CreateTestEnv(audio, wav_id)
    audio_path = "audio_%s" % PackZero(self.utt_id, size=6)
    # pass workspace, infer_set and number of jobs
    infer_log = os.popen("%s %s %s 1" % (
        os.path.join(self.kaldi_workspace, "extract_post.sh"),
        self.kaldi_workspace,
        os.path.join(self.kaldi_workspace, "data", audio_path),
    ))
    infer_log = infer_log.readlines()
    if "infer success" not in " ".join(infer_log):
        print("Error\n%s" % infer_log)
    ark_post = os.path.join(self.kaldi_workspace, "data",
                            audio_path + "_post", "phone_post.1.ark")
    post_ark = load_ark(ark_post)
    for key, numpy_array in post_ark:
        if key == "%s_%s" % (wav_id, wav_id):
            post_numpy = numpy_array
            break
    self.utt_id += 1
    # add the column of each stressed phone into its non-stress counterpart
    if self.flag_version:
        del_list = [5, 9, 13, 17, 21, 25, 33, 37, 41, 48, 52, 62, 66, 76, 80]
        del_list_all = self.vowels_id
        for i in del_list:
            for j in [i, i - 1, i - 2]:
                post_numpy[:, j - 1] += post_numpy[:, j]
        post_numpy = np.delete(post_numpy, del_list_all, axis=1)
    return post_numpy
def test_write_read_int32_vector_ascii(tmpdir):
    path = tmpdir.mkdir("test")
    a = np.random.randint(1, 128, 10, dtype=np.int32)
    b = np.random.randint(1, 128, 10, dtype=np.int32)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath, origin, scp=path.join("b.scp").strpath, text=True
    )

    d2 = {k: v for k, v in kaldiio.load_ark(path.join("a.ark").strpath)}
    d5 = {k: v for k, v in kaldiio.load_scp(path.join("b.scp").strpath).items()}
    with io.open(path.join("a.ark").strpath, "rb") as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd)}
    _compare_allclose(d2, origin)
    _compare_allclose(d5, origin)
    _compare_allclose(d6, origin)
def test_read_arks():
    # Assumes the "arks" dir exists in the same directory as this file
    arkdir = os.path.join(os.path.dirname(__file__), 'arks')
    arks = {fname: dict(kaldiio.load_ark(fname))
            for fname in glob.glob(os.path.join(arkdir, '*.ark'))}
    fnames = list(arks)
    for fname in fnames[1:]:
        _compare_allclose(arks[fname], arks[fnames[0]], atol=1e-1)
def get_file_dict(
        self, path: str, uttid_locs: List[Tuple[str, str]] = None) \
        -> Dict[str, Dict[str, Any]]:
    def mat2dict(mat):
        # Treat the first element as the rate (falsy -> None)
        # and the remainder of the matrix as the data
        return {"rate": mat[(0,) * mat.ndim] or None, "x": mat[1:]}

    file_dict = OrderedDict(
        (uttid, mat2dict(mat)) for uttid, mat in kaldiio.load_ark(path))
    if uttid_locs is None:
        return file_dict
    return OrderedDict((k, file_dict[k]) for k, loc in uttid_locs)
def test_write_load(tmpdir):
    path = tmpdir.mkdir('test')
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {'a': a, 'b': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath)

    d2 = {k: v for k, v in kaldiio.load_ark(path.join('a.ark').strpath)}
    d5 = {k: v
          for k, v in kaldiio.load_scp(path.join('b.scp').strpath).items()}
    with open(path.join('a.ark').strpath, 'rb') as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd)}
    _compare(d2, origin)
    _compare(d5, origin)
    _compare(d6, origin)
def test_write_read_ascii(tmpdir):
    path = tmpdir.mkdir("test")
    a = np.random.rand(10, 10).astype(np.float32)
    b = np.random.rand(5, 35).astype(np.float32)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath, origin, scp=path.join("a.scp").strpath, text=True
    )

    d2 = {k: v for k, v in kaldiio.load_ark(path.join("a.ark").strpath)}
    d5 = {k: v for k, v in kaldiio.load_scp(path.join("a.scp").strpath).items()}
    _compare_allclose(d2, origin)
    _compare_allclose(d5, origin)
def test_write_read_int32_vector_ascii(tmpdir):
    path = tmpdir.mkdir('test')
    a = np.random.randint(1, 128, 10, dtype=np.int32)
    b = np.random.randint(1, 128, 10, dtype=np.int32)
    origin = {'a': a, 'b': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath, text=True)

    d2 = {k: v for k, v in kaldiio.load_ark(path.join('a.ark').strpath)}
    d5 = {k: v
          for k, v in kaldiio.load_scp(path.join('b.scp').strpath).items()}
    with open(path.join('a.ark').strpath, 'r') as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd)}
    _compare_allclose(d2, origin)
    _compare_allclose(d5, origin)
    _compare_allclose(d6, origin)
def call(self, stats):
    """Do CMVN.

    :param stats: Statistics of features.
    :return: None. Stores the per-speaker bias (-mean) and scale (1/std)
        in ``self.bias`` and ``self.scale``.
    """
    p = self.config
    if isinstance(stats, dict):
        stats_dict = dict(stats)
    else:
        if p.filetype == 'mat':
            stats_dict = {None: kaldiio.load_mat(stats)}
        elif p.filetype == 'ark':
            stats_dict = dict(kaldiio.load_ark(stats))
        else:
            raise ValueError('Not supporting filetype={}'.format(p.filetype))

    if p.utt2spk is not None:
        self.utt2spk = {}
        with io.open(p.utt2spk, 'r', encoding='utf-8') as f:
            for line in f:
                utt, spk = line.rstrip().split(None, 1)
                self.utt2spk[utt] = spk
    elif p.spk2utt is not None:
        self.utt2spk = {}
        with io.open(p.spk2utt, 'r', encoding='utf-8') as f:
            for line in f:
                spk, utts = line.rstrip().split(None, 1)
                for utt in utts.split():
                    self.utt2spk[utt] = spk
    else:
        self.utt2spk = None

    self.bias = {}
    self.scale = {}
    for spk, stats in stats_dict.items():
        assert len(stats) == 2, stats.shape
        count = stats[0, -1]
        if not (np.isscalar(count) or isinstance(count, (int, float))):
            count = count.flatten()[0]
        mean = stats[0, :-1] / count
        var = stats[1, :-1] / count - mean * mean
        std = np.maximum(np.sqrt(var), p.std_floor)
        self.bias[spk] = -mean
        self.scale[spk] = 1 / std
def main():
    parser = get_parser()
    args = parser.parse_args()

    d = kaldiio.load_ark(args.rspecifier)
    with file_writer_helper(
            args.wspecifier,
            filetype='mat',
            write_num_frames=args.write_num_frames,
            compress=args.compress,
            compression_method=args.compression_method) as writer:
        for utt, mat in d:
            writer[utt] = mat
def test_write_read_compress(tmpdir, compression_method, endian):
    path = tmpdir.mkdir("test")
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath,
        origin,
        scp=path.join("b.scp").strpath,
        compression_method=compression_method,
        endian=endian,
    )

    d2 = {k: v for k, v in kaldiio.load_ark(path.join("a.ark").strpath, endian=endian)}
    d5 = {
        k: v
        for k, v in kaldiio.load_scp(path.join("b.scp").strpath, endian=endian).items()
    }
    with io.open(path.join("a.ark").strpath, "rb") as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd, endian=endian)}
    _compare_allclose(d2, origin, atol=1e-1)
    _compare_allclose(d5, origin, atol=1e-1)
    _compare_allclose(d6, origin, atol=1e-1)
def test_write_load_ascii(tmpdir):
    path = tmpdir.mkdir('test')
    a = np.random.rand(10, 10).astype(np.float32)
    b = np.random.rand(5, 35).astype(np.float32)
    origin = {'a': a, 'b': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('a.scp').strpath, text=True)

    d2 = {k: v for k, v in kaldiio.load_ark(path.join('a.ark').strpath)}
    d5 = {k: v
          for k, v in kaldiio.load_scp(path.join('a.scp').strpath).items()}
    _compare_allclose(d2, origin)
    _compare_allclose(d5, origin)
def write_expanded_feature(raw_mfcc_and_pitch_file, output_data_dir):
    expanded_features = {}
    for utt, features in kaldiio.load_ark(raw_mfcc_and_pitch_file):
        num_frames = len(features)
        target_emotion_column = np.full((num_frames, 1),
                                        get_target_emotion(utt))
        expanded_feature = np.append(features, target_emotion_column, 1)
        expanded_features[utt] = expanded_feature

    (_, split, _) = raw_mfcc_and_pitch_file.split('.', 2)
    kaldiio.save_ark(
        os.path.join(output_data_dir,
                     'mfcc_pitch_and_target_emotion.%s.ark' % split),
        expanded_features,
        scp=os.path.join(output_data_dir,
                         'mfcc_pitch_and_target_emotion.%s.scp' % split))
def KaldiInfer(self, audio):
    wav_id = PackZero(self.utt_id, size=6)
    self.CreateTestEnv(audio, wav_id)
    audio_path = "audio_%s" % PackZero(self.utt_id, size=6)
    # pass workspace, infer_set and number of jobs
    infer_log = os.popen(
        "%s %s %s 1" % (os.path.join(self.kaldi_workspace, "extract_post.sh"),
                        self.kaldi_workspace,
                        os.path.join(self.kaldi_workspace, "data", audio_path)))
    infer_log = infer_log.readlines()
    if "infer success" not in " ".join(infer_log):
        print("Error\n%s" % infer_log)
    ark_post = os.path.join(self.kaldi_workspace, "data",
                            audio_path + "_post", "phone_post.1.ark")
    post_ark = load_ark(ark_post)
    for key, numpy_array in post_ark:
        if key == "%s_%s" % (wav_id, wav_id):
            post_numpy = numpy_array
            break
    self.utt_id += 1
    # add the column of each stressed phone into its non-stress counterpart
    if self.flag_version:
        del_list = [5, 9, 13, 17, 21, 25, 33, 37, 41, 48, 52, 62, 66, 76, 80]
        del_list_all = [
            3, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25,
            31, 32, 33, 35, 36, 37, 39, 40, 41, 46, 47, 48, 50, 51, 52, 60,
            61, 62, 64, 65, 66, 74, 75, 76, 78, 79, 80
        ]
        for i in del_list:
            for j in [i, i - 1, i - 2]:
                post_numpy[:, j - 1] += post_numpy[:, j]
        post_numpy = np.delete(post_numpy, del_list_all, axis=1)
        np.savetxt('post_new.csv', post_numpy, delimiter=',')  # TODO: delete later
    return post_numpy
def read_cmvn_stats(path, cmvn_type):
    if cmvn_type == "global":
        d = {None: kaldiio.load_mat(path)}
    else:
        d = dict(kaldiio.load_ark(path))
    return {spk: CMVNStats.from_numpy(stats) for spk, stats in d.items()}
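# Hedged usage sketch for read_cmvn_stats: a global stats file holds a single
# matrix (keyed by None), while per-speaker stats come as an ark of matrices
# keyed by speaker id. CMVNStats is assumed to be defined alongside this
# helper with the from_numpy() constructor used above; the paths are made up.
global_stats = read_cmvn_stats('cmvn.ark', 'global')        # {None: CMVNStats}
speaker_stats = read_cmvn_stats('cmvn_spk.ark', 'speaker')  # {spk: CMVNStats}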
def __init__(self, stats, norm_means=True, norm_vars=False,
             filetype='mat', utt2spk=None, spk2utt=None,
             reverse=False, std_floor=1.0e-20):
    self.stats_file = stats
    self.norm_means = norm_means
    self.norm_vars = norm_vars
    self.reverse = reverse

    if isinstance(stats, dict):
        stats_dict = dict(stats)
    else:
        # Used for global CMVN
        if filetype == 'mat':
            stats_dict = {None: kaldiio.load_mat(stats)}
        # Used for global CMVN
        elif filetype == 'npy':
            stats_dict = {None: np.load(stats)}
        # Used for speaker CMVN
        elif filetype == 'ark':
            self.accept_uttid = True
            stats_dict = dict(kaldiio.load_ark(stats))
        # Used for speaker CMVN
        elif filetype == 'hdf5':
            self.accept_uttid = True
            stats_dict = h5py.File(stats)
        else:
            raise ValueError('Not supporting filetype={}'.format(filetype))

    if utt2spk is not None:
        self.utt2spk = {}
        with io.open(utt2spk, 'r', encoding='utf-8') as f:
            for line in f:
                utt, spk = line.rstrip().split(None, 1)
                self.utt2spk[utt] = spk
    elif spk2utt is not None:
        self.utt2spk = {}
        with io.open(spk2utt, 'r', encoding='utf-8') as f:
            for line in f:
                spk, utts = line.rstrip().split(None, 1)
                for utt in utts.split():
                    self.utt2spk[utt] = spk
    else:
        self.utt2spk = None

    # Kaldi makes a matrix for CMVN which has a shape of (2, feat_dim + 1),
    # and the first vector contains the sum of feats and the second is
    # the sum of squares. The last value of the first vector, i.e.
    # stats[0, -1], is the number of samples for these statistics.
    self.bias = {}
    self.scale = {}
    for spk, stats in stats_dict.items():
        assert len(stats) == 2, stats.shape
        count = stats[0, -1]

        # If the feature has two or more dimensions
        if not (np.isscalar(count) or isinstance(count, (int, float))):
            # Only the first value is used
            count = count.flatten()[0]

        mean = stats[0, :-1] / count
        # V(x) = E(x^2) - (E(x))^2
        var = stats[1, :-1] / count - mean * mean
        std = np.maximum(np.sqrt(var), std_floor)
        self.bias[spk] = -mean
        self.scale[spk] = 1 / std
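# A worked sketch of the stats layout described in the comment above: the
# (2, feat_dim + 1) matrix accumulates the feature sum, the sum of squares,
# and the frame count, from which mean and variance are recovered. This is
# illustrative only and not part of the class.
import numpy as np

x = np.random.rand(100, 40)                # [Time, Dim]
stats = np.zeros((2, x.shape[1] + 1))
stats[0, :-1] = x.sum(axis=0)              # sum of feats
stats[1, :-1] = (x ** 2).sum(axis=0)       # sum of squares
stats[0, -1] = x.shape[0]                  # number of samples

count = stats[0, -1]
mean = stats[0, :-1] / count
var = stats[1, :-1] / count - mean * mean  # V(x) = E(x^2) - (E(x))^2
assert np.allclose(mean, x.mean(axis=0))
assert np.allclose(var, x.var(axis=0))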
def __init__(self, norm_means=True, norm_vars=False, std_floor=1.0e-20):
    self.norm_means = norm_means
    self.norm_vars = norm_vars
    self.std_floor = std_floor

def __repr__(self):
    return ('{name}(norm_means={norm_means}, norm_vars={norm_vars})'
            .format(name=self.__class__.__name__,
                    norm_means=self.norm_means,
                    norm_vars=self.norm_vars))

def __call__(self, x, uttid=None):
    # x: [Time, Dim]
    square_sums = (x ** 2).sum(axis=0)
    mean = x.mean(axis=0)

    if self.norm_means:
        x = np.subtract(x, mean)

    if self.norm_vars:
        var = square_sums / x.shape[0] - mean ** 2
        std = np.maximum(np.sqrt(var), self.std_floor)
        x = np.divide(x, std)

    return x


if __name__ == '__main__':
    a = kaldiio.load_ark(
        "/teamscratch/tts_intern_experiment/yuwu1/ASR/script/cmvn.ark")
    print(dict(a))
def main(argv):
    """Main script.

    :param argv: train or test path
    :returns: None
    """
    feature_name = 'energy'
    escale_16_PCM = 2 ** 15  # 16-bit PCM range: -32768 to +32767 (int16)

    # Define the test or train path
    path = sys.argv[1]
    folder = path.split("/")[1]
    kal_list = os.listdir(os.path.join('info_user', folder))

    # 1.- Load the configuration file
    frame_shift, frame_length = load_config()

    # 2.- Load the audio files and split them into windows to compute the energy
    audio_path = 'audio/experiment_lm'
    energy_dict = {}
    for kal in kal_list:
        if kal.endswith('.kal'):
            # 3.- Compute the energy and its derivatives
            fs, audio = wavfile.read(
                os.path.join(audio_path, kal[:-3] + 'wav'))
            audio = audio / escale_16_PCM
            kal_file = open(os.path.join('info_user', folder, kal), 'r')
            kal_file = kal_file.readlines()
            energy_vector, deltae_vector, deltade_vector = calculate_energy(
                audio, fs, frame_shift, frame_length, kal_file)

            # This adjustment is needed because the features are generated in
            # the order 1,2,3,... but must be stored as 1,10,11,12,... since
            # that is the (lexicographic) order used by Kaldi and the
            # dictionaries.
            key_list = list()
            for i in range(0, len(energy_vector)):
                key_list.append(kal[:-4] + '-utt' + str(i))
            key_list.sort()
            for i in range(0, len(energy_vector)):
                index = int(key_list[i][16:])
                energy_dict[key_list[i]] = np.vstack(
                    (np.asarray(energy_vector[index]),
                     np.asarray(deltae_vector[index]),
                     np.asarray(deltade_vector[index]))).T

    # 4.- Save as .ark and .scp
    if os.path.exists('mfcc') and len(os.listdir('mfcc')) > 0:
        features_listdir = os.listdir('mfcc')
        folder_features = 'mfcc'
    elif os.path.exists('plp') and len(os.listdir('plp')) > 0:
        features_listdir = os.listdir('plp')
        folder_features = 'plp'
    else:
        # No feature folders generated by Kaldi exist
        features_listdir = list()
        folder_features = 'none'

    if folder_features == 'none':
        # Take the computed features and split them into 4 parts,
        # creating our own listing
        d = split_dict(energy_dict, 4)
        index = 1
        for dic in d:
            destark_filename = ('raw_' + feature_name + '_' + folder
                                + '.' + str(index) + '.ark')
            destark_filename = os.path.join(os.getcwd(), feature_name,
                                            destark_filename)
            srcscp_filename = destark_filename.replace('ark', 'scp')
            kaldiio.save_ark(destark_filename, dic, scp=srcscp_filename)
            index = index + 1
    else:
        for file in features_listdir:
            write_dict = {}  # kaldiio uses features in the form of a dict
            if file.endswith('.ark') and folder in file:
                d = kaldiio.load_ark(os.path.join(folder_features, file))
                for key_kaldi, array_kaldi in d:
                    write_dict[key_kaldi] = energy_dict[key_kaldi]
                destark_filename = file.replace(folder_features, feature_name)
                destark_filename = os.path.join(os.getcwd(), feature_name,
                                                destark_filename)
                srcscp_filename = destark_filename.replace('ark', 'scp')
                kaldiio.save_ark(destark_filename, write_dict,
                                 scp=srcscp_filename)
def main(folder):
    print("Relocating features...")
    if folder == 'train':
        featuresCombined_path = 'combined_features'
        infouser_path = 'info_user'
        ark_list = os.listdir(featuresCombined_path)

        # 1.- Load .ark data and save it in a global dict
        global_dict = {}
        for ark in ark_list:
            if 'raw_data' in ark and ark.endswith('.ark'):
                d = kaldiio.load_ark(os.path.join(featuresCombined_path, ark))
                for key_kaldi, array_kaldi in d:
                    global_dict[key_kaldi] = array_kaldi

        # 2.- Generate train and test dicts
        kal_train = os.listdir(os.path.join(infouser_path, 'train'))
        kal_train = [k[:-4] for k in kal_train]  # -4 drops the '.kal' suffix
        kal_train.sort()
        kal_test = os.listdir(os.path.join(infouser_path, 'test'))
        kal_test = [k[:-4] for k in kal_test]
        kal_test.sort()
        train_dict = {}
        test_dict = {}
        global_list = list(global_dict.keys())
        global_list.sort()
        for key_kaldi in global_list:
            if key_kaldi[:-5] in kal_train:
                train_dict[key_kaldi] = global_dict[key_kaldi]
            elif key_kaldi[:-5] in kal_test:
                test_dict[key_kaldi] = global_dict[key_kaldi]

        # 3.- Save both dictionaries as .ark files
        for f in ark_list:
            os.remove(os.path.join(featuresCombined_path, f))
        num_split = int(
            len([s for s in ark_list if ('data_' + folder) in str(s)]) / 2)
        split_train = split_dict(train_dict, num_split)
        split_test = split_dict(test_dict, num_split)
        index = 1
        for dic in split_train:
            destark_filename = 'raw_data_train.' + str(index) + '.ark'
            destark_filename = os.path.join(
                os.getcwd(), featuresCombined_path, destark_filename)
            srcscp_filename = destark_filename.replace('ark', 'scp')
            kaldiio.save_ark(destark_filename, dic, scp=srcscp_filename)
            index = index + 1
        index = 1
        for dic in split_test:
            destark_filename = 'raw_data_test.' + str(index) + '.ark'
            destark_filename = os.path.join(
                os.getcwd(), featuresCombined_path, destark_filename)
            srcscp_filename = destark_filename.replace('ark', 'scp')
            kaldiio.save_ark(destark_filename, dic, scp=srcscp_filename)
            index = index + 1

    # 4.- Run the rest of makeFeats.sh
    bashCommand = "bash local/copy_scp_all.sh " + folder
    process = subprocess.Popen(bashCommand, shell=True)
    output, error = process.communicate()
    bashCommand = "bash utils/data/fix_data_dir.sh data/" + folder
    process = subprocess.Popen(bashCommand, shell=True)
    output, error = process.communicate()
    bashCommand = ("steps/compute_cmvn_stats.sh data/" + folder
                   + " exp/make_combined_features/" + folder
                   + " combined_features || exit 1;")
    process = subprocess.Popen(bashCommand, shell=True)
    output, error = process.communicate()
    bashCommand = "utils/validate_data_dir.sh data/" + folder
    process = subprocess.Popen(bashCommand, shell=True)
    output, error = process.communicate()
    print("Features relocated.")
def align(array: numpy.ndarray,
          transcription: Union[Sequence[int], numpy.ndarray],
          tree: str,
          transition_model: str,
          fst: str,
          batch_size: int = 250,
          read_disambig_syms: str = None,
          reorder: bool = True,
          rm_eps: bool = False,
          transition_scale: float = 1.0,
          acoustic_scale: float = 1,
          self_loop_scale: float = 0.1,
          beam: float = 200,
          binary: bool = True,
          careful: bool = False,
          retry_beam: float = 0,
          verbose: int = -1) -> numpy.ndarray:
    """compile-train-graphs + align-compiled-mapped

    Args:
        array (numpy.ndarray):
        transcription (Sequence[int]): A sequence of integers representing
            the transcription of an utterance. It can be obtained with
            % utils/sym2int.pl --map-oov $(cat $lang/oov.int) -f 2- \
                  $lang/words.txt $text
        tree (str): tree-in
        transition_model (str): transition-model-in ("final.mdl")
        fst (str): lexicon-fst-in ("$lang/L.fst")
        batch_size (int): Number of FSTs to compile at a time
            (more -> faster but uses more memory, e.g. 500)
        read_disambig_syms (str): File containing the list of disambiguation
            symbols in the phone symbol table,
            $lang/phones/disambig.int
        reorder (bool): Reorder transition ids for greater decoding
            efficiency.
        rm_eps (bool): Remove [most] epsilons before minimization
            (only applicable if disambig symbols are present)
        transition_scale (float):
        acoustic_scale (float): Scaling factor for acoustic likelihoods
        self_loop_scale (float):
        beam (float): Decoding beam used in alignment
        binary (bool): Write output in binary mode
        careful (bool): If true, do 'careful' alignment, which is better at
            detecting alignment failure (involves a loop to the start of the
            decoding graph).
        retry_beam (float): Decoding beam for the second try at alignment
        verbose (int): Verbose level (higher -> more logging)
    """
    for cmd in ['compile-train-graphs', 'align-compiled-mapped']:
        if shutil.which(cmd) is None:
            raise RuntimeError(f'Command not found: {cmd}')

    if isinstance(transcription, numpy.ndarray) and \
            len(transcription.shape) == 1 and transcription.dtype.kind == 'i':
        transcription = [int(v) for v in transcription]
    if not isinstance(transcription, collections.abc.Sequence) or \
            any(not isinstance(v, int) for v in transcription):
        raise TypeError(f'{type(transcription)} is not a sequence of integers')

    if read_disambig_syms is None:
        read_disambig_syms = ''
    # Note: the parentheses are required here; without them the conditional
    # expression would swallow the whole list when read_disambig_syms is ''.
    for f in [tree, transition_model, fst] + \
            ([read_disambig_syms] if read_disambig_syms != '' else []):
        if not Path(f).exists():
            raise FileNotFoundError(f'{f}: No such file or directory')

    if array.ndim != 2:
        raise ValueError(
            'The input posterior must be a 2-dimensional array: '
            f'{array.ndim} != 2')
    if len(array) < len(transcription):
        raise ValueError(
            f'The frame length of the input likelihood array '
            f'is shorter than the length of the transcription: '
            f'{len(array)} < {len(transcription)}')

    # Don't support self-loop-scale and transition-scale
    # because I don't understand how to input...
    compile_train_graph = (
        f'echo {DUMMY_KEY} {" ".join(map(str, transcription))} | '
        f'compile-train-graphs '
        f'--batch-size={batch_size} '
        f'--read-disambig-syms={read_disambig_syms} '
        f'--reorder={str(reorder).lower()} '
        f'--rm-eps={str(rm_eps).lower()} '
        f'--verbose={max(verbose, 0)} '
        f'{tree} {transition_model} {fst} ark:- ark:- |')

    cmds = ['align-compiled-mapped',
            f'--transition-scale={transition_scale}',
            f'--self-loop-scale={self_loop_scale}',
            f'--acoustic-scale={acoustic_scale}',
            f'--beam={beam}',
            f'--binary={str(binary).lower()}',
            f'--careful={str(careful).lower()}',
            f'--retry-beam={retry_beam}',
            f'--verbose={max(verbose, 0)}',
            f'{transition_model}',
            f'ark:{compile_train_graph}',
            'ark:-',
            'ark:-']
    with Popen(cmds, stdin=PIPE, stdout=PIPE,
               stderr=None if verbose > -1 else PIPE, bufsize=-1) as p:
        try:
            kaldiio.save_ark(p.stdin, {DUMMY_KEY: array})
        except BrokenPipeError:
            pass  # communicate() must ignore broken pipe errors.
        except OSError as e:
            if e.errno == errno.EINVAL and p.poll() is not None:
                # Issue #19612: On Windows, stdin.write() fails with EINVAL
                # if the process already exited before the write
                pass
            else:
                raise
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            if stderr is not None:
                ms = stderr.decode()
            else:
                ms = f'Fail: {" ".join(cmds)}'
            raise RuntimeError(ms)

    fout = BytesIO(stdout)
    return next(kaldiio.load_ark(fout))[1]
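# Hypothetical usage sketch for align() above, assuming the Kaldi binaries
# are on PATH and that the model/graph paths below exist. The array holds
# per-frame acoustic log-likelihoods and the transcription is a list of
# integer word ids from $lang/words.txt; all values here are made up.
import numpy
log_likes = numpy.random.rand(200, 2000).astype(numpy.float32)
word_ids = [42, 7, 19]
ali = align(log_likes, word_ids,
            tree='exp/tri3/tree',
            transition_model='exp/tri3/final.mdl',
            fst='data/lang/L.fst',
            read_disambig_syms='data/lang/phones/disambig.int')
print(ali)  # per-frame alignment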
ark_dir = ['trainNet_sp/deltafalse', 'valid/deltafalse', 'test/deltafalse']
ark_dir = [data_dir + x for x in ark_dir]
save_dir = 'data/timit_dataset/'
save_sub_dirs = ['train/', 'valid/', 'test/']
save_sub_dirs = [save_dir + x for x in save_sub_dirs]

if not os.path.exists(save_dir):
    os.makedirs(save_dir)
for i in save_sub_dirs:
    if not os.path.exists(i):
        os.makedirs(i)

for i, data_type in enumerate(ark_dir):
    dir = data_type
    save_path = save_sub_dirs[i]
    label_file = label_files[i]
    df = pd.read_csv(label_file, sep='\t', header=None)
    df.set_index(0, inplace=True)
    for file in glob(f'{dir}/*.ark'):
        d = kaldiio.load_ark(file)
        for key, numpy_array in d:
            label = df[1][key]
            print(f'{save_path}{key}_{label}.npy')
            np.save(f'{save_path}{key}_{label}.npy', numpy_array.T)

for i in save_sub_dirs:
    datatype = i.split('/')[1]
    num = len(os.listdir(i))
    print(f'{datatype} => {num}')
# x-vectors from the .ark files for single speakers
import numpy as np
import kaldiio
import os

xvector_dir = ('espnet/egs/libritts/tts1/exp/xvector_nnet_1a/'
               'xvectors_train_clean_100')
for filename in os.listdir(xvector_dir):
    if filename.endswith('.ark'):
        # load_ark reads lazily, yielding (key, array) pairs
        f = kaldiio.load_ark(os.path.join(xvector_dir, filename))
        for key, numpy_array in f:
            print(filename, key)
            path = 'xvector/' + key + '.txt'
            print(path)
            np.savetxt(path, numpy_array)