# Imports assumed by the kaldiio test snippets below; snippets collected from
# other sources need their own (os, torch, random, etc.).
import io

import numpy as np

import kaldiio


def test_write_read_multiark(tmpdir, endian, dtype):
    path = tmpdir.mkdir("test")
    a = np.random.rand(1000, 120).astype(dtype)
    b = np.random.rand(10, 120).astype(dtype)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath,
        origin,
        scp=path.join("b.scp").strpath,
        endian=endian,
    )

    c = np.random.rand(1000, 120).astype(dtype)
    d = np.random.rand(10, 120).astype(dtype)
    origin.update({"c": c, "d": d})
    with io.open(path.join("b.scp").strpath, "a", encoding="utf-8") as f:
        kaldiio.save_ark(path.join("b.ark").strpath, origin, scp=f, endian=endian)

    d5 = {
        k: v
        for k, v in kaldiio.load_scp(path.join("b.scp").strpath, endian=endian).items()
    }
    _compare(d5, origin)

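# _compare and _compare_allclose are referenced throughout these tests but not
# shown here. A minimal sketch of their assumed semantics: exact key/value
# equality for _compare, tolerance-based equality for _compare_allclose.
def _compare(d, origin):
    assert set(d.keys()) == set(origin.keys())
    for key in d:
        np.testing.assert_array_equal(d[key], origin[key])


def _compare_allclose(d, origin, rtol=1e-07, atol=0.0):
    assert set(d.keys()) == set(origin.keys())
    for key in d:
        np.testing.assert_allclose(d[key], origin[key], rtol=rtol, atol=atol)
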
def test_write_read(tmpdir, shape1, shape2, endian, dtype, max_cache_fd):
    path = tmpdir.mkdir("test")
    a = np.random.rand(*shape1).astype(dtype)
    b = np.random.rand(*shape2).astype(dtype)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath,
        origin,
        scp=path.join("b.scp").strpath,
        endian=endian,
    )

    d2 = {k: v for k, v in kaldiio.load_ark(path.join("a.ark").strpath, endian=endian)}
    d5 = {
        k: v
        for k, v in kaldiio.load_scp(
            path.join("b.scp").strpath, endian=endian, max_cache_fd=max_cache_fd
        ).items()
    }
    with io.open(path.join("a.ark").strpath, "rb") as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd, endian=endian)}
    _compare(d2, origin)
    _compare(d5, origin)
    _compare(d6, origin)

def scp2array_dic(
    scp_path, array_dic=None, ark_path=None, compression_method=None, append=False
):
    """Read an array dict from an ark indexed by an scp, or write an array dict
    to an ark while creating the scp that indexes it.

    :param scp_path: file path of the scp
    :param array_dic: dict of arrays; None selects read mode, otherwise write mode
    :param ark_path: file path of the ark, default is scp_path.replace('.scp', '.ark')
    :param compression_method: compression method, default=None,
        kAutomaticMethod=1, kSpeechFeature=2, kTwoByteAuto=3,
        kTwoByteSignedInteger=4, kOneByteAuto=5, kOneByteUnsignedInteger=6,
        kOneByteZeroOne=7
    :param append: if True, append; otherwise overwrite
    :return: dict of numpy arrays when reading, None when writing
    """
    if array_dic is None:
        array_dic = kaldiio.load_scp(scp_path)
        return array_dic
    else:
        if ark_path is None:
            ark_path = scp_path.replace(".scp", ".ark")
        kaldiio.save_ark(
            ark=ark_path,
            array_dict=array_dic,
            scp=scp_path,
            compression_method=compression_method,
            append=append,
        )
        return None

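# A minimal round-trip sketch for scp2array_dic; the "feats.scp" path and the
# feature shape are illustrative, not taken from the source above.
feats = {"utt1": np.random.rand(100, 40).astype(np.float32)}
scp2array_dic("feats.scp", array_dic=feats)  # write mode: creates feats.ark + feats.scp
loaded = scp2array_dic("feats.scp")          # read mode: lazy dict backed by the scp
print(loaded["utt1"].shape)                  # (100, 40)
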
def test_write_read_multiark_sequential(tmpdir, endian):
    path = tmpdir.mkdir('test')
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {'a': a, 'b': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath, endian=endian)

    c = np.random.rand(1000, 120).astype(np.float32)
    d = np.random.rand(10, 120).astype(np.float32)
    origin.update({'c': c, 'd': d})
    with open(path.join('b.scp').strpath, 'a') as f:
        kaldiio.save_ark(path.join('b.ark').strpath, origin, scp=f, endian=endian)

    d5 = {
        k: v
        for k, v in kaldiio.load_scp_sequential(path.join('b.scp').strpath,
                                                endian=endian)
    }
    _compare(d5, origin)

def test_write_read_int32_vector(tmpdir, endian):
    path = tmpdir.mkdir('test')
    a = np.random.randint(1, 128, 10, dtype=np.int32)
    b = np.random.randint(1, 128, 10, dtype=np.int32)
    origin = {u'Ï,é,à': a, u'あいうえお': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath, endian=endian)

    d2 = {k: v
          for k, v in kaldiio.load_ark(path.join('a.ark').strpath,
                                       endian=endian)}
    d5 = {k: v
          for k, v in kaldiio.load_scp(path.join('b.scp').strpath,
                                       endian=endian).items()}
    with io.open(path.join('a.ark').strpath, 'rb') as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd, endian=endian)}
    _compare(d2, origin)
    _compare(d5, origin)
    _compare(d6, origin)

def test_write_read_compress(tmpdir, compression_method, endian):
    path = tmpdir.mkdir('test')
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {u'Ï,é,à': a, u'あいうえお': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath,
                     compression_method=compression_method,
                     endian=endian)

    d2 = {k: v
          for k, v in kaldiio.load_ark(path.join('a.ark').strpath,
                                       endian=endian)}
    d5 = {k: v
          for k, v in kaldiio.load_scp(path.join('b.scp').strpath,
                                       endian=endian).items()}
    with io.open(path.join('a.ark').strpath, 'rb') as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd, endian=endian)}
    _compare_allclose(d2, origin, atol=1e-1)
    _compare_allclose(d5, origin, atol=1e-1)
    _compare_allclose(d6, origin, atol=1e-1)

def test_write_read(tmpdir, shape1, shape2, endian, dtype):
    path = tmpdir.mkdir('test')
    a = np.random.rand(*shape1).astype(dtype)
    b = np.random.rand(*shape2).astype(dtype)
    origin = {u'Ï,é,à': a, u'あいうえお': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath, endian=endian)

    d2 = {k: v
          for k, v in kaldiio.load_ark(path.join('a.ark').strpath,
                                       endian=endian)}
    d5 = {k: v
          for k, v in kaldiio.load_scp(path.join('b.scp').strpath,
                                       endian=endian).items()}
    with io.open(path.join('a.ark').strpath, 'rb') as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd, endian=endian)}
    _compare(d2, origin)
    _compare(d5, origin)
    _compare(d6, origin)

def prepareSimData(dataset, audiofiles, destloc, preprocess):
    pbar = pkbar.Pbar(name='Preparing SpecImages for ' + dataset + ' (Sim)',
                      target=len(audiofiles))
    for i in range(len(audiofiles)):
        audio = audiofiles[i].strip()
        audioName, audioLoc = audio.split()
        # Map each reverberant utterance to its clean counterpart.
        if dataset == 'Train':
            cleanLoc = audioLoc.replace('REVERB_WSJCAM0_tr', 'WSJCAM0')
            cleanLoc = cleanLoc.replace('/mc_train', '')
            cleanLoc = cleanLoc.replace('_ch1', '')
        else:
            cleanLoc = audioLoc.replace('far_test', 'cln_test')
            cleanLoc = cleanLoc.replace('near_test', 'cln_test')
            cleanLoc = cleanLoc.replace('_ch1', '')
        reverbPSD, cleanPSD = spectralImages_2D(audioName, audioLoc, cleanLoc,
                                                preprocess)
        reverb_arkfile = destloc + '/1ch/' + dataset + '/Sim/' + audioName + '.ark'
        clean_arkfile = destloc + '/1ch/' + dataset + '/Clean/' + audioName + '.ark'
        Path(os.path.dirname(reverb_arkfile)).mkdir(parents=True, exist_ok=True)
        Path(os.path.dirname(clean_arkfile)).mkdir(parents=True, exist_ok=True)
        # Replace the '.ark' extension only, so an 'ark' substring elsewhere in
        # the path is untouched.
        kaldiio.save_ark(reverb_arkfile, reverbPSD,
                         scp=reverb_arkfile.replace('.ark', '.scp'))
        kaldiio.save_ark(clean_arkfile, cleanPSD,
                         scp=clean_arkfile.replace('.ark', '.scp'))
        pbar.update(i)
    os.system('cat ' + destloc + '/1ch/Dev/Sim/*.scp | sort > '
              + destloc + '/1ch/Dev/Sim/wav.scp')
    os.system('cat ' + destloc + '/1ch/Dev/Clean/*.scp | sort > '
              + destloc + '/1ch/Dev/Clean/wav.scp')

def test_write_read_multiark(tmpdir, endian, dtype):
    path = tmpdir.mkdir('test')
    a = np.random.rand(1000, 120).astype(dtype)
    b = np.random.rand(10, 120).astype(dtype)
    origin = {u'Ï,é,à': a, u'あいうえお': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath, endian=endian)

    c = np.random.rand(1000, 120).astype(dtype)
    d = np.random.rand(10, 120).astype(dtype)
    origin.update({u'c': c, u'd': d})
    with io.open(path.join('b.scp').strpath, 'a', encoding='utf-8') as f:
        kaldiio.save_ark(path.join('b.ark').strpath, origin, scp=f,
                         endian=endian)

    d5 = {k: v
          for k, v in kaldiio.load_scp(path.join('b.scp').strpath,
                                       endian=endian).items()}
    _compare(d5, origin)

def extract(scp_path, model, name=None, path='./', dif_pairs=20, sam_pairs=-1):
    # split('\.') matched a literal backslash-dot and never split anything;
    # split on '.' to strip the extension.
    name = name if name is not None else os.path.basename(scp_path).split('.')[0]
    loder = KaldiLoader(scp_path=scp_path)
    meta_dict = {}
    key_list = []

    # Same-speaker pairs.
    for spk in loder.spk_list:
        utt_list = get_utt_list(loder.raw_labels, loder.spk2indexes[spk])
        key_list.clear()
        for utt1 in utt_list:
            for utt2 in utt_list:
                key_list.append("{}#{}".format(utt1, utt2))
        # -1 means "use every pair"; otherwise cap at the number available.
        n_sam = min(len(key_list), sam_pairs) if sam_pairs != -1 else len(key_list)
        for key in random.sample(key_list, n_sam):
            utt1, utt2 = key.split('#')
            utt_xvec1 = loder.datas.get(utt1)
            utt_xvec2 = loder.datas.get(utt2)
            meta_dict[key] = forward(utt_xvec1, utt_xvec2, model)

    # Different-speaker pairs.
    for spk1, spk2 in combinations(loder.spk_list, 2):
        utt_list1 = get_utt_list(loder.raw_labels, loder.spk2indexes[spk1])
        utt_list2 = get_utt_list(loder.raw_labels, loder.spk2indexes[spk2])
        key_list.clear()
        for utt1 in utt_list1:
            for utt2 in utt_list2:
                key_list.append("{}#{}".format(utt1, utt2))
        n_dif = min(len(key_list), dif_pairs) if dif_pairs != -1 else len(key_list)
        for key in random.sample(key_list, n_dif):
            utt1, utt2 = key.split('#')
            utt_xvec1 = loder.datas.get(utt1)
            utt_xvec2 = loder.datas.get(utt2)
            meta_dict[key] = forward(utt_xvec1, utt_xvec2, model)

    ko.save_ark(ark=os.path.join(path, '{}.ark'.format(name)),
                array_dict=meta_dict,
                scp=os.path.join(path, '{}.scp'.format(name)))

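# forward() is referenced above but not shown. A minimal sketch under the
# assumption that the model scores a pair of x-vectors directly; the call
# signature model(t1, t2) is an assumption, not the original helper.
def forward(xvec1, xvec2, model):
    import torch
    with torch.no_grad():
        t1 = torch.from_numpy(np.asarray(xvec1)).float().unsqueeze(0)
        t2 = torch.from_numpy(np.asarray(xvec2)).float().unsqueeze(0)
        # Return at least a 1-D array so save_ark can serialize the score.
        return np.atleast_1d(model(t1, t2).squeeze().cpu().numpy())
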
def __call__(self, key, value_dict):
    # Concatenate the rate to the signal to save it
    rate = value_dict.get("rate", 0)
    array = value_dict["x"]
    array = np.concatenate((rate * np.ones((1, *array.shape[1:])), array), axis=0)
    kaldiio.save_ark(self.writer, {key: array},
                     scp=self.writer_scp,
                     compression_method=self.compression_method)

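# A minimal sketch of the matching read side, assuming the layout written
# above: row 0 (or element 0 for 1-D signals) carries the rate, the rest is
# the signal. The "data.scp" path is illustrative.
def split_rate_and_signal(array):
    rate = int(array[0] if array.ndim == 1 else array[0, 0])
    return rate, array[1:]

for key, array in kaldiio.load_scp("data.scp").items():
    rate, x = split_rate_and_signal(array)
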
def evaluation(model, prior, loader, epoch, args, task='dev'):
    r"""Forward the model with PyTorch and run decoding with Kaldi.

    Args:
        model (nn.Module): dnn model
        prior (array): prior probability $\mathcal{P}(HMMstate)$
        loader (dataloader): data generator
        epoch (int): current epoch
        args (arguments): parameters for decoding
        task (str): evaluation on the dev set or the test set

    Returns:
        output, err (str): runtime log
        total_loss / total_utt (float): average loss
        total_acc / total_utt (float): average accuracy
    """
    # forward model
    model.eval()
    likelihood_dict = {}
    total_loss = 0.
    total_acc = 0.
    total_utt = 0
    for (utt_id, utt_feat, utt_align) in loader:
        utt_feat = torch.from_numpy(utt_feat)
        utt_align = torch.from_numpy(utt_align)
        log_probs = model(utt_feat)
        loss, acc = cal_loss_acc(log_probs, utt_align, -1)
        total_loss += loss.item()
        total_acc += acc
        total_utt += len(utt_id)
        # Convert posteriors to pseudo-likelihoods by subtracting the log prior.
        likelihood = log_probs[0, :, :].data.cpu().numpy().reshape(
            -1, args.cdphones_num) - prior
        likelihood_dict[utt_id[0]] = likelihood

    # Decoding is time consuming, especially at the early stages of training,
    # so we only do this for the last epoch.
    if args.epochs - epoch < 2:
        # write likelihoods to ark files
        if not os.path.exists(args.output_dir + '/decode_' + task):
            os.makedirs(args.output_dir + '/decode_' + task)
        ark_file = os.path.join(args.output_dir, 'decode_' + task,
                                'ep' + str(epoch) + task + '_likelihood.ark')
        kaldiio.save_ark(ark_file, likelihood_dict)

        # run Kaldi for decoding
        ground_truth_dir = args.output_dir + '/../data/text_' + task
        cmd_decode = ('cd kaldi_decoding_script && ./decode_dnn.sh --kaldi-root '
                      + args.kaldi_root + ' ' + args.graph_dir + ' '
                      + ground_truth_dir + ' ' + args.gmmhmm + ' ' + ark_file
                      + ' && cd ..')
        output, err = run_shell(cmd_decode)
    else:
        output = "".encode("utf-8")
        err = "".encode("utf-8")
    return output, err, total_loss / total_utt, total_acc / total_utt

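# cal_loss_acc is referenced above but not shown. A minimal sketch, assuming
# log_probs is (batch, time, states), utt_align holds integer state targets,
# and the third argument is a padding label to ignore; this is an assumption,
# not the original helper.
import torch.nn.functional as F

def cal_loss_acc(log_probs, align, ignore_index):
    logp = log_probs.reshape(-1, log_probs.size(-1))
    target = align.reshape(-1).long()
    loss = F.nll_loss(logp, target, ignore_index=ignore_index)
    mask = target != ignore_index
    acc = (logp.argmax(dim=-1)[mask] == target[mask]).float().mean().item()
    return loss, acc
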
def main_concatenate(audio_dir, visual_dir, store_dir, ji=None, nj=None):
    audio_loader = scp2array_dic(
        scp_path=os.path.join(audio_dir, "feats.scp"),
        array_dic=None,
        ark_path=None,
        compression_method=None,
        append=False,
    )
    visual_npz_dic = {}
    with codecs.open(os.path.join(visual_dir, "embedding.scp"), "r") as handle:
        lines_content = handle.readlines()
    for line_content in [
        *map(lambda x: x[:-1] if x[-1] in ["\n"] else x, lines_content)
    ]:
        key, path = line_content.split(" ")
        visual_npz_dic[key] = path
    common_keys = [*(set(audio_loader.keys()) & set(visual_npz_dic.keys()))]
    store_scp = os.path.abspath(
        os.path.join(store_dir, "raw_av_embedding.{}.scp".format(ji))
    )
    store_ark = os.path.abspath(
        os.path.join(store_dir, "raw_av_embedding.{}.ark".format(ji))
    )
    for key_idx in tqdm(
        range(len(common_keys)), leave=True, desc="0" if ji is None else str(ji)
    ):
        # Single-job runs process every key; multi-job runs take the keys
        # assigned to this job index.
        processing_token = ji is None or key_idx % nj == ji
        if processing_token:
            key = common_keys[key_idx]
            audio_array = audio_loader[key]
            visual_array = np.load(visual_npz_dic[key])["data"][0]
            # Repeat each visual frame 4x along time to match the audio rate,
            # then trim to the audio length.
            expend_visual_array = np.stack(
                [visual_array for _ in range(4)], axis=-1
            ).reshape(-1, visual_array.shape[-1])
            expend_visual_array = expend_visual_array[1:]
            expend_visual_array = expend_visual_array[: audio_array.shape[0]]
            audio_visual_array = np.concatenate(
                [audio_array, expend_visual_array], axis=-1
            )
            kaldiio.save_ark(
                ark=store_ark,
                array_dict={key: audio_visual_array},
                scp=store_scp,
                append=True,
            )
    return None

def test_write_read_ascii(tmpdir):
    path = tmpdir.mkdir("test")
    a = np.random.rand(10, 10).astype(np.float32)
    b = np.random.rand(5, 35).astype(np.float32)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath, origin, scp=path.join("a.scp").strpath, text=True
    )
    d2 = {k: v for k, v in kaldiio.load_ark(path.join("a.ark").strpath)}
    d5 = {k: v for k, v in kaldiio.load_scp(path.join("a.scp").strpath).items()}
    _compare_allclose(d2, origin)
    _compare_allclose(d5, origin)

def test_write_compressed_arks(tmpdir, compression_method):
    # Assumes an 'arks' directory exists next to this test file.
    ark0 = dict(
        kaldiio.load_ark(
            os.path.join(os.path.dirname(__file__), 'arks', 'test.ark')))
    path = tmpdir.mkdir('test').join('c.ark').strpath
    kaldiio.save_ark(path, ark0, compression_method=compression_method)
    arkc = dict(kaldiio.load_ark(path))
    arkc_valid = dict(
        kaldiio.load_ark(
            os.path.join(os.path.dirname(__file__), 'arks',
                         'test.cm{}.ark'.format(compression_method))))
    _compare_allclose(arkc, arkc_valid, atol=1e-4)

def test_write_load_ascii(tmpdir):
    path = tmpdir.mkdir('test')
    a = np.random.rand(10, 10).astype(np.float32)
    b = np.random.rand(5, 35).astype(np.float32)
    origin = {'a': a, 'b': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('a.scp').strpath, text=True)
    d2 = {k: v for k, v in kaldiio.load_ark(path.join('a.ark').strpath)}
    d5 = {
        k: v
        for k, v in kaldiio.load_scp(path.join('a.scp').strpath).items()
    }
    _compare_allclose(d2, origin)
    _compare_allclose(d5, origin)

def write_expanded_feature(raw_mfcc_and_pitch_file, output_data_dir):
    expanded_features = {}
    for utt, features in kaldiio.load_ark(raw_mfcc_and_pitch_file):
        num_frames = len(features)
        # Append the target emotion as an extra feature column on every frame.
        target_emotion_column = np.full((num_frames, 1), get_target_emotion(utt))
        expanded_feature = np.append(features, target_emotion_column, 1)
        expanded_features[utt] = expanded_feature
    # Filenames look like <name>.<split>.ark; grab the split id.
    (_, split, _) = raw_mfcc_and_pitch_file.split('.', 2)
    kaldiio.save_ark(
        os.path.join(output_data_dir,
                     'mfcc_pitch_and_target_emotion.%s.ark' % split),
        expanded_features,
        scp=os.path.join(output_data_dir,
                         'mfcc_pitch_and_target_emotion.%s.scp' % split))

def test_write_read_sequential(tmpdir, endian):
    path = tmpdir.mkdir('test')
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {u'Ï,é,à': a, u'あいうえお': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath, endian=endian)
    d5 = {
        k: v
        for k, v in kaldiio.load_scp_sequential(path.join('b.scp').strpath,
                                                endian=endian)
    }
    _compare(d5, origin)

def test_write_read_int32_vector_ascii(tmpdir):
    path = tmpdir.mkdir("test")
    a = np.random.randint(1, 128, 10, dtype=np.int32)
    b = np.random.randint(1, 128, 10, dtype=np.int32)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath, origin, scp=path.join("b.scp").strpath, text=True
    )
    d2 = {k: v for k, v in kaldiio.load_ark(path.join("a.ark").strpath)}
    d5 = {k: v for k, v in kaldiio.load_scp(path.join("b.scp").strpath).items()}
    with io.open(path.join("a.ark").strpath, "rb") as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd)}
    _compare_allclose(d2, origin)
    _compare_allclose(d5, origin)
    _compare_allclose(d6, origin)

def main(args):
    with kaldiio.ReadHelper(f'scp:{args.emb_in}') as reader:
        x_vector_u = {utt: embd for utt, embd in reader}
    R = np.load(args.rotation)

    # Convert from dictionaries to numpy arrays
    u_out, u_out_label = (
        np.array([x_vector_u[i] for i in x_vector_u]),
        np.array([i for i in x_vector_u]),
    )
    _, _, emb, emb_label = frontend(args, np.zeros((512, 512)), np.zeros((512,)),
                                    u_out, u_out_label)
    R_emb = np.dot(emb, R)
    scp_data = {utt: embd for utt, embd in zip(emb_label, R_emb)}
    kaldiio.save_ark(f'{args.emb_out}/transformed_xvector.ark', scp_data,
                     scp=f'{args.emb_out}/transformed_xvector.scp')

def prepareRealData(dataset, audiofiles, destloc, preprocess):
    pbar = pkbar.Pbar(name='Preparing SpecImages for ' + dataset + ' (Real)',
                      target=len(audiofiles))
    for i in range(len(audiofiles)):
        audio = audiofiles[i].strip()
        audioName, audioLoc = audio.split()
        reverbPSD = spectralImages_1D(audioName, audioLoc, preprocess)
        reverb_arkfile = destloc + '/1ch/Dev/Real/' + audioName + '.ark'
        Path(os.path.dirname(reverb_arkfile)).mkdir(parents=True, exist_ok=True)
        # Replace the '.ark' extension only, so an 'ark' substring elsewhere in
        # the path is untouched.
        kaldiio.save_ark(reverb_arkfile, reverbPSD,
                         scp=reverb_arkfile.replace('.ark', '.scp'))
        pbar.update(i)
    os.system('cat ' + destloc + '/1ch/Dev/Real/*.scp | sort > '
              + destloc + '/1ch/Dev/Real/wav.scp')

def test_write_read_sequential(tmpdir, endian):
    path = tmpdir.mkdir("test")
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath,
        origin,
        scp=path.join("b.scp").strpath,
        endian=endian,
    )
    d5 = {
        k: v
        for k, v in kaldiio.load_scp_sequential(
            path.join("b.scp").strpath, endian=endian
        )
    }
    _compare(d5, origin)

def test_write_load(tmpdir):
    path = tmpdir.mkdir('test')
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {'a': a, 'b': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath)
    d2 = {k: v for k, v in kaldiio.load_ark(path.join('a.ark').strpath)}
    d5 = {
        k: v
        for k, v in kaldiio.load_scp(path.join('b.scp').strpath).items()
    }
    with open(path.join('a.ark').strpath, 'rb') as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd)}
    _compare(d2, origin)
    _compare(d5, origin)
    _compare(d6, origin)

def extract():
    model = load_model()
    model.eval()
    transform = Transpose2D()
    dataset = KaldiFeatDataset(root=args.root, transform=transform)
    loader = DataLoader(dataset,
                        batch_size=1,
                        shuffle=False,
                        num_workers=1,
                        pin_memory=args.pin_memory)
    utt2emb = {}
    for data, utt in tqdm(loader):
        with torch.no_grad():
            data = data.to(device)
            emb = model(data)[0].cpu().numpy()
            if args.norm:
                emb = emb / linalg.norm(emb)
            utt2emb[utt[0]] = emb
    kaldiio.save_ark(args.output + '.ark', utt2emb, args.output + '.scp')

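# Transpose2D is referenced above but not shown. A minimal sketch, assuming it
# swaps the two axes of a 2-D feature matrix (e.g. (time, freq) -> (freq, time))
# before batching; this is an assumption, not the original transform.
class Transpose2D:
    def __call__(self, feat):
        return feat.T
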
def test_write_read_int32_vector_ascii(tmpdir):
    path = tmpdir.mkdir('test')
    a = np.random.randint(1, 128, 10, dtype=np.int32)
    b = np.random.randint(1, 128, 10, dtype=np.int32)
    origin = {'a': a, 'b': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath, text=True)
    d2 = {k: v for k, v in kaldiio.load_ark(path.join('a.ark').strpath)}
    d5 = {
        k: v
        for k, v in kaldiio.load_scp(path.join('b.scp').strpath).items()
    }
    with open(path.join('a.ark').strpath, 'r') as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd)}
    _compare_allclose(d2, origin)
    _compare_allclose(d5, origin)
    _compare_allclose(d6, origin)

def test_append_mode(tmpdir):
    path = tmpdir.mkdir("test")
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath, origin, scp=path.join("b.scp").strpath
    )
    kaldiio.save_ark(
        path.join("a2.ark").strpath,
        {"Ï,é,à": a},
        scp=path.join("b2.scp").strpath,
        append=True,
    )
    kaldiio.save_ark(
        path.join("a2.ark").strpath,
        {"あいうえお": b},
        scp=path.join("b2.scp").strpath,
        append=True,
    )
    d1 = {k: v for k, v in kaldiio.load_ark(path.join("a.ark").strpath)}
    d2 = {k: v for k, v in kaldiio.load_scp(path.join("b.scp").strpath).items()}
    d3 = {k: v for k, v in kaldiio.load_ark(path.join("a2.ark").strpath)}
    d4 = {k: v for k, v in kaldiio.load_scp(path.join("b2.scp").strpath).items()}
    _compare(d1, origin)
    _compare(d2, origin)
    _compare(d3, origin)
    _compare(d4, origin)

def test_append_mode(tmpdir):
    path = tmpdir.mkdir('test')
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {'a': a, 'b': b}
    kaldiio.save_ark(path.join('a.ark').strpath, origin,
                     scp=path.join('b.scp').strpath)
    kaldiio.save_ark(path.join('a2.ark').strpath, {'a': a},
                     scp=path.join('b2.scp').strpath, append=True)
    kaldiio.save_ark(path.join('a2.ark').strpath, {'b': b},
                     scp=path.join('b2.scp').strpath, append=True)
    d1 = {k: v for k, v in kaldiio.load_ark(path.join('a.ark').strpath)}
    d2 = {
        k: v
        for k, v in kaldiio.load_scp(path.join('b.scp').strpath).items()
    }
    d3 = {k: v for k, v in kaldiio.load_ark(path.join('a2.ark').strpath)}
    d4 = {
        k: v
        for k, v in kaldiio.load_scp(path.join('b2.scp').strpath).items()
    }
    _compare(d1, origin)
    _compare(d2, origin)
    _compare(d3, origin)
    _compare(d4, origin)

def write_kaldi_matrix(folder, model, loader, output_dir):
    """
    Args:
        folder: str
        model: pytorch model
        loader: torch.utils.data.DataLoader
        output_dir: str
    """
    if not os.path.exists(output_dir):
        print("Destination folder for storing .ark/.scp files doesn't exist!")
        sys.exit(1)
    with torch.no_grad():
        for idx, img in enumerate(loader):
            img = img.view(img.size(0), -1).cuda()
            output = model(img)
            lt_vec = model.lant_vec.cpu()
            feats_np = lt_vec.numpy()
            # Rename e.g. s1_v1_u32 to s01_u32.ark to match the audio file names.
            # s1 --> s01
            if folder.index("_") - folder.index("s") == 2:
                folder = "s0" + folder.split("s")[1]
            # s01_v1_u32 --> s01_u32
            tokens = folder.split("_")
            folder = tokens[0] + "_" + tokens[2]
            scp_file = output_dir + "/" + folder + ".scp"
            ark_file = output_dir + "/" + folder + ".ark"
            write_dict = {folder: feats_np}
            kaldiio.save_ark(ark_file, write_dict, scp=scp_file)

def test_write_read_compress(tmpdir, compression_method, endian):
    path = tmpdir.mkdir("test")
    a = np.random.rand(1000, 120).astype(np.float32)
    b = np.random.rand(10, 120).astype(np.float32)
    origin = {"Ï,é,à": a, "あいうえお": b}
    kaldiio.save_ark(
        path.join("a.ark").strpath,
        origin,
        scp=path.join("b.scp").strpath,
        compression_method=compression_method,
        endian=endian,
    )
    d2 = {k: v for k, v in kaldiio.load_ark(path.join("a.ark").strpath, endian=endian)}
    d5 = {
        k: v
        for k, v in kaldiio.load_scp(path.join("b.scp").strpath, endian=endian).items()
    }
    with io.open(path.join("a.ark").strpath, "rb") as fd:
        d6 = {k: v for k, v in kaldiio.load_ark(fd, endian=endian)}
    _compare_allclose(d2, origin, atol=1e-1)
    _compare_allclose(d5, origin, atol=1e-1)
    _compare_allclose(d6, origin, atol=1e-1)

def gen_dummy_data_dir(data_dir, num_spk, num_utt_per_spk, feat_len=100,
                       feat_dim=40):
    '''Generate a dummy data directory and return its meta.'''
    os.makedirs(data_dir, exist_ok=True)
    meta = kaldi_dir.KaldiMetaData()
    feats = {}
    vads = {}
    for spk_idx in range(num_spk):
        for utt_idx in range(num_utt_per_spk):
            spk = str(spk_idx)
            utt = '%s_%d' % (spk, utt_idx)
            utt_meta = kaldi_dir.Utt()
            feat_mat = np.ones((feat_len, feat_dim), dtype='float32')
            feats[utt] = feat_mat
            utt_meta.featlen = feat_len
            vad_mat = np.ones((feat_len,), dtype='float32')
            vads[utt] = vad_mat
            utt_meta.spk = spk
            meta.utts[utt] = utt_meta
    meta.collect_spks_from_utts()
    meta.dump(data_dir, True)
    feats_ark_path = os.path.join(data_dir, 'feats.ark')
    feats_scp_path = os.path.join(data_dir, 'feats.scp')
    kaldiio.save_ark(feats_ark_path, feats, scp=feats_scp_path, text=True)
    vad_ark_path = os.path.join(data_dir, 'vad.ark')
    vad_scp_path = os.path.join(data_dir, 'vad.scp')
    kaldiio.save_ark(vad_ark_path, vads, scp=vad_scp_path, text=True)
    loaded_meta = kaldi_dir.KaldiMetaData()
    loaded_meta.load(data_dir)
    return loaded_meta

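# A minimal usage sketch for gen_dummy_data_dir; the directory name and the
# speaker/utterance counts are illustrative.
meta = gen_dummy_data_dir('data/dummy', num_spk=2, num_utt_per_spk=3)
print(sorted(meta.utts.keys()))  # ['0_0', '0_1', '0_2', '1_0', '1_1', '1_2']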