#!/usr/bin/env python
import argparse
import logging
import os

import numpy as np
import kaldi_io_py

from scipy.io.wavfile import write

# NOTE: logmelspc_to_linearspc and griffin_lim are assumed to be provided by
# the surrounding project, as in the original script.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--fs', type=int, default=22050,
                        help='Sampling frequency')
    parser.add_argument('--fmax', type=int, default=None, nargs='?',
                        help='Maximum frequency')
    parser.add_argument('--fmin', type=int, default=None, nargs='?',
                        help='Minimum frequency')
    parser.add_argument('--n_fft', type=int, default=1024,
                        help='FFT length in points')
    parser.add_argument('--n_shift', type=int, default=512,
                        help='Shift length in points')
    parser.add_argument('--win_length', type=int, default=None, nargs='?',
                        help='Analysis window length in points')
    parser.add_argument('--n_mels', type=int, default=80,
                        help='Number of mel bands')
    parser.add_argument('--window', type=str, default='hann',
                        choices=['hann', 'hamming'],
                        help='Type of window')
    parser.add_argument('scp', type=str, help='Feats scp file')
    parser.add_argument('outdir', type=str, help='Output directory')
    args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s")

    # load scp
    reader = kaldi_io_py.read_mat_scp(args.scp)

    # check directory
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    # convert each log-mel spectrogram to a waveform and write it as a wav file
    for idx, (utt_id, lmspc) in enumerate(reader, 1):
        # invert the mel filterbank to get a linear spectrogram
        spc = logmelspc_to_linearspc(
            lmspc,
            fs=args.fs,
            n_mels=args.n_mels,
            n_fft=args.n_fft,
            fmin=args.fmin,
            fmax=args.fmax)
        # estimate the phase and reconstruct the waveform with Griffin-Lim
        y = griffin_lim(
            spc,
            n_fft=args.n_fft,
            n_shift=args.n_shift,
            win_length=args.win_length,
            window=args.window)
        logging.info("(%d) %s" % (idx, utt_id))
        # scale to the 16-bit integer range before writing
        write(args.outdir + "/%s.wav" % utt_id,
              args.fs,
              (y * np.iinfo(np.int16).max).astype(np.int16))


if __name__ == "__main__":
    main()
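# Example invocation (a sketch: the script filename and the data paths are
# hypothetical, the options and positional arguments match the parser above):
#
#   python convert_fbank_to_wav.py --fs 22050 --n_fft 1024 --n_shift 512 \
#       data/train/feats.scp exp/generated_wav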
import os

import numpy
import pytest

import kaldi_io_py


def test_voxforge_feats():
    pytest.importorskip("kaldi_io")
    import kaldi_io

    train_scp = "scp:egs/voxforge/asr1/data/tr_it/feats.scp"
    # strip the "scp:" rspecifier prefix before checking for the file itself
    if not os.path.exists(train_scp.split(":", 1)[1]):
        pytest.skip("voxforge scp has not been created")
    r1 = kaldi_io_py.read_mat_scp(train_scp)
    r2 = kaldi_io.RandomAccessBaseFloatMatrixReader(train_scp)
    for k, v1 in r1:
        k = str(k)
        print(k)
        v2 = r2[k]
        assert v1.shape == v2.shape
        numpy.testing.assert_allclose(v1, v2, atol=1e-5)
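# To run this test on its own (the test file path is hypothetical):
#
#   pytest -s tests/test_kaldi_io.py::test_voxforge_feats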
# coding: utf-8
import numpy

import kaldi_io
import kaldi_io_py
import lazy_io

train_scp = "scp:egs/voxforge/asr1/data/tr_it/feats.scp"
r1 = kaldi_io_py.read_mat_scp(train_scp)
r2 = kaldi_io.RandomAccessBaseFloatMatrixReader(train_scp)
r3 = lazy_io.read_dict_scp(train_scp)
for k, v1 in r1:
    k = str(k)
    print(k)
    v2 = r2[k]
    v3 = r3[k]
    assert v1.shape == v2.shape
    assert v1.shape == v3.shape
    # kaldi_io is allowed a small numerical tolerance, while lazy_io is
    # expected to reproduce the stored matrix exactly
    numpy.testing.assert_allclose(v1, v2, atol=1e-5)
    numpy.testing.assert_allclose(v1, v3, atol=0)
import argparse

import h5py
import numpy as np
import kaldi_io_py

# NOTE: the parser setup and the '--input' option are reconstructed from the
# args.* uses below; only the tail of the original header survived.
parser = argparse.ArgumentParser()
parser.add_argument('--input', type=str, required=True, help='input feats scp file')
parser.add_argument('--output', type=str, required=True, help='output file')
parser.add_argument('--cmvn', type=str, help='cmvn file')
args = parser.parse_args()

# ark = 'ark: copy-feats --print-args=false scp:%s ark:- | ' % (args.input)
# print(args.input)

# read global mean/stddev for normalization
if args.cmvn:
    with h5py.File(args.cmvn, 'r') as f:
        cmvn_mean = f['mean'][()]
        cmvn_dev = f['stddev'][()]

generator = kaldi_io_py.read_mat_scp(args.input)
with h5py.File(args.output, 'w') as f:
    for key, mat in generator:
        if args.cmvn:
            # subtract the global mean, then divide by the global stddev
            cp = np.ndarray(mat.shape)
            for row in range(mat.shape[0]):
                cp[row] = ((mat[row] - cmvn_mean.reshape(mat.shape[1],))
                           / cmvn_dev.reshape(mat.shape[1],))
            f.create_dataset(key, data=cp,
                             compression='gzip', compression_opts=9)
        else:
            f.create_dataset(key, data=mat,
                             compression='gzip', compression_opts=9)
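# --- A minimal sketch, separate from the script above, of producing the cmvn
# statistics file it expects. The dataset names 'mean' and 'stddev' are taken
# from the reader above; the file name, feature dimension, and the zero/one
# statistics are placeholders standing in for real corpus statistics. ---
import h5py
import numpy as np

feat_dim = 80  # hypothetical feature dimension
with h5py.File('cmvn.h5', 'w') as f:
    f.create_dataset('mean', data=np.zeros(feat_dim))
    f.create_dataset('stddev', data=np.ones(feat_dim))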
import argparse
import os
import re

import h5py
import kaldi_io_py

# NOTE: the argument parser and the speaker bookkeeping initialization are
# reconstructed from the uses below; they were not part of this fragment.
parser = argparse.ArgumentParser()
parser.add_argument('--feats', type=str, required=True, help='feats scp file')
parser.add_argument('--outdir', type=str, required=True, help='output directory')
args = parser.parse_args()

speakers = {}  # speaker id -> integer speaker label
spk_num = 0

# assign an integer label to every new speaker and write the mapping out
spk_file = os.path.join(args.outdir, 'speakers')
with open(spk_file, 'w') as wf:
    with open(args.feats, 'r') as f:
        for line in f.readlines():
            # speaker id = utterance id without the '_DT' tag and the last
            # three non-space characters
            spk = re.sub(r'\S\S\S$', "", re.sub('_DT', "", line.split()[0]))
            if spk not in speakers:
                speakers[spk] = spk_num
                wf.write("{0} {1}\n".format(spk, spk_num))
                spk_num += 1

hdf_file = os.path.join(args.outdir, 'data.h5')
generator = kaldi_io_py.read_mat_scp(args.feats)
with h5py.File(hdf_file, 'w') as hdf:
    for key, mat in generator:
        hdf.create_group(key)
        hdf.create_dataset(key + '/data', data=mat,
                           compression='gzip', compression_opts=9)
        if '_DT' in key:
            label = 0
        else:
            label = 1  # 1 if the speaker is deaf
        spk = re.sub(r'\S\S\S$', "", re.sub('_DT', "", key))
        spk_label = speakers[spk]
        hdf.create_dataset(key + '/label', data=label)
        hdf.create_dataset(key + '/speaker', data=spk_label)
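# --- A minimal sketch, separate from the script above, of reading back the
# data.h5 layout it produces: one group per utterance key holding 'data',
# 'label', and 'speaker' datasets. The file path is a placeholder. ---
import h5py

with h5py.File('data.h5', 'r') as hdf:
    for key in hdf:
        mat = hdf[key]['data'][()]
        label = int(hdf[key]['label'][()])
        spk_label = int(hdf[key]['speaker'][()])
        print(key, mat.shape, label, spk_label)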