示例#1
0
def decode_wrap(argv, audio_batch_size, wav_paths, file_output, wst_path=None):
    """Decode every wav file with one recogniser and write the n-best results.

    A single ``PyOnlineLatgenRecogniser`` is configured once with ``argv``
    and reused for all recordings.

    Args:
        argv: Options passed unchanged to ``PyOnlineLatgenRecogniser.setup``.
        audio_batch_size: Not used inside this function; kept so that
            existing callers keep working.
        wav_paths: Iterable of ``(wav_name, wav_path)`` pairs.
        file_output: Destination handed to ``write_decoded``.
        wst_path: Word symbol table path given to ``wst2dict`` and
            ``fst.read_symbols_text``.
            NOTE(review): the default ``None`` is forwarded to both helpers
            as-is -- confirm that they accept it.
    """
    wst = wst2dict(wst_path)
    d = PyOnlineLatgenRecogniser()
    d.setup(argv)
    for wav_name, wav_path in wav_paths:
        sw, sr = 2, 16000  # 16-bit audio so 1 sample_width = 2 chars
        pcm = load_wav(wav_path, def_sample_width=sw, def_sample_rate=sr)
        print('%s has %f sec' % (wav_name, (float(len(pcm)) / sw) / sr))
        lat, lik, decoded_frames = decode(d, pcm)
        lat.isyms = lat.osyms = fst.read_symbols_text(wst_path)
        if DEBUG:
            # Dump the lattice both as an SVG drawing and as an FST file.
            with open('pykaldi_%s.svg' % wav_name, 'w') as f:
                f.write(lat._repr_svg_())
            lat.write('%s_pykaldi.fst' % wav_name)

        # Keep the fractional part of the per-frame value (it is printed
        # with %f), and avoid a ZeroDivisionError when nothing was decoded.
        if decoded_frames:
            lik_per_frame = lik / decoded_frames
        else:
            lik_per_frame = float('nan')
        print(
            "Log-likelihood per frame for utterance %s is %f over %d frames" %
            (wav_name, lik_per_frame, decoded_frames))
        word_ids = lattice_to_nbest(lat, n=10)
        write_decoded(file_output, wav_name, word_ids, wst)
def decode_wrap(argv, audio_batch_size, wav_paths,
        file_output, wst_path=None):
    """Decode every wav file with one recogniser and write the n-best results.

    A single ``PyOnlineLatgenRecogniser`` is configured once with ``argv``
    and reused for all recordings.

    Args:
        argv: Options passed unchanged to ``PyOnlineLatgenRecogniser.setup``.
        audio_batch_size: Not used inside this function; kept so that
            existing callers keep working.
        wav_paths: Iterable of ``(wav_name, wav_path)`` pairs.
        file_output: Destination handed to ``write_decoded``.
        wst_path: Word symbol table path given to ``wst2dict`` and
            ``fst.read_symbols_text``.
            NOTE(review): the default ``None`` is forwarded to both helpers
            as-is -- confirm that they accept it.
    """
    wst = wst2dict(wst_path)
    d = PyOnlineLatgenRecogniser()
    d.setup(argv)
    for wav_name, wav_path in wav_paths:
        sw, sr = 2, 16000  # 16-bit audio so 1 sample_width = 2 chars
        pcm = load_wav(wav_path, def_sample_width=sw, def_sample_rate=sr)
        # Parenthesised single-argument print works as a Python 2 statement
        # and as a Python 3 function call, producing the same output.
        print('%s has %f sec' % (wav_name, (float(len(pcm)) / sw) / sr))
        lat, lik, decoded_frames = decode(d, pcm)
        lat.isyms = lat.osyms = fst.read_symbols_text(wst_path)
        if DEBUG:
            # Dump the lattice both as an SVG drawing and as an FST file.
            with open('pykaldi_%s.svg' % wav_name, 'w') as f:
                f.write(lat._repr_svg_())
            lat.write('%s_pykaldi.fst' % wav_name)

        # Avoid a ZeroDivisionError when no frames were decoded.
        if decoded_frames:
            lik_per_frame = lik / decoded_frames
        else:
            lik_per_frame = float('nan')
        print("Log-likelihood per frame for utterance %s is %f over %d frames" % (
            wav_name, lik_per_frame, decoded_frames))
        word_ids = lattice_to_nbest(lat, n=10)
        write_decoded(file_output, wav_name, word_ids, wst)