示例#1
0
文件: live-demo.py 项目: 2php/kaldi
 def run(self):
     """Poll the decoder until the dialog ends, printing each utterance result.

     Every iteration sleeps 0.1 s, calls ``self._user_control()`` and then
     decodes in batches of at most 10 frames until ``self.d.decode`` stops
     returning a positive frame count.  When ``self.utt_end`` or
     ``self.dialog_end`` is set, the first of up to 10 hypotheses is printed
     and the decoder is reset.  Once ``self.dialog_end`` is set the recording
     is saved via ``self.save_wav()`` and the loop exits.
     """
     while True:
         time.sleep(0.1)
         self._user_control()

         # Drain the decoder: keep asking for frames until none are returned.
         while True:
             decoded_now = self.d.decode(max_frames=10)
             if not decoded_now > 0:
                 break
             self.utt_frames += decoded_now

         if self.utt_end or self.dialog_end:
             started = time.time()
             self.d.prune_final()
             prob, lat = self.d.get_lattice()
             # lat.write('live-demo-recorded.fst')
             hypotheses = lattice_to_nbest(lat, n=10)
             if not hypotheses:
                 decoded = 'Empty hypothesis'
             else:
                 # Only the first hypothesis is reported; its own score is
                 # not printed (the lattice-level ``prob`` is).
                 _, top_path = hypotheses[0]
                 decoded = ' '.join(wst[word_id] for word_id in top_path)
             elapsed = str(time.time() - started)
             print("%s secs, frames: %d, prob: %f, %s " % (
                 elapsed, self.utt_frames, prob, decoded))
             self.utt_frames = 0
             self.d.reset(keep_buffer_data=False)

         if self.dialog_end:
             self.save_wav()
             break
 def run(self):
     """Main loop: decode incoming audio and report results per utterance.

     Loops forever with a 0.1 s sleep per pass.  Each pass calls
     ``self._user_control()``, feeds the decoder in steps of up to 10 frames
     while it keeps returning a positive count, and -- if ``self.utt_end``
     or ``self.dialog_end`` is set -- prints timing, frame count, lattice
     probability and the top hypothesis before resetting the decoder.
     The loop ends after ``self.save_wav()`` when ``self.dialog_end`` is set.
     """
     while True:
         time.sleep(0.1)
         self._user_control()

         # Consume everything the decoder can currently process.
         frame_count = self.d.decode(max_frames=10)
         while frame_count > 0:
             self.utt_frames = self.utt_frames + frame_count
             frame_count = self.d.decode(max_frames=10)

         if self.utt_end or self.dialog_end:
             t0 = time.time()
             self.d.prune_final()
             prob, lat = self.d.get_lattice()
             # lat.write('live-demo-recorded.fst')
             candidates = lattice_to_nbest(lat, n=10)
             if candidates:
                 _score, word_ids = candidates[0]
                 words = [wst[word_id] for word_id in word_ids]
                 decoded = ' '.join(words)
             else:
                 decoded = 'Empty hypothesis'
             # The elapsed time covers pruning, lattice extraction and
             # n-best search.
             report = "%s secs, frames: %d, prob: %f, %s " % (
                 str(time.time() - t0), self.utt_frames, prob, decoded)
             print(report)
             self.utt_frames = 0
             self.d.reset(keep_buffer_data=False)

         if self.dialog_end:
             self.save_wav()
             break
示例#3
0
def nbest_hypotheses(n=10):
    """Finalise the current utterance and return its n-best hypotheses.

    Prunes the module-level ``recogniser``, extracts its lattice, resets the
    recogniser, and converts each n-best path to text with ``path_to_text``.

    Args:
        n: Maximum number of hypotheses requested from ``lattice_to_nbest``.
            Previously this argument was ignored and 10 was always used.

    Returns:
        A list of ``(prob, text)`` tuples, in the order produced by
        ``lattice_to_nbest``.
    """
    recogniser.prune_final()
    # The utterance-level likelihood is not part of the result.
    _, lat = recogniser.get_lattice()
    recogniser.reset()

    return [(prob, path_to_text(path))
            for (prob, path) in lattice_to_nbest(lat, n=n)]
示例#4
0
def get_lat():
    """Finalise decoding and return the n-best list as a ``jsonify`` result.

    Prunes the module-level decoder ``d``, takes up to 10 hypotheses from
    its lattice and renders them one per line as ``"<prob> <words>"``.
    The global ``dec_frames`` counter is reported in the result and then
    reset to 0, and the decoder is reset without keeping buffered data.

    Returns:
        The value of ``jsonify(nbest=..., dec_frames=...)``, where
        ``dec_frames`` is the counter value from before the reset.
    """
    global dec_frames
    d.prune_final()
    lik, lat = d.get_lattice()
    nbest = lattice_to_nbest(lat, n=10)
    nbest_s = '\n'.join(
        "%0.3f %s" % (prob, ' '.join(wst[i] for i in ids))
        for (prob, ids) in nbest)
    # Build the response first so it carries the pre-reset frame count.
    result = jsonify(nbest=nbest_s, dec_frames=dec_frames)
    dec_frames = 0
    # Single-argument print() emits the same text on Python 2 and 3.
    # NOTE(review): this runs after the reset, so it always shows 0 --
    # confirm that is the intended debug output.
    print('DEBUG %s' % (dec_frames,))
    d.reset(keep_buffer_data=False)
    return result
示例#5
0
def get_lat():
    """Return the decoder's n-best hypotheses wrapped by ``jsonify``.

    Steps: prune the module-level decoder ``d``, read its lattice, format up
    to 10 hypotheses as ``"<prob> <words>"`` lines, build the result with
    the current global ``dec_frames``, zero that counter, and reset the
    decoder with ``keep_buffer_data=False``.

    Returns:
        ``jsonify(nbest=<joined lines>, dec_frames=<count before reset>)``.
    """
    global dec_frames
    d.prune_final()
    lik, lat = d.get_lattice()
    nbest = lattice_to_nbest(lat, n=10)
    lines = []
    for prob, ids in nbest:
        words = ' '.join([wst[i] for i in ids])
        lines.append("%0.3f %s" % (prob, words))
    nbest_s = '\n'.join(lines)
    # The result must be created before the counter is cleared so that it
    # reports the number of frames actually decoded.
    result = jsonify(nbest=nbest_s, dec_frames=dec_frames)
    dec_frames = 0
    # Formatted as one string so the output is identical under the Python 2
    # print statement and the Python 3 print function.
    # NOTE(review): printed after the reset, so the value shown is always 0.
    print('DEBUG %s' % (dec_frames,))
    d.reset(keep_buffer_data=False)
    return result
示例#6
0
def decode_wrap(argv, audio_batch_size, wav_paths, file_output, wst_path=None):
    """Decode each wav file and write its n-best result to ``file_output``.

    Args:
        argv: Arguments passed to ``PyOnlineLatgenRecogniser.setup``.
        audio_batch_size: Not used by this function; kept for interface
            compatibility.
        wav_paths: Iterable of ``(wav_name, wav_path)`` pairs.
        file_output: Destination handed to ``write_decoded``.
        wst_path: Path of the word symbol table, passed to ``wst2dict`` and
            ``fst.read_symbols_text``.
            NOTE(review): the ``None`` default is forwarded unchanged --
            confirm the helpers accept it.
    """
    wst = wst2dict(wst_path)
    d = PyOnlineLatgenRecogniser()
    d.setup(argv)
    for wav_name, wav_path in wav_paths:
        sw, sr = 2, 16000  # 16-bit audio so 1 sample_width = 2 chars
        pcm = load_wav(wav_path, def_sample_width=sw, def_sample_rate=sr)
        duration = (float(len(pcm)) / sw) / sr
        print('%s has %f sec' % (wav_name, duration))
        lat, lik, decoded_frames = decode(d, pcm)
        lat.isyms = lat.osyms = fst.read_symbols_text(wst_path)
        if DEBUG:
            # Dump the lattice both as an SVG drawing and as an FST file.
            with open('pykaldi_%s.svg' % wav_name, 'w') as f:
                f.write(lat._repr_svg_())
            lat.write('%s_pykaldi.fst' % wav_name)

        # Report the per-frame value without truncating it to an integer;
        # the format uses %f, so the fractional part is meaningful.
        print(
            "Log-likelihood per frame for utterance %s is %f over %d frames" %
            (wav_name, lik / decoded_frames, decoded_frames))
        word_ids = lattice_to_nbest(lat, n=10)
        write_decoded(file_output, wav_name, word_ids, wst)
def decode_wrap(argv, audio_batch_size, wav_paths,
        file_output, wst_path=None):
    """Run the online recogniser over each wav file and store the results.

    Args:
        argv: Arguments passed to ``PyOnlineLatgenRecogniser.setup``.
        audio_batch_size: Not referenced in this function; retained so the
            signature stays unchanged for callers.
        wav_paths: Iterable of ``(wav_name, wav_path)`` pairs.
        file_output: Destination handed to ``write_decoded``.
        wst_path: Path of the word symbol table, passed to ``wst2dict`` and
            ``fst.read_symbols_text``.
            NOTE(review): defaults to ``None``, which is passed through
            as-is -- verify the helpers handle that.
    """
    wst = wst2dict(wst_path)
    d = PyOnlineLatgenRecogniser()
    d.setup(argv)
    for wav_name, wav_path in wav_paths:
        sw, sr = 2, 16000  # 16-bit audio so 1 sample_width = 2 chars
        pcm = load_wav(wav_path, def_sample_width=sw, def_sample_rate=sr)
        # print(<single string>) behaves identically under the Python 2
        # print statement and the Python 3 print function.
        print('%s has %f sec' % (wav_name, (float(len(pcm)) / sw) / sr))
        lat, lik, decoded_frames = decode(d, pcm)
        lat.isyms = lat.osyms = fst.read_symbols_text(wst_path)
        if DEBUG:
            # Dump the lattice both as an SVG drawing and as an FST file.
            with open('pykaldi_%s.svg' % wav_name, 'w') as f:
                f.write(lat._repr_svg_())
            lat.write('%s_pykaldi.fst' % wav_name)

        print("Log-likelihood per frame for utterance %s is %f over %d frames" % (
            wav_name, (lik / decoded_frames), decoded_frames))
        word_ids = lattice_to_nbest(lat, n=10)
        write_decoded(file_output, wav_name, word_ids, wst)