def re_run(self, utt):
    """Re-transcribe a stored utterance and publish the refreshed document.

    The audio named by ``utt['wavpath']`` is loaded from the attachment
    directory, pushed through a newly created ``Kaldi`` instance, and the
    resulting words are written back onto ``utt`` as ``'command_words'``
    (word dicts without their ``'phones'`` entry) and ``'command'`` (the
    words joined by single spaces).  A ``"change"`` message carrying the
    updated document is then handed to ``self.db.onchange`` through
    ``reactor.callFromThread``.

    Args:
        utt: Utterance document; mutated in place.  If it has no
            ``'wavpath'`` key the call returns without doing anything.
    """
    if 'wavpath' not in utt:
        return

    k = Kaldi(
        get_resource('data/nnet_a_gpu_online'),
        self.gen_hclg_filename,
        get_resource('PROTO_LANGDIR'))
    try:
        wav_path = os.path.join(
            self.resources['attach'].attachdir, utt['wavpath'])
        audio = numm3.sound2np(wav_path, nchannels=1, R=8000)
        # tobytes() is the supported spelling of the deprecated tostring();
        # assumes sound2np returns a NumPy array -- TODO confirm.
        k.push_chunk(audio.tobytes())
        wds = k.get_final()
    finally:
        # Always stop the recognizer, even when loading or decoding fails.
        k.stop()

    for wd in wds:
        # Tolerate word entries that carry no phone-level detail.
        wd.pop('phones', None)

    utt['command_words'] = wds
    utt['command'] = ' '.join(wd['word'] for wd in wds)

    reactor.callFromThread(
        self.db.onchange,
        None,
        {"type": "change", "id": utt["_id"], "doc": utt})
def re_run(self, utt):
    """Decode an utterance's recording again and broadcast the result.

    Reads the file referenced by ``utt['wavpath']`` (relative to the
    attachment directory), feeds it to a newly created ``Kaldi`` instance,
    and stores the outcome on ``utt``: ``'command_words'`` holds the word
    dicts with their ``'phones'`` entry removed, ``'command'`` holds the
    words joined by single spaces.  Finally a ``"change"`` message with the
    updated document is passed to ``self.db.onchange`` via
    ``reactor.callFromThread``.

    Args:
        utt: Utterance document; mutated in place.  Returns immediately
            when it has no ``'wavpath'`` key.
    """
    if 'wavpath' not in utt:
        return

    k = Kaldi(
        get_resource('data/nnet_a_gpu_online'),
        self.gen_hclg_filename,
        get_resource('PROTO_LANGDIR'))
    try:
        wav_path = os.path.join(
            self.resources['attach'].attachdir, utt['wavpath'])
        audio = numm3.sound2np(wav_path, nchannels=1, R=8000)
        # tostring() is deprecated in NumPy; tobytes() returns the same data.
        # Assumes sound2np returns a NumPy array -- TODO confirm.
        k.push_chunk(audio.tobytes())
        wds = k.get_final()
    finally:
        # Stop the recognizer on every path so a failure does not leak it.
        k.stop()

    for wd in wds:
        # pop() instead of del: a word without 'phones' is not an error.
        wd.pop('phones', None)

    utt['command_words'] = wds
    utt['command'] = ' '.join(wd['word'] for wd in wds)

    reactor.callFromThread(
        self.db.onchange,
        None,
        {"type": "change", "id": utt["_id"], "doc": utt})
# Collapsed excerpt; it begins inside a definition whose header is not
# visible here (the leading statement sends "reset" through self._cmd).
#
# stop():    on the first call only (guarded by self.finished) marks the
#            object finished, sends "stop" through self._cmd, closes
#            self._p.stdin and self._p.stdout, then waits on self._p.
#            Presumably self._p is a subprocess.Popen -- TODO confirm.
# __del__(): calls stop().
# __main__:  loads sys.argv[1] with numm3.sound2np (nchannels=1, R=8000),
#            prints the buffer length, then pushes 160000-element slices to
#            a Kaldi instance and prints k.get_final() after every slice.
#
# NOTE(review): if self._cmd("stop") raises, the pipes are not closed and
# wait() is never reached.
# NOTE(review): .tostring() is deprecated on NumPy arrays in favour of
# .tobytes() -- assumes buf is a NumPy array; confirm before changing.
self._cmd("reset") def stop(self): if not self.finished: self.finished = True self._cmd("stop") self._p.stdin.close() self._p.stdout.close() self._p.wait() def __del__(self): self.stop() if __name__ == '__main__': import numm3 import sys infile = sys.argv[1] k = Kaldi() buf = numm3.sound2np(infile, nchannels=1, R=8000) print('loaded_buf', len(buf)) idx = 0 while idx < len(buf): k.push_chunk(buf[idx:idx + 160000].tostring()) print(k.get_final()) idx += 160000
# NOTE(review): the three definitions below take ``self`` and presumably
# belong to the Kaldi wrapper class whose header is outside this excerpt;
# re-indent them under that class when splicing back -- TODO confirm.

def _reset(self):
    """Send the "reset" command through ``self._cmd``."""
    self._cmd("reset")


def stop(self):
    """Shut the recognizer down once; later calls do nothing.

    Marks the object as finished, sends "stop" through ``self._cmd``,
    closes ``self._p.stdin`` and ``self._p.stdout`` and waits on
    ``self._p``.  The pipes are closed and the process is waited for even
    if sending "stop" raises; that exception still propagates.
    """
    if self.finished:
        return
    self.finished = True
    try:
        self._cmd("stop")
    finally:
        # Clean up regardless of whether "stop" could be delivered, so a
        # failed command does not leave open pipes or an unreaped child.
        self._p.stdin.close()
        self._p.stdout.close()
        self._p.wait()


def __del__(self):
    """Stop the recognizer when the object is garbage-collected."""
    # A missing ``finished`` attribute means construction never completed;
    # there is nothing to stop, and raising inside __del__ only adds noise.
    if not getattr(self, "finished", True):
        self.stop()


if __name__ == '__main__':
    import numm3
    import sys

    if len(sys.argv) < 2:
        sys.exit('usage: %s AUDIOFILE' % sys.argv[0])

    CHUNK_LEN = 160000  # elements pushed to the recognizer per call

    infile = sys.argv[1]
    k = Kaldi()
    try:
        buf = numm3.sound2np(infile, nchannels=1, R=8000)
        print('loaded_buf', len(buf))

        for idx in range(0, len(buf), CHUNK_LEN):
            # tobytes() replaces the deprecated ndarray.tostring(); assumes
            # sound2np returns a NumPy array -- TODO confirm.
            k.push_chunk(buf[idx:idx + CHUNK_LEN].tobytes())
            print(k.get_final())
    finally:
        # Stop explicitly rather than relying on __del__ at interpreter exit.
        k.stop()
# Collapsed excerpt; it begins inside a definition whose header (and the
# `if` matching the leading `else:`) is not visible here.  The visible tail
# either increments hit_results[utt_idx] or calls
# self.onpreview(ret, utt_idx).
#
# stop():   sets self.stopped = True and calls stop() on the last element
#           of self.utts.
# join():   joins self.t (presumably a worker thread -- TODO confirm).
# __main__: simulates a session from a file: loads sys.argv[1] with
#           numm3.sound2np (nchannels=1, R=8000), feeds it to
#           Session(sys.argv[2]) in slices of BUF_LEN = 200 elements, then
#           calls sess.stop() and sess.join().  One slice is always fed
#           before the end-of-buffer check, even for an empty buffer.
#
# NOTE(review): self.utts[-1] fails if self.utts is empty when stop() is
# called -- verify that callers always have at least one utterance.
# NOTE(review): everything after the inline '#' on the collapsed line below
# is a comment to the Python tokenizer; restore the line breaks before use.
hit_results[utt_idx] = hit_results.get(utt_idx, 0) + 1 else: self.onpreview(ret, utt_idx) def stop(self): self.stopped = True self.utts[-1].stop() def join(self): self.t.join() if __name__=='__main__': import sys # Simulate with an audio file AUDIOFILE = sys.argv[1] OUTDIR = sys.argv[2] sess = Session(OUTDIR) test_audio = numm3.sound2np(AUDIOFILE, nchannels=1, R=8000) cur_start = 0 BUF_LEN = 200 while True: sess.feed(test_audio[cur_start:cur_start+BUF_LEN]) cur_start += BUF_LEN if cur_start >= len(test_audio): break sess.stop() sess.join()
# Collapsed excerpt of a script that shards one audio file into overlapping
# chunks for transcription; transcribe_chunk() continues past the end of
# this excerpt.
#
# Setup:  reads AUDIOPATH and JSON_OUT from sys.argv[1:3], fills kaldi_queue
#         with N_THREADS (4) standard_kaldi.Kaldi instances, and loads the
#         whole file with numm3.sound2np (R=8000, nchannels=1).
# Chunks: consecutive chunks start (T_PER_CHUNK - OVERLAP_T) * 8000 elements
#         apart and span T_PER_CHUNK * 8000 elements, so neighbours overlap
#         by OVERLAP_T * 8000 elements.  n_chunks is the buffer length
#         divided by that start-to-start stride, rounded up.
#         Presumably 8000 is the sample rate and the T_* values are
#         seconds -- TODO confirm.
# transcribe_chunk(idx): slices chunk `idx` out of audiobuf, prints its
#         shape and takes a Kaldi instance from kaldi_queue.
#
# NOTE(review): `print '...' % (...)` and `print buf.shape` are Python 2
# print statements and are syntax errors under Python 3.
# NOTE(review): everything after the first inline '#' on the collapsed line
# below is a comment to the Python tokenizer; restore the line breaks
# before use.
import math import sys AUDIOPATH = sys.argv[1] JSON_OUT = sys.argv[2] N_THREADS = 4 T_PER_CHUNK = 10 OVERLAP_T = 2 kaldi_queue = Queue() for i in range(N_THREADS): kaldi_queue.put(standard_kaldi.Kaldi()) # Preload entire audio audiobuf = numm3.sound2np(AUDIOPATH, R=8000, nchannels=1) n_chunks = int(math.ceil(len(audiobuf) / (8000.0 * (T_PER_CHUNK-OVERLAP_T)))) print 'sharding into %d chunks' % (n_chunks) chunks = [] # (idx, [words]) def transcribe_chunk(idx): st = idx * (T_PER_CHUNK-OVERLAP_T) * 8000 end= st + T_PER_CHUNK * 8000 buf = audiobuf[st:end] print buf.shape k = kaldi_queue.get()