示例#1
0
 def re_run(self, utt):
     """Re-decode the stored recording of ``utt`` and publish the result.

     Returns immediately when ``utt`` has no ``'wavpath'`` key.  Otherwise
     the file named by ``utt['wavpath']`` (resolved under the ``'attach'``
     resource's ``attachdir``) is loaded, pushed through a fresh ``Kaldi``
     instance, and ``utt`` is updated in place:

     * ``utt['command_words']`` -- the entries returned by ``get_final()``,
       each with its ``'phones'`` key removed;
     * ``utt['command']`` -- the ``'word'`` values joined by single spaces.

     Finally ``self.db.onchange`` is scheduled through
     ``reactor.callFromThread`` with a ``"change"`` message for the document.
     """
     if 'wavpath' not in utt:
         return

     k = Kaldi(
         get_resource('data/nnet_a_gpu_online'),
         self.gen_hclg_filename,
         get_resource('PROTO_LANGDIR'))
     try:
         audio = numm3.sound2np(
             os.path.join(self.resources['attach'].attachdir, utt['wavpath']),
             nchannels=1,
             R=8000)
         # Assumes sound2np returns a NumPy array (TODO confirm).
         # ndarray.tostring() is a deprecated alias that NumPy 2.0 removed;
         # tobytes() yields the same raw buffer.
         k.push_chunk(audio.tobytes())
         wds = k.get_final()
     finally:
         # Stop the Kaldi instance even when loading or decoding fails.
         k.stop()

     for wd in wds:
         del wd['phones']
     utt['command_words'] = wds
     utt['command'] = ' '.join([X['word'] for X in wds])

     # NOTE(review): callFromThread suggests this method runs outside the
     # reactor thread -- confirm against the caller.
     reactor.callFromThread(self.db.onchange, None, {"type": "change",
                                                     "id": utt["_id"],
                                                     "doc": utt})
示例#2
0
文件: serve.py 项目: afcarl/earmark
    def re_run(self, utt):
        """Transcribe the recording attached to ``utt`` again and announce it.

        Nothing happens when ``utt`` lacks a ``'wavpath'`` key.  Otherwise
        the audio file (``utt['wavpath']`` under the ``'attach'`` resource's
        ``attachdir``) is decoded with a new ``Kaldi`` instance and ``utt``
        is modified in place:

        * ``utt['command_words']`` receives the entries returned by
          ``get_final()`` after their ``'phones'`` key has been deleted;
        * ``utt['command']`` receives the ``'word'`` values joined by spaces.

        A ``"change"`` message carrying the document is then handed to
        ``self.db.onchange`` via ``reactor.callFromThread``.
        """
        if 'wavpath' not in utt:
            return

        k = Kaldi(get_resource('data/nnet_a_gpu_online'),
                  self.gen_hclg_filename, get_resource('PROTO_LANGDIR'))
        try:
            wav_file = os.path.join(self.resources['attach'].attachdir,
                                    utt['wavpath'])
            audio = numm3.sound2np(wav_file, nchannels=1, R=8000)
            # Assumes sound2np returns a NumPy array (TODO confirm).
            # tostring() was a deprecated alias of tobytes() and is gone in
            # NumPy 2.0; the bytes produced are identical.
            k.push_chunk(audio.tobytes())
            wds = k.get_final()
        finally:
            # Guarantee the Kaldi instance is stopped on every exit path.
            k.stop()

        for wd in wds:
            del wd['phones']
        utt['command_words'] = wds
        utt['command'] = ' '.join([X['word'] for X in wds])

        # NOTE(review): callFromThread suggests this method runs outside the
        # reactor thread -- confirm against the caller.
        reactor.callFromThread(self.db.onchange, None, {
            "type": "change",
            "id": utt["_id"],
            "doc": utt
        })
示例#3
0
        self._cmd("reset")

    def stop(self):
        """Shut down once; later calls do nothing.

        On the first call the ``finished`` flag is set, the ``"stop"``
        command is sent through ``_cmd``, ``self._p``'s ``stdin`` and
        ``stdout`` are closed and ``self._p.wait()`` is called.
        """
        if self.finished:
            return

        # Set the flag before doing anything else so a nested or repeated
        # call returns at the guard above.
        self.finished = True
        self._cmd("stop")

        proc = self._p
        proc.stdin.close()
        proc.stdout.close()
        proc.wait()

    def __del__(self):
        """Finalizer: call ``stop()`` when the object is being destroyed."""
        self.stop()


if __name__ == '__main__':
    # Command-line smoke test: load the audio file named by the first
    # argument, push it through Kaldi in fixed-size chunks and print the
    # result of get_final() after each chunk.
    import numm3
    import sys

    infile = sys.argv[1]

    k = Kaldi()
    try:
        buf = numm3.sound2np(infile, nchannels=1, R=8000)
        print('loaded_buf', len(buf))

        # Elements pushed per call; 20 s of audio if R is the sample rate
        # in Hz (TODO confirm).
        chunk_len = 160000
        for idx in range(0, len(buf), chunk_len):
            # tobytes() replaces tostring(), a deprecated alias that
            # NumPy 2.0 removed; the bytes are identical.
            k.push_chunk(buf[idx:idx + chunk_len].tobytes())
            print(k.get_final())
    finally:
        # Stop explicitly instead of relying on __del__ at interpreter exit.
        k.stop()
示例#4
0
    def _reset(self):
        """Send the ``"reset"`` command through ``_cmd``."""
        self._cmd("reset")

    def stop(self):
        """Send ``"stop"`` and release ``self._p``; idempotent.

        The ``finished`` flag ensures the command is sent, the ``stdin`` and
        ``stdout`` of ``self._p`` are closed and ``self._p.wait()`` is called
        only on the first invocation.
        """
        if self.finished:
            return

        # Flip the flag first so any further call returns at the guard.
        self.finished = True
        self._cmd("stop")

        proc = self._p
        proc.stdin.close()
        proc.stdout.close()
        proc.wait()

    def __del__(self):
        """Finalizer: delegate to ``stop()`` when the object is destroyed."""
        self.stop()

if __name__ == '__main__':
    # Manual check from the command line: decode the audio file given as
    # the first argument chunk by chunk, printing get_final() after each
    # chunk.
    import numm3
    import sys

    infile = sys.argv[1]

    k = Kaldi()
    try:
        buf = numm3.sound2np(infile, nchannels=1, R=8000)
        print('loaded_buf', len(buf))

        # Elements pushed per call; 20 s of audio if R is the sample rate
        # in Hz (TODO confirm).
        chunk_len = 160000
        for idx in range(0, len(buf), chunk_len):
            # tostring() is a deprecated alias removed in NumPy 2.0;
            # tobytes() returns the same bytes.
            k.push_chunk(buf[idx:idx + chunk_len].tobytes())
            print(k.get_final())
    finally:
        # Stop explicitly instead of relying on __del__ at interpreter exit.
        k.stop()
示例#5
0
                    hit_results[utt_idx] = hit_results.get(utt_idx, 0) + 1
                else:
                    self.onpreview(ret, utt_idx)

    def stop(self):
        """Mark this object as stopped and stop the last entry of ``utts``.

        ``self.stopped`` is set first; ``stop()`` is then called on
        ``self.utts[-1]``, which raises ``IndexError`` when ``utts`` is empty.
        """
        self.stopped = True
        latest = self.utts[-1]
        latest.stop()

    def join(self):
        """Delegate to ``self.t.join()``.

        ``t`` is presumably the worker thread -- confirm where it is created.
        """
        worker = self.t
        worker.join()

if __name__ == '__main__':
    # Simulate with an audio file: replay it into a Session in small
    # buffers, then stop the session and wait for it.
    #   argv[1] -- audio file to replay
    #   argv[2] -- value handed to Session()
    import sys

    AUDIOFILE = sys.argv[1]
    OUTDIR = sys.argv[2]

    sess = Session(OUTDIR)
    test_audio = numm3.sound2np(AUDIOFILE, nchannels=1, R=8000)

    BUF_LEN = 200
    # max(..., 1) keeps the guarantee of at least one feed() call, even when
    # the audio is empty (that call then receives an empty slice).
    for start in range(0, max(len(test_audio), 1), BUF_LEN):
        sess.feed(test_audio[start:start + BUF_LEN])

    sess.stop()
    sess.join()
示例#6
0
import math
import sys

# Command-line arguments: the audio to load and (judging by the name) the
# JSON output path.
AUDIOPATH = sys.argv[1]
JSON_OUT  = sys.argv[2]

N_THREADS = 4      # number of Kaldi instances placed in the queue
T_PER_CHUNK = 10   # chunk length; scaled by 8000 below (presumably seconds at 8 kHz)
OVERLAP_T = 2      # overlap between consecutive chunks, same unit

# Fill the queue with N_THREADS Kaldi instances.
kaldi_queue = Queue()
for _ in range(N_THREADS):
    kaldi_queue.put(standard_kaldi.Kaldi())

# Preload entire audio
audiobuf  = numm3.sound2np(AUDIOPATH, R=8000, nchannels=1)

# Consecutive chunks start this many samples apart; a float so the division
# below is not truncated before ceil().
samples_per_step = 8000.0 * (T_PER_CHUNK-OVERLAP_T)
n_chunks = int(math.ceil(len(audiobuf) / samples_per_step))

# Parenthesised single argument: same output on Python 2 and Python 3.
print('sharding into %d chunks' % n_chunks)

chunks = []                   # (idx, [words])

def transcribe_chunk(idx):
    st = idx * (T_PER_CHUNK-OVERLAP_T) * 8000
    end= st + T_PER_CHUNK * 8000

    buf = audiobuf[st:end]
    print buf.shape

    k = kaldi_queue.get()