def recognize(wav_file):
    """Run speech recognition on a given WAV file.

    Builds a decoder from the relative ``hmm``/``lm``/``dict`` model
    locations, feeds it the opened wave file and returns the hypothesis.

    Args:
        wav_file: Anything ``wave.open`` accepts for reading (a path or a
            file-like object).

    Returns:
        Whatever ``Decoder.get_hyp()`` returns for the decoded audio.
    """
    # Function-scope import keeps the block self-contained.
    import os

    config = DefaultConfig()
    config.set_string('-hmm', "hmm/")
    # os.path.join yields the original back-slash paths on Windows and valid
    # paths elsewhere, without relying on the invalid "\e" escape sequence.
    config.set_string('-lm', os.path.join('lm', 'en-us.lm.bin'))
    config.set_string('-dict', os.path.join('dict', 'en_in.dic'))

    speech_rec = Decoder(config)
    print("Decoder Initialized")

    audio = wave.open(wav_file, 'rb')
    try:
        print("AudioFile Loaded")
        # NOTE(review): the decoder is handed the wave reader object itself;
        # confirm the installed Decoder.decode_raw accepts it.
        speech_rec.decode_raw(audio)
        print("Audio file decoded")
        result = speech_rec.get_hyp()
    finally:
        # The original never closed the wave reader.
        audio.close()

    print("Result Ready\n")
    return result
class stt:
    """Speech-to-text helper built around a pocketsphinx ``Decoder``.

    Attributes set by ``__init__``:
        profile: The ``profile`` argument, stored unchanged.
        decoder: The configured ``Decoder`` instance.
        transcribe: Bound to ``transcribe_darwin``.
        hyp: Last hypothesis object from ``transcribe_darwin`` (or None).
    """

    def __init__(self, profile, hmm=None, dict=None, lm=None,
                 kws_threshold=None, keyphrase=None):
        """Configure and create the decoder.

        Args:
            profile: Stored on the instance as ``self.profile``.
            hmm: Acoustic model location relative to ``SPHINX_ROOT``;
                defaults to ``share/pocketsphinx/model/en-us/en-us``.
            dict: Dictionary path; defaults to the keyphrase or corpus
                dictionary depending on ``keyphrase``.
            lm: Language model path; defaulted the same way as ``dict``.
            kws_threshold: Optional value for ``-kws_threshold`` (only set
                when truthy).
            keyphrase: Optional value for ``-keyphrase`` (only set when
                truthy).
        """
        self.profile = profile

        # Keyphrase mode and corpus mode only differ in which default files
        # are used when the caller does not supply them.
        default_stem = 'config/keyphrase' if keyphrase else 'config/corpus'
        if not dict:
            dict = fullpath(default_stem + '.dic')
        if not lm:
            lm = fullpath(default_stem + '.lm')
        if not hmm:
            hmm = 'share/pocketsphinx/model/en-us/en-us'

        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(SPHINX_ROOT, hmm))
        config.set_string('-dict', dict)
        config.set_string('-lm', lm)
        config.set_string('-logfn', fullpath('config/sphinx.log'))
        if keyphrase:
            config.set_string('-keyphrase', keyphrase)
        if kws_threshold:
            config.set_float('-kws_threshold', kws_threshold)

        self.decoder = Decoder(config)
        self.transcribe = self.transcribe_darwin
        self.hyp = None

    def _process_utterance(self, wav):
        """Feed ``wav`` to the decoder as one complete utterance."""
        self.decoder.start_utt()
        self.decoder.process_raw(wav, False, False)
        self.decoder.end_utt()

    def transcribe_darwin(self, wav):
        """Decode ``wav`` and return ``hyp().hypstr``, or None if no hypothesis.

        The hypothesis object is kept in ``self.hyp`` for ``get_prob``.
        """
        self._process_utterance(wav)
        self.hyp = self.decoder.hyp()
        if self.hyp:
            return self.hyp.hypstr
        return None

    def get_prob(self):
        """Print the stored hypothesis' ``best_score`` and return its ``prob``.

        Returns None (and prints nothing) when no hypothesis is stored.
        """
        if not self.hyp:
            return None
        # Function-call form prints the same text and parses on Python 3.
        print(self.hyp.best_score)
        return self.hyp.prob

    def transcribe_linux(self, wav):
        """Decode ``wav`` and return the first item of ``get_hyp()``, or None."""
        self._process_utterance(wav)
        result = self.decoder.get_hyp()
        if result:
            return result[0]
        return None
def passiverecord(THRESHOLD=None):
    """Record up to two seconds of audio, save it and return the decoded text.

    Audio is read in ``CHUNK``-sized buffers at ``RATE``; recording stops
    early once the running average of the last 30 ``getScore`` values drops
    below 80% of the threshold. The capture is written to ``passive.wav`` in
    the current working directory and decoded with a ``Decoder`` built from
    the module-level ``hmdir``/``lmdir``/``dictd``.

    Args:
        THRESHOLD: Loudness threshold; when None it is obtained from
            ``fetchThreshold()``.

    Returns:
        The first element of ``Decoder.get_hyp()``.
    """
    from collections import deque

    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    LISTEN_TIME = 2
    WAVE_OUTPUT_FILENAME = "passive.wav"
    WINDOW = 30

    p = pyaudio.PyAudio()
    try:
        if THRESHOLD is None:
            THRESHOLD = fetchThreshold()
            # NOTE(review): the original layout is ambiguous about whether
            # this print belongs to the `if`; confirm against the real file.
            print(THRESHOLD)

        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                        input=True, frames_per_buffer=CHUNK)
        try:
            frames = []
            # Seed the window above the threshold so the loop cannot stop
            # before real samples have displaced the seed values.
            lastN = deque([THRESHOLD * 1.2] * WINDOW, maxlen=WINDOW)
            # Floor division keeps the bound an integer on Python 2 and 3.
            for _ in range(RATE // CHUNK * LISTEN_TIME):
                data = stream.read(CHUNK)
                frames.append(data)
                lastN.append(getScore(data))
                average = sum(lastN) / float(len(lastN))
                if average < THRESHOLD * 0.8:
                    break
        finally:
            stream.close()
    finally:
        # Release the audio backend even if recording failed part-way.
        p.terminate()

    # write data to WAVE file
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()

    wavfile = os.getcwd() + "/" + WAVE_OUTPUT_FILENAME
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
def record(listen_time):
    """Record ``listen_time`` seconds of audio and return the decoded text.

    The threshold comes from ``fetchThreshold()``. Every ``CHUNK`` read from
    the input stream is scored with ``getScore``; if no chunk reaches the
    threshold nothing is written and an empty string is returned. Otherwise
    the capture is saved as ``livewav.wav`` in the current working directory
    and decoded with a ``Decoder`` built from the module-level
    ``hmdir``/``lmdir``/``dictd``.

    Args:
        listen_time: Multiplier applied to ``RATE // CHUNK`` to get the
            number of chunks to read.

    Returns:
        ``""`` when nothing was detected, else the first element of
        ``Decoder.get_hyp()``.
    """
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    try:
        THRESHOLD = fetchThreshold()
        print(THRESHOLD)

        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
                        input=True, frames_per_buffer=CHUNK)
        try:
            print("* recording")
            frames = []
            detected = False
            # Floor division keeps the bound an integer on Python 2 and 3.
            for _ in range(RATE // CHUNK * listen_time):
                data = stream.read(CHUNK)
                frames.append(data)
                if not getScore(data) < THRESHOLD:
                    detected = True
        finally:
            stream.close()
    finally:
        # The original returned early on silence without closing the stream
        # or terminating PyAudio; cleanup now always runs.
        p.terminate()

    if not detected:
        print("nothing detected")
        return ""
    print("* done recording")

    # write data to WAVE file
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()

    wavfile = os.getcwd() + "/" + WAVE_OUTPUT_FILENAME
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
def record(listen_time):
    """Record ``listen_time`` seconds of audio and return the decoded text.

    The threshold comes from ``fetchThreshold()``. Every ``CHUNK`` read from
    the input stream is scored with ``getScore``; if no chunk reaches the
    threshold nothing is written and an empty string is returned. Otherwise
    the capture is saved as ``livewav.wav`` in the current working directory
    and decoded with a ``Decoder`` built from the module-level
    ``hmdir``/``lmdir``/``dictd``.

    Args:
        listen_time: Multiplier applied to ``RATE // CHUNK`` to get the
            number of chunks to read.

    Returns:
        ``""`` when nothing was detected, else the first element of
        ``Decoder.get_hyp()``.
    """
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    try:
        THRESHOLD = fetchThreshold()
        print(THRESHOLD)

        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
                        input=True, frames_per_buffer=CHUNK)
        try:
            print("* recording")
            frames = []
            detected = False
            # Floor division keeps the bound an integer on Python 2 and 3.
            for _ in range(RATE // CHUNK * listen_time):
                data = stream.read(CHUNK)
                frames.append(data)
                if not getScore(data) < THRESHOLD:
                    detected = True
        finally:
            stream.close()
    finally:
        # The original returned early on silence without closing the stream
        # or terminating PyAudio; cleanup now always runs.
        p.terminate()

    if not detected:
        print("nothing detected")
        return ""
    print("* done recording")

    # write data to WAVE file
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()

    wavfile = os.getcwd() + "/" + WAVE_OUTPUT_FILENAME
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
def best_sphinx_speech_result(pyaudio, wav_name, profile):
    """Decode ``wav_name`` and return the first element of the hypothesis.

    On the first call (while the module-level ``have_sphinx_dictionary`` flag
    is false) the dictionary and language model files are generated from
    ``profile["words"]`` and the flag is set.

    Args:
        pyaudio: Unused; kept so existing callers keep working.
        wav_name: Path of the audio file to decode.
        profile: Mapping that must contain ``"words"`` until the dictionary
            has been generated.

    Returns:
        The first element of ``Decoder.get_hyp()``.

    Raises:
        KeyError: If the dictionary still has to be built and ``profile``
            has no ``"words"`` entry.
    """
    # Must precede any use of the name; the original declared it after the
    # first read, which Python 3 rejects as a SyntaxError.
    global have_sphinx_dictionary

    if not have_sphinx_dictionary:
        if "words" not in profile:
            raise KeyError("Pass the possible words in in profile")
        # NOTE(review): `compile` is presumably a project helper shadowing
        # the builtin — confirm where it is imported from.
        compile("sentences.txt", "dictionary.dic", "language_model.lm",
                profile["words"])
        have_sphinx_dictionary = True

    # `with` closes the handle that the original `file(...)` call leaked.
    with open(wav_name, 'rb') as wav_file:
        speechRec = Decoder(
            hmm="/usr/local/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k",
            lm="language_model.lm",
            dict="dictionary.dic",
        )
        speechRec.decode_raw(wav_file)
        results = speechRec.get_hyp()
    return results[0]
def record(THRESHOLD=None):
    """Record up to four seconds of audio, save it and return the decoded text.

    Audio is read in ``CHUNK``-sized buffers at ``RATE``; recording stops
    early once the running average of the last 30 ``getScore`` values drops
    below 80% of the threshold. The capture is written to ``livewav.wav`` in
    the current working directory and decoded with a ``Decoder`` built from
    the module-level ``hmdir``/``lmdir``/``dictd``.

    Args:
        THRESHOLD: Loudness threshold; when None it is obtained from
            ``fetchThreshold()``.

    Returns:
        The first element of ``Decoder.get_hyp()``.
    """
    from collections import deque

    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    LISTEN_TIME = 4
    WAVE_OUTPUT_FILENAME = "livewav.wav"
    WINDOW = 30

    p = pyaudio.PyAudio()
    try:
        if THRESHOLD is None:
            THRESHOLD = fetchThreshold()
            # NOTE(review): the original layout is ambiguous about whether
            # this print belongs to the `if`; confirm against the real file.
            print(THRESHOLD)

        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                        input=True, frames_per_buffer=CHUNK)
        try:
            print("* recording")
            frames = []
            # Seed the window above the threshold so the loop cannot stop
            # before real samples have displaced the seed values.
            lastN = deque([THRESHOLD * 1.2] * WINDOW, maxlen=WINDOW)
            # Floor division keeps the bound an integer on Python 2 and 3.
            for _ in range(RATE // CHUNK * LISTEN_TIME):
                data = stream.read(CHUNK)
                frames.append(data)
                lastN.append(getScore(data))
                average = sum(lastN) / float(len(lastN))
                if average < THRESHOLD * 0.8:
                    break
            print("* done recording")
        finally:
            stream.close()
    finally:
        # Release the audio backend even if recording failed part-way.
        p.terminate()

    # write data to WAVE file
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    try:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    finally:
        wf.close()

    wavfile = os.getcwd() + "/" + WAVE_OUTPUT_FILENAME
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()
    return result[0]
def decodepassive():
    """Decode the file named by the module-level ``passivewav``.

    Builds a ``Decoder`` from the module-level ``hmdir``/``lmdir``/``dictd``
    and feeds it the opened file.

    Returns:
        The first element of ``Decoder.get_hyp()``.
    """
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    # The handle needs its own name: binding it to `passivewav` (as the
    # original did) makes that name local to the function, so reading the
    # path raised UnboundLocalError before the file was ever opened.
    with open(passivewav, 'rb') as wav_handle:
        speechRec.decode_raw(wav_handle)
    result = speechRec.get_hyp()
    return result[0]
p.terminate() # write data to WAVE file data = ''.join(all) wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb') wf.setnchannels(CHANNELS) wf.setsampwidth(p.get_sample_size(FORMAT)) wf.setframerate(RATE) wf.writeframes(data) wf.close() if __name__ == "__main__": hmdir = "/usr/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k" lmdir = "/usr/share/pocketsphinx/model/lm/en_US/hub4.5000.DMP" dictd = "/usr/share/pocketsphinx/model/lm/en_US/cmu07a.dic" record() wavfile = "/home/shridhar/pocketsphinxtest/livewav.wav" speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd) wavFile = file(wavfile, 'rb') speechRec.decode_raw(wavFile) result = speechRec.get_hyp() print "Recognised text from the converted video file" print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" print result[0] print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
"""Decode one fixed WAV file with pocketsphinx and print the hypothesis text."""
from os import environ, path

from pocketsphinx import Decoder
from sphinxbase import *

print('a')

# Acoustic model, language model and dictionary locations.
model_dir = '/home/wmlab/CMU_try/zh_broadcastnews_ptm256_8000/'
# Alternative model files kept for reference:
# lm_dir = '/home/wmlab/CMU_try/CMUsphinx-Demo-master/0506.lm'
# dict_dir = '/home/wmlab/CMU_try/CMUsphinx-Demo-master/0506.dic'
lm_dir = '/home/wmlab/CMU_try/try_python/usable_version/0506.lm'
dict_dir = '/home/wmlab/CMU_try/try_python/usable_version/0506.dic'
wav_file = '/home/wmlab/CMU_try/testing_audio/testing003.wav'

# Number of leading bytes skipped before decoding.
# NOTE(review): presumably the size of the WAV header, so that only sample
# data reaches the decoder — confirm for the input files in use.
WAV_HEADER_BYTES = 44

speech_rec = Decoder(hmm=model_dir, lm=lm_dir, dict=dict_dir)
# `with open(...)` replaces the Python 2-only `file(...)` call and closes the
# handle, which the original left open.
with open(wav_file, 'rb') as wavFile:
    wavFile.seek(WAV_HEADER_BYTES)
    speech_rec.decode_raw(wavFile)
result = speech_rec.get_hyp()
print(result[0])
def decodepassive():
    """Decode the file named by the module-level ``passivewav``.

    Builds a ``Decoder`` from the module-level ``hmdir``/``lmdir``/``dictd``
    and feeds it the opened file.

    Returns:
        The first element of ``Decoder.get_hyp()``.
    """
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    # The handle needs its own name: binding it to `passivewav` (as the
    # original did) makes that name local to the function, so reading the
    # path raised UnboundLocalError before the file was ever opened.
    with open(passivewav, 'rb') as wav_handle:
        speechRec.decode_raw(wav_handle)
    result = speechRec.get_hyp()
    return result[0]