def function16(name):
    """Verify a speaker by matching a per-user passphrase in a recorded WAV.

    Returns "successfull" when the phrase recognized from the user's wav
    file equals the expected passphrase for *name*, otherwise "failed".
    (Spelling of the return values kept for caller compatibility.)
    """
    import speech_recognition as sr

    # Per-user recording path and expected passphrase. A lookup table also
    # fixes the original NameError: `audio` was undefined whenever `name`
    # matched none of the hard-coded branches.
    profiles = {
        'chethana': ('/home/pi/person1.wav', 'hello home'),
        'dinesh': ('/home/pi/dinesh.wav', 'happy'),
        'sampu': ('/home/pi/sampu.wav', 'crazy'),
    }
    if name not in profiles:
        return "failed"

    wav_path, passphrase = profiles[name]
    r = sr.Recognizer()
    with sr.WavFile(wav_path) as source:
        audio = r.record(source)
    command = r.recognize_google(audio)
    print(command)
    return "successfull" if command == passphrase else "failed"
def __init__(self, mic1, mic2):
    """Locate the speech window shared by two mic recordings, pick the
    louder channel, set a direction-of-arrival estimate, and transcribe it.

    Sets: self.speechExists, self.speech1/self.speech2 (scaled amplitude
    lists), self.DOA (45 or 135 degrees), self.recognizedSpeech.
    """
    # If no speech was detected there is nothing further to compute.
    speechStartTime = self.getSpeechStartTime(mic1.getFileName())
    if speechStartTime == -1:
        self.speechExists = False
        return
    self.speechExists = True

    # Slice the speech portion (100 ms before to 400 ms after onset,
    # assuming pydub-style millisecond slicing — TODO confirm units).
    speech1 = mic1.getSound()[speechStartTime - 100:speechStartTime + 400]
    speech2 = mic2.getSound()[speechStartTime - 100:speechStartTime + 400]
    speech1Holder = 'holder/speech1.wav'
    speech2Holder = 'holder/speech2.wav'
    speech1.export(speech1Holder, format='wav')
    speech2.export(speech2Holder, format='wav')

    # Scale amplitudes down so cross correlation avoids square roots of
    # negative numbers and overflow errors.
    data1 = self.getAmplitudeList(speech1Holder)
    data2 = self.getAmplitudeList(speech2Holder)
    self.speech1 = [abs(num / 1000) for num in data1]
    self.speech2 = [abs(num / 1000) for num in data2]

    # BUG FIX: the original computed BOTH means from self.speech1 and
    # divided by len(self.speech2), so mic1 could never be chosen.
    meanSpeech1 = sum(self.speech1) / len(self.speech1)
    meanSpeech2 = sum(self.speech2) / len(self.speech2)

    # Give the louder channel priority for recognition and DOA.
    if meanSpeech1 > meanSpeech2:
        self.DOA = 45
        louder_holder = speech1Holder
    else:
        self.DOA = 135
        louder_holder = speech2Holder
    r = sr.Recognizer()
    with sr.WavFile(louder_holder) as source:
        audio = r.record(source)
    self.recognizedSpeech = r.recognize_google(audio)
def live_stream_to_text(self, broadcast_id, audio_dir, duration):
    """Return a string of text from the processed live-stream audio.

    Saves `duration` seconds of the broadcast, then recognizes it.
    Returns the last candidate transcription printed, or '' when the
    audio was unintelligible (the original raised NameError by returning
    a loop variable that was never bound on failure).
    """
    self.save_live_stream_audio(broadcast_id, audio_dir, duration)
    audio_path = os.path.join(audio_dir, broadcast_id + '.mp3')

    r = sr.Recognizer()
    with sr.WavFile(audio_path) as source:
        audio = r.record(source)  # extract audio data from the file

    text = ''
    try:
        # `predictions` — renamed from `list`, which shadowed the builtin.
        predictions = r.recognize(audio, True)
        print("Possible transcriptions:")
        for prediction in predictions:
            print(" " + prediction["text"] + " (" +
                  str(prediction["confidence"] * 100) + "%)")
            text = prediction["text"]
    except LookupError:  # speech is unintelligible
        print("Could not understand audio")
    return text
def extract(self, filename, **kwargs):
    """Extract speech text from an audio file via Google recognition.

    Non-wav inputs are converted to a temporary wav and processed
    recursively; the temp file is always removed. Returns the recognized
    text followed by a newline ('' + newline when not understood).
    """
    speech = ''
    # convert to wav, if not already .wav
    base, ext = os.path.splitext(filename)
    if ext != '.wav':
        temp_filename = self.convert_to_wav(filename)
        try:
            speech = self.extract(temp_filename, **kwargs)
        finally:
            # make sure temp_file is deleted
            os.remove(temp_filename)
    else:
        r = sr.Recognizer()
        with sr.WavFile(filename) as source:
            audio = r.record(source)
        try:
            speech = r.recognize_google(audio)
        except LookupError:
            # audio is not understandable (older library versions)
            speech = ''
        except sr.UnknownValueError:
            # modern speech_recognition raises this, not LookupError —
            # added for consistency with the sibling extract() method
            speech = ''
        except sr.RequestError:
            # service unreachable; treat as no transcription
            speech = ''
        # add a newline, to make output cleaner
        speech += '\n'
    return speech
def checkActive(self):
    """Return True (and play a beep) when the recorded phrase is the
    wake word "jarvis"; False otherwise."""
    result = False
    t = ""
    with speech.WavFile("audio/tmp/send.wav") as source:
        audio = self.recognizer.record(source)
    # Use the offline Sphinx engine.
    try:
        t = self.recognizer.recognize_sphinx(
            audio)  # todo check, language='ru-RU/cmusphinx-ru-5.2')
        if t != "" or self.logAll:
            print("Sphinx thinks you said: [" + t + "]")
    except speech.UnknownValueError:
        result = False  # was `result = 0`; keep the flag a proper bool
        if self.logAll:
            print("Sphinx could not understand audio")
    except speech.RequestError as e:
        print("Sphinx error; {0}".format(e))
    if t == "jarvis":
        result = True
    if result:
        self.playHiBeep()
    return result
def extract(self, filename, method='', **kwargs): speech = '' # convert to wav, if not already .wav base, ext = os.path.splitext(filename) if ext != '.wav': temp_filename = self.convert_to_wav(filename) try: speech = self.extract(temp_filename, method, **kwargs) finally: # make sure temp_file is deleted os.remove(temp_filename) else: r = sr.Recognizer() with sr.WavFile(filename) as source: audio = r.record(source) try: if method == 'google' or method == '': speech = r.recognize_google(audio) elif method == 'sphinx': speech = r.recognize_sphinx(audio) else: raise UnknownMethod(method) except LookupError: # audio is not understandable speech = '' except sr.UnknownValueError: speech = '' except sr.RequestError as e: speech = '' # add a newline, to make output cleaner speech += '\n' return speech
def transcribe():
    """Record five seconds from the ALSA mic and return the Google
    transcription in lower case ('' on any recognition failure)."""
    separator = '-' * 100
    recognizer = sr.Recognizer()
    print(separator)
    print("Listening...\n")
    # Capture a short clip with the arecord command-line utility.
    os.system("arecord -D plughw:1,0 -d 5 temp.wav")
    with sr.WavFile("temp.wav") as source:
        audio = recognizer.record(source)
    print("Working")
    try:
        heard = recognizer.recognize_google(audio).lower()
        print("You said: \"" + heard + "\"")
        print(separator)
        return heard
    except sr.UnknownValueError:
        print("Could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from the service; {0}".format(e))
    print(separator)
    return ""
def audio2Text(): r = sr.Recognizer() with sr.WavFile("audio.wav") as source: audio = r.record(source) listVulgar = [ 'az', 'bz', 'cz', 'dz', 'ez', 'fz', 'gz', 'hz', 'iz', 'jz', 'kz', 'lz', 'mz', 'nz', 'oz,', 'pz', 'qz', 'rz', 'sz', 'tz', 'uz', 'vz', 'wz', 'xz', 'yz', 'zz', 'damn', 'ass', 'god', 'stupid', 'no' ] try: print("Transcribing...") x = str(r.recognize(audio)) print x replace = re.sub("[*]", 'z ', x) wordList = re.sub("[^\w]", " ", replace).split() print wordList #Executes the command here if any(i in wordList for i in listVulgar): print 'YOU DIE!' playVideo() except LookupError: print("Could not understand audio")
def wav_transcribe(WAV_FILE, lang="es-ES"):
    """
    Transcribes a WAV file containing speech into text using Google
    Speech Recognition and returns the complete transcription.

    Returns "" when the audio could not be recognized — the original
    raised NameError on failure because the return variable was only
    bound inside the try block.
    """
    import speech_recognition as sr

    r = sr.Recognizer()
    textfromwav = ""  # bug fix: defined even when recognition fails
    with sr.WavFile(WAV_FILE) as source:
        audio = r.record(source)  # read the entire WAV file
    # recognize speech using Google Speech Recognition
    try:
        # Uses the default API key; pass key="GOOGLE_SPEECH_RECOGNITION_API_KEY"
        # to recognize_google to use your own.
        textfromwav = r.recognize_google(audio, language=lang)
        print("\"" + textfromwav + "\"")
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print(
            "Could not request results from Google Speech Recognition service; {0}"
            .format(e))
    return textfromwav
def savewav(self):
    """Write the captured samples to temp.wav, transcribe them with Google
    (zh-TW) into self.dialogue (lower-cased), print the result, then
    delete the temporary file."""
    wf = wave.open("temp.wav", 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(self.SAMPLING_RATE)
    # tobytes() — tostring() is a deprecated alias removed in NumPy 2.
    wf.writeframes(np.array(self.Voice_String).tobytes())
    wf.close()

    r = sr.Recognizer()
    r.energy_threshold = 4000  # suppress background noise
    with sr.WavFile("temp.wav") as source:
        audio = r.record(source)
    self.dialogue = ''  # bug fix: defined even when recognition fails below
    try:
        self.dialogue = r.recognize_google(audio, language="zh-TW")
        self.dialogue = self.dialogue.lower()
    except sr.UnknownValueError as e:
        error = '聽不懂'
    except sr.RequestError as e:
        error = '無法連線'
    print(self.dialogue)
    while True:
        if os.path.exists('temp.wav'):
            os.remove('temp.wav')
            # NOTE(review): this clears the transcription right after
            # printing it — confirm callers read self.dialogue elsewhere.
            self.dialogue = ''
            break
def get_asr(self):
    """Run voice-activity detection plus Google ASR on self.WAV_FILE (or
    listen live when self.ambient) and return the transcription, or
    'ERROR'/'error' on failure (casing preserved for callers)."""
    if not self.ambient:
        # VAD using adinrec.
        # note: threshold for adinrec needs readjusting for new HW setups.
        try:
            subprocess.check_call(
                EXEC_PATH + '/adinrec -lv 1000 -zc 200 ' + self.WAV_FILE,
                shell=True,
                stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            print("Ping stdout output:\n", e.output)
            return 'ERROR'
        with sr.WavFile(self.WAV_FILE) as source:
            audio = self.r.record(source)  # read the entire WAV file
        try:
            # `result` — renamed from `str`, which shadowed the builtin.
            result = self.r.recognize_google(audio, language=self.lang)
            print(result)
        except Exception:
            # Was a bare `except:`; still broad, but no longer swallows
            # SystemExit/KeyboardInterrupt.
            result = 'ERROR'
            print("can't understand")
    else:
        try:
            with self.source as source:
                audio = self.r.listen(source)
            result = self.r.recognize_google(audio, language=self.lang)
        except Exception:
            result = 'error'
    return result
def VoiceToText(path, files, target_path):
    """Transcribe each wav in *files* (zh-TW) into a matching .txt under
    *target_path*, skipping files that already have a transcript."""
    for wav_name in files:
        txt_file = "%s\\%s.txt" % (target_path, wav_name[:-4])
        if os.path.isfile(txt_file):
            continue  # already transcribed
        with open(txt_file, "w", encoding="utf-8") as out:
            out.write("%s:\n" % wav_name)
            recognizer = sr.Recognizer()  # recognizes English by default
            with sr.WavFile(path + "\\" + wav_name) as source:
                audio = recognizer.record(source)
            try:
                text = recognizer.recognize_google(audio, language="zh-TW")
                print(wav_name)
                if len(text) == 0:
                    print("===無資料==")
                    continue
                print(text)
                out.write("%s \n\n" % text)
                if wav_name == files[-1]:
                    print("結束翻譯")
            except sr.RequestError as e:
                print("無法翻譯{0}".format(e))
            # The remaining handlers guard against unrecognizable audio.
            except LookupError:
                print("Could not understand audio")
            except sr.UnknownValueError:
                print("Error: 無法識別 Audio")
def trans_text():
    """Transcribe ./w/test-<n>.wav clips (zh-tw), appending the text to
    test.txt, sleeping every other clip to avoid rate limiting; finally
    removes ./w and recreates ../test."""
    path = r".\w"
    files = os.listdir(path)
    files = [path + "\\" + f for f in files if f.endswith('.wav')]
    freq_wav = len(files)
    # NOTE(review): range(1, freq_wav) processes clips 1..freq_wav-1; if
    # clips are numbered 1..freq_wav one is skipped — confirm numbering.
    for freq in range(1, freq_wav):
        if freq % 2 == 0:
            time.sleep(30)
            print("SLEEP30")
        r = sr.Recognizer()
        with sr.WavFile("./w/test-{}.wav".format(freq)) as source:
            audio = r.record(source)
        try:
            respones = r.recognize_google(audio, language='zh-tw')
            # Bug fix: reuse the first result — the original called the
            # API a second time for the print, wasting quota and possibly
            # printing a different transcription than it wrote.
            print("Transcription: " + respones)
            # `with` guarantees the handle is closed even on write errors.
            with open('test.txt', 'a') as f:
                f.write(respones)
        except LookupError:
            print("Could not understand audio")
            time.sleep(30)
    shutil.rmtree('./w')
    os.makedirs("../test", exist_ok=True)
def get_text_from_wav(path):
    """Return the Google Speech Recognition transcription of the wav at *path*."""
    recognizer = sr.Recognizer()
    with sr.WavFile(path) as source:
        recording = recognizer.record(source)
    return recognizer.recognize_google(recording)
def receive_audio(bot, update):
    """Telegram handler: download an incoming voice note, convert it to wav
    with ffmpeg, transcribe with Google, and hand the text to process_audio."""
    print("audio received")
    voice = update.message.voice
    file = bot.getFile(voice.file_id)
    print("file_id: " + str(voice.file_id))
    currt = strftime("%Y-%m-%d %H:%M:%S", gmtime())
    file.download('voice.ogg')
    # Timestamped target name so repeated messages don't overwrite.
    new_file_name = 'apple' + str(currt) + '.wav'
    os.system('ffmpeg -i "%s" "%s"' % ('voice.ogg', new_file_name))
    os.system('rm -rf voice.ogg')
    print("audio converted")
    recognizer = sr.Recognizer()
    with sr.WavFile(new_file_name) as source:
        audio = recognizer.record(source)
    try:
        print("before")
        value = recognizer.recognize_google(audio)
        print(value)
        print("after")
        process_audio(value, bot, update)
    except sr.UnknownValueError:
        print("Could not understand audio")
    except sr.RequestError as e:
        print("Could not request results; {0}".format(e))
def speech_recognition(): try: global speechRecWord_rec print("you can say now...") tts.say("you can say now...") record_NAO(robot_IP, robot_PORT=9559) WAV_FILE = "/home/nao/record.wav" r = sr.Recognizer() #m = sr.Microphone() m = sr.WavFile(WAV_FILE) #print("A moment of silence, please...") with m as source: r.adjust_for_ambient_noise(source) r.energy_threshold #print("Set minimum energy threshold to {}".format(r.energy_threshold)) audio = r.record(source) #print("Got it!") try: # recognize speech using Google Speech Recognition speechRecWord_rec = r.recognize_google(audio) print speechRecWord_rec except sr.UnknownValueError: print("Oops! Didn't catch that") tts.say("Oops! Didn't catch that") except sr.RequestError as e: print("Uh oh! Couldn't request results from Google Speech Recognition service; {0}".format(e)) except KeyboardInterrupt: pass return speechRecWord_rec
def handle_call_view():
    """Twilio webhook: download the call recording, transcribe the spoken
    command, queue it for the caller's device, and confirm via TwiML."""
    filename = '/tmp/{}'.format(str(uuid.uuid4()))
    recording_url = request.values.get("RecordingUrl", None)
    with open(filename, 'wb') as handle:
        response = requests.get(recording_url, stream=True)
        for block in response.iter_content(1024):
            if not block:
                break
            handle.write(block)
    resp = twilio.twiml.Response()
    try:
        r = sr.Recognizer()
        with sr.WavFile(filename) as source:
            audio = r.record(source)
        command = r.recognize(audio)
        # Strip the leading "+1" country code before the DB lookup.
        from_number = request.values.get('From', '')[2:]
        user = users.find_one({'number': from_number})
        if not from_number or not user:
            resp.say("I couldn't find {} in our database!".format(from_number))
        else:
            pending_commands.insert({'key': user['key'],
                                     'message': command,
                                     'datetime': datetime.datetime.utcnow()})
            resp.say("I will " + command)
    except Exception:
        # Was a bare `except:` — keep the webhook answering on any failure,
        # but stop trapping SystemExit/KeyboardInterrupt.
        resp.say("I didn't catch that. Please try again.")
    resp.redirect(url_for('call_view', _external=True))
    return str(resp)
def sound_to_text():
    """Transcribe each nlp_<i>.wav chunk with Google (Turkish) and return
    the list of transcriptions; unintelligible or failed chunks are skipped."""
    NEWS = []
    for i in range(int(length / 8)):
        WAV_FILE = path.join(path.dirname(path.realpath(__file__)),
                             'nlp_' + str(i) + '.wav')
        r = sr.Recognizer()
        with sr.WavFile(WAV_FILE) as source:
            audio = r.record(source)  # read the entire WAV file
        try:
            # Bug fix: recognize once and reuse the result — the original
            # hit the Google API twice per chunk and could print one
            # transcription while appending a different one.
            text = r.recognize_google(audio, language="tr")
            print(i, ". part: ", text)
            NEWS.append(text)
        except sr.UnknownValueError:
            pass  # chunk was unintelligible; skip it
        except sr.RequestError:
            pass  # service unreachable; skip this chunk
    return NEWS
def ask_object(self): print("What object were you thinking of?") transcriber = Transcriber("transcriber", self.address, self.port) transcriber.start() while True: if transcriber.check: break transcriber.stop() os.system("sox -r 48000 -e signed -b 16 -c output.raw speech.wav") r = sr.Recognizer() with sr.WavFile("speech.wav") as source: speech = r.record(source) try: possibilities = r.recognize(speech, True) for possibility in possibilities: for word in self.object_vocab: for syn in self.object_vocab[word]: if possibility["text"] == syn: print possibility self.broker.shutdown() return possibility except LookupError: self.say( "I couldn't understand what you said. Please go to the computer and type the name of your object." ) self.broker.shutdown() return raw_input("What object were you thinking of?")
def recognize_speech_from_file(file):
    """Transcribe *file* with Google Speech Recognition.

    Returns a dict with keys "success", "error", and "transcription";
    "success" is False only when the API was unreachable, "error" is set
    for both API failure and unintelligible speech.
    """
    recognizer = sr.Recognizer()
    # Sanity check kept from the original microphone-based helper.
    if not isinstance(recognizer, sr.Recognizer):
        raise TypeError("`recognizer` must be `Recognizer` instance")

    response = {"success": True, "error": None, "transcription": None}

    with sr.WavFile(file) as source:
        audio = recognizer.record(source)
    try:
        response["transcription"] = recognizer.recognize_google(audio)
    except sr.RequestError:
        # API was unreachable or unresponsive.
        response["success"] = False
        response["error"] = "API unavailable"
    except sr.UnknownValueError:
        # Speech was unintelligible.
        response["error"] = "Unable to recognize speech"
    return response
def search_audio():
    """Prompt (in Thai) for a search phrase, record it to ds1.wav, and read
    the recording through the module-level recognizer."""
    global _run
    tts.listen("กรุณาพูดข้อความที่ต้องการค้นหาค่ะ")
    _run("ds1.wav")
    with sr2.WavFile("ds1.wav") as wav_source:
        print("รับเสียง")
        # NOTE(review): `audio` is a local that is never used after this
        # function returns — confirm whether a global/return was intended.
        audio = r.record(wav_source)
def word_frequency_detection(word_to_detect):
    """Continuously capture voice clips and count how many times
    *word_to_detect* appears in the Google transcriptions, printing a
    running total. Runs forever; capture happens on a background thread."""
    TOTAL_WORD_COUNT = 0  # running count across all clips
    voice_data_queue = Queue()
    # Background thread feeds captured audio through the queue.
    voice_capture_thread = Thread(
        target=subrecord.voice_capture,
        args=[math.floor(RECORD_KHZ * 1000), INPUT_BUF_SIZE, voice_data_queue])
    voice_capture_thread.start()
    while True:
        # NOTE(review): this sets an attribute on the `speech` object rather
        # than writing a speech.wav file — confirm how subrecord hands the
        # audio over to the recognizer.
        speech.wav = voice_data_queue.get()
        WAV_FILE = path.join(path.dirname(path.realpath(__file__)),
                             "speech.wav")
        r = sr.Recognizer()
        with sr.WavFile(WAV_FILE) as source:
            audio = r.record(source)
        try:
            result_text = r.recognize_google(audio)  # transcription string
            print(result_text)
            result_list = result_text.split()
            # Count occurrences in this clip.
            for word in result_list:
                if word == word_to_detect:
                    TOTAL_WORD_COUNT += 1
            # Bug fix: %-format the count — the original passed it as a
            # second print() argument, printing a tuple-like line.
            print("Has appeared %d times" % TOTAL_WORD_COUNT)
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from Google Speech Recognition service; {0}".format(e))
def speech_recognition():
    """Transcribe a stored session recording with IBM Watson Speech to Text
    and dump the full JSON response next to the audio file."""
    basepath = os.path.dirname(__file__)
    session_dir = "session_data/multi_test_7/user_2"
    filepath = os.path.abspath(
        os.path.join(basepath, session_dir + "/watson-audio.wav"))
    recognizer = sr.Recognizer()
    with sr.WavFile(filepath) as source:
        audio = recognizer.record(source)  # read the entire WAV file
    # IBM Speech to Text credentials (redacted): username is a UUID-style
    # string, password a mixed-case alphanumeric string.
    IBM_USERNAME = "******"
    IBM_PASSWORD = "******"
    try:
        data = recognizer.recognize_ibm(audio,
                                        username=IBM_USERNAME,
                                        password=IBM_PASSWORD,
                                        show_all=True)
        filepath_final = os.path.abspath(
            os.path.join(basepath, session_dir + "/watson-data.json"))
        with open(filepath_final, 'w') as output_file:
            json.dump(data, output_file)
    except sr.UnknownValueError:
        print("IBM Speech to Text could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from IBM Speech to Text service; {0}".
              format(e))
def SpeechToText(filename):
    """Transcribe *filename* (es-US) with Google Speech Recognition and
    return the normalized text; on failure returns an English error
    description instead."""
    import speech_recognition as sr
    from os import path

    wav_path = path.join(path.dirname(path.realpath(__file__)), filename)
    recognizer = sr.Recognizer()
    with sr.WavFile(wav_path) as source:
        audio = recognizer.record(source)  # read the entire WAV file
    try:
        # Uses the default API key; pass key="..." to recognize_google
        # to use your own.
        recognized_word = normalize_string(
            recognizer.recognize_google(audio, language="es-US"))
        print("Google Speech Recognition thinks you said: " + recognized_word)
        return recognized_word
    except sr.UnknownValueError:
        return "Google Speech Recognition could not understand audio"
    except sr.RequestError as e:
        return "Could not request results from Google Speech Recognition service; {0}".format(e)
def recognize_with_att(app_key, app_secret, filename): recognizer = speech_recognition.Recognizer() path_to_wav_source = os.getcwd() path_to_wav_source += "/%s" % (filename) with speech_recognition.WavFile( path.join(path.dirname(path.realpath(path_to_wav_source)), filename)) as speech_to_interpret: WAV_interpretor = recognizer.record(speech_to_interpret) try: start = time.time() print "ATT: \n" + recognizer.recognize_att(WAV_interpretor, app_key, app_secret, language="en-US", show_all=False) end = time.time() diff = (end - start) print "ATT: %s" % (str(diff)) print "\n" return diff except: pass
def Spinx():
    """Transcribe voice.wav offline with Sphinx and play the canned
    response matching the recognized phrase."""
    recognizer = sr.Recognizer()
    with sr.WavFile("voice.wav") as source:
        audio = recognizer.record(source)
    t = 0  # stays 0 when nothing was recognized
    try:
        t = recognizer.recognize_sphinx(audio)
        print(t)
    except LookupError:
        print("Could not understand audio")
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))
    # Dispatch table replaces the if/elif chain; unmatched phrases do nothing.
    actions = {
        "hi": sp.Play_Hello,
        "time": sp.Play_Current_Time,
        "how are you": sp.Play_HRU,
        "settings": sp.Play_Current_Settings,
    }
    action = actions.get(t)
    if action is not None:
        action()
def audio_message(bot: Bot, update: Update):
    """Telegram handler: convert an incoming voice note to wav, transcribe
    it with Google (ru_RU), and feed the lower-cased text to bot_answer."""
    data['Type message'] = 'audio'
    logger.info('Voice message processing')
    try:
        recognizer = sr.Recognizer()
        fileID = update.message.voice.file_id
        voice_file = bot.get_file(fileID)
        voice_file.download('audio.ogg')
        # Convert ogg -> wav for the recognizer.
        AudioSegment.from_ogg('audio.ogg').export('audio.wav', format="wav")
        try:
            with sr.WavFile('audio.wav') as source:
                audio = recognizer.record(source)
            text = recognizer.recognize_google(audio, language='ru_RU').lower()
            bot_answer(bot, update, text)
        except sr.UnknownValueError:
            update.message.reply_text('Извините, не понял что вы сказали.')
    except Exception as e:
        # Any other failure: log it and apologize to the user.
        logger.info('Voice message: ' + str(e))
        update.message.reply_text(
            text=
            '😔Извините, в данный момент нет возможности прослушать голосовое сообщение.',
            reply_markup=reply_markup_help(),
        )
def ConvertAudioToText():
    """Transcribe the current recording, append its words to the rolling
    word buffer (keeping the last 25), and launch DeviceAction on a
    worker thread. Returns early on any recognition failure."""
    global bufferOfWords
    global fileName
    global s
    global bufferOfWords25
    with sr.WavFile(fileName) as source:
        audio = s.record(source)
    try:
        userSaid = s.recognize_google(audio).lower()
        print("You said \'" + userSaid + '\'')
    except IndexError:
        print("No internet connection")
        return
    except KeyError:
        print("Invalid API key or quota maxed out")
        return
    except LookupError:
        print("Could not understand audio")
        return
    bufferOfWords.extend(userSaid.split(' '))
    bufferOfWords25 = bufferOfWords[-25:]
    threading.Thread(target=DeviceAction).start()
def speak_button_press(self):
    """Record a short clip from the microphone, save it to the configured
    wav file, transcribe it with Google, and show the text in the UI."""
    try:
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT,
                        channels=self.CHANNELS,
                        rate=self.RATE,
                        input=True,
                        frames_per_buffer=self.CHUNK)
        self.ui.status.setText("recording..")
        self.ui.status.repaint()
        print("* recording")
        frames = []
        for i in range(0, int(self.RATE / self.CHUNK * self.RECORD_SECONDS)):
            frames.append(stream.read(self.CHUNK))
        self.ui.status.setText("Done recording")
        self.ui.status.repaint()
        print("* done recording")
        stream.stop_stream()
        stream.close()
        p.terminate()

        wf = wave.open(self.WAVE_OUTPUT_FILENAME, 'wb')
        wf.setnchannels(self.CHANNELS)
        wf.setsampwidth(p.get_sample_size(self.FORMAT))
        wf.setframerate(self.RATE)
        wf.writeframes(b''.join(frames))
        wf.close()

        r = sr.Recognizer()
        # Calibrate the energy threshold on the default mic. Bug fix: the
        # original assigned adjust_for_ambient_noise's None return value to
        # `audio`, which was misleading (it was overwritten below anyway).
        with sr.Microphone() as source:
            r.adjust_for_ambient_noise(source)
        with sr.WavFile("output.wav") as source:
            audio = r.record(source)
        try:
            self.ui.status.setText("Analysing")
            self.ui.status.repaint()
            # Recognize speech using Google Speech Recognition.
            command = r.recognize_google(audio, language="en-US")
            print(command)
            self.ui.textbox.setText(command)
            self.ui.status.setText("")
            self.ui.status.repaint()
        except LookupError:  # speech is unintelligible
            self.ui.status.setText("Say Again..")
            self.ui.status.repaint()
            print("Could not understand audio")
            self.ui.textbox.setText("Error, not understand audio")
    except Exception:
        # Was a bare `except:` — keep the UI alive on any failure, but stop
        # trapping SystemExit/KeyboardInterrupt.
        print("error ")
def speech_text():
    """Flask endpoint: accept an uploaded audio blob, transcribe it with
    IBM Watson Speech to Text (French), and return {'code', 'data'} JSON
    (code 400 and empty data on any failure)."""
    IBM_USERNAME = '******'
    IBM_PASSWORD = '******'
    LANGUAGE = 'fr-FR'  # alternative: en-US
    recognizer = sr.Recognizer()
    code = 200
    payload = ''
    try:
        file_storage = request.files['blob_stream']
        file_storage.save('blob.wav')
        with sr.WavFile('blob.wav') as source:
            audio = recognizer.record(source)
        speech_2_text = recognizer.recognize_ibm(audio,
                                                 username=IBM_USERNAME,
                                                 password=IBM_PASSWORD,
                                                 language=LANGUAGE)
        payload = speech_2_text
        # Success log.
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("\r")
        print("[TRANSLATION]" + timestamp + " || SUCCESS ||" +
              "Translation Result:" + speech_2_text)
        print("\r")
    except Exception as ex:
        # Failure log; any error maps to HTTP-ish 400 in the payload.
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("\r")
        print("[TRANSLATION]" + timestamp + " || FAILED ||" + "Failed Reason:")
        print(ex)
        print("\r")
        code = 400
    feedback = {'code': code, 'data': payload}
    return jsonify(feedback)