def recognize_voice(data, link, token, wit_token):
    """Download a VK voice message from *link*, transcribe it with Wit.ai and
    send the text back to the user through the module-level ``vk`` client.

    Recordings larger than 300000 bytes are split into overlapping chunks,
    transcribed piece by piece and joined with '...' separators.  Replies
    longer than 3500 characters are sent as several messages.

    Args:
        data (dict): VK event payload; only ``data['user_id']`` is read.
        link (str): URL of the voice-message audio (mp3).
        token: VK token (unused here; kept for caller compatibility).
        wit_token (str): Wit.ai access token.
    """
    chunk_step = 230000   # nominal chunk size sent to Wit per request
    interval = 10000      # overlap between chunks so words are not cut
    user_id = data['user_id']
    client = Wit(wit_token)
    doc = requests.get(link)
    resp = None
    with closing(doc):
        try:
            if len(doc.content) > 300000:
                msg = "Точность при распозновании больших голосовых сообщений не гарантируется. Сообщение распознается по частям. \n\n"
                sound = doc.content
                current_point = interval
                amount = len(sound) // chunk_step
                step = len(sound) // (amount + 1)
                for i in range(amount):
                    current_part = sound[current_point - interval:current_point + step]
                    current_point += step
                    resp = client.speech(current_part, None, {'Content-Type': 'audio/mpeg3'})
                    msg = msg + str(resp['_text']) + '\n' + '...' + '\n'
                # Transcribe the remaining tail of the recording.
                current_part = sound[current_point - 1000:len(sound)]
                resp = client.speech(current_part, None, {'Content-Type': 'audio/mpeg3'})
                msg = msg + str(resp['_text'])
                resp = msg
            else:
                resp = client.speech(doc.content, None, {'Content-Type': 'audio/mpeg3'})
                resp = "💬: " + str(resp['_text'])
        except Exception:
            # Narrowed from a bare `except:` so Ctrl-C/SystemExit still propagate.
            resp = "Не удалось распознать сообщение"
        finally:
            if len(resp) > 3500:
                cnt = 0
                # BUG FIX: was `len(sound) // 3500`, which counted chunks of
                # the raw audio (and raised NameError when `sound` was never
                # bound); the split must be over the reply text itself.
                amount_msg = len(resp) // 3500
                for i in range(amount_msg):
                    vk.method('messages.send', {
                        'user_id': user_id,
                        'message': resp[cnt:cnt + 3500]
                    })
                    cnt = cnt + 3500
                vk.method('messages.send', {
                    'user_id': user_id,
                    'message': resp[cnt:len(resp)]
                })
            else:
                vk.method('messages.send', {'user_id': user_id, 'message': resp})
    return
def transcribe(filename, verbose=True):
    """Convert speech to text via the Wit.ai API.

    Args:
        filename (str): Path to audio file.
        verbose (bool): When true, print filename, transcript and timing.

    Returns:
        transcript (unicode, utf-8): Transcription of audio file.
        proc_time (float): STT processing time in seconds.
        confidence (float): None provided by the API, so default to 1.0.
    """
    # server access token comes from the environment
    wit = Wit(os.environ['SERVER_ACCESS_TOKEN'])
    with open(filename, 'rb') as audio:
        started = time.time()
        reply = wit.speech(audio, None, {'Content-Type': 'audio/wav'})
        elapsed = time.time() - started
    text = reply['_text']
    if verbose:
        print("Filename: {}".format(filename))
        print(text)
        print("Elapsed Time: {:.3f} seconds".format(elapsed))
        print("Confidence: None Provided")
    return text, elapsed, 1.0
def sendToWit(audio, frames):
    """Persist the captured frames to the output WAV file, transcribe it
    with Wit.ai and print the recognized text."""
    createWavFile(audio, frames)
    wit_client = Wit(access_token="CU77ZWXAO4STA5GEBM3VRU4UYSCEW7JI")
    with open(WAVE_OUTPUT_FILENAME, 'rb') as wav:
        reply = wit_client.speech(wav, None, {'Content-Type': 'audio/wav'})
    print('Wit Response: ' + reply["_text"])
def reCaptcha():
    """Transcribe six downloaded captcha audio files (1.mp3 .. 6.mp3) with
    Wit.ai and return the recognized digits as a string.

    Files that do not transcribe to a known Russian number word contribute
    nothing to the result (same as the original `if` chain).

    Returns:
        str: Concatenated digit characters, up to six of them.
    """
    # Russian number word -> digit character; replaces nine identical
    # `if str(resp['_text']) == ...` comparisons with one lookup.
    word_to_digit = {
        "один": '1', "два": '2', "три": '3',
        "четыре": '4', "пять": '5', "шесть": '6',
        "семь": '7', "восемь": '8', "девять": '9',
    }
    captcha = ''
    client = Wit("BHKRRZVCRA456NELAYDKN5GB72QLVPPB")
    for i in range(1, 7):
        with open("C:/Users/Skirda/Downloads/" + str(i) + ".mp3", "rb") as audio:
            resp = client.speech(audio, None, {'Content-type': 'audio/mpeg3'})
        captcha += word_to_digit.get(str(resp['_text']), '')
    return captcha
def wit_recognize(timestamp):
    """Transcribe guided_data/wav/audio_<timestamp>.wav with Wit.ai.

    Args:
        timestamp: Identifier interpolated into the WAV filename.

    Returns:
        str: The recognized text, or "" when the API call fails or the
        response carries no '_text' field (same fallbacks as before).
    """
    fpath = "guided_data/wav/" + ("audio_%s.wav" % timestamp)
    with open(fpath, 'rb') as audio:
        try:
            wit_client = Wit(WIT_AI_KEY)
            response = wit_client.speech(audio, None, {'Content-Type': 'audio/wav'})
        except Exception:
            # Best-effort: any API/network failure yields an empty transcription.
            return ""
    # `is not None` / membership test instead of the original `!= None` chain.
    if response is not None and "_text" in response:
        return str(response["_text"])
    return ""
def speech_recognition(app, service, chat_id, message, lang):
    """Transcribe a Telegram voice message with Wit.ai and edit the reply.

    Downloads the voice note, converts it OGG -> WAV with pydub, sends the
    WAV to the language-specific Wit app, then edits the placeholder message
    with '<username>\\n<text>'.  Temporary files are deleted along the way.

    Args:
        app: pyrogram client.
        service (dict): localized strings; 'sprec_start' is the placeholder.
        chat_id: destination chat.
        message: incoming message carrying `.voice`.
        lang: 'ru' or 'en' — selects the Wit token.
    """
    app.send_chat_action(chat_id, "typing")
    msg = app.send_message(chat_id, service['sprec_start'])
    # NOTE(review): if lang is neither 'ru' nor 'en', wit_cli is never bound
    # and the speech call below raises NameError — confirm callers only pass
    # these two values.
    if str(lang) == 'ru':
        wit_cli = Wit(wit_ru_token)
    elif str(lang) == 'en':
        wit_cli = Wit(wit_en_token)
    m = message
    mf = message.from_user
    voice = m.voice
    folder = '/home/katsu/Documents/katsu_bots/audio/'
    # Unique basename per chat+message so parallel requests don't collide.
    name = str(str(m.chat.id) + '_' + str(m.message_id))
    name_p = 'audio/' + 'ogg' + name + '.ogg'   # download path (relative)
    name_i = folder + 'ogg' + name + '.ogg'     # absolute OGG input
    name_o = folder + 'wav' + name + '.wav'     # absolute WAV output
    app.download_media(voice.file_id, file_ref=voice.file_ref, file_name=name_p)
    # Convert OGG -> WAV (Wit.ai is fed audio/wav), then drop the OGG.
    song = AudioSegment.from_file(name_i, format='ogg')
    song.export(name_o, format="wav")
    remove(name_i)
    with open(name_o, 'rb') as f:
        resp = wit_cli.speech(f, None, {'Content-Type': 'audio/wav'})
    usrname = ('@' + str(mf.username)) if mf.username else mf.first_name
    txt = usrname + ' \n' + resp['_text']
    remove(name_o)
    app.send_chat_action(chat_id, "cancel")
    app.edit_message_text(chat_id, msg.message_id, txt)
def read_voice(file_name):
    """Transcribe the local file 'voice.wav' with Wit.ai and return the raw
    response rendered as a string.

    NOTE(review): *file_name* is currently ignored — the hard-coded path
    'voice.wav' is opened instead; confirm with callers before wiring the
    parameter through.
    """
    wit_client = Wit(access_token='WWFXLDLV5232NULJ4P7PXO2RUSMLMDUT')
    with open('voice.wav', 'rb') as audio:
        reply = wit_client.speech(audio, None, {'Content-Type': 'audio/wav'})
    # print(reply)
    return str(reply)
def convert(this):
    """Send the local 'output.wav' to Wit.ai and return the parsed response."""
    wit_client = Wit(Wit_token)
    with open('output.wav', 'rb') as audio:
        reply = wit_client.speech(audio, None, {'Content-Type': 'audio/wav'})
    return reply
class STTHandler:
    """Thin wrapper around the Wit.ai client for speech-to-text."""

    def __init__(self):
        # Parenthesized form prints identically under Python 2 and 3.
        print("In the sttHandler class cons")
        self.client = Wit(access_token=config.ACCESS_TOKEN)

    def extractTextFromSpeech(self, f):
        """Run Wit.ai STT on the open WAV file object *f*; return the raw response."""
        return self.client.speech(f, None, {'Content-Type': 'audio/wav'})
def send_audio_to_wit(filename):
    """Transcribe '<filename>.wav' with Wit.ai and return the recognized text.

    The access token is read from the WIT_TOKEN environment variable.
    """
    wit_client = Wit(access_token=str(os.environ.get('WIT_TOKEN')))
    with open(str(filename) + '.wav', 'rb') as audio:
        reply = wit_client.speech(audio, None, headers={'Content-Type': 'audio/wav'})
        recognized = str(reply['_text'])
    return recognized
class WitSTT(STTEngine):
    """STT engine backed by the Wit.ai speech API."""

    def __init__(self):
        self.client = Wit(Config().get_wit_ai_api_key())

    def listen(self, mic, previous_frames=None):
        """Record from *mic*, transcribe the recording, delete the temp file.

        Returns the recognized text, or None when the API returned nothing.
        """
        recording_path = mic.record()
        with open(recording_path, 'rb') as audio:
            reply = self.client.speech(audio, None, {'Content-Type': 'audio/wav'})
        # print('STT: ' + str(reply))
        os.remove(recording_path)
        return reply["_text"] if reply is not None else None
def wit():
    """Record five seconds from the microphone, transcribe it with Wit.ai and
    dispatch on the id of the last intent in the response."""
    intro()
    client = Wit(witKey)
    with sr.Microphone() as source:
        audio = r.record(source, duration=5)
        results = client.speech(audio, {'Content-Type': 'audio/wav'})
    # Keep the id of the last listed intent (matches original behavior).
    commandID = None
    for intent in results["intents"]:
        commandID = intent["id"]
    return action(commandID)
def query_wit(msg, is_audio):
    """Send *msg* to Wit.ai and return the parsed response.

    When *is_audio* is true, *msg* is a WAV file path streamed to the
    speech endpoint; otherwise it is plain text for the message endpoint.
    """
    print("Querying wit with " + msg)
    client = Wit(os.environ['WIT_KEY'])
    if is_audio:
        with open(msg, 'rb') as audio:
            reply = client.speech(audio, {'Content-Type': 'audio/wav'})
    else:
        reply = client.message(msg)
    print('Yay, got Wit.ai response: ' + str(reply))
    return reply
def get_intent(self, audio_path):
    """Send the pre-recorded microphone WAV to Wit.ai and return the parsed
    response, or None when the file cannot be processed.

    NOTE(review): *audio_path* is ignored — the fixed resource
    '/resources/audio/microphone-results.wav' is used instead; confirm
    whether the parameter should be honored.
    """
    # Create the API client
    client = Wit(api_key)
    try:
        with open(get_path('/resources/audio/microphone-results.wav'), 'rb') as audio:
            return client.speech(audio, None, {'Content-Type': 'audio/wav'})
    except:
        debug("Microphone-results failed to open")
class voice_analysis:
    """ROS node: verifies the speaker of an incoming audio file against
    stored voice embeddings and, when verified, publishes the Wit.ai
    transcription of that file on the 'wit_response' topic."""

    def __init__(self):
        # VGGVox model + spectrum buckets used to embed incoming audio.
        self.model = VGGVOX()
        self.buckets = build_buckets()
        self.client = Wit(access_token="ADD THE WIT ACCESS TOKEN")
        # Whitelist of user names allowed to trigger transcription.
        with open(path_user_list, 'rb') as f:
            data = yaml.load(f, Loader=yaml.FullLoader)
            self.users_list = data['users']
        # Precomputed speaker-name -> embedding mapping.
        with open(path_embedding, 'rb') as f:
            self.speaker_dict = pickle.load(f)
        sub_audio = rospy.Subscriber('audio_file', String, self.callback)
        self.pub_res = rospy.Publisher('wit_response', String, queue_size=1)

    def callback(self, file_path):
        """Topic callback: verify the speaker of the file, then transcribe
        it with Wit.ai and publish the JSON-encoded response."""
        #path_audio = file_path.data
        # NOTE(review): the assignment above is commented out, so `path_audio`
        # must exist as a module-level name or this raises NameError — confirm.
        if self.verify_speaker(path_audio):
            with open(path_audio, 'rb') as f:
                result = self.client.speech(f, None, {'Content-Type': 'audio/wav'})
            wit_response = json.dumps(result)  ### converting the dictionary to string
            print(result)
            self.pub_res.publish(wit_response)

    def verify_speaker(self, path_audio):
        """Score *path_audio* against every stored speaker embedding; return
        True when the best match is whitelisted and above the threshold."""
        verified = False
        features = get_fft_spectrum(path_audio, self.buckets)
        h, w = features.shape
        test_embedding = np.squeeze(self.model.predict(features.reshape(1, h, w, 1)))
        speakers = []
        scores = []
        # Inverse cosine distance to each enrolled speaker.
        for speaker, embedding in self.speaker_dict.items():
            dist = cosine(test_embedding, embedding)
            scores.append(1. / dist)
            speakers.append(speaker)
        # Softmax over the scores (variable name kept from the original).
        sigmoid_score = np.exp(scores) / np.sum(np.exp(scores))
        idx = np.argmax(sigmoid_score)
        score_threshold = 0.8  #(1.0/len(self.speaker_dict))*1.8
        print(speakers, sigmoid_score, score_threshold)
        if (speakers[idx] in self.users_list) and (sigmoid_score[idx] > score_threshold):
            verified = True
        return verified
def main():
    """Batch-transcribe every WAV in ./test_audio with Wit.ai.

    Writes transcriptions to results.txt and the paths of files whose
    transcription came back empty to empty.txt, pausing every 50 requests
    to stay under the API rate limit.
    """
    # get directories
    parentDir = str(Path(os.getcwd()))
    audioDir = parentDir + '/test_audio'
    # open logs
    log = open("results.txt", "w")
    empty = open("empty.txt", "w")
    i = 0          # files processed
    numEmpty = 0   # files with empty transcription
    # init Wit, access token for wit.ai found in app settings
    access_token = "DFKCCZXCNSMCOLOGG5GVUGQIOFTE45SG"
    client = Wit(access_token)
    # for each wav file, open and convert to text using Wit.AI
    for wav in os.listdir(audioDir):
        i = i + 1
        print('Reading file ' + str(i))
        log.write(str(i) + ". Result of STT for " + wav + ": \nText:")
        with open(audioDir + '/' + wav, 'rb') as w:
            try:
                resp = client.speech(w, None, {'Content-Type': 'audio/wav'})
            except:
                # Any API failure (typically rate limiting) aborts the batch.
                print('Too many requests. Program exiting.')
                break
            # Throttle every 50 files to avoid tripping the rate limit.
            if (i % 50 == 0):
                print("Sleeping to avoid excess requests...")
                time.sleep(40)
            # if response text is empty (ie. could not interpret speech)
            if (resp.get('_text') == ''):
                numEmpty = numEmpty + 1
                empty.write(str(numEmpty) + '. ' + audioDir + '/' + wav + '\n')
            log.write("\"" + resp.get('_text') + "\"")
            log.write("\n\n")
    print('Total number of files read: ' + str(i))
    print('Number of empty responses (ie. could not interpret): ' + str(numEmpty))
    log.write('Total number of files read: ' + str(i))
    log.write('\nNumber of empty responses (ie. could not interpret): ' + str(numEmpty))
    empty.close()
    log.close()
def main():
    """AIY Voice Kit loop: wait for a button press, capture speech via the
    Google Assistant, attenuate/save/play the captured audio, then send a
    sample WAV to Wit.ai and print its response.  Runs forever."""
    status_ui = aiy.voicehat.get_status_ui()
    status_ui.status('starting')
    assistant = aiy.assistant.grpc.get_assistant()
    led = aiy.voicehat.get_led()
    button = aiy.voicehat.get_button()
    access_token = 'XVV53YJBHIOA3ELM2ZPBPFX5LI7PJQFY'
    client = Wit(access_token)
    print("one step is over")
    with aiy.audio.get_recorder():
        while True:
            led.set_state(aiy.voicehat.LED.BLINK)
            LED_RED.set_state(aiy.voicehat.LED.BLINK)
            print(led)
            print(LED_RED)
            status_ui.status('ready')
            print('Press the button and speak')
            button.wait_for_press()
            status_ui.status('listening')
            print('Listening...')
            text, audio = assistant.recognize()
            if audio:
                aiy.audio.play_audio(audio, assistant.get_volume())
                # Scale the PCM samples down (50% volume in dB space)
                # before stashing them in the module-level buffer.
                db_range = -60.0 - (-60.0 * (50 / 100.0))
                db_scaler = 10 ** (db_range / 40)
                datatext = np.multiply(np.frombuffer(audio, dtype=np.int16), db_scaler).astype(np.int16).tobytes()
                global save_audio
                save_audio = datatext
                save_wav('output5.wav')
                play_wav()
                print('audio data is ', datatext)
                path = '/home/pi/Pycham/0000_test/output.wav'
                soundPath = '/usr/share/sounds/alsa/Front_Left.wav'
                resp = None
                # NOTE(review): the fixed ALSA test file is transcribed here,
                # not the audio captured above — confirm this is intentional.
                with open('/usr/share/sounds/alsa/Front_Left.wav', 'rb') as f:
                    resp = client.speech(f, None, {'Content-Type' : 'audio/wav'})
                print('what : ' + str(resp))
class WitClient:
    """Wit.ai speech client configured from a JSON credentials file."""

    def __init__(self, credentials='conf/wit_api_credentials.json'):
        self.credentials = json.load(open(credentials, 'rb'))
        self.client = Wit(self.credentials['secret'])

    def submit(self, file_name):
        """Transcribe *file_name* (WAV).

        Returns the recognized text, '-' when the text is empty, or ''
        when the API call fails (the traceback is printed).
        """
        assert os.path.exists(file_name)
        with open(file_name, 'rb') as audio:
            try:
                reply = self.client.speech(audio, None, {'Content-Type': 'audio/wav'})
                return str(reply['_text']) or '-'
            except Exception:
                traceback.print_exc()
                return ''
def wit_ai_understand(msg, audio_blob=False):
    """Run a user utterance through the project's Wit.ai app and pick a reply.

    Args:
        msg: text question, or (when audio_blob) a seekable WAV stream.
        audio_blob (bool): route through the speech endpoint instead of text.

    Returns:
        (recognized_text, reply_text): the text Wit understood and either a
        row from the intent's TSV response file or a canned apology.
    """
    client = Wit(g.chatbot_proj['token'])
    user_resp = None
    if audio_blob:
        # Rewind the uploaded stream before streaming it to Wit.
        msg.seek(0)
        user_resp = client.speech(msg, {'Content-Type': 'audio/wav'})
    else:
        user_resp = client.message(msg)
    if user_resp['intents'] == []:
        return user_resp[
            'text'], "Sorry, I don't understand your question. I can only answer questions within the topic."
    resp_intent = user_resp['intents'][0]
    proj_name = g.chatbot_proj['proj_name']
    # One TSV of candidate replies per intent, under the instance folder.
    responses_fpath = os.path.join(current_app.instance_path, proj_name,
                                   'responses', resp_intent['name'] + '.txt')
    if not os.path.isfile(responses_fpath):
        print(
            f"Cannot response to {resp_intent['name']} because {responses_fpath} is missing"
        )
        return user_resp[
            'text'], "Sorry, I don't understand your question. Please ask your teacher for help."
    resp_df = pandas.read_csv(responses_fpath, sep='\t', encoding='utf-8', quotechar='"')
    ent_keys = set(user_resp['entities'].keys())
    df_keys = set(resp_df.columns)
    #TODO: Emit a log if the ent_keys has key that df_keys does not have
    common_keys = df_keys.intersection(ent_keys)
    # Narrow candidate rows to those matching every recognized entity value.
    resp_df_sel = None
    for k in common_keys:
        v = user_resp['entities'][k][0]['value']
        if resp_df_sel is None:
            resp_df_sel = resp_df[k] == v
        else:
            resp_df_sel = resp_df_sel & (resp_df[k] == v)
    resp_df = resp_df[resp_df_sel]  #Related to the intent
    # NOTE(review): when common_keys is empty, resp_df_sel stays None and
    # resp_df[None] raises; likewise randint(0, -1) raises when no row
    # matched — confirm upstream guarantees at least one entity and row.
    resp_df_idx = random.randint(0, resp_df['wit_response'].count() - 1)
    return user_resp['text'], resp_df['wit_response'].iloc[resp_df_idx]
def home(request):
    """Django view.

    POST: the raw request body is WAV audio; transcribe it with Wit.ai,
    draw the requested shape (square/circle, optionally colored), stash the
    base64 JPEG in the module-level `context`, and redirect to /home.
    GET: render the page with the current `context`.
    """
    if request.method == 'POST':
        #print(request.body)
        #f = open('./file.wav', 'wb')
        #f.write(request.body)
        #f.close()
        client = Wit(access_token)
        resp = None
        #with open('./file.wav', 'rb') as f:
        resp = client.speech(request.body, {'Content-Type': 'audio/wav'})
        print('Yay, got Wit.ai response: ' + str(resp))
        #print(resp['text'])
        #resolve intent
        intent = resp['intents'][0]['name']
        #resolve Color Entity
        color = (0, 0, 0)
        if resolve_color(resp) is not None:
            color = to_bgr[resolve_color(resp)]
        if intent == 'draw_square':
            frame_b64 = to_base64(draw_square(color=color))
        if intent == 'draw_circle':
            frame_b64 = to_base64(draw_circle(color=color))
        # NOTE(review): if the intent is neither draw_square nor draw_circle,
        # frame_b64 is unbound and the update below raises NameError —
        # confirm the Wit app can only yield these two intents.
        #context = {'text' : 'test','img': ('data:image/jpeg;base64, '+ frame_b64.decode("utf-8"))}
        context.update({
            'text': resp['text'],
            'img': ('data:image/jpeg;base64, ' + frame_b64.decode("utf-8"))
        })
        #return HttpResponseRedirect("/")
        #return JsonResponse(context)
        return redirect('/home')
    else:
        return render(request, 'home/home.html', context)
def recognize_voice(data, link, token, wit_token):
    """Download a VK voice message from *link*, transcribe it with Wit.ai
    and always send the result (or an error notice) back to the user.

    Args:
        data (dict): VK event payload; only ``data['user_id']`` is read.
        link (str): URL of the voice-message audio (mp3).
        token (str): VK API token passed through to ``vkapi.send_message``.
        wit_token (str): Wit.ai access token.
    """
    user_id = data['user_id']
    client = Wit(wit_token)
    doc = requests.get(link)
    resp = None
    with closing(doc):
        try:
            resp = client.speech(doc.content, None, {'Content-Type': 'audio/mpeg3'})
            resp = str(resp['_text'])
        except Exception:
            # Narrowed from a bare `except:` so Ctrl-C/SystemExit propagate;
            # any recognition/API failure still yields the fallback text.
            resp = "Не удалось распознать сообщение"
        finally:
            # The user always gets a reply, success or not.
            vkapi.send_message(user_id, token, resp, '')
    return
def openComms():
    """Record one utterance with `rec`, transcribe it via Wit.ai, and route
    the recognized words to query() unless an exit word was spoken."""
    #define api key for https://wit.ai/
    client_wit = Wit('YOURKEYHERE')
    #clear input words variable
    inputWords = ''
    #call recording software, to start recording on noise detection and stop after a few seconds of silence
    subprocess.call(['rec /home/pi/PiBadge/test.wav rate 32k silence 1 0.1 1% 1 3.0 1%'], shell=True)
    resp = None
    #pass the wav file to wit.ai for processing
    with open('/home/pi/PiBadge/test.wav', 'rb') as f:
        resp = client_wit.speech(f, None, {'Content-Type': 'audio/wav'})
    #extract the return text from the stt at wit.ai
    inputWords = str(resp['_text'])
    #print input words for debugging
    print inputWords
    #if the input matches any of the exit words then return from the function back to main loop
    if any(x in inputWords.split() for x in exitWords.split()):
        return
    #send the modules and the input to the query module
    query(modules, inputWords)
    return
def record():
    """Prompt the user, record four seconds from the default microphone,
    send the WAV to Wit.ai, and repeat until a non-empty response comes
    back.  Returns the first non-blank Wit response dict."""
    blank = {'_text': '', 'entities': {}}
    resp = blank
    while resp == blank:
        sample_rate = 44100
        sd.default.samplerate = sample_rate
        duration = 4  # record for four seconds
        access_token = "WF7LRTYFMA6VOCP7ORHYDDE464DTUC2I"
        client = Wit(access_token)
        print('Hit enter and I will start recording.')
        placeholder = input()
        print("Go ahead I'm listening...")
        print('-------------------------')
        # sd.rec returns immediately; the countdown below covers the capture.
        myrecording = sd.rec(duration * sample_rate, channels=1)
        t = duration
        while t >= 0:
            # Overwrite the same console line with the remaining seconds.
            print('0:0{}'.format(t), end='\r')
            time.sleep(1)
            t -= 1
        wav.write("output_sound.wav", sample_rate, myrecording)  # create wav file
        with open("output_sound.wav", "rb") as f:
            resp = client.speech(f, {'Content-Type': 'audio/wav'})  # send to wit
        if resp == blank:
            print("Hm, I didn't hear anything. Could you repeat that?")
            continue
    return resp
from urllib.error import URLError, HTTPError
from datetime import datetime
import os

# Wit.ai server access token, taken from the environment.
ACCESS_TOKEN = os.getenv("WIT_ACCESS_TOKEN")

if __name__ == '__main__':
    # Smoke test: transcribe one known sample file and print the result.
    client = Wit(ACCESS_TOKEN)
    fpath = "audio/google/hello.wav"
    print("Processing: " + fpath)
    with open(fpath, 'rb') as audio:
        try:
            translation = None
            # print(audio)
            translation = client.speech(audio, None, {'Content-Type': 'audio/wav'})
            print(translation)
            if translation != None:
                # Newer Wit responses carry the transcript under 'text'.
                if "text" in translation:
                    sentence = str(translation["text"])
                    print("Translation: " + sentence)
        except Exception as e:
            print(
                "Could not request results from Wit.ai service; {0}".format(e))
def sst_wit(audio_fname):
    """Transcribe the WAV file *audio_fname* with Wit.ai and return the text."""
    wit_client = Wit('PURT3OOTK3J7QN5Y2IJEUWD37PYRRJZY')
    with open(audio_fname, 'rb') as audio:
        reply = wit_client.speech(audio, None, {'Content-Type': 'audio/wav'})
    print('Wit.ai response: ' + str(reply))
    return reply['_text']
def openConversation(personName):
    """Voice-chat loop: greet *personName* via espeak, then repeatedly
    record speech, transcribe it with Wit.ai, and reply through the
    MongoDB-backed talkLoop until the human says goodbye."""
    #the wit.ai API key (this is a fake one you will need to sign up for your own at wit.ai)
    client_wit = Wit('YOURKEYHERE')
    #setting up variables for mongodb
    client = MongoClient('localhost', 27017)
    db = client.words_database
    responses = db.responses
    allwords = db.allwords
    #variables for first input and the 2 levels of search accuracy
    inputWords = ("hello")
    globalReply = ("hello")
    botAccuracy = 0.725
    botAccuracyLower = 0.45
    #initialise the main class and get a basic first response from the bot
    talkClass = talkLoop(client, db, responses, allwords, inputWords, globalReply, botAccuracy, botAccuracyLower)
    #pass the starting inputs to the database for storage
    talkClass.updateDB(inputWords, globalReply)
    #the below three lines push the input words into the reply tumbler in order to find another greeting other than just human responses to 'hello'
    #for instance: 'hello' can return 'greeting' which will return human responses to that such as 'good day' instead of just returning 'greeting'
    inputWords = (talkClass.replyTumbler())
    talkClass.updateDB(inputWords, globalReply)
    globalReply = (talkClass.replyTumbler())
    #combine the greeting with the humans name from the face identification code
    globalReply = str(globalReply + " " + personName)
    #use subprocess again to initialise espeak (the TTS) and say the bots response
    subprocess.call(['espeak', globalReply])
    #print the output words to the screen (debug/testing purposes)
    sys.stdout.write(BLUE)
    print(globalReply)
    sys.stdout.write(RESET)
    #the main loop wrapped in a try to capture any errors and hopefully exit cleanly
    # NOTE(review): the bare `except: pass` below silently swallows every
    # error, including KeyboardInterrupt — deliberate "exit cleanly" choice.
    try:
        while True:
            #using subprocess to call the sox recording software with a configuration to trim silence from the recording and stop recording when the speaker has finished
            subprocess.call(['rec test.wav rate 32k silence 1 0.1 5% 1 1.0 5%'], shell=True)
            resp = None
            #use the wit.ai class to interface with the API and send off the wav file from above for STT functions
            with open('test.wav', 'rb') as f:
                resp = client_wit.speech(f, None, {'Content-Type': 'audio/wav'})
            #parse the response given to get the text sent back which will then become the words the bot uses
            inputWords = str(resp['_text'])
            #if the word(s) goodbye/good bye are said then break the loop which will return to the main code and resume the skull to look around for another human face
            if inputWords == "goodbye":
                break
            if inputWords == "good bye":
                break
            #print the input words to the screen (debug/testing purposes)
            sys.stdout.write(RED)
            print inputWords
            sys.stdout.write(RESET)
            #update the database with the humans response and the bots last response
            talkClass.updateDB(inputWords, globalReply)
            #call the reply tumbler function for the bots reply
            globalReply = (talkClass.replyTumbler())
            #use subprocess again to initialise espeak (the TTS) and say the bots response
            subprocess.call(['espeak', globalReply])
            #print the output words to the screen (debug/testing purposes)
            sys.stdout.write(BLUE)
            print(globalReply)
            sys.stdout.write(RESET)
    except:
        pass
class app():
    """Smart-mirror style app: a fullscreen Tk window driven by an asyncio
    loop, with wake/sleep voice control recognized in the background via
    SpeechRecognition + Wit.ai."""

    def __init__(self):
        # App global variables
        self.loop = asyncio.get_event_loop()
        self.wit_client = Wit(access_token=WIT_ACCESS_TOKEN)
        self.window = FullscreenWindow()
        self.sleep_timer = 0          # ticks since last activity
        self.display_asleep = False   # widgets hidden when True
        # Defines some nice app window update behavior
        #starts the main loop
        self.loop.run_until_complete(self.app_main_loop())

    async def app_main_loop(self):
        """Pump Tk updates at WINDOW_UPDATES_PER_SECOND; put the display to
        sleep after SLEEP_AFTER_MINUTES of inactivity; listen for speech in
        the background the whole time."""
        # get some microphones up in here
        m = sr.Microphone()
        r = sr.Recognizer()
        with m as source:
            r.adjust_for_ambient_noise(source)
        # APP main loop
        audio = r.listen_in_background(source, functools.partial(self.audio_callback))
        try:
            await self.whole_window_update()
            while True:
                self.window.tk.update_idletasks()
                self.window.tk.update()
                await asyncio.sleep(1.0 / WINDOW_UPDATES_PER_SECOND)
                if not self.display_asleep:
                    if self.sleep_timer > WINDOW_UPDATES_PER_SECOND * 60 * SLEEP_AFTER_MINUTES:
                        await self.sleep()
                        r.adjust_for_ambient_noise(source)
                else:
                    # While asleep only the clock keeps refreshing.
                    await self.loop.create_task(self.window.update_clock())
                self.sleep_timer += 1
        except KeyboardInterrupt:
            print("Shutting down")
            audio.stop_listening()
            self.window.quit()
            self.loop.close()

    async def sleep(self):
        """Blank the display and reset the inactivity timer."""
        self.sleep_timer = 0
        self.display_asleep = True
        self.window.hide_widgets()

    async def awake(self):
        """Wake the display and refresh every widget."""
        self.display_asleep = False
        await self.whole_window_update()
        self.window.show_widgets()

    # Handles voice command call backs
    def audio_callback(self, recognizer, audio):
        """Background-listener callback: stream captured WAV data to Wit.ai."""
        data = audio.get_wav_data()
        json_data = self.wit_client.speech(data, None, {'Content-Type': 'audio/wav'})
        self.voice_control_callback(json_data)

    def voice_control_callback(self, json_data):
        """Map recognized entities to wake ('hello') / sleep ('bye') actions.

        NOTE(review): 'entries' on the first lookup vs 'entities' below looks
        like a typo — confirm which key the Wit response actually carries.
        """
        print(json_data)
        if 'self' in json_data['entries']:
            if 'hello' in json_data['entities']:
                self.loop.create_task(self.awake())
            if 'bye' in json_data['entities']:
                self.loop.create_task(self.sleep())

    async def whole_window_update(self):
        """Refresh the news, clock, calendar and weather widgets."""
        await self.loop.create_task(self.window.update_news())
        await self.loop.create_task(self.window.update_clock())
        await self.loop.create_task(self.window.update_calendar())
        await self.loop.create_task(self.window.update_weather())
class CallWit(object):
    """Wit.ai-backed assistant: parses user queries, then routes the top
    intent (weather, time, currency conversion), light-toggle entity or
    greeting to the matching service call and console/Messenger reply."""

    def __init__(self):
        # Wit.ai API parameters
        self.WIT_TOKEN = settings.WIT_TOKEN
        # Actions - Deprecated in new Wit API
        # # Setup Actions
        # actions = {
        #     'send': self.send_fb,
        #     'merge': self.merge,
        #     'getWeather': self.getWeather,
        #     'getName': self.getName,
        #     'getTime': self.getTime,
        #     'getConversion': self.get_currency_conversion,
        # }
        # Setup Wit Client
        self.client = Wit(access_token=self.WIT_TOKEN)
        self.default_msg = "Sorry mate ! I didn't get what you said..."
        self.welcome_msg = "Hey !! How can you help you today ? You can ask me about `Weather`, `Time` at a place and " \
                           "I can also do some currency conversions !! "

    def handle_message(self, session_id, user_query):
        """Parse *user_query* with Wit and dispatch on the extracted intent,
        greeting and on/off entities (highest-priority branch wins)."""
        wit_response = self.client.message(msg=user_query)
        logging.debug("Response from Wit : {}".format(wit_response))
        # user_name = self.getName(session_id)
        entities = wit_response['entities']
        context_dict = self.merge(wit_response)
        # TODO account for confidence values
        greetings, greetings_score = self.first_entity_value(entities, 'greetings')
        light_toggle, light_toggle_score = self.first_entity_value(entities, 'on_off')
        intent, intent_score = self.first_entity_value(entities=entities, entity='intent')
        logging.info("Intent obtained : {} with score {}".format(intent, intent_score))
        if intent == 'getWeather':
            logging.debug("Getting weather info")
            context = self.getWeather(context_dict)
            # messenger.fb_message(session_id, self.weather_replies(user_name, context))
        elif intent == 'getTime':
            logging.debug("Getting Time info")
            context = self.getTime(context_dict)
            # messenger.fb_message(session_id, self.time_replies(user_name, context))
        elif intent == 'curConvert':
            logging.debug("Getting Currency info")
            context = self.get_currency_conversion(context_dict)
            # messenger.fb_message(session_id, self.currency_replies(user_name, context))
        elif light_toggle == 'on':
            # A greeting that outscored the toggle wins over switching the light.
            if greetings and light_toggle_score < greetings_score:
                if greetings == 'greetings':
                    print "Hello Shashank!"
                    # messenger.fb_message(session_id, self.welcome_msg)
                else:
                    print "See you soon then!"
                    # messenger.fb_message(session_id, "See you soon then!")
            else:
                # messenger.fb_message(session_id, "Switching ON the light ...")
                self.turn_on_flux(session_id)
        elif light_toggle == 'off':
            if greetings and light_toggle_score > greetings_score:
                if greetings == 'greetings':
                    print "Hello !! "
                    # messenger.fb_message(session_id, self.welcome_msg)
                else:
                    print "See you soon then!"
                    # messenger.fb_message(session_id, "See you soon then!")
            else:
                # messenger.fb_message(session_id, "Switching OFF the light ...")
                self.turn_off_flux(session_id)
        elif greetings == 'greetings':
            print "Hello!"
            # messenger.fb_message(session_id, self.welcome_msg)
        elif greetings == 'end':
            print "Goodbye!"
            # messenger.fb_message(session_id, "See you soon then !!!")
        else:
            print "Sorry ... "
            # messenger.fb_message(session_id, self.default_msg)

    def speech_to_wit(self, audio_url):
        """
        To Handle Audio files in Messenger
        :param audio_url:
        :return: response as per Wit.AI API docs
        """
        # Download the URL
        r = requests.get(audio_url)
        with open('audio.wav', 'wb') as f:
            f.write(r.content)
        logging.debug("Audio file received")
        response = None
        header = {'Content-Type': 'audio/mpeg3'}
        with open('audio.wav', 'rb') as f:
            response = self.client.speech(f, None, header)
        return response

    def first_entity_value(self, entities, entity):
        """ Returns given entity value with its confidence score """
        if entity not in entities:
            return None, None
        entity_val = entities[entity][0]['value']
        entity_score = entities[entity][0]['confidence']
        if not entity_val:
            return None, None
        logging.debug("ENTITY VALUE, Score {}, {}".format(entity_val, entity_score))
        return (entity_val['value'], entity_val['confidence']) if isinstance(entity_val, dict) else (entity_val, entity_score)

    def high_entity_value(self, entities, entity):
        """ Returns first entity value """
        if entity not in entities:
            return None
        entity_val = entities[entity][0]['value']
        if not entity_val:
            return None
        logging.debug("ENTITY VALUE {}".format(entity_val))
        return entity_val['value'] if isinstance(entity_val, dict) else entity_val

    def merge(self, request):
        """Build a context dict from the Wit entities: currency source and
        destination when both are present, otherwise a shared location for
        the weather and time services."""
        try:
            context = request['context']
        except:
            context = {}
        entities = request['entities']
        loc, loc_score = self.first_entity_value(entities, 'location')
        # Get context for currency conversion
        currency_source, currency_source_score = self.first_entity_value(entities, 'source')
        currency_dest, currency_dest_score = self.first_entity_value(entities, 'destination')
        if currency_source and currency_dest:
            context['currencyNameSource'] = currency_source
            context['currencyNameDest'] = currency_dest
        elif loc:
            context['weatherLocation'] = loc
            context['timeLocation'] = loc
        return context

    # Services and APIs
    def getWeather(self, context):
        """Fill context['forecast'] for context['weatherLocation'] via the
        weather API; set fallback flags on failure or missing location."""
        # context = request['context']
        # entities = request['entities']
        # loc = first_entity_value(entities, 'loc')
        del context['timeLocation']
        loc = context['weatherLocation']
        # Initialize Weather API class
        weather_obj = CallWeather(location=loc)
        if loc:
            # This is where we use a weather service api to get the weather.
            try:
                context['forecast'] = weather_obj.inWeather()
                if context.get('missingLocation') is not None:
                    del context['missingLocation']
            except:
                logging.warning("Error from Weather API : {}".format(sys.exc_info()[0]))
                # TODO Handle error messages in User replies
                context['weather_default'] = True
                del context['weatherLocation']
                # Delete session ID to stop looping
                # del request['session_id']
        else:
            context['missingLocation'] = True
            if context.get('forecast') is not None:
                del context['forecast']
        logging.debug("Forecast obtained for {}: {}".format(loc, context))
        print "Forecast obtained for {}: {}".format(loc, context)
        return context

    def getName(self, session_id):
        """Look up the sender's first name through the Facebook Graph API."""
        # context = request['context']
        # Get user name from the Messenger API
        resp = requests.get("https://graph.facebook.com/v2.8/" + session_id,
                            params={"access_token": self.FB_PAGE_TOKEN})
        print resp
        sender_name = resp.json()['first_name']
        return sender_name

    def getTime(self, context):
        """Fill context['country_time'] for context['timeLocation'] via the
        time API; set fallback flags on failure or missing location."""
        # context = request['context']
        # entities = request['entities']
        del context['weatherLocation']
        loc = context['timeLocation']
        # Initialize Time API class
        world_time_obj = CallGoogleTime(location=loc)
        if loc:
            try:
                context['country_time'] = world_time_obj.world_time()
                if context.get('missingCountry') is not None:
                    del context['missingCountry']
            except:
                logging.warning("Error from Time API : {}".format(sys.exc_info()[0]))
                context['time_default'] = True
                del context['timeLocation']
                # Delete session ID to stop looping
                # del request['session_id']
        else:
            context['missingCountry'] = True
            if context.get('country_time') is not None:
                del context['country_time']
        logging.debug("Time obtained for {}: {}".format(loc, context))
        return context

    def get_currency_conversion(self, context):
        """Fill context['conversionVal'] with the source→destination rate;
        set the cur_default flag and drop the names on failure."""
        # context = request['context']
        source_name = context['currencyNameSource']
        dest_name = context['currencyNameDest']
        currency_object = CurrencyRates()
        if source_name and dest_name:
            try:
                context['conversionVal'] = currency_object.get_conversion_rate(source_name, dest_name)
            except:
                logging.warning("Error from Currency API : {}".format(sys.exc_info()[0]))
                context['cur_default'] = True
                del context['currencyNameSource']
                del context['currencyNameDest']
        else:
            context['cur_default'] = True
            del context['currencyNameSource']
            del context['currencyNameDest']
        return context

    def turn_on_flux(self, session_id):
        """Switch the Flux bulb on; tell the user when it is offline."""
        try:
            ipaddr = flux_api.scan_bulb()
            flux_api.switch_on(ipaddr)
            return
        except:
            messenger.fb_message(session_id, "The bulb doesn't seem to be online")
            return

    def turn_off_flux(self, session_id):
        """Switch the Flux bulb off; tell the user when it is offline."""
        try:
            ipaddr = flux_api.scan_bulb()
            flux_api.switch_off(ipaddr)
            return
        except:
            messenger.fb_message(session_id, "The bulb doesn't seem to be online")
            return

    # Replies from Wit
    def weather_replies(self, user_name, context):
        """Pick a random weather reply template and fill it from *context*."""
        response_template = random.choice(
            ['Hey {mention} ! Weather at {location} is {forecast}',
             'Yo {mention}! It is {forecast} at {location}',
             'Hi {mention} ! The weather is {weather} at {location}'
             ])
        return response_template.format(mention=user_name, location=context.get('weatherLocation'),
                                        forecast=context.get('forecast'))

    def time_replies(self, user_name, context):
        """Pick a random time reply template and fill it from *context*."""
        response_template = random.choice(
            ['Hey {mention} ! Time at {location} is {time}',
             'Yo {mention}! It is {time} at {location}',
             'The time is {time} at {location}...',
             'Uno momento please {mention} ... The time is {time} at {location} !!'
             ])
        return response_template.format(mention=user_name, location=context.get('timeLocation'),
                                        time=context.get('country_time'))

    def currency_replies(self, user_name, context):
        """Pick a random conversion reply template and fill it from *context*."""
        response_template = random.choice(
            ['Hey {mention} ! 1 {source_currency} is equal to {conversion_val} {dest_currency}',
             'Yo {mention} ! 1 {source_currency} is equal to {conversion_val} {dest_currency}',
             'Just a moment ... Hey {mention} ! 1 {source_currency} is equal to '
             '{conversion_val} {dest_currency}'
             ])
        return response_template.format(mention=user_name, source_currency=context.get('currencyNameSource'),
                                        dest_currency=context.get('currencyNameDest'),
                                        conversion_val=context.get('conversionVal'))

    def wit_interactive(self):
        """Start the Wit command-line interactive session."""
        client = Wit(access_token=self.WIT_TOKEN)
        client.interactive()
wf.writeframes(data) wf.close() if __name__ == '__main__': test = True while (test): #test = False response_continue = True while (response_continue): print("please speak a word into the microphone. Say stop to quit.") record_to_file('demo.wav') print("done - result written to demo.wav") try: with open('demo.wav', 'rb') as f: resp = client.speech(f, None, {'Content-Type': 'audio/wav'}) response_continue = False except Exception as e: print("response issue") print('Yay, got Wit.ai response: ' + str(resp)) if str(resp['_text']).find('stop') >= 0: break if (('structure' in resp['entities']) and resp['entities']['structure'][0]['value'] == 'function'): if ('function_name' in resp['entities']): name = resp['entities']['function_name'][0]['value'] arguments = "" if 'argument' in resp['entities']: for index in len(resp['entities']['argument_name']): if index != 0:
class voice_recognitor():
    """
    Class voice_recognitor

    ROS node that records audio for a requested number of seconds (messages
    on the 'recognize_voice' topic), transcribes it with Wit.ai and
    publishes the text on 'asr_full_text'.  A 'listening' flag topic is
    raised while the microphone is open.
    """

    def __init__(self):
        """Class constructor.

        Subscribes to the recognize_voice topic and creates the publishers
        for the recognized text and the listening flag.
        """
        #Subscribe to ROS topics
        self.asr_sub = rospy.Subscriber("recognize_voice", Float32, self.callback)
        #Define the ROS publishers
        self.asr_pub = rospy.Publisher("asr_full_text", String, queue_size=0)
        self.listening_pub = rospy.Publisher("listening", Bool, queue_size=0)
        #Define object as msg type
        self.asr_msg = String()
        self.asr_msg.data = ""
        self.listening_msg = Bool()
        self.listening_msg.data = False
        self.duration = 3.0  # default recording length, seconds
        self.configuration()
        print("[INFO] Node started")

    def configuration(self):
        """Configure the Wit.ai client token and the recording sample rate."""
        token = "VW6CLYS2BCPOCWSATWXNZNVTLSEH3WJM"
        self.client = Wit(token)
        self.sample_rate = 44100

    def recognize(self, duration):
        """Record *duration* seconds of stereo audio, transcribe it with
        Wit.ai and publish the recognized text (empty string on failure)."""
        self.listening_msg.data = True
        self.listening_pub.publish(self.listening_msg)
        myrecording = sd.rec(int(duration * self.sample_rate),
                             samplerate=self.sample_rate, channels=2)
        sd.wait()
        write('output.wav', self.sample_rate, myrecording)
        self.listening_msg.data = False
        self.listening_pub.publish(self.listening_msg)
        try:
            # BUG FIX: the original opened the file with mode 'rf', which is
            # invalid and always raised ValueError — the bare except then
            # silently published "" for every recording.  Binary read mode
            # is what the Wit upload needs.
            with open('output.wav', 'rb') as f:
                answ = self.client.speech(f, {'Content-Type': 'audio/wav'})
            text = unidecode(answ[u'text'])
        except Exception:
            # Best-effort: recognition failures degrade to an empty result.
            text = ""
        print(text)
        self.asr_msg.data = text
        #Publish msg
        self.asr_pub.publish(self.asr_msg)

    def run_loop(self):
        """Infinite loop; returns when ROS is shut down."""
        while not rospy.is_shutdown():
            #functions to repeat until the node is closed
            rospy.spin()

    def stopping_node(self):
        """ROS closing node.

        Called when the ROS node is closed."""
        print("\n\nBye bye! :)\n\n")

    def callback(self, data):
        """ROS callback: record for data.data seconds and recognize."""
        self.recognize(data.data)