def get_text_from_audio():
    try:
        file = request.files.get('file')
        if file and '.' in file.filename and \
                file.filename.rsplit('.', 1)[1].lower() in ('flac', 'wav'):
            speech_to_text = SpeechToText()
            filename = secure_filename(file.filename)
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            text = speech_to_text.recognize(
                audio_file=os.path.join(app.config['UPLOAD_FOLDER'], filename))
            return jsonify({'text': text})
        return jsonify(
            {'message': 'File format is not supported. Please use flac, wav'}), 400
    except Exception:
        return jsonify('Failed to upload file'), 400
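# For context, a minimal sketch of the Flask scaffolding the handler above
# assumes; the app object, the UPLOAD_FOLDER value, and the route path are
# assumptions for illustration, not taken from the original snippet.
import os
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = '/tmp/uploads'  # hypothetical location
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# The handler could then be registered as, e.g.:
# app.route('/stt', methods=['POST'])(get_text_from_audio)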
def PredictForLanguage(executor, language):
    language_path = os.path.join(path, language)
    if not app_utils.CheckIfPathExists(language_path):
        return
    files = app_utils.listdir_fullpath(language_path)
    with open(os.path.join(out_path, "vortex_" + language + ".csv"), "w") as f:
        for file in files:
            # Convert speech to text
            print("Converting Speech to Text for ", file)
            text = SpeechToText(executor, file, language)
            app_utils.DebugCommand("Text Recognised is ", text)
            # Translate to English before prediction
            translated = TranslateToEnglish(text, language, translator="Seq2Seq")
            app_utils.DebugCommand("Translated to English Is ", translated)
            # Wait for the model warm-up thread before predicting
            if ModelColdStarter.is_alive():
                ModelColdStarter.join()
            prediction = PredictResults(translated)
            print(prediction)
            f.write(file + ", " + prediction[0] + "\n")
def send_voice(self):
    try:
        result = SpeechToText().run(self.auth_token)
        output = result[0]
        self.auth_token = result[1]
        print(self.auth_token)
        self.browser.append("You: " + output)
        print(output)
        self.receive_message(output)
    except Exception as e:
        print(e)
def predict(speech_recognition=False, speech_synthesis=False):
    ''' Interact with a trained seq2seq model.
    1. speech_recognition - enable speech recognition from the microphone via PocketSphinx
    2. speech_synthesis - enable speaking the answers aloud via RHVoice
    '''
    name_dataset = configure_file_names()
    ttt = TextToText(f_name_w2v_model=f_name_w2v_model,
                     f_name_model=f_name_model,
                     f_name_model_weights=f_name_model_weights)

    if speech_recognition:
        print('[i] Loading the language model for speech recognition...')
        stt = SpeechToText('from_microphone', name_dataset)

    if speech_synthesis:
        print('[i] Loading the speech synthesizer...')
        tts = TextToSpeech('anna')

    print()
    question = ''
    while True:
        if speech_recognition:
            print('Listening...')
            question = stt.get()
            os.write(sys.stdout.fileno(), curses.tigetstr('cuu1'))
            print('You: ' + question)
        else:
            question = input('You: ')
        answer, lost_words = ttt.predict(question, True)
        print('\t=> %s' % answer)
        if len(lost_words) > 0:
            print('[w] Lost words: ' + ', '.join(lost_words) + '\n')
        else:
            print()
        if speech_synthesis:
            tts.get(answer)
def main():
    (stt_json, adj_json, output_json) = sys.argv[1:4]

    # Turn adjustment data into a list of kept segments
    with open(adj_json, 'r') as file:
        adj_data = json.load(file)

    # Turn the STT json into objects
    with open(stt_json, 'r') as file:
        stt = SpeechToText().from_json(json.load(file))

    # List of adjustments (start, end, adjustment)
    offset_adj = []
    # Last ending position for iterating through kept segments
    last_end = 0.00
    # Running tally of removed segment lengths
    current_adj = 0.00

    # For each segment that was kept, keep track of the gaps to know how much to adjust
    for kept_segment in adj_data:
        print(kept_segment + ":" + str(adj_data[kept_segment]))
        start = float(kept_segment)
        end = adj_data[kept_segment]
        # If the start of this segment is after the last end, we have a gap
        if start >= last_end:
            # Keep track of the gap in segments
            current_adj = current_adj + (start - last_end)
            # Add it to the list of adjustments
            offset_adj.append(
                Adjustment(start - current_adj, end - current_adj, current_adj))
        # Keep track of the last segment end
        last_end = end

    # For each word, find the corresponding adjustment
    for word in stt.results.words:
        adjust_word(word, offset_adj)

    # Write the resulting json
    mgm_utils.write_json_file(stt, output_json)
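# The script above relies on an Adjustment record and an adjust_word helper
# that are not shown. A minimal sketch of what they could look like, inferred
# from how they are called; the field names and the add-the-offset-back logic
# are assumptions, not the original implementation.
from dataclasses import dataclass

@dataclass
class Adjustment:
    start: float   # start of the kept segment on the edited timeline
    end: float     # end of the kept segment on the edited timeline
    offset: float  # total removed time preceding this segment

def adjust_word(word, offset_adj):
    # Shift a word's times from the edited timeline back toward the original
    # one by adding the accumulated offset of the segment that contains it
    for adj in offset_adj:
        if adj.start <= word.start <= adj.end:
            word.start += adj.offset
            word.end += adj.offset
            return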
def convert(media_file, kaldi_file, kaldi_transcript_file, output_json_file):
    mgm_utils.exception_if_file_not_exist(kaldi_file)
    if not os.path.exists(kaldi_transcript_file):
        raise Exception(
            "Exception: File " + kaldi_transcript_file +
            " doesn't exist, the previous command generating it must have failed.")

    results = SpeechToTextResult()

    # Open the kaldi json
    with open(kaldi_file) as json_file:
        data = json.load(json_file)

    # Get the kaldi transcript
    with open(kaldi_transcript_file, "r") as transcript:
        results.transcript = transcript.read()

    # Get a list of words
    words = data["words"]
    duration = 0.00

    # For each word, add a word to our results
    for w in words:
        time = float(w["time"])
        end = time + float(w["duration"])
        # Keep track of the last end time and use it as the duration
        if end > duration:
            duration = end
        results.addWord("", time, end, w["word"], None, None)

    # Create the media object
    media = SpeechToTextMedia(duration, media_file)

    # Create the final object
    outputFile = SpeechToText(media, results)

    # Write the output
    mgm_utils.write_json_file(outputFile, output_json_file)
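# For reference, the Kaldi json this converter expects looks roughly like the
# following; an illustrative example inferred from the fields the code reads,
# not an authoritative Kaldi schema.
example_kaldi_data = {
    "words": [
        {"word": "hello", "time": "0.00", "duration": "0.42"},
        {"word": "world", "time": "0.50", "duration": "0.38"},
    ]
}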
class Application:
    """ Application main class """
    def __init__(self, config):
        """
        Initialize
        :param config: configuration
        :type config: Config
        """
        self.command_processor = CommandProcessor(
            self._command_handlers(config.command_handlers))
        self.robot = Robot(config.apiai.client_access_token,
                           config.apiai.language,
                           self.command_processor.commands)
        self.speech_to_text = SpeechToText(
            config.speechkit.key, "", config.speechkit.recognition.language)
        self.text_to_speech = TextToSpeech(
            config.speechkit.synthesis.cache_size, config.speechkit.key,
            config.speechkit.synthesis.language,
            config.speechkit.synthesis.speaker,
            config.speechkit.synthesis.emotion,
            config.speechkit.synthesis.speed)
        self.record = SpeechCapture(config.record.silence_calculation_chunks,
                                    config.record.speech_level_coefficient,
                                    config.record.start_wait_chunks,
                                    config.record.finish_wait_chunks)

    def _handler(self, real_handler):
        return lambda args: real_handler(self, args)

    def _command_handlers(self, command_handlers):
        result = {}
        for command, handler in stdhandlers.items():
            result[command] = self._handler(handler)
        for command, handler in command_handlers.items():
            result[command] = self._handler(handler)
        return result

    def _process_answer(self, commands):
        finish, results = self.command_processor.process_commands(commands)
        if not finish:
            noempty_results = [
                result for result in results if result is not None
            ]
            play_list(noempty_results)

    def welcome(self):
        """ Run robot welcome event """
        success, session_id, answer_commands = self.robot.welcome()
        if success:
            self.speech_to_text.uuid = session_id
            self._process_answer(answer_commands)

    def query(self):
        """ Run speech request (if we have one) """
        print("Listening")
        silent, record = self.record.record_mp3()
        if silent:
            print("Silent")
            return
        success, text = self.speech_to_text.convert(record)
        if not success:
            print("Not recognized")
            return
        print("User : " + text)
        success, commands = self.robot.query(text)
        print("Robot commands : " + json.dumps(commands))
        if not success:
            return
        self._process_answer(commands)

    def main(self):
        self.welcome()
        while True:
            self.query()
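# A minimal entry-point sketch for the Application class above; how Config is
# constructed is an assumption, as the original snippet does not show it.
# if __name__ == "__main__":
#     config = Config("config.yml")  # hypothetical loader
#     Application(config).main()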
class Andrew(object):
    """The rap voice assistant """
    def __init__(self,
                 detect_model="data/andrew2.net",
                 lyrics_model="data/keras_model_1200.h5",
                 lyrics_chars="data/chars.pkl"):
        # microphone
        self.mic = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
        # wake word detector
        self.detector = TriggerDetector(detect_model)
        # speech and language services
        self.speech_client = SpeechToText()
        self.luis = LangUnderstand()
        self.tts = TextToSpeech()
        # lyrics generator model
        self.lyrics_gen = LyricsGenerator(lyrics_model, lyrics_chars)
        self.pred_queue = DetectQueue(maxlen=5)
        self.is_wakeup = False
        # pytft display
        self.tft = TFTDisplay()
        self.tft_queue = queue.Queue()
        self.tft_thread = threading.Thread(target=self.tft_manage, args=())
        self.tft_thread.daemon = True
        self.tft_thread.start()
        self.notify("hi_there")

    def notify(self, topic="hi_there", is_async=False, audio_path="data/audio"):
        # Notify with local preset audio files
        from os.path import join, isfile
        audio_file = join(audio_path, f"{topic}.wav")
        if not isfile(audio_file):
            return
        self.tts.play_file(audio_file, is_async)

    def generate_rap(self, topic="", beat_path="data/beat"):
        """Generate rap and play """
        tts = self.tts
        lyrics_gen = self.lyrics_gen
        response = tts.generate_speech(f"hey, I can rap about {topic}")
        tts.play(response, True)
        # Generate based on topic
        lyrics_output = lyrics_gen.generate(topic)
        # Generate speech
        lyrics_speech = tts.generate_speech(lyrics_output)
        # Select beat
        beat_index = random.randint(0, 20)
        # Play beat and lyrics
        tts.play_file(f'{beat_path}/beat_{beat_index}.wav', True)
        tts.play(lyrics_speech)

    def get_weather_message(self, city="Ithaca"):
        import requests, json, os
        api_key = os.getenv('WEATHER_APIKEY')
        base_url = "https://api.openweathermap.org/data/2.5/weather?"
        city_name = f"{city},us"
        complete_url = f"{base_url}q={city_name}&units=imperial&APPID={api_key}"
        try:
            response = requests.get(complete_url)
            res = response.json()
            msg_weather = f"Today, it's {res['weather'][0]['description']} in {city}. "
            msg_temp = f"The temperature is {int(res['main']['temp'])} degrees."
            return msg_weather + msg_temp
        except (requests.RequestException, KeyError, ValueError):
            pass
        return ""

    def intent_recognize(self, text=""):
        """Recognize intent """
        luis = self.luis
        tts = self.tts
        # Get result from language understanding engine
        luis_result = luis.predict(text)
        intent = luis_result.top_scoring_intent.intent
        if intent == "Freestyle":
            entities = luis_result.entities
            entity_topic = "rap"
            if len(entities) > 0:
                entity = entities[0]
                cprint(f'The topic is {entity.entity}', 'cyan')
                entity_topic = entity.entity
            self.generate_rap(entity_topic)
        elif intent == "Weather":
            response = tts.generate_speech("I will tell you the weather in Ithaca.")
            tts.play(response)
            weather = self.get_weather_message()
            response = tts.generate_speech(weather)
            tts.play(response)
        else:
            self.notify("sorry")

    def tft_manage(self):
        """Manage TFT display through state """
        self.tft.display_text("Andrew is waking up")
        status = {'state': 'None'}
        while True:
            if status['state'] == 'wait':
                self.tft.display_wave()
            elif status['state'] == 'listen':
                self.tft.display_wave((0, 255, 0))
            # Update the status
            try:
                update = self.tft_queue.get(block=False)
                if update is not None:
                    status = update
            except queue.Empty:
                continue

    def start(self):
        """Start listening and interacting """
        tft = self.tft
        tts = self.tts
        # Init stream
        with self.mic as stream:
            self.tft_queue.put({'state': 'listen'})
            while True:
                if not self.is_wakeup:
                    stream.closed = False
                    while not stream.closed:
                        stream.audio_input = []
                        audio_gen = stream.generator()
                        for chunk in audio_gen:
                            if not self.is_wakeup:
                                prob = self.detector.get_prediction(chunk)
                                self.pred_queue.append(prob > 0.6)
                                print('!' if prob > 0.6 else '.', end='', flush=True)
                                if self.pred_queue.count >= 2:
                                    self.notify("hi")
                                    cprint(' Trigger word detected! \n', 'magenta')
                                    self.pred_queue.clear()
                                    self.is_wakeup = True
                                    stream.pause()
                                    break
                else:
                    cprint('Speech to text\n', 'green')
                    time.sleep(1)
                    stream.closed = False
                    try:
                        voice_command = self.speech_client.recognize(stream)
                        cprint(f'{voice_command}\n', 'yellow')
                        cprint('Recognition ended...\n', 'red')
                        stream.pause()
                        #tft.display_text(f'"{voice_command}"')
                        if "goodbye" in voice_command:
                            self.notify("see_you")
                            exit()
                        if "sorry" in voice_command:
                            self.notify("its_ok")
                        else:
                            cprint('Recognize intents...', 'cyan')
                            self.intent_recognize(voice_command)
                    except Exception as e:
                        cprint(f'Error: {e}', 'red')
                    self.is_wakeup = False
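# A minimal sketch of how the assistant above could be launched; the
# entry-point guard is an assumption, as the original snippet does not show it.
if __name__ == "__main__":
    andrew = Andrew()  # plays the "hi_there" greeting on startup
    andrew.start()     # blocks, listening for the wake word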
def main():
    (media_file, transcribe_file, output_stt_json_file,
     output_seg_json_file) = sys.argv[1:5]
    mgm_utils.exception_if_file_not_exist(transcribe_file)

    # Open the transcribe output
    with open(transcribe_file) as json_file:
        data = json.load(json_file)

    amp_results = SpeechToTextResult()

    # Fail if we don't have results
    if "results" not in data.keys():
        exit(1)

    aws_results = data["results"]
    if "transcripts" not in aws_results.keys():
        exit(1)

    # Parse transcript
    transcripts = aws_results["transcripts"]
    for t in transcripts:
        amp_results.transcript = amp_results.transcript + t["transcript"]

    # Fail if we don't have any items
    if "items" not in aws_results.keys():
        exit(1)

    # Parse items (words)
    items = aws_results["items"]
    duration = 0.00

    # For each item, get the necessary parts and store as a word
    for i in items:
        alternatives = i["alternatives"]
        # Choose an alternative
        max_confidence = 0.00
        text = ""
        # Each word is stored as an "alternative". Get the one with the maximum confidence
        for a in alternatives:
            if float(a["confidence"]) >= max_confidence:
                max_confidence = float(a["confidence"])
                text = a["content"]
        end_time = -1
        start_time = -1
        # Two types (punctuation, pronunciation). Only keep times for pronunciation
        if i["type"] == "pronunciation":
            end_time = float(i["end_time"])
            start_time = float(i["start_time"])
            # If this is the greatest end time, store it as the duration
            if end_time > duration:
                duration = end_time
        # Add the word to the results
        amp_results.addWord(i["type"], start_time, end_time, text,
                            "confidence", max_confidence)

    # Create the media object
    media = SpeechToTextMedia(duration, media_file)

    # Create the final object
    outputFile = SpeechToText(media, amp_results)

    # Write the output
    mgm_utils.write_json_file(outputFile, output_stt_json_file)

    # Start the segmentation schema with diarization data
    # Create a segmentation object to serialize
    seg_schema = Segmentation()

    # Create the media object
    segMedia = SegmentationMedia(duration, media_file)
    seg_schema.media = segMedia

    if "speaker_labels" in aws_results.keys():
        speakerLabels = aws_results["speaker_labels"]
        seg_schema.numSpeakers = speakerLabels["speakers"]

        # For each segment, get the start time, end time and speaker label
        segments = speakerLabels["segments"]
        for segment in segments:
            seg_schema.addDiarizationSegment(float(segment["start_time"]),
                                             float(segment["end_time"]),
                                             segment["speaker_label"])

    # Write the output
    mgm_utils.write_json_file(seg_schema, output_seg_json_file)
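# For reference, an abbreviated sketch of the AWS Transcribe output the parser
# above walks; illustrative values only, see the AWS Transcribe documentation
# for the full schema.
example_aws_transcribe_output = {
    "results": {
        "transcripts": [{"transcript": "hello world"}],
        "items": [
            {
                "type": "pronunciation",
                "start_time": "0.00",
                "end_time": "0.42",
                "alternatives": [{"confidence": "0.99", "content": "hello"}],
            },
            {
                "type": "punctuation",
                "alternatives": [{"confidence": "0.0", "content": "."}],
            },
        ],
        "speaker_labels": {
            "speakers": 1,
            "segments": [
                {"start_time": "0.00", "end_time": "0.42",
                 "speaker_label": "spk_0"}
            ],
        },
    }
}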
def main():
    (input_file, json_file, bucketName, dataAccessRoleArn) = sys.argv[1:5]

    # Read a list of categories to ignore when outputting the entity list
    ignore_cats_list = list()
    if len(sys.argv) > 5:
        print("ignore cats:" + sys.argv[5])
        ignore_cats_list = split_ignore_list(sys.argv[5])

    # Variable declarations
    outputS3Uri = 's3://' + bucketName + '/'
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    jobName = 'AwsComprehend-' + timestamp + ".json"
    inputS3Uri = outputS3Uri + jobName

    # Get the transcript text from the input file
    with open(input_file, 'r') as file:
        stt = SpeechToText().from_json(json.load(file))

    # Create the ner object
    ner = EntityExtraction()

    # Add the media information
    if stt is None or stt.results is None:
        mediaLength = 0
    else:
        mediaLength = len(stt.results.transcript)

    # If we have a blank file, don't error. Create another blank json file to
    # pass to the next process
    if mediaLength == 0:
        ner.media = EntityExtractionMedia(mediaLength, input_file)
        mgm_utils.write_json_file(ner, json_file)
        exit(0)

    # Create a temp file to upload to S3
    tmpfile = create_temp_transcript_file(jobName, stt.results.transcript)

    # Copy the temporary text file to S3
    copy_to_s3(tmpfile.name, bucketName, jobName)

    # Make the call to AWS Comprehend
    output_uri = run_comprehend_job(jobName, inputS3Uri, outputS3Uri,
                                    dataAccessRoleArn)

    uncompressed_file = download_from_s3(output_uri, outputS3Uri, bucketName)
    if uncompressed_file is None:
        exit(1)

    comprehend_data = read_comprehend_response(uncompressed_file)
    ner.media = EntityExtractionMedia(mediaLength, input_file)

    # Variables for filling time offsets based on speech to text
    lastPos = 0  # Iterator to keep track of the location in the STT words
    sttWords = len(stt.results.words)  # Number of STT words

    if 'Entities' in comprehend_data.keys():
        for entity in comprehend_data["Entities"]:
            entity_type = entity["Type"]
            # Start and end time offsets
            start = None
            end = None
            text = entity["Text"]
            # Split the entity into an array of words based on whitespace
            entityParts = text.split()
            # For each word in the entity, find the corresponding word in the STT word list
            foundWordPos = None
            for entityPart in entityParts:
                for wordPos in range(lastPos, sttWords):
                    word = stt.results.words[wordPos]
                    # If it matches, set the time offset
                    if clean_entity_word(word.text) == clean_entity_word(entityPart):
                        # Keep track of the last position to save iterations
                        foundWordPos = wordPos
                        # Set start if we haven't set it yet
                        if start is None:
                            start = word.start
                        end = word.end
                        break
                else:
                    # This entity word was not found in the STT list; reset
                    start = None
                    end = None
                    foundWordPos = None
            if start is not None:
                lastPos = foundWordPos
            else:
                print("Could not find word")
                print(text)
                print(entityParts)
                print(lastPos)
            if clean_text(entity_type) not in ignore_cats_list and start is not None:
                # AMP-636 removed startOffset=endOffset=end=None
                ner.addEntity(entity_type, text, None, None, "relevance",
                              float(entity["Score"]), start, None)

    # Write the json file
    mgm_utils.write_json_file(ner, json_file)

    # Clean up temp files
    safe_delete(uncompressed_file)
    safe_delete(tmpfile.name)
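# clean_entity_word and split_ignore_list are called above but not shown.
# A plausible sketch under the assumption that they normalize case and strip
# punctuation; these are illustrations, not the original implementations.
import string

def clean_entity_word(word):
    # Normalize a word for comparison: strip surrounding punctuation, lowercase
    return word.strip(string.punctuation).lower()

def split_ignore_list(arg):
    # Turn a comma-separated CLI argument into a list of category names
    return [part.strip().lower() for part in arg.split(',')]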
def main():
    (root_dir, from_draftjs, original_transcript, to_transcript) = sys.argv[1:5]

    # Use the output instead of the input filename, as the latter is unique
    # while the former could be used by multiple jobs
    logger = MgmLogger(root_dir, "hmgm_transcript", to_transcript)
    sys.stdout = logger
    sys.stderr = logger

    try:
        # If from_draftjs is in error, raise an exception to notify the HMGM job
        # runner to fail the job; otherwise, if from_draftjs doesn't exist yet,
        # exit 1 to keep waiting
        mgm_utils.exit_if_file_not_ready(from_draftjs)
        print("Converting DraftJs " + from_draftjs + " to Transcript " + to_transcript)

        with open(from_draftjs) as json_file:
            d = json.load(json_file)
            data = eval(json.dumps(d))

        # Read the original file for extracting only the confidence score of each word
        original_input = open(original_transcript)
        original_json = json.loads(original_input.read())
        original_items = original_json["results"]["words"]

        results = SpeechToTextResult()
        word_type = text = ''
        confidence = start_time = end_time = -1
        duration = 0.0

        # The draftJS input file here always comes from a converted and corrected
        # AMP Transcript, so it should always contain 'entityMap'; otherwise an
        # error should occur
        # Standardising draft js format
        # if "entityMap" in data.keys():
        transcript = ''
        entityMap = data["entityMap"]
        for i in range(0, len(entityMap.keys())):
            punctuation = ''
            if str(i) not in entityMap.keys():
                continue
            entity = entityMap[str(i)]
            if "data" in entity:
                if "text" in entity["data"].keys():
                    text = entity["data"]["text"]
                    transcript += entity["data"]["text"] + " "
                    if text[-1] in string.punctuation:  # [',','.','!','?']
                        punctuation = text[-1]
                        text = text[0:-1]
            if "type" in entity:
                entity_type = entity["type"]
                if entity_type == "WORD":
                    word_type = "pronunciation"
                    if "start" in entity["data"]:
                        start_time = float(entity["data"]["start"])
                    if "end" in entity["data"]:
                        end_time = float(entity["data"]["end"])
                        if end_time > duration:
                            duration = end_time
                else:
                    word_type = entity_type
            results.addWord(word_type, start_time, end_time, text,
                            "confidence", confidence)
            if len(punctuation) > 0:
                results.addWord('punctuation', None, None, punctuation,
                                "confidence", 0.0)
        results.transcript = transcript
        words = results.words

        # Now retrieve the confidence values from the original input file and
        # assign them to 'results'
        list_items = []
        list_result = []
        for i in range(0, len(original_items)):
            list_items.append(original_items[i]["text"])
        for j in range(0, len(words)):
            list_result.append(words[j].text)
        d = difflib.Differ()
        res = list(d.compare(list_items, list_result))
        i = j = 0
        word_count = len(words)
        original_item_count = len(original_items)
        print("original item count: " + str(original_item_count))
        print("word count: " + str(word_count))
        for ele in res:
            if j >= word_count or i >= original_item_count:
                break
            elif ele.startswith("- "):
                i += 1
            elif len(ele) > 2 and ele[0:2] == "+ ":
                words[j].score.scoreValue = 1.0
                j += 1
            elif ele[0:1] == " " and words[j].text == original_items[i]["text"]:
                if "score" in original_items[i]:
                    words[j].score.scoreValue = float(
                        original_items[i]["score"]["scoreValue"])
                else:
                    # default the score to 1.0 if it didn't exist originally
                    words[j].score.scoreValue = 1.0
                i += 1
                j += 1
        print("i: " + str(i) + " j:" + str(j))

        # Create the media object
        media = SpeechToTextMedia(duration, original_transcript)

        # Create the final object
        stt = SpeechToText(media, results)

        # Write the output
        mgm_utils.write_json_file(stt, to_transcript)
        print("Successfully converted from DraftJs " + from_draftjs +
              " to Transcript " + to_transcript)
        # As the last command in HMGM, implicitly exit 0 here to let the whole
        # job complete in success
    except Exception as e:
        # As the last command in HMGM, exit -1 to let the whole job fail
        print("Failed to convert from DraftJs " + from_draftjs +
              " to Transcript " + to_transcript, e)
        traceback.print_exc()
        sys.stdout.flush()
        exit(-1)
def reply(text_message, client, thread_id):
    if client.uid == thread_id:
        return
    mess = Message(text=text_message)
    client.send(mess, thread_id=thread_id, thread_type=ThreadType.USER)
    client.delay()


client = Bot('',
             '',
             max_tries=1,
             user_agent=cfg.user_agent,
             session_cookies=cfg.session_cookies)
client.startListening()

acc_demo = '100041985261746'
acc_trDuong = '100014187060145'

recg = SpeechToText('right shift', reply, (
    client,
    acc_trDuong,
))

while 1:
    client.doOneListen()
    recg.recognize_once()

while 1:
    pass
#speech_to_text.recognize('space', reply, (client, '100014187060145',))
def run(host, port, wsgi=False, https_mode=False):
    ''' Auto-select an available port (if port 0 is given), load the language
    model and the neural network, and start the server.
    1. wsgi - True: start a WSGI server, False: start the test Flask server
    2. https_mode - True: run over https (the certificate and key must be in
    cert.pem and key.pem), False: run over http
    A self-signed certificate can be obtained with:
    openssl req -x509 -newkey rsa:4096 -nodes -out temp/cert.pem -keyout temp/key.pem -days 365 '''
    if port == 0:  # If port 0 was given, auto-select any available port
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.bind((host, 0))
            port = sock.getsockname()[1]
            log('selected port ' + str(port))
            sock.close()
        except socket.gaierror:
            log('address ' + host + ':' + str(port) + ' is invalid', level='error')
            sock.close()
            return
        except OSError:
            log('address ' + host + ':' + str(port) + ' is unavailable', level='error')
            sock.close()
            return

    log('Flask v.' + flask_version + ', WSGIServer v.' + wsgi_version)
    log('maximum accepted payload size set to: {:.2f} KB'.format(
        max_content_length / 1024))

    name_dataset = f_name_w2v_model_plays[
        f_name_w2v_model_plays.rfind('w2v_model_') +
        len('w2v_model_'):f_name_w2v_model_plays.rfind('.bin')]

    log('loading the seq2seq model trained on the ' + name_dataset + ' dataset...')
    global ttt
    print()
    ttt = TextToText(f_name_w2v_model=f_name_w2v_model_plays,
                     f_name_model=f_name_model_plays,
                     f_name_model_weights=f_name_model_weights_plays)
    print()

    log('loading the language model for speech recognition...')
    global stt
    stt = SpeechToText('from_file', name_dataset)

    log('loading the speech synthesizer...')
    global tts
    tts = TextToSpeech('anna')

    if wsgi:
        global http_server
        if https_mode:
            log('WSGI server started at https://' + host + ':' + str(port) +
                ' (press Ctrl+C or Ctrl+Z to exit)')
        else:
            log('WSGI server started at http://' + host + ':' + str(port) +
                ' (press Ctrl+C or Ctrl+Z to exit)')
        try:
            if https_mode:
                http_server = WSGIServer((host, port),
                                         app,
                                         log=app.logger,
                                         error_log=app.logger,
                                         keyfile='temp/key.pem',
                                         certfile='temp/cert.pem')
            else:
                http_server = WSGIServer((host, port),
                                         app,
                                         log=app.logger,
                                         error_log=app.logger)
            http_server.serve_forever()
        except OSError:
            print()
            log('address ' + host + ':' + str(port) + ' is unavailable', level='error')
    else:
        log('starting the test Flask server...')
        try:
            if https_mode:
                app.run(host=host,
                        port=port,
                        ssl_context=('temp/cert.pem', 'temp/key.pem'),
                        threaded=True,
                        debug=False)
            else:
                app.run(host=host, port=port, threaded=True, debug=False)
        except OSError:
            print()
            log('address ' + host + ':' + str(port) + ' is unavailable', level='error')
def main():
    fail_safe = FailSafe()
    coref_solver = CorefSolver()
    print("Press H for help.")

    verbose = False
    if "--verbose" in sys.argv:
        verbose = True
        u = UtteranceBranching(coref_solver, verbose=True)
    else:
        u = UtteranceBranching(coref_solver)

    kb_file_name = None
    if "--kb" in sys.argv:
        pos = sys.argv.index("--kb")
        kb_file_name = sys.argv[pos + 1]

    q_file_name = None
    if "--q" in sys.argv:
        pos = sys.argv.index("--q")
        q_file_name = sys.argv[pos + 1]

    if kb_file_name:
        with open(kb_file_name, "r") as f:
            utterances = list(f)
        for utterance in utterances:
            print("-------------------------------------------------------")
            print(utterance)
            response = u.process(utterance[:-1])
            print(response)

    if q_file_name:
        with open(q_file_name, "r") as f:
            questions = list(f)
        for q in questions:
            print("-------------------------------------------------------")
            print(q)
            response = u.process(q[:-1])
            if response:
                if response[-1] == "+":
                    question, fail_response, similarity = fail_safe.answer_questions(q[:-1])
                    coref_solver.prev.pop()
                    # print("*********", similarity)
                    if similarity > 0.7:
                        response = fail_response
                    else:
                        response = response[:-1]
            else:
                question, response, similarity = fail_safe.answer_questions(q[:-1])
                # print(similarity)
                coref_solver.prev.pop()
            print("Bot: ", response)
        return

    speech_to_text = SpeechToText()
    while True:
        # type or say
        print("You: ", end='', flush=True)
        utterance = str(sys.stdin.readline())
        if utterance[:-1].lower() == "h":
            print("Press H for help.")
            print("Press S to view assistant's internal state.")
            print("Press V in order to interact with the assistant with your voice.")
            continue
        if utterance[:-1].lower() == "s":
            u.internal_state()
            continue
        if utterance[:-1].lower() == "v":
            utterance = speech_to_text.process()
            print(utterance)
        else:
            utterance = utterance[:-1]

        response = u.process(utterance)
        if response:
            if response[-1] == "+":
                question, fail_response, similarity = fail_safe.answer_questions(utterance)
                coref_solver.prev.pop()
                if similarity > 0.7:
                    response = fail_response
                else:
                    response = response[:-1]
            tts = gTTS(text=response, lang='en')
            tts.save("response.mp3")
            os.system("mpg123 response.mp3 2> /dev/null")
            print("Bot: ", response)
            if response in ["Glad we talked!", "Happy to help!", "Gooodbye!"]:
                break
        else:
            question, response, similarity = fail_safe.answer_questions(utterance)
            # print(similarity)
            coref_solver.prev.pop()
            print("Bot: ", response)
            tts = gTTS(text=response, lang='en')
            tts.save("response.mp3")
            os.system("mpg123 response.mp3 2> /dev/null")
        print()