def DeepSpeech(Window, SpeechToNLPQueue, wavefile):

    # Create GUI signal objects
    SpeechSignal = GUISignal()
    SpeechSignal.signal.connect(Window.UpdateSpeechBox)

    MsgSignal = GUISignal()
    MsgSignal.signal.connect(Window.UpdateMsgBox)

    # References to models:
    model = 'DeepSpeech_Models/output_graph.pbmm'
    alphabet = 'DeepSpeech_Models/alphabet.txt'
    lm = 'DeepSpeech_Models/lm.binary'
    trie = 'DeepSpeech_Models/trie'

    print('Loading model from file {}'.format(model), file=sys.stderr)
    model_load_start = timer()
    ds = Model(model, N_FEATURES, N_CONTEXT, alphabet, BEAM_WIDTH)
    model_load_end = timer() - model_load_start
    print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)

    if lm and trie:
        print('Loading language model from files {} {}'.format(lm, trie), file=sys.stderr)
        lm_load_start = timer()
        ds.enableDecoderWithLM(alphabet, lm, trie, LM_ALPHA, LM_BETA)
        lm_load_end = timer() - lm_load_start
        print('Loaded language model in {:.3}s.'.format(lm_load_end), file=sys.stderr)

    audio = wavefile
    fin = wave.open(audio, 'rb')
    fs = fin.getframerate()
    if fs != 16000:
        print('Warning: original sample rate ({}) is different than 16kHz. '
              'Resampling might produce erratic speech recognition.'.format(fs), file=sys.stderr)
        fs, audio = convert_samplerate(audio)
    else:
        audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

    # Use float division; integer division would truncate the duration to 0 on Python 2
    audio_length = fin.getnframes() * (1.0 / 16000)
    fin.close()

    print('Running inference.', file=sys.stderr)
    inference_start = timer()
    output = ds.stt(audio, fs)
    print(output)
    inference_end = timer() - inference_start
    print('Inference took %0.3fs for %0.3fs audio file.' % (inference_end, audio_length), file=sys.stderr)

    QueueItem = SpeechNLPItem(output, True, 0, 0, 'Speech')
    SpeechToNLPQueue.put(QueueItem)
    SpeechSignal.signal.emit([QueueItem])
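# NOTE: convert_samplerate() is called above but not defined in this file. The sketch below
# is modeled on the standard Mozilla DeepSpeech client example, which shells out to SoX;
# treat the exact SoX flags as an assumption to verify, not part of this project's source.
import shlex
import subprocess

def convert_samplerate(audio_path):
    # Ask SoX to decode the file to raw mono 16-bit signed PCM at 16 kHz on stdout
    sox_cmd = ('sox {} --type raw --bits 16 --channels 1 --rate 16000 '
               '--encoding signed-integer --endian little --compression 0.0 '
               '--no-dither - '.format(audio_path))
    output = subprocess.check_output(shlex.split(sox_cmd), stderr=subprocess.PIPE)
    return 16000, np.frombuffer(output, np.int16)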
def generator(self):
    # Create GUI signal objects
    MsgSignal = GUISignal()
    MsgSignal.signal.connect(self.Window.UpdateMsgBox)

    VUSignal = GUISignal()
    VUSignal.signal.connect(self.Window.UpdateVUBox)

    while not self.closed:
        # Use a blocking get() to ensure there's at least one chunk of
        # data, and stop iteration if the chunk is None, indicating the
        # end of the audio stream.
        chunk = self._buff.get()
        if chunk is None:
            return
        data = [chunk]

        # Drive the VU meter in the GUI
        signal = np.frombuffer(b''.join(data), np.int16)
        VUSignal.signal.emit([signal])

        # Stop streaming after ten seconds and re-enable the Start button
        if time.time() > (self.start_time + 10):
            MsgSignal.signal.emit(["Recorded 10 seconds"])
            self.Window.StartButton.setEnabled(True)
            break

        self.samplesCounter += self._chunk

        if self.Window.stopped == 1:
            print('Speech Thread Killed')
            self.Window.StartButton.setEnabled(True)
            return

        # Now consume whatever other data's still buffered.
        while True:
            try:
                chunk = self._buff.get(block=False)
                if chunk is None:
                    return
                data.append(chunk)
            except queue.Empty:
                break

        # Rebuild the sample array from *all* chunks gathered this pass; yielding
        # only the first chunk would silently drop the buffered audio drained above.
        signal = np.frombuffer(b''.join(data), np.int16)
        yield signal
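# NOTE: self._buff is assumed to be filled by a PyAudio callback, following the pattern of
# Google's streaming-microphone sample that this class is based on. A minimal, self-contained
# sketch of that wiring is below; names not used above are hypothetical.
import pyaudio
import queue

class _MicStreamSketch(object):
    def __init__(self, rate=16000, chunk=1600):
        self._rate, self._chunk = rate, chunk
        self._buff = queue.Queue()
        self.closed = True

    def __enter__(self):
        self._pa = pyaudio.PyAudio()
        self._stream = self._pa.open(format=pyaudio.paInt16, channels=1,
                                     rate=self._rate, input=True,
                                     frames_per_buffer=self._chunk,
                                     stream_callback=self._fill_buffer)
        self.closed = False
        return self

    def __exit__(self, *args):
        self._stream.stop_stream()
        self._stream.close()
        self.closed = True
        self._buff.put(None)   # unblock generator() and signal end of stream
        self._pa.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        # PyAudio callback: hand raw PCM bytes to the consuming generator
        self._buff.put(in_data)
        return None, pyaudio.paContinue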
def DeepSpeech(Window, SpeechToNLPQueue):

    # Create GUI signal objects
    SpeechSignal = GUISignal()
    SpeechSignal.signal.connect(Window.UpdateSpeechBox)

    MsgSignal = GUISignal()
    MsgSignal.signal.connect(Window.UpdateMsgBox)

    # References to models:
    model = 'DeepSpeech_Models/output_graph.pbmm'
    alphabet = 'DeepSpeech_Models/alphabet.txt'
    lm = 'DeepSpeech_Models/lm.binary'
    trie = 'DeepSpeech_Models/trie'

    print('Loading model from file {}'.format(model), file=sys.stderr)
    model_load_start = timer()
    ds = Model(model, N_FEATURES, N_CONTEXT, alphabet, BEAM_WIDTH)
    model_load_end = timer() - model_load_start
    print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)

    if lm and trie:
        print('Loading language model from files {} {}'.format(lm, trie), file=sys.stderr)
        lm_load_start = timer()
        ds.enableDecoderWithLM(alphabet, lm, trie, LM_ALPHA, LM_BETA)
        lm_load_end = timer() - lm_load_start
        print('Loaded language model in {:.3}s.'.format(lm_load_end), file=sys.stderr)

    # Collect samples from the microphone until the stream generator ends
    audio = []
    with MicrophoneStream(Window, RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        for content in audio_generator:
            for sample in content:
                audio.append(sample)

    # ds.stt() expects a 16-bit integer sample array, not a Python list
    audio = np.array(audio, dtype=np.int16)
    result = ds.stt(audio, 16000)

    QueueItem = SpeechNLPItem(result, True, 0, 0, 'Speech')
    SpeechToNLPQueue.put(QueueItem)
    SpeechSignal.signal.emit([QueueItem])
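# Buffering the whole utterance and calling ds.stt() once means no interim results appear in
# the GUI. The 0.5-era DeepSpeech Python API also exposed a streaming decoder; a sketch is
# below. Method names changed across DeepSpeech releases, so treat these calls as an
# assumption to verify against the installed deepspeech package.
def deepspeech_streaming_sketch(ds, stream):
    sctx = ds.setupStream(sample_rate=16000)   # create a streaming context
    for content in stream.generator():
        ds.feedAudioContent(sctx, content)     # push 16-bit samples as they arrive
    return ds.finishStream(sctx)               # flush the decoder, get the final transcript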
def GenerateForm(Window, text):

    # Create GUI signal objects
    MsgSignal = GUISignal()
    MsgSignal.signal.connect(Window.UpdateMsgBox)

    ButtonsSignal = GUISignal()
    ButtonsSignal.signal.connect(Window.ButtonsSetEnabled)
    ButtonsSignal.signal.emit([(Window.GenerateFormButton, False)])

    # Percent-encode the text and punctuate it via the online punctuator service
    dummy12 = text.replace('\r', '').replace('\n', '')
    dummyP = dummy12.replace(' ', '%20').replace('\'', '%27').replace('&', '%26')
    part1 = 'curl -d text=' + dummyP + ' http://bark.phon.ioc.ee/punctuator'
    op = commands.getstatusoutput(part1)
    output = op[1].rsplit('\n', 1)[1]

    sentsList = textParse2.sent_tokenize(output)   # final sentences
    wordsList = textParse2.word_tokenize(output)   # final words

    file_name = str(datetime.datetime.now().strftime("%c"))

    try:
        prescription_form2.generateFields(sentsList, wordsList, dummy12, file_name)
        MsgSignal.signal.emit(["Form Generated: /Dumps/" + file_name + ".pdf"])
        # Pop the generated PDF open and wait for the viewer to exit
        p = subprocess.Popen(["xdg-open", "./Dumps/" + file_name + ".pdf"])
        returncode = p.wait()
    except Exception as e:
        print("Error encountered generating form. Exception: " + str(e))
        MsgSignal.signal.emit(["Error encountered generating form. Exception: " + str(e)])

    ButtonsSignal.signal.emit([(Window.GenerateFormButton, True)])
    print("Form Generator Thread Killed.")
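# The curl-through-shell call above does its own ad-hoc percent-encoding and misses
# characters it does not handle. A sketch of the same request using the `requests` library,
# which encodes the form field itself, is below. This assumes `requests` is installed; the
# service takes a single POST field `text`, as in the curl command above.
import requests

def punctuate(text):
    resp = requests.post('http://bark.phon.ioc.ee/punctuator', data={'text': text})
    resp.raise_for_status()          # surface HTTP errors instead of parsing shell output
    return resp.text.strip()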
def TextSpeech(Window, SpeechToNLPQueue, textfile_name):
    print("Entered TextSpeech")

    # Create GUI signal objects
    SpeechSignal = GUISignal()
    SpeechSignal.signal.connect(Window.UpdateSpeechBox)

    MsgSignal = GUISignal()
    MsgSignal.signal.connect(Window.UpdateMsgBox)

    ButtonsSignal = GUISignal()
    ButtonsSignal.signal.connect(Window.ButtonsSetEnabled)

    # Read the transcript text from disk
    Text_File = open(textfile_name, "r")
    text = ""
    for line in Text_File.readlines():
        text += line

    # Punctuate the raw text via the online punctuator, then break it into sentences
    dummy12 = text.replace('\r', '').replace('\n', '')
    dummyP = dummy12.replace(' ', '%20').replace('\'', '%27').replace('&', '%26')
    part1 = 'curl -d text=' + dummyP + ' http://bark.phon.ioc.ee/punctuator'
    op = commands.getstatusoutput(part1)
    output = op[1].rsplit('\n', 1)[1]
    sentsList = textParse2.sent_tokenize(output)   # final sentences

    # Stream the text character by character, pausing to mimic natural speech
    num_chars_printed = 0
    for sentence in sentsList:
        for i, character in enumerate(sentence):
            QueueItem = SpeechNLPItem(sentence[:i + 1], False, 0, num_chars_printed, 'Speech')
            SpeechSignal.signal.emit([QueueItem])
            if character == " ":
                time.sleep(BETWEEN_WORDS_PAUSE)
            elif character == ",":
                time.sleep(COMMA_PAUSE)
            elif character == ".":
                time.sleep(BETWEEN_SENTENCES_PAUSE)
            else:
                time.sleep(BETWEEN_CHARACTERS_PAUSE)
            num_chars_printed = len(sentence[:i + 1])

            if Window.stopped == 1:
                print('Text Speech Thread Killed')
                QueueItem = SpeechNLPItem(sentence[:i + 1], True, 0, num_chars_printed, 'Speech')
                SpeechSignal.signal.emit([QueueItem])
                SpeechToNLPQueue.put(QueueItem)
                return

        QueueItem = SpeechNLPItem(sentence, True, 0, num_chars_printed, 'Speech')
        SpeechSignal.signal.emit([QueueItem])
        SpeechToNLPQueue.put(QueueItem)
        num_chars_printed = 0

    # Clean up and end thread
    MsgSignal.signal.emit(["Transcription of text file complete!"])
    ButtonsSignal.signal.emit([(Window.StartButton, True), (Window.ComboBox, True),
                               (Window.ResetButton, True)])
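# The pause constants used above are defined elsewhere in the module (the sentence-final one
# was spelled BETWEEN_SENETENCES_PAUSE in the original source; corrected here). Representative
# values in seconds are sketched below; the exact numbers are an assumption, not from the source.
BETWEEN_CHARACTERS_PAUSE = 0.01   # per-character delay while "typing" the transcript
BETWEEN_WORDS_PAUSE = 0.05        # extra delay at spaces
COMMA_PAUSE = 0.1                 # extra delay at commas
BETWEEN_SENTENCES_PAUSE = 0.5     # extra delay at sentence-final periods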
def generator(self):
    # Create GUI signal objects
    GoogleSignal = GUISignal()
    GoogleSignal.signal.connect(self.Window.StartGoogle)

    MsgSignal = GUISignal()
    MsgSignal.signal.connect(self.Window.UpdateMsgBox)

    VUSignal = GUISignal()
    VUSignal.signal.connect(self.Window.UpdateVUBox)

    while not self.closed:
        # Use a blocking get() to ensure there's at least one chunk of
        # data, and stop iteration if the chunk is None, indicating the
        # end of the audio stream.
        chunk = self._buff.get()
        if chunk is None:
            return
        data = [chunk]

        # Drive the VU meter in the GUI
        signal = np.frombuffer(b''.join(data), np.int16)
        VUSignal.signal.emit([signal])

        # Stop streaming after one minute and signal a new recognition thread
        if time.time() > (self.start_time + 60):
            GoogleSignal.signal.emit(["Mic"])
            MsgSignal.signal.emit(["API's 1 minute limit reached. Re-establishing connection!"])
            break

        self.samplesCounter += self._chunk

        if self.Window.stopped == 1:
            print('Speech Thread Killed')
            # Dump the session's audio to disk as a mono 16-bit WAV file
            output_audio = wave.open("./Dumps/" + str(datetime.datetime.now().strftime("%c")) + ".wav", 'wb')
            output_audio.setnchannels(1)   # mono
            output_audio.setsampwidth(2)   # 16-bit samples
            output_audio.setframerate(RATE)
            output_audio.writeframesraw(MicrophoneStream.audio_buffer)
            output_audio.close()
            MicrophoneStream.audio_buffer = b""
            return

        # Now consume whatever other data's still buffered.
        while True:
            try:
                chunk = self._buff.get(block=False)
                if chunk is None:
                    return
                data.append(chunk)
            except queue.Empty:
                break

        MicrophoneStream.audio_buffer += b''.join(data)
        yield b''.join(data)
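# MicrophoneStream.audio_buffer and micSessionCounter are class-level attributes referenced
# above and in GoogleSpeech() below, but initialized elsewhere. The assumed declarations are
# sketched here for completeness; the real class defines much more.
class MicrophoneStream(object):
    micSessionCounter = 0   # bumped once per recognition session
    audio_buffer = b""      # accumulates raw PCM bytes for the WAV dump on stop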
def GoogleSpeech(Window, SpeechToNLPQueue):

    # Create GUI signal objects
    SpeechSignal = GUISignal()
    SpeechSignal.signal.connect(Window.UpdateSpeechBox)

    MsgSignal = GUISignal()
    MsgSignal.signal.connect(Window.UpdateMsgBox)

    ButtonsSignal = GUISignal()
    ButtonsSignal.signal.connect(Window.ButtonsSetEnabled)

    language_code = 'en-US'   # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        profanity_filter=True)
    streaming_config = types.StreamingRecognitionConfig(config=config, interim_results=True)

    with MicrophoneStream(Window, RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        try:
            responses = client.streaming_recognize(streaming_config, requests)

            # Signal that streaming has started
            print("Started speech recognition on microphone audio via Google Speech API.\nMicrophone Session counter: " + str(MicrophoneStream.micSessionCounter))
            MsgSignal.signal.emit(["Started speech recognition on microphone audio via Google Speech API.\nMicrophone Session counter: " + str(MicrophoneStream.micSessionCounter)])

            # Now, put the transcription responses to use.
            num_chars_printed = 0
            responseTimeStamp = time.time()

            for response in responses:
                if not response.results:
                    continue

                # The `results` list is consecutive. For streaming, we only care about
                # the first result being considered, since once it's `is_final`, it
                # moves on to considering the next utterance.
                result = response.results[0]
                if not result.alternatives:
                    continue

                # Display the transcription of the top alternative.
                transcript = result.alternatives[0].transcript
                confidence = result.alternatives[0].confidence

                # Interim results are overwritten in place; if the previous result was
                # longer than this one, pad with spaces to erase leftover characters.
                overwrite_chars = ' ' * (num_chars_printed - len(transcript))

                if result.is_final:
                    QueueItem = SpeechNLPItem(transcript, result.is_final, confidence,
                                              num_chars_printed, 'Speech')
                    SpeechToNLPQueue.put(QueueItem)
                    SpeechSignal.signal.emit([QueueItem])
                    num_chars_printed = 0
                else:
                    QueueItem = SpeechNLPItem(transcript, result.is_final, confidence,
                                              num_chars_printed, 'Speech')
                    SpeechSignal.signal.emit([QueueItem])
                    num_chars_printed = len(transcript)

        except Exception as e:
            MsgSignal.signal.emit(["Unable to get response from Google! Network or other issues. Please try again!\nException: " + str(e)])
            ButtonsSignal.signal.emit([(Window.StartButton, True), (Window.ComboBox, True),
                                       (Window.ResetButton, True)])
            sys.exit()
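# The `speech`, `types`, and `enums` names above come from the pre-1.0 google-cloud-speech
# client, with credentials read from the environment. The assumed setup is sketched below;
# verify against the installed client version, since later releases dropped `enums`/`types`.
#
#   export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account.json
#
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types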
def generator(self):
    # Create GUI signal objects
    GoogleSignal = GUISignal()
    GoogleSignal.signal.connect(self.Window.StartGoogle)

    MsgSignal = GUISignal()
    MsgSignal.signal.connect(self.Window.UpdateMsgBox)

    VUSignal = GUISignal()
    VUSignal.signal.connect(self.Window.UpdateVUBox)

    ButtonsSignal = GUISignal()
    ButtonsSignal.signal.connect(self.Window.ButtonsSetEnabled)

    while not self.closed:
        # Read the next chunk from the WAV file instead of the microphone buffer.
        time.sleep(.1)
        chunk = self.wf.readframes(CHUNK)

        # An empty read means the end of the file was reached.
        if chunk == '':
            FileStream.position = 0
            MsgSignal.signal.emit(["Transcription of audio file complete!"])
            ButtonsSignal.signal.emit([(self.Window.StartButton, True),
                                       (self.Window.ComboBox, True),
                                       (self.Window.ResetButton, True)])
            return

        if chunk is None:
            return

        # Break before Google's one-minute streaming limit and signal a reconnect.
        if self.samplesCounter / self._rate > 60:
            GoogleSignal.signal.emit(["File"])
            print(self.samplesCounter / self._rate)
            MsgSignal.signal.emit(["API's 1 minute limit reached. Re-establishing connection!"])
            break

        data = [chunk]

        # Drive the VU meter in the GUI
        signal = np.frombuffer(b''.join(data), np.int16)
        VUSignal.signal.emit([signal])

        self.samplesCounter += self._chunk
        FileStream.position += 1

        if self.Window.stopped == 1:
            print('File Speech Thread Killed')
            mixer.music.stop()
            FileStream.position = 0
            return

        # Now consume whatever other data's still buffered.
        while True:
            try:
                chunk = self._buff.get(block=False)
                if chunk is None:
                    return
                data.append(chunk)
                self.samplesCounter += self._chunk
                FileStream.position += 1
            except queue.Empty:
                break

        yield b''.join(data)
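# The FileStream variant above reads transcription audio from self.wf while pygame's mixer
# plays the same file aloud. The assumed setup is sketched below; everything other than
# self.wf, FileStream.position, RATE, and CHUNK is hypothetical.
from pygame import mixer

def open_file_stream(self, filename):
    self.wf = wave.open(filename, 'rb')            # transcription source
    self.wf.setpos(FileStream.position * CHUNK)    # resume where the last session broke off
    mixer.init(frequency=RATE)                     # audible playback of the same file
    mixer.music.load(filename)
    mixer.music.play()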
def CognitiveSystem(Window, SpeechToNLPQueue):

    # Create GUI signal objects
    SpeechSignal = GUISignal()
    SpeechSignal.signal.connect(Window.UpdateSpeechBox)

    ConceptExtractionSignal = GUISignal()
    ConceptExtractionSignal.signal.connect(Window.UpdateConceptExtractionBox)

    # Initialize BT framework parameters
    execfile("bt_parameters.py")

    # Set up the behavior-tree framework
    blackboard = Blackboard()
    root = py_trees.composites.Sequence("Root_1")
    IG = be.InformationGathering()
    TC = be.TextCollection()
    V = be.Vectorize()
    PS = be.ProtocolSelector()
    root.add_children([TC, IG, V, PS, be.protocols])
    behaviour_tree = py_trees.trees.BehaviourTree(root)
    behaviour_tree.add_pre_tick_handler(pre_tick_handler)
    behaviour_tree.setup(15)

    Concepts_Graph = dict()
    SpeechText = ""
    NLP_Items = []
    Tick_Counter = 1

    while True:
        # Get a queue item from the Speech-to-Text module
        received = SpeechToNLPQueue.get()
        print("Received chunk")

        if received == 'Kill':
            print("Cognitive System Thread received Kill signal. Killing Cognitive System Thread.")
            break

        if Window.reset == 1:
            print("Cognitive System Thread received reset signal. Killing Cognitive System Thread.")
            return

        # The item received from the queue is legitimate
        else:
            # Use the online punctuator to find sentence boundaries
            dummy12 = received.transcript.replace('\r', '').replace('\n', '')
            dummyP = dummy12.replace(' ', '%20').replace('\'', '%27').replace('&', '%26')
            part1 = 'curl -d text=' + dummyP + ' http://bark.phon.ioc.ee/punctuator'
            op = commands.getstatusoutput(part1)
            output = op[1].rsplit('\n', 1)[1]
            sentsList = textParse2.sent_tokenize(output)   # final sentences

            # Process each chunk/sentence
            PunctuatedAndHighlightedText = ""
            for idx, item in enumerate(sentsList):
                blackboard.text = [item]
                behaviour_tree.tick_tock(sleep_ms=50, number_of_iterations=1,
                                         pre_tick_handler=None, post_tick_handler=None)
                pr, sv_s, s = TickResults(Window, NLP_Items)

                # Highlight any concept newly found in this tick
                PunctuatedAndHighlightedTextChunk = item
                for sv in sv_s:
                    if sv[5] == Tick_Counter:
                        try:
                            i = re.search(r'%s' % sv[3], PunctuatedAndHighlightedTextChunk).start()
                            PunctuatedAndHighlightedTextChunk = str(
                                PunctuatedAndHighlightedTextChunk[:i] +
                                '<span style="background-color: #FFFF00">' +
                                PunctuatedAndHighlightedTextChunk[i:i + len(sv[3])] +
                                '</span>' +
                                PunctuatedAndHighlightedTextChunk[i + len(sv[3]):])
                        except Exception as e:
                            pass

                PunctuatedAndHighlightedText += PunctuatedAndHighlightedTextChunk + " "
                Tick_Counter += 1

            if Window.reset == 1:
                print("Cognitive System Thread received reset signal. Killing Cognitive System Thread.")
                return

            PunctuatedAndHighlightedText = '<b>' + PunctuatedAndHighlightedText + '</b>'
            SpeechSignal.signal.emit([
                SpeechNLPItem(PunctuatedAndHighlightedText, received.isFinal,
                              received.confidence, received.numPrinted, 'NLP')
            ])
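# pre_tick_handler is registered above but not defined in this file. py_trees calls the
# handler with the tree before every tick; a minimal sketch in the style of the py_trees
# demos is below (the log format is an assumption).
def pre_tick_handler(behaviour_tree):
    # Log the upcoming tick so console output can be lined up with TickResults() dumps
    print("\n--------- Tick {0} ---------\n".format(behaviour_tree.count))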
def TickResults(Window, NLP_Items):

    ConceptExtractionSignal = GUISignal()
    ConceptExtractionSignal.signal.connect(Window.UpdateConceptExtractionBox)

    ProtocolSignal = GUISignal()
    ProtocolSignal.signal.connect(Window.UpdateProtocolBoxes)

    b = Blackboard()
    protocol_candidates = []
    signs_and_vitals = []
    suggestions = []

    print("===============================================================")

    # ======= Top 3 protocol candidates
    print("\n======= Top 3 protocol candidates:")
    for p in b.protocol_flag:
        print(p, b.protocol_flag[p])
        binary = b.protocol_flag[p][0]
        confidence = b.protocol_flag[p][1]
        if binary:
            try:
                if confidence != 'nan' and float(confidence) > 0.0:
                    protocol_candidates.append((str(p), confidence))
            except Exception as e:
                pass

    # Sort by confidence and take the top 3
    protocol_candidates = sorted(protocol_candidates, key=itemgetter(1), reverse=True)[:3]

    # ======= Signs, symptoms, and vitals
    print("\n======= Signs, symptoms, and vitals:")
    for item in b.Vitals:
        if len(b.Vitals[item].content) > 0:
            content = (str(b.Vitals[item].name).capitalize(), str(b.Vitals[item].binary),
                       str(b.Vitals[item].value), str(b.Vitals[item].content),
                       str(round(b.Vitals[item].score / 1000, 2)), b.Vitals[item].tick)
            print(content)
            signs_and_vitals.append(content)
            if content not in NLP_Items:
                NLP_Items.append(content)

    for item in b.Signs:
        if len(b.Signs[item].content) > 0:
            content = (str(b.Signs[item].name).capitalize(), str(b.Signs[item].binary),
                       str(b.Signs[item].value), str(b.Signs[item].content),
                       str(round(b.Signs[item].score / 1000, 2)), b.Signs[item].tick)
            print(content)
            signs_and_vitals.append(content)
            if content not in NLP_Items:
                NLP_Items.append(content)

    # Sort by tick
    signs_and_vitals = sorted(signs_and_vitals, key=itemgetter(5))

    # ======= Suggestions
    print("\n======= Suggestions:")
    for key in b.feedback:
        if b.feedback[key] > 0.1:
            content = (str(key).capitalize(), str(round(b.feedback[key], 2)))
            suggestions.append(content)
            print(content)

    # Sort by score
    suggestions = sorted(suggestions, key=itemgetter(1), reverse=True)

    # ========================== Create output strings formatted for readability
    protocol_candidates_str = ""
    for i, p in enumerate(protocol_candidates):
        protocol_candidates_str += "(" + p[0] + ", <b>" + str(round(p[1], 2)) + "</b>)<br>"

    signs_and_vitals_str = ""
    for sv in NLP_Items:
        signs_and_vitals_str += "("
        for i, t in enumerate(sv):
            if i not in (3, 4, 5):
                signs_and_vitals_str += str(t) + ", "
            if i == 4:
                signs_and_vitals_str += "<b>" + str(t) + "</b>, "
        signs_and_vitals_str = signs_and_vitals_str[:-2] + ")<br>"

    suggestions_str = ""
    for s in suggestions:
        suggestions_str += "(" + str(s[0]) + ", <b>" + str(s[1]) + "</b>)<br>"

    print("===============================================================")
    ProtocolSignal.signal.emit([protocol_candidates_str, suggestions_str])
    ConceptExtractionSignal.signal.emit([signs_and_vitals_str])

    return protocol_candidates, signs_and_vitals, suggestions
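# For reference, a worked example of the string building above on one hypothetical
# signs_and_vitals tuple (name, binary, value, content, score, tick):
#
#   ('Breathing', 'True', 'labored', 'difficulty breathing', '0.87', 3)
#
# renders as "(Breathing, True, labored, <b>0.87</b>)<br>": index 3 (the matched text) and
# index 5 (the tick) are dropped, and the score at index 4 is bolded for the GUI's
# rich-text concept-extraction box.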