def open_recognizer(audio_config):
    """Open a live speech-recognition session driven by *audio_config*.

    Expected keys: 'buf_max_size', 'chunk' (queue sizing) and 'address'
    (passed to audio_callback). Blocks until interrupted with CTRL+C.

    :param audio_config: dict of audio/session parameters (see above)
    :return: None
    """
    # FIX: was round(buf_max_size, chunk) — round()'s second argument is a
    # digit count, not a divisor; the queue must hold buf_max_size / chunk
    # audio chunks.
    q = Queue(maxsize=int(round(audio_config['buf_max_size'] / audio_config['chunk'])))
    audio_source = AudioSource(q, True, True)

    audio_thread = Thread(target=audio_callback, args=(audio_config['address'], q))
    audio_thread.start()
    try:
        rec_thread = Thread(target=recognize_using_weboscket, args=(audio_source,))
        rec_thread.start()
        # FIX: wait on the recognizer thread instead of the original
        # CPU-burning `while True: pass` busy loop.
        rec_thread.join()
    except KeyboardInterrupt:
        audio_source.completed_recording()
    return
class Watson():
    """Singleton wrapper around the IBM Watson speech services.

    Provides text-to-speech (Speak) backed by a local mp3 cache, and
    speech-to-text (Listen) that streams the microphone to the Watson
    websocket recognizer. Credentials and language codes come from Config.
    """
    __metaclass__ = Singleton

    def __init__(self):
        # Audio buffering: the queue holds BUF_MAX_SIZE / CHUNK chunks;
        # pyaudio_callback drops chunks when it is full.
        self.CHUNK = 1024
        self.BUF_MAX_SIZE = self.CHUNK * 10
        self.q = Queue(maxsize=int(round(self.BUF_MAX_SIZE / self.CHUNK)))
        self.audio_source = AudioSource(self.q, True, True)

        # Recording parameters: 16-bit mono at 44.1 kHz.
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 44100

        # Service credentials / language settings from project configuration.
        self.__apikey_stt = Config().Get("SpeechToText", "WatsonSTTAPIKey")
        self.__url_stt = Config().Get("SpeechToText", "WatsonSTTUrl")
        self.__apikey_tts = Config().Get("TextToSpeech", "WatsonTTSAPIKey")
        self.__url_tts = Config().Get("TextToSpeech", "WatsonTTSUrl")
        self.__voiceName = Config().Get("TextToSpeech", "WatsonVoiceName")
        self.__language_2letter_cc = Config().Get("SpeechToText", "CountryCode2Letter")
        self.__language_4letter_cc = Config().Get("SpeechToText", "CountryCode4Letter")
        self.__audioPlayer = Config().Get("TextToSpeech", "AudioPlayer") + " '{0}'"

        # Opt out of IBM's data collection on both services.
        self.text_to_speech = TextToSpeechV1(url=self.__url_tts,
                                             iam_apikey=self.__apikey_tts)
        self.text_to_speech.set_default_headers(
            {'x-watson-learning-opt-out': "true"})
        self.speech_to_text = SpeechToTextV1(url=self.__url_stt,
                                             iam_apikey=self.__apikey_stt)
        self.speech_to_text.set_default_headers(
            {'x-watson-learning-opt-out': "true"})

        self.audio = pyaudio.PyAudio()
        # open stream using callback; start=False so Listen() controls start
        self.stream = self.audio.open(format=self.FORMAT,
                                      channels=self.CHANNELS,
                                      rate=self.RATE,
                                      input=True,
                                      frames_per_buffer=self.CHUNK,
                                      stream_callback=self.pyaudio_callback,
                                      start=False)
        try:
            rospy.init_node('STT_watson_node', anonymous=True)
        except Exception:
            # FIX: was a bare `except:` that also swallowed SystemExit and
            # KeyboardInterrupt; init fails when the node already exists.
            FileLogger().Info('already initialized')

    def Speak(self, audioString, playAudio=False):
        """Synthesize *audioString* to a cached mp3 and optionally play it.

        :param audioString: text to speak; empty input is a no-op
        :param playAudio: when True, play the file via the configured player
        :return: path of the mp3 file, or None for empty input
        """
        if len(audioString) == 0:
            return
        tmpAudioFile = os.path.join(Global.EmeraldPath, "Data", "TTS",
                                    ("Watson_" +
                                     self.__language_2letter_cc + "_" +
                                     self.CleanString(audioString) + ".mp3"))
        if not os.path.isfile(tmpAudioFile):
            # FIX: the file used to be written to
            # join(dirname(__file__), tmpAudioFile) while the existence check
            # and the player use tmpAudioFile directly; write to the same
            # path that is checked and played.
            with open(tmpAudioFile, 'wb') as audio_file:
                response = self.text_to_speech.synthesize(
                    audioString,
                    accept='audio/mp3',
                    voice=self.__voiceName).get_result()
                audio_file.write(response.content)
        if playAudio:
            os.system(self.__audioPlayer.format(tmpAudioFile))
        return tmpAudioFile

    def Listen(self):
        """Stream the microphone to Watson STT until CTRL+C, then clean up."""
        self.stream.start_stream()
        try:
            while True:
                recognize_thread = Thread(
                    target=self.recognize_using_weboscket, args=())
                recognize_thread.start()
                recognize_thread.join()
        except KeyboardInterrupt:
            # stop recording and release all audio resources
            self.audio_source.completed_recording()
            self.stream.stop_stream()
            self.stream.close()
            self.audio.terminate()

    def CleanString(self, string):
        """Strip non-word characters and cap the result at 75 chars."""
        data = re.sub(r'\W+', '', string)
        return (data[:75] + '_TRIMMED') if len(data) > 75 else data

    def recognize_using_weboscket(self, *args):
        """Run one websocket recognition session over the audio source.

        NOTE: the method name keeps the historical "weboscket" typo so
        existing callers keep working.
        """
        mycallback = MyRecognizeCallback()
        self.speech_to_text.recognize_using_websocket(
            audio=self.audio_source,
            content_type='audio/l16; rate=44100',
            recognize_callback=mycallback,
            interim_results=True,
            model='{0}_BroadbandModel'.format(self.__language_4letter_cc),
            smart_formatting=True)

    def pyaudio_callback(self, in_data, frame_count, time_info, status):
        """PyAudio callback: enqueue a chunk, dropping it when the queue is full.

        :return: (None, pyaudio.paContinue) to keep the stream running
        """
        try:
            # FIX: was a blocking q.put(), which can never raise Full and
            # would stall the audio callback; drop the chunk instead when
            # the recognizer cannot keep up.
            self.q.put_nowait(in_data)
        except Full:
            pass  # deliberate best-effort: discard when the consumer lags
        return (None, pyaudio.paContinue)
# ---------------------------------------------------------------------------
# Script entry: record from the default microphone and stream the audio to
# the websocket recognizer until the user hits CTRL+C. Relies on module-level
# FORMAT / CHANNELS / RATE / CHUNK, pyaudio_callback, audio_source and
# recognize_using_weboscket defined earlier in the file.
# ---------------------------------------------------------------------------
audio = pyaudio.PyAudio()

# open stream using callback; start=False so recording begins explicitly below
stream = audio.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK,
                    stream_callback=pyaudio_callback,
                    start=False)

#########################################################################
#### Start the recording and start service to recognize the stream ######
#########################################################################
print("Enter CTRL+C to end recording...")
stream.start_stream()

try:
    recognize_thread = Thread(target=recognize_using_weboscket, args=())
    recognize_thread.start()
    # FIX: wait on the worker thread instead of the original CPU-burning
    # `while True: pass` busy loop.
    recognize_thread.join()
except KeyboardInterrupt:
    # stop recording and release all audio resources
    audio_source.completed_recording()
    stream.stop_stream()
    stream.close()
    audio.terminate()
class MicrophoneToText:
    """Microphone-to-text service built on Watson speech-to-text.

    Records the microphone via PyAudio, streams it to the Watson websocket
    recognizer, collects transcripts in result.txt, and periodically scans
    them for German real-estate keywords (street, location, capital, income,
    price), extracting the essential values into `resultkeywords`.
    """

    def __init__(self):
        """initialize the Microphone to Text service"""
        self.switch = True  # master flag; switchoff() flips it to stop loops

        # Py2/Py3 compatible queue import.
        try:
            from Queue import Queue, Full
        except ImportError:
            from queue import Queue, Full

        ###############################################
        #### Initalize queue to store the recordings ##
        ###############################################
        self.CHUNK = 1024
        # Note: chunks are discarded if the websocket client can't consume
        # fast enough, so increase the max size as per your choice.
        self.BUF_MAX_SIZE = self.CHUNK * 100

        # Buffer to store audio
        self.q = Queue(maxsize=int(round(self.BUF_MAX_SIZE / self.CHUNK)))

        # Create an instance of AudioSource
        self.audio_source = AudioSource(self.q, True, True)

        # Working txt file that collects the transcription results.
        self.result = open('result.txt', 'a+', encoding='utf-8')

        # Sentences containing a keyword, grouped by category.
        self.keywords = dict({
            'street': [],
            'location': [],
            'capital': [],
            'income': [],
            'price': []
        })
        # Deduplicated view of self.keywords, refreshed by analyze_txt().
        self.keywordsshort = dict()
        # Extracted essential values per category.
        self.resultkeywords = dict({
            'street': [],
            'location': [],
            'capital': [],
            'income': [],
            'price': []
        })
        self.convs = ConvSent.ConvertSent()
        self.conv = ConvNumb.ConvertNumber()

        ###############################################
        #### Prepare Speech to Text Service ########
        ###############################################
        # SECURITY NOTE(review): the IAM API key is hard-coded below; it has
        # effectively been published with this source. Move it to
        # configuration / environment and rotate the leaked credential.
        self.speech_to_text = SpeechToTextV1(
            iam_apikey='SWm4Cbisst2AihTyz42f6RXVZjaLLX6UTcal_PQxtADf',
            url='https://stream-fra.watsonplatform.net/speech-to-text/api')

        ###############################################
        #### Prepare the for recording using Pyaudio ##
        ###############################################
        # Recording parameters: 16-bit mono at 44.1 kHz.
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 44100

        # instantiate pyaudio
        self.audio = pyaudio.PyAudio()
        # open stream using callback; start=False so main() starts it
        self.stream = self.audio.open(format=self.FORMAT,
                                      channels=self.CHANNELS,
                                      rate=self.RATE,
                                      input=True,
                                      frames_per_buffer=self.CHUNK,
                                      stream_callback=self.pyaudio_callback,
                                      start=False)

    def switchoff(self):
        """Method to end the Microphone to Text service which closes all
        open connections and recordings

        :return: None
        """
        self.switch = False
        self.audio_source.completed_recording()
        self.stream.stop_stream()
        self.stream.close()
        self.audio.terminate()
        self.result.close()

    def recognize_using_weboscket(self, *args):
        """Initiate the recognize service and pass the audio source

        NOTE: the name keeps the historical "weboscket" typo so existing
        callers keep working.

        :return: None
        """
        self.mycallback = MyRecognizeCallback()
        self.speech_to_text.recognize_using_websocket(
            audio=self.audio_source,
            content_type='audio/l16; rate=44100',
            model='de-DE_BroadbandModel',
            recognize_callback=self.mycallback,
            interim_results=True)

    def analyze_txt(self):
        """analyzes the working txt file to find specific keywords and stores
        the results in a dict structure

        note: this method runs as long as the Microphone to Text service is
        active and gets executed all 5 seconds

        :return: None
        """
        while self.switch:
            with open('result.txt', 'r') as f:
                for text in f:
                    # sentence-level keyword matching (German triggers)
                    if 'straße' in text or 'adreße' in text or 'adresse' in text or 'strasse' in text or 'weg' in text:
                        self.keywords['street'].append(self.find_word(text))
                    if 'ort' in text or 'postleitzahl' in text or 'in' in text:
                        self.keywords['location'].append(self.find_word(text))
                    if 'eigenmittel' in text or 'eigenkapital' in text:
                        self.keywords['capital'].append(self.find_word(text))
                    if 'einkommen' in text or 'verdiene' in text or 'verdienen' in text:
                        self.keywords['income'].append(self.find_word(text))
                    if 'kaufpreis' in text or 'koste' in text:
                        self.keywords['price'].append(self.find_word(text))
            # deduplicate each category for the extraction step
            self.keywordsshort = {
                k: list(set(v))
                for k, v in self.keywords.items()
            }
            sleep(5)

    def find_correct_keyword(self):
        """finds in the sentences with keywords in them the essential
        information

        :return: None
        """
        for k, v in self.keywordsshort.items():
            # filler words that carry no information
            uselesswords = [
                'ähm', 'äh', 'ä', 'hh', ' ', 'oh', 'uh', 'und', '[geräusch]'
            ]
            for x in v:
                val = x
                if val in uselesswords:
                    continue
                if k == 'street':
                    print("val:" + val)
                    to_append = self.convs.find_street(val)
                    print(to_append)
                    if to_append is not None and to_append != 'straße':
                        self.resultkeywords[k].append(to_append)
                elif k == 'location':
                    to_append1 = self.convs.find_city(val)
                    if to_append1 is not None:
                        self.resultkeywords[k].append(to_append1)
                else:
                    # numeric categories (capital / income / price)
                    self.conv.setnum(val)
                    to_append2 = self.conv.getnum()
                    if to_append2 != 0:
                        self.resultkeywords[k].append(to_append2)
        self.resultkeywords = {
            k: list(set(v))
            for k, v in self.resultkeywords.items()
        }

    def print_results(self):
        """returns the final dict structure of results

        :return: dict with resultkeywords
        """
        print(self.keywordsshort)
        self.find_correct_keyword()
        print(self.resultkeywords)
        return self.resultkeywords

    def find_word(self, text):
        """finds the transcript in a json formatted text input

        :param text: json formatted string
        :return: the actual transcribed sentence
        """
        words = text.split('transcript":')
        words = words[1].split('}')
        word = words[0]
        return word

    def pyaudio_callback(self, in_data, frame_count, time_info, status):
        """puts a recording in the queue

        :param in_data: the recording to put in the queue
        :param frame_count: frame count if its specific otherwise not used
        :param time_info: timestamp if its specific otherwise not used
        :param status: status if its specific otherwise not used
        :return: None, the queue continues
        """
        try:
            # FIX: was a blocking q.put(), which can never raise Full and
            # would stall the audio callback; per the buffer note in
            # __init__ the intent is to discard when the consumer lags.
            self.q.put_nowait(in_data)
        except Full:
            pass  # discard
        return (None, pyaudio.paContinue)

    def threader(self):
        """Starts a thread to start the Microphone to Text service

        :return: None
        """
        main_thread = Thread(target=self.main)
        main_thread.start()

    def main(self):
        """Start the recording, start the Microphone to Text service in a
        separate thread and start analyzing in a separate thread

        :return: None
        """
        print("Enter CTRL+C or CTRL+F2 if in pycharm to end recording...")
        self.stream.start_stream()
        try:
            recognize_thread = Thread(target=self.recognize_using_weboscket,
                                      args=())
            recognize_thread.start()
            # FIX: was Thread(target=self.analyze_txt()), which *called*
            # analyze_txt here (blocking this thread forever) and handed its
            # None return value to Thread; pass the method itself instead.
            analyze_thread = Thread(target=self.analyze_txt)
            analyze_thread.start()
            while self.switch:
                # idle wait; sleep instead of a CPU-burning busy loop
                sleep(0.1)
        except KeyboardInterrupt:
            # stop recording for developing purposes with keyboardinterrupt
            self.audio_source.completed_recording()
            self.stream.stop_stream()
            self.stream.close()
            self.audio.terminate()
            self.result.close()
def live_translate():
    """Record the microphone and live-transcribe it with Watson STT,
    writing each (interim/final) transcript to spchToTxtLive.json.

    Blocks until interrupted with CTRL+C.

    :return: None
    """
    print('hey im here')
    # Py2/Py3 compatible queue import.
    try:
        from Queue import Queue, Full
    except ImportError:
        from queue import Queue, Full

    ###############################################
    #### Initalize queue to store the recordings ##
    ###############################################
    CHUNK = 1024
    # Note: It will discard if the websocket client can't consumme fast enough
    # So, increase the max size as per your choice
    BUF_MAX_SIZE = CHUNK * 10

    # Buffer to store audio
    q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))

    # Create an instance of AudioSource
    audio_source = AudioSource(q, True, True)

    # Reset the result file to an empty JSON object.
    audioTrans = {}
    with open("server\\routes\\spchToTxtLive.json", 'w') as f:
        json.dump(audioTrans, f)

    ###############################################
    #### Prepare Speech to Text Service ########
    ###############################################
    # NOTE(review): the service construction is commented out below, so
    # recognize_using_weboscket raises NameError on `speech_to_text` until
    # real credentials are supplied here.
    # speech_to_text = SpeechToTextV1(
    #     url="URL HERE",
    #     iam_apikey="APIKEY HERE")

    # define callback for the speech to text service
    class MyRecognizeCallback(RecognizeCallback):

        def __init__(self):
            RecognizeCallback.__init__(self)
            self.transcript = None

        def on_transcription(self, transcript):
            print('transcript: {}'.format(transcript))
            print(transcript)

        def on_connected(self):
            print('Connection was successful')

        def on_error(self, error):
            print('Error received: {}'.format(error))

        def on_inactivity_timeout(self, error):
            print('Inactivity timeout: {}'.format(error))

        def on_listening(self):
            print('Service is listening')

        def on_hypothesis(self, hypothesis):
            print(hypothesis)

        def on_data(self, data):
            # keep the best alternative of the latest result
            self.transcript = data['results'][0]['alternatives'][0][
                'transcript']
            print('{0}final: {1}'.format(
                '' if data['results'][0]['final'] else 'not ',
                self.transcript))
            audioTrans = '{0}final: {1}'.format(
                '' if data['results'][0]['final'] else 'not ',
                self.transcript)
            # FIX: the original dumped to the handle `f`, which the `with`
            # block above had already closed (ValueError at runtime); reopen
            # the file and append each result instead.
            with open("server\\routes\\spchToTxtLive.json", 'a') as out:
                json.dump(audioTrans, out)

        def on_close(self):
            print("Connection closed")

    # this function will initiate the recognize service and pass in the
    # AudioSource
    def recognize_using_weboscket(*args):
        mycallback = MyRecognizeCallback()
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            content_type='audio/l16; rate=44100',
            recognize_callback=mycallback,
            interim_results=True)
        print(mycallback.transcript)

    ###############################################
    #### Prepare the for recording using Pyaudio ##
    ###############################################
    # Recording parameters: 16-bit mono at 44.1 kHz.
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100

    # define callback for pyaudio to store the recording in queue
    def pyaudio_callback(in_data, frame_count, time_info, status):
        try:
            q.put(in_data)
        except Full:
            pass  # discard
        return (None, pyaudio.paContinue)

    # instantiate pyaudio
    audio = pyaudio.PyAudio()

    # open stream using callback
    stream = audio.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK,
                        stream_callback=pyaudio_callback,
                        start=False)

    #########################################################################
    #### Start the recording and start service to recognize the stream ######
    #########################################################################
    print("Enter CTRL+C to end recording...")
    stream.start_stream()

    try:
        recognize_thread = Thread(target=recognize_using_weboscket, args=())
        recognize_thread.start()
        # FIX: wait on the worker thread instead of the original CPU-burning
        # `while True: pass` busy loop.
        recognize_thread.join()
    except KeyboardInterrupt:
        # stop recording
        audio_source.completed_recording()
        stream.stop_stream()
        stream.close()
        audio.terminate()