Example #1
def open_recognizer(audio_config):

    q = Queue(maxsize=int(round(audio_config['buf_max_size'] / audio_config['chunk'])))
    audio_source = AudioSource(q,True,True)

    audio_thread = Thread(target=audio_callback, args=(audio_config['address'],q))
    audio_thread.start()

    try:
        rec_thread = Thread(target=recognize_using_weboscket, args=(audio_source,))
        rec_thread.start()
        while True:
            pass
    except KeyboardInterrupt:
        audio_source.completed_recording()
        return
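The snippet above relies on an audio_callback function and a recognize_using_weboscket function defined elsewhere in the same module, and reads its settings from a plain dict. A hypothetical call, with key names taken from the function body and purely illustrative values, might look like this:

# Hypothetical usage sketch; the value of 'address' depends on what
# audio_callback expects (open_recognizer only passes it through).
audio_config = {
    'chunk': 1024,               # frames per audio chunk
    'buf_max_size': 1024 * 10,   # queue capacity = buf_max_size / chunk
    'address': 'udp://0.0.0.0:5005',
}
open_recognizer(audio_config)    # runs until interrupted with CTRL+C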
Example #2
class Watson():
    __metaclass__ = Singleton

    def __init__(self):
        self.CHUNK = 1024
        self.BUF_MAX_SIZE = self.CHUNK * 10
        self.q = Queue(maxsize=int(round(self.BUF_MAX_SIZE / self.CHUNK)))
        self.audio_source = AudioSource(self.q, True, True)
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 44100

        self.__apikey_stt = Config().Get("SpeechToText", "WatsonSTTAPIKey")
        self.__url_stt = Config().Get("SpeechToText", "WatsonSTTUrl")

        self.__apikey_tts = Config().Get("TextToSpeech", "WatsonTTSAPIKey")
        self.__url_tts = Config().Get("TextToSpeech", "WatsonTTSUrl")

        self.__voiceName = Config().Get("TextToSpeech", "WatsonVoiceName")

        self.__language_2letter_cc = Config().Get("SpeechToText",
                                                  "CountryCode2Letter")
        self.__language_4letter_cc = Config().Get("SpeechToText",
                                                  "CountryCode4Letter")
        self.__audioPlayer = Config().Get("TextToSpeech",
                                          "AudioPlayer") + " '{0}'"

        self.text_to_speech = TextToSpeechV1(url=self.__url_tts,
                                             iam_apikey=self.__apikey_tts)
        self.text_to_speech.set_default_headers(
            {'x-watson-learning-opt-out': "true"})

        self.speech_to_text = SpeechToTextV1(url=self.__url_stt,
                                             iam_apikey=self.__apikey_stt)
        self.speech_to_text.set_default_headers(
            {'x-watson-learning-opt-out': "true"})

        self.audio = pyaudio.PyAudio()

        # open stream using callback
        self.stream = self.audio.open(format=self.FORMAT,
                                      channels=self.CHANNELS,
                                      rate=self.RATE,
                                      input=True,
                                      frames_per_buffer=self.CHUNK,
                                      stream_callback=self.pyaudio_callback,
                                      start=False)
        try:
            rospy.init_node('STT_watson_node', anonymous=True)
        except:
            FileLogger().Info('already initialized')

    def Speak(self, audioString, playAudio=False):
        if (len(audioString) == 0):
            return
        tmpAudioFile = os.path.join(Global.EmeraldPath, "Data", "TTS", ("Watson_" + \
            self.__language_2letter_cc + "_" + \
            self.CleanString(audioString) + ".mp3"))

        if not os.path.isfile(tmpAudioFile):
            with open(join(dirname(__file__), tmpAudioFile),
                      'wb') as audio_file:
                response = self.text_to_speech.synthesize(
                    audioString, accept='audio/mp3',
                    voice=self.__voiceName).get_result()
                audio_file.write(response.content)
        if (playAudio):
            os.system(self.__audioPlayer.format(tmpAudioFile))
        return tmpAudioFile

    def Listen(self):
        self.stream.start_stream()

        try:
            while True:
                recognize_thread = Thread(
                    target=self.recognize_using_weboscket, args=())
                recognize_thread.start()

                recognize_thread.join()

        except KeyboardInterrupt:
            # stop recording
            self.audio_source.completed_recording()
            self.stream.stop_stream()
            self.stream.close()
            self.audio.terminate()

    def CleanString(self, string):
        data = re.sub(r'\W+', '', string)
        return (data[:75] + '_TRIMMED') if len(data) > 75 else data

    def recognize_using_weboscket(self, *args):
        mycallback = MyRecognizeCallback()
        self.speech_to_text.recognize_using_websocket(
            audio=self.audio_source,
            content_type='audio/l16; rate=44100',
            recognize_callback=mycallback,
            interim_results=True,
            model='{0}_BroadbandModel'.format(self.__language_4letter_cc),
            smart_formatting=True)

    def pyaudio_callback(self, in_data, frame_count, time_info, status):
        try:
            self.q.put(in_data)
        except Full:
            pass
        return (None, pyaudio.paContinue)
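A minimal usage sketch for the Watson class above, assuming the project-specific helpers it references (Singleton, Config, Global, FileLogger) and the Watson SDK imports are available:

# Usage sketch; Speak() caches the synthesized MP3 and Listen() streams the
# microphone to the speech-to-text service until CTRL+C.
watson = Watson()
mp3_path = watson.Speak("Hello there", playAudio=True)
print(mp3_path)
watson.Listen()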
Example #3
audio = pyaudio.PyAudio()

# open stream using callback
stream = audio.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK,
                    stream_callback=pyaudio_callback,
                    start=False)

#########################################################################
#### Start the recording and start service to recognize the stream ######
#########################################################################

print("Enter CTRL+C to end recording...")
stream.start_stream()

try:
    recognize_thread = Thread(target=recognize_using_weboscket, args=())
    recognize_thread.start()

    while True:
        pass
except KeyboardInterrupt:
    # stop recording
    audio_source.completed_recording()
    stream.stop_stream()
    stream.close()
    audio.terminate()
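The fragment above assumes module-level setup defined earlier in its source file (FORMAT, CHANNELS, RATE, CHUNK, audio_source, pyaudio_callback and recognize_using_weboscket). A minimal sketch of that setup, mirroring the other examples on this page; the import path may be watson_developer_cloud instead of ibm_watson depending on the SDK version, and the credentials are placeholders:

from queue import Queue, Full
from threading import Thread
import pyaudio
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource

# Recording parameters and the queue feeding the websocket client
CHUNK = 1024
BUF_MAX_SIZE = CHUNK * 10
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))
audio_source = AudioSource(q, True, True)

speech_to_text = SpeechToTextV1(iam_apikey='APIKEY HERE', url='URL HERE')

class MyRecognizeCallback(RecognizeCallback):
    def on_transcription(self, transcript):
        print(transcript)

    def on_error(self, error):
        print('Error received: {}'.format(error))

def recognize_using_weboscket(*args):
    mycallback = MyRecognizeCallback()
    speech_to_text.recognize_using_websocket(
        audio=audio_source,
        content_type='audio/l16; rate=44100',
        recognize_callback=mycallback,
        interim_results=True)

def pyaudio_callback(in_data, frame_count, time_info, status):
    try:
        q.put(in_data)
    except Full:
        pass  # discard
    return (None, pyaudio.paContinue)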
Example #4
class MicrophoneToText:
    def __init__(self):
        """initialize the Microphone to Text service"""

        self.switch = True

        try:
            from Queue import Queue, Full
        except ImportError:
            from queue import Queue, Full

        ###############################################
        #### Initialize queue to store the recordings ##
        ###############################################
        self.CHUNK = 1024
        # Note: chunks are discarded if the websocket client can't consume
        # them fast enough, so increase the max size as needed
        self.BUF_MAX_SIZE = self.CHUNK * 100
        # Buffer to store audio
        self.q = Queue(maxsize=int(round(self.BUF_MAX_SIZE / self.CHUNK)))

        # Create an instance of AudioSource
        self.audio_source = AudioSource(self.q, True, True)

        #with open('result.txt', 'w') as f:
        #pass

        # Create a results txt file
        self.result = open('result.txt', 'a+', encoding='utf-8')

        # Create a results dictionary
        self.keywords = dict({
            'street': [],
            'location': [],
            'capital': [],
            'income': [],
            'price': []
        })

        self.keywordsshort = dict()
        self.resultkeywords = dict({
            'street': [],
            'location': [],
            'capital': [],
            'income': [],
            'price': []
        })

        self.convs = ConvSent.ConvertSent()
        self.conv = ConvNumb.ConvertNumber()

        ###############################################
        #### Prepare Speech to Text Service ########
        ###############################################

        # initialize speech to text service
        self.speech_to_text = SpeechToTextV1(
            #nicis key
            #iam_apikey='aBOJ7l-LsQuJDc9vdMcyomvfv9PeqKTToplptgmjPRlA',
            #url='https://gateway-lon.watsonplatform.net/speech-to-text/api')
            iam_apikey='SWm4Cbisst2AihTyz42f6RXVZjaLLX6UTcal_PQxtADf',
            url='https://stream-fra.watsonplatform.net/speech-to-text/api')

        ###############################################
        #### Prepare for recording using Pyaudio ##
        ###############################################
        # Variables for recording the speech
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 44100
        # instantiate pyaudio
        self.audio = pyaudio.PyAudio()

        # open stream using callback
        self.stream = self.audio.open(format=self.FORMAT,
                                      channels=self.CHANNELS,
                                      rate=self.RATE,
                                      input=True,
                                      frames_per_buffer=self.CHUNK,
                                      stream_callback=self.pyaudio_callback,
                                      start=False)

    def switchoff(self):
        """Method to end the Microphone to Text service which closes all open connections and recordings
        :return: None
        """

        self.switch = False

        self.audio_source.completed_recording()
        self.stream.stop_stream()
        self.stream.close()
        self.audio.terminate()
        self.result.close()

    def recognize_using_weboscket(self, *args):
        """Initiate the recognize service and pass the audio source
        :return: None
        """

        self.mycallback = MyRecognizeCallback()
        self.speech_to_text.recognize_using_websocket(
            audio=self.audio_source,
            content_type='audio/l16; rate=44100',
            model='de-DE_BroadbandModel',
            recognize_callback=self.mycallback,
            interim_results=True)

    def analyze_txt(self):
        """analyzes the working txt file to find specific keywords and stores the results in a dict structure
        note: this method runs as long as the Microphone to Text service is active and gets executed all 5 seconds
        :return: None
        """

        while self.switch:
            with open('result.txt', 'r') as f:
                for text in f:
                    # text = text.lower()
                    if 'straße' in text or 'adreße' in text or 'adresse' in text or 'strasse' in text or 'weg' in text:
                        self.keywords['street'].append(self.find_word(text))
                    if 'ort' in text or 'postleitzahl' in text or 'in' in text:
                        self.keywords['location'].append(self.find_word(text))
                    if 'eigenmittel' in text or 'eigenkapital' in text:
                        self.keywords['capital'].append(self.find_word(text))
                    if 'einkommen' in text or 'verdiene' in text or 'verdienen' in text:
                        self.keywords['income'].append(self.find_word(text))
                    if 'kaufpreis' in text or 'koste' in text:
                        self.keywords['price'].append(self.find_word(text))
            self.keywordsshort = {
                k: list(set(v))
                for k, v in self.keywords.items()
            }
            sleep(5)

    def find_correct_keyword(self):
        """finds in the sentences with keywords in them the essential information
        :return: None
        """

        for k, v in self.keywordsshort.items():
            #bindwords = ['lautet', 'ist', 'sind', 'beträgt']
            uselesswords = [
                'ähm', 'äh', 'ä', 'hh', ' ', 'oh', 'uh', 'und', '[geräusch]'
            ]
            for x in v:
                #for y in bindwords:
                #if y in x:
                #vals = x.split(y)
                #val = vals[1]

                val = x
                if val in uselesswords:
                    continue
                if k == 'street':
                    print("val:" + val)
                    to_append = self.convs.find_street(val)
                    print(to_append)
                    if to_append is not None and to_append != 'straße':
                        self.resultkeywords[k].append(to_append)
                elif k == 'location':
                    to_append1 = self.convs.find_city(val)
                    if to_append1 is not None:
                        self.resultkeywords[k].append(to_append1)
                else:
                    self.conv.setnum(val)
                    to_append2 = self.conv.getnum()
                    if to_append2 != 0:
                        self.resultkeywords[k].append(to_append2)

            self.resultkeywords = {
                k: list(set(v))
                for k, v in self.resultkeywords.items()
            }

    def print_results(self):
        """returns the final dict structure of results
        :return: dict with resultkeywords
        """

        print(self.keywordsshort)
        self.find_correct_keyword()
        print(self.resultkeywords)
        return self.resultkeywords

    def find_word(self, text):
        """finds the transcript in a json formatted text input
        :param text: json formatted string
        :return: the actual transcribed sentence
        """

        words = text.split('transcript":')
        words = words[1].split('}')
        word = words[0]
        return word

    def pyaudio_callback(self, in_data, frame_count, time_info, status):
        """puts a recording in the queue
        :param in_data: the recording to put in the queue
        :param frame_count: frame count if its specific otherwise not used
        :param time_info: timestamp if its specific otherwise not used
        :param status: status if its specific otherwise not used
        :return: None, the queue continues
        """

        try:
            self.q.put(in_data)
        except Full:
            pass  # discard
        return (None, pyaudio.paContinue)

    def threader(self):
        """Starts a thread to start the Microphone to Text service
        :return: None
        """

        main_thread = Thread(target=self.main)
        main_thread.start()

    def main(self):
        """Start the recording, start the Microphone to Text service in a separate thread
        and start analyzing in a separate thread
        :return: None
        """

        print("Enter CTRL+C or CTRL+F2 if in pycharm to end recording...")
        self.stream.start_stream()

        try:
            recognize_thread = Thread(target=self.recognize_using_weboscket,
                                      args=())
            recognize_thread.start()

            analyze_thread = Thread(target=self.analyze_txt)
            analyze_thread.start()

            while self.switch:
                pass

        except KeyboardInterrupt:
            # stop recording on KeyboardInterrupt (useful during development)
            self.audio_source.completed_recording()
            self.stream.stop_stream()
            self.stream.close()
            self.audio.terminate()
            self.result.close()
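A possible way to drive the class above; the 60-second duration is arbitrary and only meant to let a few transcriptions accumulate before the results are printed:

# Usage sketch; assumes the ConvSent/ConvNumb helpers and the SDK imports used
# by the class are importable.
if __name__ == '__main__':
    from time import sleep

    mtt = MicrophoneToText()
    mtt.threader()        # runs main() (recording + recognition) in a thread
    sleep(60)             # let it record and transcribe for a while
    mtt.switchoff()       # stop recording and close all connections
    mtt.print_results()   # print and return the extracted keywords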
Example #5
def live_translate():
    print('hey im here')
    try:
        from Queue import Queue, Full
    except ImportError:
        from queue import Queue, Full
    ###############################################
    #### Initialize queue to store the recordings ##
    ###############################################
    CHUNK = 1024
    # Note: chunks are discarded if the websocket client can't consume
    # them fast enough, so increase the max size as needed
    BUF_MAX_SIZE = CHUNK * 10
    # Buffer to store audio
    q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))
    # Create an instance of AudioSource
    audio_source = AudioSource(q, True, True)
    audioTrans = {}
    with open("server\\routes\\spchToTxtLive.json", 'w') as f:
        json.dump(audioTrans, f)
    ###############################################
    #### Prepare Speech to Text Service ########
    ###############################################
    # initialize speech to text service
    # NOTE: supply your own service credentials here
    speech_to_text = SpeechToTextV1(
        url="URL HERE",
        iam_apikey="APIKEY HERE")

    # define callback for the speech to text service
    class MyRecognizeCallback(RecognizeCallback):
        def __init__(self):
            RecognizeCallback.__init__(self)
            self.transcript = None

        def on_transcription(self, transcript):
            print('transcript: {}'.format(transcript))
            print(transcript)
            audioTrans = transcript

        def on_connected(self):
            print('Connection was successful')

        def on_error(self, error):
            print('Error received: {}'.format(error))

        def on_inactivity_timeout(self, error):
            print('Inactivity timeout: {}'.format(error))

        def on_listening(self):
            print('Service is listening')

        def on_hypothesis(self, hypothesis):
            print(hypothesis)

        def on_data(self, data):
            self.transcript = data['results'][0]['alternatives'][0][
                'transcript']
            print('{0}final: {1}'.format(
                '' if data['results'][0]['final'] else 'not ',
                self.transcript))
            audioTrans = '{0}final: {1}'.format(
                '' if data['results'][0]['final'] else 'not ', self.transcript)
            # the file handle from the outer `with` block is closed by now,
            # so reopen the file to persist the latest transcript
            with open("server\\routes\\spchToTxtLive.json", 'w') as out:
                json.dump(audioTrans, out)
            # print(data)
        def on_close(self):
            print("Connection closed")

    # this function will initiate the recognize service and pass in the AudioSource
    def recognize_using_weboscket(*args):
        mycallback = MyRecognizeCallback()
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            content_type='audio/l16; rate=44100',
            recognize_callback=mycallback,
            interim_results=True)
        print(mycallback.transcript)

    ###############################################
    #### Prepare for recording using Pyaudio ##
    ###############################################
    # Variables for recording the speech
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100

    # define callback for pyaudio to store the recording in queue
    def pyaudio_callback(in_data, frame_count, time_info, status):
        try:
            q.put(in_data)
        except Full:
            pass  # discard
        return (None, pyaudio.paContinue)

    # instantiate pyaudio
    audio = pyaudio.PyAudio()
    # open stream using callback
    stream = audio.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK,
                        stream_callback=pyaudio_callback,
                        start=False)
    #########################################################################
    #### Start the recording and start service to recognize the stream ######
    #########################################################################
    print("Enter CTRL+C to end recording...")
    stream.start_stream()
    try:
        recognize_thread = Thread(target=recognize_using_weboscket, args=())
        recognize_thread.start()
        while True:
            pass

    except KeyboardInterrupt:
        # stop recording
        audio_source.completed_recording()
        stream.stop_stream()
        stream.close()
        audio.terminate()
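live_translate() blocks until the recording is interrupted, so a caller would typically invoke it directly (after filling in real credentials for the SpeechToTextV1 constructor) and read the dumped transcript afterwards:

# Usage sketch; the JSON path matches the one written inside live_translate().
if __name__ == '__main__':
    live_translate()  # press CTRL+C to stop recording
    with open("server\\routes\\spchToTxtLive.json", 'r') as f:
        print(json.load(f))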