Example #1
def DeepSpeech(Window, SpeechToNLPQueue, wavefile):

    # Create Signal Object
    SpeechSignal = GUISignal()
    SpeechSignal.signal.connect(Window.UpdateSpeechBox)

    MsgSignal = GUISignal()
    MsgSignal.signal.connect(Window.UpdateMsgBox)

    # References to models:
    model = 'DeepSpeech_Models/output_graph.pbmm'
    alphabet = 'DeepSpeech_Models/alphabet.txt'
    lm = 'DeepSpeech_Models/lm.binary'
    trie = 'DeepSpeech_Models/trie'

    print('Loading model from file {}'.format(model), file=sys.stderr)
    model_load_start = timer()
    ds = Model(model, N_FEATURES, N_CONTEXT, alphabet, BEAM_WIDTH)
    model_load_end = timer() - model_load_start
    print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)

    if lm and trie:
        print('Loading language model from files {} {}'.format(lm, trie),
              file=sys.stderr)
        lm_load_start = timer()
        ds.enableDecoderWithLM(alphabet, lm, trie, LM_ALPHA, LM_BETA)
        lm_load_end = timer() - lm_load_start
        print('Loaded language model in {:.3}s.'.format(lm_load_end),
              file=sys.stderr)

    audio = wavefile

    fin = wave.open(audio, 'rb')
    fs = fin.getframerate()
    if fs != 16000:
        print(
            'Warning: original sample rate ({}) is different from 16kHz. Resampling might produce erratic speech recognition.'
            .format(fs),
            file=sys.stderr)
        fs, audio = convert_samplerate(audio)
    else:
        audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

    audio_length = fin.getnframes() * (1.0 / 16000)
    fin.close()

    print('Running inference.', file=sys.stderr)
    inference_start = timer()
    output = ds.stt(audio, fs)
    print(output)
    inference_end = timer() - inference_start
    print('Inference took %0.3fs for %0.3fs audio file.' %
          (inference_end, audio_length),
          file=sys.stderr)

    QueueItem = SpeechNLPItem(output, True, 0, 0, 'Speech')
    SpeechToNLPQueue.put(QueueItem)
    SpeechSignal.signal.emit([QueueItem])
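
# Note: convert_samplerate() is called above but not defined in this excerpt.
# A minimal sketch of what it might look like, shelling out to SoX for a
# 16 kHz mono resample (the exact flags are an assumption based on how the
# return value is used above):

import subprocess
import numpy as np

def convert_samplerate(audio_path, desired_rate=16000):
    # Ask SoX to decode the file and emit raw little-endian 16-bit mono PCM
    # at the desired rate on stdout, then wrap the bytes in a NumPy array.
    sox_cmd = ['sox', audio_path,
               '--type', 'raw', '--bits', '16', '--channels', '1',
               '--rate', str(desired_rate),
               '--encoding', 'signed-integer', '--endian', 'little', '-']
    output = subprocess.check_output(sox_cmd)
    return desired_rate, np.frombuffer(output, dtype=np.int16)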
Example #2
    def generator(self):
        # Create Signal Objects
        MsgSignal = GUISignal()
        MsgSignal.signal.connect(self.Window.UpdateMsgBox)

        # Create Signal Objects
        VUSignal = GUISignal()
        VUSignal.signal.connect(self.Window.UpdateVUBox)

        while not self.closed:
            # Use a blocking get() to ensure there's at least one chunk of
            # data, and stop iteration if the chunk is None, indicating the
            # end of the audio stream.
            chunk = self._buff.get()
            if chunk is None:
                return
            data = [chunk]

            # Plot in the GUI
            signal = b''.join(data)
            signal = np.frombuffer(signal, dtype=np.int16)
            VUSignal.signal.emit([signal])

            # Stop streaming after ten seconds; re-enable the Start button
            if time.time() > self.start_time + 10:
                MsgSignal.signal.emit(["Recorded 10 seconds"])
                self.Window.StartButton.setEnabled(True)
                break

            self.samplesCounter += self._chunk

            if self.Window.stopped == 1:
                print('Speech Thread Killed')
                self.Window.StartButton.setEnabled(True)
                return

            # Now consume whatever other data's still buffered.
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except queue.Empty:
                    break

            yield signal
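
# The generator above assumes self._buff is a queue.Queue that an audio
# callback keeps filling with raw chunks (and closes with a None sentinel).
# A sketch of the kind of PyAudio stream callback that typically feeds it;
# the method name is an assumption:

import pyaudio

def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
    # (Method on the same streaming class as generator().) PyAudio invokes
    # this from its own thread; hand the raw chunk to the queue.
    self._buff.put(in_data)
    return None, pyaudio.paContinue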
Example #3
def DeepSpeech(Window, SpeechToNLPQueue):

    # Create Signal Object
    SpeechSignal = GUISignal()
    SpeechSignal.signal.connect(Window.UpdateSpeechBox)

    MsgSignal = GUISignal()
    MsgSignal.signal.connect(Window.UpdateMsgBox)

    # References to models:
    model = 'DeepSpeech_Models/output_graph.pbmm'
    alphabet = 'DeepSpeech_Models/alphabet.txt'
    lm = 'DeepSpeech_Models/lm.binary'
    trie = 'DeepSpeech_Models/trie'

    print('Loading model from file {}'.format(model), file=sys.stderr)
    model_load_start = timer()
    ds = Model(model, N_FEATURES, N_CONTEXT, alphabet, BEAM_WIDTH)
    model_load_end = timer() - model_load_start
    print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)

    if lm and trie:
        print('Loading language model from files {} {}'.format(lm, trie),
              file=sys.stderr)
        lm_load_start = timer()
        ds.enableDecoderWithLM(alphabet, lm, trie, LM_ALPHA, LM_BETA)
        lm_load_end = timer() - lm_load_start
        print('Loaded language model in {:.3}s.'.format(lm_load_end),
              file=sys.stderr)

    audio = []
    with MicrophoneStream(Window, RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        for content in audio_generator:
            audio.extend(content)

    # deepspeech's stt() expects a 16-bit NumPy array, not a Python list
    result = ds.stt(np.array(audio, dtype=np.int16), 16000)

    QueueItem = SpeechNLPItem(result, True, 0, 0, 'Speech')
    SpeechToNLPQueue.put(QueueItem)
    SpeechSignal.signal.emit([QueueItem])
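
# These transcription entry points are written to run off the GUI thread.
# A minimal sketch of how one might be launched; the queue/threading wiring
# is an assumption, not shown in the original:

import queue
import threading

SpeechToNLPQueue = queue.Queue()
# Window: the running GUI instance from the application
speech_thread = threading.Thread(target=DeepSpeech,
                                 args=(Window, SpeechToNLPQueue))
speech_thread.start()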
Example #4
def GenerateForm(Window, text):

    # Create Signal Objects
    MsgSignal = GUISignal()
    MsgSignal.signal.connect(Window.UpdateMsgBox)

    ButtonsSignal = GUISignal()
    ButtonsSignal.signal.connect(Window.ButtonsSetEnabled)

    ButtonsSignal.signal.emit([(Window.GenerateFormButton, False)])

    # Strip newlines, percent-escape the text, and send it to the online
    # punctuator service via curl
    dummy12 = text
    dummy12 = dummy12.replace('\r', '').replace('\n', '')
    dummyP2 = dummy12.replace(' ', '%20')
    dummyP3 = dummyP2.replace('\'', '%27')
    dummyP = dummyP3.replace('&', '%26')
    part1 = 'curl -d text=' + dummyP + ' http://bark.phon.ioc.ee/punctuator'
    op = commands.getstatusoutput(part1)
    output = op[1].rsplit('\n', 1)[1]
    sentsList = textParse2.sent_tokenize(output)  #final sentences
    wordsList = textParse2.word_tokenize(output)  #final words
    file_name = str(datetime.datetime.now().strftime("%c"))

    try:
        prescription_form2.generateFields(sentsList, wordsList, dummy12,
                                          file_name)

        MsgSignal.signal.emit(["Form Generated: /Dumps/" + file_name + ".pdf"])

        # Pop file open
        p = subprocess.Popen(["xdg-open", "./Dumps/" + file_name + ".pdf"])
        returncode = p.wait()  # wait to exit

    except Exception as e:
        print("Error encountered generating form. Exception: " + str(e))
        MsgSignal.signal.emit(
            ["Error encountered generating form. Exception: " + str(e)])

    ButtonsSignal.signal.emit([(Window.GenerateFormButton, True)])
    print("Form Generator Thread Killed.")
Example #5
def TextSpeech(Window, SpeechToNLPQueue, textfile_name):
    print("Entered TextSpeech")

    # Create GUI Signal Object
    SpeechSignal = GUISignal()
    SpeechSignal.signal.connect(Window.UpdateSpeechBox)

    MsgSignal = GUISignal()
    MsgSignal.signal.connect(Window.UpdateMsgBox)

    ButtonsSignal = GUISignal()
    ButtonsSignal.signal.connect(Window.ButtonsSetEnabled)

    # Read the transcript in text mode so the lines concatenate as str
    with open(textfile_name, "r") as Text_File:
        text = Text_File.read()

    counter = 0

    # Break into sentences
    dummy12 = text
    dummy12 = dummy12.replace('\r', '').replace('\n', '')
    dummyP2 = dummy12.replace(' ', '%20')
    dummyP3 = dummyP2.replace('\'', '%27')
    dummyP = dummyP3.replace('&', '%26')
    part1 = 'curl -d text=' + dummyP + ' http://bark.phon.ioc.ee/punctuator'
    op = commands.getstatusoutput(part1)
    output = op[1].rsplit('\n', 1)[1]
    sentsList = textParse2.sent_tokenize(output)  #final sentences

    # Stream text
    num_chars_printed = 0
    for sentence in sentsList:
        for i, character in enumerate(sentence):
            QueueItem = SpeechNLPItem(sentence[:i + 1], False, 0,
                                      num_chars_printed, 'Speech')
            SpeechSignal.signal.emit([QueueItem])
            if (character == " "):
                time.sleep(BETWEEN_WORDS_PAUSE)
            elif (character == ","):
                time.sleep(COMMA_PAUSE)
            elif (character == "."):
                time.sleep(BETWEEN_SENETENCES_PAUSE)
            else:
                time.sleep(BETWEEN_CHARACTERS_PAUSE)
            num_chars_printed = len(sentence[:i + 1])

            if (Window.stopped == 1):
                print('Text Speech Thread Killed')
                QueueItem = SpeechNLPItem(sentence[:i + 1], True, 0,
                                          num_chars_printed, 'Speech')
                SpeechSignal.signal.emit([QueueItem])
                SpeechToNLPQueue.put(QueueItem)
                return

        QueueItem = SpeechNLPItem(sentence, True, 0, num_chars_printed,
                                  'Speech')
        SpeechSignal.signal.emit([QueueItem])
        SpeechToNLPQueue.put(QueueItem)
        num_chars_printed = 0

    # Clean up and end thread
    MsgSignal.signal.emit(["Transcription of text file complete!"])
    ButtonsSignal.signal.emit([(Window.StartButton, True),
                               (Window.ComboBox, True),
                               (Window.ResetButton, True)])
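
# The pause constants used above are defined elsewhere in the module.
# Plausible values, shown only to make the snippet self-contained (the
# numbers are assumptions, not the original settings):

BETWEEN_CHARACTERS_PAUSE = 0.01  # ordinary characters
BETWEEN_WORDS_PAUSE = 0.05       # after a space
COMMA_PAUSE = 0.25               # after a comma
BETWEEN_SENETENCES_PAUSE = 0.5   # after a period (spelled as in the code above)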
Example #6
    def generator(self):
        # Create GUI Signal Objects
        GoogleSignal = GUISignal()
        GoogleSignal.signal.connect(self.Window.StartGoogle)

        MsgSignal = GUISignal()
        MsgSignal.signal.connect(self.Window.UpdateMsgBox)

        VUSignal = GUISignal()
        VUSignal.signal.connect(self.Window.UpdateVUBox)

        while not self.closed:
            # Use a blocking get() to ensure there's at least one chunk of
            # data, and stop iteration if the chunk is None, indicating the
            # end of the audio stream.
            chunk = self._buff.get()
            if chunk is None:
                return
            data = [chunk]

            # VU Meter in the GUI
            signal = b''.join(data)
            signal = np.frombuffer(signal, dtype=np.int16)
            VUSignal.signal.emit([signal])

            # Stop streaming after one minute, create new thread that does recognition
            if time.time() > self.start_time + 60:
                GoogleSignal.signal.emit(["Mic"])
                MsgSignal.signal.emit([
                    "API's 1 minute limit reached. Restablishing connection!"
                ])
                break

            self.samplesCounter += self._chunk

            if self.Window.stopped == 1:
                print('Speech Thread Killed')

                # Dump file to disk
                output_audio = wave.open(
                    "./Dumps/" + str(datetime.datetime.now().strftime("%c")) +
                    ".wav", 'wb')
                output_audio.setnchannels(1)  # mono
                output_audio.setsampwidth(2)
                output_audio.setframerate(RATE)
                output_audio.writeframesraw(MicrophoneStream.audio_buffer)
                output_audio.close()

                MicrophoneStream.audio_buffer = ""
                return

            # Now consume whatever other data's still buffered.
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except queue.Empty:
                    break

            #MicrophoneStream.audio_buffer = np.append(MicrophoneStream.audio_buffer, np.fromstring(b''.join(data), 'Int16'))
            MicrophoneStream.audio_buffer += b''.join(data)
            yield b''.join(data)
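
# This generator reads and writes two class-level attributes on
# MicrophoneStream. A sketch of the initializations it assumes (the exact
# values are assumptions):

class MicrophoneStream(object):
    micSessionCounter = 0  # how many times the stream has reconnected
    audio_buffer = b""     # raw chunks accumulated for the WAV dump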
Example #7
def GoogleSpeech(Window, SpeechToNLPQueue):

    # Create GUI Signal Object
    SpeechSignal = GUISignal()
    SpeechSignal.signal.connect(Window.UpdateSpeechBox)

    MsgSignal = GUISignal()
    MsgSignal.signal.connect(Window.UpdateMsgBox)

    ButtonsSignal = GUISignal()
    ButtonsSignal.signal.connect(Window.ButtonsSetEnabled)

    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        profanity_filter=True)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(Window, RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        try:
            responses = client.streaming_recognize(streaming_config, requests)

            # Signal that streaming has started
            print(
                "Started speech recognition on microphone audio via Google Speech API.\nMicrophone Session counter: "
                + str(MicrophoneStream.micSessionCounter))
            MsgSignal.signal.emit([
                "Started speech recognition on microphone audio via Google Speech API.\nMicrophone Session counter: "
                + str(MicrophoneStream.micSessionCounter)
            ])

            # Now, put the transcription responses to use.
            num_chars_printed = 0
            responseTimeStamp = time.time()

            for response in responses:
                if not response.results:
                    continue
                # The `results` list is consecutive. For streaming, we only care about
                # the first result being considered, since once it's `is_final`, it
                # moves on to considering the next utterance.
                result = response.results[0]
                if not result.alternatives:
                    continue

                # Display the transcription of the top alternative.
                transcript = result.alternatives[0].transcript
                confidence = result.alternatives[0].confidence

                # Display interim results, but with a carriage return at the end of the
                # line, so subsequent lines will overwrite them.
                # If the previous result was longer than this one, we need to print
                # some extra spaces to overwrite the previous result
                overwrite_chars = ' ' * (num_chars_printed - len(transcript))

                if result.is_final:
                    #print(transcript + overwrite_chars)
                    QueueItem = SpeechNLPItem(transcript, result.is_final,
                                              confidence, num_chars_printed,
                                              'Speech')
                    SpeechToNLPQueue.put(QueueItem)
                    SpeechSignal.signal.emit([QueueItem])
                    num_chars_printed = 0

                else:
                    #sys.stdout.write(transcript + overwrite_chars + '\r')
                    #sys.stdout.flush()
                    QueueItem = SpeechNLPItem(transcript, result.is_final,
                                              confidence, num_chars_printed,
                                              'Speech')
                    SpeechSignal.signal.emit([QueueItem])
                    num_chars_printed = len(transcript)

        except Exception as e:
            MsgSignal.signal.emit([
                "Unable to get response from Google! Network or other issues. Please try again!\nException: "
                + str(e)
            ])
            ButtonsSignal.signal.emit([(Window.StartButton, True),
                                       (Window.ComboBox, True),
                                       (Window.ResetButton, True)])
            sys.exit()
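
# This example targets the pre-1.0 google-cloud-speech package, where enums
# and types are importable modules; the imports it assumes:

from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types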
Example #8
    def generator(self):
        # Create GUI Signal Objects
        GoogleSignal = GUISignal()
        GoogleSignal.signal.connect(self.Window.StartGoogle)

        MsgSignal = GUISignal()
        MsgSignal.signal.connect(self.Window.UpdateMsgBox)

        VUSignal = GUISignal()
        VUSignal.signal.connect(self.Window.UpdateVUBox)

        ButtonsSignal = GUISignal()
        ButtonsSignal.signal.connect(self.Window.ButtonsSetEnabled)

        while not self.closed:
            # Use a blocking get() to ensure there's at least one chunk of
            # data, and stop iteration if the chunk is None, indicating the
            # end of the audio stream.
            #chunk = self._buff.get()
            time.sleep(.1)
            chunk = self.wf.readframes(CHUNK)

            if chunk == b'':  # wave.readframes returns bytes; b'' marks EOF
                FileStream.position = 0
                MsgSignal.signal.emit(
                    ["Transcription of audio file complete!"])
                ButtonsSignal.signal.emit([(self.Window.StartButton, True),
                                           (self.Window.ComboBox, True),
                                           (self.Window.ResetButton, True)])
                return

            if chunk is None:
                return

            if self.samplesCounter / self._rate > 60:
                #FileStream.position -= 3
                GoogleSignal.signal.emit(["File"])
                print(self.samplesCounter / self._rate)
                MsgSignal.signal.emit([
                    "API's 1 minute limit reached. Restablishing connection!"
                ])
                break

            data = [chunk]

            # VU Meter in the GUI
            signal = b''.join(data)
            signal = np.fromstring(signal, 'Int16')
            VUSignal.signal.emit([signal])

            self.samplesCounter += self._chunk
            FileStream.position += 1

            if self.Window.stopped == 1:
                print('File Speech Thread Killed')
                mixer.music.stop()
                FileStream.position = 0
                return

            # Now consume whatever other data's still buffered.
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                    self.samplesCounter += self._chunk
                    FileStream.position += 1
                except queue.Empty:
                    break

            yield b''.join(data)
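
# mixer here is presumably pygame's, used to play the audio file back while
# it is being transcribed; the import this snippet assumes:

from pygame import mixer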
Example #9
def CognitiveSystem(Window, SpeechToNLPQueue):
    # Create GUI signal objects
    SpeechSignal = GUISignal()
    SpeechSignal.signal.connect(Window.UpdateSpeechBox)

    ConceptExtractionSignal = GUISignal()
    ConceptExtractionSignal.signal.connect(Window.UpdateConceptExtractionBox)

    # Initialize BT framework parameters
    execfile("bt_parameters.py")

    # Setup BT Framework
    blackboard = Blackboard()
    root = py_trees.composites.Sequence("Root_1")
    IG = be.InformationGathering()
    TC = be.TextCollection()
    V = be.Vectorize()
    PS = be.ProtocolSelector()
    root.add_children([TC, IG, V, PS, be.protocols])
    behaviour_tree = py_trees.trees.BehaviourTree(root)
    behaviour_tree.add_pre_tick_handler(pre_tick_handler)
    behaviour_tree.setup(15)
    Concepts_Graph = dict()
    SpeechText = ""
    NLP_Items = []
    Tick_Counter = 1

    while True:

        # Get queue item from the Speech-to-Text Module
        received = SpeechToNLPQueue.get()
        print("Received chunk")

        if (received == 'Kill'):
            print(
                "Cognitive System Thread received Kill Signal. Killing Cognitive System Thread."
            )
            break

        if (Window.reset == 1):
            print(
                "Cognitive System Thread Received reset signal. Killing Cognitive System Thread."
            )
            return

        # If the item received from the queue is legitimate
        else:
            #sentsList = [received.transcript]

            # Use online tool to find sentence boundaries
            dummy12 = received.transcript
            dummy12 = dummy12.replace('\r', '').replace('\n', '')
            dummyP2 = dummy12.replace(' ', '%20')
            dummyP3 = dummyP2.replace('\'', '%27')
            dummyP = dummyP3.replace('&', '%26')
            part1 = 'curl -d text=' + dummyP + ' http://bark.phon.ioc.ee/punctuator'
            op = commands.getstatusoutput(part1)
            output = op[1].rsplit('\n', 1)[1]
            sentsList = textParse2.sent_tokenize(output)  #final sentences

            # Processes each chunk/sentence
            PunctuatedAndHighlightedText = ""
            for idx, item in enumerate(sentsList):

                blackboard.text = [item]
                behaviour_tree.tick_tock(sleep_ms=50,
                                         number_of_iterations=1,
                                         pre_tick_handler=None,
                                         post_tick_handler=None)

                pr, sv_s, s = TickResults(Window, NLP_Items)

                PunctuatedAndHighlightedTextChunk = item

                for sv in sv_s:
                    if sv[5] == Tick_Counter:  # if new concept found in this tick
                        try:
                            i = re.search(
                                r'%s' % sv[3],
                                PunctuatedAndHighlightedTextChunk).start()
                            PunctuatedAndHighlightedTextChunk = str(
                                PunctuatedAndHighlightedTextChunk[:i] +
                                '<span style="background-color: #FFFF00">' +
                                PunctuatedAndHighlightedTextChunk[i:i +
                                                                  len(sv[3])] +
                                '</span>' +
                                PunctuatedAndHighlightedTextChunk[i +
                                                                  len(sv[3]):])
                        except Exception as e:
                            pass

                PunctuatedAndHighlightedText += PunctuatedAndHighlightedTextChunk + " "
                Tick_Counter += 1

                if (Window.reset == 1):
                    print(
                        "Cognitive System Thread Received reset signal. Killing Cognitive System Thread."
                    )
                    return

            PunctuatedAndHighlightedText = '<b>' + PunctuatedAndHighlightedText + '</b>'
            SpeechSignal.signal.emit([
                SpeechNLPItem(PunctuatedAndHighlightedText, received.isFinal,
                              received.confidence, received.numPrinted, 'NLP')
            ])
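
# pre_tick_handler is registered on the behaviour tree but not shown in this
# excerpt; py_trees passes it the tree itself. A placeholder sketch (the body
# is an assumption):

def pre_tick_handler(behaviour_tree):
    # Called by py_trees before every tick; log the tick count.
    print("--- tick {} ---".format(behaviour_tree.count))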
Example #10
def TickResults(Window, NLP_Items):

    ConceptExtractionSignal = GUISignal()
    ConceptExtractionSignal.signal.connect(Window.UpdateConceptExtractionBox)

    ProtocolSignal = GUISignal()
    ProtocolSignal.signal.connect(Window.UpdateProtocolBoxes)

    b = Blackboard()
    protocol_candidates = []
    signs_and_vitals = []
    suggestions = []

    print("===============================================================")

    #======= Top 3 protocol candidates
    print("\n======= Top 3 protocol candidates:")
    for p in b.protocol_flag:
        print(p, b.protocol_flag[p])
        binary = b.protocol_flag[p][0]
        confidence = b.protocol_flag[p][1]
        if (binary):
            try:
                if (confidence != 'nan' and float(confidence) > 0.0):
                    protocol_candidates.append((str(p), confidence))
            except Exception as e:
                pass

    # Sort by confidence and take top 3
    protocol_candidates = sorted(protocol_candidates,
                                 key=itemgetter(1),
                                 reverse=True)[:3]

    #======= Signs, symptoms, and vitals
    print("\n======= Signs, symptoms, and vitals:")

    for item in b.Vitals:
        if len(b.Vitals[item].content) > 0:
            content = (str(b.Vitals[item].name).capitalize(),
                       str(b.Vitals[item].binary), str(b.Vitals[item].value),
                       str(b.Vitals[item].content),
                       str(round(b.Vitals[item].score / 1000,
                                 2)), b.Vitals[item].tick)
            print(content)
            signs_and_vitals.append(content)
            if (content not in NLP_Items):
                NLP_Items.append(content)

    for item in b.Signs:
        if len(b.Signs[item].content) > 0:
            content = (str(b.Signs[item].name).capitalize(),
                       str(b.Signs[item].binary), str(b.Signs[item].value),
                       str(b.Signs[item].content),
                       str(round(b.Signs[item].score / 1000,
                                 2)), b.Signs[item].tick)
            print(content)
            signs_and_vitals.append(content)
            if (content not in NLP_Items):
                NLP_Items.append(content)

    # Sort by Tick
    signs_and_vitals = sorted(signs_and_vitals, key=itemgetter(5))

    #======= Suggestions
    print("\n======= Suggestions:")
    for key in b.feedback:
        if b.feedback[key] > 0.1:
            content = (str(key).capitalize(), str(round(b.feedback[key], 2)))
            suggestions.append(content)
            print(content)

    # Sort by Concept
    suggestions = sorted(suggestions, key=itemgetter(1), reverse=True)

    #========================== Create output strings formatted for readability
    protocol_candidates_str = ""
    for i, p in enumerate(protocol_candidates):
        protocol_candidates_str += "(" + p[0] + ", <b>" + str(round(
            p[1], 2)) + "</b>)<br>"

    signs_and_vitals_str = ""
    for sv in NLP_Items:
        signs_and_vitals_str += "("
        for i, t in enumerate(sv):
            if (i != 3 and i != 4 and i != 5):
                signs_and_vitals_str += str(t) + ", "
            if (i == 4):
                signs_and_vitals_str += "<b>" + str(t) + "</b>, "
        signs_and_vitals_str = signs_and_vitals_str[:-2] + ")<br>"

    suggestions_str = ""
    for s in suggestions:
        suggestions_str += "(" + str(s[0]) + ", <b>" + str(s[1]) + "</b>)<br>"

    print("===============================================================")

    ProtocolSignal.signal.emit([protocol_candidates_str, suggestions_str])
    ConceptExtractionSignal.signal.emit([signs_and_vitals_str])

    return protocol_candidates, signs_and_vitals, suggestions