Exemplo n.º 1
0
def google_stt():
    """Record audio with ``arecord`` and return its Korean transcript.

    The recording is written to ``input.wav`` in the current directory and
    then sent to the speech client for recognition with language ``ko-KR``.

    Returns:
        The transcript of the first alternative yielded by the recognizer,
        or ``None`` when no alternative is yielded.
    """
    # Set the credentials variable in *this* process.  Running
    # ``os.system("export ...")`` only changes a short-lived subshell, so the
    # client created below would never see the value.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
        "//home/pi/git_refo/smartmirror/calendar/quick_google/client_secret_633702561980-l5hu5n4tfo4hd64po79s803039pj1u6c.apps.googleusercontent.com.json"
    )

    # Encoding setup (Python 2 idiom): make UTF-8 the default codec.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Record from the capture device into input.wav.
    os.system("arecord -D plughw:1,0 -f S16_LE -c1 -r16000 -d 3 input.wav")
    print("recording complete!")

    # Start speech-to-text; the numbered prints are progress markers.
    print("0")
    speech_client = speech.Client()
    print("1")

    with io.open('input.wav', 'rb') as audio_file:
        content = audio_file.read()
    audio_sample = speech_client.sample(content=content,
                                        source_uri=None,
                                        encoding='LINEAR16',
                                        sample_rate_hertz=16000)
    print("2")

    # Only the first alternative is used.
    for alternative in audio_sample.recognize('ko-KR'):
        return alternative.transcript
    return None
Exemplo n.º 2
0
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribe the audio at ``gcs_uri`` into a text file.

    The output path is built from the module-level ``args.path``: its first
    33 characters are dropped, its last four characters are replaced with
    ``.txt`` and the result is placed under ``transcripts/``.

    Args:
        gcs_uri: URI of the audio, passed to the sample as ``source_uri``.

    Returns:
        None. If the operation has not completed after 100 polls (two
        seconds apart), a message is printed and nothing is written.
    """
    from google.cloud import speech
    speech_client = speech.Client()
    filename = args.path[33:]
    filename = filename[:-4] + '.txt'
    filepath = 'transcripts/' + filename

    # ``with`` closes the file on every exit path, including the early
    # return below and any exception raised while polling.
    with open(filepath, 'w') as transcript_file:
        audio_sample = speech_client.sample(
            content=None,
            source_uri=gcs_uri,
            encoding='LINEAR16')

        operation = audio_sample.long_running_recognize('fr-FR')

        retry_count = 100
        while retry_count > 0 and not operation.complete:
            retry_count -= 1
            time.sleep(2)
            operation.poll()

        if not operation.complete:
            print('Operation not complete and retry limit reached. BUUUUUUG   ')
            return

        for alternative in operation.results:
            transcript_file.write(alternative.transcript.encode('utf-8'))
Exemplo n.º 3
0
def google_stt():
    """Return the Korean transcript of the existing ``input.wav`` recording.

    Recording is not performed here; ``input.wav`` is read from the current
    directory and sent to the speech client with language ``ko-KR``.

    Returns:
        The transcript of the first alternative yielded by the recognizer,
        or ``None`` when no alternative is yielded.
    """
    # Set the credentials variable in *this* process.  Running
    # ``os.system("export ...")`` only changes a short-lived subshell, so the
    # client created below would never see the value.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
        "/home/pi/stt1-0f1857b5b0c0.json"
    )

    # Encoding setup (Python 2 idiom): make UTF-8 the default codec.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Start speech-to-text.
    speech_client = speech.Client()

    with io.open('input.wav', 'rb') as audio_file:
        content = audio_file.read()
    audio_sample = speech_client.sample(content=content,
                                        source_uri=None,
                                        encoding='LINEAR16',
                                        sample_rate_hertz=16000)

    # Only the first alternative is used.
    for alternative in audio_sample.recognize('ko-KR'):
        return alternative.transcript
    return None
Exemplo n.º 4
0
def main():
    """Run the ``stt_node`` ROS node, publishing recognized speech on ``stt``.

    Each pass of the loop creates a fresh audio stream, client and sample,
    starts a thread running ``close_loop(stream)``, and publishes one
    ``Speech_msg`` (text and confidence) per alternative of every streaming
    result. When the result iterator ends, the loop starts over.
    """
    rospy.init_node('stt_node')
    pub = rospy.Publisher('stt', Speech_msg, queue_size=10)

    # Stop once ROS is shutting down; a bare ``while True`` would keep
    # restarting recognition after a shutdown request.
    while not rospy.is_shutdown():
        # Restart the client and sample.
        stream = audioStreamingObject()
        print(stream.closed)

        # Thread that runs close_loop on this stream.
        close_thread = threading.Thread(target=close_loop, args=(stream, ))
        close_thread.start()
        client = speech.Client()
        sample = client.sample(stream=stream,
                               encoding=speech.Encoding.LINEAR16,
                               sample_rate_hertz=16000)
        results = sample.streaming_recognize(language_code='en-US')
        for result in results:
            for alternative in result.alternatives:
                msg = Speech_msg()
                msg.text = str(alternative.transcript)
                msg.confidence = float(alternative.confidence)
                pub.publish(msg)

        rospy.loginfo("restarting google speech api due to time limit")
Exemplo n.º 5
0
def main(speech_file):
    """Print the transcript of every alternative recognized in an audio file.

    Args:
        speech_file: the name of the audio file.
    """
    # [START authenticating]
    # Application default credentials are taken from the
    # GOOGLE_APPLICATION_CREDENTIALS environment variable.
    from google.cloud import speech
    client = speech.Client()
    # [END authenticating]

    # [START construct_request]
    # Read the whole recording into memory, then wrap it in a sample.
    with io.open(speech_file, 'rb') as handle:
        raw_audio = handle.read()
    sample = client.sample(
        raw_audio,
        source_uri=None,
        encoding='LINEAR16',
        sample_rate=16000)
    # [END construct_request]

    # [START send_request]
    for candidate in client.speech_api.sync_recognize(sample):
        print('Transcript: {}'.format(candidate.transcript))
Exemplo n.º 6
0
def _recognize(path, idx):
    """Repeatedly stream the file at ``path`` to the recognizer.

    Every recognized transcript is printed and appended (followed by a
    space) to the module-level ``trans`` mapping under key ``idx``.

    Args:
        path: path of the audio file, opened in binary mode.
        idx: key under which transcripts are accumulated in ``trans``.

    The loop has no exit condition of its own; it ends only when an
    exception that is not an ``Exception`` subclass (e.g.
    ``KeyboardInterrupt``) propagates.
    """
    client = speech.Client()

    # ``with`` closes the audio file when the loop is interrupted.
    with open(path, "rb") as stream:
        sample = client.sample(stream=stream,
                               encoding=speech.encoding.Encoding.LINEAR16,
                               sample_rate_hertz=8000)

        while True:
            time.sleep(0.5)

            print("send data to google")

            try:
                results = sample.streaming_recognize(language_code='en-US',
                                                     speech_contexts=[
                                                         'mailbox', 'folder',
                                                         'change', 'pound',
                                                         'star', 'options',
                                                         'folder'
                                                     ])

                for result in results:
                    for alternative in result.alternatives:
                        print('-' * 20)
                        print('transcript: ' + alternative.transcript)
                        print('confidence: ' + str(alternative.confidence))

                        trans[idx] = (trans.get(idx, '') +
                                      alternative.transcript + " ")
                        print(">>> " + str(idx) + ": " + trans[idx])
            except Exception as exc:
                # Stay best-effort (keep looping) but report the failure.
                # A bare ``except`` would also swallow KeyboardInterrupt and
                # make the loop impossible to stop.
                print("recognition failed: " + repr(exc))
Exemplo n.º 7
0
def main_voice(bot, update, user_data):
    """Transcribe a voice message and hand the text to ``all_work``.

    The voice file is downloaded to ``voice.ogg``, exported as raw audio to
    ``audio2.raw`` and recognized with language ``ru-RU``. The first
    transcript is sent back to the chat and then passed to ``all_work``.

    Args:
        bot: bot object used to download the file and send messages.
        update: incoming update carrying the voice message.
        user_data: unused here; kept for the handler signature.

    Returns:
        The result of ``all_work(bot, update, transcript)``, or ``None`` when
        recognition fails (the error text is sent to the chat instead).
    """
    bot.getFile(update.message.voice.file_id).download('voice.ogg')

    audio = AudioSegment.from_file('voice.ogg', format="ogg")
    audio.export("audio2.raw", format="raw")

    speech_client = speech.Client()
    with io.open('audio2.raw', 'rb') as audio_file:
        content = audio_file.read()
    sample = speech_client.sample(
        content,
        encoding='LINEAR16',
        sample_rate_hertz=48000)
    try:
        alternatives = sample.recognize('ru-RU')
    except Exception as inst:
        bot.sendMessage(update.message.chat_id, str(inst))
        # Stop here: without this return the code below would hit an
        # undefined ``alternatives`` and raise NameError.
        return None

    drug = alternatives[0].transcript
    msg = 'Я распознал вашу речь как ' + drug
    bot.sendMessage(update.message.chat_id, msg)

    return all_work(bot, update, drug)
Exemplo n.º 8
0
def transcribe_gcs(gcs_uri):
    """Start long-running recognition of ``gcs_uri`` and print the results.

    The operation is polled every two seconds, at most 100 times. If it is
    still incomplete afterwards a message is printed and the function
    returns; otherwise the transcript and confidence of each result are
    printed.
    """
    from google.cloud import speech

    client = speech.Client()
    sample = client.sample(content=None,
                           source_uri=gcs_uri,
                           encoding='FLAC',
                           sample_rate_hertz=16000)
    operation = sample.long_running_recognize('en-US')

    # Bounded polling: stop early as soon as the operation reports complete.
    for _ in range(100):
        if operation.complete:
            break
        time.sleep(2)
        operation.poll()

    if not operation.complete:
        print('Operation not complete and retry limit reached.')
        return

    for candidate in operation.results:
        print('Transcript: {}'.format(candidate.transcript))
        print('Confidence: {}'.format(candidate.confidence))
Exemplo n.º 9
0
def run_quickstart():
    """Recognize ``resources/audio.raw`` next to this module and print it."""
    # [START speech_quickstart]
    import io
    import os

    # Google Cloud client library.
    from google.cloud import speech

    # One client instance serves the whole request.
    client = speech.Client()

    # The audio file sits in the "resources" directory beside this module.
    resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
    audio_path = os.path.join(resources_dir, 'audio.raw')

    # Read the audio into memory and wrap it in a sample.
    with io.open(audio_path, 'rb') as handle:
        raw_audio = handle.read()
    sample = client.sample(raw_audio,
                           source_uri=None,
                           encoding='LINEAR16',
                           sample_rate_hertz=16000)

    # Run recognition and print every transcript.
    for candidate in sample.recognize('en-US'):
        print('Transcript: {}'.format(candidate.transcript))
Exemplo n.º 10
0
def processSound(audio_stream, transcript):
    """Stream ``audio_stream`` to the recognizer until ``stop`` is set.

    Final transcripts are handed to ``transcript.put`` (presumably a queue;
    confirm against the caller). A ``ValueError`` from recognition is
    treated as end of audio and sets the module-level ``stop`` flag; any
    other exception is logged and the loop retries.

    Args:
        audio_stream: stream passed to the sample as ``stream``.
        transcript: object whose ``put`` receives each final transcript.
    """
    global stop
    speech_client = speech.Client()
    logging.debug("created client")

    audio_sample = speech_client.sample(
        stream=audio_stream,
        encoding=speech.encoding.Encoding.LINEAR16,
        sample_rate_hertz=RATE)

    while not stop:
        try:
            logging.debug("sampling")
            alternatives = audio_sample.streaming_recognize(
                'en-US', interim_results=True)
            # Find transcriptions of the audio content.  Lazy %-style
            # arguments let the logging module do the formatting, so a
            # non-ASCII transcript cannot break str.format on Python 2.
            for alternative in alternatives:
                logging.debug('Transcript: %s', alternative.transcript)
                if alternative.is_final:
                    logging.debug('Final: %s', alternative.is_final)
                    logging.debug('Confidence: %s', alternative.confidence)
                    transcript.put(alternative.transcript)
        except ValueError as e:
            # ``as`` form is valid on Python 2.6+ and Python 3.
            logging.warning("processor: end of audio %s", e)
            stop = True
        except Exception as e:
            logging.error("Recognition raised %s", e)
Exemplo n.º 11
0
def transcribe_file(speech_file):
    """Run long-running recognition on a local audio file and print results.

    The operation is polled every two seconds, at most 200 times. If it is
    still incomplete afterwards a message is printed and the function
    returns; otherwise the transcript and confidence of each result are
    printed.
    """
    from google.cloud import speech

    client = speech.Client()
    with io.open(speech_file, 'rb') as handle:
        raw_audio = handle.read()
    sample = client.sample(
        raw_audio,
        source_uri=None,
        encoding='LINEAR16')

    operation = sample.long_running_recognize('fr-FR')

    # Bounded polling: stop early as soon as the operation reports complete.
    for _ in range(200):
        if operation.complete:
            break
        time.sleep(2)
        operation.poll()

    if not operation.complete:
        print('Operation not complete and retry limit reached.')
        return

    for candidate in operation.results:
        print('Transcript: {}'.format(candidate.transcript))
        print('Confidence: {}'.format(candidate.confidence))
Exemplo n.º 12
0
def get_google_transcription(gcs_uri):
    """Asynchronously transcribe the audio file specified by ``gcs_uri``.

    Args:
        gcs_uri: location of the audio; ``gs://`` is prepended when missing.

    Returns:
        ``operation.results`` once the operation completes, ``[]`` when
        polling raises ``ValueError``, or ``None`` when the operation is
        still incomplete after 100 polls (two seconds apart).
    """
    if not gcs_uri.startswith('gs://'):
        gcs_uri = 'gs://' + gcs_uri
    speech_client = speech.Client()

    audio_sample = speech_client.sample(
        content=None,
        source_uri=gcs_uri,
        encoding='FLAC')
    operation = audio_sample.long_running_recognize('en-US')

    # Enforce the retry budget: without the ``retry_count > 0`` test the
    # counter was decremented but never checked, so the loop was unbounded
    # and the "retry limit reached" branch below could never run.
    retry_count = 100
    while retry_count > 0 and not operation.complete:
        retry_count -= 1
        time.sleep(2)
        try:
            operation.poll()
        except ValueError:
            # Empty segment: this should give something like "silence".
            return []

    if not operation.complete:
        print('Operation not complete and retry limit reached.')
        return

    return operation.results
def main():
    """Stream recorded audio to the recognizer and print the responses.

    Audio comes from ``record_audio(RATE, CHUNK)``; the streaming results
    (interim results included) are passed to ``listen_print_loop``. A gRPC
    error with status ``CANCELLED`` is swallowed; any other gRPC error is
    re-raised.
    """
    from google.cloud import speech
    speech_client = speech.Client()

    with record_audio(RATE, CHUNK) as audio_file:
        # Declare the same rate the recorder was opened with, so the sample
        # metadata cannot drift from the audio actually captured.
        audio_sample = speech_client.sample(
            stream=audio_file,
            encoding=speech.encoding.Encoding.LINEAR16,
            sample_rate_hertz=RATE)
        results = audio_sample.streaming_recognize('en-US',
                                                   interim_results=True)

        # Now, put the transcription responses to use.
        try:
            listen_print_loop(results)
        except grpc.RpcError as e:
            # CANCELLED is treated as expected.  Compare against the enum
            # class rather than reaching a member through another member.
            if e.code() is not grpc.StatusCode.CANCELLED:
                raise
Exemplo n.º 14
0
def transcribe_file(speech_filepath, output_filepath, keyword_file,
                    start_timestamp, length):
    """Transcribe an audio file block by block and save the text.

    The audio is read in blocks of ``PAYLOAD_LIMIT`` frames overlapping by
    ``OVERLAP``. Each block is averaged over its two channels, written to
    ``buffer.wav`` as 16-bit PCM and sent for recognition. Blocks whose
    recognition raises ``ValueError`` are skipped.

    Args:
        speech_filepath: path of the audio file to transcribe.
        output_filepath: path of the text file to (re)create.
        keyword_file: path of an optional file with one keyword per line;
            ignored when it does not exist.
        start_timestamp: frame index at which reading starts.
        length: duration in seconds; when not positive, ``stop`` stays -1.

    Returns:
        The saved text: the ``stop`` value on the first line followed by the
        space-separated transcripts.
    """
    from google.cloud import speech
    speech_client = speech.Client()

    # Read keyword_file, if any: one lower-cased, stripped keyword per line.
    keywords = []
    if os.path.exists(keyword_file):
        with open(keyword_file, 'r') as f:
            keywords = [line.lower().strip() for line in f]

    # Multilingual support is disabled for now.
    language = 'en-US'

    # Create the output file up front; ``with`` closes it even when
    # recognition raises.
    with open(output_filepath, "w+") as save_file:
        pieces = []

        # Slice the audio into blocks, then send each one for analysis.
        sample_rate = 44100
        stop = -1
        if length > 0:
            stop = start_timestamp + sample_rate * length
        print("start and stop is ", start_timestamp, stop)

        blocks = sf.blocks(speech_filepath, start=start_timestamp, stop=stop,
                           blocksize=PAYLOAD_LIMIT, overlap=OVERLAP)
        for count, audio in enumerate(blocks):
            # Close the buffer explicitly so the data is flushed before it
            # is read back, instead of relying on garbage collection.
            with sf.SoundFile('buffer.wav', 'w', sample_rate, 1,
                              'PCM_16') as buffer_out:
                buffer_out.write(audio.sum(axis=1) / float(2))
            with io.open('buffer.wav', 'rb') as buffer_in:
                content = buffer_in.read()
            audio_sample = speech_client.sample(content=content,
                                                source_uri=None,
                                                encoding='LINEAR16',
                                                sample_rate=sample_rate)
            print("evaluating block ", count)

            try:
                alternatives = audio_sample.sync_recognize(
                    language_code=language, speech_context=keywords)
                for alternative in alternatives:
                    pieces.append(alternative.transcript + ' ')
            except ValueError:
                continue

        # Final save, with the stop value on the first line.
        text = str(stop) + "\n" + ''.join(pieces)
        save_file.write(text.encode('utf-8'))
    return text
Exemplo n.º 15
0
 def __init__(self, max_alternatives=1):
     """Initialise recognizer state and create the speech client.

     Args:
         max_alternatives: stored on the instance as ``max_alter``.
     """
     super().__init__()

     # Plain state.
     self.max_alter = max_alternatives
     self.file_path = None
     self._abort = False

     # Qt synchronisation objects, then the speech client.
     self._mutex = QMutex()
     self._condition = QWaitCondition()
     self.client = speech.Client()
Exemplo n.º 16
0
def setUpModule():
    """Create the shared speech client and a fresh test bucket."""
    Config.CLIENT = speech.Client()

    # Bucket for GCS-stored content, named 'new' plus a unique resource id.
    bucket = storage.Client().bucket('new' + unique_resource_id())
    Config.TEST_BUCKET = bucket

    # Wrap creation so it is retried on 429 Too Many Requests, in case API
    # requests are rate-limited.
    create_with_retry = RetryErrors(exceptions.TooManyRequests)(bucket.create)
    create_with_retry()
Exemplo n.º 17
0
def transcribe_gcs(gcs_uri):
    """Transcribe the audio file at ``gcs_uri`` and print each transcript.

    For every recognition result, the transcript of its first alternative
    is printed.

    Args:
        gcs_uri: URI of the audio, passed to the sample as ``source_uri``.
    """
    from google.cloud import speech
    speech_client = speech.Client()

    audio_sample = speech_client.sample(content=None,
                                        source_uri=gcs_uri,
                                        encoding='FLAC',
                                        sample_rate_hertz=48000)

    results = audio_sample.recognize('ko-KR')
    for result in results:
        # Use the public ``transcript`` accessor rather than the private
        # ``_transcript`` attribute; the parenthesised print works on both
        # Python 2 and Python 3.
        print(result.alternatives[0].transcript)
    def recognize(self, audio_file_name):
        """Recognize the audio file with language ``es-AR``.

        Args:
            audio_file_name: path of the audio file to read.

        Returns:
            Whatever the sample's ``recognize`` call returns.
        """
        client = speech.Client()

        with io.open(audio_file_name, 'rb') as handle:
            raw_audio = handle.read()

        sample = client.sample(raw_audio,
                               source_uri=None,
                               encoding='LINEAR16',
                               sample_rate_hertz=16000)
        return sample.recognize('es-AR')
Exemplo n.º 19
0
def transcribe_gcs(gcs_uri):
    """Recognize the audio at ``gcs_uri`` (language ``it``) and print it."""
    from google.cloud import speech

    sample = speech.Client().sample(content=None,
                                    source_uri=gcs_uri,
                                    encoding='FLAC',
                                    sample_rate_hertz=16000)

    # One output line per alternative.
    for candidate in sample.recognize('it'):
        print('Transcript: {}'.format(candidate.transcript))
Exemplo n.º 20
0
def transcribe_file_kor(speech_file):
    """Return the first ``ko-KR`` transcript of the given audio file.

    Returns ``None`` when recognition yields no alternatives.
    """
    client = speech.Client()

    with io.open(speech_file, 'rb') as handle:
        raw_audio = handle.read()

    sample = client.sample(content=raw_audio,
                           source_uri=None,
                           encoding='LINEAR16',
                           sample_rate_hertz=16000)

    # Only the first alternative is used.
    for candidate in sample.recognize('ko-KR'):
        return candidate.transcript
    return None
Exemplo n.º 21
0
def transcribe_confirm(speech_file):
    """Recognize the audio file and check the text with ``detect_jarvis``.

    Returns the result of ``detect_jarvis.is_right_user`` for the first
    transcript, or ``None`` when recognition yields no alternatives.
    """
    from google.cloud import speech

    client = speech.Client()

    with io.open(speech_file, 'rb') as handle:
        raw_audio = handle.read()

    sample = client.sample(content=raw_audio,
                           source_uri=None,
                           encoding='LINEAR16')

    # Only the first alternative is checked.
    for candidate in sample.recognize('en-US'):
        return detect_jarvis.is_right_user(candidate.transcript)
    return None
Exemplo n.º 22
0
def transcribe(speech_file):
    """Return the first ``es-ES`` transcript of an OGG/Opus audio file.

    Returns ``None`` when recognition yields no alternatives.
    """
    client = speech.Client()

    with io.open(speech_file, 'rb') as handle:
        raw_audio = handle.read()

    sample = client.sample(content=raw_audio,
                           source_uri=None,
                           encoding='OGG_OPUS',
                           sample_rate_hertz=16000)

    # Only the first alternative is used, formatted as a string.
    for candidate in sample.recognize('es-ES'):
        return '{}'.format(candidate.transcript)
    return None
Exemplo n.º 23
0
def transcribe_file(speech_file):
    """Call ``rec_audio``, then recognize ``speech_file`` as ``tr-TR``.

    The first transcript is UTF-8 encoded, printed and returned; ``None`` is
    returned when recognition yields no alternatives.
    """
    rec_audio()
    client = speech.Client()

    with io.open(speech_file, 'rb') as handle:
        raw_audio = handle.read()

    sample = client.sample(content=raw_audio,
                           source_uri=None,
                           encoding='LINEAR16',
                           sample_rate_hertz=16000)

    # Only the first alternative is used.
    for candidate in sample.recognize('tr-TR'):
        encoded = candidate.transcript.encode('utf-8')
        print('Transcript: {}'.format(encoded))
        return encoded
    return None
Exemplo n.º 24
0
def transcribe_file(speech_file):
    """Recognize the audio file as ``en-US`` and print every transcript."""
    client = speech.Client()

    with io.open(speech_file, 'rb') as handle:
        raw_audio = handle.read()

    sample = client.sample(content=raw_audio,
                           source_uri=None,
                           encoding='LINEAR16',
                           sample_rate=16000)

    # One output line per alternative.
    for candidate in sample.sync_recognize('en-US'):
        print('Transcript: {}'.format(candidate.transcript))
Exemplo n.º 25
0
 def __init__(self):
     """Create the speech client and publisher, and load the vocabulary.

     The vocabulary file path comes from the ``~vocabulary_file`` ROS
     parameter (default: empty string). When it is non-empty, the file is
     parsed as YAML into ``self.vocabulary``; otherwise the vocabulary
     stays an empty list.
     """
     self.client = speech.Client()
     self.pub_recognized_word = rospy.Publisher('recognized_word',
                                                RecognizedWord,
                                                queue_size=10)
     self.language_code = 'en_US'
     self.vocabulary_file = rospy.get_param('~vocabulary_file', "")
     self.vocabulary = []
     if self.vocabulary_file != '':
         # Expand "~" first, then make the path absolute.  Previously the
         # abspath result was overwritten by the expanduser call on the
         # raw parameter, so it had no effect.
         target_file = os.path.abspath(
             os.path.expanduser(self.vocabulary_file))
         print(target_file)
         with open(target_file) as f:
             # NOTE(review): yaml.load without an explicit Loader can build
             # arbitrary Python objects; consider yaml.safe_load if the
             # vocabulary file is not fully trusted.
             self.vocabulary = yaml.load(f)
             rospy.loginfo('load user vocabulary...')
Exemplo n.º 26
0
    def transcribe_gcs(self, gcs_uri):
        """Recognize the audio at ``gcs_uri`` using ``self.language``.

        The transcripts are stored in ``self.transcripts_list``; the list is
        empty when recognition raises ``ValueError``.
        """
        from google.cloud import speech

        sample = speech.Client().sample(
            content=None,
            source_uri=gcs_uri,
            encoding='FLAC',
            sample_rate_hertz=16000)

        collected = []
        try:
            for candidate in sample.recognize(self.language):
                collected.append(candidate.transcript)
        except ValueError:
            # Discard anything gathered before the failure.
            collected = []
        self.transcripts_list = collected
Exemplo n.º 27
0
def transcribe_feature(speech_file):
    """Recognize the audio file and run the recognized text as a command.

    Args:
        speech_file: path of the audio file to read.

    Returns:
        The result of ``command.perform_task`` for the first transcript, or
        ``None`` when recognition yields no alternatives.
    """
    from google.cloud import speech
    speech_client = speech.Client()

    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()
    audio_sample = speech_client.sample(content=content,
                                        source_uri=None,
                                        encoding='LINEAR16')

    # Only the first alternative is used.
    for alternative in audio_sample.recognize('en-US'):
        text = alternative.transcript
        # Single-argument print: same "feature <text>" output on Python 2,
        # and valid syntax on Python 3.
        print('feature ' + text)
        return command.perform_task(text)
    return None
Exemplo n.º 28
0
def main():
    """Stream ``MicAsFile`` audio to the recognizer and print responses."""
    client = speech.Client()

    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    with MicAsFile(RATE, CHUNK) as stream:
        sample = client.sample(
            stream=stream,
            encoding=speech.encoding.Encoding.LINEAR16,
            sample_rate_hertz=RATE)
        responses = sample.streaming_recognize(
            language_code=language_code, interim_results=True)

        # Hand the response iterator to the printing loop.
        listen_print_loop(responses)
Exemplo n.º 29
0
    def test_use_bytes_instead_of_file_like_object(self):
        """``streaming_recognize`` raises ValueError for a bytes sample."""
        from google.cloud import speech
        from google.cloud.speech.sample import Sample

        creds = _make_credentials()
        client = speech.Client(credentials=creds, use_gax=True)
        client.connection = _Connection()
        client.connection.credentials = creds

        # Sample built from raw bytes rather than a file-like stream.
        bytes_sample = Sample(content=b'',
                              encoding=speech.Encoding.FLAC,
                              sample_rate=self.SAMPLE_RATE)
        api = self._make_one(client)

        with self.assertRaises(ValueError):
            api.streaming_recognize(bytes_sample)

        # No request may have been issued.
        self.assertEqual(client.connection._requested, [])
Exemplo n.º 30
0
def _recognize(recording_url):
    """Fetch a recording and return its ``zh-HK`` transcript.

    The recording is fetched with up to four attempts, pausing 0.5 seconds
    after each HTTP error. The WAV frames are then sent for recognition
    using the module-level ``speech_context``.

    Args:
        recording_url: URL of the WAV recording.

    Returns:
        The transcript of the first recognition result.

    Raises:
        ValueError: when every fetch attempt fails.
        Exception: any other failure is logged and re-raised.

    Regardless of the outcome, a thread running
    ``_delete_recording(recording_url)`` is started and the open response
    and WAV reader are closed.
    """
    content = None
    wav = None

    try:
        for i in range(4):
            try:
                content = urllib2.urlopen(recording_url)
                # Stop at the first successful fetch; otherwise the
                # recording is downloaded four times and the earlier
                # responses are never closed.
                break
            except urllib2.HTTPError as e:
                app.logger.warning(
                    "evt=fetch_recording_error sid=%s recording_url=%s err=%s",
                    request.form['CallSid'], recording_url, e)
                time.sleep(0.5)
        if content is None:
            raise ValueError("Fetch recording failed")

        wav = wave.open(content, 'r')
        encoding = 'LINEAR16'
        sample_rate = wav.getframerate()
        frames = wav.readframes(wav.getnframes())

        client = speech.Client()
        sample = client.sample(encoding=encoding,
                               sample_rate=sample_rate,
                               content=frames)
        results = sample.sync_recognize(max_alternatives=1,
                                        language_code='zh-HK',
                                        speech_context=speech_context)
        app.logger.info(
            "evt=recognize_success sid=%s recording_url=%s transcript=%s confidence=%s",
            request.form['CallSid'], recording_url, results[0].transcript,
            results[0].confidence)

        return results[0].transcript
    except Exception as e:
        app.logger.warning("evt=recognize_fail sid=%s recording_url=%s err=%s",
                           request.form['CallSid'], recording_url, e)
        raise
    finally:
        # Clean up: delete the recording in the background, then release
        # the local handles.
        t = threading.Thread(target=_delete_recording, args=(recording_url, ))
        t.start()

        if content is not None:
            content.close()
        if wav is not None:
            wav.close()