def google_stt():
    """Record a short clip with ``arecord`` and transcribe it as Korean.

    The clip is written to ``input.wav`` (``arecord ... -d 3``) and then sent
    to the Google Speech API with language code ``ko-KR``.

    Returns:
        The transcript of the first alternative returned by the API, or
        ``None`` when the API yields no alternatives.
    """
    # Set the credentials path in *this* process. The previous
    # ``os.system("export ...")`` only modified a throw-away subshell, so the
    # variable never reached the client library. ``setdefault`` keeps any
    # value already provided by the caller's environment.
    os.environ.setdefault(
        "GOOGLE_APPLICATION_CREDENTIALS",
        "//home/pi/git_refo/smartmirror/calendar/quick_google/client_secret_633702561980-l5hu5n4tfo4hd64po79s803039pj1u6c.apps.googleusercontent.com.json",
    )

    # Encoding setup (Python 2 idiom): make UTF-8 the default string encoding.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Record the audio clip.
    os.system("arecord -D plughw:1,0 -f S16_LE -c1 -r16000 -d 3 input.wav")
    print("recording complete!")

    # Start speech-to-text; the numbered prints are progress markers.
    print("0")
    speech_client = speech.Client()
    print("1")
    with io.open('input.wav', 'rb') as audio_file:
        content = audio_file.read()
    audio_sample = speech_client.sample(content=content,
                                        source_uri=None,
                                        encoding='LINEAR16',
                                        sample_rate_hertz=16000)
    print("2")
    alternatives = audio_sample.recognize('ko-KR')
    # Only the first alternative is used.
    for alternative in alternatives:
        return alternative.transcript
    return None
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribe the audio at ``gcs_uri`` into a text file.

    The output path is ``transcripts/<name>.txt``, where ``<name>`` is taken
    from the module-level ``args.path`` (characters from index 33 onward,
    with the last four characters replaced by ``.txt``).

    Args:
        gcs_uri: URI of the audio object, passed to the API as ``source_uri``.

    Returns:
        ``None``. When the operation has not completed after 100 polls
        (2 seconds apart) a message is printed and no transcript is written.
    """
    from google.cloud import speech
    speech_client = speech.Client()

    filename = args.path[33:]
    filename = filename[:-4] + '.txt'
    filepath = 'transcripts/' + filename

    # The context manager closes the file on every exit path, including the
    # early return below, which previously leaked the handle.
    with open(filepath, 'w') as transcript_file:
        audio_sample = speech_client.sample(
            content=None,
            source_uri=gcs_uri,
            encoding='LINEAR16')

        operation = audio_sample.long_running_recognize('fr-FR')

        retry_count = 100
        while retry_count > 0 and not operation.complete:
            retry_count -= 1
            time.sleep(2)
            operation.poll()

        if not operation.complete:
            print('Operation not complete and retry limit reached. BUUUUUUG ')
            return

        for alternative in operation.results:
            transcript_file.write(alternative.transcript.encode('utf-8'))
def google_stt():
    """Transcribe the existing ``input.wav`` as Korean speech.

    The recording step is disabled here, so ``input.wav`` must already exist
    in the working directory.

    Returns:
        The transcript of the first alternative returned by the API, or
        ``None`` when the API yields no alternatives.
    """
    # Set the credentials path in *this* process. The previous
    # ``os.system("export ...")`` only modified a throw-away subshell, so the
    # variable never reached the client library. ``setdefault`` keeps any
    # value already provided by the caller's environment.
    os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS",
                          "/home/pi/stt1-0f1857b5b0c0.json")

    # Encoding setup (Python 2 idiom): make UTF-8 the default string encoding.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Recording (disabled):
    # os.system("arecord -D plughw:1,0 -f S16_LE -c1 -r16000 -d 3 input.wav")
    # print("recording complete!")

    # Start speech-to-text.
    speech_client = speech.Client()
    with io.open('input.wav', 'rb') as audio_file:
        content = audio_file.read()
    audio_sample = speech_client.sample(content=content,
                                        source_uri=None,
                                        encoding='LINEAR16',
                                        sample_rate_hertz=16000)
    alternatives = audio_sample.recognize('ko-KR')
    # Only the first alternative is used.
    for alternative in alternatives:
        return alternative.transcript
    return None
def main():
    """Run the ``stt_node`` node: stream audio to Google Speech forever.

    Every recognised alternative is published on the ``stt`` topic as a
    ``Speech_msg`` carrying its text and confidence. When a streaming
    session ends, a new stream, client and sample are created and the loop
    starts over.
    """
    rospy.init_node('stt_node')
    publisher = rospy.Publisher('stt', Speech_msg, queue_size=10)

    while True:
        # Fresh audio stream for this session.
        audio_stream = audioStreamingObject()
        print(audio_stream.closed)

        # Background thread running close_loop on the stream.
        closer = threading.Thread(target=close_loop, args=(audio_stream, ))
        closer.start()

        speech_client = speech.Client()
        stream_sample = speech_client.sample(
            stream=audio_stream,
            encoding=speech.Encoding.LINEAR16,
            sample_rate_hertz=16000)
        responses = stream_sample.streaming_recognize(language_code='en-US')

        for response in responses:
            for candidate in response.alternatives:
                message = Speech_msg()
                message.text = str(candidate.transcript)
                message.confidence = float(candidate.confidence)
                publisher.publish(message)

        rospy.loginfo("restarting google speech api due to time limit")
def main(speech_file):
    """Synchronously transcribe an audio file and print each transcript.

    Args:
        speech_file: the name of the audio file.
    """
    # [START authenticating]
    # The client picks up application default credentials from the
    # GOOGLE_APPLICATION_CREDENTIALS environment variable.
    from google.cloud import speech
    client = speech.Client()
    # [END authenticating]

    # [START construct_request]
    # Read the whole file into memory and wrap it in a sample.
    with io.open(speech_file, 'rb') as source:
        raw_audio = source.read()
        sample = client.sample(
            raw_audio,
            source_uri=None,
            encoding='LINEAR16',
            sample_rate=16000)
    # [END construct_request]

    # [START send_request]
    for candidate in client.speech_api.sync_recognize(sample):
        print('Transcript: {}'.format(candidate.transcript))
def _recognize(path, idx):
    """Repeatedly stream the file at ``path`` to Google Speech.

    Each recognised transcript is printed and appended (followed by a space)
    to the module-level ``trans[idx]`` entry. The loop never terminates on
    its own; it runs until an exception that is not an ``Exception``
    subclass (for example ``KeyboardInterrupt``) propagates.

    Args:
        path: path of the audio file, opened in binary mode.
        idx: key under which transcripts are accumulated in ``trans``.
    """
    client = speech.Client()
    # The context manager closes the file when the loop is left by an
    # exception; previously the handle was never closed.
    with open(path, "rb") as stream:
        sample = client.sample(stream=stream,
                               encoding=speech.encoding.Encoding.LINEAR16,
                               sample_rate_hertz=8000)
        while True:
            time.sleep(0.5)
            print("send data to google")
            try:
                results = sample.streaming_recognize(
                    language_code='en-US',
                    speech_contexts=[
                        'mailbox', 'folder', 'change', 'pound', 'star',
                        'options', 'folder'
                    ])
                for result in results:
                    for alternative in result.alternatives:
                        print('-' * 20)
                        print('transcript: ' + alternative.transcript)
                        print('confidence: ' + str(alternative.confidence))
                        if idx not in trans:
                            trans[idx] = ''
                        trans[idx] += alternative.transcript + " "
                        print(">>> " + str(idx) + ": " + trans[idx])
            except Exception as exc:
                # Stay best-effort (keep looping) but report the failure, and
                # no longer swallow KeyboardInterrupt/SystemExit as the bare
                # ``except:`` did.
                print("recognition failed: " + str(exc))
def main_voice(bot, update, user_data):
    """Handle a voice message: transcribe it as Russian and process the text.

    The voice file is downloaded to ``voice.ogg``, exported as raw audio to
    ``audio2.raw`` and sent to the Google Speech API. The recognised text is
    echoed back to the chat and then handed to ``all_work``.

    Args:
        bot: bot object used to fetch the file and send messages.
        update: incoming update carrying the voice message.
        user_data: not used by this handler.

    Returns:
        The result of ``all_work(bot, update, text)``, or ``None`` when
        recognition raised (the error text is sent to the chat instead).
    """
    bot.getFile(update.message.voice.file_id).download('voice.ogg')
    audio = AudioSegment.from_file('voice.ogg', format="ogg")
    audio.export("audio2.raw", format="raw")

    speech_client = speech.Client()
    with io.open('audio2.raw', 'rb') as audio_file:
        content = audio_file.read()
    sample = speech_client.sample(
        content,
        encoding='LINEAR16',
        sample_rate_hertz=48000)

    try:
        alternatives = sample.recognize('ru-RU')
    except Exception as inst:
        bot.sendMessage(update.message.chat_id, str(inst))
        # Stop here: ``alternatives`` is unbound, so continuing would only
        # raise UnboundLocalError on the next line.
        return None

    drug = alternatives[0].transcript
    msg = 'Я распознал вашу речь как ' + drug
    bot.sendMessage(update.message.chat_id, msg)
    return all_work(bot, update, drug)
def transcribe_gcs(gcs_uri):
    """Run a long-running FLAC/en-US recognition on ``gcs_uri`` and print it.

    The operation is polled every 2 seconds, at most 100 times. If it is
    still incomplete a message is printed and the function returns early;
    otherwise the transcript and confidence of each result are printed.
    """
    from google.cloud import speech

    client = speech.Client()
    sample = client.sample(content=None,
                           source_uri=gcs_uri,
                           encoding='FLAC',
                           sample_rate_hertz=16000)
    job = sample.long_running_recognize('en-US')

    # Bounded polling loop.
    for _ in range(100):
        if job.complete:
            break
        time.sleep(2)
        job.poll()

    if not job.complete:
        print('Operation not complete and retry limit reached.')
        return

    for candidate in job.results:
        print('Transcript: {}'.format(candidate.transcript))
        print('Confidence: {}'.format(candidate.confidence))
def run_quickstart():
    """Transcribe ``resources/audio.raw`` next to this file and print it."""
    # [START speech_quickstart]
    import io
    import os

    # Google Cloud client library.
    from google.cloud import speech

    client = speech.Client()

    # Audio file to transcribe, resolved relative to this module.
    here = os.path.dirname(__file__)
    audio_path = os.path.join(here, 'resources', 'audio.raw')

    # Read the audio into memory and build the sample from it.
    with io.open(audio_path, 'rb') as source:
        raw_audio = source.read()
        sample = client.sample(raw_audio,
                               source_uri=None,
                               encoding='LINEAR16',
                               sample_rate_hertz=16000)

    # Run recognition and print every transcript.
    for candidate in sample.recognize('en-US'):
        print('Transcript: {}'.format(candidate.transcript))
def processSound(audio_stream, transcript):
    """Stream ``audio_stream`` to Google Speech until ``stop`` is set.

    Final transcripts are pushed onto ``transcript`` with ``put``. A
    ``ValueError`` is logged as end of audio and sets the module-level
    ``stop`` flag; any other exception is logged and the loop continues.
    """
    global stop

    client = speech.Client()
    logging.debug("created client")
    sample = client.sample(
        stream=audio_stream,
        encoding=speech.encoding.Encoding.LINEAR16,
        sample_rate_hertz=RATE)

    while not stop:
        try:
            logging.debug("sampling")
            results = sample.streaming_recognize(
                'en-US', interim_results=True)

            # Walk through the transcriptions of the audio content.
            for item in results:
                logging.debug('Transcript: {}'.format(item.transcript))
                if not item.is_final:
                    continue
                logging.debug('Final: {}'.format(item.is_final))
                logging.debug('Confidence: {}'.format(item.confidence))
                transcript.put(item.transcript)
        except ValueError as err:
            logging.warning("processor: end of audio {}".format(str(err)))
            stop = True
        except Exception as err:
            logging.error("Recognition raised {}".format(err))
def transcribe_file(speech_file):
    """Run a long-running fr-FR recognition on a local file and print it.

    The operation is polled every 2 seconds, at most 200 times. If it is
    still incomplete a message is printed and the function returns early.
    """
    from google.cloud import speech

    client = speech.Client()

    with io.open(speech_file, 'rb') as source:
        raw_audio = source.read()
        sample = client.sample(
            raw_audio,
            source_uri=None,
            encoding='LINEAR16')

    job = sample.long_running_recognize('fr-FR')

    polls_left = 200
    while polls_left > 0 and not job.complete:
        polls_left = polls_left - 1
        time.sleep(2)
        job.poll()

    if not job.complete:
        print('Operation not complete and retry limit reached.')
        return

    for candidate in job.results:
        print('Transcript: {}'.format(candidate.transcript))
        print('Confidence: {}'.format(candidate.confidence))
def get_google_transcription(gcs_uri):
    """Asynchronously transcribe the FLAC audio at ``gcs_uri`` (en-US).

    Args:
        gcs_uri: Cloud Storage location; ``gs://`` is prepended when the
            value does not already start with it.

    Returns:
        ``operation.results`` on success; ``[]`` when polling raises
        ``ValueError``; ``None`` when the operation is still incomplete
        after 100 polls (2 seconds apart).
    """
    if not gcs_uri.startswith('gs://'):
        gcs_uri = 'gs://' + gcs_uri

    speech_client = speech.Client()
    audio_sample = speech_client.sample(
        content=None,
        source_uri=gcs_uri,
        encoding='FLAC')

    operation = audio_sample.long_running_recognize('en-US')

    retry_count = 100
    # The counter is now part of the loop condition. Previously it was only
    # decremented, so polling was unbounded and the "retry limit" branch
    # below could never run.
    while retry_count > 0 and not operation.complete:
        retry_count -= 1
        time.sleep(2)
        try:
            operation.poll()
        except ValueError:
            # Empty segment; this should give something like "silence".
            return []

    if not operation.complete:
        print('Operation not complete and retry limit reached.')
        return

    return operation.results
def main():
    """Stream recorded audio to Google Speech and print the results.

    Audio comes from ``record_audio(RATE, CHUNK)``; the streaming responses
    (including interim results) are handed to ``listen_print_loop``. A gRPC
    error whose status is ``CANCELLED`` is swallowed; any other is re-raised.
    """
    from google.cloud import speech

    client = speech.Client()

    # Streaming from the microphone involves several threads; record_audio
    # provides the one that collects audio data as it comes in.
    with record_audio(RATE, CHUNK) as mic_stream:
        sample = client.sample(
            stream=mic_stream,
            encoding=speech.encoding.Encoding.LINEAR16,
            sample_rate_hertz=16000)
        responses = sample.streaming_recognize('en-US', interim_results=True)

        # Exit things cleanly on interrupt
        # signal.signal(signal.SIGINT, lambda *_: recognize_stream.cancel())

        # Consume the transcription responses.
        try:
            listen_print_loop(responses)
            # recognize_stream.cancel()
        except grpc.RpcError as rpc_error:
            status = rpc_error.code()
            # CANCELLED comes from the interrupt handler and is expected.
            if status is not status.CANCELLED:
                raise
def _load_keywords(keyword_file):
    """Return the lower-cased, stripped lines of ``keyword_file``, or ``[]`` if it does not exist."""
    if not os.path.exists(keyword_file):
        return []
    with open(keyword_file, 'r') as f:
        return [line.lower().strip() for line in f]


def transcribe_file(speech_filepath, output_filepath, keyword_file,
                    start_timestamp, length):
    """Transcribe the given audio file block by block. Length is in seconds.

    The audio is read in blocks of ``PAYLOAD_LIMIT`` frames (overlapping by
    ``OVERLAP``). Each block is summed across axis 1 and halved, written to
    ``buffer.wav`` as single-channel PCM_16 and sent for synchronous
    recognition. Blocks whose recognition raises ``ValueError`` are skipped.

    Args:
        speech_filepath: path of the audio file to slice.
        output_filepath: path of the text file that receives the result.
        keyword_file: optional path to a file with one keyword per line,
            passed to the API as ``speech_context``.
        start_timestamp: first frame to read.
        length: duration in seconds; when not positive, ``stop`` stays
            ``-1``.

    Returns:
        The saved text: the ``stop`` value on the first line, followed by
        all transcripts separated by spaces.
    """
    from google.cloud import speech
    speech_client = speech.Client()

    keywords = _load_keywords(keyword_file)

    # Multilingual support is disabled for now; English only.
    language = 'en-US'

    # The output file is opened up front (as before) but is now closed on
    # every exit path, including exceptions raised while processing blocks.
    with open(output_filepath, "w+") as save_file:
        # Slice the audio into blocks and send each to Google Cloud.
        sample_rate = 44100
        stop = -1
        if length > 0:
            stop = start_timestamp + sample_rate * length
        print("start and stop is ", start_timestamp, stop)

        pieces = []
        blocks = sf.blocks(speech_filepath, start=start_timestamp, stop=stop,
                           blocksize=PAYLOAD_LIMIT, overlap=OVERLAP)
        for count, audio in enumerate(blocks):
            # Close the writer explicitly so the buffer is fully flushed
            # before it is read back, instead of relying on garbage
            # collection of the temporary object.
            with sf.SoundFile('buffer.wav', 'w', sample_rate, 1,
                              'PCM_16') as buffer_out:
                buffer_out.write(audio.sum(axis=1) / float(2))
            with io.open('buffer.wav', 'rb') as buffer_in:
                content = buffer_in.read()

            audio_sample = speech_client.sample(content=content,
                                                source_uri=None,
                                                encoding='LINEAR16',
                                                sample_rate=sample_rate)
            print("evaluating block ", count)

            try:
                alternatives = audio_sample.sync_recognize(
                    language_code=language, speech_context=keywords)
                for alternative in alternatives:
                    pieces.append(alternative.transcript + ' ')
            except ValueError:
                continue

        # Final save, with the stop position on the first line.
        text = str(stop) + "\n" + ''.join(pieces)
        # NOTE(review): writing encoded bytes to a text-mode file only works
        # on Python 2 - confirm the target interpreter.
        save_file.write(text.encode('utf-8'))
    return text
def __init__(self, max_alternatives=1):
    """Initialise synchronisation primitives and the Speech client.

    Args:
        max_alternatives: stored on the instance as ``max_alter``.
    """
    super().__init__()
    self.max_alter = max_alternatives

    # Synchronisation state.
    self._mutex = QMutex()
    self._abort = False
    self._condition = QWaitCondition()

    # Google Speech client; no file is selected initially.
    self.client = speech.Client()
    self.file_path = None
def setUpModule():
    """Create the shared Speech client and a uniquely named test bucket."""
    Config.CLIENT = speech.Client()

    # Bucket for GCS-stored content used by the tests.
    gcs_client = storage.Client()
    Config.TEST_BUCKET = gcs_client.bucket('new' + unique_resource_id())

    # Retry creation on 429 Too Many Requests in case the API rate-limits us.
    create_with_retry = RetryErrors(exceptions.TooManyRequests)(
        Config.TEST_BUCKET.create)
    create_with_retry()
def transcribe_gcs(gcs_uri):
    """Recognise the FLAC audio at ``gcs_uri`` as Korean and print it.

    For every result, the ``_transcript`` of its first alternative is
    printed.
    """
    from google.cloud import speech

    client = speech.Client()
    sample = client.sample(content=None,
                           source_uri=gcs_uri,
                           encoding='FLAC',
                           sample_rate_hertz=48000)

    for result in sample.recognize('ko-KR'):
        top_choice = result.alternatives[0]
        print(top_choice._transcript)
def recognize(self, audio_file_name):
    """Recognise a LINEAR16, 16 kHz audio file as ``es-AR`` speech.

    Args:
        audio_file_name: path of the audio file to read.

    Returns:
        Whatever ``sample.recognize('es-AR')`` returns.
    """
    client = speech.Client()

    with io.open(audio_file_name, 'rb') as source:
        raw_audio = source.read()
        sample = client.sample(raw_audio,
                               source_uri=None,
                               encoding='LINEAR16',
                               sample_rate_hertz=16000)

    return sample.recognize('es-AR')
def transcribe_gcs(gcs_uri):
    """Recognise the FLAC audio at ``gcs_uri`` (language ``it``) and print it."""
    from google.cloud import speech

    client = speech.Client()
    sample = client.sample(content=None,
                           source_uri=gcs_uri,
                           encoding='FLAC',
                           sample_rate_hertz=16000)

    for candidate in sample.recognize('it'):
        print('Transcript: {}'.format(candidate.transcript))
def transcribe_file_kor(speech_file):
    """Transcribe a LINEAR16, 16 kHz audio file as Korean speech.

    Returns:
        The transcript of the first alternative, or ``None`` if there is
        none.
    """
    client = speech.Client()

    with io.open(speech_file, 'rb') as source:
        raw_audio = source.read()
        sample = client.sample(content=raw_audio,
                               source_uri=None,
                               encoding='LINEAR16',
                               sample_rate_hertz=16000)

    results = sample.recognize('ko-KR')
    # Take the first transcript only; fall back to None when empty.
    return next((item.transcript for item in results), None)
def transcribe_confirm(speech_file):
    """Transcribe the audio file and check the text against the user.

    Returns:
        The result of ``detect_jarvis.is_right_user`` for the first
        transcript, or ``None`` when recognition yields no alternatives.
    """
    from google.cloud import speech

    client = speech.Client()

    with io.open(speech_file, 'rb') as source:
        raw_audio = source.read()
        sample = client.sample(content=raw_audio,
                               source_uri=None,
                               encoding='LINEAR16')

    # NOTE(review): returns on the first alternative - confirm this matches
    # the intended nesting of the return statement.
    for candidate in sample.recognize('en-US'):
        spoken = candidate.transcript
        return detect_jarvis.is_right_user(spoken)
def transcribe(speech_file):
    """Transcribe an OGG_OPUS, 16 kHz audio file as ``es-ES`` speech.

    Returns:
        The first transcript formatted as a string, or ``None`` when
        recognition yields no alternatives.
    """
    client = speech.Client()

    with io.open(speech_file, 'rb') as source:
        raw_audio = source.read()
        sample = client.sample(content=raw_audio,
                               source_uri=None,
                               encoding='OGG_OPUS',
                               sample_rate_hertz=16000)

    for candidate in sample.recognize('es-ES'):
        # Only the first alternative is used.
        return '{}'.format(candidate.transcript)
def transcribe_file(speech_file):
    """Record audio via ``rec_audio`` and transcribe ``speech_file`` as Turkish.

    Returns:
        The first transcript encoded as UTF-8 (it is also printed), or
        ``None`` when recognition yields no alternatives.
    """
    rec_audio()

    client = speech.Client()

    with io.open(speech_file, 'rb') as source:
        raw_audio = source.read()
        sample = client.sample(content=raw_audio,
                               source_uri=None,
                               encoding='LINEAR16',
                               sample_rate_hertz=16000)

    # NOTE(review): returns on the first alternative - confirm this matches
    # the intended nesting of the return statement.
    for candidate in sample.recognize('tr-TR'):
        speech_out = candidate.transcript.encode('utf-8')
        print('Transcript: {}'.format(speech_out))
        return speech_out
def transcribe_file(speech_file):
    """Synchronously recognise a LINEAR16, 16 kHz file (en-US) and print it."""
    client = speech.Client()

    with io.open(speech_file, 'rb') as source:
        raw_audio = source.read()
        sample = client.sample(content=raw_audio,
                               source_uri=None,
                               encoding='LINEAR16',
                               sample_rate=16000)

    for candidate in sample.sync_recognize('en-US'):
        print('Transcript: {}'.format(candidate.transcript))
def __init__(self):
    """Create the Speech client and publisher, and load the vocabulary.

    The vocabulary file path is read from the ``~vocabulary_file`` ROS
    parameter (default ``""``). When it is non-empty the file is parsed as
    YAML into ``self.vocabulary``; otherwise the vocabulary stays empty.
    """
    self.client = speech.Client()
    self.pub_recognized_word = rospy.Publisher('recognized_word',
                                               RecognizedWord,
                                               queue_size=10)
    self.language_code = 'en_US'
    self.vocabulary_file = rospy.get_param('~vocabulary_file', "")
    self.vocabulary = []

    if self.vocabulary_file != '':
        # Expand "~" first, then make the path absolute. Previously the
        # abspath result was discarded by the following expanduser call.
        target_file = os.path.abspath(
            os.path.expanduser(self.vocabulary_file))
        print(target_file)
        with open(target_file) as f:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects; consider yaml.safe_load if this file
            # can come from an untrusted source.
            self.vocabulary = yaml.load(f)
        rospy.loginfo('load user vocabulary...')
def transcribe_gcs(self, gcs_uri):
    """Recognise the FLAC audio at ``gcs_uri`` in ``self.language``.

    The transcripts are stored in ``self.transcripts_list``; the list is
    empty when a ``ValueError`` is raised.
    """
    from google.cloud import speech

    client = speech.Client()
    sample = client.sample(
        content=None,
        source_uri=gcs_uri,
        encoding='FLAC',
        sample_rate_hertz=16000)

    try:
        results = sample.recognize(self.language)
        self.transcripts_list = [item.transcript for item in results]
    except ValueError:
        self.transcripts_list = []
def transcribe_feature(speech_file):
    """Transcribe the audio file and execute the recognised command.

    Returns:
        The result of ``command.perform_task`` for the first transcript, or
        ``None`` when recognition yields no alternatives.
    """
    from google.cloud import speech

    client = speech.Client()

    with io.open(speech_file, 'rb') as source:
        raw_audio = source.read()
        sample = client.sample(content=raw_audio,
                               source_uri=None,
                               encoding='LINEAR16')

    # NOTE(review): returns on the first alternative - confirm this matches
    # the intended nesting of the return statement.
    for candidate in sample.recognize('en-US'):
        spoken = candidate.transcript
        print('feature ' + spoken)
        return command.perform_task(spoken)
def main():
    """Stream microphone audio to Google Speech and print the results.

    Audio comes from ``MicAsFile(RATE, CHUNK)``; the streaming responses
    (including interim results) are handed to ``listen_print_loop``.
    """
    client = speech.Client()

    with MicAsFile(RATE, CHUNK) as mic:
        sample = client.sample(
            stream=mic,
            encoding=speech.encoding.Encoding.LINEAR16,
            sample_rate_hertz=RATE)

        # A BCP-47 language tag; supported languages are listed at
        # http://g.co/cloud/speech/docs/languages
        language_code = 'en-US'
        responses = sample.streaming_recognize(
            language_code=language_code,
            interim_results=True)

        # Consume the transcription responses.
        listen_print_loop(responses)
def test_use_bytes_instead_of_file_like_object(self):
    """Streaming recognition of a bytes-content sample raises ValueError.

    Also checks that nothing was requested over the connection.
    """
    from google.cloud import speech
    from google.cloud.speech.sample import Sample

    creds = _make_credentials()
    client = speech.Client(credentials=creds, use_gax=True)
    fake_connection = _Connection()
    client.connection = fake_connection
    client.connection.credentials = creds

    # The sample carries raw bytes rather than a stream.
    bytes_sample = Sample(content=b'',
                          encoding=speech.Encoding.FLAC,
                          sample_rate=self.SAMPLE_RATE)

    api = self._make_one(client)
    with self.assertRaises(ValueError):
        api.streaming_recognize(bytes_sample)
    self.assertEqual(client.connection._requested, [])
def _recognize(recording_url):
    """Fetch a WAV recording and transcribe it as ``zh-HK`` speech.

    The recording is fetched with up to four attempts, 0.5 seconds apart,
    retrying on ``urllib2.HTTPError``. Whether recognition succeeds or
    fails, a background thread running ``_delete_recording`` is started for
    the URL and the opened handles are closed.

    Args:
        recording_url: URL of the WAV recording.

    Returns:
        The transcript of the first recognition result.

    Raises:
        ValueError: if the recording could not be fetched in four attempts.
        Exception: any other failure is logged and re-raised.
    """
    content = None
    wav = None
    try:
        for _ in range(4):
            try:
                content = urllib2.urlopen(recording_url)
            except urllib2.HTTPError as e:
                app.logger.warn(
                    "evt=fetch_recording_error sid=%s recording_url=%s err=%s",
                    request.form['CallSid'], recording_url, e)
                time.sleep(0.5)
            else:
                # Stop retrying once the fetch succeeds. Previously the loop
                # always ran four times, re-downloading the recording and
                # leaking the earlier responses.
                break
        if content is None:
            raise ValueError("Fetch recording failed")

        wav = wave.open(content, 'r')
        encoding = 'LINEAR16'
        sample_rate = wav.getframerate()
        frames = wav.readframes(wav.getnframes())

        client = speech.Client()
        sample = client.sample(encoding=encoding,
                               sample_rate=sample_rate,
                               content=frames)
        results = sample.sync_recognize(max_alternatives=1,
                                        language_code='zh-HK',
                                        speech_context=speech_context)
        app.logger.info(
            "evt=recognize_success sid=%s recording_url=%s transcript=%s confidence=%s",
            request.form['CallSid'], recording_url, results[0].transcript,
            results[0].confidence)
        return results[0].transcript
    except Exception as e:
        app.logger.warn("evt=recognize_fail sid=%s recording_url=%s err=%s",
                        request.form['CallSid'], recording_url, e)
        raise
    finally:
        # Clean up: start the recording deletion in the background and close
        # whatever was opened.
        t = threading.Thread(target=_delete_recording, args=(recording_url, ))
        t.start()
        if content is not None:
            content.close()
        if wav is not None:
            wav.close()