def synthesize_text(text: str, output_audio_file_path: Path, language='DE'):
    """Synthesize ``text`` with a female WaveNet voice and save it as MP3.

    Args:
        text: Plain text to convert to speech.
        output_audio_file_path: Destination file. It is opened in binary
            write mode, so existing content is overwritten.
        language: ``'DE'`` selects the ``de-DE-Wavenet-F`` voice, ``'EN'``
            selects ``en-US-Wavenet-F``.

    Raises:
        NotImplementedError: If ``language`` is neither ``'DE'`` nor ``'EN'``.
    """
    # Resolve the voice first so an unsupported language fails immediately,
    # before any client is constructed or any request is sent.
    if language == 'DE':
        language_code, voice_name = "de-DE", "de-DE-Wavenet-F"
    elif language == 'EN':
        language_code, voice_name = "en-US", "en-US-Wavenet-F"
    else:
        raise NotImplementedError(
            f"Unsupported language {language!r}; expected 'DE' or 'EN'"
        )

    client = texttospeech.TextToSpeechClient()
    input_text = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        name=voice_name,
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    response = client.synthesize_speech(
        request={"input": input_text, "voice": voice, "audio_config": audio_config}
    )

    # The response's audio_content is binary.
    with open(output_audio_file_path.absolute(), "wb") as out:
        out.write(response.audio_content)
class TextToSpeechForm(FlaskForm):
    """User form for submitting text, a language and a voice gender.

    The language choices are built while the class body executes, by asking
    the Text-to-Speech API for its list of voices.
    """

    # Set the gcloud credentials environment variable to the key file.
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = (
        'containerize-webapp-4f12b2a3ba23.json')

    # One client and one list-voices request.
    client = texttospeech.TextToSpeechClient()
    voices = client.list_voices()

    # First language code of every voice, de-duplicated in first-seen order.
    voice_codes_list = list(
        dict.fromkeys(voice.language_codes[0] for voice in voices.voices))
    # (1-based index, language code) pairs used as select choices.
    language_list = list(enumerate(voice_codes_list, start=1))

    voice_gender = [(1, "Male"), (2, "Female")]

    text_field = TextAreaField('Input Text', validators=[DataRequired()])
    # NOTE(review): default=12 is a position in language_list, which follows
    # the order of the API response -- confirm it still selects the intended
    # language.
    language_options = SelectField(
        'Input Language',
        validators=[Optional()],
        choices=language_list,
        default=12,
    )
    gender_options = SelectField(
        'Voice Gender',
        validators=[Optional()],
        choices=voice_gender,
        default=1,
    )
    submit = SubmitField('Convert Text to Speech')
def __init__(self, voice_name, text):
    """Synthesize ``text`` with ``voice_name`` unless a cached file exists.

    The language code is taken from the first two dash-separated parts of
    ``voice_name``. The target file name combines the language code, the
    voice name and ``self.encryptDecrypt(text)``. When the method returns,
    ``self.filename`` holds that path and ``self.done`` is True.
    """
    self.done = False

    language_code = '-'.join(voice_name.split('-')[:2])
    text_key = self.encryptDecrypt(text)
    filename = f'audio/{language_code}_{voice_name}_{text_key}.wav'

    if path.exists(filename):
        # Audio already cached: skip the synthesis request.
        print(filename + " already exists!")
    else:
        request_input = tts.SynthesisInput(text=text)
        request_voice = tts.VoiceSelectionParams(
            language_code=language_code, name=voice_name)
        request_audio = tts.AudioConfig(
            audio_encoding=tts.AudioEncoding.LINEAR16)

        client = tts.TextToSpeechClient(credentials=credentials)
        response = client.synthesize_speech(
            input=request_input,
            voice=request_voice,
            audio_config=request_audio,
        )

        with open(filename, 'wb') as wav_file:
            wav_file.write(response.audio_content)
            print(f'Audio content written to "{filename}"')

    # Both paths end in the same state.
    self.filename = filename
    self.done = True
def speech():
    """Synthesize the submitted ``fname`` form field and render ``sp.html``.

    The text is converted to en-US, gender-neutral MP3 speech and stored as
    ``static/audio/<text>.mp3``; the same text is passed to the template as
    ``text_to_speech``.

    Raises:
        ValueError: If the submitted text contains a path separator or a NUL
            byte. The text is used verbatim as a file name, so such input
            could otherwise address a file outside ``static/audio``.
    """
    import os
    from google.cloud import texttospeech

    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
        "dnd-text-to-speech-1186db76a3d4.json")

    text_to_speech = request.form['fname']

    # Untrusted input becomes part of a path below: refuse anything that
    # could leave the audio directory, before spending an API request on it.
    if any(token in text_to_speech for token in ('/', '\\', '\x00')):
        raise ValueError(
            "text must not contain path separators or NUL bytes")

    # Instantiates a client
    client = texttospeech.TextToSpeechClient()

    # Plain-text input, en-US gender-neutral voice, MP3 output.
    synthesis_input = texttospeech.types.SynthesisInput(text=text_to_speech)
    voice = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)
    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    response = client.synthesize_speech(synthesis_input, voice, audio_config)

    # The response's audio_content is binary.
    with open('static/audio/{}.mp3'.format(text_to_speech), 'wb') as out:
        out.write(response.audio_content)

    return render_template('sp.html', text_to_speech=text_to_speech)
def tts(msg):
    """Synthesize ``msg`` with a te-IN voice and save the MP3.

    The audio is written to ``audio_file_loc + 'output.mp3'``.
    """
    client = texttospeech.TextToSpeechClient()

    # Request parts, in order: plain-text input, gender-neutral te-IN voice,
    # MP3 output encoding.
    response = client.synthesize_speech(
        texttospeech.types.SynthesisInput(text=msg),
        texttospeech.types.VoiceSelectionParams(
            language_code='te-IN',
            ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL),
        texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.MP3),
    )

    # The response's audio_content is binary.
    with open(audio_file_loc + 'output.mp3', 'wb') as mp3_file:
        mp3_file.write(response.audio_content)
        print('Audio content written to file "output.mp3"')
def __init__(
    self,
    hass,
    key_file=None,
    language=DEFAULT_LANG,
    gender=DEFAULT_GENDER,
    voice=DEFAULT_VOICE,
    encoding=DEFAULT_ENCODING,
    speed=1.0,
    pitch=0,
    gain=0,
    profiles=None,
    text_type=DEFAULT_TEXT_TYPE,
):
    """Store the synthesis settings and create the Text-to-Speech client.

    When ``key_file`` is truthy the client is built from that service
    account JSON file; otherwise the client is created without arguments.
    """
    self.hass = hass
    self.name = "Google Cloud TTS"

    # Voice selection settings.
    self._language = language
    self._gender = gender
    self._voice = voice

    # Audio output settings.
    self._encoding = encoding
    self._speed = speed
    self._pitch = pitch
    self._gain = gain
    self._profiles = profiles
    self._text_type = text_type

    if not key_file:
        self._client = texttospeech.TextToSpeechClient()
    else:
        self._client = (
            texttospeech.TextToSpeechClient.from_service_account_json(key_file)
        )
def list_voices(language_code=None):
    """Print name, languages, gender and sample rate of the available voices.

    ``language_code`` is forwarded to the list-voices request.
    """
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()
    listing = client.list_voices(language_code=language_code)

    for voice in listing.voices:
        # Voice name, e.g. tpc-vocoded
        print(f"Name: {voice.name}")

        # Every language code the voice supports, e.g. "en-US"
        for supported_code in voice.language_codes:
            print(f"Supported language: {supported_code}")

        # Convert the raw gender value to its enum member to get the name.
        gender = texttospeech.SsmlVoiceGender(voice.ssml_gender)
        print(f"SSML Voice Gender: {gender.name}")

        # Natural sample rate in hertz, e.g. 24000
        print(
            f"Natural Sample Rate Hertz: {voice.natural_sample_rate_hertz}\n")
def get_text_to_speech(self, message_text: str):
    """
    Request MP3 speech for the message text and keep the API response.

    The response is stored on ``self.response``; nothing is returned.

    :param message_text: The text to be converted to speech
    """
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()
    tts_types = texttospeech.types
    tts_enums = texttospeech.enums

    # Voice list: https://cloud.google.com/text-to-speech/docs/voices
    # Favourites:
    #   WaveNet  en-IN  en-IN-Wavenet-C  MALE
    #   WaveNet  en-GB  en-GB-Wavenet-B  MALE
    # NOTE(review): language_code is 'en-US' while the voice name is an
    # en-GB voice, and the gender is NEUTRAL although the note above lists
    # the voice as MALE -- confirm this combination is intended.
    request_input = tts_types.SynthesisInput(text=message_text)
    request_voice = tts_types.VoiceSelectionParams(
        language_code='en-US',
        name="en-GB-Wavenet-B",
        ssml_gender=tts_enums.SsmlVoiceGender.NEUTRAL,
    )
    request_audio = tts_types.AudioConfig(
        audio_encoding=tts_enums.AudioEncoding.MP3)

    self.response = client.synthesize_speech(
        request_input, request_voice, request_audio)
def generate_audio_for_text(text_block, out_path, is_title=False):
    """Synthesize ``text_block`` to an MP3 file at ``out_path``.

    Titles use the ``en-US-Wavenet-B`` voice at speaking rate 1.2; all other
    text uses ``en-US-Wavenet-J`` at 1.5. The ``headphone-class-device``
    effects profile is applied in both cases.
    """
    client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(text=text_block)

    voice_name = 'en-US-Wavenet-B' if is_title else 'en-US-Wavenet-J'
    voice = texttospeech.VoiceSelectionParams(
        language_code='en-US', name=voice_name)
    speed = 1.2 if is_title else 1.5

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=speed,
        effects_profile_id=['headphone-class-device'],
    )

    response = client.synthesize_speech(
        input=synthesis_input,
        voice=voice,
        audio_config=audio_config,
    )

    # The response's audio_content is binary.
    with open(out_path, "wb") as mp3_file:
        mp3_file.write(response.audio_content)
        print('Audio content written to file', out_path)
def construct_voice():
    """Speak the contents of ``convertedtext.txt`` aloud.

    The text is synthesized with an en-IN, gender-neutral voice and written
    to ``output.mp3``. That file is converted to ``output.flac`` with
    ``ffmpeg`` (a stale ``output.flac`` is deleted first) and then played
    with the ``play`` command.
    """
    client = texttospeech.TextToSpeechClient()

    # Context manager so the input file is closed as soon as it is read.
    with open("convertedtext.txt", "r") as f:
        txt = f.read()

    synthesis_input = texttospeech.types.SynthesisInput(text=txt)
    voice = texttospeech.types.VoiceSelectionParams(
        language_code='en-IN',
        ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)
    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    response = client.synthesize_speech(synthesis_input, voice, audio_config)

    # The response's audio_content is binary.
    with open('output.mp3', 'wb') as out:
        out.write(response.audio_content)
        print('Audio content written to file "output.mp3"')

    # Delete a previous conversion result before ffmpeg writes a new one.
    if os.path.isfile("output.flac"):
        os.remove("output.flac")
    os.system('ffmpeg -i output.mp3 output.flac')

    print("Playing audio!!!")
    os.system('play output.flac')
def synthesize_text_with_audio_profile(text):
    """Synthesize ``text`` with the handset audio profile and pickle the audio.

    The MP3 bytes returned by the API are pickled into ``speech_data.mp3``
    and then loaded back from that file.
    """
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()

    request_input = texttospeech.types.SynthesisInput(text=text)
    # The voice could also be selected by name; names are available from
    # client.list_voices().
    request_voice = texttospeech.types.VoiceSelectionParams(
        language_code='en-US')
    # Several effects_profile_id values may be given; they are applied in
    # the order listed.
    request_audio = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3,
        effects_profile_id=['handset-class-device'],
    )

    response = client.synthesize_speech(
        request_input, request_voice, request_audio)

    speech_data_filename = 'speech_data.mp3'

    # NOTE(review): the bytes are pickled, so the file holds a pickle stream
    # rather than raw MP3 data despite its extension -- confirm readers
    # expect that.
    with open(speech_data_filename, 'wb') as sink:
        pickle.dump(response.audio_content, sink)
        print('Audio content written to file "%s"' % speech_data_filename)

    # Read the pickled audio back; the loaded value is not used further.
    if os.path.exists(speech_data_filename):
        with open(speech_data_filename, 'rb') as source:
            my_speech_data = pickle.load(source)
def synthesize_text(text):
    """Synthesize ``text`` to ``output.mp3`` and return the MP3 bytes.

    Uses the ``en-US-Wavenet-C`` voice with female SSML gender.
    """
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()
    tts_types = texttospeech.types
    tts_enums = texttospeech.enums

    # Request parts, in order: plain-text input, named voice, MP3 encoding.
    # Voice names can be retrieved with client.list_voices().
    response = client.synthesize_speech(
        tts_types.SynthesisInput(text=text),
        tts_types.VoiceSelectionParams(
            language_code='en-US',
            name='en-US-Wavenet-C',
            ssml_gender=tts_enums.SsmlVoiceGender.FEMALE),
        tts_types.AudioConfig(audio_encoding=tts_enums.AudioEncoding.MP3),
    )

    # The response's audio_content is binary.
    with open('output.mp3', 'wb') as mp3_file:
        mp3_file.write(response.audio_content)
        print('Audio content written to file "output.mp3"')

    return response.audio_content
def run_quickstart():
    """Synthesize ``GlobalVariables.response`` into ``output.mp3``.

    The text is spoken by an en-US voice with female SSML gender. Any
    existing ``output.mp3`` is deleted, the function waits one second, and
    the new audio is then written.
    """
    # [START tts_quickstart]
    from google.cloud import texttospeech

    # Instantiates a client
    client = texttospeech.TextToSpeechClient()

    # Set the text input to be synthesized
    synthesis_input = texttospeech.types.SynthesisInput(
        text=GlobalVariables.response)

    # Voice request: language code 'en-US' (e.g. 'ja-JP' is another option)
    # and female SSML voice gender.
    voice = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)

    # Select the type of audio file you want returned
    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    response = client.synthesize_speech(synthesis_input, voice, audio_config)

    # On the first run there is no previous file; that must not discard the
    # audio that was just synthesized.
    try:
        os.remove('output.mp3')
    except FileNotFoundError:
        pass
    time.sleep(1)

    # The response's audio_content is binary.
    with open('output.mp3', 'wb') as out:
        out.write(response.audio_content)
def create_audio(text, language='ko-KR'):
    """Synthesize ``text`` and save it as ``audio/<text>.mp3``.

    ``language`` is the language code of the gender-neutral voice to
    request.
    """
    client = texttospeech.TextToSpeechClient()
    tts_types = texttospeech.types
    tts_enums = texttospeech.enums

    request_input = tts_types.SynthesisInput(text=text)
    request_voice = tts_types.VoiceSelectionParams(
        language_code=language,
        ssml_gender=tts_enums.SsmlVoiceGender.NEUTRAL,
    )
    request_audio = tts_types.AudioConfig(
        audio_encoding=tts_enums.AudioEncoding.MP3)

    response = client.synthesize_speech(
        request_input, request_voice, request_audio)

    # The text itself is used as the file name.
    with open('audio/{}.mp3'.format(text), 'wb') as mp3_file:
        mp3_file.write(response.audio_content)
def tts_control(synthesis_input):
    """Synthesize ``synthesis_input`` with a ko-KR voice and play it.

    ``synthesis_input`` is passed to the API as the request's ``input``.
    The MP3 is written to ``output.mp3`` and played with ``omxplayer``.
    """
    client = texttospeech.TextToSpeechClient()

    # Gender-neutral ko-KR voice.
    request_voice = texttospeech.VoiceSelectionParams(
        language_code="ko-KR",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    # MP3 output.
    request_audio = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    response = client.synthesize_speech(
        input=synthesis_input,
        voice=request_voice,
        audio_config=request_audio,
    )

    # The response's audio_content is binary.
    with open("output.mp3", "wb") as mp3_file:
        mp3_file.write(response.audio_content)

    print('Playing sound...')
    os.system('omxplayer ./output.mp3')
def synthesize_ssml(ssml):
    """Synthesize an SSML string to ``output.mp3``.

    The SSML must be well-formed according to
    https://www.w3.org/TR/speech-synthesis/

    Example: <speak>Hello there.</speak>
    """
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()
    tts_types = texttospeech.types
    tts_enums = texttospeech.enums

    # Request parts, in order: SSML input, named voice, MP3 encoding.
    # Voice names can be retrieved with client.list_voices().
    response = client.synthesize_speech(
        tts_types.SynthesisInput(ssml=ssml),
        tts_types.VoiceSelectionParams(
            language_code='en-US',
            name='en-US-Wavenet-C',
            ssml_gender=tts_enums.SsmlVoiceGender.FEMALE),
        tts_types.AudioConfig(audio_encoding=tts_enums.AudioEncoding.MP3),
    )

    # The response's audio_content is binary.
    with open('output.mp3', 'wb') as mp3_file:
        mp3_file.write(response.audio_content)
        print('Audio content written to file "output.mp3"')
def convertTextToSegment(self, wordsToSay):
    """Synthesize ``wordsToSay`` and return it as an ``AudioSegment``.

    The MP3 is written to ``tmp.mp3`` under ``self.localTmpFolder`` and
    loaded back from that file.

    Based on
    https://cloud.google.com/text-to-speech/docs/create-audio#text-to-speech-text-python
    """
    segmentFileName = self.localTmpFolder + "tmp.mp3"

    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()

    # Request parts, in order: plain-text input, en-US male voice, MP3
    # encoding. A voice could also be chosen by name (see
    # client.list_voices()).
    response = client.synthesize_speech(
        texttospeech.types.SynthesisInput(text=wordsToSay),
        texttospeech.types.VoiceSelectionParams(
            language_code='en-US',
            ssml_gender=texttospeech.enums.SsmlVoiceGender.MALE),
        texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.MP3),
    )

    # The response's audio_content is binary.
    with open(segmentFileName, 'wb') as mp3_file:
        mp3_file.write(response.audio_content)

    return AudioSegment.from_mp3(segmentFileName)
def list_voices():
    """
    Print the details of every voice returned by the list-voices request.
    """
    from google.cloud import texttospeech
    from google.cloud.texttospeech import enums

    client = texttospeech.TextToSpeechClient()
    listing = client.list_voices()

    for voice in listing.voices:
        # Voice name, e.g. tpc-vocoded
        print(f'Name: {voice.name}')

        # Supported language codes, e.g. "en-US"
        for supported_code in voice.language_codes:
            print(f'Supported language: {supported_code}')

        # Convert the raw gender value to its enum member to get the name.
        gender = enums.SsmlVoiceGender(voice.ssml_gender)
        print(f'SSML Voice Gender: {gender.name}')

        # Natural sample rate in hertz, e.g. 24000
        print(f'Natural Sample Rate Hertz: {voice.natural_sample_rate_hertz}\n')
def convert_text(file_name, string_text, lang_code, voice_name):
    """Synthesize ``string_text`` and write the MP3 to ``file_name``.

    ``lang_code`` and ``voice_name`` select the voice used for the request.
    """
    client = texttospeech.TextToSpeechClient()

    # Request parts, in order: plain-text input, caller-selected voice, MP3
    # encoding.
    response = client.synthesize_speech(
        texttospeech.types.SynthesisInput(text=string_text),
        texttospeech.types.VoiceSelectionParams(
            language_code=lang_code, name=voice_name),
        texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.MP3),
    )

    # The response's audio_content is binary.
    with open(file_name, 'wb') as mp3_file:
        mp3_file.write(response.audio_content)
        print("Audio content written to file " + file_name)
def speak_comp(s):
    """Publish ``s``, synthesize it as en-US speech and play it once.

    ``publish_weather(s)`` is called before the synthesis request. The MP3
    is written to ``output.mp3``, played with ``mpg123`` and then deleted.
    """
    # NOTE(review): these two values are computed but never used.
    word_count = len(s.split(" "))
    seconds_per_word = 0.4

    client = texttospeech.TextToSpeechClient()
    tts_types = texttospeech.types
    tts_enums = texttospeech.enums

    request_input = tts_types.SynthesisInput(text=s)
    request_voice = tts_types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=tts_enums.SsmlVoiceGender.NEUTRAL,
    )
    request_audio = tts_types.AudioConfig(
        audio_encoding=tts_enums.AudioEncoding.MP3)

    publish_weather(s)
    response = client.synthesize_speech(
        request_input, request_voice, request_audio)

    print('response.audio_content')
    print(type(response.audio_content))

    # The response's audio_content is binary.
    with open('output.mp3', 'wb') as mp3_file:
        mp3_file.write(response.audio_content)
        print('Audio content written to file "output.mp3"')

    os.system('mpg123 output.mp3')
    # The file is only needed for playback.
    os.remove('output.mp3')
def synthesize_text(text):
    """Synthesize ``text`` to ``output.mp3``.

    Uses the ``en-US-Standard-C`` voice with female SSML gender.
    """
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()

    # Voice names can be retrieved with client.list_voices().
    request = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": texttospeech.VoiceSelectionParams(
            language_code="en-US",
            name="en-US-Standard-C",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
        ),
        "audio_config": texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3),
    }
    response = client.synthesize_speech(request=request)

    # The response's audio_content is binary.
    with open("output.mp3", "wb") as mp3_file:
        mp3_file.write(response.audio_content)
        print('Audio content written to file "output.mp3"')
def synthesize_ssml(ssml):
    """Synthesize an SSML string to ``output.wav`` as LINEAR16 audio.

    The SSML must be well-formed according to
    https://www.w3.org/TR/speech-synthesis/

    Example: <speak>Hello there.</speak>
    """
    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.SynthesisInput(ssml=ssml)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Standard-C",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
        speaking_rate=1,
        pitch=0,
    )

    response = client.synthesize_speech(
        input=input_text, voice=voice, audio_config=audio_config)

    # The response's audio_content is binary.
    with open("output.wav", "wb") as out:
        out.write(response.audio_content)
        # Report the file that was actually written (it is a .wav file).
        print('Audio content written to file "output.wav"')
def list_languages():
    """Print the number of voice languages, then the languages in a grid.

    Languages come from ``unique_languages_from_voices`` applied to the
    voices returned by the API. They are printed sorted, right-aligned in
    10-character columns, five per row.
    """
    client = texttospeech.TextToSpeechClient()
    languages = unique_languages_from_voices(client.list_voices().voices)

    header = f" Languages: {len(languages)} "
    print(header.center(60, "-"))

    for position, language in enumerate(sorted(languages)):
        # Break the line after every fifth entry.
        terminator = "\n" if position % 5 >= 4 else ""
        print(f"{language:>10}", end=terminator)
def trySetupService(self):
    """Try to create the Text-to-Speech client from the configured key file.

    Reads the key file path from ``self.settings.getKeyFile()``. When a path
    is configured, service-account credentials are loaded from it and the
    resulting client is stored on ``self.service``. Setup is best-effort:
    a failure is reported on stdout and ``self.service`` is not assigned.
    """
    api_key_file = self.settings.getKeyFile()
    if not api_key_file:
        return

    try:
        credentials = service_account.Credentials.from_service_account_file(
            api_key_file)
        self.service = texttospeech.TextToSpeechClient(credentials=credentials)
    except Exception as exc:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate; include the cause to aid diagnosis.
        print('speech setup failed:', exc)
def setUpTTS():
    """Initialise the module-level TTS client, voice and audio settings.

    Assigns the globals ``ttsClient``, ``voice`` (en-US, female SSML
    gender) and ``audio_config`` (MP3 encoding).
    """
    global voice, audio_config, ttsClient

    ttsClient = texttospeech.TextToSpeechClient()

    tts_types = texttospeech.types
    tts_enums = texttospeech.enums
    voice = tts_types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=tts_enums.SsmlVoiceGender.FEMALE,
    )
    audio_config = tts_types.AudioConfig(
        audio_encoding=tts_enums.AudioEncoding.MP3,
    )
def list_languages():
    """Print a count header followed by the sorted voice languages.

    Languages come from ``unique_languages_from_voices`` applied to the
    voices returned by the API. Each one is right-aligned in a 10-character
    column, with a line break after every fifth entry.
    """
    client = tts.TextToSpeechClient()
    available_voices = client.list_voices().voices
    languages = unique_languages_from_voices(available_voices)

    print(f' Languages: {len(languages)} '.center(60, '-'))

    for index, language in enumerate(sorted(languages)):
        if index % 5 < 4:
            print(f'{language:>10}', end='')
        else:
            # Fifth column: finish the row.
            print(f'{language:>10}', end='\n')
def __init__(self):
    """Create the TTS client and the default voice and audio settings.

    The voice is en-US with neutral SSML gender, the audio encoding is MP3,
    and ``self.response`` starts out as None.
    """
    self.client = texttospeech.TextToSpeechClient()

    tts_types = texttospeech.types
    tts_enums = texttospeech.enums
    self.voice = tts_types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=tts_enums.SsmlVoiceGender.NEUTRAL,
    )
    self.audio_config = tts_types.AudioConfig(
        audio_encoding=tts_enums.AudioEncoding.MP3,
    )

    self.response = None
def backend_process(text, language_code):
    """Return the result of ``text2speech`` for ``text``.

    A Text-to-Speech client is created with service-account credentials
    loaded from the file named by ``keys['google_speech']`` and handed to
    ``text2speech`` together with ``text`` and ``language_code``.
    """
    credentials = service_account.Credentials.from_service_account_file(
        keys['google_speech'])
    client = texttospeech.TextToSpeechClient(credentials=credentials)
    return text2speech(client, text, language_code)
def __init__(self):
    """Initialise the base class, then the TTS client and its settings.

    The voice is en-US with female SSML gender and the audio encoding is
    LINEAR16.
    """
    super(SpeechClient, self).__init__()

    self.client = texttospeech.TextToSpeechClient()

    tts_types = texttospeech.types
    tts_enums = texttospeech.enums
    self.voice = tts_types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=tts_enums.SsmlVoiceGender.FEMALE,
    )
    self.audio_config = tts_types.AudioConfig(
        audio_encoding=tts_enums.AudioEncoding.LINEAR16,
    )
def trans_text():
    """Translate the submitted text to English, synthesize it and publish it.

    Reads the ``text`` form field, translates it to English, converts the
    translation to en-US, gender-neutral MP3 speech, uploads the audio as
    ``output.mp3`` to the ``CLOUD_STORAGE_BUCKET`` bucket, makes that blob
    public and renders ``transtext.html`` with one entry holding the input
    text, the translation and a link to the audio.
    """
    # Get the text the user entered.
    input_text = request.form['text']

    translate_client = translate.Client()

    # Normalise bytes to text. The decoded value must be assigned back to
    # input_text, the variable that is translated and rendered below.
    if isinstance(input_text, six.binary_type):
        input_text = input_text.decode('utf-8')

    # Text can also be a sequence of strings, in which case translate()
    # returns a sequence of results; here a single string is passed.
    result = translate_client.translate(input_text, target_language='en')
    # Only the translated text is of interest.
    translation = result['translatedText']

    # --- Translated text to speech ---
    client = texttospeech.TextToSpeechClient()

    synthesis_input = texttospeech.types.SynthesisInput(text=translation)
    voice = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=texttospeech.enums.SsmlVoiceGender.NEUTRAL)
    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    response = client.synthesize_speech(synthesis_input, voice, audio_config)

    # --- Store the audio in the bucket ---
    speech_file = 'output.mp3'
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(CLOUD_STORAGE_BUCKET)

    # Every request uploads to the same blob name, replacing earlier audio.
    blob = bucket.blob(speech_file)
    blob.upload_from_string(response.audio_content, content_type='audio/mpeg')

    # Make the blob publicly viewable.
    blob.make_public()

    entries = [
        dict(iText=input_text,
             transText=translation,
             link='https://storage.cloud.google.com/' + CLOUD_STORAGE_BUCKET
             + '/' + speech_file)
    ]

    # Go to transtext.html.
    return render_template('transtext.html', entries=entries)