def comment_to_mp3(input_text, FILEPATH_QUOTA, POST_ID, gen_number, randomize=False):
    """Synthesize `input_text` to an MP3 under ./<POST_ID>/, enforcing a character quota.

    Args:
        input_text: text to synthesize.
        FILEPATH_QUOTA: path to a file whose first line is the remaining character quota.
        POST_ID: directory name for the output file (created if missing).
        gen_number: numeric prefix for the output filename.
        randomize: if True, pick a random voice from the module-level `names_list`.

    Raises:
        Exception: when the remaining quota is too small for this request.
    """
    num_of_chars = len(input_text)
    with open(FILEPATH_QUOTA, 'r') as f:
        line = f.readline()
        quota_remaining = int(line)
    # Keep a 100-char safety margin so the quota file never goes negative.
    if quota_remaining < num_of_chars + 100:
        raise Exception("Quota depleted :(")
    if randomize:
        selected_voice = random.choice(names_list)
        print(f"RANDOM VOICE = {selected_voice}")
    else:
        selected_voice = 'en-US-Wavenet-H'
    client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(text=input_text)
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
        name=selected_voice)
    # Select the type of audio file you want returned
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    # Perform the text-to-speech request on the text input with the selected
    # voice parameters and audio file type
    response = client.synthesize_speech(input=synthesis_input,
                                        voice=voice,
                                        audio_config=audio_config)
    with open(FILEPATH_QUOTA, 'w') as f:
        quota_remaining -= num_of_chars
        print(f"-----QUOTA REMAINING: {quota_remaining}-----")
        f.write(str(quota_remaining))
    # Build a filesystem-safe filename from the first few characters of the text.
    filename = input_text[0:8].replace(".", "").replace("*", "")
    # Fixed: raw string for the regex (avoids an invalid-escape warning).
    filename = re.sub(r'[\W_]+', '', filename)
    # Fixed: exist_ok avoids the check-then-create race.
    os.makedirs(POST_ID, exist_ok=True)
    with open(f"./{POST_ID}/{str(gen_number)+filename}.mp3", "wb") as out:
        # Write the response to the output file.
        out.write(response.audio_content)
        print(
            f'Audio content written to file "{str(gen_number)+filename}.mp3"')
def text_to_wav(voice_name, text):
    '''
    Using Google Cloud Platform to handle the text-to-speech task

    source 1: https://codelabs.developers.google.com/codelabs/cloud-text-speech-python3/index.html?index=..%2F..index#8
    source 2: https://cloud.google.com/text-to-speech/docs/reference/libraries
    You will need your own Google Cloud Platform authentication file
    -----------------------------
    Convert the text to wav file
    Args:
        voice_name: (str) name for the wave file
        text: (str) The text that is converted to speech
    Returns:
        filename: (str) name of the wave file
    '''
    # Derive e.g. "en-US" from a full voice name like "en-US-Wavenet-A".
    language_code = '-'.join(voice_name.split('-')[:2])
    output_name = f'output-{get_date()}'
    # Instantiates a client with your Google Cloud Platform authentication json file
    client = texttospeech.TextToSpeechClient.from_service_account_json(
        "<YOUR_AUTHENTICATION_FILE.json>")
    # Set the text input to be synthesized
    synthesis_input = texttospeech.SynthesisInput(text=text)
    # Build the voice request, select the language code ("en-US") and the ssml
    # voice gender ("neutral")
    voice = texttospeech.VoiceSelectionParams(language_code=language_code,
                                              name=voice_name)
    # Select the type of audio file you want returned
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16)
    response = client.synthesize_speech(input=synthesis_input,
                                        voice=voice,
                                        audio_config=audio_config)
    filename = f'{output_name}.wav'
    with open(filename, 'wb') as out:
        out.write(response.audio_content)
    # Fixed: report the actual file that was written (the message previously
    # interpolated nothing); the redundant close() after the `with` block
    # was removed — the context manager already closed the file.
    print(f'Audio content written to "{filename}"')
    return filename
def create_audio_file(self, filename, language, text):
    """Synthesize `text` in `language` with a male voice and save the MP3 to `filename`."""
    voice_params = texttospeech.VoiceSelectionParams(
        language_code=language,
        ssml_gender=texttospeech.SsmlVoiceGender.MALE)
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    result = self.cloudTTSClient.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=voice_params,
        audio_config=mp3_config)
    with open(filename, "wb") as audio_file:
        audio_file.write(result.audio_content)
    return
def synthesise(self, text=None, ssml=None, filename="output"):
    """Synthesize `text` or `ssml` with the en-US-Wavenet-J voice to an MP3.

    Args:
        text: plain-text input (mutually exclusive with `ssml` in practice).
        ssml: SSML input.
        filename: base name (without extension) of the output file.
    """
    synthesis_input = texttospeech.SynthesisInput(text=text, ssml=ssml)
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US", name="en-US-Wavenet-J"
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    response = self.client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )
    # Fixed: the `filename` parameter was never interpolated into the output
    # path, so every call overwrote the same file.
    with open(f'/home/fareed/Music/records/{filename}.mp3', "wb") as out:
        out.write(response.audio_content)
def __init__(self, sio, sid, bot_message_evt, message):
    """Keep socket context and pre-build a French female LINEAR16 TTS configuration."""
    # Socket.IO plumbing for replying to the right client.
    self.sio = sio
    self.sid = sid
    self.bot_message_evt = bot_message_evt
    self.message = message
    # One reusable client plus fixed voice/audio settings for every request.
    self.client = texttospeech.TextToSpeechClient()
    self.voice = texttospeech.VoiceSelectionParams(
        language_code="fr-FR",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    self.audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
    )
def gen_letters_audio():
    """Generate one audio file per lowercase letter into the 'letters_audio' directory."""
    client = texttospeech.TextToSpeechClient()
    # Fixed: BCP-47 language codes use a hyphen ('en-US'); 'en_US' is not a
    # valid code and is inconsistent with the 'en-US-Wavenet-E' voice name.
    voice = texttospeech.VoiceSelectionParams(language_code='en-US',
                                              name='en-US-Wavenet-E')
    # Select the type of audio file you want returned
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    for letter in string.ascii_lowercase:
        audio = text2audio(client, letter, voice, audio_config)
        audio2file(letter, audio, prepath='letters_audio')
def speak(text, languageCode, voiceName=None, speakingRate=1):
    """Converts text to audio

    Args:
        text (String): Text to be spoken
        languageCode (String): Language (i.e. "en")
        voiceName: (String, optional): See https://cloud.google.com/text-to-speech/docs/voices
        speakingRate: (int, optional): speed up or slow down speaking
    Returns:
        bytes : Audio in wav format
    """
    tts_client = texttospeech.TextToSpeechClient()
    spoken_text = texttospeech.SynthesisInput(text=text)
    # Without an explicit voice name, let the API choose a neutral voice
    # for the requested language.
    if voiceName:
        voice_selection = texttospeech.VoiceSelectionParams(
            language_code=languageCode, name=voiceName)
    else:
        voice_selection = texttospeech.VoiceSelectionParams(
            language_code=languageCode,
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=speakingRate)
    result = tts_client.synthesize_speech(
        input=spoken_text, voice=voice_selection, audio_config=mp3_config)
    return result.audio_content
def text_to_speech(text, outfile):
    """Converts plaintext to SSML and generates synthetic audio from SSML

    ARGS
        text: text to synthesize
        outfile: filename to use to store synthetic audio
    RETURNS
        nothing
    """
    # Escape special characters (e.g. '<' becomes '&lt;', '&' becomes '&amp;')
    # so the API does not mistake the text for SSML commands.
    escaped_lines = html.escape(text)
    # Wrap in <speak> and insert a two-second pause after each line.
    ssml = "<speak>{}</speak>".format(
        escaped_lines.replace("\n", '\n<break time="2s"/>'))
    client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(ssml=ssml)
    # en-US, male SSML voice.
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE)
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    request = texttospeech.SynthesizeSpeechRequest(
        input=synthesis_input, voice=voice, audio_config=audio_config)
    response = client.synthesize_speech(request=request)
    # Persist the binary audio payload.
    with open(outfile, "wb") as out:
        out.write(response.audio_content)
    print("Audio content written to file " + outfile)
def __init__(self, language_code, voice_name=None, command_prefix='!tts'):
    """Set up the TTS client, voice selection and MP3 output config for the bot."""
    super().__init__()
    self.command_prefix = command_prefix
    self.tts_client = texttospeech.TextToSpeechClient()
    # voice_name may be None, in which case the API picks a neutral voice
    # for the language.
    self.voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        name=voice_name,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    self.audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )
def create_audio(text):
    """Synthesize `text` as en-US speech and return the raw audio bytes.

    NOTE(review): relies on module-level `client` and `audio_config` being
    defined elsewhere in this file — confirm they exist at import time.
    """
    voice_params = texttospeech.VoiceSelectionParams(language_code="en-US")
    reply = client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=voice_params,
        audio_config=audio_config,
    )
    return reply.audio_content
def speech_synthesize(text):
    """Synthesize `text` as neutral en-US speech and save it to output.mp3."""
    tts = texttospeech.TextToSpeechClient()
    request_input = texttospeech.SynthesisInput(text=text)
    neutral_voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)
    mp3_out = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    result = tts.synthesize_speech(
        input=request_input, voice=neutral_voice, audio_config=mp3_out)
    with open("output.mp3", "wb") as out:
        out.write(result.audio_content)
    print('Audio content written to file "output.mp3"')
def __init__(self):
    """Create the TTS client and the fixed French-female MP3 request settings."""
    self.client = texttospeech.TextToSpeechClient()
    # Voice request: French, female.
    # noinspection PyTypeChecker
    self.voice = texttospeech.VoiceSelectionParams(
        language_code="fr-FR",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE)
    # Returned audio format: MP3.
    # noinspection PyTypeChecker
    self.audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
def __init__(self, key, voiceParams=None):
    """Configure Google Cloud credentials and the TTS client.

    Args:
        key: path to the Google Cloud service-account JSON key file.
        voiceParams: optional texttospeech.VoiceSelectionParams; defaults to
            the British English male voice en-GB-Wavenet-B.
    """
    # Fixed: the VoiceSelectionParams object was previously built in the
    # default-argument position, which evaluates at import time (requiring
    # the library before credentials are set) and shares one object across
    # all instances. A None sentinel defers construction to call time.
    if voiceParams is None:
        voiceParams = texttospeech.VoiceSelectionParams(
            language_code="en-GB",
            name="en-GB-Wavenet-B",
            ssml_gender=texttospeech.SsmlVoiceGender.MALE)
    # store Google Cloud key in a system environment variable
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key
    # store voice parameters and audio configuration
    self.voiceParams = voiceParams
    # initialize and store GC TextToSpeech client
    self.client = texttospeech.TextToSpeechClient()
def text_to_ogg(text):
    """Synthesize `text` with a South-African-English female voice; return Ogg Opus bytes.

    NOTE(review): uses a module-level `client` defined elsewhere in this file.
    """
    za_voice = texttospeech.VoiceSelectionParams(
        language_code="en-ZA",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE)
    opus_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.OGG_OPUS)
    result = client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=za_voice,
        audio_config=opus_config)
    return result.audio_content
def main():
    """Take a photo, label it with the Vision API, speak the labels, and play the MP3."""
    takephoto()
    with open('image.jpg', 'rb') as image_file:
        content = image_file.read()
    image = vision.types.Image(content=content)
    # Fixed: a stray second call (`client_vision.label(...)`) overwrote the
    # valid label_detection response; `label` is not a Vision client method.
    response = client_vision.label_detection(image=image)
    labels = response.label_annotations
    print('Labels:')
    synthesis_input = ''
    # Make a simple comma delimited string type sentence.
    for label in labels:
        print(label.description)
        synthesis_input = label.description + ', ' + synthesis_input
    synthesis_in = texttospeech.SynthesisInput(text=synthesis_input)
    # Let's make this a premium Wavenet voice in SSML
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Wavenet-A",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE)
    # Select the type of audio file you want returned
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    # Perform the text-to-speech request on the text input with the selected
    # voice parameters and audio file type
    response = client_tts.synthesize_speech(input=synthesis_in,
                                            voice=voice,
                                            audio_config=audio_config)
    # The response's audio_content is binary.
    with open("output.mp3", "wb") as out:
        # Write the response to the output file.
        out.write(response.audio_content)
        print('Audio content written to file "output.mp3"')
    file = "output.mp3"
    # apt install mpg123
    # Play the audio file through the system player.
    os.system("mpg123 " + file)
def text2speech(text, language, output_file):
    """Synthesize `text` in `language`, write a LINEAR16 WAV to `output_file`, return the bytes."""
    tts_client = texttospeech.TextToSpeechClient.from_service_account_json(
        GC_CREDENTIALS)
    request_text = texttospeech.SynthesisInput(text=text)
    voice_choice = texttospeech.VoiceSelectionParams(language_code=language)
    # Headphone-optimized LINEAR16 output.
    wav_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
        effects_profile_id=['headphone-class-device'])
    result = tts_client.synthesize_speech(
        input=request_text, voice=voice_choice, audio_config=wav_config)
    with open(output_file, "wb") as f:
        f.write(result.audio_content)
    print("Speech written to the output file '{}'".format(output_file))
    return result.audio_content
def __init__(self, bot: Bot, config: Dict[str, Any]):
    """Initialize the TTS cog: client, voice/audio settings, dictionary, and state."""
    self.config = config
    # Startup configuration for text-to-speech.
    self.gcp = texttospeech.TextToSpeechClient()
    self.voice = texttospeech.VoiceSelectionParams(
        language_code=self.LANG_CODE,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    self.audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
        sample_rate_hertz=self.SAMPLING_RATE,
    )
    # Pronunciation dictionary and bot wiring; tts_ch is bound later.
    self.dic = SpeakCog.load_dic()
    self.bot = bot
    self.tts_ch = None
def convert_summary_to_audio(summary, filename):
    """Synthesize `summary` as neutral en-US speech into static/<filename>.mp3."""
    from google.cloud import texttospeech
    tts_client = texttospeech.TextToSpeechClient()
    spoken = texttospeech.SynthesisInput(text=summary)
    neutral = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    mp3 = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )
    result = tts_client.synthesize_speech(
        input=spoken, voice=neutral, audio_config=mp3,
    )
    with open("static/" + filename + ".mp3", "wb") as out:
        out.write(result.audio_content)
def __init__(self):
    """Set up logging, the Rasa webhook URL, Google TTS/STT clients, and greet the user.

    Side effects: sets the GOOGLE_APPLICATION_CREDENTIALS environment
    variable and speaks a Greek greeting through the robot on startup.
    """
    # Shared logger wired into the robot API and message classes.
    self.log = Logger("testing")
    self.log.set_debug(True)
    self.rapi = RobotAPI(logger=self.log)
    InputMessage.logger = self.log
    OutputMessage.logger = self.log
    # TekException.logger = self.log
    # Rasa configuration for local mode
    # ip = '192.168.1.4'
    ip = "localhost"
    self.url = f"http://{ip}:5005/webhooks/rest/webhook"
    # ip = 'http://c115e00953e1.ngrok.io'
    # self.url = f"{ip}/webhooks/rest/webhook"
    self.username = '******'
    # Google text to speech configuration.
    # NOTE(review): the credentials JSON is expected to live next to this
    # module — confirm the file is present at deploy time.
    path = os.path.abspath(os.path.dirname(__file__))
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = \
        path + "/elsa-277912-3dfe30a8f65b.json"
    self.tts_client = tts.TextToSpeechClient()
    # Greek neutral voice, 44.1 kHz LINEAR16 output.
    self.audio_config = tts.AudioConfig(
        audio_encoding=tts.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100)
    self.voice = tts.VoiceSelectionParams(\
        language_code = 'el-GR',\
        ssml_gender = tts.SsmlVoiceGender.NEUTRAL)
    # Google speech to text configuration (must match the TTS sample rate).
    self.stt_client = speech.SpeechClient()
    self.speech_config = speech.RecognitionConfig(\
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,\
        sample_rate_hertz=44100,\
        language_code='el-GR')
    # Greet user
    # out = self.rapi.speak(InputMessage({
    #     'texts': ["Γεια σου, είμαι η Έλσα. Πώς θα μπορούσα να σε βοηθήσω;"],
    #     'volume': 80,  # volume may be suppressed by the ELSA's global volume
    #     'language': Languages.EL  # or Languages.EN for English
    # }))
    self.speak(
        text="Γειά σου, είμαι η Έλσα! Πώς θα μπορούσα να σε βοηθήσω;")
    print("App started!")
def ssml_to_speech(text, file):
    """Synthesize Japanese male speech for `text`, write MP3 bytes to `file`, return `file`.

    NOTE(review): despite the name, the input is sent as plain text
    (SynthesisInput(text=...)), not as SSML — confirm intent with callers.
    """
    tts_client = texttospeech.TextToSpeechClient()
    request_input = texttospeech.SynthesisInput(text=text)
    jp_male = texttospeech.VoiceSelectionParams(
        language_code="ja-JP",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE,
    )
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )
    result = tts_client.synthesize_speech(
        input=request_input, voice=jp_male, audio_config=mp3_config,
    )
    with open(file, "wb") as out:
        out.write(result.audio_content)
    print("Audio content written to file " + file)
    return file
def __init__(self, lang, name, gender):
    """Build a TTS client plus voice/audio settings from language, voice name and gender."""
    self.__lang__ = lang
    self.__name__ = name
    # Any value other than the literal "Male" selects the female voice.
    self.__gender__ = (texttospeech.SsmlVoiceGender.MALE
                       if gender == "Male"
                       else texttospeech.SsmlVoiceGender.FEMALE)
    self.voice = texttospeech.VoiceSelectionParams(
        language_code=self.__lang__,
        name=self.__name__,
        ssml_gender=self.__gender__,
    )
    self.client = texttospeech.TextToSpeechClient()
    self.audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16)
def __synthesize_voice(self, news_txt):
    """Synthesize `news_txt` with the configured voice at 0.9x speed; return the raw response."""
    news_input = texttospeech.SynthesisInput(text=news_txt)
    # Gender left unspecified: the named voice (self.voice_name) decides it.
    chosen_voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.
        SSML_VOICE_GENDER_UNSPECIFIED,
        name=self.voice_name)
    slow_mp3 = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=0.9)
    return self.client.synthesize_speech(input=news_input,
                                         voice=chosen_voice,
                                         audio_config=slow_mp3)
def text_to_mp3(voice_name, text):
    """Synthesize `text` with `voice_name` and upload the MP3 to GCS via store_audio_to_gcs."""
    # "en-US-Wavenet-A" -> "en-US"
    language_code = "-".join(voice_name.split("-")[:2])
    tts_input = texttospeech.SynthesisInput(text=text)
    chosen_voice = texttospeech.VoiceSelectionParams(
        language_code=language_code, name=voice_name)
    # Default rate and pitch, MP3 container.
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=1.00,
        pitch=0.00)
    tts_client = texttospeech.TextToSpeechClient()
    result = tts_client.synthesize_speech(input=tts_input,
                                          voice=chosen_voice,
                                          audio_config=mp3_config)
    return store_audio_to_gcs(result.audio_content)
def text_to_mp3(voice_name, text):
    """Read out the given text in the given voice and write it to a file.
    """
    # Derive e.g. "en-US" from a full voice name like "en-US-Wavenet-A".
    language_code = "-".join(voice_name.split("-")[:2])
    text_input = tts.SynthesisInput(text=text)
    voice_params = tts.VoiceSelectionParams(language_code=language_code,
                                            name=voice_name)
    audio_config = tts.AudioConfig(audio_encoding=tts.AudioEncoding.MP3)
    client = tts.TextToSpeechClient()
    response = client.synthesize_speech(input=text_input,
                                        voice=voice_params,
                                        audio_config=audio_config)
    filename = f"{language_code}.mp3"
    with open(filename, "wb") as out:
        out.write(response.audio_content)
    # Fixed: the message previously contained no placeholder and never
    # reported which file was written.
    print(f'Audio content written to "{filename}"')
def generate_audio_tts(text, audio_path):
    """Synthesize `text` with the module-configured voice and write it to `audio_path`.

    NOTE(review): depends on module-level `client`, `logger`, and the
    voice_* / voice_encoding configuration names defined elsewhere.
    """
    spoken_text = texttospeech.SynthesisInput(text=text)
    chosen_voice = texttospeech.VoiceSelectionParams(
        language_code=voice_language_code,
        name=voice_language_name,
        ssml_gender=voice_gender)
    output_config = texttospeech.AudioConfig(audio_encoding=voice_encoding)
    result = client.synthesize_speech(input=spoken_text,
                                      voice=chosen_voice,
                                      audio_config=output_config)
    logger.info("Audio synthetized")
    # The response's audio_content is binary.
    with open(audio_path, "wb") as out:
        out.write(result.audio_content)
    logger.info("Audio content written to file {}".format(audio_path))
def translate_and_speech(text, translate_client, speech_client, mp3_num, now_str):
    """Translate Japanese `text` to English, synthesize it, and play it in sequence.

    Appends the original and translated text to a per-run transcript file,
    writes the synthesized MP3, then uses a shared "playstate" file as a
    simple sequencing lock so that clip `mp3_num` only plays after clip
    `mp3_num - 1` has finished.

    Args:
        text: Japanese source text (str or bytes).
        translate_client: Google Cloud Translate client.
        speech_client: Google Cloud Text-to-Speech client.
        mp3_num: sequence number of this clip within the run.
        now_str: timestamp string shared by all files of this run.
    """
    # All artifacts are named after this script plus the run timestamp.
    filename_prefix = os.path.splitext(os.path.basename(__file__))[0] + "_"
    translation_filename = filename_prefix + "translation_" + now_str + ".txt"
    mp3_filename = filename_prefix + "out_" + now_str + "_" + str(
        mp3_num) + ".mp3"
    playstate_filename = filename_prefix + "play_" + now_str + ".txt"
    if isinstance(text, six.binary_type):
        text = text.decode("utf-8")
    translation = translate_client.translate(text,
                                             source_language="ja",
                                             target_language="en",
                                             format_="text")
    # Append sequence number, source text and translation to the transcript.
    with open(translation_filename, 'a') as f:
        print(str(mp3_num), file=f)
        print(text, file=f)
        print(translation["translatedText"], file=f)
    print(translation["translatedText"])
    synthesis_input = texttospeech.SynthesisInput(
        text=translation["translatedText"])
    voice = texttospeech.VoiceSelectionParams(
        language_code=out_language_code,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    out_speech = speech_client.synthesize_speech(input=synthesis_input,
                                                 voice=voice,
                                                 audio_config=audio_config)
    with open(mp3_filename, "wb") as out:
        out.write(out_speech.audio_content)
    # Poll the playstate file until it is this clip's turn to play
    # (the previous caller writes mp3_num when its clip finishes).
    if os.path.isfile(playstate_filename):
        while True:
            f = open(playstate_filename, 'r')
            play_count = f.read()
            f.close()
            if int(play_count) == mp3_num:
                break
            time.sleep(1)
    # Mark this clip as currently playing, play it, then hand the
    # token to the next clip in sequence.
    with open(playstate_filename, "w") as out:
        out.write(str(mp3_num))
    playsound(mp3_filename)
    with open(playstate_filename, "w") as out:
        out.write(str(mp3_num + 1))
def synthesis_speech(text, lang='日本語', gender='male'):
    """Synthesize `text` as MP3 using the module lang_code/gender_type maps; return the response."""
    tts_client = texttospeech.TextToSpeechClient()
    spoken = texttospeech.SynthesisInput(text=text)
    # `lang` and `gender` are human-readable keys resolved via module-level maps.
    chosen_voice = texttospeech.VoiceSelectionParams(
        language_code=lang_code[lang],
        ssml_gender=gender_type[gender])
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    return tts_client.synthesize_speech(input=spoken,
                                        voice=chosen_voice,
                                        audio_config=mp3_config)
def synthesize(self, text: str, voice: Voice) -> bytes:
    """Synthesizes text and returns the voice as an mp3 file."""
    # Build the selection from the Voice record without shadowing the
    # `voice` parameter itself.
    selection = texttospeech.VoiceSelectionParams(
        language_code=voice.language_code,
        name=voice.name,
        ssml_gender=voice.gender,
    )
    slow_mp3 = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=0.75,
    )
    request = texttospeech.SynthesizeSpeechRequest(
        input=texttospeech.SynthesisInput(text=text),
        voice=selection,
        audio_config=slow_mp3,
    )
    return bytes(self._client.synthesize_speech(request=request).audio_content)
def text_to_pcm(input, lang):
    """Synthesize `input` in language `lang` (default en-GB); return raw audio bytes.

    NOTE(review): relies on module-level `client` and `audio_config`.
    The parameter name `input` shadows the builtin but is kept for
    caller compatibility.
    """
    lang = lang or 'en-GB'
    # en-GB has a hand-picked Wavenet voice; other languages fall back to
    # their "-Wavenet-A" voice.
    voice_name = 'en-GB-Wavenet-F' if lang == 'en-GB' else lang + '-Wavenet-A'
    chosen_voice = texttospeech.VoiceSelectionParams(language_code=lang,
                                                     name=voice_name)
    result = client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=input),
        voice=chosen_voice,
        audio_config=audio_config)
    return result.audio_content
def audio():
    """Flask view: stop the camera, synthesize the global `data` text, render audio.html.

    NOTE(review): calling __del__/closefile on fresh VideoCamera()
    instances looks like a workaround for releasing the device —
    confirm against the VideoCamera implementation.
    """
    # video_stream.__del__()
    # video_stream.closefile()
    VideoCamera().__del__()
    VideoCamera().closefile()
    global data
    # Synthesizes speech from the input string of text or ssml.
    # Note: ssml must be well-formed per https://www.w3.org/TR/speech-synthesis/
    tts_client = texttospeech.TextToSpeechClient()
    # with open('flaskblog/output.txt', 'r') as file:
    #     data = file.read().replace('\n', '')
    spoken_text = texttospeech.SynthesisInput(text=data)
    # Neutral en-US voice, MP3 output.
    neutral_voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    result = tts_client.synthesize_speech(input=spoken_text,
                                          voice=neutral_voice,
                                          audio_config=mp3_config)
    # The response's audio_content is binary; save it where the template
    # can serve it.
    with open("flaskblog/static/output.mp3", "wb") as out:
        out.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
    return render_template('audio.html')