Пример #1
0
def write_speech(text, file_name, config):
    """Synthesize ``text`` to MP3 audio and return the raw audio bytes.

    Args:
        text: Plain text to synthesize.
        file_name: Only echoed in the debug output; nothing is written to
            disk by this function.
        config: Mapping whose optional ``'language'`` entry selects the voice
            language code (falls back to ``'en-US'``).

    Returns:
        The ``audio_content`` of the synthesis response.
    """
    print('WRITE SPEECH')
    print([text, file_name, config])

    client = texttospeech.TextToSpeechClient()

    # Built exactly once; the original created a second, unused copy.
    input_text = texttospeech.SynthesisInput(text=text)

    voice = texttospeech.VoiceSelectionParams(
        language_code=config.get('language', 'en-US'),
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    response = client.synthesize_speech(request={
        "input": input_text,
        "voice": voice,
        "audio_config": audio_config
    })
    print('GOT GOO SPEECH REQ')

    return response.audio_content
Пример #2
0
def playAudioFromText(Text="",):
    """Synthesize ``Text`` and play it through the mixer, blocking until done.

    Args:
        Text: Text to speak. When it contains ``<speak>`` it is sent as SSML,
            otherwise as plain text.

    The voice language, voice name and speaking rate are read from the
    ``'language-tts'``, ``'name-tts'`` and ``'speaking_rate-tts'`` settings.
    """
    import time  # local import so the block does not rely on a file-level one

    # Read the settings once instead of once per option.
    settings = Settings.getSetting()

    client = texttospeech.TextToSpeechClient()

    if '<speak>' in Text:
        synthesis_input = texttospeech.SynthesisInput(ssml=Text)
    else:
        synthesis_input = texttospeech.SynthesisInput(text=Text)

    voice = texttospeech.VoiceSelectionParams(
        language_code=settings['language-tts'],
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
        name=settings['name-tts'])

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=settings['speaking_rate-tts'])

    response = client.synthesize_speech(input=synthesis_input,
                                        voice=voice,
                                        audio_config=audio_config)

    # The context manager closes the in-memory buffer even if playback fails.
    with io.BytesIO(response.audio_content) as mem_file:
        mixer.init()
        mixer.music.load(mem_file)
        mixer.music.play(0)
        # Sleep between polls: an empty busy-wait would pin a CPU core for
        # the whole duration of the playback.
        while mixer.music.get_busy():
            time.sleep(0.05)
Пример #3
0
def TTS_text(sentence, output):
    """Synthesize ``sentence`` with the ko-KR-Standard-A voice into an MP3.

    Args:
        sentence: Plain text to speak.
        output: Path of the MP3 file to write.

    Returns:
        The absolute path of ``output``.
    """
    from google.cloud import texttospeech

    tts_client = texttospeech.TextToSpeechClient()

    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    korean_voice = texttospeech.VoiceSelectionParams(
        language_code="ko-KR",
        name="ko-KR-Standard-A",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)

    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=sentence),
        voice=korean_voice,
        audio_config=mp3_config)

    # audio_content is binary, hence the "wb" mode.
    with open(output, "wb") as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file {}'.format(output))

    return os.path.abspath(output)
Пример #4
0
def play():
    """Synthesize a random entry of ``pastas`` and render the play page.

    Every ``.mp3`` already present in ``static`` is deleted first, then the
    new audio is written to a randomly named ``static/temp<N>.mp3`` file.

    Returns:
        The rendered ``playpage.html`` template, given the chosen text and
        the path of the generated audio file.
    """
    # Remove audio generated by earlier calls so the folder does not grow.
    for file_name in os.listdir("static"):
        if file_name.endswith(".mp3"):
            os.remove("static/" + file_name)

    text = random.choice(pastas)
    synthesis_input = texttospeech.SynthesisInput(text=text)

    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    response = client.synthesize_speech(input=synthesis_input,
                                        voice=voice,
                                        audio_config=audio_config)

    print(response)

    # Random suffix gives each synthesis its own file name.
    name = "static/temp" + str(random.randint(0, 10000000)) + ".mp3"

    with open(name, "wb") as out:
        out.write(response.audio_content)
        # Report the file actually written (the message used to claim
        # "output.mp3" regardless of the real name).
        print('Audio content written to file "{}"'.format(name))

    return render_template("playpage.html",
                           words=text,
                           track_ogg=name,
                           track_mp3=name)
Пример #5
0
    def download_tmp_audio_file(self, card_type, foreign_text):
        """Returns a temporary filename with the audio.

        The language is taken from the card type's "sublanguage_id" property,
        falling back to "language_id". Every " ج " separator and every "<br>"
        in ``foreign_text`` becomes a 0.3 s SSML break; when at least one such
        break was inserted, the text is wrapped in a single ``<speak>``
        element. The audio is synthesized as MP3 and written to
        "__GTTS__TMP__.mp3" in the media directory.
        """

        language_id = self.config().card_type_property(
            "sublanguage_id", card_type)
        if not language_id:
            language_id = self.config().card_type_property(
                "language_id", card_type)

        pause = '<break time="0.3s"/>'
        has_break = False
        # Replacing (rather than split-and-unpack) copes with more than one
        # separator instead of raising on the tuple unpacking.
        if " ج " in foreign_text:
            foreign_text = foreign_text.replace(" ج ", pause)
            has_break = True
        if "<br>" in foreign_text:
            foreign_text = foreign_text.replace("<br>", pause)
            has_break = True
        # Wrap exactly once: text holding both markers used to end up inside
        # two nested <speak> elements.
        if has_break:
            foreign_text = "<speak>" + foreign_text + "</speak>"

        client = texttospeech.TextToSpeechClient()
        synthesis_input = texttospeech.SynthesisInput(ssml=foreign_text)
        voice = texttospeech.VoiceSelectionParams(
            language_code=language_id,
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE)
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3)
        response = client.synthesize_speech(request={
            "input": synthesis_input,
            "voice": voice,
            "audio_config": audio_config
        })

        filename = expand_path("__GTTS__TMP__.mp3",
                               self.database().media_dir())
        with open(filename, 'wb') as mp3_file:
            mp3_file.write(response.audio_content)
        return filename
Пример #6
0
def synthesize_text(text):
    """Synthesizes speech from the input string of text.

    The audio (en-US-Wavenet-D voice, MP3) is written to
    ``output<HHMMSS>.mp3`` in the current directory, where ``HHMMSS`` is the
    current local time.
    """
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.SynthesisInput(text=text)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Wavenet-D",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE,
    )

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    response = client.synthesize_speech(request={
        "input": input_text,
        "voice": voice,
        "audio_config": audio_config
    })

    # Compute the name once so the log line reports the file really written
    # (it used to claim "output.mp3").
    output_name = "output" + datetime.now().strftime("%H%M%S") + ".mp3"

    # The response's audio_content is binary.
    with open(output_name, "wb") as out:
        out.write(response.audio_content)
        print('Audio content written to file "{}"'.format(output_name))
def speech_result_return(create_text):
    """Synthesize ``create_text`` to ``text.mp3`` and then play it.

    Args:
        create_text: Plain text to speak (en-GB, female voice, MP3).

    The audio is written to ``text.mp3`` in the current directory, after
    which ``play_mp3_content()`` is called.
    """
    client = texttospeech.TextToSpeechClient()

    synthesis_input = texttospeech.SynthesisInput(text=create_text)

    voice = texttospeech.VoiceSelectionParams(
        language_code="en-GB", ssml_gender=texttospeech.SsmlVoiceGender.FEMALE)

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    response = client.synthesize_speech(input=synthesis_input,
                                        voice=voice,
                                        audio_config=audio_config)

    # The response's audio_content is binary.
    with open("text.mp3", "wb") as out:
        out.write(response.audio_content)
        print('Audio content written to file "text.mp3"')

    # Play only after the file has been closed: while it was still open the
    # buffered data might not have reached the disk yet.
    play_mp3_content()
Пример #8
0
def synthesize_text_file(text_file):
    """Read plain text from ``text_file`` and synthesize it to ``output.mp3``.

    Uses the ko-KR-Standard-C voice and MP3 encoding; the result is written
    to ``output.mp3`` in the current directory.
    """
    from google.cloud import texttospeech

    tts_client = texttospeech.TextToSpeechClient()

    with open(text_file, "r") as source:
        payload = texttospeech.SynthesisInput(text=source.read())

    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    # Voices can be listed with client.list_voices().
    korean_voice = texttospeech.VoiceSelectionParams(
        language_code="ko-KR",
        name="ko-KR-Standard-C",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE)

    request = {
        "input": payload,
        "voice": korean_voice,
        "audio_config": mp3_config
    }
    result = tts_client.synthesize_speech(request=request)

    # audio_content is binary.
    with open("output.mp3", "wb") as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
def synthesize_ssml(ssml, langcode, voicename, ssmlgender, output_filename):
    """Synthesize an SSML string to an MP3 file.

    Args:
        ssml: SSML document, e.g. ``<speak>Hello there.</speak>``; it must be
            well-formed according to https://www.w3.org/TR/speech-synthesis/
        langcode: Language code passed to the voice selection.
        voicename: Voice name passed to the voice selection.
        ssmlgender: SSML gender passed to the voice selection.
        output_filename: Path of the MP3 file to write.
    """
    tts_client = texttospeech.TextToSpeechClient()

    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    # Voice names can be retrieved with client.list_voices().
    selected_voice = texttospeech.VoiceSelectionParams(
        language_code=langcode,
        name=voicename,
        ssml_gender=ssmlgender,
    )

    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(ssml=ssml),
        voice=selected_voice,
        audio_config=mp3_config)

    # audio_content is binary.
    with open(output_filename, "wb") as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file', output_filename, '.')
def ssml_to_audio(ssml_text, outfile):
    """Synthesize ``ssml_text`` (en-US, neutral voice) into the MP3 ``outfile``.

    Side effect: ``GOOGLE_APPLICATION_CREDENTIALS`` is overwritten with a
    hard-coded service-account path before the client is created.
    Auth setup: https://cloud.google.com/docs/authentication/getting-started
    """
    # NOTE(review): machine-specific path; consider supplying it from outside.
    credentials_path = '/Users/christinatuttle/Downloads/text-to-speech-service-account.json'
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path

    tts_client = texttospeech.TextToSpeechClient()

    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    neutral_voice = texttospeech.VoiceSelectionParams(
        language_code="en-US", ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
    )

    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(ssml=ssml_text),
        voice=neutral_voice,
        audio_config=mp3_config,
    )

    # audio_content is binary.
    with open(outfile, "wb") as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "' + outfile + '"')
Пример #11
0
async def tts(ctx, language, *, input):
    """Synthesize ``input`` in ``language`` and send it back as ``output.mp3``.

    Args:
        ctx: Context object; the generated file is sent through ``ctx.send``.
        language: Language code for the voice selection.
        input: Plain text to speak.
    """
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    neutral_voice = texttospeech.VoiceSelectionParams(
        language_code=language,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )

    result = ttsClient.synthesize_speech(
        input=texttospeech.SynthesisInput(text=input),
        voice=neutral_voice,
        audio_config=mp3_config)

    # audio_content is binary; it goes to disk first and is then uploaded.
    with open("output.mp3", "wb") as mp3_file:
        mp3_file.write(result.audio_content)

    attachment = discord.File("output.mp3")
    await ctx.send(file=attachment)
Пример #12
0
def getAudio(inputStr):
    """Synthesize ``inputStr`` (en-US-Wavenet-I, MP3) and write it to ``SoundFile``.

    Args:
        inputStr: Plain text to speak.

    The client is authenticated with the service-account file named by
    ``GoogleApiFile``.
    """
    # Use the synchronous client: this is a plain function, and the async
    # client's synthesize_speech() returns a coroutine that was never
    # awaited, so reading ``audio_content`` from it failed.
    tts = texttospeech.TextToSpeechClient.from_service_account_json(
        GoogleApiFile)

    synthesis_input = texttospeech.SynthesisInput(text=inputStr)

    voice = texttospeech.VoiceSelectionParams({
        "language_code": "en-US",
        "ssml_gender": texttospeech.SsmlVoiceGender.MALE,
        "name": "en-US-Wavenet-I"
    })

    audio_config = texttospeech.AudioConfig(
        {"audio_encoding": texttospeech.AudioEncoding.MP3})

    response = tts.synthesize_speech(input=synthesis_input,
                                     voice=voice,
                                     audio_config=audio_config)

    # The response's audio_content is binary.
    with open(SoundFile, "wb") as out:
        out.write(response.audio_content)
Пример #13
0
async def voice(txt: str):
    """Synthesize ``txt`` and return the audio as a streaming response.

    The audio is requested as LINEAR16 with the en-US-Wavenet-F voice and is
    streamed in 1024-byte chunks with media type ``audio/wav``.
    """
    wav_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16)

    speaker = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Wavenet-F",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE)

    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=txt),
        voice=speaker,
        audio_config=wav_config)

    async def chunk_stream():
        # Hand out the binary audio piece by piece until the buffer is empty.
        with io.BytesIO(result.audio_content) as buf:
            for chunk in iter(lambda: buf.read(1024), b""):
                yield chunk

    return StreamingResponse(chunk_stream(), media_type="audio/wav")
Пример #14
0
def TTS_ssml(ssml_sentence, output):
    """Synthesize SSML with the ko-KR-Wavenet-D voice into an MP3 file.

    Args:
        ssml_sentence: SSML document to speak.
        output: Path of the MP3 file to write.

    Returns:
        The absolute path of ``output``.
    """
    from google.cloud import texttospeech

    tts_client = texttospeech.TextToSpeechClient()

    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    # Voice names can be retrieved with client.list_voices().
    korean_voice = texttospeech.VoiceSelectionParams(
        language_code="ko-KR",
        name="ko-KR-Wavenet-D",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE,
    )

    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(ssml=ssml_sentence),
        voice=korean_voice,
        audio_config=mp3_config)

    # audio_content is binary.
    with open(output, "wb") as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file {}'.format(output))
        absolute_path = os.path.abspath(output)

    return absolute_path
Пример #15
0
def synthesize_text(text, file_name):
    """Synthesize ``text`` into ``<file_name>_audiobook.mp3``.

    Args:
        text: Plain text to speak (en-US-Standard-C voice, MP3).
        file_name: Prefix of the output file name.
    """
    tts_client = texttospeech.TextToSpeechClient()

    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    # Voice names can be retrieved with client.list_voices().
    narrator = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Standard-C",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )

    request = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": narrator,
        "audio_config": mp3_config
    }
    result = tts_client.synthesize_speech(request=request)

    # audio_content is binary.
    with open(f"{file_name}_audiobook.mp3", "wb") as mp3_file:
        mp3_file.write(result.audio_content)
        print(f'Audio content written to file "{file_name}_audiobook.mp3"')
Пример #16
0
def translate():
    """
    Route to synthesize speech using Google Text-to-Speech API.

    Reads a JSON object from the ``messages`` query argument and uses its
    ``text``, ``language`` and ``gender`` entries. The audio is written to
    ``./static/output.mp3`` and that file is sent back as ``output.mp3``.
    """
    messages = json.loads(request.args['messages'])

    tts_client = texttospeech.TextToSpeechClient()

    payload = texttospeech.SynthesisInput(text=messages['text'])

    requested_voice = texttospeech.VoiceSelectionParams(
        language_code=messages['language'], ssml_gender=messages['gender'])

    # NOTE(review): 2 is presumably AudioEncoding.MP3 (the result is saved as
    # .mp3) — confirm against the enum.
    encoding_config = texttospeech.AudioConfig(audio_encoding=2)

    result = tts_client.synthesize_speech(input=payload,
                                          voice=requested_voice,
                                          audio_config=encoding_config)

    # audio_content is binary.
    with open('./static/output.mp3', 'wb') as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "output.mp3"')

    return send_file('./static/output.mp3', attachment_filename='output.mp3')
Пример #17
0
def text_to_speech(text_content, output_file_path):
    """
    Given a string, create an audio file from it and save it.

    Args:
        text_content: Plain text to speak (ja-JP-Wavenet-B voice, MP3).
        output_file_path: Path of the audio file to write.
    """
    tts_client = texttospeech.TextToSpeechClient()

    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    japanese_voice = texttospeech.VoiceSelectionParams(
        language_code="ja-JP",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
        name="ja-JP-Wavenet-B")

    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text_content),
        voice=japanese_voice,
        audio_config=mp3_config)

    # audio_content is binary.
    with open(output_file_path, "wb") as audio_file:
        audio_file.write(result.audio_content)
Пример #18
0
def synthesize_text_file(text_file):
    """Synthesize the text in ``text_file`` into ``<text_file>.mp3``.

    The text is turned into SSML: it is wrapped in ``<speak>`` and every
    blank line (two consecutive newlines) gets a full stop plus a one-second
    break in front of it. Voice: en-US-Wavenet-D, MP3 encoding.
    """
    from google.cloud import texttospeech

    tts_client = texttospeech.TextToSpeechClient()

    with open(text_file, "r") as source:
        raw_text = source.read()

    # Pause for one second at each paragraph boundary.
    with_pauses = raw_text.replace("\n\n", '.<break time="1s"/>\n\n')
    payload = texttospeech.SynthesisInput(
        ssml="<speak>{}</speak>".format(with_pauses))

    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    # Voice names can be retrieved with client.list_voices().
    narrator = texttospeech.VoiceSelectionParams(
        name='en-US-Wavenet-D',
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE)

    request = {
        "input": payload,
        "voice": narrator,
        "audio_config": mp3_config
    }
    result = tts_client.synthesize_speech(request=request)

    # audio_content is binary.
    output_file = text_file + '.mp3'
    with open(output_file, "wb") as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "{}"'.format(output_file))
Пример #19
0
def synthesize_ssml_file(ssml_file):
    """Synthesize the SSML stored in ``ssml_file`` into ``output.mp3``.

    Note: the SSML must be well-formed according to:
        https://www.w3.org/TR/speech-synthesis/

    Voice: ko-KR-Standard-C, MP3 encoding; the result is written to
    ``output.mp3`` in the current directory.
    """
    from google.cloud import texttospeech

    tts_client = texttospeech.TextToSpeechClient()

    with open(ssml_file, "r") as source:
        payload = texttospeech.SynthesisInput(ssml=source.read())

    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    # Voice names can be retrieved with client.list_voices().
    korean_voice = texttospeech.VoiceSelectionParams(
        language_code="ko-KR",
        name="ko-KR-Standard-C",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE)

    result = tts_client.synthesize_speech(input=payload,
                                          voice=korean_voice,
                                          audio_config=mp3_config)

    # audio_content is binary.
    with open("output.mp3", "wb") as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
Пример #20
0
def synthesize_text(text):
    """Synthesize ``text`` with the uk-UA-Standard-A voice and play it.

    The MP3 is written to ``output.mp3`` in the current directory and then
    handed to ``playsound``.
    """
    tts_client = texttospeech.TextToSpeechClient()

    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    ukrainian_voice = texttospeech.VoiceSelectionParams(
        language_code="uk-UA",
        name="uk-UA-Standard-A",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )

    request = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": ukrainian_voice,
        "audio_config": mp3_config
    }
    result = tts_client.synthesize_speech(request=request)

    # The file is closed before playback starts.
    with open("output.mp3", "wb") as mp3_file:
        mp3_file.write(result.audio_content)

    playsound("output.mp3")
Пример #21
0
    def __init__(self, text, filename):
        """Synthesize ``text`` and store it as ``./static/audio/<filename>.mp3``.

        The client, the request parts and the response are all kept on the
        instance (``client``, ``synthesis_input``, ``voice``,
        ``audio_config``, ``response``). The synthesis request is performed
        during construction.
        """
        self.text = text
        self.filename = filename
        self.client = texttospeech.TextToSpeechClient()

        # Request parts: plain-text input, neutral en-US voice, MP3 output.
        self.synthesis_input = texttospeech.SynthesisInput(text=self.text)
        self.voice = texttospeech.VoiceSelectionParams(
            language_code="en-US",
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)
        self.audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3)

        self.response = self.client.synthesize_speech(
            input=self.synthesis_input,
            voice=self.voice,
            audio_config=self.audio_config)

        # audio_content is binary.
        with open(f"./static/audio/{filename}.mp3", "wb") as mp3_file:
            mp3_file.write(self.response.audio_content)
Пример #22
0
def synthesize_text_with_audio_profile(text, output, effects_profile_id):
    """Synthesize ``text`` to MP3 with one audio effects profile applied.

    Args:
        text: Plain text to speak (en-US).
        output: Path of the MP3 file to write.
        effects_profile_id: Single effects profile id; it is passed on as a
            one-element list.
    """
    from google.cloud import texttospeech

    tts_client = texttospeech.TextToSpeechClient()

    # Several profile ids may be given; they are applied in the order listed.
    profiled_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        effects_profile_id=[effects_profile_id],
    )

    # Voice names can be retrieved with client.list_voices().
    default_voice = texttospeech.VoiceSelectionParams(language_code="en-US")

    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=default_voice,
        audio_config=profiled_config)

    # audio_content is binary.
    with open(output, "wb") as mp3_file:
        mp3_file.write(result.audio_content)
        print('Audio content written to file "%s"' % output)
def text_to_speech_pcm(text, language='en-us', gender=texttospeech.SsmlVoiceGender.NEUTRAL) -> bytes:
    """Synthesize ``text`` and return the audio as LINEAR16 at 48 kHz.

    Args:
        text: Plain text to speak.
        language: Dash-separated identifier. Its first two components form
            the language code; when it has exactly four components the whole
            string is also used as the voice name.
        gender: SSML voice gender for the voice selection.

    Returns:
        The ``audio_content`` of the synthesis response.
    """
    # The gRPC receive limit is raised to 24MB because the 4MB default is not
    # enough for some definitions; adjust if necessary.
    max_receive_bytes = 24 * 1024 * 1024
    grpc_channel = TextToSpeechGrpcTransport.create_channel(
        options=[('grpc.max_receive_message_length', max_receive_bytes)])
    client = texttospeech.TextToSpeechClient(
        transport=TextToSpeechGrpcTransport(channel=grpc_channel))

    parts = language.split('-')
    voice_name = language if len(parts) == 4 else None

    selected_voice = texttospeech.VoiceSelectionParams(
        language_code='-'.join(parts[:2]), ssml_gender=gender, name=voice_name
    )

    pcm_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
        sample_rate_hertz=48000
    )

    result = client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=selected_voice,
        audio_config=pcm_config,
    )
    return result.audio_content
def convert_to_audio(name, text, save_to):
    """Synthesize ``text`` in chunks and stitch them into ``<save_to>/<name>.mp3``.

    Args:
        name: Base name of the intermediate ``<name>_<j>.mp3`` files (written
            to the current directory) and of the final file.
        text: Plain text to speak (en-US-Wavenet-B voice, MP3).
        save_to: Directory that receives the final ``<name>.mp3``.

    Raises:
        ValueError: If ``text`` is empty, since there is nothing to stitch.
    """
    if not text:
        raise ValueError('text must not be empty')

    client = texttospeech.TextToSpeechClient()
    # The text is split into pieces of at most 5000 characters per request.
    # NOTE(review): the service limit may be counted in bytes rather than
    # characters — confirm for non-ASCII text.
    step = 5000

    # Identical for every chunk, so built once outside the loop.
    voice = texttospeech.VoiceSelectionParams(language_code='en-US',
                                              name='en-US-Wavenet-B')
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    # Remember the files in the order they are written. Globbing and sorting
    # the names is lexicographic ("x_10.mp3" sorts before "x_2.mp3") and
    # could also pick up unrelated files matching the pattern.
    segment_paths = []
    try:
        for j, i in enumerate(range(0, len(text), step)):
            segment_path = f'{name}_{j}.mp3'
            synthesis_input = texttospeech.SynthesisInput(
                text=text[i:i + step])
            logging.info(f'Synthesizing speech for {name}_{j}')
            response = client.synthesize_speech(input=synthesis_input,
                                                voice=voice,
                                                audio_config=audio_config)
            with open(segment_path, 'wb') as out:
                # Recorded as soon as the file exists so that cleanup also
                # covers a partially written segment.
                segment_paths.append(segment_path)
                out.write(response.audio_content)
            logging.info(f'Audio content written to file "{segment_path}"')

        segments = [AudioSegment.from_mp3(f) for f in segment_paths]

        logging.info(
            f'Stitching together {len(segments)} mp3 files for {name}')
        audio = functools.reduce(lambda a, b: a + b, segments)

        logging.info(f'Exporting {name}.mp3')
        audio.export(f'{save_to}/{name}.mp3', format='mp3')
    finally:
        # Intermediate files are removed even when a step above failed.
        logging.info('Removing intermediate files')
        for f in segment_paths:
            os.remove(f)
Пример #25
0
 def speak(self,
           text,
           rate=0.8,
           language='en-US',
           filename=None,
           voice_name='en-US-Wavenet-D'):
     """
     Create the audio file from text and return the path of its WAV version.

     When ``filename`` is not given it is derived with
     ``filename_from_text(text, rate, language)``. If the WAV file already
     exists in the media library it is returned without a new synthesis;
     otherwise the speech is synthesized as MP3 and converted to WAV.
     """
     if not filename:
         filename = self.filename_from_text(text, rate, language)
     base_path = path_in_medialib(filename)
     wav_path = f'{base_path}.wav'
     # A WAV file that already exists is reused as is.
     if os.path.exists(wav_path):
         return wav_path
     mp3_path = f'{base_path}.mp3'

     rated_config = texttospeech.AudioConfig(
         audio_encoding=texttospeech.AudioEncoding.MP3, speaking_rate=rate)
     named_voice = texttospeech.VoiceSelectionParams(language_code=language,
                                                     name=voice_name)
     result = self.client.synthesize_speech(
         input=texttospeech.SynthesisInput(text=text),
         voice=named_voice,
         audio_config=rated_config)

     # audio_content is binary.
     with open(mp3_path, "wb") as mp3_file:
         mp3_file.write(result.audio_content)
     self.mp3_to_wav(mp3_path, wav_path)
     return wav_path
Пример #26
0
def text_to_speech_converter(text, lang, voice_name=None, speaking_rate=1):
    """Convert ``text`` to MP3 audio and return the raw bytes.

    Args:
        text: Plain text to speak.
        lang: Language code for the voice selection.
        voice_name: Optional voice name. When omitted, a neutral-gender voice
            for ``lang`` is requested instead.
        speaking_rate: Speaking rate passed to the audio configuration.

    Returns:
        The ``audio_content`` of the synthesis response.
    """
    # Use the synchronous client: this is a plain function, and the async
    # client's synthesize_speech() returns a coroutine that was never
    # awaited, so ``response.audio_content`` failed.
    client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(text=text)

    if not voice_name:
        voice = texttospeech.VoiceSelectionParams(
            language_code=lang,
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)
    else:
        voice = texttospeech.VoiceSelectionParams(language_code=lang,
                                                  name=voice_name)

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=speaking_rate)

    response = client.synthesize_speech(input=synthesis_input,
                                        voice=voice,
                                        audio_config=audio_config)

    return response.audio_content
def write_transcription_audioFile(text, filename):
    """Synthesize ``text`` and store it as both ``.wav`` and ``.sln`` files.

    Args:
        text: Plain text to speak (en-GB-Wavenet-B, LINEAR16, 8000 Hz).
        filename: Base name; it is appended to ``my_audio_out_directory``.

    The ``.wav`` file holds the response as returned; the ``.sln`` file holds
    the same data without its first 44 bytes.
    """
    base_path = my_audio_out_directory + filename

    tts_client = texttospeech.TextToSpeechClient.from_service_account_file(
        my_credentials_file_path)

    telephony_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
        speaking_rate=0.89,
        pitch=2.4,
        sample_rate_hertz=8000)
    british_voice = texttospeech.VoiceSelectionParams(
        language_code="en-GB",
        name="en-GB-Wavenet-B",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)

    result = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=british_voice,
        audio_config=telephony_config)

    with open(base_path + ".wav", "wb") as wav_file:
        wav_file.write(result.audio_content)

    with open(base_path + ".sln", "wb") as sln_file:
        # Drop the leading 44 bytes (the WAV header, per the original naming).
        sln_file.write(bytearray(result.audio_content)[44:])
Пример #28
0
def text_to_speech(word):
    """Request speech synthesis for ``word`` and return the API response.

    The request selects a ``zh-CN`` voice with a female SSML gender and asks
    for LINEAR16 audio encoding.

    :param word: plain text to synthesize
    :return: the response object from ``synthesize_speech``; the binary
        audio is available on its ``audio_content`` attribute
    """
    tts_client = texttospeech.TextToSpeechClient()

    # The whole response is handed back, not just the audio bytes.
    return tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=word),
        voice=texttospeech.VoiceSelectionParams(
            language_code="zh-CN",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
        ),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.LINEAR16,
        ),
    )
Пример #29
0
def run(text):
    """Synthesize ``text`` as MP3 audio and save it to ``output.mp3``.

    The request uses an ``en-US`` voice with a neutral SSML gender. The file
    is written to the current working directory and a confirmation message
    is printed.

    :param text: plain text to synthesize
    :return: the confirmation message that was printed
    """
    confirmation = 'Audio content written to file "output.mp3"'

    tts_client = texttospeech.TextToSpeechClient()
    speech = tts_client.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=texttospeech.VoiceSelectionParams(
            language_code="en-US",
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3),
    )

    # audio_content is binary, so the file is opened in binary mode.
    with open("output.mp3", "wb") as mp3_file:
        mp3_file.write(speech.audio_content)
        print(confirmation)

    return confirmation
Пример #30
0
    def speak(self, language, text, gender="neutral"):
        """
        Synthesize ``text`` and write the audio to
        ``audio_data/temp/output.ogg``.

        The audio is requested in OGG_OPUS encoding through
        ``self.speech_client``.

        :param language: google text-to-speech language code (ex. en-US,
            different from the translate API); must be one of the values of
            ``self.speech_langs``
        :param text: the raw text to speak (ex. I love coding)
        :param gender: "female" or "male" (case-insensitive); any other
            value selects the neutral voice
        :return: None
        :raises ValueError: if ``language`` is not among
            ``self.speech_langs.values()``
        """
        # Validate up front so an unsupported language fails before any
        # request object is built.
        if language not in self.speech_langs.values():
            raise ValueError("Language not supported by Speech API")

        voice_input = texttospeech.SynthesisInput(text=text)

        # voice settings: pick the SSML gender once, build the params once
        requested_gender = gender.lower()
        if requested_gender == "female":
            ssml_gender = self.female
        elif requested_gender == "male":
            ssml_gender = self.male
        else:
            ssml_gender = self.neutral
        voice = texttospeech.VoiceSelectionParams(language_code=language,
                                                  ssml_gender=ssml_gender)

        # ogg_opus encoding, need to test if .WAV will work
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.OGG_OPUS)

        response = self.speech_client.synthesize_speech(
            input=voice_input, voice=voice, audio_config=audio_config)

        # Third argument 0 opens the file unbuffered.
        with open("audio_data/temp/output.ogg", "wb", 0) as out:
            out.write(response.audio_content)