def voice_from_text(text, path):
    # Synthesize `text` to a uniquely named WAV file, using SSML to select
    # the JennyNeural voice in "chat" style.
    filepath = "{}/speech_{}.wav".format(path, uuid.uuid1().hex)
    if os.path.isfile(filepath):
        os.remove(filepath)
    ssml_string = """
    <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
           xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">
      <voice name="en-US-JennyNeural">
        <mstts:express-as style="chat">
          {}
        </mstts:express-as>
      </voice>
    </speak>
    """.format(text)
    speech_config = speechsdk.SpeechConfig(
        subscription=api_keys["microsoft-speech"]["key"],
        region=api_keys["microsoft-speech"]["region"])
    # Synthesize to an in-memory stream (audio_config=None), then save to disk.
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
    result = synthesizer.speak_ssml_async(ssml_string).get()
    stream = AudioDataStream(result)
    stream.save_to_wav_file(filepath)
    return filepath
def show_action_from_speech(intent, entities):
    import matplotlib.pyplot as plt
    from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer
    from PIL import Image
    from dotenv import load_dotenv
    import os

    action = 'unknown'
    device = 'none'
    if intent in ['switch_on', 'switch_off']:
        # Check for a device entity
        if len(entities) > 0:
            # Get the first entity (if any)
            if entities[0]["type"] == 'device':
                device = entities[0]["entity"]
        action = intent + '_' + device

    load_dotenv()
    cog_key = os.getenv('SPEECH_KEY')
    cog_location = os.getenv('SPEECH_REGION')

    # Speak a confirmation of the requested action
    response_text = "OK, I'll {} the {}!".format(intent, device).replace("_", " ")
    speech_config = SpeechConfig(cog_key, cog_location)
    speech_synthesizer = SpeechSynthesizer(speech_config)
    result = speech_synthesizer.speak_text(response_text)

    # Show an image matching the action
    img_name = action + '.jpg'
    img = Image.open(os.path.join("data", "luis", img_name))
    plt.axis('off')
    plt.imshow(img)
def generaraudio():
    # Read the uploaded text file and speak its contents.
    with open("uploads/archivo.txt", "r") as archivo:
        documentos = archivo.read().rstrip('\n')
    # `speech_config` and `audio_config` are defined at module level.
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    synthesizer.speak_text_async(documentos)
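A pattern several of these snippets skip: speak_text_async returns a future, so a script can exit (or read the output file) before synthesis finishes. A minimal sketch of waiting on the result and checking for cancellation, assuming the synthesizer and documentos from the snippet above:

# Sketch: block until synthesis is done and inspect the outcome.
# Assumes `synthesizer` and `documentos` as in the snippet above.
import azure.cognitiveservices.speech as speechsdk

result = synthesizer.speak_text_async(documentos).get()  # wait for completion
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Synthesis finished, {} bytes of audio".format(len(result.audio_data)))
elif result.reason == speechsdk.ResultReason.Canceled:
    details = result.cancellation_details
    print("Synthesis canceled:", details.reason, details.error_details)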
def audio_tts(self, text):
    # Request 24 kHz 16-bit mono PCM (RIFF/WAV) output. Note the SSML is
    # read from ssml.xml; the `text` argument is not used here.
    self.speech_config.set_speech_synthesis_output_format(
        SpeechSynthesisOutputFormat["Riff24Khz16BitMonoPcm"])
    synthesizer = SpeechSynthesizer(speech_config=self.speech_config, audio_config=None)
    with open("ssml.xml", "r") as f:
        ssml_string = f.read()
    result = synthesizer.speak_ssml_async(ssml_string).get()
    stream = AudioDataStream(result)
    stream.save_to_wav_file("/root/alfonso/ext/")
def tts(language, text):
    # Fill an SSML template with the target language and text, synthesize
    # to an in-memory stream, and return the raw audio bytes.
    speech_config = SpeechConfig(subscription=tts_key, region=region)
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
    with open("spellout/common/language.xml", "r") as f:
        ssml_string = f.read().format(lang=language, text=text)
    result = synthesizer.speak_ssml_async(ssml_string).get()
    return result.audio_data
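The contents of spellout/common/language.xml are not shown; for the .format(lang=..., text=...) call to work, it is presumably an SSML wrapper along these lines (a guess at the template, including the voice name, not the project's actual file):

# Hypothetical contents of spellout/common/language.xml -- an assumption
# based on the {lang}/{text} placeholders, not the project's actual template.
# With only these two placeholders, the voice name must be fixed in the file.
SSML_TEMPLATE = (
    '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" '
    'xml:lang="{lang}">'
    '<voice xml:lang="{lang}" name="en-US-JennyNeural">{text}</voice>'
    '</speak>'
)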
def voice_from_text(text, path): filepath = "{}/speech.wav".format(path) if os.path.isfile(filepath): os.remove(filepath) speech_config = speechsdk.SpeechConfig(subscription=api_keys["microsoft-speech"]["key"], region=api_keys["microsoft-speech"]["region"]) audio_config = AudioOutputConfig(filename=filepath) synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config) synthesizer.speak_text_async("The Birch canoe slid on the smooth planks. Glue the sheet to the dark blue background.") return path
def welcome_message(name):
    speech_config = speechsdk.SpeechConfig(
        subscription="b58d19e457574aa39bc0f8b9b763cd55",
        region="australiaeast")
    audio_config = AudioOutputConfig(
        filename="C:/Users/Pranav Patel/Documents/schabu/back_end/python/welcome.wav")
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    text = ("Hello " + name + "! Welcome to the Schubu Recruitment Process. "
            "Please click on the Start button to begin the interview process.")
    # Block until the welcome audio has been written to disk.
    synthesizer.speak_text_async(text).get()
    print(text)
def azure_text_to_speech(self, text):
    try:
        synthesizer = SpeechSynthesizer(speech_config=self.aservice, audio_config=None)
        ssml = TSUBAKI_SSML.format(text=text)
        result = synthesizer.speak_ssml_async(ssml).get()
        data = result.audio_data
        if not data:
            # Empty audio usually means the request was canceled.
            logger.error(str(result.cancellation_details))
        return data
    except Exception:
        logger.exception("Azure Text to Speech Failure:")
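TSUBAKI_SSML is defined elsewhere in that project; judging by the single {text} placeholder, it is presumably an SSML wrapper along these lines (an illustrative guess, including the language and voice name):

# Hypothetical definition of TSUBAKI_SSML -- an assumption based on the
# single {text} placeholder, not the project's actual template.
TSUBAKI_SSML = (
    '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" '
    'xml:lang="ja-JP">'
    '<voice name="ja-JP-NanamiNeural">{text}</voice>'
    '</speak>'
)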
def _do_tts(self, use_speaker: bool, ssml_config: str, output_file: str):
    print("Start: ", output_file)
    speech_config = SpeechConfig(subscription=self._subscription, region=self._region)
    audio_config = AudioOutputConfig(use_default_speaker=use_speaker)
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    result = synthesizer.speak_ssml_async(ssml_config).get()
    # Also save the synthesized audio to the output file.
    stream = AudioDataStream(result)
    stream.save_to_wav_file(output_file)
    print("Finished", output_file)
def generate_voice():
    ############# AZURE #######################
    # Prosody settings: volume (default 50), rate, pitch.
    rate = "-12%"
    pitch = "3%"
    vol_ = 10

    # Azure subscription key required.
    speech_config = SpeechConfig(subscription="APIKEY", region="eastus")
    speech_config.set_speech_synthesis_output_format(
        SpeechSynthesisOutputFormat["Riff24Khz16BitMonoPcm"])
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
    num_ = 0

    # Read the TTS script from a text file.
    with open('./tts_storage/text/tts_script.txt', encoding='utf-8') as file_in:
        text = file_in.read()
    print("## TTS script:", text)

    # Wrap the text in SSML using the Korean SunHi neural voice and the
    # prosody settings above, then write the document out to ssml.xml.
    root = ElementTree.fromstring(
        '<speak xmlns="http://www.w3.org/2001/10/synthesis" '
        'xmlns:mstts="http://www.w3.org/2001/mstts" '
        'xmlns:emo="http://www.w3.org/2009/10/emotionml" '
        'version="1.0" xml:lang="ko-KR">'
        '<voice name="ko-KR-SunHiNeural">'
        '<prosody volume="{}" rate="{}" pitch="{}">{}</prosody>'
        '</voice></speak>'.format(vol_, rate, pitch, text))
    ElementTree.dump(root)
    xml_script = ElementTree.ElementTree(root)
    xml_script.write('ssml.xml')
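The snippet stops after writing ssml.xml; presumably a later step feeds that file to the synthesizer it created, along these lines (a sketch of the likely next step, not the rest of the original function; the output path is illustrative):

# Sketch: synthesize the SSML written above and save the audio.
# Assumes `synthesizer` and the AudioDataStream import used elsewhere
# in this section.
with open('ssml.xml', encoding='utf-8') as f:
    ssml = f.read()
result = synthesizer.speak_ssml_async(ssml).get()
stream = AudioDataStream(result)
stream.save_to_wav_file('./tts_storage/audio/tts_output.wav')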
def result():
    message = request.form['message']
    number = request.form['number']

    # Synthesize the original French message.
    speech_config = SpeechConfig(
        subscription="0a6a0817af9f46aea9054beaa3d30290",
        region="westeurope")
    speech_config.speech_synthesis_voice_name = "fr-FR-DeniseNeural"
    audio_config = AudioOutputConfig(filename="message_fr.wav")
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    synthesizer.speak_text_async(message).get()

    # Translate the message from French to English.
    # Add your subscription key and endpoint.
    subscription_key = "e134037165514c648a57bf6ccc95e541"
    endpoint = "https://api.cognitive.microsofttranslator.com"
    # Add your location, also known as region. The default is global.
    # This is required when using a Cognitive Services resource.
    location = "francecentral"
    constructed_url = endpoint + '/translate'
    params = {'api-version': '3.0', 'from': 'fr', 'to': ['en']}
    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Ocp-Apim-Subscription-Region': location,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }
    # You can pass more than one object in the body.
    body = [{'text': message}]
    quest = requests.post(constructed_url, params=params, headers=headers, json=body)
    response = quest.json()
    translator = response[0]["translations"][0]["text"]

    # Synthesize the English translation.
    speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
    audio_config = AudioOutputConfig(filename="message_en.wav")
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    synthesizer.speak_text_async(translator).get()

    data = {"number": number}
    with open("limit.json", "w") as file:
        json.dump(data, file)
    return message
def tts(item):
    speech_config = SpeechConfig(
        subscription="bc0912f626b44d5a8bb00e4497644fa4", region="westus")
    audio_config = AudioOutputConfig(filename="./result.wav")
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    appendString = ""
    # if res == "OK":
    #     appendString = "is in direction you're looking"
    # else:
    #     appendString = "is not in direction you're looking"
    result = synthesizer.speak_text_async(item + appendString).get()
    # Note: save_to_wav_file writes RIFF/WAV data, despite the .mp3 filename.
    stream = AudioDataStream(result)
    stream.save_to_wav_file("./result.mp3")
async def setup_azure(filename):
    """ Returns an Azure Speech Synthesizer pointing to the given filename """
    auto_detect_source_language_config = None
    speech_config = SpeechConfig(subscription=setup['azure']['key'],
                                 region=setup['azure']['region'])
    if setup['azure']['voice'] in ('', 'default'):
        # No voice configured: let the service auto-detect the language.
        auto_detect_source_language_config = AutoDetectSourceLanguageConfig(None, None)
    else:
        speech_config.speech_synthesis_voice_name = setup['azure']['voice']
    if filename is None:
        audio_config = AudioOutputConfig(use_default_speaker=True)
    else:
        audio_config = AudioOutputConfig(filename=filename)
    synthesizer = SpeechSynthesizer(
        speech_config=speech_config,
        audio_config=audio_config,
        auto_detect_source_language_config=auto_detect_source_language_config)
    return synthesizer
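A minimal sketch of calling this coroutine, assuming the module-level setup dict it reads from is already populated (the filename and text here are illustrative):

# Sketch: use setup_azure() to speak a line to a file.
# Assumes the module-level `setup` dict holds the Azure key/region/voice.
import asyncio

async def main():
    synthesizer = await setup_azure("hello.wav")  # or None for the speaker
    synthesizer.speak_text_async("Hello from Azure Speech").get()

asyncio.run(main())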
speech_key, service_region = os.getenv('SPEECH_RESOURCE_KEY'), "westus"

# A speech config is created with the given settings.
speech_config = SpeechConfig(subscription=speech_key, region=service_region)

print("Enter your choice :")
print("1. Output from speaker")
print("2. Save output to a file\n")
choice = int(input())

# Output is received via the device speaker
if choice == 1:
    audio_config = AudioOutputConfig(use_default_speaker=True)
# Output is saved in the file whose name is provided as an input
elif choice == 2:
    audio_config = AudioOutputConfig(
        filename="tts_output/" + input("Enter the name of the output file : ") + ".wav")

# A speech synthesizer is initialized with the given settings
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

# An asynchronous call to the API is made, waiting for the output
synthesizer.speak_text_async(input("Enter a string : ")).get()
speech_recognizer = SpeechRecognizer(speech_config=speech_config)
print("Say something...")
result = speech_recognizer.recognize_once()

# Check the recognition result.
if result.reason == ResultReason.RecognizedSpeech:
    print("Recognized: {}".format(result.text))
elif result.reason == ResultReason.NoMatch:
    print("No speech could be recognized: {}".format(result.no_match_details))
elif result.reason == ResultReason.Canceled:
    cancellation_details = result.cancellation_details
    print("Speech Recognition canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == CancellationReason.Error:
        print("Error details: {}".format(cancellation_details.error_details))

# Output: speak the recognized text through the default speaker.
audio_config = AudioOutputConfig(use_default_speaker=True)
language_config = SourceLanguageConfig("ko-KR")
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
synthesizer.speak_text_async(result.text)
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig

addr = 'http://127.0.0.1:5000'
test_url = addr + '/predict_api'
content_type = 'image/jpeg'
headers = {'content-type': content_type}

# Send the image to the captioning endpoint.
print("read img")
img = cv2.imread('images/t21.jpg')
_, img_encoded = cv2.imencode('.jpg', img)
print("send img")
response = requests.post(test_url, data=img_encoded.tobytes(), headers=headers)
print("recv img")
pred = json.loads(response.text)
query = pred["pred"]

# Strip the sequence markers from the predicted caption.
stopwords = ['startseq', 'endseq']
querywords = query.split()
resultwords = [word for word in querywords if word.lower() not in stopwords]
result = ' '.join(resultwords)
print(result)

# Speak the caption through the default speaker.
res = ('<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" '
       'xml:lang="en-US"><voice name="en-US-Guy24kRUS">' + result + '</voice></speak>')
subscription_key = '639cbe821c074e68ba19be3d46a9cbda'
speech_config = SpeechConfig(subscription=subscription_key, region="centralindia")
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
synthesizer.speak_ssml_async(res).get()
# Poll until the read operation has finished.
while True:
    get_handw_text_results = computervision_client.get_read_result(operation_id)
    if get_handw_text_results.status not in ['notStarted', 'running']:
        break
    time.sleep(1)

# Create the text file where the text extracted from the image is stored.
archivo = open("archivo.txt", "w")

# Print the text line by line.
if get_handw_text_results.status == OperationStatusCodes.succeeded:
    for text_result in get_handw_text_results.analyze_result.read_results:
        for line in text_result.lines:
            print(line.text)
            archivo.write(line.text)
            # print(line.bounding_box)
    print()
archivo.close()

###### Code to convert the text extracted from the image into audio ######

# Create the audio for the extracted text.
audio_config = AudioOutputConfig(use_default_speaker=True)

# Write the audio output.
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
archivo_string = open("archivo.txt", "r", encoding="utf-8-sig").read()
synthesizer.speak_text_async(archivo_string).get()
# Pick a random image URL from the search results and download it.
search_results = response.json()
i2download = [img["contentUrl"] for img in search_results["value"]][randrange(20)]
print(i2download)
response = requests.get(i2download)
ext = i2download[-3:]
imgfile = open("results/{}/{:04d}/{:04d}.{}".format(uuid, snumb, wi, ext), "wb")
imgfile.write(response.content)
imgfile.close()
print("Got {}".format(wi))
time.sleep(2)

# Synthesize the sentence for this slide, waiting so the WAV file exists
# before the collage and ffmpeg steps run.
audio_config = AudioOutputConfig(filename="results/{}/{:04d}/wav.wav".format(uuid, snumb))
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
synthesizer.speak_text_async(sentence).get()

# Build the slide collage, then print the ffmpeg command that pairs it
# with the narration.
os.system(
    "python collage_maker.py -o results/{0}/slide-{1:04d}.png -f results/{0}/{1:04d} -w 800 -i 600"
    .format(uuid, snumb))
print(
    'ffmpeg -loop 1 -i results/{0}/slide-{1:04d}.png -i results/{0}/{1:04d}/wav.wav -c:v libx264 -tune stillimage -c:a aac -b:a 192k -pix_fmt yuv420p -shortest results/{0}/{1:04d}.mp4 -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2"'
    .format(uuid, snumb))

pngs = [x for x in os.listdir("results/{}".format(uuid)) if x[-3:] == "png"]
for png in pngs:
"--audio_name", required=True, help="Please Enter Audio File Name(Without extension)") args = vars(ap.parse_args()) # In[2]: speech_config = SpeechConfig(subscription="__KEY", region="Region") # In[3]: audio_config = AudioOutputConfig(filename="public/python/output_audio_files/" + args['audio_name'] + "_summary.wav") # In[4]: with open("summary/" + args['audio_name'] + "_summary.txt", 'r') as file: data = file.read().replace('\n', '') # In[5]: data # In[11]: synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config) synthesizer.speak_text_async(data) # In[ ]:
#!/usr/bin/python3
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
import azure.cognitiveservices.speech as speechsdk
from azure.cognitiveservices.speech.audio import AudioOutputConfig

voice = "zh-CN-XiaoxiaoNeural"
text = '你好'

speech_config = SpeechConfig(subscription="3cb77646eea84168b348969306ff2a3c", region="eastus")
speech_config.speech_synthesis_voice_name = voice
audio_config = AudioOutputConfig(filename="file.wav")
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
result = synthesizer.speak_text_async(text).get()

# Check the result
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized to file.wav for text [{}] with voice [{}]".format(text, voice))
elif result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(cancellation_details.error_details))
import azure.cognitiveservices.speech as speechsdk
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig

speech_key, service_region = "UseYourSpeechAPI", "eastus"
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

voice = "Microsoft Server Speech Text to Speech Voice (en-US, GuyNeural)"  # en-US-GuyRUS
speech_config.speech_synthesis_voice_name = voice
# Request an actual MP3 bitstream so the .mp3 file is playable;
# a Riff*Pcm format would write WAV data under an .mp3 name.
speech_config.set_speech_synthesis_output_format(
    SpeechSynthesisOutputFormat["Audio24Khz160KBitRateMonoMp3"])

audio_config = AudioOutputConfig(filename="c:/OutputVoiceFile.mp3")
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
synthesizer.speak_text_async(
    "Hello World, This is a test of creating a playable mp3 file").get()
robot = Reachy(
    right_arm=parts.RightArm(io='ws', hand='force_gripper'),
    left_arm=parts.LeftArm(io='ws', hand='force_gripper'),
)

engine = pyttsx3.init()

# Move the left arm to its neutral position.
robot.left_arm.shoulder_roll.goal_position = 0
robot.left_arm.arm_yaw.goal_position = 0
robot.left_arm.elbow_pitch.goal_position = 0
robot.left_arm.hand.forearm_yaw.goal_position = 0

speech_config = SpeechConfig(subscription="subscriptionkey", region="westeurope")
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

# Google Assistant gRPC constants.
ASSISTANT_API_ENDPOINT = 'embeddedassistant.googleapis.com'
END_OF_UTTERANCE = embedded_assistant_pb2.AssistResponse.END_OF_UTTERANCE
DIALOG_FOLLOW_ON = embedded_assistant_pb2.DialogStateOut.DIALOG_FOLLOW_ON
CLOSE_MICROPHONE = embedded_assistant_pb2.DialogStateOut.CLOSE_MICROPHONE
PLAYING = embedded_assistant_pb2.ScreenOutConfig.PLAYING
DEFAULT_GRPC_DEADLINE = 60 * 3 + 5

# Shared conversation state.
global spokenAnswer
global followUp
global followUpSentence
spokenAnswer = ""
followUpSentence = ""
followUp = False
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
from msrest.authentication import CognitiveServicesCredentials
import json

luis_app_id = '20263b4d-b405-4c9b-8de8-e51663797c41'
luis_key = 'b45490c8a83243f9a6320ec7e8e85a43'
luis_endpoint = 'https://koinonos-language-understanding.cognitiveservices.azure.com/'

# Configure speech recognizer
speech_key, service_region = "40a03ef9d3d44916bdcd1c4457b82c13", "eastus"
speech_config = SpeechConfig(subscription=speech_key, region=service_region)
speech_recognizer = SpeechRecognizer(speech_config=speech_config)

# Configure speech synthesizer
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

runtimeCredentials = CognitiveServicesCredentials(luis_key)
clientRuntime = LUISRuntimeClient(endpoint=luis_endpoint, credentials=runtimeCredentials)

print("Start listening...")
speech = speech_recognizer.recognize_once()

try:
    while speech.text != "Stop":
        # "Production" is the published slot name
        print("Your query is: ", speech.text)
        predictionRequest = {"query": speech.text}
        predictionResponse = clientRuntime.prediction.get_slot_prediction(
            luis_app_id, "Production", predictionRequest)
        print("Top intent: {}".format(predictionResponse.prediction.top_intent))
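        # Presumable continuation (a sketch, not the original code): speak the
        # predicted intent back through the synthesizer created above, then
        # listen for the next utterance. The response wording is illustrative.
        response = "I think you want to {}".format(
            predictionResponse.prediction.top_intent)
        synthesizer.speak_text_async(response).get()
        speech = speech_recognizer.recognize_once()
except Exception as e:
    print(e)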