예제 #1
0
    def __init__(self):
        """Build the main window: a source-text area, two action buttons,
        a read-only output pane, a Yandex TTS client and a media player,
        then show the window (880x640 minimum)."""
        QMainWindow.__init__(self)

        self.setMinimumSize(QSize(880, 640))
        # Window title: "Soviet Socialist Language Learning Machine" (zh)
        self.setWindowTitle("苏维埃社会主义语言学习机")

        # Add text field holding the text to study / read aloud
        self.txtEdit = QPlainTextEdit(self)
        self.txtEdit.insertPlainText(sampleText)
        self.txtEdit.move(40, 10)
        self.txtEdit.resize(800, 300)

        # Add button — "study one (F1)", wired to button1Clicked
        self.btn1 = QPushButton("学习一个 (F1)", self)
        self.btn1.move(40, 310)
        self.btn1.clicked.connect(self.button1Clicked)
        # Add button — "read it aloud (F2)", wired to button2Clicked
        self.btn2 = QPushButton("念洋文 (F2)", self)
        self.btn2.move(160, 310)
        self.btn2.clicked.connect(self.button2Clicked)

        # Add label: read-only rich-text pane for program output
        self.lbl = QTextEdit(self)
        self.lbl.setReadOnly(True)
        self.lbl.move(40, 340)
        self.lbl.resize(800, 200)

        # Yandex TTS client (speaker "jane", mp3 output) plus a Qt media
        # player to play back the generated audio
        self.tts = TTS("jane", "mp3", "ae918646-fa47-4e66-96b6-6ce44d6d3146")
        self.player = QtMultimedia.QMediaPlayer(self)
        self.show()
예제 #2
0
    def test_init(self):
        """A plain constructor call must store the positional arguments
        and fall back to the default language "ru-RU"."""
        tts = TTS(SPEAKERS[0], AUDIO_FORMATS[0], KEY)

        params = tts._TTS__params
        expected = {
            "speaker": SPEAKERS[0],
            "format": AUDIO_FORMATS[0],
            "key": KEY,
            "lang": "ru-RU",
        }
        for name, value in expected.items():
            self.assertEqual(params[name], value)
예제 #3
0
    def __init__(self, **kwargs):
        """Wrap a Yandex TTS client configured from keyword options.

        Recognized kwargs: ``logger`` (defaults to this module's logger),
        ``speaker`` (defaults to 'jane'), ``emotion`` (optional) and the
        mandatory ``key``; raises KeyError when ``key`` is missing.
        """
        self.logger = kwargs.get('logger', logging.getLogger(__name__))
        voice = kwargs.get('speaker', 'jane')
        mood = kwargs.get('emotion')
        self.yandex_tts = TTS(
            speaker=voice,
            audio_format='wav',
            emotion=mood,
            key=kwargs['key'],
        )
예제 #4
0
    def setUp(self):
        """Create a TTS instance with generated audio data and switch the
        working directory into a temporary folder for file-based tests."""
        self.tts = TTS(SPEAKERS[0], AUDIO_FORMATS[0], KEY)
        self.tts.generate(TEXT)

        # create temp dir for tests; exist_ok=True so a previous run that
        # crashed before its teardown does not make setUp itself fail
        self.tmp_dir = "tmp"
        os.makedirs(self.tmp_dir, exist_ok=True)
        os.chdir(self.tmp_dir)
예제 #5
0
def text_to_speech(text, file, key=YANDEX_API_KEY, speaker=speaker):
    """Render *text* to a WAV file via Yandex TTS and save it as *file*."""
    synth = TTS(speaker, "wav", '%s' % key, emotion='good', speed='0.9', quality='lo')
    try:
        # try the encoded form first, fall back to the raw string
        # (Python 2/3 compatibility behaviour of the original)
        synth.generate(text.encode('utf-8'))
    except Exception:
        synth.generate(text)
    synth.save(file)
예제 #6
0
 def speak(audioString):
     """Synthesize *audioString* (English, speaker "ermil") to speech.mp3
     and play it with the external mpg321 player."""
     engine = TTS(
         "ermil",
         "mp3",
         "25d87483-720a-46ea-82bd-7f89d4c95bbd",
         lang='en-US',
         emotion="good",
     )
     # NOTE(review): trailing space presumably avoids the last word being
     # clipped by the synthesizer — confirm
     engine.generate(audioString + " ")
     engine.save()
     os.system("mpg321 --stereo speech.mp3 ")
예제 #7
0
    def test_data(self):
        """generate() must fetch audio data, and longer input text must
        yield more audio data."""
        self.tts.generate(TEXT)

        # data received
        self.assertIsNotNone(self.tts._data)

        # doubled input text must produce strictly more audio data; compare
        # lengths — the original compared the lists themselves, which is
        # only a lexicographic comparison of the first differing byte
        self.other_tts = TTS(SPEAKERS[0], AUDIO_FORMATS[0], KEY)
        self.other_tts.generate(TEXT * 2)
        self.assertLess(len(list(self.tts._data)),
                        len(list(self.other_tts._data)))
예제 #8
0
    def test_save_without_data(self):
        """save() must raise and create no file when generate() was never
        called."""
        tmp_dir = "tmp"
        os.makedirs(tmp_dir)
        os.chdir(tmp_dir)

        try:
            tts = TTS(SPEAKERS[0], AUDIO_FORMATS[0], KEY)
            # save without calling generate first
            self.assertRaises(Exception, tts.save, "empty_data")
            self.assertFalse(os.path.isfile("empty_data"))
        finally:
            # restore the working directory and remove the temp dir even
            # when an assertion above fails, so later tests do not start
            # inside a stale "tmp" directory
            os.chdir("..")
            rmtree(tmp_dir, ignore_errors=True)
예제 #9
0
    def test_init_with_kwargs(self):
        """Keyword options must be stored alongside the positional args."""
        chosen = {
            "speed": random.choice(SPEEDS),
            "lang": random.choice(LANGUAGES),
            "emotion": random.choice(EMOTIONS),
        }

        tts = TTS(SPEAKERS[0], AUDIO_FORMATS[0], KEY,
                  speed=chosen["speed"], lang=chosen["lang"],
                  emotion=chosen["emotion"])

        params = tts._TTS__params
        self.assertEqual(params["speaker"], SPEAKERS[0])
        self.assertEqual(params["format"], AUDIO_FORMATS[0])
        self.assertEqual(params["key"], KEY)
        for name, value in chosen.items():
            self.assertEqual(params[name], value)
예제 #10
0
def generate_tts(tts_text):
    """Return the path of a cached opus file for *tts_text*, synthesizing
    it with a randomly chosen voice on a cache miss.

    Returns None when the 'tts' section / 'api key' option is missing
    from the configuration.
    """
    voice = tts_voices[randint(0, len(tts_voices) - 1)]
    digest = md5(tts_text.encode('utf-8')).hexdigest()
    cache_path = os.path.join(os.getcwd(), 'media', 'cache',
                              'tts.' + digest + '.' + voice + '.opus')
    # cache hit: reuse the previously generated file
    if os.path.exists(cache_path):
        return cache_path
    try:
        key = config.get('tts', 'api key')
        tts = TTS(voice, 'opus', key, lang='ru_RU', emotion='neutral')
        tts.generate(tts_text)
        return tts.save(cache_path)
    except (NoSectionError, NoOptionError):
        return None
예제 #11
0
    def _generate_audio_file(self):
        """
        Generic method used as a Callback in TTSModule
            - must provided the audio file and write it on the disk

        .. raises:: FailToLoadSoundFile
        """

        # Since the gTTS lib disabled the SSL verification we get rid of
        # insecure request warning
        requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

        # NOTE(review): the 4th positional argument is assumed to be the
        # TTS language parameter — confirm against the TTS signature
        tts = TTS(self.speaker, "mp3", self.key, self.language)
        # bug fix: this line was indented with a literal TAB inside a
        # space-indented block, a TabError under Python 3
        tts.generate(text=self.words)

        # OK we got the audio, we can write the sound file
        tts.save(self.file_path)

        # Re-enable the warnings to avoid affecting the whole kalliope process
        warnings.resetwarnings()
예제 #12
0
def just_yandex_tts(text):
    """Speech synthesis via Yandex SpeechCloud, played back through the
    pygame mixer; synthesis failures are reported and swallowed."""
    try:
        # if this stops working, obtain and use your own SpeechCloud key
        tts = TTS(
            "oksana", "mp3", "60a2b005-738e-42b6-8b78-9ee9b7d57031", speed=1.2
        )
        tts.generate(text)  # speed is adjustable above
        tts.save('speechY.mp3')
    except Exception as e:
        print("[YandexTTS] Не удалось синтезировать речь: {0}".format(e))
        return

    mixer.init()
    mixer.music.load('speechY.mp3')
    mixer.music.play()
    # busy-wait until playback finishes
    while mixer.music.get_busy():
        time.sleep(0.1)
    mixer.music.stop()
    # a second audio file must be loaded afterwards, otherwise the mixer
    # crashes with a permission error when 'speechY.mp3' is rewritten
    mixer.music.load('new.waw')
예제 #13
0
def say(words):
    """Synthesize *words* with the configured TTS provider and play it.

    Generated audio is cached in /tmp keyed by a SHA-1 digest of the
    text, so repeated phrases are played from disk instead of being
    re-synthesized.
    """
    print(words)
    getConfig(path)
    # NOTE: historically named "md5", but the digest is actually SHA-1
    digest = hashlib.sha1(words.encode('utf-8')).hexdigest()
    filemp3 = ""
    for file in os.listdir("/tmp/"):
        if file.endswith(digest + ".wav"):
            filemp3 = (os.path.join(file))

    if filemp3 == digest + ".wav":
        # cache hit: play the previously recorded file
        print("Файл уже записан")
        os.system("aplay -q /tmp/" + filemp3)
    elif filemp3 == digest + ".mp3":
        # NOTE(review): this branch is unreachable — the scan above only
        # matches ".wav" files, so filemp3 can never end in ".mp3"
        print("Файл уже записан")
        # bug fix: was "s.system", a NameError whenever this branch ran
        os.system("mpg123 -q " + filemp3)

    elif PROVIDERTTS == "Yandex":
        print("Генерируем файл")
        tts = TTS("alyss", "wav", APIKEYTTS, lang=language, emotion="good")
        tts.generate(words)
        words = hashlib.sha1(words.encode('utf-8')).hexdigest()
        ttsfilename = "/tmp/" + words + ".wav"
        tts.save(ttsfilename)
        os.system("aplay -q " + ttsfilename)

    elif PROVIDERTTS == "Google":
        print("Генерируем файл")
        tts = gTTS(text=words, lang=languageG)
        words = hashlib.sha1(words.encode('utf-8')).hexdigest()
        ttsfilename = "/tmp/" + words + ".mp3"
        tts.save(ttsfilename)
        os.system("mpg123 -q " + ttsfilename)
예제 #14
0
import speech_recognition as sr
from yandex_speech import TTS
from pygame import mixer
from pygame.time import delay
from os import remove
from platform import system

# Shared speech recognizer used by listen()
r = sr.Recognizer()

# Module-level Yandex TTS client (speaker "oksana", mp3 output)
tts = TTS("oksana", "mp3", "60556d09-0e84-42b7-8974-9d0b01cfee33")
# NOTE(review): 48 kHz presumably matches the TTS output sample rate —
# confirm before changing
mixer.init(frequency=48000)

def listen():
    """Capture one utterance from the default microphone and return the
    text recognized by Google's free speech-to-text API (Russian).

    Returns 1 when the speech could not be understood and 2 when the
    recognition service could not be reached.
    """
    with sr.Microphone() as mic:
        recorded = r.listen(mic)
    try:
        return r.recognize_google(recorded, language="ru-RU")
    except sr.UnknownValueError:
        return 1
    except sr.RequestError:
        return 2

def say(text, savepath='phrases/livespeech.mp3'):
	'''
	Instantly to synthesize the text
	and speak it through the speakers
예제 #15
0
def t_to_s(text):
    """Synthesize *text* (clipped to at most 490 characters when it
    exceeds 500) to an opus file named "ramazan" and return that name."""
    clipped = text[0:490] if len(text) > 500 else text
    speech = TTS("oksana", "opus", "b04291f2-5e31-4c8e-af57-1695b7bd5f16", lang="ru_RU", emotion="good")
    speech.generate(clipped)
    speech.save("ramazan")
    return "ramazan"
예제 #16
0
def t_to_s(text):
    """Synthesize *text* with Yandex TTS to an opus file named "ramazan"
    and return that file name."""
    speech = TTS("oksana", "opus", "b04291f2-5e31-4c8e-af57-1695b7bd5f16")
    speech.generate(text)
    speech.save("ramazan")
    return "ramazan"
예제 #17
0
def test():
    """End-to-end demo: synthesize speech for a configured text (or a
    scraped news article), drive the landmark encoder / video decoder
    with its MFCC features, and render a talking-head video with the
    audio muxed in via ffmpeg.

    Returns the path of the saved landmark ``.npy`` file.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = config.device_ids

    result_dir = 'temp/' + config.in_file
    motion_dir = result_dir + '/motion/'

    os.mkdir(result_dir)
    os.mkdir(motion_dir)

    # PCA basis (first 6 components) and mean landmark used to project /
    # de-project landmark vectors
    pca = torch.FloatTensor(np.load('basics/pca.npy')[:, :6])
    mean = torch.FloatTensor(np.load('basics/mean.npy'))
    decoder = VG_net()
    encoder = AT_net()

    state_dict2 = multi2single(config.vg_model, 1)
    decoder.load_state_dict(state_dict2)

    state_dict = multi2single(config.at_model, 1)
    encoder.load_state_dict(state_dict)

    encoder.eval()
    decoder.eval()

    test_file = result_dir + "/" + config.in_file + ".wav"
    test_file_old = result_dir + "/old_" + config.in_file + ".wav"

    # text source: explicit config text, else the scraped news body
    if config.text_tts == "" and config.news_url != "":
        parse_news_content = get_info(config.news_url)['news_content']
    else:
        parse_news_content = config.text_tts

    tts = TTS(config.name_tts,
              "wav",
              "000000-0000-0000-0000-00000000",
              config.lang_tts,
              emotion="neutral",
              speed=1)
    # clip to 1999 characters (presumably the TTS request limit — confirm)
    tts.generate(parse_news_content[:1999])
    if config.shift == 1:
        tts.save(test_file_old)
        audio_shift(test_file_old, test_file)
    else:
        tts.save(test_file)

    example_image, example_landmark = generator_demo_example_lips(
        config.person)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    example_image = cv2.cvtColor(example_image, cv2.COLOR_BGR2RGB)
    example_image = transform(example_image)

    example_landmark = example_landmark.reshape(
        (1, example_landmark.shape[0] * example_landmark.shape[1]))

    # "config.cuda == True" replaced with a plain truthiness test
    if config.cuda:
        example_image = Variable(example_image.view(1, 3, 128, 128)).cuda()
        example_landmark = Variable(
            torch.FloatTensor(example_landmark.astype(float))).cuda()
    else:
        example_image = Variable(example_image.view(1, 3, 128, 128))
        example_landmark = Variable(
            torch.FloatTensor(example_landmark.astype(float)))

    # project the example landmark into PCA space
    example_landmark = example_landmark * 5.0
    example_landmark = example_landmark - mean.expand_as(example_landmark)
    example_landmark = torch.mm(example_landmark, pca)

    speech, _sr = librosa.load(test_file, sr=16000)
    # pad 1920 samples (120 ms at 16 kHz) of silence on both ends before
    # computing MFCCs.  (The original also computed mfcc once *before*
    # padding; that result was immediately overwritten, so it is dropped.
    # An unused 44.1 kHz reload of the audio was removed as well.)
    speech = np.insert(speech, 0, np.zeros(1920))
    speech = np.append(speech, np.zeros(1920))
    mfcc = python_speech_features.mfcc(speech, 16000, winstep=0.01)

    print('=======================================')
    print('Generate images')
    ind = 3
    with torch.no_grad():
        input_mfcc = []
        # sliding 28-row MFCC window (7 blocks of 4 frames), dropping the
        # energy coefficient in column 0
        while ind <= int(mfcc.shape[0] / 4) - 4:
            t_mfcc = mfcc[(ind - 3) * 4:(ind + 4) * 4, 1:]
            input_mfcc.append(torch.FloatTensor(t_mfcc))
            ind += 1
        input_mfcc = torch.stack(input_mfcc, dim=0)
        input_mfcc = input_mfcc.unsqueeze(0)

        fake_lmark = encoder(example_landmark, input_mfcc)
        fake_lmark = fake_lmark.view(
            fake_lmark.size(0) * fake_lmark.size(1), 6)

        # de-project back to landmark space
        example_landmark = torch.mm(example_landmark, pca.t())
        example_landmark = example_landmark + mean.expand_as(example_landmark)
        # amplify components 1..5 for more visible motion
        fake_lmark[:, 1:6] *= 2 * torch.FloatTensor(
            np.array([1.1, 1.2, 1.3, 1.4, 1.5]))
        fake_lmark = torch.mm(fake_lmark, pca.t())
        fake_lmark = fake_lmark + mean.expand_as(fake_lmark)

        fake_lmark = fake_lmark.unsqueeze(0)
        fake_lmark = fake_lmark.data.cpu().numpy()

        file_mark = result_dir + "/" + config.in_file + ".npy"
        file_mp4 = result_dir + "/" + config.in_file  # + ".mp4"
        np.save(file_mark, fake_lmark)
        mark_paint.mark_video(fake_lmark, motion_dir)

        cmd = 'ffmpeg -framerate 25 -i ' + motion_dir + '%d.png  -filter:v scale=512:-1 -c:v libx264 -pix_fmt yuv420p ' + file_mp4 + '.mp4'
        subprocess.call(cmd, shell=True)
        print('video done')

        cmd = 'ffmpeg -i ' + file_mp4 + '.mp4 -i ' + test_file + ' -c:v copy -c:a aac -strict experimental ' + file_mp4 + '_result.mp4'
        subprocess.call(cmd, shell=True)
        print('video+audio done')

        # the trailing "return False" after this return was unreachable
        # and has been removed
        return file_mark
예제 #18
0
            # Если скрипт запущен в тестовом режиме из консоли с передачей параметра test
            if len(sys.argv) > 1 and str(sys.argv[1]) == "test":
                # Выводим информацию на экран
                print "to_speaker = " + to_speaker
                print "count_rings = " + str(count_rings)
            # Если скрипт запущен в боевом режиме
            else:
                # Запишем информацию в логи
                write_log("to_speaker = " + to_speaker)
                write_log("count_rings = " + str(count_rings))

            # Если это первый звонок (первый раз прозвенел аппарат телефона при входящем вызове)
            if count_rings == 0:
                # Передадим в Yandex.Speech наш текст для произнесения и запишем полученный файл в формате mp3
                file_mp3 = "/home/asterisk/to_speaker/name_or_number"
                tts = TTS("zahar", "mp3", "*****-****-****-****-***********")
                tts.generate(str(to_speaker))
                tts.save(file_mp3)
                file_mp3 = file_mp3 + ".mp3"

                # Если это внутренний звонок, то сделаем паузу, чтобы сначала зазвенел телефонный аппарат, а потом заговорил робот,
                # иначе, при внутренних звонках, робот говорит раньше, чем звонит телефон, и этим нас пугает немного :)
                if mc.get("is_internal") is not None:
                    mc.delete("is_internal")
                    # Паузу не делаем, на трубках надо было включить "не пропускать первый ring"
                    # time.sleep(2)

            # Если количество звонков меньше пяти
            if count_rings < 5:
                to_speaker_all = '/usr/bin/mplayer -ao alsa -really-quiet -noconsolecontrols ' + file_mp3
                os.system(to_speaker_all)
예제 #19
0
 def setUp(self):
     # fresh TTS client per test: first speaker / first audio format from
     # the module-level constants, with the shared API key
     self.tts = TTS(SPEAKERS[0], AUDIO_FORMATS[0], KEY)