示例#1
0
    def check_ffmpeg(cls):
        """
        Check whether ``ffmpeg`` can be called.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.ffmpegwrapper import FFMPEGWrapper
            input_file_path = gf.absolute_path(u"tools/res/audio.mp3",
                                               __file__)
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            converter = FFMPEGWrapper()
            result = converter.convert(input_file_path, output_file_path)
            gf.delete_file(handler, output_file_path)
            if result:
                gf.print_success(u"ffmpeg         OK")
                return False
        except:
            pass
        gf.print_error(u"ffmpeg         ERROR")
        gf.print_info(
            u"  Please make sure you have ffmpeg installed correctly")
        gf.print_info(
            u"  and that its path is in your PATH environment variable")
        return True
示例#2
0
    def check_espeak(cls):
        """
        Check whether ``espeak`` can be called.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.espeakwrapper import ESPEAKWrapper
            text = u"From fairest creatures we desire increase,"
            language = u"eng"
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            espeak = ESPEAKWrapper()
            result = espeak.synthesize_single(text, language, output_file_path)
            gf.delete_file(handler, output_file_path)
            if result:
                gf.print_success(u"espeak         OK")
                return False
        except:
            pass
        gf.print_error(u"espeak         ERROR")
        gf.print_info(
            u"  Please make sure you have espeak installed correctly")
        gf.print_info(
            u"  and that its path is in your PATH environment variable")
        gf.print_info(
            u"  You might also want to check that the espeak-data directory")
        gf.print_info(
            u"  is set up correctly, for example, it has the correct permissions"
        )
        return True
示例#3
0
 def synthesize_multiple(self,
                         text_file,
                         ofp=None,
                         quit_after=None,
                         backwards=False,
                         zero_length=False):
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = u"festival"
         rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
         tts_engine = FESTIVALWrapper(rconf=rconf)
         anchors, total_time, num_chars = tts_engine.synthesize_multiple(
             text_file, output_file_path, quit_after, backwards)
         gf.delete_file(handler, output_file_path)
         if zero_length:
             self.assertEqual(total_time, 0.0)
         else:
             self.assertGreater(total_time, 0.0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         raise exc
示例#4
0
 def inner(c_ext, cew_subprocess):
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
         rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
         tts_engine = ESPEAKWrapper(rconf=rconf)
         anchors, total_time, num_chars = tts_engine.synthesize_multiple(
             text_file,
             output_file_path,
             quit_after,
             backwards
         )
         gf.delete_file(handler, output_file_path)
         if zero_length:
             self.assertEqual(total_time, 0.0)
         else:
             self.assertGreater(total_time, 0.0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         raise exc
示例#5
0
 def inner(c_ext, cew_subprocess, cache):
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = self.TTS
         rconf[RuntimeConfiguration.TTS_PATH] = self.TTS_PATH
         rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
         rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
         rconf[RuntimeConfiguration.TTS_CACHE] = cache
         tts_engine = self.TTS_CLASS(rconf=rconf)
         anchors, total_time, num_chars = tts_engine.synthesize_multiple(
             text_file,
             output_file_path,
             quit_after,
             backwards
         )
         gf.delete_file(handler, output_file_path)
         if cache:
             tts_engine.clear_cache()
         if zero_length:
             self.assertEqual(total_time, 0.0)
         else:
             self.assertGreater(total_time, 0.0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         if (cache) and (tts_engine is not None):
             tts_engine.clear_cache()
         with self.assertRaises(expected_exc):
             raise exc
示例#6
0
    def check_espeak(cls):
        """
        Check whether ``espeak`` can be called.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.textfile import TextFile
            from aeneas.textfile import TextFragment
            from aeneas.ttswrappers.espeakttswrapper import ESPEAKTTSWrapper
            text = u"From fairest creatures we desire increase,"
            text_file = TextFile()
            text_file.add_fragment(TextFragment(language=u"eng", lines=[text], filtered_lines=[text]))
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            ESPEAKTTSWrapper().synthesize_multiple(text_file, output_file_path)
            gf.delete_file(handler, output_file_path)
            gf.print_success(u"espeak         OK")
            return False
        except:
            pass
        gf.print_error(u"espeak         ERROR")
        gf.print_info(u"  Please make sure you have espeak installed correctly")
        gf.print_info(u"  and that its path is in your PATH environment variable")
        gf.print_info(u"  You might also want to check that the espeak-data directory")
        gf.print_info(u"  is set up correctly, for example, it has the correct permissions")
        return True
示例#7
0
 def inner(c_ext, cew_subprocess, cache):
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = self.TTS
         rconf[RuntimeConfiguration.TTS_PATH] = self.TTS_PATH
         rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
         rconf[RuntimeConfiguration.
               CEW_SUBPROCESS_ENABLED] = cew_subprocess
         rconf[RuntimeConfiguration.TTS_CACHE] = cache
         tts_engine = self.TTS_CLASS(rconf=rconf)
         anchors, total_time, num_chars = tts_engine.synthesize_multiple(
             text_file, output_file_path, quit_after, backwards)
         gf.delete_file(handler, output_file_path)
         if cache:
             tts_engine.clear_cache()
         if zero_length:
             self.assertEqual(total_time, 0.0)
         else:
             self.assertGreater(total_time, 0.0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         if (cache) and (tts_engine is not None):
             tts_engine.clear_cache()
         with self.assertRaises(expected_exc):
             raise exc
示例#8
0
    def check_espeak(cls):
        """
        Check whether ``espeak`` can be called.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.espeakwrapper import ESPEAKWrapper
            text = u"From fairest creatures we desire increase,"
            language = u"eng"
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            espeak = ESPEAKWrapper()
            result = espeak.synthesize_single(
                text,
                language,
                output_file_path
            )
            gf.delete_file(handler, output_file_path)
            if result:
                gf.print_success(u"espeak         OK")
                return False
        except:
            pass
        gf.print_error(u"espeak         ERROR")
        gf.print_info(u"  Please make sure you have espeak installed correctly")
        gf.print_info(u"  and that its path is in your PATH environment variable")
        gf.print_info(u"  You might also want to check that the espeak-data directory")
        gf.print_info(u"  is set up correctly, for example, it has the correct permissions")
        return True
 def synthesize_multiple(self, text_file, ofp=None, quit_after=None, backwards=False, zero_length=False):
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = u"festival"
         rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
         tts_engine = FESTIVALWrapper(rconf=rconf)
         anchors, total_time, num_chars = tts_engine.synthesize_multiple(
             text_file,
             output_file_path,
             quit_after,
             backwards
         )
         gf.delete_file(handler, output_file_path)
         if zero_length:
             self.assertEqual(total_time, 0.0)
         else:
             self.assertGreater(total_time, 0.0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         raise exc
示例#10
0
    def check_espeak(cls):
        """
        Check whether ``espeak`` can be called.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.textfile import TextFile
            from aeneas.textfile import TextFragment
            from aeneas.ttswrappers.espeakttswrapper import ESPEAKTTSWrapper
            text = u"From fairest creatures we desire increase,"
            text_file = TextFile()
            text_file.add_fragment(TextFragment(language=u"eng", lines=[text], filtered_lines=[text]))
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            ESPEAKTTSWrapper().synthesize_multiple(text_file, output_file_path)
            gf.delete_file(handler, output_file_path)
            gf.print_success(u"espeak         OK")
            return False
        except:
            pass
        gf.print_error(u"espeak         ERROR")
        gf.print_info(u"  Please make sure you have espeak installed correctly")
        gf.print_info(u"  and that its path is in your PATH environment variable")
        gf.print_info(u"  You might also want to check that the espeak-data directory")
        gf.print_info(u"  is set up correctly, for example, it has the correct permissions")
        return True
示例#11
0
 def test_close_file_handler(self):
     handler, path = gf.tmp_file()
     self.assertTrue(gf.file_exists(path))
     gf.close_file_handler(handler)
     self.assertTrue(gf.file_exists(path))
     gf.delete_file(handler, path)
     self.assertFalse(gf.file_exists(path))
示例#12
0
 def test_read_file_bytes(self):
     handler, path = gf.tmp_file()
     with io.open(path, "w", encoding="utf-8") as tmp_file:
         tmp_file.write(u"Foo bar")
     contents = gf.read_file_bytes(path)
     self.assertTrue(gf.is_bytes(contents))
     self.assertEqual(len(contents), 7)
     gf.delete_file(handler, path)
 def test_read_file_bytes(self):
     handler, path = gf.tmp_file()
     with io.open(path, "w", encoding="utf-8") as tmp_file:
         tmp_file.write(u"Foo bar")
     contents = gf.read_file_bytes(path)
     self.assertTrue(gf.is_bytes(contents))
     self.assertEqual(len(contents), 7)
     gf.delete_file(handler, path)
示例#14
0
 def test_write(self):
     audiofile = self.load(self.AUDIO_FILE_WAVE, rs=True)
     data = audiofile.audio_samples
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     audiofile.write(output_file_path)
     audiocopy = self.load(output_file_path)
     datacopy = audiocopy.audio_samples
     self.assertTrue((datacopy == data).all())
     gf.delete_file(handler, output_file_path)
示例#15
0
 def test_write(self):
     audiofile = self.load(self.AUDIO_FILE_WAVE, rs=True)
     data = audiofile.audio_samples
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     audiofile.write(output_file_path)
     audiocopy = self.load(output_file_path)
     datacopy = audiocopy.audio_samples
     self.assertTrue((datacopy == data).all())
     gf.delete_file(handler, output_file_path)
示例#16
0
    def _loop_use_cache(self, helper_function, num, fragment):
        """ Synthesize all fragments using the cache """
        self.log([u"Examining fragment %d (cache)...", num])
        fragment_info = (fragment.language, fragment.filtered_text)
        if self.cache.is_cached(fragment_info):
            self.log(u"Fragment cached: retrieving audio data from cache")

            # read data from file, whose path is in the cache
            file_handler, file_path = self.cache.get(fragment_info)
            self.log([u"Reading cached fragment at '%s'...", file_path])
            succeeded, data = self._read_audio_data(file_path)
            if not succeeded:
                self.log_crit(
                    u"An unexpected error occurred while reading cached audio file"
                )
                return (False, None)
            self.log([u"Reading cached fragment at '%s'... done", file_path])
        else:
            self.log(u"Fragment not cached: synthesizing and caching")

            # creating destination file
            file_info = gf.tmp_file(
                suffix=u".cache.wav",
                root=self.rconf[RuntimeConfiguration.TMP_PATH])
            file_handler, file_path = file_info
            self.log([u"Synthesizing fragment to '%s'...", file_path])

            # synthesize and get the duration of the output file
            voice_code = self._language_to_voice_code(fragment.language)
            self.log(u"Calling helper function")
            succeeded, data = helper_function(text=fragment.filtered_text,
                                              voice_code=voice_code,
                                              output_file_path=file_path,
                                              return_audio_data=True)
            # check output
            if not succeeded:
                self.log_crit(
                    u"An unexpected error occurred in helper_function")
                return (False, None)
            self.log([u"Synthesizing fragment to '%s'... done", file_path])
            duration, sr_nu, enc_nu, samples = data
            if duration > 0:
                self.log(u"Fragment has > 0 duration, adding it to cache")
                self.cache.add(fragment_info, file_info)
                self.log(u"Added fragment to cache")
            else:
                self.log(u"Fragment has zero duration, not adding it to cache")
            self.log([
                u"Closing file handler for cached output file path '%s'",
                file_path
            ])
            gf.close_file_handler(file_handler)
        self.log([u"Examining fragment %d (cache)... done", num])
        return (True, data)
示例#17
0
 def test_output_sync_map(self):
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration["language"] = Language.ENG
     task.configuration["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = gf.tmp_file(suffix=".txt")
     task.sync_map_file_path_absolute = output_file_path
     path = task.output_sync_map_file()
     self.assertIsNotNone(path)
     self.assertEqual(path, output_file_path)
     gf.delete_file(handler, output_file_path)
示例#18
0
 def test_compress_file(self):
     input_path = self.FILES["unpacked"]["path"]
     for key in self.FILES:
         fmt = self.FILES[key]["format"]
         if fmt != ContainerFormat.UNPACKED:
             handler, output_path = gf.tmp_file(suffix="." + fmt)
             cont = Container(output_path, fmt)
             cont.compress(input_path)
             self.assertTrue(os.path.isfile(output_path))
             copy = Container(output_path, fmt)
             self.assertEqual(copy.entries, self.EXPECTED_ENTRIES)
             gf.delete_file(handler, output_path)
示例#19
0
 def test_output_sync_map(self):
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration["language"] = Language.ENG
     task.configuration["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = gf.tmp_file(suffix=".txt")
     task.sync_map_file_path_absolute = output_file_path
     path = task.output_sync_map_file()
     self.assertIsNotNone(path)
     self.assertEqual(path, output_file_path)
     gf.delete_file(handler, output_file_path)
示例#20
0
 def inner(c_ext, cew_subprocess):
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     tfl = TextFile(gf.absolute_path(path, __file__), TextFileFormat.PLAIN)
     tfl.set_language(Language.ENG)
     synth = Synthesizer(logger=logger)
     synth.rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
     synth.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
     result = synth.synthesize(tfl, output_file_path, quit_after=quit_after, backwards=backwards)
     gf.delete_file(handler, output_file_path)
     self.assertEqual(len(result[0]), expected)
     if expected2 is not None:
         self.assertAlmostEqual(result[1], expected2, places=0)
示例#21
0
 def test_compress_file(self):
     input_path = self.FILES["unpacked"]["path"]
     for key in self.FILES:
         fmt = self.FILES[key]["format"]
         if fmt != ContainerFormat.UNPACKED:
             handler, output_path = gf.tmp_file(suffix="." + fmt)
             cont = Container(output_path, fmt)
             cont.compress(input_path)
             self.assertTrue(os.path.isfile(output_path))
             copy = Container(output_path, fmt)
             self.assertEqual(copy.entries, self.EXPECTED_ENTRIES)
             gf.delete_file(handler, output_path)
示例#22
0
 def inner(c_ext, cew_subprocess):
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     tfl = TextFile(gf.absolute_path(path, __file__), TextFileFormat.PLAIN)
     tfl.set_language(Language.ENG)
     synth = Synthesizer(logger=logger)
     synth.rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
     synth.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
     result = synth.synthesize(tfl, output_file_path, quit_after=quit_after, backwards=backwards)
     gf.delete_file(handler, output_file_path)
     self.assertEqual(len(result[0]), expected)
     if expected2 is not None:
         self.assertAlmostEqual(result[1], expected2, places=0)
示例#23
0
 def execute(self, config_string, audio_path, text_path):
     handler, tmp_path = gf.tmp_file()
     task = Task(config_string)
     task.audio_file_path_absolute = gf.absolute_path(audio_path, __file__)
     task.text_file_path_absolute = gf.absolute_path(text_path, __file__)
     executor = ExecuteTask(task)
     executor.execute()
     task.sync_map_file_path_absolute = tmp_path
     result_path = task.output_sync_map_file()
     self.assertIsNotNone(result_path)
     self.assertEqual(result_path, tmp_path)
     self.assertGreater(len(gf.read_file_bytes(result_path)), 0)
     gf.delete_file(handler, tmp_path)
 def perform_run(self, audio_file_path, text_file_path, config_string, rconf_string):
     output_file_handler, output_file_path = gf.tmp_file()
     executor = ExecuteTaskCLI(use_sys=False)
     verbose = "-v" if self.verbose else ""
     executor.run(arguments=[
         "dummy placeholder for aeneas.tools.execute_task",
         audio_file_path,
         text_file_path,
         config_string,
         output_file_path,
         "-r=\"%s\"" % rconf_string,
         verbose
     ])
     gf.delete_file(output_file_handler, output_file_path)
     return executor.logger
示例#25
0
 def test_cew_synthesize_single(self):
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     try:
         import aeneas.cew.cew
         sr, begin, end = aeneas.cew.cew.synthesize_single(
             output_file_path,
             u"en",                      # NOTE cew requires the actual eSpeak voice code
             u"Dummy"
         )
         self.assertEqual(sr, 22050)
         self.assertEqual(begin, 0)
         self.assertGreater(end, 0)
     except ImportError:
         pass
     gf.delete_file(handler, output_file_path)
示例#26
0
        def synthesize_and_clean(text, voice_code):
            """
            Synthesize a single fragment via subprocess,
            and immediately remove the temporary file.

            :rtype: tuple (duration, sample_rate, encoding, samples)
            """
            self.log(u"Synthesizing text...")
            handler, tmp_destination = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            result, data = self._synthesize_single_subprocess(
                text=(text + u" "),
                voice_code=voice_code,
                output_file_path=tmp_destination
            )
            self.log([u"Removing temporary file '%s'", tmp_destination])
            gf.delete_file(handler, tmp_destination)
            self.log(u"Synthesizing text... done")
            return data
示例#27
0
 def synthesize_single(self, text, language, ofp=None, zero_length=False):
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = u"festival"
         rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
         tts_engine = FESTIVALWrapper(rconf=rconf)
         result = tts_engine.synthesize_single(text, language, output_file_path)
         gf.delete_file(handler, output_file_path)
         if zero_length:
             self.assertEqual(result, 0)
         else:
             self.assertGreater(result, 0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         raise exc
示例#28
0
 def inner(c_ext, cew_subprocess):
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
         rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
         tts_engine = ESPEAKWrapper(rconf=rconf)
         result = tts_engine.synthesize_single(text, language, output_file_path)
         gf.delete_file(handler, output_file_path)
         if zero_length:
             self.assertEqual(result, 0)
         else:
             self.assertGreater(result, 0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         raise exc
示例#29
0
 def synthesize_single(self, text, language, ofp=None, zero_length=False):
     if ofp is None:
         handler, output_file_path = gf.tmp_file(suffix=".wav")
     else:
         handler = None
         output_file_path = ofp
     try:
         rconf = RuntimeConfiguration()
         rconf[RuntimeConfiguration.TTS] = u"festival"
         rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
         tts_engine = FESTIVALWrapper(rconf=rconf)
         result = tts_engine.synthesize_single(text, language,
                                               output_file_path)
         gf.delete_file(handler, output_file_path)
         if zero_length:
             self.assertEqual(result, 0)
         else:
             self.assertGreater(result, 0)
     except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
         gf.delete_file(handler, output_file_path)
         raise exc
示例#30
0
    def _synthesize(self, text_file):
        """
        Synthesize text into a WAVE file.

        Return a tuple consisting of:

        1. the handler of the generated audio file
        2. the path of the generated audio file
        3. the list of anchors, that is, a list of floats
           each representing the start time of the corresponding
           text fragment in the generated wave file
           ``[start_1, start_2, ..., start_n]``
        4. a tuple describing the format of the audio file

        :param text_file: the text to be synthesized
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :rtype: tuple (handler, string, list)
        """
        handler, path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        result = self.synthesizer.synthesize(text_file, path)
        return (handler, path, result[0], self.synthesizer.output_audio_format)
示例#31
0
 def _compose_output_file_path(self, extension, output_file_path=None):
     """
     If ``output_file_path`` is given, use it.
     Otherwise (``output_file_path`` is ``None``),
     create a temporary file with the correct extension.
     """
     self.log(u"Determining output file path...")
     if output_file_path is None:
         self.log(u"output_file_path is None: creating temp file")
         handler, output_file_path = gf.tmp_file(
             root=self.rconf[RuntimeConfiguration.TMP_PATH],
             suffix=(".%s" % extension)
         )
         gf.delete_file(handler, output_file_path)
     else:
         self.log(u"output_file_path is not None: cheking that file can be written")
         if not gf.file_can_be_written(output_file_path):
             self.log_exc(u"Path '%s' cannot be written. Wrong permissions?" % (output_file_path), None, True, OSError)
     self.log(u"Determining output file path... done")
     self.log([u"Output file path is '%s'", output_file_path])
     return output_file_path
示例#32
0
 def test_cew_synthesize_multiple_lang(self):
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     try:
         c_quit_after = 0.0
         c_backwards = 0
         c_text = [
             (u"en",
              u"Dummy 1"),  # NOTE cew requires the actual eSpeak voice code
             (u"it", u"Segnaposto 2"
              ),  # NOTE cew requires the actual eSpeak voice code
             (u"en",
              u"Dummy 3"),  # NOTE cew requires the actual eSpeak voice code
         ]
         import aeneas.cew.cew
         sr, sf, intervals = aeneas.cew.cew.synthesize_multiple(
             output_file_path, c_quit_after, c_backwards, c_text)
         self.assertEqual(sr, 22050)
         self.assertEqual(sf, 3)
         self.assertEqual(len(intervals), 3)
     except ImportError:
         pass
     gf.delete_file(handler, output_file_path)
示例#33
0
 def test_cew_synthesize_multiple_lang(self):
     handler, output_file_path = gf.tmp_file(suffix=".wav")
     try:
         c_quit_after = 0.0
         c_backwards = 0
         c_text = [
             (u"en", u"Dummy 1"),        # NOTE cew requires the actual eSpeak voice code
             (u"it", u"Segnaposto 2"),   # NOTE cew requires the actual eSpeak voice code
             (u"en", u"Dummy 3"),        # NOTE cew requires the actual eSpeak voice code
         ]
         import aeneas.cew.cew
         sr, sf, intervals = aeneas.cew.cew.synthesize_multiple(
             output_file_path,
             c_quit_after,
             c_backwards,
             c_text
         )
         self.assertEqual(sr, 22050)
         self.assertEqual(sf, 3)
         self.assertEqual(len(intervals), 3)
     except ImportError:
         pass
     gf.delete_file(handler, output_file_path)
示例#34
0
    def _synthesize(self, text_file):
        """
        Synthesize text into a WAVE file.

        Return:

        1. handler of the generated wave file
        2. path of the generated wave file
        3. the list of anchors, that is, a list of floats
           each representing the start time of the corresponding
           text fragment in the generated wave file
           ``[start_1, start_2, ..., start_n]``
        4. if the synthesizer produced a PCM16 mono WAVE file

        :param synthesizer: the synthesizer to use
        :type  synthesizer: :class:`~aeneas.synthesizer.Synthesizer`
        :rtype: tuple (handler, string, list)
        """
        synthesizer = Synthesizer(rconf=self.rconf, logger=self.logger)
        handler, path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        result = synthesizer.synthesize(text_file, path)
        anchors = result[0]
        return (handler, path, anchors, synthesizer.output_is_mono_wave)
示例#35
0
    def _synthesize(self, text_file):
        """
        Synthesize text into a WAVE file.

        Return:

        1. handler of the generated wave file
        2. path of the generated wave file
        3. the list of anchors, that is, a list of floats
           each representing the start time of the corresponding
           text fragment in the generated wave file
           ``[start_1, start_2, ..., start_n]``
        4. if the synthesizer produced a PCM16 mono WAVE file

        :param synthesizer: the synthesizer to use
        :type  synthesizer: :class:`~aeneas.synthesizer.Synthesizer`
        :rtype: tuple (handler, string, list)
        """
        synthesizer = Synthesizer(rconf=self.rconf, logger=self.logger)
        handler, path = gf.tmp_file(
            suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        result = synthesizer.synthesize(text_file, path)
        anchors = result[0]
        return (handler, path, anchors, synthesizer.output_is_mono_wave)
示例#36
0
    def check_ffmpeg(cls):
        """
        Check whether ``ffmpeg`` can be called.

        Return ``True`` on failure and ``False`` on success.

        :rtype: bool
        """
        try:
            from aeneas.ffmpegwrapper import FFMPEGWrapper
            input_file_path = gf.absolute_path(u"tools/res/audio.mp3", __file__)
            handler, output_file_path = gf.tmp_file(suffix=u".wav")
            converter = FFMPEGWrapper()
            result = converter.convert(input_file_path, output_file_path)
            gf.delete_file(handler, output_file_path)
            if result:
                gf.print_success(u"ffmpeg         OK")
                return False
        except:
            pass
        gf.print_error(u"ffmpeg         ERROR")
        gf.print_info(u"  Please make sure you have ffmpeg installed correctly")
        gf.print_info(u"  and that its path is in your PATH environment variable")
        return True
示例#37
0
 def _compose_output_file_path(self, extension, output_file_path=None):
     """
     If ``output_file_path`` is given, use it.
     Otherwise (``output_file_path`` is ``None``),
     create a temporary file with the correct extension.
     """
     self.log(u"Determining output file path...")
     if output_file_path is None:
         self.log(u"output_file_path is None: creating temp file")
         handler, output_file_path = gf.tmp_file(
             root=self.rconf[RuntimeConfiguration.TMP_PATH],
             suffix=(".%s" % extension))
         gf.delete_file(handler, output_file_path)
     else:
         self.log(
             u"output_file_path is not None: cheking that file can be written"
         )
         if not gf.file_can_be_written(output_file_path):
             self.log_exc(
                 u"Path '%s' cannot be written. Wrong permissions?" %
                 (output_file_path), None, True, OSError)
     self.log(u"Determining output file path... done")
     self.log([u"Output file path is '%s'", output_file_path])
     return output_file_path
示例#38
0
    def synthesize_multiple(self, audio_file_path, c_quit_after, c_backwards, u_text):
        """
        Synthesize the text contained in the given fragment list
        into a ``wav`` file.

        :param string audio_file_path: the path to the output audio file
        :param float c_quit_after: stop synthesizing as soon as
                                   reaching this many seconds
        :param bool c_backwards: synthesizing from the end of the text file
        :param object u_text: a list of ``(voice_code, text)`` tuples
        :rtype: tuple ``(sample_rate, synthesized, intervals)``
        """
        self.log([u"Audio file path: '%s'", audio_file_path])
        self.log([u"c_quit_after: '%.3f'", c_quit_after])
        self.log([u"c_backwards: '%d'", c_backwards])

        text_file_handler, text_file_path = gf.tmp_file()
        data_file_handler, data_file_path = gf.tmp_file()
        self.log([u"Temporary text file path: '%s'", text_file_path])
        self.log([u"Temporary data file path: '%s'", data_file_path])

        self.log(u"Populating the text file...")
        with io.open(text_file_path, "w", encoding="utf-8") as tmp_text_file:
            for f_voice_code, f_text in u_text:
                tmp_text_file.write(u"%s %s\n" % (f_voice_code, f_text))
        self.log(u"Populating the text file... done")

        arguments = [
            self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_PATH],
            "-m",
            "aeneas.cewsubprocess",
            "%.3f" % c_quit_after,
            "%d" % c_backwards,
            text_file_path,
            audio_file_path,
            data_file_path,
        ]
        self.log([u"Calling with arguments '%s'", u" ".join(arguments)])
        proc = subprocess.Popen(
            arguments, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
        )
        proc.communicate()

        self.log(u"Reading output data...")
        with io.open(data_file_path, "r", encoding="utf-8") as data_file:
            lines = data_file.read().splitlines()
            sr = int(lines[0])
            sf = int(lines[1])
            intervals = []
            for line in lines[2:]:
                values = line.split(u" ")
                if len(values) == 2:
                    intervals.append((TimeValue(values[0]), TimeValue(values[1])))
        self.log(u"Reading output data... done")

        self.log(u"Deleting text and data files...")
        gf.delete_file(text_file_handler, text_file_path)
        gf.delete_file(data_file_handler, data_file_path)
        self.log(u"Deleting text and data files... done")

        return (sr, sf, intervals)
示例#39
0
文件: sd.py 项目: danielbair/aeneas
    def _detect(self, min_length, max_length, tail=False):
        """
        Detect the head or tail within ``min_length`` and ``max_length`` duration.

        If detecting the tail, the real wave MFCC and the query are reversed
        so that the tail detection problem reduces to a head detection problem.

        Return the duration of the head or tail, in seconds.

        :param min_length: estimated minimum length
        :type  min_length: :class:`~aeneas.exacttiming.TimeValue`
        :param max_length: estimated maximum length
        :type  max_length: :class:`~aeneas.exacttiming.TimeValue`
        :rtype: :class:`~aeneas.exacttiming.TimeValue`
        :raises: TypeError: if one of the parameters is not ``None`` or a number
        :raises: ValueError: if one of the parameters is negative
        """
        def _sanitize(value, default, name):
            if value is None:
                value = default
            try:
                value = TimeValue(value)
            except (TypeError, ValueError, InvalidOperation) as exc:
                self.log_exc(u"The value of %s is not a number" % (name), exc, True, TypeError)
            if value < 0:
                self.log_exc(u"The value of %s is negative" % (name), None, True, ValueError)
            return value

        min_length = _sanitize(min_length, self.MIN_LENGTH, "min_length")
        max_length = _sanitize(max_length, self.MAX_LENGTH, "max_length")
        mws = self.rconf.mws
        min_length_frames = int(min_length / mws)
        max_length_frames = int(max_length / mws)
        self.log([u"MFCC window shift s:     %.3f", mws])
        self.log([u"Min start length s:      %.3f", min_length])
        self.log([u"Min start length frames: %d", min_length_frames])
        self.log([u"Max start length s:      %.3f", max_length])
        self.log([u"Max start length frames: %d", max_length_frames])
        self.log([u"Tail?:                   %s", str(tail)])

        self.log(u"Synthesizing query...")
        synt_duration = max_length * self.QUERY_FACTOR
        self.log([u"Synthesizing at least %.3f seconds", synt_duration])
        tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        synt = Synthesizer(rconf=self.rconf, logger=self.logger)
        anchors, total_time, synthesized_chars = synt.synthesize(
            self.text_file,
            tmp_file_path,
            quit_after=synt_duration,
            backwards=tail
        )
        self.log(u"Synthesizing query... done")

        self.log(u"Extracting MFCCs for query...")
        query_mfcc = AudioFileMFCC(tmp_file_path, rconf=self.rconf, logger=self.logger)
        self.log(u"Extracting MFCCs for query... done")

        self.log(u"Cleaning up...")
        gf.delete_file(tmp_handler, tmp_file_path)
        self.log(u"Cleaning up... done")

        search_window = max_length * self.AUDIO_FACTOR
        search_window_end = min(int(search_window / mws), self.real_wave_mfcc.all_length)
        self.log([u"Query MFCC length (frames): %d", query_mfcc.all_length])
        self.log([u"Real MFCC length (frames):  %d", self.real_wave_mfcc.all_length])
        self.log([u"Search window end (s):      %.3f", search_window])
        self.log([u"Search window end (frames): %d", search_window_end])

        if tail:
            self.log(u"Tail => reversing real_wave_mfcc and query_mfcc")
            self.real_wave_mfcc.reverse()
            query_mfcc.reverse()

        # NOTE: VAD will be run here, if not done before
        speech_intervals = self.real_wave_mfcc.intervals(speech=True, time=False)
        if len(speech_intervals) < 1:
            self.log(u"No speech intervals, hence no start found")
            if tail:
                self.real_wave_mfcc.reverse()
            return TimeValue("0.000")

        # generate a list of begin indices
        search_end = None
        candidates_begin = []
        for interval in speech_intervals:
            if (interval[0] >= min_length_frames) and (interval[0] <= max_length_frames):
                candidates_begin.append(interval[0])
            search_end = interval[1]
            if search_end >= search_window_end:
                break

        # for each begin index, compute the acm cost
        # to match the query
        # note that we take the min over the last column of the acm
        # meaning that we allow to match the entire query wave
        # against a portion of the real wave
        candidates = []
        for candidate_begin in candidates_begin:
            self.log([u"Candidate interval starting at %d == %.3f", candidate_begin, candidate_begin * mws])
            try:
                rwm = AudioFileMFCC(
                    mfcc_matrix=self.real_wave_mfcc.all_mfcc[:, candidate_begin:search_end],
                    rconf=self.rconf,
                    logger=self.logger
                )
                dtw = DTWAligner(
                    real_wave_mfcc=rwm,
                    synt_wave_mfcc=query_mfcc,
                    rconf=self.rconf,
                    logger=self.logger
                )
                acm = dtw.compute_accumulated_cost_matrix()
                last_column = acm[:, -1]
                min_value = numpy.min(last_column)
                min_index = numpy.argmin(last_column)
                self.log([u"Candidate interval: %d %d == %.3f %.3f", candidate_begin, search_end, candidate_begin * mws, search_end * mws])
                self.log([u"  Min value: %.6f", min_value])
                self.log([u"  Min index: %d == %.3f", min_index, min_index * mws])
                candidates.append((min_value, candidate_begin, min_index))
            except Exception as exc:
                self.log_exc(u"An unexpected error occurred while running _detect", exc, False, None)

        # reverse again the real wave
        if tail:
            self.log(u"Tail => reversing real_wave_mfcc again")
            self.real_wave_mfcc.reverse()

        # return
        if len(candidates) < 1:
            self.log(u"No candidates found")
            return TimeValue("0.000")
        self.log(u"Candidates:")
        for candidate in candidates:
            self.log([u"  Value: %.6f Begin Time: %.3f Min Index: %d", candidate[0], candidate[1] * mws, candidate[2]])
        best = sorted(candidates)[0][1]
        self.log([u"Best candidate: %d == %.3f", best, best * mws])
        return best * mws
示例#40
0
    def _time_and_combine(self, text_file):
        """
        Combine original audio clips into a single WAV file.

        Return a tuple consisting of:

        1. the handler of the generated audio file
        2. the path of the generated audio file
        3. the list of anchors, that is, a list of floats
           each representing the start time of the corresponding
           text fragment in the generated wave file
           ``[start_1, start_2, ..., start_n]``
        4. a tuple describing the format of the audio file

        :param text_file: the text with audio clips to be timed/combined
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :rtype: tuple (handler, string, list)
        """
        import subprocess

        # Concatenate all clips into a single, temporary file
        handler, path = gf.tmp_file(
            suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        cmd = "ffmpeg -y -f concat -i {} -c copy {}".format(
            text_file.file_path, path)
        subprocess.call(cmd, shell=True)

        audio_format = ('pcm_s161e', 1, 2)

        # Build "synt" anchor times
        anchor_time, anchors = TimeValue('0.0'), []
        for fragment in text_file.fragments:
            audio_path = 'output/sample/{}'.format(fragment.text.split("'")[1])
            audio_file = AudioFileMFCC(file_path=audio_path,
                                       file_format=audio_format)
            # TODO: Investigate faster ways to get the audio_length
            # cmd = 'ffprobe -i {} -show_entries format=duration -v quiet -of csv="p=0"'.format(audio_path)
            # subprocess.call(cmd, shell=True)
            # # should become... (to get response)
            # cmds = ['ffprobe', '-i', audio_path, '-show_entries', 'format=duration',
            #         '-v', 'quiet', '-of', 'csv="p=0"']
            # p = subprocess.Popen(cmds, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # output, err = p.communicate()
            # audio_length = TimeValue(output)
            anchors.append([anchor_time, fragment.identifier, audio_path])
            anchor_time += audio_file.audio_length

        # [
        #     [TimeValue('0.0'), u'f000001', 'output/sample/audio01.wav'],
        #     [TimeValue('0.339625'), u'f000002', 'output/sample/audio02.wav'],
        #     [TimeValue('3.5526875'), u'f000003', 'output/sample/audio03.wav'],
        #     [TimeValue('6.6874375'), u'f000004', 'output/sample/audio04.wav'],
        #     [TimeValue('9.5609375'), u'f000005', 'output/sample/audio05.wav'],
        #     [TimeValue('12.4344375'), u'f000006', 'output/sample/audio06.wav'],
        #     [TimeValue('16.1961250'), u'f000007', 'output/sample/audio07.wav'],
        #     [TimeValue('19.9578125'), u'f000008', 'output/sample/audio08.wav'],
        #     [TimeValue('23.0925625'), u'f000009', 'output/sample/audio09.wav'],
        #     [TimeValue('28.0297500'), u'f000010', 'output/sample/audio10.wav'],
        #     [TimeValue('31.1645000'), u'f000011', 'output/sample/audio11.wav'],
        #     [TimeValue('33.5678125'), u'f000012', 'output/sample/audio12.wav'],
        #     [TimeValue('37.0943750'), u'f000013', 'output/sample/audio13.wav'],
        #     [TimeValue('40.2030000'), u'f000014', 'output/sample/audio14.wav'],
        #     [TimeValue('43.8601875'), u'f000015', 'output/sample/audio15.wav']
        # ]

        # import pdb; pdb.set_trace()

        return (handler, path, anchors, audio_format)
示例#41
0
 def test_file_size_nonzero(self):
     handler, path = gf.tmp_file()
     with io.open(path, "w", encoding="utf-8") as tmp_file:
         tmp_file.write(u"Foo bar")
     self.assertEqual(gf.file_size(path), 7)
     gf.delete_file(handler, path)
 def test_file_size_zero(self):
     handler, path = gf.tmp_file()
     self.assertEqual(gf.file_size(path), 0)
     gf.delete_file(handler, path)
示例#43
0
    def audio_from_youtube(self,
                           source_url,
                           download=True,
                           output_file_path=None,
                           preferred_index=None,
                           largest_audio=True,
                           preferred_format=None):
        """
        Download an audio stream from a YouTube video,
        and save it to file.

        If ``download`` is ``False``, return the list
        of available audiostreams but do not download.

        Otherwise, download the audio stream best matching
        the provided parameters, as follows.
        If ``preferred_index`` is not ``None``,
        download the audio stream at that index.
        If ``largest_audio`` is ``True``,
        download the largest audiostream;
        otherwise, download the smallest audiostream.
        If ``preferred_format`` is not ``None``,
        download the audiostream having that format.
        The latter option works in combination with ``largest_audio``.

        Return the path of the downloaded file.

        :param string source_url: the URL of the YouTube video
        :param bool download: if ``True``, download the audio stream
                              best matching ``preferred_index`` or ``preferred_format``
                              and ``largest_audio``;
                              if ``False``, return the list of available audio streams
        :param string output_file_path: the path where the downloaded audio should be saved;
                                        if ``None``, create a temporary file
        :param int preferred_index: preferably download this audio stream
        :param bool largest_audio: if ``True``, download the largest audio stream available;
                                   if ``False``, download the smallest one.
        :param string preferred_format: preferably download this audio format
        :rtype: string or list of pafy audio streams
        :raises: ImportError: if ``pafy`` is not installed
        :raises: OSError: if ``output_file_path`` cannot be written
        :raises: ValueError: if ``source_url`` is not a valid YouTube URL
        """
        def select_audiostream(audiostreams):
            """ Select the audiostream best matching the given parameters. """
            if preferred_index is not None:
                if preferred_index in range(len(audiostreams)):
                    self.log([
                        u"Selecting audiostream with index %d", preferred_index
                    ])
                    return audiostreams[preferred_index]
                else:
                    self.log_warn([
                        u"Audio stream index '%d' not allowed", preferred_index
                    ])
                    self.log_warn(u"Ignoring the requested audio stream index")
            # selecting by preferred format
            streams = audiostreams
            if preferred_format is not None:
                self.log([
                    u"Selecting audiostreams by preferred format %s",
                    preferred_format
                ])
                streams = [
                    audiostream for audiostream in streams
                    if audiostream.extension == preferred_format
                ]
                if len(streams) < 1:
                    self.log([
                        u"No audiostream with preferred format %s",
                        preferred_format
                    ])
                    streams = audiostreams
            # sort by size
            streams = sorted([(audio.get_filesize(), audio)
                              for audio in streams])
            if largest_audio:
                self.log(u"Selecting largest audiostream")
                selected = streams[-1][1]
            else:
                self.log(u"Selecting smallest audiostream")
                selected = streams[0][1]
            return selected

        try:
            import pafy
        except ImportError as exc:
            self.log_exc(u"Python module pafy is not installed", exc, True,
                         ImportError)

        try:
            video = pafy.new(source_url)
        except (IOError, OSError, ValueError) as exc:
            self.log_exc(
                u"The specified source URL '%s' is not a valid YouTube URL or you are offline"
                % (source_url), exc, True, ValueError)

        if not download:
            self.log(u"Returning the list of audio streams")
            return video.audiostreams

        output_path = output_file_path
        if output_file_path is None:
            self.log(u"output_path is None: creating temp file")
            handler, output_path = gf.tmp_file(
                root=self.rconf[RuntimeConfiguration.TMP_PATH])
        else:
            if not gf.file_can_be_written(output_path):
                self.log_exc(
                    u"Path '%s' cannot be written. Wrong permissions?" %
                    (output_path), None, True, OSError)

        audiostream = select_audiostream(video.audiostreams)
        if output_file_path is None:
            gf.delete_file(handler, output_path)
            output_path += "." + audiostream.extension

        self.log([u"output_path is '%s'", output_path])
        self.log(u"Downloading...")
        audiostream.download(filepath=output_path, quiet=True)
        self.log(u"Downloading... done")
        return output_path
示例#44
0
 def test_output_html_for_tuning(self):
     syn = self.read(SyncMapFormat.XML, multiline=True, utf8=True)
     handler, output_file_path = gf.tmp_file(suffix=".html")
     audio_file_path = "foo.mp3"
     syn.output_html_for_tuning(audio_file_path, output_file_path, None)
     gf.delete_file(handler, output_file_path)
    def run(self, arguments, show_help=True):
        """
        Program entry point.

        Please note that the first item in ``arguments`` is discarded,
        as it is assumed to be the script/invocation name;
        pass a "dumb" placeholder if you call this method with
        an argument different that ``sys.argv``.

        :param arguments: the list of arguments
        :type  arguments: list
        :param show_help: if ``False``, do not show help on ``-h`` and ``--help``
        :type  show_help: bool
        :rtype: int
        """
        # convert arguments into Unicode strings
        if self.use_sys:
            # check that sys.stdin.encoding and sys.stdout.encoding are set to utf-8
            if not gf.FROZEN:
                if sys.stdin.encoding not in ["UTF-8", "UTF8"]:
                    self.print_warning(
                        u"The default input encoding is not UTF-8.")
                    self.print_warning(
                        u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell."
                    )
                if sys.stdout.encoding not in ["UTF-8", "UTF8"]:
                    self.print_warning(
                        u"The default output encoding is not UTF-8.")
                    self.print_warning(
                        u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell."
                    )
            # decode using sys.stdin.encoding
            args = [gf.safe_unicode_stdin(arg) for arg in arguments]
        else:
            # decode using utf-8 (but you should pass Unicode strings as parameters anyway)
            args = [gf.safe_unicode(arg) for arg in arguments]

        if show_help:
            if u"-h" in args:
                return self.print_help(short=True)

            if u"--help" in args:
                return self.print_help(short=False)

            if u"--version" in args:
                return self.print_name_version()

        # store formal arguments
        self.formal_arguments_raw = arguments
        self.formal_arguments = args

        # to obtain the actual arguments,
        # remove the first one and "special" switches
        args = args[1:]
        set_args = set(args)

        # set verbosity, if requested
        for flag in set([u"-v", u"--verbose"]) & set_args:
            self.verbose = True
            args.remove(flag)
        for flag in set([u"-vv", u"--very-verbose"]) & set_args:
            self.verbose = True
            self.very_verbose = True
            args.remove(flag)

        # set RuntimeConfiguration string, if specified
        for flag in [u"-r", u"--runtime-configuration"]:
            rconf_string = self.has_option_with_value(flag,
                                                      actual_arguments=False)
            if rconf_string is not None:
                self.rconf = RuntimeConfiguration(rconf_string)
                args.remove("%s=%s" % (flag, rconf_string))

        # set log file path, if requested
        log_path = None
        for flag in [u"-l", u"--log"]:
            log_path = self.has_option_with_value(flag, actual_arguments=False)
            if log_path is not None:
                args.remove("%s=%s" % (flag, log_path))
            elif flag in set_args:
                handler, log_path = gf.tmp_file(
                    suffix=u".log",
                    root=self.rconf[RuntimeConfiguration.TMP_PATH])
                args.remove(flag)
            if log_path is not None:
                self.log_file_path = log_path

        # if no actual arguments left, print help
        if (len(args) < 1) and (show_help):
            return self.print_help(short=True)

        # store actual arguments
        self.actual_arguments = args

        # create logger
        self.logger = Logger(tee=self.verbose,
                             tee_show_datetime=self.very_verbose)
        self.log([u"Formal arguments: %s", self.formal_arguments])
        self.log([u"Actual arguments: %s", self.actual_arguments])
        self.log([u"Runtime configuration: '%s'", self.rconf.config_string()])

        # perform command
        exit_code = self.perform_command()
        self.log([u"Execution completed with code %d", exit_code])

        # output log if requested
        if self.log_file_path is not None:
            self.log([
                u"User requested saving log to file '%s'", self.log_file_path
            ])
            self.logger.write(self.log_file_path)
            if self.use_sys:
                self.print_info(u"Log written to file '%s'" %
                                self.log_file_path)

        return self.exit(exit_code)
示例#46
0
    def _synthesize_single_subprocess_helper(self,
                                             text,
                                             voice_code,
                                             output_file_path=None,
                                             return_audio_data=True):
        """
        This is an helper function to synthesize a single text fragment via ``subprocess``.

        If ``output_file_path`` is ``None``,
        the audio data will not persist to file at the end of the method.

        If ``return_audio_data`` is ``True``,
        return the audio data at the end of the function call;
        if ``False``, just return ``(True, None)`` in case of success.

        :rtype: tuple (result, (duration, sample_rate, codec, data)) or (result, None)
        """
        # return zero if text is the empty string
        if len(text) == 0:
            #
            # NOTE sample_rate, codec, data do not matter
            #      if the duration is 0.000 => set them to None
            #
            self.log(u"len(text) is zero: returning 0.000")
            return (True, (TimeValue("0.000"), None, None, None))

        # create a temporary output file if needed
        synt_tmp_file = (output_file_path is None)
        if synt_tmp_file:
            self.log(
                u"Synthesizer helper called with output_file_path=None => creating temporary output file"
            )
            output_file_handler, output_file_path = gf.tmp_file(
                suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Temporary output file path is '%s'", output_file_path])

        try:
            # if the TTS engine reads text from file,
            # write the text into a temporary file
            if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
                self.log(u"TTS engine reads text from file")
                tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(
                    suffix=u".txt",
                    root=self.rconf[RuntimeConfiguration.TMP_PATH])
                self.log([
                    u"Creating temporary text file '%s'...", tmp_text_file_path
                ])
                with io.open(tmp_text_file_path, "w",
                             encoding="utf-8") as tmp_text_file:
                    tmp_text_file.write(text)
                self.log([
                    u"Creating temporary text file '%s'... done",
                    tmp_text_file_path
                ])
            else:
                self.log(u"TTS engine reads text from stdin")
                tmp_text_file_handler = None
                tmp_text_file_path = None

            # copy all relevant arguments
            self.log(u"Creating arguments list...")
            arguments = []
            for arg in self.subprocess_arguments:
                if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                    arguments.extend(
                        self._voice_code_to_subprocess(voice_code))
                elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                    arguments.append(voice_code)
                elif arg == self.CLI_PARAMETER_TEXT_PATH:
                    arguments.append(tmp_text_file_path)
                elif arg == self.CLI_PARAMETER_WAVE_PATH:
                    arguments.append(output_file_path)
                elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                    # placeholder, do not append
                    pass
                elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                    # placeholder, do not append
                    pass
                else:
                    arguments.append(arg)
            self.log(u"Creating arguments list... done")

            # actual call via subprocess
            self.log(u"Calling TTS engine...")
            self.log([u"Calling with arguments '%s'", arguments])
            self.log([u"Calling with text '%s'", text])
            proc = subprocess.Popen(arguments,
                                    stdout=subprocess.PIPE,
                                    stdin=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
            if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
                self.log(u"Passing text via stdin...")
                if gf.PY2:
                    (stdoutdata,
                     stderrdata) = proc.communicate(input=gf.safe_bytes(text))
                else:
                    (stdoutdata, stderrdata) = proc.communicate(input=text)
                self.log(u"Passing text via stdin... done")
            else:
                self.log(u"Passing text via file...")
                (stdoutdata, stderrdata) = proc.communicate()
                self.log(u"Passing text via file... done")
            proc.stdout.close()
            proc.stdin.close()
            proc.stderr.close()

            if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
                self.log(u"TTS engine wrote audio data to stdout")
                self.log(
                    [u"Writing audio data to file '%s'...", output_file_path])
                with io.open(output_file_path, "wb") as output_file:
                    output_file.write(stdoutdata)
                self.log([
                    u"Writing audio data to file '%s'... done",
                    output_file_path
                ])
            else:
                self.log(u"TTS engine wrote audio data to file")

            if tmp_text_file_path is not None:
                self.log(
                    [u"Delete temporary text file '%s'", tmp_text_file_path])
                gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

            self.log(u"Calling TTS ... done")
        except Exception as exc:
            self.log_exc(
                u"An unexpected error occurred while calling TTS engine via subprocess",
                exc, False, None)
            return (False, None)

        # check the file can be read
        if not gf.file_can_be_read(output_file_path):
            self.log_exc(
                u"Output file '%s' cannot be read" % (output_file_path), None,
                True, None)
            return (False, None)

        # read audio data
        ret = self._read_audio_data(
            output_file_path) if return_audio_data else (True, None)

        # if the output file was temporary, remove it
        if synt_tmp_file:
            self.log([
                u"Removing temporary output file path '%s'", output_file_path
            ])
            gf.delete_file(output_file_handler, output_file_path)

        # return audio data or (True, None)
        return ret
示例#47
0
 def test_file_exists_true(self):
     handler, path = gf.tmp_file()
     self.assertTrue(gf.file_exists(path))
     gf.delete_file(handler, path)
示例#48
0
 def test_file_size_zero(self):
     handler, path = gf.tmp_file()
     self.assertEqual(gf.file_size(path), 0)
     gf.delete_file(handler, path)
 def test_delete_file_existing(self):
     handler, path = gf.tmp_file()
     self.assertTrue(gf.file_exists(path))
     gf.delete_file(handler, path)
     self.assertFalse(gf.file_exists(path))
示例#50
0
    def read_samples_from_file(self):
        """
        Load the audio samples from file into memory.

        If ``self.file_format`` is ``None`` or it is not
        ``("pcm_s16le", 1, self.rconf.sample_rate)``,
        the file will be first converted
        to a temporary PCM16 mono WAVE file.
        Audio data will be read from this temporary file,
        which will be then deleted from disk immediately.

        Otherwise,
        the audio data will be read directly
        from the given file,
        which will not be deleted from disk.

        :raises: :class:`~aeneas.audiofile.AudioFileConverterError`: if the path to the ``ffmpeg`` executable cannot be called
        :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
        :raises: OSError: if the audio file cannot be read
        """
        self.log(u"Loading audio data...")

        # check the file can be read
        if not gf.file_can_be_read(self.file_path):
            self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

        # determine if we need to convert the audio file
        convert_audio_file = (
            (self.file_format is None) or
            (
                (self.rconf.safety_checks) and
                (self.file_format != ("pcm_s16le", 1, self.rconf.sample_rate))
            )
        )

        # convert the audio file if needed
        if convert_audio_file:
            # convert file to PCM16 mono WAVE with correct sample rate
            self.log(u"self.file_format is None or not good => converting self.file_path")
            tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Temporary PCM16 mono WAVE file: '%s'", tmp_file_path])
            try:
                self.log(u"Converting audio file to mono...")
                converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
                converter.convert(self.file_path, tmp_file_path)
                self.file_format = ("pcm_s16le", 1, self.rconf.sample_rate)
                self.log(u"Converting audio file to mono... done")
            except FFMPEGPathError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Unable to call ffmpeg executable", None, True, AudioFileConverterError)
            except OSError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Audio file format not supported by ffmpeg", None, True, AudioFileUnsupportedFormatError)
        else:
            # read the file directly
            if self.rconf.safety_checks:
                self.log(u"self.file_format is good => reading self.file_path directly")
            else:
                self.log_warn(u"Safety checks disabled => reading self.file_path directly")
            tmp_handler = None
            tmp_file_path = self.file_path

        # TODO allow calling C extension cwave to read samples faster
        try:
            self.audio_format = "pcm16"
            self.audio_channels = 1
            self.audio_sample_rate, self.__samples = scipywavread(tmp_file_path)
            # scipy reads a sample as an int16_t, that is, a number in [-32768, 32767]
            # so we convert it to a float64 in [-1, 1]
            self.__samples = self.__samples.astype("float64") / 32768
            self.__samples_capacity = len(self.__samples)
            self.__samples_length = self.__samples_capacity
            self._update_length()
        except ValueError:
            self.log_exc(u"Audio format not supported by scipywavread", None, True, AudioFileUnsupportedFormatError)

        # if we converted the audio file, delete the temporary converted audio file
        if convert_audio_file:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log([u"Deleted temporary audio file: '%s'", tmp_file_path])

        self._update_length()
        self.log([u"Sample length:  %.3f", self.audio_length])
        self.log([u"Sample rate:    %d", self.audio_sample_rate])
        self.log([u"Audio format:   %s", self.audio_format])
        self.log([u"Audio channels: %d", self.audio_channels])
        self.log(u"Loading audio data... done")
 def test_file_size_nonzero(self):
     handler, path = gf.tmp_file()
     with io.open(path, "w", encoding="utf-8") as tmp_file:
         tmp_file.write(u"Foo bar")
     self.assertEqual(gf.file_size(path), 7)
     gf.delete_file(handler, path)
示例#52
0
    def read_samples_from_file(self):
        """
        Load the audio samples from file into memory.

        If ``self.is_mono_wave`` is ``False``,
        the file will be first converted
        to a temporary PCM16 mono WAVE file.
        Audio data will be read from this temporary file,
        which will be then deleted from disk immediately.

        If ``self.is_mono_wave`` is ``True``,
        the audio data will be read directly
        from the given file,
        which will not be deleted from disk.

        :raises: :class:`~aeneas.audiofile.AudioFileConverterError`: if the path to the ``ffmpeg`` executable cannot be called
        :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
        :raises: OSError: if the audio file cannot be read
        """
        self.log(u"Loading audio data...")

        # check the file can be read
        if not gf.file_can_be_read(self.file_path):
            self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

        # convert file to PCM16 mono WAVE
        if self.is_mono_wave:
            self.log(u"is_mono_wave=True => reading self.file_path directly")
            tmp_handler = None
            tmp_file_path = self.file_path
        else:
            self.log(u"is_mono_wave=False => converting self.file_path")
            tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Temporary PCM16 mono WAVE file: '%s'", tmp_file_path])
            try:
                self.log(u"Converting audio file to mono...")
                converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
                converter.convert(self.file_path, tmp_file_path)
                self.log(u"Converting audio file to mono... done")
            except FFMPEGPathError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Unable to call ffmpeg executable", None, True, AudioFileConverterError)
            except OSError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Audio file format not supported by ffmpeg", None, True, AudioFileUnsupportedFormatError)

        # TODO allow calling C extension cwave to read samples faster
        try:
            self.audio_format = "pcm16"
            self.audio_channels = 1
            self.audio_sample_rate, self.__samples = scipywavread(tmp_file_path)
            # scipy reads a sample as an int16_t, that is, a number in [-32768, 32767]
            # so we convert it to a float64 in [-1, 1]
            self.__samples = self.__samples.astype("float64") / 32768
            self.__samples_capacity = len(self.__samples)
            self.__samples_length = self.__samples_capacity
            self._update_length()
        except ValueError:
            self.log_exc(u"Audio format not supported by scipywavread", None, True, AudioFileUnsupportedFormatError)

        if not self.is_mono_wave:
            gf.delete_file(tmp_handler, tmp_file_path)
            self.log([u"Deleted temporary PCM16 mono WAVE file: '%s'", tmp_file_path])

        self._update_length()
        self.log([u"Sample length:  %.3f", self.audio_length])
        self.log([u"Sample rate:    %d", self.audio_sample_rate])
        self.log([u"Audio format:   %s", self.audio_format])
        self.log([u"Audio channels: %d", self.audio_channels])
        self.log(u"Loading audio data... done")
 def test_file_exists_true(self):
     handler, path = gf.tmp_file()
     self.assertTrue(gf.file_exists(path))
     gf.delete_file(handler, path)
 def write(self, fmt, multiline=False, utf8=False, parameters=PARAMETERS):
     suffix = "." + fmt
     syn = self.read(SyncMapFormat.XML, multiline, utf8, self.PARAMETERS)
     handler, output_file_path = gf.tmp_file(suffix=suffix)
     syn.write(fmt, output_file_path, parameters)
     gf.delete_file(handler, output_file_path)
示例#55
0
    def _detect(self, min_length, max_length, tail=False):
        """
        Detect the head or tail within ``min_length`` and ``max_length`` duration.

        If detecting the tail, the real wave MFCC and the query are reversed
        so that the tail detection problem reduces to a head detection problem.

        Return the duration of the head or tail, in seconds.

        :param min_length: estimated minimum length
        :type  min_length: :class:`~aeneas.timevalue.TimeValue`
        :param max_length: estimated maximum length
        :type  max_length: :class:`~aeneas.timevalue.TimeValue`
        :rtype: :class:`~aeneas.timevalue.TimeValue`
        :raises: TypeError: if one of the parameters is not ``None`` or a number
        :raises: ValueError: if one of the parameters is negative
        """
        def _sanitize(value, default, name):
            if value is None:
                value = default
            try:
                value = TimeValue(value)
            except (TypeError, ValueError, InvalidOperation) as exc:
                self.log_exc(u"The value of %s is not a number" % (name), exc, True, TypeError)
            if value < 0:
                self.log_exc(u"The value of %s is negative" % (name), None, True, ValueError)
            return value

        min_length = _sanitize(min_length, self.MIN_LENGTH, "min_length")
        max_length = _sanitize(max_length, self.MAX_LENGTH, "max_length")
        mws = self.rconf.mws
        min_length_frames = int(min_length / mws)
        max_length_frames = int(max_length / mws)
        self.log([u"MFCC window shift s:     %.3f", mws])
        self.log([u"Min start length s:      %.3f", min_length])
        self.log([u"Min start length frames: %d", min_length_frames])
        self.log([u"Max start length s:      %.3f", max_length])
        self.log([u"Max start length frames: %d", max_length_frames])
        self.log([u"Tail?:                   %s", str(tail)])

        self.log(u"Synthesizing query...")
        synt_duration = max_length * self.QUERY_FACTOR
        self.log([u"Synthesizing at least %.3f seconds", synt_duration])
        tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
        synt = Synthesizer(rconf=self.rconf, logger=self.logger)
        anchors, total_time, synthesized_chars = synt.synthesize(
            self.text_file,
            tmp_file_path,
            quit_after=synt_duration,
            backwards=tail
        )
        self.log(u"Synthesizing query... done")

        self.log(u"Extracting MFCCs for query...")
        query_mfcc = AudioFileMFCC(tmp_file_path, rconf=self.rconf, logger=self.logger)
        self.log(u"Extracting MFCCs for query... done")

        self.log(u"Cleaning up...")
        gf.delete_file(tmp_handler, tmp_file_path)
        self.log(u"Cleaning up... done")

        search_window = max_length * self.AUDIO_FACTOR
        search_window_end = min(int(search_window / mws), self.real_wave_mfcc.all_length)
        self.log([u"Query MFCC length (frames): %d", query_mfcc.all_length])
        self.log([u"Real MFCC length (frames):  %d", self.real_wave_mfcc.all_length])
        self.log([u"Search window end (s):      %.3f", search_window])
        self.log([u"Search window end (frames): %d", search_window_end])

        if tail:
            self.log(u"Tail => reversing real_wave_mfcc and query_mfcc")
            self.real_wave_mfcc.reverse()
            query_mfcc.reverse()

        # NOTE: VAD will be run here, if not done before
        speech_intervals = self.real_wave_mfcc.intervals(speech=True, time=False)
        if len(speech_intervals) < 1:
            self.log(u"No speech intervals, hence no start found")
            if tail:
                self.real_wave_mfcc.reverse()
            return TimeValue("0.000")

        # generate a list of begin indices
        search_end = None
        candidates_begin = []
        for interval in speech_intervals:
            if (interval[0] >= min_length_frames) and (interval[0] <= max_length_frames):
                candidates_begin.append(interval[0])
            search_end = interval[1]
            if search_end >= search_window_end:
                break

        # for each begin index, compute the acm cost
        # to match the query
        # note that we take the min over the last column of the acm
        # meaning that we allow to match the entire query wave
        # against a portion of the real wave
        candidates = []
        for candidate_begin in candidates_begin:
            self.log([u"Candidate interval starting at %d == %.3f", candidate_begin, candidate_begin * mws])
            try:
                rwm = AudioFileMFCC(
                    mfcc_matrix=self.real_wave_mfcc.all_mfcc[:, candidate_begin:search_end],
                    rconf=self.rconf,
                    logger=self.logger
                )
                dtw = DTWAligner(
                    real_wave_mfcc=rwm,
                    synt_wave_mfcc=query_mfcc,
                    rconf=self.rconf,
                    logger=self.logger
                )
                acm = dtw.compute_accumulated_cost_matrix()
                last_column = acm[:, -1]
                min_value = numpy.min(last_column)
                min_index = numpy.argmin(last_column)
                self.log([u"Candidate interval: %d %d == %.3f %.3f", candidate_begin, search_end, candidate_begin * mws, search_end * mws])
                self.log([u"  Min value: %.6f", min_value])
                self.log([u"  Min index: %d == %.3f", min_index, min_index * mws])
                candidates.append((min_value, candidate_begin, min_index))
            except Exception as exc:
                self.log_exc(u"An unexpected error occurred while running _detect", exc, False, None)

        # reverse again the real wave
        if tail:
            self.log(u"Tail => reversing real_wave_mfcc again")
            self.real_wave_mfcc.reverse()

        # return
        if len(candidates) < 1:
            self.log(u"No candidates found")
            return TimeValue("0.000")
        self.log(u"Candidates:")
        for candidate in candidates:
            self.log([u"  Value: %.6f Begin Time: %.3f Min Index: %d", candidate[0], candidate[1] * mws, candidate[2]])
        best = sorted(candidates)[0][1]
        self.log([u"Best candidate: %d == %.3f", best, best * mws])
        return best * mws
示例#56
0
 def test_delete_file_existing(self):
     handler, path = gf.tmp_file()
     self.assertTrue(gf.file_exists(path))
     gf.delete_file(handler, path)
     self.assertFalse(gf.file_exists(path))
示例#57
0
    def synthesize_multiple(self, audio_file_path, c_quit_after, c_backwards,
                            u_text):
        """
        Synthesize the text contained in the given fragment list
        into a ``wav`` file.

        :param string audio_file_path: the path to the output audio file
        :param float c_quit_after: stop synthesizing as soon as
                                   reaching this many seconds
        :param bool c_backwards: synthesizing from the end of the text file
        :param object u_text: a list of ``(voice_code, text)`` tuples
        :rtype: tuple ``(sample_rate, synthesized, intervals)``
        """
        self.log([u"Audio file path: '%s'", audio_file_path])
        self.log([u"c_quit_after: '%.3f'", c_quit_after])
        self.log([u"c_backwards: '%d'", c_backwards])

        text_file_handler, text_file_path = gf.tmp_file()
        data_file_handler, data_file_path = gf.tmp_file()
        self.log([u"Temporary text file path: '%s'", text_file_path])
        self.log([u"Temporary data file path: '%s'", data_file_path])

        self.log(u"Populating the text file...")
        with io.open(text_file_path, "w", encoding="utf-8") as tmp_text_file:
            for f_voice_code, f_text in u_text:
                tmp_text_file.write(u"%s %s\n" % (f_voice_code, f_text))
        self.log(u"Populating the text file... done")

        arguments = [
            self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_PATH], "-m",
            "aeneas.cewsubprocess",
            "%.3f" % c_quit_after,
            "%d" % c_backwards, text_file_path, audio_file_path, data_file_path
        ]
        self.log([u"Calling with arguments '%s'", u" ".join(arguments)])
        proc = subprocess.Popen(arguments,
                                stdout=subprocess.PIPE,
                                stdin=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        proc.communicate()

        self.log(u"Reading output data...")
        with io.open(data_file_path, "r", encoding="utf-8") as data_file:
            lines = data_file.read().splitlines()
            sr = int(lines[0])
            sf = int(lines[1])
            intervals = []
            for line in lines[2:]:
                values = line.split(u" ")
                if len(values) == 2:
                    intervals.append(
                        (TimeValue(values[0]), TimeValue(values[1])))
        self.log(u"Reading output data... done")

        self.log(u"Deleting text and data files...")
        gf.delete_file(text_file_handler, text_file_path)
        gf.delete_file(data_file_handler, data_file_path)
        self.log(u"Deleting text and data files... done")

        return (sr, sf, intervals)
示例#58
0
    def _synthesize_single_subprocess(self, text, voice_code, output_file_path):
        """
        Synthesize a single text fragment via ``subprocess``.

        :rtype: tuple (result, (duration, sample_rate, encoding, samples))
        """
        self.log(u"Synthesizing using pure Python...")
        try:
            # if the TTS engine reads text from file,
            # write the text into a temporary file
            if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
                self.log(u"TTS engine reads text from file")
                tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(suffix=u".txt", root=self.rconf[RuntimeConfiguration.TMP_PATH])
                self.log([u"Creating temporary text file '%s'...", tmp_text_file_path])
                with io.open(tmp_text_file_path, "w", encoding="utf-8") as tmp_text_file:
                    tmp_text_file.write(text)
                self.log([u"Creating temporary text file '%s'... done", tmp_text_file_path])
            else:
                self.log(u"TTS engine reads text from stdin")
                tmp_text_file_handler = None
                tmp_text_file_path = None

            # copy all relevant arguments
            self.log(u"Creating arguments list...")
            arguments = []
            for arg in self.subprocess_arguments:
                if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                    arguments.extend(self._voice_code_to_subprocess(voice_code))
                elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                    arguments.append(voice_code)
                elif arg == self.CLI_PARAMETER_TEXT_PATH:
                    arguments.append(tmp_text_file_path)
                elif arg == self.CLI_PARAMETER_WAVE_PATH:
                    arguments.append(output_file_path)
                elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                    # placeholder, do not append
                    pass
                elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                    # placeholder, do not append
                    pass
                else:
                    arguments.append(arg)
            self.log(u"Creating arguments list... done")

            # actual call via subprocess
            self.log(u"Calling TTS engine...")
            self.log([u"Calling with arguments '%s'", arguments])
            self.log([u"Calling with text '%s'", text])
            proc = subprocess.Popen(
                arguments,
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True
            )
            if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
                self.log(u"Passing text via stdin...")
                if gf.PY2:
                    (stdoutdata, stderrdata) = proc.communicate(input=gf.safe_bytes(text))
                else:
                    (stdoutdata, stderrdata) = proc.communicate(input=text)
                self.log(u"Passing text via stdin... done")
            else:
                self.log(u"Passing text via file...")
                (stdoutdata, stderrdata) = proc.communicate()
                self.log(u"Passing text via file... done")
            proc.stdout.close()
            proc.stdin.close()
            proc.stderr.close()

            if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
                self.log(u"TTS engine wrote audio data to stdout")
                self.log([u"Writing audio data to file '%s'...", output_file_path])
                with io.open(output_file_path, "wb") as output_file:
                    output_file.write(stdoutdata)
                self.log([u"Writing audio data to file '%s'... done", output_file_path])
            else:
                self.log(u"TTS engine wrote audio data to file")

            if tmp_text_file_path is not None:
                self.log([u"Delete temporary text file '%s'", tmp_text_file_path])
                gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

            self.log(u"Calling TTS ... done")
        except Exception as exc:
            self.log_exc(u"An unexpected error occurred while calling TTS engine via subprocess", exc, False, None)
            return (False, None)

        # check the file can be read
        if not gf.file_can_be_read(output_file_path):
            self.log_exc(u"Output file '%s' cannot be read" % (output_file_path), None, True, None)
            return (False, None)

        # return the duration of the output file
        try:
            # if we know the TTS outputs to PCM16 mono WAVE,
            # we can read samples directly from it,
            # without an intermediate conversion through ffmpeg
            audio_file = AudioFile(
                file_path=output_file_path,
                is_mono_wave=self.OUTPUT_MONO_WAVE,
                rconf=self.rconf,
                logger=self.logger
            )
            audio_file.read_samples_from_file()
            self.log([u"Duration of '%s': %f", output_file_path, audio_file.audio_length])
            self.log(u"Synthesizing using pure Python... done")
            return (True, (
                audio_file.audio_length,
                audio_file.audio_sample_rate,
                audio_file.audio_format,
                audio_file.audio_samples
            ))
        except (AudioFileUnsupportedFormatError, OSError) as exc:
            self.log_exc(u"An unexpected error occurred while trying to read the sythesized audio file", exc, True, None)
            return (False, None)