def inner(c_ext, cew_subprocess):
    """
    Run ``synthesize_multiple`` of the eSpeak wrapper with the given
    runtime flags and check the total synthesized time.

    ``ofp``, ``text_file``, ``quit_after``, ``backwards``, ``zero_length``
    and ``self`` are taken from the enclosing scope, which is not
    visible in this block.

    :param c_ext: value stored under ``RuntimeConfiguration.C_EXTENSIONS``
    :param cew_subprocess: value stored under
                           ``RuntimeConfiguration.CEW_SUBPROCESS_ENABLED``
    """
    if ofp is None:
        handler, output_file_path = gf.tmp_file(suffix=".wav")
    else:
        handler = None
        output_file_path = ofp
    try:
        rconf = RuntimeConfiguration()
        rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
        rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
        tts_engine = ESPEAKWrapper(rconf=rconf)
        _, total_time, _ = tts_engine.synthesize_multiple(
            text_file,
            output_file_path,
            quit_after,
            backwards
        )
    finally:
        # always clean up, whatever exception type is raised;
        # the exception (if any) propagates with its original traceback
        gf.delete_file(handler, output_file_path)
    if zero_length:
        self.assertEqual(total_time, 0.0)
    else:
        self.assertGreater(total_time, 0.0)
def check_espeak(cls):
    """
    Check whether ``espeak`` can be called.

    A short English sentence is synthesized to a temporary file,
    which is always removed afterwards.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    try:
        from aeneas.espeakwrapper import ESPEAKWrapper
        text = u"From fairest creatures we desire increase,"
        language = u"eng"
        handler, output_file_path = gf.tmp_file(suffix=u".wav")
        try:
            espeak = ESPEAKWrapper()
            result = espeak.synthesize_single(
                text,
                language,
                output_file_path
            )
        finally:
            # do not leave the temporary file behind if synthesis raises
            gf.delete_file(handler, output_file_path)
        if result:
            gf.print_success(u"espeak OK")
            return False
    except Exception:
        # any failure (import, synthesis, cleanup) is reported below;
        # KeyboardInterrupt and SystemExit are deliberately not swallowed
        pass
    gf.print_error(u"espeak ERROR")
    gf.print_info(u" Please make sure you have espeak installed correctly")
    gf.print_info(u" and that its path is in your PATH environment variable")
    gf.print_info(u" You might also want to check that the espeak-data directory")
    gf.print_info(u" is set up correctly, for example, it has the correct permissions")
    return True
def step4():
    """
    Test 4/6: check that ``espeak`` can be called through ``ESPEAKWrapper``.

    A short English sentence is synthesized to a temporary file,
    which is always removed afterwards.

    :rtype: bool
    :returns: ``True`` if the synthesis returned a truthy result,
              ``False`` otherwise (including when any exception is raised)
    """
    on_info("Test 4/6 (espeak)...")
    try:
        on_info(" Trying to call espeak...")
        from aeneas.espeakwrapper import ESPEAKWrapper
        from aeneas.language import Language
        text = u"From fairest creatures we desire increase,"
        language = Language.EN
        handler, output_file_path = tempfile.mkstemp(suffix=".wav")
        try:
            espeak = ESPEAKWrapper()
            result = espeak.synthesize(text, language, output_file_path)
        finally:
            # do not leave the temporary file behind if synthesis raises
            os.close(handler)
            os.remove(output_file_path)
        if result:
            on_info(" Trying to call espeak... succeeded.")
            return True
    except Exception:
        # any failure is reported below;
        # KeyboardInterrupt and SystemExit are deliberately not swallowed
        pass
    on_error(" Unable to call espeak.")
    on_error(
        " Please make sure you have espeak installed correctly and that it is in your $PATH."
    )
    return False
def check_espeak(cls):
    """
    Check whether ``espeak`` can be called.

    A short English sentence is synthesized to a temporary file,
    which is always removed afterwards.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    try:
        from aeneas.espeakwrapper import ESPEAKWrapper
        text = u"From fairest creatures we desire increase,"
        language = u"eng"
        handler, output_file_path = gf.tmp_file(suffix=u".wav")
        try:
            espeak = ESPEAKWrapper()
            result = espeak.synthesize_single(text, language, output_file_path)
        finally:
            # do not leave the temporary file behind if synthesis raises
            gf.delete_file(handler, output_file_path)
        if result:
            gf.print_success(u"espeak OK")
            return False
    except Exception:
        # any failure (import, synthesis, cleanup) is reported below;
        # KeyboardInterrupt and SystemExit are deliberately not swallowed
        pass
    gf.print_error(u"espeak ERROR")
    gf.print_info(
        u" Please make sure you have espeak installed correctly")
    gf.print_info(
        u" and that its path is in your PATH environment variable")
    gf.print_info(
        u" You might also want to check that the espeak-data directory")
    gf.print_info(
        u" is set up correctly, for example, it has the correct permissions"
    )
    return True
def test_synthesize_unicode(self):
    """
    Synthesizing a German word containing a non-ASCII character
    must return a value greater than zero.
    """
    text = u"Ausführliche"
    language = Language.DE
    handler, output_file_path = tempfile.mkstemp(suffix=".wav")
    try:
        espeak = ESPEAKWrapper()
        result = espeak.synthesize(text, language, output_file_path)
        self.assertGreater(result, 0)
    finally:
        # remove the temporary file even when the assertion fails
        os.close(handler)
        os.remove(output_file_path)
def test_empty_text(self):
    """
    Synthesizing the empty string must return zero.
    """
    text = ""
    language = Language.IT
    handler, output_file_path = tempfile.mkstemp(suffix=".wav")
    try:
        espeak = ESPEAKWrapper()
        result = espeak.synthesize(text, language, output_file_path)
        self.assertEqual(result, 0)
    finally:
        # remove the temporary file even when the assertion fails
        os.close(handler)
        os.remove(output_file_path)
def test_replace_language(self):
    """
    Synthesizing Cyrillic text with ``Language.UK``
    must return a value greater than zero.
    """
    text = u"Временами Сашке хотелось перестать делать то"
    language = Language.UK
    handler, output_file_path = tempfile.mkstemp(suffix=".wav")
    try:
        espeak = ESPEAKWrapper()
        result = espeak.synthesize(text, language, output_file_path)
        self.assertGreater(result, 0)
    finally:
        # remove the temporary file even when the assertion fails
        os.close(handler)
        os.remove(output_file_path)
def synthesize(self, text, language, zero_length=False):
    """
    Synthesize ``text`` into a temporary file and check the value
    returned by ``ESPEAKWrapper.synthesize``.

    :param text: the text to be synthesized
    :param language: the language passed to the wrapper
    :param bool zero_length: if ``True``, expect the returned value
                             to be zero; otherwise, greater than zero
    """
    handler, output_file_path = tempfile.mkstemp(suffix=".wav")
    try:
        espeak = ESPEAKWrapper()
        result = espeak.synthesize(text, language, output_file_path)
        if zero_length:
            self.assertEqual(result, 0)
        else:
            self.assertGreater(result, 0)
    finally:
        # remove the temporary file even when the assertion fails
        delete_file(handler, output_file_path)
def test_synthesize(self):
    """
    Synthesizing an Italian sentence must return a value greater than zero.
    """
    text = u"Nel mezzo del cammin di nostra vita"
    language = Language.IT
    handler, output_file_path = tempfile.mkstemp(suffix=".wav")
    try:
        espeak = ESPEAKWrapper()
        result = espeak.synthesize(text, language, output_file_path)
        self.assertGreater(result, 0)
    finally:
        # remove the temporary file even when the assertion fails
        os.close(handler)
        os.remove(output_file_path)
def main():
    """
    Entry point: synthesize the text given on the command line.

    Expects three command line arguments: the text, the language,
    and the output file path. If fewer are given, the usage message
    is printed and nothing is synthesized.
    """
    if len(sys.argv) < 4:
        usage()
        return
    text, language, output_file_path = sys.argv[1:4]
    synth = ESPEAKWrapper()
    synth.synthesize(text, language, output_file_path)
    # single-argument call form: prints the same text with the Python 2
    # print statement and with the Python 3 print function
    print("Created file '%s'" % output_file_path)
def main():
    """
    Entry point: synthesize the text given on the command line.

    Expects three command line arguments: the text, the language,
    and the output file path. If fewer are given, the usage message
    is printed and nothing is synthesized.
    """
    if len(sys.argv) < 4:
        usage()
        return
    text, language, output_file_path = sys.argv[1:4]
    synt = ESPEAKWrapper()
    synt.synthesize(text, language, output_file_path)
    # single-argument call form: prints the same text with the Python 2
    # print statement and with the Python 3 print function
    print("[INFO] Created file '%s'" % output_file_path)
def _select_tts_engine(self):
    """
    Create the TTS engine wrapper requested by the rconf object
    and store it in ``self.tts_engine``.

    The value stored under ``RuntimeConfiguration.TTS`` selects the
    custom wrapper (loaded from the file at ``RuntimeConfiguration.TTS_PATH``),
    Festival, or the Nuance TTS API; any other value selects eSpeak.
    Failures are reported through ``self.log_exc``.
    """
    self.log(u"Selecting TTS engine...")
    requested_tts = self.rconf[RuntimeConfiguration.TTS]
    if requested_tts == self.CUSTOM:
        self.log(u"TTS engine: custom")
        tts_path = self.rconf[RuntimeConfiguration.TTS_PATH]
        path_is_readable = gf.file_can_be_read(tts_path)
        if not path_is_readable:
            self.log_exc(u"Cannot read tts_path", None, True, OSError)
        try:
            import imp
            # load the user-provided source file as a module ...
            self.log([
                u"Loading CustomTTSWrapper module from '%s'...",
                tts_path
            ])
            imp.load_source("CustomTTSWrapperModule", tts_path)
            self.log([
                u"Loading CustomTTSWrapper module from '%s'... done",
                tts_path
            ])
            # ... then import the wrapper class from it ...
            self.log(u"Importing CustomTTSWrapper...")
            from CustomTTSWrapperModule import CustomTTSWrapper
            self.log(u"Importing CustomTTSWrapper... done")
            # ... and finally instantiate it
            self.log(u"Creating CustomTTSWrapper instance...")
            self.tts_engine = CustomTTSWrapper(
                rconf=self.rconf,
                logger=self.logger
            )
            self.log(u"Creating CustomTTSWrapper instance... done")
        except Exception as exc:
            self.log_exc(u"Unable to load custom TTS wrapper", exc, True, OSError)
    elif requested_tts == self.FESTIVAL:
        self.log(u"TTS engine: Festival")
        self.tts_engine = FESTIVALWrapper(
            rconf=self.rconf,
            logger=self.logger
        )
    elif requested_tts == self.NUANCETTSAPI:
        try:
            # imported only to verify that the module is available
            import requests
        except ImportError as exc:
            self.log_exc(
                u"Unable to import requests for Nuance TTS API wrapper",
                exc,
                True,
                ImportError
            )
        self.log(u"TTS engine: Nuance TTS API")
        self.tts_engine = NuanceTTSAPIWrapper(
            rconf=self.rconf,
            logger=self.logger
        )
    else:
        # default engine
        self.log(u"TTS engine: eSpeak")
        self.tts_engine = ESPEAKWrapper(
            rconf=self.rconf,
            logger=self.logger
        )
    self.log(u"Selecting TTS engine... done")
def inner(c_ext, cew_subprocess):
    """
    Run ``synthesize_single`` of the eSpeak wrapper with the given
    runtime flags and check the returned value.

    ``ofp``, ``text``, ``language``, ``zero_length`` and ``self``
    are taken from the enclosing scope, which is not visible in this block.

    :param c_ext: value stored under ``RuntimeConfiguration.C_EXTENSIONS``
    :param cew_subprocess: value stored under
                           ``RuntimeConfiguration.CEW_SUBPROCESS_ENABLED``
    """
    if ofp is None:
        handler, output_file_path = gf.tmp_file(suffix=".wav")
    else:
        handler = None
        output_file_path = ofp
    try:
        rconf = RuntimeConfiguration()
        rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
        rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
        tts_engine = ESPEAKWrapper(rconf=rconf)
        result = tts_engine.synthesize_single(text, language, output_file_path)
    finally:
        # always clean up, whatever exception type is raised;
        # the exception (if any) propagates with its original traceback
        gf.delete_file(handler, output_file_path)
    if zero_length:
        self.assertEqual(result, 0)
    else:
        self.assertGreater(result, 0)
def _select_tts_engine(self):
    """
    Create the TTS engine wrapper requested by the rconf object
    and store it in ``self.tts_engine``.

    The value stored under ``RuntimeConfiguration.TTS`` selects the
    custom wrapper (loaded from the file at ``RuntimeConfiguration.TTS_PATH``),
    Festival, or the Nuance TTS API; any other value selects eSpeak.
    Failures are reported through ``self.log_exc``.
    """
    self.log(u"Selecting TTS engine...")
    requested_tts = self.rconf[RuntimeConfiguration.TTS]
    if requested_tts == self.CUSTOM:
        self.log(u"TTS engine: custom")
        tts_path = self.rconf[RuntimeConfiguration.TTS_PATH]
        path_is_readable = gf.file_can_be_read(tts_path)
        if not path_is_readable:
            self.log_exc(u"Cannot read tts_path", None, True, OSError)
        try:
            import imp
            # load the user-provided source file as a module ...
            self.log([
                u"Loading CustomTTSWrapper module from '%s'...",
                tts_path
            ])
            imp.load_source("CustomTTSWrapperModule", tts_path)
            self.log([
                u"Loading CustomTTSWrapper module from '%s'... done",
                tts_path
            ])
            # ... then import the wrapper class from it ...
            self.log(u"Importing CustomTTSWrapper...")
            from CustomTTSWrapperModule import CustomTTSWrapper
            self.log(u"Importing CustomTTSWrapper... done")
            # ... and finally instantiate it
            self.log(u"Creating CustomTTSWrapper instance...")
            self.tts_engine = CustomTTSWrapper(
                rconf=self.rconf,
                logger=self.logger
            )
            self.log(u"Creating CustomTTSWrapper instance... done")
        except Exception as exc:
            self.log_exc(u"Unable to load custom TTS wrapper", exc, True, OSError)
    elif requested_tts == self.FESTIVAL:
        self.log(u"TTS engine: Festival")
        self.tts_engine = FESTIVALWrapper(
            rconf=self.rconf,
            logger=self.logger
        )
    elif requested_tts == self.NUANCETTSAPI:
        try:
            # imported only to verify that the module is available
            import requests
        except ImportError as exc:
            self.log_exc(
                u"Unable to import requests for Nuance TTS API wrapper",
                exc,
                True,
                ImportError
            )
        self.log(u"TTS engine: Nuance TTS API")
        self.tts_engine = NuanceTTSAPIWrapper(
            rconf=self.rconf,
            logger=self.logger
        )
    else:
        # default engine
        self.log(u"TTS engine: eSpeak")
        self.tts_engine = ESPEAKWrapper(
            rconf=self.rconf,
            logger=self.logger
        )
    self.log(u"Selecting TTS engine... done")
def step4():
    """
    Test 4/6: check that ``espeak`` can be called through ``ESPEAKWrapper``.

    A short English sentence is synthesized to a temporary file,
    which is always removed afterwards.

    :rtype: bool
    :returns: ``True`` if the synthesis returned a truthy result,
              ``False`` otherwise (including when any exception is raised)
    """
    on_info("Test 4/6 (espeak)...")
    try:
        on_info(" Trying to call espeak...")
        from aeneas.espeakwrapper import ESPEAKWrapper
        from aeneas.language import Language
        text = u"From fairest creatures we desire increase,"
        language = Language.EN
        handler, output_file_path = tempfile.mkstemp(suffix=".wav")
        try:
            espeak = ESPEAKWrapper()
            result = espeak.synthesize(text, language, output_file_path)
        finally:
            # do not leave the temporary file behind if synthesis raises
            os.close(handler)
            os.remove(output_file_path)
        if result:
            on_info(" Trying to call espeak... succeeded.")
            return True
    except Exception:
        # any failure is reported below;
        # KeyboardInterrupt and SystemExit are deliberately not swallowed
        pass
    on_error(" Unable to call espeak.")
    on_error(" Please make sure you have espeak installed correctly and that it is in your $PATH.")
    return False
class Synthesizer(Loggable):
    """
    Turn the fragments of a text file into a single audio file,
    returning the corresponding time anchors.

    The synthesis itself is delegated to a TTS engine wrapper,
    selected at construction time from the runtime configuration.

    :param rconf: a runtime configuration
    :type  rconf: :class:`~aeneas.runtimeconfiguration.RuntimeConfiguration`
    :param logger: the logger object
    :type  logger: :class:`~aeneas.logger.Logger`
    :raises: OSError: if a custom TTS engine is requested but it cannot be loaded
    :raises: ImportError: if the Nuance TTS API wrapper is requested
                          but the ``requests`` module is not installed
    """

    CUSTOM = "custom"
    """ Select custom TTS engine wrapper """

    ESPEAK = "espeak"
    """ Select eSpeak wrapper """

    FESTIVAL = "festival"
    """ Select Festival wrapper """

    NUANCETTSAPI = "nuancettsapi"
    """ Select Nuance TTS API wrapper """

    ALLOWED_VALUES = [CUSTOM, ESPEAK, FESTIVAL, NUANCETTSAPI]
    """ List of all the allowed values """

    TAG = u"Synthesizer"

    def __init__(self, rconf=None, logger=None):
        super(Synthesizer, self).__init__(rconf=rconf, logger=logger)
        # set by _select_tts_engine() right below
        self.tts_engine = None
        self._select_tts_engine()

    def _select_tts_engine(self):
        """
        Create the TTS engine wrapper requested by the rconf object
        and store it in ``self.tts_engine``.

        Any value other than ``CUSTOM``, ``FESTIVAL`` or ``NUANCETTSAPI``
        selects the eSpeak wrapper.
        """
        self.log(u"Selecting TTS engine...")
        requested_tts = self.rconf[RuntimeConfiguration.TTS]
        if requested_tts == self.CUSTOM:
            self.log(u"TTS engine: custom")
            tts_path = self.rconf[RuntimeConfiguration.TTS_PATH]
            path_is_readable = gf.file_can_be_read(tts_path)
            if not path_is_readable:
                self.log_exc(u"Cannot read tts_path", None, True, OSError)
            try:
                import imp
                # load the user-provided source file as a module ...
                self.log([
                    u"Loading CustomTTSWrapper module from '%s'...",
                    tts_path
                ])
                imp.load_source("CustomTTSWrapperModule", tts_path)
                self.log([
                    u"Loading CustomTTSWrapper module from '%s'... done",
                    tts_path
                ])
                # ... then import the wrapper class from it ...
                self.log(u"Importing CustomTTSWrapper...")
                from CustomTTSWrapperModule import CustomTTSWrapper
                self.log(u"Importing CustomTTSWrapper... done")
                # ... and finally instantiate it
                self.log(u"Creating CustomTTSWrapper instance...")
                self.tts_engine = CustomTTSWrapper(
                    rconf=self.rconf,
                    logger=self.logger
                )
                self.log(u"Creating CustomTTSWrapper instance... done")
            except Exception as exc:
                self.log_exc(u"Unable to load custom TTS wrapper", exc, True, OSError)
        elif requested_tts == self.FESTIVAL:
            self.log(u"TTS engine: Festival")
            self.tts_engine = FESTIVALWrapper(
                rconf=self.rconf,
                logger=self.logger
            )
        elif requested_tts == self.NUANCETTSAPI:
            try:
                # imported only to verify that the module is available
                import requests
            except ImportError as exc:
                self.log_exc(
                    u"Unable to import requests for Nuance TTS API wrapper",
                    exc,
                    True,
                    ImportError
                )
            self.log(u"TTS engine: Nuance TTS API")
            self.tts_engine = NuanceTTSAPIWrapper(
                rconf=self.rconf,
                logger=self.logger
            )
        else:
            # default engine
            self.log(u"TTS engine: eSpeak")
            self.tts_engine = ESPEAKWrapper(
                rconf=self.rconf,
                logger=self.logger
            )
        self.log(u"Selecting TTS engine... done")

    def output_is_mono_wave(self):
        """
        Tell whether the TTS engine outputs a PCM16 mono WAVE file,
        by returning its ``OUTPUT_MONO_WAVE`` attribute.

        This information can be used to avoid converting
        the audio file output by the TTS engine.
        Return ``False`` if no TTS engine has been selected.

        :rtype: bool
        """
        engine = self.tts_engine
        if engine is None:
            return False
        return engine.OUTPUT_MONO_WAVE

    def synthesize(
            self,
            text_file,
            audio_file_path,
            quit_after=None,
            backwards=False
    ):
        """
        Synthesize the text contained in the given fragment list
        into a ``wav`` file.

        Return the value returned by the ``synthesize_multiple``
        function of the TTS engine, that is,
        a tuple ``(anchors, total_time, num_chars)``.

        :param text_file: the text file to be synthesized
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param string audio_file_path: the path to the output audio file
        :param float quit_after: stop synthesizing as soon as
                                 reaching this many seconds
        :param bool backwards: if ``True``, synthesizing from the end of the text file
        :rtype: tuple
        :raises: TypeError: if ``text_file`` is ``None`` or
                            not an instance of ``TextFile``
        :raises: OSError: if ``audio_file_path`` cannot be written
        :raises: OSError: if ``tts=custom`` in the RuntimeConfiguration and ``tts_path`` cannot be read
        :raises: ValueError: if no TTS engine has been selected
        """
        # validate the arguments and the state of this object
        if text_file is None:
            self.log_exc(u"text_file is None", None, True, TypeError)
        if not isinstance(text_file, TextFile):
            self.log_exc(u"text_file is not an instance of TextFile", None, True, TypeError)
        if not gf.file_can_be_written(audio_file_path):
            self.log_exc(
                u"Audio file path '%s' cannot be written" % (audio_file_path),
                None,
                True,
                OSError
            )
        if self.tts_engine is None:
            self.log_exc(u"Cannot select the TTS engine", None, True, ValueError)

        # delegate the actual synthesis to the TTS engine
        self.log(u"Synthesizing text...")
        result = self.tts_engine.synthesize_multiple(
            text_file=text_file,
            output_file_path=audio_file_path,
            quit_after=quit_after,
            backwards=backwards
        )
        self.log(u"Synthesizing text... done")

        # the engine must have created the output file
        if not gf.file_exists(audio_file_path):
            self.log_exc(
                u"Audio file path '%s' cannot be read" % (audio_file_path),
                None,
                True,
                OSError
            )

        return result
class Synthesizer(Loggable):
    """
    Turn the fragments of a text file into a single audio file,
    returning the corresponding time anchors.

    The synthesis itself is delegated to a TTS engine wrapper,
    selected at construction time from the runtime configuration.

    :param rconf: a runtime configuration
    :type  rconf: :class:`~aeneas.runtimeconfiguration.RuntimeConfiguration`
    :param logger: the logger object
    :type  logger: :class:`~aeneas.logger.Logger`
    :raises: OSError: if a custom TTS engine is requested but it cannot be loaded
    :raises: ImportError: if the Nuance TTS API wrapper is requested
                          but the ``requests`` module is not installed
    """

    CUSTOM = "custom"
    """ Select custom TTS engine wrapper """

    ESPEAK = "espeak"
    """ Select eSpeak wrapper """

    FESTIVAL = "festival"
    """ Select Festival wrapper """

    NUANCETTSAPI = "nuancettsapi"
    """ Select Nuance TTS API wrapper """

    ALLOWED_VALUES = [CUSTOM, ESPEAK, FESTIVAL, NUANCETTSAPI]
    """ List of all the allowed values """

    TAG = u"Synthesizer"

    def __init__(self, rconf=None, logger=None):
        super(Synthesizer, self).__init__(rconf=rconf, logger=logger)
        # set by _select_tts_engine() right below
        self.tts_engine = None
        self._select_tts_engine()

    def _select_tts_engine(self):
        """
        Create the TTS engine wrapper requested by the rconf object
        and store it in ``self.tts_engine``.

        Any value other than ``CUSTOM``, ``FESTIVAL`` or ``NUANCETTSAPI``
        selects the eSpeak wrapper.
        """
        self.log(u"Selecting TTS engine...")
        requested_tts = self.rconf[RuntimeConfiguration.TTS]
        if requested_tts == self.CUSTOM:
            self.log(u"TTS engine: custom")
            tts_path = self.rconf[RuntimeConfiguration.TTS_PATH]
            path_is_readable = gf.file_can_be_read(tts_path)
            if not path_is_readable:
                self.log_exc(u"Cannot read tts_path", None, True, OSError)
            try:
                import imp
                # load the user-provided source file as a module ...
                self.log([
                    u"Loading CustomTTSWrapper module from '%s'...",
                    tts_path
                ])
                imp.load_source("CustomTTSWrapperModule", tts_path)
                self.log([
                    u"Loading CustomTTSWrapper module from '%s'... done",
                    tts_path
                ])
                # ... then import the wrapper class from it ...
                self.log(u"Importing CustomTTSWrapper...")
                from CustomTTSWrapperModule import CustomTTSWrapper
                self.log(u"Importing CustomTTSWrapper... done")
                # ... and finally instantiate it
                self.log(u"Creating CustomTTSWrapper instance...")
                self.tts_engine = CustomTTSWrapper(
                    rconf=self.rconf,
                    logger=self.logger
                )
                self.log(u"Creating CustomTTSWrapper instance... done")
            except Exception as exc:
                self.log_exc(u"Unable to load custom TTS wrapper", exc, True, OSError)
        elif requested_tts == self.FESTIVAL:
            self.log(u"TTS engine: Festival")
            self.tts_engine = FESTIVALWrapper(
                rconf=self.rconf,
                logger=self.logger
            )
        elif requested_tts == self.NUANCETTSAPI:
            try:
                # imported only to verify that the module is available
                import requests
            except ImportError as exc:
                self.log_exc(
                    u"Unable to import requests for Nuance TTS API wrapper",
                    exc,
                    True,
                    ImportError
                )
            self.log(u"TTS engine: Nuance TTS API")
            self.tts_engine = NuanceTTSAPIWrapper(
                rconf=self.rconf,
                logger=self.logger
            )
        else:
            # default engine
            self.log(u"TTS engine: eSpeak")
            self.tts_engine = ESPEAKWrapper(
                rconf=self.rconf,
                logger=self.logger
            )
        self.log(u"Selecting TTS engine... done")

    def output_is_mono_wave(self):
        """
        Tell whether the TTS engine outputs a PCM16 mono WAVE file,
        by returning its ``OUTPUT_MONO_WAVE`` attribute.

        This information can be used to avoid converting
        the audio file output by the TTS engine.
        Return ``False`` if no TTS engine has been selected.

        :rtype: bool
        """
        engine = self.tts_engine
        if engine is None:
            return False
        return engine.OUTPUT_MONO_WAVE

    def synthesize(
            self,
            text_file,
            audio_file_path,
            quit_after=None,
            backwards=False
    ):
        """
        Synthesize the text contained in the given fragment list
        into a ``wav`` file.

        Return the value returned by the ``synthesize_multiple``
        function of the TTS engine, that is,
        a tuple ``(anchors, total_time, num_chars)``.

        :param text_file: the text file to be synthesized
        :type  text_file: :class:`~aeneas.textfile.TextFile`
        :param string audio_file_path: the path to the output audio file
        :param float quit_after: stop synthesizing as soon as
                                 reaching this many seconds
        :param bool backwards: if ``True``, synthesizing from the end of the text file
        :rtype: tuple
        :raises: TypeError: if ``text_file`` is ``None`` or
                            not an instance of ``TextFile``
        :raises: OSError: if ``audio_file_path`` cannot be written
        :raises: OSError: if ``tts=custom`` in the RuntimeConfiguration and ``tts_path`` cannot be read
        :raises: ValueError: if no TTS engine has been selected
        """
        # validate the arguments and the state of this object
        if text_file is None:
            self.log_exc(u"text_file is None", None, True, TypeError)
        if not isinstance(text_file, TextFile):
            self.log_exc(u"text_file is not an instance of TextFile", None, True, TypeError)
        if not gf.file_can_be_written(audio_file_path):
            self.log_exc(
                u"Audio file path '%s' cannot be written" % (audio_file_path),
                None,
                True,
                OSError
            )
        if self.tts_engine is None:
            self.log_exc(u"Cannot select the TTS engine", None, True, ValueError)

        # delegate the actual synthesis to the TTS engine
        self.log(u"Synthesizing text...")
        result = self.tts_engine.synthesize_multiple(
            text_file=text_file,
            output_file_path=audio_file_path,
            quit_after=quit_after,
            backwards=backwards
        )
        self.log(u"Synthesizing text... done")

        # the engine must have created the output file
        if not gf.file_exists(audio_file_path):
            self.log_exc(
                u"Audio file path '%s' cannot be read" % (audio_file_path),
                None,
                True,
                OSError
            )

        return result
def synthesize(self, text_file, audio_file_path):
    """
    Synthesize the text contained in the given fragment list
    into a ``wav`` file.

    Each fragment is synthesized into its own temporary file,
    and the resulting audio data is concatenated, in order,
    into ``audio_file_path``.

    :param text_file: the text file to be synthesized
    :type  text_file: :class:`aeneas.textfile.TextFile`
    :param audio_file_path: the path to the output audio file
    :type  audio_file_path: string (path)
    :rtype: list
    :returns: the time anchors, one ``[begin, identifier, text]`` list
              per fragment
    :raises: ValueError: if no fragment produced audio data
                         (e.g., no fragments, or all with zero duration)
    """
    # time anchors
    anchors = []

    # initialize time
    current_time = 0.0

    # audio data of each fragment, concatenated once at the end;
    # this avoids copying the whole buffer again for every fragment
    chunks = []
    sample_frequency = None
    encoding = None

    # espeak wrapper
    espeak = ESPEAKWrapper(logger=self.logger)

    # for each fragment, synthesize it and collect its audio data
    for num, fragment in enumerate(text_file.fragments):
        self._log("Synthesizing fragment %d" % num)
        handler, tmp_destination = tempfile.mkstemp(
            suffix=".wav",
            dir=gf.custom_tmp_dir()
        )
        try:
            # synthesize and get the duration of the output file
            duration = espeak.synthesize(
                text=fragment.text,
                language=fragment.language,
                output_file_path=tmp_destination
            )

            # store for later output
            anchors.append([current_time, fragment.identifier, fragment.text])

            self._log("Fragment %d starts at: %f" % (num, current_time))
            if duration > 0:
                self._log("Fragment %d duration: %f" % (num, duration))
                current_time += duration
                data, sample_frequency, encoding = wavread(tmp_destination)
                # flatten, as numpy.append() without axis would do
                chunks.append(numpy.ravel(data))
            else:
                self._log("Fragment %d has zero duration" % num)
        finally:
            # remove the temporary file, even if synthesizing
            # or reading it back raised an exception
            self._log("Removing temporary file '%s'" % tmp_destination)
            os.close(handler)
            os.remove(tmp_destination)

    if not chunks:
        # without audio data, sample frequency and encoding are unknown,
        # so the output file cannot be written
        raise ValueError(
            "No audio data was synthesized, cannot write '%s'" % audio_file_path
        )

    # the leading empty (float) array keeps the dtype of the result
    # identical to repeatedly appending to numpy.array([])
    waves = numpy.concatenate([numpy.array([])] + chunks)

    # output WAV file, concatenation of synthesized fragments
    self._log("Writing audio file '%s'" % audio_file_path)
    wavwrite(waves, audio_file_path, sample_frequency, encoding)

    # return the time anchors
    self._log("Returning %d time anchors" % len(anchors))
    return anchors
def main():
    """
    Check that the dependencies of aeneas are met, in four steps:

    1. the ``aeneas`` package can be imported;
    2. ``ffprobe`` can be called (through ``FFPROBEWrapper``);
    3. ``ffmpeg`` can be called (through ``FFMPEGWrapper``);
    4. ``espeak`` can be called (through ``ESPEAKWrapper``).

    The function returns as soon as one step fails,
    after reporting the problem with ``on_error``.
    Temporary files created by steps 3 and 4 are always removed.
    """
    on_info("Test 1/4...")
    try:
        on_info("Trying to import package aeneas...")
        # imported only to verify that the package is available
        import aeneas
        on_info("Trying to import package aeneas... succeeded.")
    except ImportError:
        on_error("Unable to import package aeneas.")
        on_error("Check that you have installed the following Python (2.7.x) packages:")
        on_error("1. BeautifulSoup")
        on_error("2. numpy")
        on_error("3. scikits")
        return

    on_info("Test 2/4...")
    try:
        on_info("Trying to call ffprobe...")
        from aeneas.ffprobewrapper import FFPROBEWrapper
        file_path = get_abs_path("aeneas/tests/res/container/job/assets/p001.mp3")
        prober = FFPROBEWrapper()
        # only whether the call raises matters, not the returned value
        prober.read_properties(file_path)
        on_info("Trying to call ffprobe... succeeded.")
    except Exception:
        on_error("Unable to call ffprobe.")
        on_error("Please make sure you have ffprobe installed correctly and that it is in your $PATH.")
        return

    on_info("Test 3/4...")
    try:
        on_info("Trying to call ffmpeg...")
        from aeneas.ffmpegwrapper import FFMPEGWrapper
        input_file_path = get_abs_path("aeneas/tests/res/container/job/assets/p001.mp3")
        handler, output_file_path = tempfile.mkstemp(suffix=".wav")
        try:
            converter = FFMPEGWrapper()
            result = converter.convert(input_file_path, output_file_path)
        finally:
            # do not leave the temporary file behind if converting raises
            os.close(handler)
            os.remove(output_file_path)
        if not result:
            on_error("Unable to call ffmpeg.")
            on_error("Please make sure you have ffmpeg installed correctly and that it is in your $PATH.")
            return
        on_info("Trying to call ffmpeg... succeeded.")
    except Exception:
        on_error("Unable to call ffmpeg.")
        on_error("Please make sure you have ffmpeg installed correctly and that it is in your $PATH.")
        return

    on_info("Test 4/4...")
    try:
        on_info("Trying to call espeak...")
        from aeneas.espeakwrapper import ESPEAKWrapper
        from aeneas.language import Language
        text = u"From fairest creatures we desire increase,"
        language = Language.EN
        handler, output_file_path = tempfile.mkstemp(suffix=".wav")
        try:
            espeak = ESPEAKWrapper()
            result = espeak.synthesize(text, language, output_file_path)
        finally:
            # do not leave the temporary file behind if synthesizing raises
            os.close(handler)
            os.remove(output_file_path)
        if not result:
            on_error("Unable to call espeak.")
            on_error("Please make sure you have espeak installed correctly and that it is in your $PATH.")
            return
        on_info("Trying to call espeak... succeeded.")
    except Exception:
        on_error("Unable to call espeak.")
        on_error("Please make sure you have espeak installed correctly and that it is in your $PATH.")
        return

    on_info("Congratulations, all dependencies are met.")
    on_info("Enjoy running aeneas!")
def synthesize(self, text_file, audio_file_path, quit_after=None, backwards=False):
    """
    Synthesize the text contained in the given fragment list
    into a ``wav`` file.

    Each fragment is synthesized into its own temporary file,
    and the resulting audio data is concatenated into ``audio_file_path``.

    :param text_file: the text file to be synthesized
    :type  text_file: :class:`aeneas.textfile.TextFile`
    :param audio_file_path: the path to the output audio file
    :type  audio_file_path: string (path)
    :param quit_after: stop synthesizing as soon as reaching this many seconds
    :type  quit_after: float
    :param backwards: synthesizing from the end of the text file
    :type  backwards: bool
    :rtype: tuple
    :returns: ``(anchors, current_time, num_chars)``, that is,
              the time anchors (one ``[begin, identifier, text]`` list
              per synthesized fragment), the total synthesized time,
              and the number of characters synthesized
    :raises: ValueError: if no fragment produced audio data
                         (e.g., no fragments, or all with zero duration)
    """
    # time anchors
    anchors = []

    # initialize time
    current_time = 0.0

    # audio data of each fragment, in synthesis order,
    # concatenated once at the end; this avoids copying
    # the whole buffer again for every fragment
    chunks = []
    sample_frequency = None
    encoding = None

    # espeak wrapper
    espeak = ESPEAKWrapper(logger=self.logger)

    if quit_after is not None:
        self._log(["Quit after reaching %.3f", quit_after])
    if backwards:
        self._log("Synthesizing backwards")

    # for each fragment, synthesize it and collect its audio data
    num_chars = 0
    fragments = text_file.fragments
    if backwards:
        fragments = fragments[::-1]
    for num, fragment in enumerate(fragments):
        self._log(["Synthesizing fragment %d", num])
        handler, tmp_destination = tempfile.mkstemp(
            suffix=".wav",
            dir=gf.custom_tmp_dir()
        )
        try:
            # synthesize and get the duration of the output file
            duration = espeak.synthesize(
                text=fragment.text,
                language=fragment.language,
                output_file_path=tmp_destination
            )

            # store for later output
            anchors.append([current_time, fragment.identifier, fragment.text])

            # increase the character counter
            num_chars += fragment.characters

            self._log(["Fragment %d starts at: %f", num, current_time])
            if duration > 0:
                self._log(["Fragment %d duration: %f", num, duration])
                current_time += duration
                data, sample_frequency, encoding = wavread(tmp_destination)
                # flatten, as numpy.append() without axis would do
                chunks.append(numpy.ravel(data))
            else:
                self._log(["Fragment %d has zero duration", num])
        finally:
            # remove the temporary file, even if synthesizing
            # or reading it back raised an exception
            self._log(["Removing temporary file '%s'", tmp_destination])
            os.close(handler)
            os.remove(tmp_destination)

        if (quit_after is not None) and (current_time > quit_after):
            self._log(["Quitting after reached duration %.3f", current_time])
            break

    if not chunks:
        # without audio data, sample frequency and encoding are unknown,
        # so the output file cannot be written
        raise ValueError(
            "No audio data was synthesized, cannot write '%s'" % audio_file_path
        )

    # when synthesizing backwards, each new fragment goes
    # before the audio collected so far
    if backwards:
        chunks.reverse()

    # the leading empty (float) array keeps the dtype of the result
    # identical to repeatedly appending to numpy.array([])
    waves = numpy.concatenate([numpy.array([])] + chunks)

    # output WAV file, concatenation of synthesized fragments
    self._log(["Writing audio file '%s'", audio_file_path])
    wavwrite(waves, audio_file_path, sample_frequency, encoding)

    # return the time anchors
    # TODO anchors do not make sense if backwards == True
    self._log(["Returning %d time anchors", len(anchors)])
    self._log(["Current time %.3f", current_time])
    self._log(["Synthesized %d characters", num_chars])
    return (anchors, current_time, num_chars)
def synthesize(self, text_file, audio_file_path, quit_after=None, backwards=False):
    """
    Synthesize the text contained in the given fragment list
    into a ``wav`` file.

    Each fragment is synthesized into its own temporary file,
    and the resulting audio data is concatenated into ``audio_file_path``.

    :param text_file: the text file to be synthesized
    :type  text_file: :class:`aeneas.textfile.TextFile`
    :param audio_file_path: the path to the output audio file
    :type  audio_file_path: string (path)
    :param quit_after: stop synthesizing as soon as reaching this many seconds
    :type  quit_after: float
    :param backwards: synthesizing from the end of the text file
    :type  backwards: bool
    :rtype: tuple
    :returns: ``(anchors, current_time, num_chars)``, that is,
              the time anchors (one ``[begin, identifier, text]`` list
              per synthesized fragment), the total synthesized time,
              and the number of characters synthesized
    :raises: ValueError: if no fragment produced audio data
                         (e.g., no fragments, or all with zero duration)
    """
    # time anchors
    anchors = []

    # initialize time
    current_time = 0.0

    # audio data of each fragment, in synthesis order,
    # concatenated once at the end; this avoids copying
    # the whole buffer again for every fragment
    chunks = []
    sample_frequency = None
    encoding = None

    # espeak wrapper
    espeak = ESPEAKWrapper(logger=self.logger)

    if quit_after is not None:
        self._log(["Quit after reaching %.3f", quit_after])
    if backwards:
        self._log("Synthesizing backwards")

    # for each fragment, synthesize it and collect its audio data
    num_chars = 0
    fragments = text_file.fragments
    if backwards:
        fragments = fragments[::-1]
    for num, fragment in enumerate(fragments):
        self._log(["Synthesizing fragment %d", num])
        handler, tmp_destination = tempfile.mkstemp(
            suffix=".wav", dir=gf.custom_tmp_dir())
        try:
            # synthesize and get the duration of the output file
            duration = espeak.synthesize(text=fragment.text,
                                         language=fragment.language,
                                         output_file_path=tmp_destination)

            # store for later output
            anchors.append([current_time, fragment.identifier, fragment.text])

            # increase the character counter
            num_chars += fragment.characters

            self._log(["Fragment %d starts at: %f", num, current_time])
            if duration > 0:
                self._log(["Fragment %d duration: %f", num, duration])
                current_time += duration
                data, sample_frequency, encoding = wavread(tmp_destination)
                # flatten, as numpy.append() without axis would do
                chunks.append(numpy.ravel(data))
            else:
                self._log(["Fragment %d has zero duration", num])
        finally:
            # remove the temporary file, even if synthesizing
            # or reading it back raised an exception
            self._log(["Removing temporary file '%s'", tmp_destination])
            os.close(handler)
            os.remove(tmp_destination)

        if (quit_after is not None) and (current_time > quit_after):
            self._log(
                ["Quitting after reached duration %.3f", current_time])
            break

    if not chunks:
        # without audio data, sample frequency and encoding are unknown,
        # so the output file cannot be written
        raise ValueError(
            "No audio data was synthesized, cannot write '%s'" % audio_file_path
        )

    # when synthesizing backwards, each new fragment goes
    # before the audio collected so far
    if backwards:
        chunks.reverse()

    # the leading empty (float) array keeps the dtype of the result
    # identical to repeatedly appending to numpy.array([])
    waves = numpy.concatenate([numpy.array([])] + chunks)

    # output WAV file, concatenation of synthesized fragments
    self._log(["Writing audio file '%s'", audio_file_path])
    wavwrite(waves, audio_file_path, sample_frequency, encoding)

    # return the time anchors
    # TODO anchors do not make sense if backwards == True
    self._log(["Returning %d time anchors", len(anchors)])
    self._log(["Current time %.3f", current_time])
    self._log(["Synthesized %d characters", num_chars])
    return (anchors, current_time, num_chars)