Exemplo n.º 1
0
 def __init__(
     self,
     real_wave_mfcc=None,
     synt_wave_mfcc=None,
     real_wave_path=None,
     synt_wave_path=None,
     rconf=None,
     logger=None
 ):
     """
     Check the arguments, initialize the parent class, and store the waves.

     Each wave can be given either as an already computed
     ``AudioFileMFCC`` object or as the path of an audio file;
     when only the path is given, the ``AudioFileMFCC`` object
     is created from that path.

     :raises: ValueError: if an MFCC argument is neither ``None`` nor exactly an ``AudioFileMFCC``,
                          if a given path cannot be read,
                          or if the DTW algorithm stored in ``rconf`` is not an allowed value
     """
     # the MFCC objects must be exactly AudioFileMFCC instances (subclasses are rejected)
     mfcc_checks = (
         (real_wave_mfcc, u"Real wave mfcc must be None or of type AudioFileMFCC"),
         (synt_wave_mfcc, u"Synt wave mfcc must be None or of type AudioFileMFCC"),
     )
     for mfcc, message in mfcc_checks:
         if (mfcc is not None) and (type(mfcc) is not AudioFileMFCC):
             raise ValueError(message)

     # the paths, when given, must point to readable files
     path_checks = (
         (real_wave_path, u"Real wave cannot be read"),
         (synt_wave_path, u"Synt wave cannot be read"),
     )
     for path, message in path_checks:
         if (path is not None) and (not gf.file_can_be_read(path)):
             raise ValueError(message)

     # the DTW algorithm is validated only when a configuration is given
     if rconf is not None:
         algorithm = rconf[RuntimeConfiguration.DTW_ALGORITHM]
         if algorithm not in DTWAlgorithm.ALLOWED_VALUES:
             raise ValueError(u"Algorithm value not allowed")

     super(DTWAligner, self).__init__(rconf=rconf, logger=logger)

     self.real_wave_mfcc = real_wave_mfcc
     self.synt_wave_mfcc = synt_wave_mfcc
     self.real_wave_path = real_wave_path
     self.synt_wave_path = synt_wave_path

     # compute the missing MFCC objects from the corresponding paths
     if self.real_wave_mfcc is None:
         if self.real_wave_path is not None:
             self.real_wave_mfcc = AudioFileMFCC(self.real_wave_path, rconf=self.rconf, logger=self.logger)
     if self.synt_wave_mfcc is None:
         if self.synt_wave_path is not None:
             self.synt_wave_mfcc = AudioFileMFCC(self.synt_wave_path, rconf=self.rconf, logger=self.logger)

     self.dtw = None
Exemplo n.º 2
0
    def _read_from_file(self):
        """
        Load the file at ``self.file_path`` and parse its lines
        into text fragments, using the reader associated
        with ``self.file_format``.
        """
        # both the path and the format must be usable before reading anything
        if not gf.file_can_be_read(self.file_path):
            self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)
        if self.file_format not in TextFileFormat.ALLOWED_VALUES:
            self.log_exc(u"Text file format '%s' is not supported." % (self.file_format), None, True, ValueError)

        # load all the lines of the file, decoded as UTF-8
        self.log([u"Reading contents of file '%s'", self.file_path])
        with io.open(self.file_path, "r", encoding="utf-8") as source_file:
            contents = source_file.readlines()

        # discard the fragments currently stored
        self.clear()

        # dispatch to the reader for the requested format
        readers_by_format = {
            TextFileFormat.MPLAIN: self._read_mplain,
            TextFileFormat.MUNPARSED: self._read_munparsed,
            TextFileFormat.PARSED: self._read_parsed,
            TextFileFormat.PLAIN: self._read_plain,
            TextFileFormat.SUBTITLES: self._read_subtitles,
            TextFileFormat.UNPARSED: self._read_unparsed
        }
        parse_lines = readers_by_format[self.file_format]
        parse_lines(contents)

        # report how many fragments were obtained
        self.log([u"Parsed %d fragments", len(self.fragments)])
Exemplo n.º 3
0
 def _select_tts_engine(self):
     """
     Create the TTS engine wrapper requested in the rconf object
     and store it in ``self.tts_engine``.

     A value not matching any of the known engines
     selects the eSpeak wrapper.
     Problems (missing or unreadable custom wrapper path,
     missing third-party module) are reported through ``self.log_exc``.
     """
     self.log(u"Selecting TTS engine...")
     engine_code = self.rconf[RuntimeConfiguration.TTS]
     if engine_code == self.CUSTOM:
         self.log(u"TTS engine: custom")
         # the custom wrapper is loaded from a user-provided Python source file
         custom_path = self.rconf[RuntimeConfiguration.TTS_PATH]
         if custom_path is None:
             self.log_exc(u"You must specify a value for tts_path", None, True, ValueError)
         if not gf.file_can_be_read(custom_path):
             self.log_exc(u"Cannot read tts_path", None, True, OSError)
         try:
             import imp
             self.log([u"Loading CustomTTSWrapper module from '%s'...", custom_path])
             imp.load_source("CustomTTSWrapperModule", custom_path)
             self.log([u"Loading CustomTTSWrapper module from '%s'... done", custom_path])
             self.log(u"Importing CustomTTSWrapper...")
             from CustomTTSWrapperModule import CustomTTSWrapper
             self.log(u"Importing CustomTTSWrapper... done")
             self.log(u"Creating CustomTTSWrapper instance...")
             self.tts_engine = CustomTTSWrapper(rconf=self.rconf, logger=self.logger)
             self.log(u"Creating CustomTTSWrapper instance... done")
         except Exception as load_error:
             self.log_exc(u"Unable to load custom TTS wrapper", load_error, True, OSError)
     elif engine_code == self.AWS:
         # boto3 is imported here only to check that it is available
         try:
             import boto3
         except ImportError as import_error:
             self.log_exc(u"Unable to import boto3 for AWS Polly TTS API wrapper", import_error, True, ImportError)
         self.log(u"TTS engine: AWS Polly TTS API")
         self.tts_engine = AWSTTSWrapper(rconf=self.rconf, logger=self.logger)
     elif engine_code == self.NUANCE:
         # requests is imported here only to check that it is available
         try:
             import requests
         except ImportError as import_error:
             self.log_exc(u"Unable to import requests for Nuance TTS API wrapper", import_error, True, ImportError)
         self.log(u"TTS engine: Nuance TTS API")
         self.tts_engine = NuanceTTSWrapper(rconf=self.rconf, logger=self.logger)
     elif engine_code == self.ESPEAKNG:
         self.log(u"TTS engine: eSpeak-ng")
         self.tts_engine = ESPEAKNGTTSWrapper(rconf=self.rconf, logger=self.logger)
     elif engine_code == self.FESTIVAL:
         self.log(u"TTS engine: Festival")
         self.tts_engine = FESTIVALTTSWrapper(rconf=self.rconf, logger=self.logger)
     elif engine_code == self.MACOS:
         self.log(u"TTS engine: macOS")
         self.tts_engine = MacOSTTSWrapper(rconf=self.rconf, logger=self.logger)
     else:
         # default engine
         self.log(u"TTS engine: eSpeak")
         self.tts_engine = ESPEAKTTSWrapper(rconf=self.rconf, logger=self.logger)
     self.log(u"Selecting TTS engine... done")
Exemplo n.º 4
0
    def check_input_file_or_directory(self, path):
        """
        Return ``True`` if the given path is a readable file
        or a directory. Otherwise, emit an error
        and return ``False``.

        :param path: the path of the input file or directory
        :type  path: string (path)
        :rtype: bool
        """
        # accept the path as soon as one of the two checks succeeds
        if gf.file_can_be_read(path) or os.path.isdir(path):
            return True
        self.print_error(u"Unable to read file or directory '%s'" % (path))
        self.print_error(u"Make sure the path is written/escaped correctly and that you have read permission on it")
        return False
Exemplo n.º 5
0
 def download(
         self,
         expected_size,
         download_format=None,
         largest_audio=True,
 ):
     """
     Download the audio of ``self.URL_VALID`` and check
     that the resulting file can be read and has the expected size.

     The downloaded file is always deleted afterwards,
     even when one of the assertions fails,
     so that a failing test does not leave files behind.

     :param expected_size: the expected size of the downloaded file
     :param download_format: passed through to ``self.audio_from_youtube``
     :param largest_audio: passed through to ``self.audio_from_youtube``
     """
     path = self.audio_from_youtube(
         self.URL_VALID,
         download=True,
         output_file_path=None,
         download_format=download_format,
         largest_audio=largest_audio
     )
     try:
         self.assertTrue(gf.file_can_be_read(path))
         self.assertEqual(gf.file_size(path), expected_size)
     finally:
         # clean up regardless of the outcome of the assertions
         gf.delete_file(None, path)
Exemplo n.º 6
0
    def check_file_encoding(self, input_file_path):
        """
        Verify that the contents of the given file are UTF-8 encoded.

        A fresh result object is stored in ``self.result``
        and returned; an unreadable file is recorded as a failure.

        :param string input_file_path: the path of the file to be checked
        :rtype: :class:`~aeneas.validator.ValidatorResult`
        """
        self.log([u"Checking encoding of file '%s'", input_file_path])
        self.result = ValidatorResult()
        if gf.file_can_be_read(input_file_path):
            # read the raw bytes, so that the decoding check sees the file as-is
            with io.open(input_file_path, "rb") as binary_file:
                raw_bytes = binary_file.read()
                self._check_utf8_encoding(raw_bytes)
        else:
            self._failed(u"File '%s' cannot be read." % (input_file_path))
        return self.result
Exemplo n.º 7
0
    def read_properties(self):
        """
        Read the audio properties of the file at ``self.file_path``
        and store them in this object:
        file size, audio length, audio format,
        sample rate, and number of channels.

        The audio properties are obtained through
        :class:`~aeneas.ffprobewrapper.FFPROBEWrapper`.

        :raises: :class:`~aeneas.audiofile.AudioFileProbeError`: if the path to the ``ffprobe`` executable cannot be called
        :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
        :raises: OSError: if the audio file cannot be read
        """
        self.log(u"Reading properties...")

        # nothing can be done if the file is not readable
        if not gf.file_can_be_read(self.file_path):
            self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

        # size of the file on disk
        self.log([u"Getting file size for '%s'", self.file_path])
        self.file_size = gf.file_size(self.file_path)
        self.log([u"File size for '%s' is '%d'", self.file_path, self.file_size])

        # probe the file, translating the wrapper errors into audio file errors
        try:
            self.log(u"Reading properties with FFPROBEWrapper...")
            prober = FFPROBEWrapper(rconf=self.rconf, logger=self.logger)
            probed = prober.read_properties(self.file_path)
            self.log(u"Reading properties with FFPROBEWrapper... done")
        except FFPROBEPathError:
            self.log_exc(u"Unable to call ffprobe executable", None, True, AudioFileProbeError)
        except (FFPROBEUnsupportedFormatError, FFPROBEParsingError):
            self.log_exc(u"Audio file format not supported by ffprobe", None, True, AudioFileUnsupportedFormatError)

        # copy the relevant values into this object
        self.audio_length = TimeValue(probed[FFPROBEWrapper.STDOUT_DURATION])
        self.audio_format = probed[FFPROBEWrapper.STDOUT_CODEC_NAME]
        self.audio_sample_rate = gf.safe_int(probed[FFPROBEWrapper.STDOUT_SAMPLE_RATE])
        self.audio_channels = gf.safe_int(probed[FFPROBEWrapper.STDOUT_CHANNELS])

        # log what has been stored
        stored_values = (
            (u"Stored audio_length: '%s'", self.audio_length),
            (u"Stored audio_format: '%s'", self.audio_format),
            (u"Stored audio_sample_rate: '%s'", self.audio_sample_rate),
            (u"Stored audio_channels: '%s'", self.audio_channels),
        )
        for template, value in stored_values:
            self.log([template, value])
        self.log(u"Reading properties... done")
Exemplo n.º 8
0
    def read(self, sync_map_format, input_file_path, parameters=None):
        """
        Read sync map fragments from the given file in the specified format,
        and add them to the current (this) sync map.

        If ``parameters`` specifies a sync map language,
        the language of the text fragment of every fragment
        in this sync map is overwritten with it.

        :param sync_map_format: the format of the sync map
        :type  sync_map_format: :class:`~aeneas.syncmap.SyncMapFormat`
        :param string input_file_path: the path to the input file to read
        :param dict parameters: additional parameters (e.g., for ``SMIL`` input)
        :raises: ValueError: if ``sync_map_format`` is ``None`` or it is not an allowed value
        :raises: OSError: if ``input_file_path`` cannot be read
        """
        # validate the arguments
        if sync_map_format is None:
            self.log_exc(u"Sync map format is None", None, True, ValueError)
        if sync_map_format not in SyncMapFormat.CODE_TO_CLASS:
            self.log_exc(u"Sync map format '%s' is not allowed" % (sync_map_format), None, True, ValueError)
        if not gf.file_can_be_read(input_file_path):
            self.log_exc(u"Cannot read sync map file '%s'. Wrong permissions?" % (input_file_path), None, True, OSError)

        self.log([u"Input format:     '%s'", sync_map_format])
        self.log([u"Input path:       '%s'", input_file_path])
        self.log([u"Input parameters: '%s'", parameters])

        # instantiate the reader class registered for the requested format
        reader_class = SyncMapFormat.CODE_TO_CLASS[sync_map_format]
        reader = reader_class(
            variant=sync_map_format,
            parameters=parameters,
            rconf=self.rconf,
            logger=self.logger
        )

        # load the whole file as UTF-8 text and let the reader parse it
        self.log(u"Reading input file...")
        with io.open(input_file_path, "r", encoding="utf-8") as sync_map_file:
            contents = sync_map_file.read()
        reader.parse(input_text=contents, syncmap=self)
        self.log(u"Reading input file... done")

        # optionally force the language of all the fragments
        language = gf.safe_get(parameters, gc.PPN_SYNCMAP_LANGUAGE, None)
        if language is None:
            return
        self.log([u"Overwriting language to '%s'", language])
        for fragment in self.fragments:
            fragment.text_fragment.language = language
Exemplo n.º 9
0
 def _select_tts_engine(self):
     """
     Create the TTS engine wrapper requested in the rconf object
     and store it in ``self.tts_engine``.

     A value not matching any of the known engines
     selects the eSpeak wrapper.
     """
     self.log(u"Selecting TTS engine...")
     engine_code = self.rconf[RuntimeConfiguration.TTS]
     if engine_code == self.CUSTOM:
         self.log(u"TTS engine: custom")
         # the custom wrapper is loaded from a user-provided Python source file
         custom_path = self.rconf[RuntimeConfiguration.TTS_PATH]
         if not gf.file_can_be_read(custom_path):
             self.log_exc(u"Cannot read tts_path", None, True, OSError)
         try:
             import imp
             self.log([u"Loading CustomTTSWrapper module from '%s'...", custom_path])
             imp.load_source("CustomTTSWrapperModule", custom_path)
             self.log([u"Loading CustomTTSWrapper module from '%s'... done", custom_path])
             self.log(u"Importing CustomTTSWrapper...")
             from CustomTTSWrapperModule import CustomTTSWrapper
             self.log(u"Importing CustomTTSWrapper... done")
             self.log(u"Creating CustomTTSWrapper instance...")
             self.tts_engine = CustomTTSWrapper(rconf=self.rconf, logger=self.logger)
             self.log(u"Creating CustomTTSWrapper instance... done")
         except Exception as load_error:
             self.log_exc(u"Unable to load custom TTS wrapper", load_error, True, OSError)
     elif engine_code == self.FESTIVAL:
         self.log(u"TTS engine: Festival")
         self.tts_engine = FESTIVALWrapper(rconf=self.rconf, logger=self.logger)
     elif engine_code == self.NUANCETTSAPI:
         # requests is imported here only to check that it is available
         try:
             import requests
         except ImportError as import_error:
             self.log_exc(u"Unable to import requests for Nuance TTS API wrapper", import_error, True, ImportError)
         self.log(u"TTS engine: Nuance TTS API")
         self.tts_engine = NuanceTTSAPIWrapper(rconf=self.rconf, logger=self.logger)
     else:
         # default engine
         self.log(u"TTS engine: eSpeak")
         self.tts_engine = ESPEAKWrapper(rconf=self.rconf, logger=self.logger)
     self.log(u"Selecting TTS engine... done")
Exemplo n.º 10
0
 def file_path(self, file_path):
     # Store the given path; ``None`` is accepted as is,
     # while any other value must point to a readable file.
     path_given = file_path is not None
     if path_given and (not gf.file_can_be_read(file_path)):
         self.log_exc(
             u"Text file '%s' cannot be read" % (file_path),
             None,
             True,
             OSError
         )
     self.__file_path = file_path
Exemplo n.º 11
0
    def _synthesize_single_subprocess_helper(self,
                                             text,
                                             voice_code,
                                             output_file_path=None,
                                             return_audio_data=True):
        """
        This is a helper function to synthesize a single text fragment via ``subprocess``.

        If ``output_file_path`` is ``None``,
        the audio data is written to a temporary file,
        which is removed before returning,
        both on success and on failure.

        If ``return_audio_data`` is ``True``,
        return the audio data at the end of the function call;
        if ``False``, just return ``(True, None)`` in case of success.

        If the call to the TTS engine fails,
        or the output file cannot be read afterwards,
        return ``(False, None)``.

        :rtype: tuple (result, (duration, sample_rate, codec, data)) or (result, None)
        """
        # return zero if text is the empty string
        if len(text) == 0:
            #
            # NOTE sample_rate, codec, data do not matter
            #      if the duration is 0.000 => set them to None
            #
            self.log(u"len(text) is zero: returning 0.000")
            return (True, (TimeValue("0.000"), None, None, None))

        # create a temporary output file if needed
        synt_tmp_file = (output_file_path is None)
        output_file_handler = None
        if synt_tmp_file:
            self.log(
                u"Synthesizer helper called with output_file_path=None => creating temporary output file"
            )
            output_file_handler, output_file_path = gf.tmp_file(
                suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Temporary output file path is '%s'", output_file_path])

        def _remove_tmp_output():
            # delete the output file only if it was created by this call;
            # a path supplied by the caller is never removed
            # NOTE(review): relies on gf.delete_file tolerating a file
            #               that is already missing -- confirm
            if synt_tmp_file:
                self.log([
                    u"Removing temporary output file path '%s'", output_file_path
                ])
                gf.delete_file(output_file_handler, output_file_path)

        # defined before the try block, so that the error path
        # always knows whether a temporary text file exists
        tmp_text_file_handler = None
        tmp_text_file_path = None

        try:
            # if the TTS engine reads text from file,
            # write the text into a temporary file
            if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
                self.log(u"TTS engine reads text from file")
                tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(
                    suffix=u".txt",
                    root=self.rconf[RuntimeConfiguration.TMP_PATH])
                self.log([
                    u"Creating temporary text file '%s'...", tmp_text_file_path
                ])
                with io.open(tmp_text_file_path, "w",
                             encoding="utf-8") as tmp_text_file:
                    tmp_text_file.write(text)
                self.log([
                    u"Creating temporary text file '%s'... done",
                    tmp_text_file_path
                ])
            else:
                self.log(u"TTS engine reads text from stdin")

            # copy all relevant arguments,
            # replacing the placeholders with the actual values
            self.log(u"Creating arguments list...")
            arguments = []
            for arg in self.subprocess_arguments:
                if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                    arguments.extend(
                        self._voice_code_to_subprocess(voice_code))
                elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                    arguments.append(voice_code)
                elif arg == self.CLI_PARAMETER_TEXT_PATH:
                    arguments.append(tmp_text_file_path)
                elif arg == self.CLI_PARAMETER_WAVE_PATH:
                    arguments.append(output_file_path)
                elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                    # placeholder, do not append
                    pass
                elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                    # placeholder, do not append
                    pass
                else:
                    arguments.append(arg)
            self.log(u"Creating arguments list... done")

            # actual call via subprocess
            self.log(u"Calling TTS engine...")
            self.log([u"Calling with arguments '%s'", arguments])
            self.log([u"Calling with text '%s'", text])
            proc = subprocess.Popen(arguments,
                                    stdout=subprocess.PIPE,
                                    stdin=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
            if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
                self.log(u"Passing text via stdin...")
                if gf.PY2:
                    (stdoutdata,
                     stderrdata) = proc.communicate(input=gf.safe_bytes(text))
                else:
                    (stdoutdata, stderrdata) = proc.communicate(input=text)
                self.log(u"Passing text via stdin... done")
            else:
                self.log(u"Passing text via file...")
                (stdoutdata, stderrdata) = proc.communicate()
                self.log(u"Passing text via file... done")
            proc.stdout.close()
            proc.stdin.close()
            proc.stderr.close()

            if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
                self.log(u"TTS engine wrote audio data to stdout")
                self.log(
                    [u"Writing audio data to file '%s'...", output_file_path])
                with io.open(output_file_path, "wb") as output_file:
                    output_file.write(stdoutdata)
                self.log([
                    u"Writing audio data to file '%s'... done",
                    output_file_path
                ])
            else:
                self.log(u"TTS engine wrote audio data to file")

            if tmp_text_file_path is not None:
                self.log(
                    [u"Delete temporary text file '%s'", tmp_text_file_path])
                gf.delete_file(tmp_text_file_handler, tmp_text_file_path)
                # mark the text file as removed,
                # so that the error path does not delete it twice
                tmp_text_file_handler = None
                tmp_text_file_path = None

            self.log(u"Calling TTS ... done")
        except Exception as exc:
            self.log_exc(
                u"An unexpected error occurred while calling TTS engine via subprocess",
                exc, False, None)
            # do not leak the temporary files created above
            if tmp_text_file_path is not None:
                gf.delete_file(tmp_text_file_handler, tmp_text_file_path)
            _remove_tmp_output()
            return (False, None)

        # check the file can be read
        if not gf.file_can_be_read(output_file_path):
            self.log_exc(
                u"Output file '%s' cannot be read" % (output_file_path), None,
                True, None)
            # release the handle (and the file, if any) of the temporary output
            _remove_tmp_output()
            return (False, None)

        # read audio data; if the output file was temporary,
        # remove it even when reading the audio data fails
        try:
            ret = self._read_audio_data(
                output_file_path) if return_audio_data else (True, None)
        finally:
            _remove_tmp_output()

        # return audio data or (True, None)
        return ret
Exemplo n.º 12
0
    def read_samples_from_file(self):
        """
        Load the audio samples from file into memory.

        If ``self.is_mono_wave`` is ``False``,
        the file will be first converted
        to a temporary PCM16 mono WAVE file.
        Audio data will be read from this temporary file,
        which is deleted from disk right after the read attempt,
        whether or not the samples could be read.

        If ``self.is_mono_wave`` is ``True``,
        the audio data will be read directly
        from the given file,
        which will not be deleted from disk.

        :raises: :class:`~aeneas.audiofile.AudioFileConverterError`: if the path to the ``ffmpeg`` executable cannot be called
        :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
        :raises: OSError: if the audio file cannot be read
        """
        self.log(u"Loading audio data...")

        # check the file can be read
        if not gf.file_can_be_read(self.file_path):
            self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

        # convert file to PCM16 mono WAVE
        if self.is_mono_wave:
            self.log(u"is_mono_wave=True => reading self.file_path directly")
            tmp_handler = None
            tmp_file_path = self.file_path
        else:
            self.log(u"is_mono_wave=False => converting self.file_path")
            tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Temporary PCM16 mono WAVE file: '%s'", tmp_file_path])
            try:
                self.log(u"Converting audio file to mono...")
                converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
                converter.convert(self.file_path, tmp_file_path)
                self.log(u"Converting audio file to mono... done")
            except FFMPEGPathError:
                # the temporary file is useless if the conversion failed
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Unable to call ffmpeg executable", None, True, AudioFileConverterError)
            except OSError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Audio file format not supported by ffmpeg", None, True, AudioFileUnsupportedFormatError)

        # TODO allow calling C extension cwave to read samples faster
        try:
            self.audio_format = "pcm16"
            self.audio_channels = 1
            self.audio_sample_rate, self.__samples = scipywavread(tmp_file_path)
            # scipy reads a sample as an int16_t, that is, a number in [-32768, 32767]
            # so we convert it to a float64 in [-1, 1]
            self.__samples = self.__samples.astype("float64") / 32768
            self.__samples_capacity = len(self.__samples)
            self.__samples_length = self.__samples_capacity
            self._update_length()
        except ValueError:
            self.log_exc(u"Audio format not supported by scipywavread", None, True, AudioFileUnsupportedFormatError)
        finally:
            # remove the temporary converted file even when reading the samples failed,
            # otherwise it (and its handle) would be left behind
            if not self.is_mono_wave:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log([u"Deleted temporary PCM16 mono WAVE file: '%s'", tmp_file_path])

        self._update_length()
        self.log([u"Sample length:  %.3f", self.audio_length])
        self.log([u"Sample rate:    %d", self.audio_sample_rate])
        self.log([u"Audio format:   %s", self.audio_format])
        self.log([u"Audio channels: %d", self.audio_channels])
        self.log(u"Loading audio data... done")
Exemplo n.º 13
0
    def read_properties(self, audio_file_path):
        """
        Read the properties of an audio file
        and return them as a dictionary.

        The dictionary always contains the keys for
        channels, codec name, duration, and sample rate
        (``None`` if not found), plus every ``key=value`` pair
        printed by ``ffprobe`` for the first stream.
        The duration is converted to a time value;
        if it cannot be obtained from the standard output,
        it is searched in the standard error.

        Example: ::

            d["index"]=0
            d["codec_name"]=mp3
            d["codec_long_name"]=MP3 (MPEG audio layer 3)
            d["profile"]=unknown
            d["codec_type"]=audio
            d["codec_time_base"]=1/44100
            d["codec_tag_string"]=[0][0][0][0]
            d["codec_tag"]=0x0000
            d["sample_fmt"]=s16p
            d["sample_rate"]=44100
            d["channels"]=1
            d["channel_layout"]=mono
            d["bits_per_sample"]=0
            d["id"]=N/A
            d["r_frame_rate"]=0/0
            d["avg_frame_rate"]=0/0
            d["time_base"]=1/14112000
            d["start_pts"]=0
            d["start_time"]=0.000000
            d["duration_ts"]=1545083190
            d["duration"]=109.487188
            d["bit_rate"]=128000
            d["max_bit_rate"]=N/A
            d["bits_per_raw_sample"]=N/A
            d["nb_frames"]=N/A
            d["nb_read_frames"]=N/A
            d["nb_read_packets"]=N/A
            d["DISPOSITION:default"]=0
            d["DISPOSITION:dub"]=0
            d["DISPOSITION:original"]=0
            d["DISPOSITION:comment"]=0
            d["DISPOSITION:lyrics"]=0
            d["DISPOSITION:karaoke"]=0
            d["DISPOSITION:forced"]=0
            d["DISPOSITION:hearing_impaired"]=0
            d["DISPOSITION:visual_impaired"]=0
            d["DISPOSITION:clean_effects"]=0
            d["DISPOSITION:attached_pic"]=0

        :param string audio_file_path: the path of the audio file to analyze
        :rtype: dict
        :raises: TypeError: if ``audio_file_path`` is None
        :raises: OSError: if the file at ``audio_file_path`` cannot be read
        :raises: FFPROBEParsingError: if the call to ``ffprobe`` does not produce any output
        :raises: FFPROBEPathError: if the path to the ``ffprobe`` executable cannot be called
        :raises: FFPROBEUnsupportedFormatError: if the file has a format not supported by ``ffprobe``
        """

        # test if we can read the file at audio_file_path
        if audio_file_path is None:
            self.log_exc(u"The audio file path is None", None, True, TypeError)
        if not gf.file_can_be_read(audio_file_path):
            self.log_exc(u"Input file '%s' cannot be read" % (audio_file_path), None, True, OSError)

        # call ffprobe
        arguments = [self.rconf[RuntimeConfiguration.FFPROBE_PATH]]
        arguments.extend(self.FFPROBE_PARAMETERS)
        arguments.append(audio_file_path)
        self.log([u"Calling with arguments '%s'", arguments])
        try:
            proc = subprocess.Popen(
                arguments,
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            (stdoutdata, stderrdata) = proc.communicate()
            proc.stdout.close()
            proc.stdin.close()
            proc.stderr.close()
        except OSError as exc:
            self.log_exc(u"Unable to call the '%s' ffprobe executable" % (self.rconf[RuntimeConfiguration.FFPROBE_PATH]), exc, True, FFPROBEPathError)
        self.log(u"Call completed")

        # check there is some output
        # NOTE(review): the emptiness test is on stderr, not on stdout;
        #               an empty stdout is still accepted here and handled
        #               by the stderr duration fallback below -- confirm this is intended
        if (stdoutdata is None) or (len(stderrdata) == 0):
            self.log_exc(u"ffprobe produced no output", None, True, FFPROBEParsingError)

        # decode stdoutdata and stderrdata to Unicode string
        try:
            stdoutdata = gf.safe_unicode(stdoutdata)
            stderrdata = gf.safe_unicode(stderrdata)
        except UnicodeDecodeError as exc:
            self.log_exc(u"Unable to decode ffprobe out/err", exc, True, FFPROBEParsingError)

        # dictionary for the results
        results = {
            self.STDOUT_CHANNELS: None,
            self.STDOUT_CODEC_NAME: None,
            self.STDOUT_DURATION: None,
            self.STDOUT_SAMPLE_RATE: None
        }

        # scan the first audio stream the ffprobe stdout output
        # TODO more robust parsing
        # TODO deal with multiple audio streams
        for line in stdoutdata.splitlines():
            if line == self.STDOUT_END_STREAM:
                self.log(u"Reached end of the stream")
                break
            # only lines of the form "key=value" (exactly one "=") are stored
            pieces = line.split("=")
            if len(pieces) == 2:
                key, value = pieces
                results[key] = value
                self.log([u"Found property '%s'='%s'", key, value])

        try:
            self.log([u"Duration found in stdout: '%s'", results[self.STDOUT_DURATION]])
            results[self.STDOUT_DURATION] = TimeValue(results[self.STDOUT_DURATION])
            self.log(u"Valid duration")
        except Exception:
            # any conversion failure triggers the fallback below;
            # KeyboardInterrupt and SystemExit are deliberately not swallowed
            self.log_warn(u"Invalid duration")
            results[self.STDOUT_DURATION] = None
            # try scanning ffprobe stderr output
            for line in stderrdata.splitlines():
                match = self.STDERR_DURATION_REGEX.search(line)
                if match is not None:
                    self.log([u"Found matching line '%s'", line])
                    results[self.STDOUT_DURATION] = gf.time_from_hhmmssmmm(line)
                    self.log([u"Extracted duration '%.3f'", results[self.STDOUT_DURATION]])
                    break

        if results[self.STDOUT_DURATION] is None:
            self.log_exc(u"No duration found in stdout or stderr. Unsupported audio file format?", None, True, FFPROBEUnsupportedFormatError)

        # return dictionary
        self.log(u"Returning dict")
        return results
Exemplo n.º 14
0
    def _synthesize_single_subprocess(self, text, voice_code, output_file_path):
        """
        Synthesize a single text fragment via ``subprocess``.

        The command line is built from ``self.subprocess_arguments``,
        replacing the placeholders for the voice code,
        the text file path and the wave file path.
        The text is passed to the TTS engine either through
        a temporary text file or through stdin;
        the audio data is either written by the TTS engine
        to ``output_file_path`` or captured from stdout
        and written to ``output_file_path`` by this function.

        The temporary text file, if one was created,
        is deleted even when calling the TTS engine fails.

        :param string text: the text to be synthesized
        :param voice_code: the voice code to be passed to the TTS engine
        :param string output_file_path: the path of the audio file to be created
        :rtype: tuple (result, (duration, sample_rate, encoding, samples)),
                or ``(False, None)`` if the synthesis failed
        """
        self.log(u"Synthesizing using pure Python...")
        # defined before the try block so that the cleanup code
        # can always tell whether a temporary file was created
        tmp_text_file_handler = None
        tmp_text_file_path = None
        try:
            try:
                # if the TTS engine reads text from file,
                # write the text into a temporary file
                if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
                    self.log(u"TTS engine reads text from file")
                    tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(suffix=u".txt", root=self.rconf[RuntimeConfiguration.TMP_PATH])
                    self.log([u"Creating temporary text file '%s'...", tmp_text_file_path])
                    with io.open(tmp_text_file_path, "w", encoding="utf-8") as tmp_text_file:
                        tmp_text_file.write(text)
                    self.log([u"Creating temporary text file '%s'... done", tmp_text_file_path])
                else:
                    self.log(u"TTS engine reads text from stdin")

                # copy all relevant arguments
                self.log(u"Creating arguments list...")
                arguments = []
                for arg in self.subprocess_arguments:
                    if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                        arguments.extend(self._voice_code_to_subprocess(voice_code))
                    elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                        arguments.append(voice_code)
                    elif arg == self.CLI_PARAMETER_TEXT_PATH:
                        arguments.append(tmp_text_file_path)
                    elif arg == self.CLI_PARAMETER_WAVE_PATH:
                        arguments.append(output_file_path)
                    elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                        # placeholder, do not append
                        pass
                    elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                        # placeholder, do not append
                        pass
                    else:
                        arguments.append(arg)
                self.log(u"Creating arguments list... done")

                # actual call via subprocess
                # NOTE(review): with universal_newlines=True the captured stdout
                # is text on Python 3, while it is written below to a file
                # opened in binary mode -- confirm the WAVE_STDOUT path on Python 3
                self.log(u"Calling TTS engine...")
                self.log([u"Calling with arguments '%s'", arguments])
                self.log([u"Calling with text '%s'", text])
                proc = subprocess.Popen(
                    arguments,
                    stdout=subprocess.PIPE,
                    stdin=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    universal_newlines=True
                )
                if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
                    self.log(u"Passing text via stdin...")
                    if gf.PY2:
                        (stdoutdata, stderrdata) = proc.communicate(input=gf.safe_bytes(text))
                    else:
                        (stdoutdata, stderrdata) = proc.communicate(input=text)
                    self.log(u"Passing text via stdin... done")
                else:
                    self.log(u"Passing text via file...")
                    (stdoutdata, stderrdata) = proc.communicate()
                    self.log(u"Passing text via file... done")
                proc.stdout.close()
                proc.stdin.close()
                proc.stderr.close()

                if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
                    self.log(u"TTS engine wrote audio data to stdout")
                    self.log([u"Writing audio data to file '%s'...", output_file_path])
                    with io.open(output_file_path, "wb") as output_file:
                        output_file.write(stdoutdata)
                    self.log([u"Writing audio data to file '%s'... done", output_file_path])
                else:
                    self.log(u"TTS engine wrote audio data to file")
            finally:
                # always remove the temporary text file,
                # so that a failed call does not leave it on disk;
                # an error raised here is still handled by the outer except
                if tmp_text_file_path is not None:
                    self.log([u"Delete temporary text file '%s'", tmp_text_file_path])
                    gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

            self.log(u"Calling TTS ... done")
        except Exception as exc:
            self.log_exc(u"An unexpected error occurred while calling TTS engine via subprocess", exc, False, None)
            return (False, None)

        # check the file can be read
        if not gf.file_can_be_read(output_file_path):
            self.log_exc(u"Output file '%s' cannot be read" % (output_file_path), None, True, None)
            return (False, None)

        # return the duration of the output file
        try:
            # if we know the TTS outputs to PCM16 mono WAVE,
            # we can read samples directly from it,
            # without an intermediate conversion through ffmpeg
            audio_file = AudioFile(
                file_path=output_file_path,
                is_mono_wave=self.OUTPUT_MONO_WAVE,
                rconf=self.rconf,
                logger=self.logger
            )
            audio_file.read_samples_from_file()
            self.log([u"Duration of '%s': %f", output_file_path, audio_file.audio_length])
            self.log(u"Synthesizing using pure Python... done")
            return (True, (
                audio_file.audio_length,
                audio_file.audio_sample_rate,
                audio_file.audio_format,
                audio_file.audio_samples
            ))
        except (AudioFileUnsupportedFormatError, OSError) as exc:
            self.log_exc(u"An unexpected error occurred while trying to read the sythesized audio file", exc, True, None)
            return (False, None)
Exemplo n.º 15
0
    def read_samples_from_file(self):
        """
        Load the audio samples from file into memory.

        If ``self.file_format`` is ``None`` or,
        with safety checks enabled, it is not
        ``("pcm_s16le", 1, self.rconf.sample_rate)``,
        the file will be first converted
        to a temporary PCM16 mono WAVE file.
        Audio data will be read from this temporary file,
        which will be then deleted from disk immediately,
        even if reading the samples fails.

        Otherwise,
        the audio data will be read directly
        from the given file,
        which will not be deleted from disk.

        :raises: :class:`~aeneas.audiofile.AudioFileConverterError`: if the path to the ``ffmpeg`` executable cannot be called
        :raises: :class:`~aeneas.audiofile.AudioFileUnsupportedFormatError`: if the audio file has a format not supported
        :raises: OSError: if the audio file cannot be read
        """
        self.log(u"Loading audio data...")

        # check the file can be read
        if not gf.file_can_be_read(self.file_path):
            self.log_exc(u"File '%s' cannot be read" % (self.file_path), None, True, OSError)

        # determine if we need to convert the audio file
        convert_audio_file = (
            (self.file_format is None) or
            (
                (self.rconf.safety_checks) and
                (self.file_format != ("pcm_s16le", 1, self.rconf.sample_rate))
            )
        )

        # convert the audio file if needed
        if convert_audio_file:
            # convert file to PCM16 mono WAVE with correct sample rate
            self.log(u"self.file_format is None or not good => converting self.file_path")
            tmp_handler, tmp_file_path = gf.tmp_file(suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Temporary PCM16 mono WAVE file: '%s'", tmp_file_path])
            try:
                self.log(u"Converting audio file to mono...")
                converter = FFMPEGWrapper(rconf=self.rconf, logger=self.logger)
                converter.convert(self.file_path, tmp_file_path)
                self.file_format = ("pcm_s16le", 1, self.rconf.sample_rate)
                self.log(u"Converting audio file to mono... done")
            except FFMPEGPathError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Unable to call ffmpeg executable", None, True, AudioFileConverterError)
            except OSError:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log_exc(u"Audio file format not supported by ffmpeg", None, True, AudioFileUnsupportedFormatError)
        else:
            # read the file directly
            if self.rconf.safety_checks:
                self.log(u"self.file_format is good => reading self.file_path directly")
            else:
                self.log_warn(u"Safety checks disabled => reading self.file_path directly")
            tmp_handler = None
            tmp_file_path = self.file_path

        # TODO allow calling C extension cwave to read samples faster
        try:
            self.audio_format = "pcm16"
            self.audio_channels = 1
            self.audio_sample_rate, self.__samples = scipywavread(tmp_file_path)
            # scipy reads a sample as an int16_t, that is, a number in [-32768, 32767]
            # so we convert it to a float64 in [-1, 1]
            self.__samples = self.__samples.astype("float64") / 32768
            self.__samples_capacity = len(self.__samples)
            self.__samples_length = self.__samples_capacity
            self._update_length()
        except ValueError:
            self.log_exc(u"Audio format not supported by scipywavread", None, True, AudioFileUnsupportedFormatError)
        finally:
            # if we converted the audio file, delete the temporary converted audio file;
            # done in a finally clause so that the temporary file
            # is not left on disk when reading the samples fails
            if convert_audio_file:
                gf.delete_file(tmp_handler, tmp_file_path)
                self.log([u"Deleted temporary audio file: '%s'", tmp_file_path])

        self._update_length()
        self.log([u"Sample length:  %.3f", self.audio_length])
        self.log([u"Sample rate:    %d", self.audio_sample_rate])
        self.log([u"Audio format:   %s", self.audio_format])
        self.log([u"Audio channels: %d", self.audio_channels])
        self.log(u"Loading audio data... done")
Exemplo n.º 16
0
 def test_file_can_be_read_true(self):
     """
     The file returned by ``gf.tmp_file()`` must be reported as readable.

     The temporary file is deleted even if the assertion fails,
     so that a failing run does not leave it on disk.
     """
     handler, path = gf.tmp_file()
     try:
         self.assertTrue(gf.file_can_be_read(path))
     finally:
         gf.delete_file(handler, path)
Exemplo n.º 17
0
    def convert(
            self,
            input_file_path,
            output_file_path,
            head_length=None,
            process_length=None
    ):
        """
        Convert the audio file at ``input_file_path``
        into ``output_file_path``,
        using the parameters set in the constructor
        or through the ``parameters`` property.

        You can skip the beginning of the audio file
        by specifying ``head_length`` seconds to skip
        (if it is ``None``, start at time zero),
        and you can specify to convert
        only ``process_length`` seconds
        (if it is ``None``, process the entire input file length).

        By specifying both ``head_length`` and ``process_length``,
        you can skip a portion at the beginning and at the end
        of the original input file.

        :param string input_file_path: the path of the audio file to convert
        :param string output_file_path: the path of the converted audio file
        :param float head_length: skip these many seconds
                                  from the beginning of the audio file
        :param float process_length: process these many seconds of the audio file
        :rtype: string
        :raises: :class:`~aeneas.ffmpegwrapper.FFMPEGPathError`: if the path to the ``ffmpeg`` executable cannot be called
        :raises: OSError: if ``input_file_path`` does not exist
                          or ``output_file_path`` cannot be written
        """
        # test if we can read the input file
        if not gf.file_can_be_read(input_file_path):
            self.log_exc(u"Input file '%s' cannot be read" % (input_file_path), None, True, OSError)

        # test if we can write the output file
        if not gf.file_can_be_written(output_file_path):
            self.log_exc(u"Output file '%s' cannot be written" % (output_file_path), None, True, OSError)

        # call ffmpeg
        arguments = [self.rconf[RuntimeConfiguration.FFMPEG_PATH]]
        arguments.extend(["-i", input_file_path])
        # subprocess only accepts string-like items in the argument list,
        # hence numeric lengths must be converted explicitly,
        # otherwise Popen raises a TypeError not handled below
        if head_length is not None:
            arguments.extend(["-ss", str(head_length)])
        if process_length is not None:
            arguments.extend(["-t", str(process_length)])
        if self.rconf.sample_rate in self.FFMPEG_PARAMETERS_MAP:
            arguments.extend(self.FFMPEG_PARAMETERS_MAP[self.rconf.sample_rate])
        else:
            arguments.extend(self.FFMPEG_PARAMETERS_DEFAULT)
        arguments.append(output_file_path)
        self.log([u"Calling with arguments '%s'", arguments])
        try:
            proc = subprocess.Popen(
                arguments,
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            proc.communicate()
            proc.stdout.close()
            proc.stdin.close()
            proc.stderr.close()
        except OSError as exc:
            self.log_exc(u"Unable to call the '%s' ffmpeg executable" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH]), exc, True, FFMPEGPathError)
        self.log(u"Call completed")

        # check if the output file exists
        if not gf.file_exists(output_file_path):
            self.log_exc(u"Output file '%s' was not written" % (output_file_path), None, True, OSError)

        # returning the output file path
        self.log([u"Returning output file path '%s'", output_file_path])
        return output_file_path
Exemplo n.º 18
0
 def test_file_can_be_read_false(self):
     """
     A path assumed not to exist on the test machine
     must be reported as not readable.
     """
     missing_path = "/foo/bar/baz"
     readable = gf.file_can_be_read(missing_path)
     self.assertFalse(readable)
Exemplo n.º 19
0
    def convert(self,
                input_file_path,
                output_file_path,
                head_length=None,
                process_length=None):
        """
        Convert the audio file at ``input_file_path``
        into ``output_file_path``,
        using the parameters set in the constructor
        or through the ``parameters`` property.

        You can skip the beginning of the audio file
        by specifying ``head_length`` seconds to skip
        (if it is ``None``, start at time zero),
        and you can specify to convert
        only ``process_length`` seconds
        (if it is ``None``, process the entire input file length).

        By specifying both ``head_length`` and ``process_length``,
        you can skip a portion at the beginning and at the end
        of the original input file.

        :param string input_file_path: the path of the audio file to convert
        :param string output_file_path: the path of the converted audio file
        :param float head_length: skip these many seconds
                                  from the beginning of the audio file
        :param float process_length: process these many seconds of the audio file
        :rtype: string
        :raises: :class:`~aeneas.ffmpegwrapper.FFMPEGPathError`: if the path to the ``ffmpeg`` executable cannot be called
        :raises: OSError: if ``input_file_path`` does not exist
                          or ``output_file_path`` cannot be written
        """
        # test if we can read the input file
        if not gf.file_can_be_read(input_file_path):
            self.log_exc(u"Input file '%s' cannot be read" % (input_file_path),
                         None, True, OSError)

        # test if we can write the output file
        if not gf.file_can_be_written(output_file_path):
            self.log_exc(
                u"Output file '%s' cannot be written" % (output_file_path),
                None, True, OSError)

        # call ffmpeg
        arguments = [self.rconf[RuntimeConfiguration.FFMPEG_PATH]]
        arguments.extend(["-i", input_file_path])
        # subprocess only accepts string-like items in the argument list,
        # hence numeric lengths must be converted explicitly,
        # otherwise Popen raises a TypeError not handled below
        if head_length is not None:
            arguments.extend(["-ss", str(head_length)])
        if process_length is not None:
            arguments.extend(["-t", str(process_length)])
        if self.rconf.sample_rate in self.FFMPEG_PARAMETERS_MAP:
            arguments.extend(
                self.FFMPEG_PARAMETERS_MAP[self.rconf.sample_rate])
        else:
            arguments.extend(self.FFMPEG_PARAMETERS_DEFAULT)
        arguments.append(output_file_path)
        self.log([u"Calling with arguments '%s'", arguments])
        try:
            proc = subprocess.Popen(arguments,
                                    stdout=subprocess.PIPE,
                                    stdin=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            proc.communicate()
            proc.stdout.close()
            proc.stdin.close()
            proc.stderr.close()
        except OSError as exc:
            self.log_exc(
                u"Unable to call the '%s' ffmpeg executable" %
                (self.rconf[RuntimeConfiguration.FFMPEG_PATH]), exc, True,
                FFMPEGPathError)
        self.log(u"Call completed")

        # check if the output file exists
        if not gf.file_exists(output_file_path):
            self.log_exc(
                u"Output file '%s' was not written" % (output_file_path), None,
                True, OSError)

        # returning the output file path
        self.log([u"Returning output file path '%s'", output_file_path])
        return output_file_path
Exemplo n.º 20
0
 def file_path(self, file_path):
     """
     Store the path of the text file.

     ``None`` is stored without any check;
     any other value must point to a file that can be read,
     otherwise the failure is reported through ``log_exc``
     with ``OSError``.

     :param string file_path: the path of the text file, or ``None``
     """
     unreadable = (file_path is not None) and (not gf.file_can_be_read(file_path))
     if unreadable:
         self.log_exc(u"Text file '%s' cannot be read" % (file_path), None, True, OSError)
     self.__file_path = file_path
Exemplo n.º 21
0
 def test_file_can_be_read_true(self):
     """
     The file returned by ``gf.tmp_file()`` must be reported as readable.

     The temporary file is deleted even if the assertion fails,
     so that a failing run does not leave it on disk.
     """
     handler, path = gf.tmp_file()
     try:
         self.assertTrue(gf.file_can_be_read(path))
     finally:
         gf.delete_file(handler, path)
Exemplo n.º 22
0
    def read_properties(self, audio_file_path):
        """
        Read the properties of an audio file
        and return them as a dictionary.

        The properties are parsed from the ``key=value`` lines
        printed by ``ffprobe`` on stdout, up to the end of the first stream.
        The duration is converted to a ``TimeValue``;
        if that fails, the duration is looked for in the ``ffprobe`` stderr.

        Example: ::

            d["index"]=0
            d["codec_name"]=mp3
            d["codec_long_name"]=MP3 (MPEG audio layer 3)
            d["profile"]=unknown
            d["codec_type"]=audio
            d["codec_time_base"]=1/44100
            d["codec_tag_string"]=[0][0][0][0]
            d["codec_tag"]=0x0000
            d["sample_fmt"]=s16p
            d["sample_rate"]=44100
            d["channels"]=1
            d["channel_layout"]=mono
            d["bits_per_sample"]=0
            d["id"]=N/A
            d["r_frame_rate"]=0/0
            d["avg_frame_rate"]=0/0
            d["time_base"]=1/14112000
            d["start_pts"]=0
            d["start_time"]=0.000000
            d["duration_ts"]=1545083190
            d["duration"]=109.487188
            d["bit_rate"]=128000
            d["max_bit_rate"]=N/A
            d["bits_per_raw_sample"]=N/A
            d["nb_frames"]=N/A
            d["nb_read_frames"]=N/A
            d["nb_read_packets"]=N/A
            d["DISPOSITION:default"]=0
            d["DISPOSITION:dub"]=0
            d["DISPOSITION:original"]=0
            d["DISPOSITION:comment"]=0
            d["DISPOSITION:lyrics"]=0
            d["DISPOSITION:karaoke"]=0
            d["DISPOSITION:forced"]=0
            d["DISPOSITION:hearing_impaired"]=0
            d["DISPOSITION:visual_impaired"]=0
            d["DISPOSITION:clean_effects"]=0
            d["DISPOSITION:attached_pic"]=0

        :param string audio_file_path: the path of the audio file to analyze
        :rtype: dict
        :raises: TypeError: if ``audio_file_path`` is None
        :raises: OSError: if the file at ``audio_file_path`` cannot be read
        :raises: FFPROBEParsingError: if the call to ``ffprobe`` does not produce any output
        :raises: FFPROBEPathError: if the path to the ``ffprobe`` executable cannot be called
        :raises: FFPROBEUnsupportedFormatError: if the file has a format not supported by ``ffprobe``
        """

        # test if we can read the file at audio_file_path
        if audio_file_path is None:
            self.log_exc(u"The audio file path is None", None, True, TypeError)
        if not gf.file_can_be_read(audio_file_path):
            self.log_exc(u"Input file '%s' cannot be read" % (audio_file_path), None, True, OSError)

        # call ffprobe
        arguments = [self.rconf[RuntimeConfiguration.FFPROBE_PATH]]
        arguments.extend(self.FFPROBE_PARAMETERS)
        arguments.append(audio_file_path)
        self.log([u"Calling with arguments '%s'", arguments])
        try:
            proc = subprocess.Popen(
                arguments,
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            (stdoutdata, stderrdata) = proc.communicate()
            proc.stdout.close()
            proc.stdin.close()
            proc.stderr.close()
        except OSError as exc:
            self.log_exc(u"Unable to call the '%s' ffprobe executable" % (self.rconf[RuntimeConfiguration.FFPROBE_PATH]), exc, True, FFPROBEPathError)
        self.log(u"Call completed")

        # check there is some output
        # NOTE(review): only stdoutdata is tested for None
        # and only stderrdata is tested for emptiness;
        # confirm this asymmetry is intended
        if (stdoutdata is None) or (len(stderrdata) == 0):
            self.log_exc(u"ffprobe produced no output", None, True, FFPROBEParsingError)

        # decode stdoutdata and stderrdata to Unicode string
        try:
            stdoutdata = gf.safe_unicode(stdoutdata)
            stderrdata = gf.safe_unicode(stderrdata)
        except UnicodeDecodeError as exc:
            self.log_exc(u"Unable to decode ffprobe out/err", exc, True, FFPROBEParsingError)

        # dictionary for the results
        results = {
            self.STDOUT_CHANNELS : None,
            self.STDOUT_CODEC_NAME : None,
            self.STDOUT_DURATION : None,
            self.STDOUT_SAMPLE_RATE : None
        }

        # scan the first audio stream the ffprobe stdout output
        # TODO more robust parsing
        # TODO deal with multiple audio streams
        for line in stdoutdata.splitlines():
            if line == self.STDOUT_END_STREAM:
                self.log(u"Reached end of the stream")
                break
            # only lines containing exactly one "=" are considered properties
            fields = line.split("=")
            if len(fields) == 2:
                key, value = fields
                results[key] = value
                self.log([u"Found property '%s'='%s'", key, value])

        try:
            self.log([u"Duration found in stdout: '%s'", results[self.STDOUT_DURATION]])
            results[self.STDOUT_DURATION] = TimeValue(results[self.STDOUT_DURATION])
            self.log(u"Valid duration")
        except Exception:
            # any failure to build a TimeValue means the stdout duration is unusable;
            # Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate
            self.log_warn(u"Invalid duration")
            results[self.STDOUT_DURATION] = None
            # try scanning ffprobe stderr output
            for line in stderrdata.splitlines():
                match = self.STDERR_DURATION_REGEX.search(line)
                if match is not None:
                    self.log([u"Found matching line '%s'", line])
                    results[self.STDOUT_DURATION] = gf.time_from_hhmmssmmm(line)
                    self.log([u"Extracted duration '%.3f'", results[self.STDOUT_DURATION]])
                    break

        if results[self.STDOUT_DURATION] is None:
            self.log_exc(u"No duration found in stdout or stderr. Unsupported audio file format?", None, True, FFPROBEUnsupportedFormatError)

        # return dictionary
        self.log(u"Returning dict")
        return results
Exemplo n.º 23
0
 def test_file_can_be_read_false(self):
     """
     A path assumed not to exist on the test machine
     must be reported as not readable.
     """
     missing_path = "/foo/bar/baz"
     readable = gf.file_can_be_read(missing_path)
     self.assertFalse(readable)