Exemplo n.º 1
0
 def test_safe_bytes(self):
     """
     Check ``gf.safe_bytes`` on ``None``, on text strings and on byte strings.

     ``None`` must map to ``None``; every other input listed below
     must compare equal to the byte string it is paired with.
     """
     self.assertIsNone(gf.safe_bytes(None))
     # (input, expected output) pairs
     expectations = (
         ("", b""),
         ("foo", b"foo"),
         (b"", b""),
         (b"foo", b"foo"),
         (b"fo\x99", b"fo\x99"),
     )
     for value, expected in expectations:
         self.assertEqual(gf.safe_bytes(value), expected)
Exemplo n.º 2
0
 def test_safe_bytes(self):
     """
     Exercise ``gf.safe_bytes``.

     The ``None`` input is expected to yield ``None``;
     the remaining inputs (empty and non-empty text and byte strings,
     including a byte string with a non-ASCII byte)
     are expected to yield the byte string listed next to them.
     """
     pairs = [
         ("", b""),
         ("foo", b"foo"),
         (b"", b""),
         (b"foo", b"foo"),
         (b"fo\x99", b"fo\x99"),
     ]
     self.assertIsNone(gf.safe_bytes(None))
     for pair in pairs:
         given, wanted = pair
         self.assertEqual(gf.safe_bytes(given), wanted)
Exemplo n.º 3
0
def main():
    """
    Run ``aeneas.cew``, reading input text from file and writing audio and interval data to file.

    The command line must provide, in this order: ``QUIT_AFTER`` (parsed with ``float``),
    ``BACKWARDS`` (parsed with ``int``), the path of the input text file,
    the path of the audio file passed to ``synthesize_multiple``,
    and the path of the data file to be written.

    Each line of the text file is split at its first space into
    a voice code and a text; lines without a space,
    or starting with a space, are skipped.

    The data file receives the first two values returned by ``synthesize_multiple``,
    one per line, followed by one ``begin end`` line per interval.

    Any exception raised while synthesizing or writing the data file
    is caught and reported on standard output.

    :rtype: int or None -- ``1`` if too few arguments were given, ``None`` otherwise
    """

    # the script name plus five positional arguments are required
    if len(sys.argv) < 6:
        print("You must pass five arguments: QUIT_AFTER BACKWARDS TEXT_FILE_PATH AUDIO_FILE_PATH DATA_FILE_PATH")
        return 1

    # unpack the five positional arguments
    quit_after_arg, backwards_arg, text_file_path, audio_file_path, data_file_path = sys.argv[1:6]
    c_quit_after = float(quit_after_arg) # NOTE: cew needs float, not TimeValue
    c_backwards = int(backwards_arg)

    # read (voice_code, text) pairs from file
    s_text = []
    with io.open(text_file_path, "r", encoding="utf-8") as text:
        for raw_line in text:
            # NOTE: not using strip() to avoid removing trailing blank characters
            line = raw_line.replace(u"\n", u"").replace(u"\r", u"")
            f_voice_code, separator, f_text = line.partition(" ")
            # keep the line only if a space was found and it is not the first character
            if separator and f_voice_code:
                s_text.append((f_voice_code, f_text))

    # convert to bytes (Python 2) or unicode (Python 3) as required by subprocess
    convert = gf.safe_bytes if gf.PY2 else gf.safe_unicode
    c_text = [(convert(f_voice_code), convert(f_text)) for f_voice_code, f_text in s_text]

    try:
        import aeneas.cew.cew
        sr, sf, intervals = aeneas.cew.cew.synthesize_multiple(
            audio_file_path,
            c_quit_after,
            c_backwards,
            c_text
        )
        with io.open(data_file_path, "w", encoding="utf-8") as data:
            data.write(u"%d\n" % (sr))
            data.write(u"%d\n" % (sf))
            interval_rows = [u"%.3f %.3f" % (i[0], i[1]) for i in intervals]
            data.write(u"\n".join(interval_rows))
    except Exception as exc:
        print(u"Unexpected error: %s" % str(exc))
Exemplo n.º 4
0
def main():
    """
    Run ``aeneas.cew``, reading input text from file and writing audio and interval data to file.

    Expected command line arguments, in order:
    ``QUIT_AFTER BACKWARDS TEXT_FILE_PATH AUDIO_FILE_PATH DATA_FILE_PATH``.

    The text file is read as UTF-8; each line is split at its first space
    into ``(voice_code, text)``, and lines where no space is found
    after the first character are ignored.

    On success the data file contains the first two values returned by
    ``synthesize_multiple`` (one per line) and then one line per interval.
    Exceptions raised by the synthesis or by writing the data file
    are caught and printed.

    :rtype: int or None -- ``1`` when fewer than five arguments are passed, ``None`` otherwise
    """

    # make sure we have enough parameters
    argv = sys.argv
    if len(argv) < 6:
        print(
            "You must pass five arguments: QUIT_AFTER BACKWARDS TEXT_FILE_PATH AUDIO_FILE_PATH DATA_FILE_PATH"
        )
        return 1

    # read parameters
    c_quit_after = float(argv[1])  # NOTE: cew needs float, not TimeValue
    c_backwards = int(argv[2])
    text_file_path, audio_file_path, data_file_path = argv[3], argv[4], argv[5]

    # read (voice_code, text) from file
    s_text = []
    with io.open(text_file_path, "r", encoding="utf-8") as text:
        for line in text.readlines():
            # NOTE: not using strip() to avoid removing trailing blank characters
            line = line.replace(u"\n", u"").replace(u"\r", u"")
            space_at = line.find(" ")
            if space_at <= 0:
                # no space at all, or the line starts with a space
                continue
            s_text.append((line[:space_at], line[space_at + 1:]))

    # convert to bytes/unicode as required by subprocess
    if gf.PY2:
        c_text = [(gf.safe_bytes(code), gf.safe_bytes(txt)) for code, txt in s_text]
    else:
        c_text = [(gf.safe_unicode(code), gf.safe_unicode(txt)) for code, txt in s_text]

    try:
        import aeneas.cew.cew
        synthesize = aeneas.cew.cew.synthesize_multiple
        sr, sf, intervals = synthesize(audio_file_path, c_quit_after, c_backwards, c_text)
        with io.open(data_file_path, "w", encoding="utf-8") as data:
            data.write(u"%d\n" % (sr))
            data.write(u"%d\n" % (sf))
            data.write(u"\n".join(
                [u"%.3f %.3f" % (interval[0], interval[1]) for interval in intervals]))
    except Exception as exc:
        print(u"Unexpected error: %s" % str(exc))
Exemplo n.º 5
0
 def parse(self, input_text, syncmap):
     """
     Parse an XML document made of ``TIME_SLOT`` and
     ``ALIGNABLE_ANNOTATION`` elements, adding one fragment
     to ``syncmap`` for each alignable annotation.

     The begin and end times of an annotation are looked up
     through its ``TIME_SLOT_REF1`` and ``TIME_SLOT_REF2`` attributes;
     its text lines are the texts of its ``ANNOTATION_VALUE`` descendants.

     :param input_text: the XML document, converted with ``gf.safe_bytes`` before parsing
     :param syncmap: the sync map passed through to ``self._add_fragment``
     :raises KeyError: if an annotation references a time slot id that was not declared
     """
     from lxml import etree
     # get root
     root = etree.fromstring(gf.safe_bytes(input_text))
     # map each time slot id to its time value
     # NOTE(review): the division by 1000 suggests TIME_VALUE is expressed
     #               in milliseconds -- confirm against the file format
     time_slots = {
         ts.get("TIME_SLOT_ID"): gf.time_from_ssmmm(ts.get("TIME_VALUE")) / 1000
         for ts in root.iter("TIME_SLOT")
     }
     # parse annotations
     for alignable in root.iter("ALIGNABLE_ANNOTATION"):
         identifier = gf.safe_unicode(alignable.get("ANNOTATION_ID"))
         begin = time_slots[alignable.get("TIME_SLOT_REF1")]
         end = time_slots[alignable.get("TIME_SLOT_REF2")]
         lines = [
             gf.safe_unicode(value.text)
             for value in alignable.iter("ANNOTATION_VALUE")
         ]
         self._add_fragment(
             syncmap=syncmap,
             identifier=identifier,
             lines=lines,
             begin=begin,
             end=end
         )
Exemplo n.º 6
0
    def check_config_xml(self, contents):
        """
        Check whether the given XML config file contents
        is well-formed and it has all the required parameters.

        The checks run in sequence (raw string, job parameters,
        parameters of each task) and stop at the first one that fails.
        If the safety checks are disabled for this method,
        a fresh result is returned without performing any check.

        :param string contents: the XML config file contents or XML config string
        :rtype: :class:`~aeneas.validator.ValidatorResult`
        """
        self.log(u"Checking contents XML config file")
        self.result = ValidatorResult()
        if self._are_safety_checks_disabled(u"check_config_xml"):
            return self.result

        xml_bytes = gf.safe_bytes(contents)
        self.log(u"Checking that contents is well formed")
        self.check_raw_string(xml_bytes, is_bstring=True)
        if self.result.passed:
            self.log(u"Checking required parameters for job")
            job_parameters = gf.config_xml_to_dict(xml_bytes, self.result, parse_job=True)
            self._check_required_parameters(self.XML_JOB_REQUIRED_PARAMETERS, job_parameters)
            if self.result.passed:
                self.log(u"Checking required parameters for task")
                tasks_parameters = gf.config_xml_to_dict(xml_bytes, self.result, parse_job=False)
                for task_parameters in tasks_parameters:
                    self.log([u"Checking required parameters for task: '%s'", task_parameters])
                    self._check_required_parameters(self.XML_TASK_REQUIRED_PARAMETERS, task_parameters)
                    if not self.result.passed:
                        # stop at the first task with missing parameters
                        break
        return self.result
Exemplo n.º 7
0
 def parse(self, input_text, syncmap):
     """
     Read ``TIME_SLOT`` and ``ALIGNABLE_ANNOTATION`` elements
     from an XML document and add one fragment to ``syncmap``
     per alignable annotation.

     Each annotation refers to two time slots
     (``TIME_SLOT_REF1`` for its begin, ``TIME_SLOT_REF2`` for its end);
     the texts of its ``ANNOTATION_VALUE`` descendants become the fragment lines.

     :param input_text: the XML document, converted with ``gf.safe_bytes`` before parsing
     :param syncmap: the sync map passed through to ``self._add_fragment``
     :raises KeyError: if an annotation references a time slot id that was not declared
     """
     from lxml import etree
     # get root
     document = etree.fromstring(gf.safe_bytes(input_text))
     # get time slots, indexed by their id
     slot_times = dict()
     for slot in document.iter("TIME_SLOT"):
         # NOTE(review): presumably TIME_VALUE is in milliseconds,
         #               hence the division by 1000 -- confirm
         slot_time = gf.time_from_ssmmm(slot.get("TIME_VALUE")) / 1000
         slot_times[slot.get("TIME_SLOT_ID")] = slot_time
     # parse annotations
     for annotation in document.iter("ALIGNABLE_ANNOTATION"):
         identifier = gf.safe_unicode(annotation.get("ANNOTATION_ID"))
         begin = slot_times[annotation.get("TIME_SLOT_REF1")]
         end = slot_times[annotation.get("TIME_SLOT_REF2")]
         lines = []
         for value_node in annotation.iter("ANNOTATION_VALUE"):
             lines.append(gf.safe_unicode(value_node.text))
         self._add_fragment(syncmap=syncmap,
                            identifier=identifier,
                            lines=lines,
                            begin=begin,
                            end=end)
Exemplo n.º 8
0
    def parse(self, input_text, syncmap):
        """
        Read from SMIL file.

        Limitations:
        1. parses only ``<par>`` elements, in order
        2. timings must have ``hh:mm:ss.mmm`` or ``ss.mmm`` format (autodetected)
        3. both ``clipBegin`` and ``clipEnd`` attributes of ``<audio>`` must be populated

        One fragment, with a single empty text line,
        is added to ``syncmap`` for each ``<par>`` element.

        :param input_text: the SMIL document, converted with ``gf.safe_bytes`` before parsing
        :param syncmap: the sync map passed through to ``self._add_fragment``
        """
        from lxml import etree

        def clock_to_time(clock):
            # a colon selects the hh:mm:ss.mmm parser, otherwise ss.mmm is used
            if ":" in clock:
                return gf.time_from_hhmmssmmm(clock)
            return gf.time_from_ssmmm(clock)

        namespace = "{http://www.w3.org/ns/SMIL}"
        text_tag = namespace + "text"
        audio_tag = namespace + "audio"
        document = etree.fromstring(gf.safe_bytes(input_text))
        for par in document.iter(namespace + "par"):
            # NOTE(review): identifier, begin and end are not reset for each <par>,
            #               so a <par> lacking <text> or <audio> reuses the values
            #               of the previous one (or fails if it is the first)
            for node in par:
                if node.tag == text_tag:
                    identifier = gf.safe_unicode(gf.split_url(node.get("src"))[1])
                elif node.tag == audio_tag:
                    begin = clock_to_time(node.get("clipBegin"))
                    end = clock_to_time(node.get("clipEnd"))
            # TODO read text from additional text_file?
            self._add_fragment(syncmap=syncmap,
                               identifier=identifier,
                               lines=[u""],
                               begin=begin,
                               end=end)
Exemplo n.º 9
0
    def _synthesize_single_c_extension(self, text, voice_code, output_file_path):
        """
        Synthesize a single text fragment, using the cew extension.

        If the ``CEW_SUBPROCESS_ENABLED`` runtime option is set,
        ``aeneas.cewsubprocess`` is tried first; when it fails or yields ``None``,
        ``aeneas.cew`` is called directly.

        Return the duration of the synthesized text, in seconds.
        If the direct call to ``aeneas.cew`` raises, ``(False, None)`` is returned.

        :rtype: (bool, (:class:`~aeneas.timevalue.TimeValue`, ))
        """
        self.log(u"Synthesizing using C extension...")

        end = None
        subprocess_enabled = self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED]
        if subprocess_enabled:
            self.log(u"Using cewsubprocess to call aeneas.cew")
            try:
                self.log(u"Importing aeneas.cewsubprocess...")
                from aeneas.cewsubprocess import CEWSubprocess
                self.log(u"Importing aeneas.cewsubprocess... done")
                self.log(u"Calling aeneas.cewsubprocess...")
                end = CEWSubprocess(
                    rconf=self.rconf,
                    logger=self.logger
                ).synthesize_single(output_file_path, voice_code, text)
                self.log(u"Calling aeneas.cewsubprocess... done")
            except Exception as exc:
                # NOTE not critical: fall through and try calling aeneas.cew directly
                self.log_exc(u"An unexpected error occurred while running cewsubprocess", exc, False, None)

        if end is None:
            self.log(u"Preparing c_text...")
            # Python 2 => pass byte strings, Python 3 => pass Unicode strings
            c_text = gf.safe_bytes(text) if gf.PY2 else gf.safe_unicode(text)
            self.log(u"Preparing c_text... done")

            self.log(u"Calling aeneas.cew directly")
            try:
                self.log(u"Importing aeneas.cew...")
                import aeneas.cew.cew
                self.log(u"Importing aeneas.cew... done")
                self.log(u"Calling aeneas.cew...")
                # only the third returned value is used here
                _, _, raw_end = aeneas.cew.cew.synthesize_single(output_file_path, voice_code, c_text)
                end = TimeValue(raw_end)
                self.log(u"Calling aeneas.cew... done")
            except Exception as exc:
                self.log_exc(u"An unexpected error occurred while running cew", exc, False, None)
                return (False, None)

        self.log(u"Synthesizing using C extension... done")
        return (True, (end, ))
Exemplo n.º 10
0
 def parse(self, input_text, syncmap):
     """
     Read fragments from an XML document whose root contains
     one element per fragment, each with ``identifier``,
     ``start`` and ``end`` child elements.

     One fragment, with a single empty text line,
     is added to ``syncmap`` for each child of the root.

     :param input_text: the XML document, converted with ``gf.safe_bytes`` before parsing
     :param syncmap: the sync map passed through to ``self._add_fragment``
     """
     from lxml import etree
     document = etree.fromstring(gf.safe_bytes(input_text))
     for fragment_node in document:
         # NOTE(review): identifier, begin and end are not reset per fragment,
         #               so a missing child element reuses the previous value
         for field in fragment_node:
             tag = field.tag
             if tag == "identifier":
                 identifier = gf.safe_unicode(field.text)
             elif tag == "start":
                 begin = gf.time_from_ssmmm(field.text)
             elif tag == "end":
                 end = gf.time_from_ssmmm(field.text)
         # TODO read text from additional text_file?
         self._add_fragment(
             syncmap=syncmap,
             identifier=identifier,
             lines=[u""],
             begin=begin,
             end=end
         )
Exemplo n.º 11
0
 def parse(self, input_text, syncmap):
     """
     Read fragments from a TTML document.

     The language is taken from the ``xml:lang`` attribute of the root;
     one fragment is added to ``syncmap`` for each ``<p>`` element
     in the TTML namespace, using its ``xml:id``, ``begin`` and ``end``
     attributes and the lines returned by ``self._get_lines_from_node_text``.

     :param input_text: the TTML document, converted with ``gf.safe_bytes`` before parsing
     :param syncmap: the sync map passed through to ``self._add_fragment``
     """
     from lxml import etree
     ttml_ns = "{http://www.w3.org/ns/ttml}"
     xml_ns = "{http://www.w3.org/XML/1998/namespace}"
     id_attribute = xml_ns + "id"
     document = etree.fromstring(gf.safe_bytes(input_text))
     language = document.get(xml_ns + "lang")
     for paragraph in document.iter(ttml_ns + "p"):
         # keyword arguments are listed in the order they must be evaluated
         self._add_fragment(
             syncmap=syncmap,
             language=language,
             identifier=gf.safe_unicode(paragraph.get(id_attribute)),
             begin=gf.time_from_ttml(paragraph.get("begin")),
             end=gf.time_from_ttml(paragraph.get("end")),
             lines=self._get_lines_from_node_text(paragraph)
         )
Exemplo n.º 12
0
 def parse(self, input_text, syncmap):
     """
     Read fragments from an XML document whose root contains
     one element per fragment, carrying ``id``, ``begin`` and ``end``
     attributes and zero or more ``line`` child elements.

     One fragment is added to ``syncmap`` for each child of the root;
     its lines are the texts of its ``line`` children, in document order.

     :param input_text: the XML document, converted with ``gf.safe_bytes`` before parsing
     :param syncmap: the sync map passed through to ``self._add_fragment``
     """
     from lxml import etree
     document = etree.fromstring(gf.safe_bytes(input_text))
     for fragment_node in document:
         identifier = gf.safe_unicode(fragment_node.get("id"))
         begin = gf.time_from_ssmmm(fragment_node.get("begin"))
         end = gf.time_from_ssmmm(fragment_node.get("end"))
         # children with any other tag are ignored
         lines = [
             gf.safe_unicode(node.text)
             for node in fragment_node
             if node.tag == "line"
         ]
         self._add_fragment(syncmap=syncmap,
                            identifier=identifier,
                            lines=lines,
                            begin=begin,
                            end=end)
Exemplo n.º 13
0
 def parse(self, input_text, syncmap):
     """
     Read fragments from a TTML document.

     Every ``<p>`` element in the TTML namespace yields one fragment,
     built from its ``xml:id``, ``begin`` and ``end`` attributes
     and from the lines returned by ``self._get_lines_from_node_text``.
     All fragments share the language read from
     the ``xml:lang`` attribute of the root element.

     :param input_text: the TTML document, converted with ``gf.safe_bytes`` before parsing
     :param syncmap: the sync map passed through to ``self._add_fragment``
     """
     from lxml import etree
     xml_ns = "{http://www.w3.org/XML/1998/namespace}"
     ttml_ns = "{http://www.w3.org/ns/ttml}"
     tree_root = etree.fromstring(gf.safe_bytes(input_text))
     document_language = tree_root.get(xml_ns + "lang")
     for p_node in tree_root.iter(ttml_ns + "p"):
         fragment_id = gf.safe_unicode(p_node.get(xml_ns + "id"))
         fragment_begin = gf.time_from_ttml(p_node.get("begin"))
         fragment_end = gf.time_from_ttml(p_node.get("end"))
         text_lines = self._get_lines_from_node_text(p_node)
         self._add_fragment(syncmap=syncmap,
                            identifier=fragment_id,
                            language=document_language,
                            lines=text_lines,
                            begin=fragment_begin,
                            end=fragment_end)
Exemplo n.º 14
0
 def parse(self, input_text, syncmap):
     """
     Read fragments from an XML document in which each child of the root
     describes one fragment through its ``identifier``, ``start``
     and ``end`` child elements.

     For each child of the root, one fragment with a single
     empty text line is added to ``syncmap``.

     :param input_text: the XML document, converted with ``gf.safe_bytes`` before parsing
     :param syncmap: the sync map passed through to ``self._add_fragment``
     """
     from lxml import etree
     xml_root = etree.fromstring(gf.safe_bytes(input_text))
     for entry in xml_root:
         # NOTE(review): values from the previous entry are kept
         #               when one of the three child elements is missing
         for item in entry:
             if item.tag == "identifier":
                 identifier = gf.safe_unicode(item.text)
                 continue
             if item.tag == "start":
                 begin = gf.time_from_ssmmm(item.text)
                 continue
             if item.tag == "end":
                 end = gf.time_from_ssmmm(item.text)
         # TODO read text from additional text_file?
         self._add_fragment(syncmap=syncmap,
                            identifier=identifier,
                            lines=[u""],
                            begin=begin,
                            end=end)
Exemplo n.º 15
0
    def _synthesize_multiple_c_extension(self, text_file, output_file_path, quit_after=None, backwards=False):
        """
        Synthesize multiple text fragments, using the cfw extension.

        Return a tuple (anchors, total_time, num_chars).

        Each anchor is a list ``[begin, identifier, filtered_text]``;
        ``total_time`` is the end of the last synthesized interval.
        If the call to ``aeneas.cfw`` raises, ``(False, None)`` is returned.

        :param text_file: the object whose ``fragments`` are synthesized
        :param output_file_path: the path passed to ``synthesize_multiple``
        :param quit_after: converted with ``float``; ``0.0`` is used if that raises ``TypeError``
        :param backwards: if true, ``1`` is passed to the extension and
                          the fragments are matched to the intervals in reverse order
        :rtype: (bool, (list, :class:`~aeneas.exacttiming.TimeValue`, int))
        """
        self.log(u"Synthesizing using C extension...")

        # convert parameters from Python values to C values
        try:
            c_quit_after = float(quit_after)
        except TypeError:
            # e.g. quit_after is None
            c_quit_after = 0.0
        c_backwards = 1 if backwards else 0
        self.log([u"output_file_path: %s", output_file_path])
        self.log([u"c_quit_after:     %.3f", c_quit_after])
        self.log([u"c_backwards:      %d", c_backwards])

        # collect one (voice_code, text) pair per fragment
        self.log(u"Preparing u_text...")
        fragments = text_file.fragments
        u_text = []
        for fragment in fragments:
            language = fragment.language
            text = fragment.filtered_text
            if language is None:
                language = self.DEFAULT_LANGUAGE
            voice_code = self.VOICE_CODE_TO_SUBPROCESS[self._language_to_voice_code(language)]
            u_text.append((voice_code, u"" if text is None else text))
        self.log(u"Preparing u_text... done")

        self.log(u"Preparing c_text...")
        # Python 2 => pass byte strings, Python 3 => pass Unicode strings
        convert = gf.safe_bytes if gf.PY2 else gf.safe_unicode
        c_text = [(convert(pair[0]), convert(pair[1])) for pair in u_text]
        self.log(u"Preparing c_text... done")

        # call C extension
        self.log(u"Calling aeneas.cfw directly")
        try:
            self.log(u"Importing aeneas.cfw...")
            import aeneas.cfw.cfw
            self.log(u"Importing aeneas.cfw... done")
            self.log(u"Calling aeneas.cfw...")
            sr, sf, intervals = aeneas.cfw.cfw.synthesize_multiple(
                output_file_path, c_quit_after, c_backwards, c_text)
            self.log(u"Calling aeneas.cfw... done")
        except Exception as exc:
            self.log_exc(u"An unexpected error occurred while running cfw", exc, False, None)
            return (False, None)

        self.log([u"sr: %d", sr])
        self.log([u"sf: %d", sf])

        # create output
        anchors = []
        current_time = TimeValue("0.000")
        num_chars = 0
        ordered_fragments = fragments[::-1] if backwards else fragments
        for index in range(sf):
            # get the correct fragment and its interval
            fragment = ordered_fragments[index]
            interval = intervals[index]
            # store for later output
            anchors.append([TimeValue(interval[0]), fragment.identifier, fragment.filtered_text])
            # increase the character counter
            num_chars += fragment.characters
            # update current_time
            current_time = TimeValue(interval[1])

        # return output
        # NOTE anchors do not make sense if backwards == True
        self.log([u"Returning %d time anchors", len(anchors)])
        self.log([u"Current time %.3f", current_time])
        self.log([u"Synthesized %d characters", num_chars])
        self.log(u"Synthesizing using C extension... done")
        return (True, (anchors, current_time, num_chars))
Exemplo n.º 16
0
    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        The MFCCs of the input audio file (first argument)
        are saved to the output file (second argument),
        as a raw float64 binary file, a ``.npz`` file, a ``.npy`` file,
        or (by default) a text file, depending on the given options.
        If fewer than two arguments are given, the help is printed instead.

        :rtype: int
        """
        arguments = self.actual_arguments
        if len(arguments) < 2:
            return self.print_help()
        input_file_path = arguments[0]
        output_file_path = arguments[1]

        # read the options
        output_text_format = self.has_option_with_value(u"--format")
        if output_text_format is None:
            output_text_format = u"%.18e"
        output_binary = self.has_option([u"-b", u"--binary"])
        output_npz = self.has_option([u"-z", u"--npz"])
        output_npy = self.has_option([u"-n", u"--npy"])
        delete_first = self.has_option([u"-d", u"--delete-first"])
        transpose = self.has_option([u"-t", u"--transpose"])

        self.check_c_extensions("cmfcc")
        if not self.check_input_file(input_file_path):
            return self.ERROR_EXIT_CODE
        if not self.check_output_file(output_file_path):
            return self.ERROR_EXIT_CODE

        try:
            audio_file_mfcc = AudioFileMFCC(
                input_file_path,
                rconf=self.rconf,
                logger=self.logger
            )
            mfccs = audio_file_mfcc.all_mfcc
            if delete_first:
                # drop the first row
                mfccs = mfccs[1:, :]
            if transpose:
                mfccs = mfccs.transpose()
            if output_binary:
                # save as a raw C float64 binary file
                mapped = numpy.memmap(
                    output_file_path,
                    dtype="float64",
                    mode="w+",
                    shape=mfccs.shape
                )
                mapped[:] = mfccs[:]
                mapped.flush()
                del mapped
            elif output_npz:
                # save as a .npz compressed binary file
                with io.open(output_file_path, "wb") as npz_file:
                    numpy.savez(npz_file, mfccs)
            elif output_npy:
                # save as a .npy binary file
                with io.open(output_file_path, "wb") as npy_file:
                    numpy.save(npy_file, mfccs)
            else:
                # save as a text file
                # NOTE: in Python 2, passing the fmt value a Unicode string crashes NumPy
                #       hence, converting back to bytes, which works in Python 3 too
                text_format = gf.safe_bytes(output_text_format)
                numpy.savetxt(output_file_path, mfccs, fmt=text_format)
            self.print_info(u"MFCCs shape: %d %d" % (mfccs.shape))
            self.print_success(u"MFCCs saved to '%s'" % (output_file_path))
            return self.NO_ERROR_EXIT_CODE
        except AudioFileConverterError:
            ffmpeg_path = self.rconf[RuntimeConfiguration.FFMPEG_PATH]
            self.print_error(u"Unable to call the ffmpeg executable '%s'" % (ffmpeg_path))
            self.print_error(u"Make sure the path to ffmpeg is correct")
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            self.print_error(u"Cannot read file '%s'" % (input_file_path))
            self.print_error(u"Check that its format is supported by ffmpeg")
        except OSError:
            self.print_error(u"Cannot write file '%s'" % (output_file_path))

        # reached only after one of the errors above
        return self.ERROR_EXIT_CODE
Exemplo n.º 17
0
    def _synthesize_single_subprocess_helper(self,
                                             text,
                                             voice_code,
                                             output_file_path=None,
                                             return_audio_data=True):
        """
        This is a helper function to synthesize a single text fragment via ``subprocess``.

        The command line is built from ``self.subprocess_arguments``,
        replacing the voice code, text path and wave path placeholders
        and dropping the stdin/stdout placeholders.
        The text is passed to the TTS engine either through a temporary
        UTF-8 text file or through stdin, and the audio data is obtained
        either from the output file written by the engine or from its stdout.

        If ``output_file_path`` is ``None``,
        the audio data will not persist to file at the end of the method.

        If ``return_audio_data`` is ``True``,
        return the audio data at the end of the function call;
        if ``False``, just return ``(True, None)`` in case of success.

        If ``text`` is empty, a zero duration is returned
        without calling the TTS engine.
        If calling the engine raises, or the output file cannot be read,
        ``(False, None)`` is returned.

        :param text: the text to be synthesized
        :param voice_code: the voice code substituted into the arguments
        :param output_file_path: the path of the output audio file,
                                 or ``None`` to use a temporary file
        :param bool return_audio_data: if ``True``, read and return the audio data
        :rtype: tuple (result, (duration, sample_rate, codec, data)) or (result, None)
        """
        # return zero if text is the empty string
        if len(text) == 0:
            #
            # NOTE sample_rate, codec, data do not matter
            #      if the duration is 0.000 => set them to None
            #
            self.log(u"len(text) is zero: returning 0.000")
            return (True, (TimeValue("0.000"), None, None, None))

        # create a temporary output file if needed
        synt_tmp_file = (output_file_path is None)
        if synt_tmp_file:
            self.log(
                u"Synthesizer helper called with output_file_path=None => creating temporary output file"
            )
            output_file_handler, output_file_path = gf.tmp_file(
                suffix=u".wav", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            self.log([u"Temporary output file path is '%s'", output_file_path])

        try:
            # if the TTS engine reads text from file,
            # write the text into a temporary file
            if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
                self.log(u"TTS engine reads text from file")
                tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(
                    suffix=u".txt",
                    root=self.rconf[RuntimeConfiguration.TMP_PATH])
                self.log([
                    u"Creating temporary text file '%s'...", tmp_text_file_path
                ])
                with io.open(tmp_text_file_path, "w",
                             encoding="utf-8") as tmp_text_file:
                    tmp_text_file.write(text)
                self.log([
                    u"Creating temporary text file '%s'... done",
                    tmp_text_file_path
                ])
            else:
                self.log(u"TTS engine reads text from stdin")
                tmp_text_file_handler = None
                tmp_text_file_path = None

            # copy all relevant arguments,
            # replacing each placeholder with its actual value
            self.log(u"Creating arguments list...")
            arguments = []
            for arg in self.subprocess_arguments:
                if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                    # the voice code may expand to several arguments
                    arguments.extend(
                        self._voice_code_to_subprocess(voice_code))
                elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                    arguments.append(voice_code)
                elif arg == self.CLI_PARAMETER_TEXT_PATH:
                    arguments.append(tmp_text_file_path)
                elif arg == self.CLI_PARAMETER_WAVE_PATH:
                    arguments.append(output_file_path)
                elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                    # placeholder, do not append
                    pass
                elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                    # placeholder, do not append
                    pass
                else:
                    arguments.append(arg)
            self.log(u"Creating arguments list... done")

            # actual call via subprocess
            self.log(u"Calling TTS engine...")
            self.log([u"Calling with arguments '%s'", arguments])
            self.log([u"Calling with text '%s'", text])
            # NOTE(review): universal_newlines=True opens the pipes in text mode,
            #               while stdoutdata is written below to a file opened
            #               in "wb" mode -- confirm this works on Python 3
            #               for engines that emit the audio data on stdout
            proc = subprocess.Popen(arguments,
                                    stdout=subprocess.PIPE,
                                    stdin=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
            if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
                self.log(u"Passing text via stdin...")
                if gf.PY2:
                    # Python 2 => pass a byte string to the subprocess
                    (stdoutdata,
                     stderrdata) = proc.communicate(input=gf.safe_bytes(text))
                else:
                    (stdoutdata, stderrdata) = proc.communicate(input=text)
                self.log(u"Passing text via stdin... done")
            else:
                self.log(u"Passing text via file...")
                (stdoutdata, stderrdata) = proc.communicate()
                self.log(u"Passing text via file... done")
            proc.stdout.close()
            proc.stdin.close()
            proc.stderr.close()

            if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
                self.log(u"TTS engine wrote audio data to stdout")
                self.log(
                    [u"Writing audio data to file '%s'...", output_file_path])
                with io.open(output_file_path, "wb") as output_file:
                    output_file.write(stdoutdata)
                self.log([
                    u"Writing audio data to file '%s'... done",
                    output_file_path
                ])
            else:
                self.log(u"TTS engine wrote audio data to file")

            # NOTE(review): this cleanup is skipped if an exception
            #               is raised earlier inside this try block
            if tmp_text_file_path is not None:
                self.log(
                    [u"Delete temporary text file '%s'", tmp_text_file_path])
                gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

            self.log(u"Calling TTS ... done")
        except Exception as exc:
            # NOTE(review): a temporary output file created above
            #               is not removed on this path
            self.log_exc(
                u"An unexpected error occurred while calling TTS engine via subprocess",
                exc, False, None)
            return (False, None)

        # check the file can be read
        if not gf.file_can_be_read(output_file_path):
            # NOTE(review): a temporary output file created above
            #               is not removed on this path either
            self.log_exc(
                u"Output file '%s' cannot be read" % (output_file_path), None,
                True, None)
            return (False, None)

        # read audio data
        ret = self._read_audio_data(
            output_file_path) if return_audio_data else (True, None)

        # if the output file was temporary, remove it
        if synt_tmp_file:
            self.log([
                u"Removing temporary output file path '%s'", output_file_path
            ])
            gf.delete_file(output_file_handler, output_file_path)

        # return audio data or (True, None)
        return ret
Exemplo n.º 18
0
    def _synthesize_single_subprocess(self, text, voice_code, output_file_path):
        """
        Synthesize a single text fragment via ``subprocess``.

        :rtype: tuple (result, (duration, sample_rate, encoding, samples))
        """
        self.log(u"Synthesizing using pure Python...")
        try:
            # if the TTS engine reads text from file,
            # write the text into a temporary file
            if self.CLI_PARAMETER_TEXT_PATH in self.subprocess_arguments:
                self.log(u"TTS engine reads text from file")
                tmp_text_file_handler, tmp_text_file_path = gf.tmp_file(suffix=u".txt", root=self.rconf[RuntimeConfiguration.TMP_PATH])
                self.log([u"Creating temporary text file '%s'...", tmp_text_file_path])
                with io.open(tmp_text_file_path, "w", encoding="utf-8") as tmp_text_file:
                    tmp_text_file.write(text)
                self.log([u"Creating temporary text file '%s'... done", tmp_text_file_path])
            else:
                self.log(u"TTS engine reads text from stdin")
                tmp_text_file_handler = None
                tmp_text_file_path = None

            # copy all relevant arguments
            self.log(u"Creating arguments list...")
            arguments = []
            for arg in self.subprocess_arguments:
                if arg == self.CLI_PARAMETER_VOICE_CODE_FUNCTION:
                    arguments.extend(self._voice_code_to_subprocess(voice_code))
                elif arg == self.CLI_PARAMETER_VOICE_CODE_STRING:
                    arguments.append(voice_code)
                elif arg == self.CLI_PARAMETER_TEXT_PATH:
                    arguments.append(tmp_text_file_path)
                elif arg == self.CLI_PARAMETER_WAVE_PATH:
                    arguments.append(output_file_path)
                elif arg == self.CLI_PARAMETER_TEXT_STDIN:
                    # placeholder, do not append
                    pass
                elif arg == self.CLI_PARAMETER_WAVE_STDOUT:
                    # placeholder, do not append
                    pass
                else:
                    arguments.append(arg)
            self.log(u"Creating arguments list... done")

            # actual call via subprocess
            self.log(u"Calling TTS engine...")
            self.log([u"Calling with arguments '%s'", arguments])
            self.log([u"Calling with text '%s'", text])
            proc = subprocess.Popen(
                arguments,
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True
            )
            if self.CLI_PARAMETER_TEXT_STDIN in self.subprocess_arguments:
                self.log(u"Passing text via stdin...")
                if gf.PY2:
                    (stdoutdata, stderrdata) = proc.communicate(input=gf.safe_bytes(text))
                else:
                    (stdoutdata, stderrdata) = proc.communicate(input=text)
                self.log(u"Passing text via stdin... done")
            else:
                self.log(u"Passing text via file...")
                (stdoutdata, stderrdata) = proc.communicate()
                self.log(u"Passing text via file... done")
            proc.stdout.close()
            proc.stdin.close()
            proc.stderr.close()

            if self.CLI_PARAMETER_WAVE_STDOUT in self.subprocess_arguments:
                self.log(u"TTS engine wrote audio data to stdout")
                self.log([u"Writing audio data to file '%s'...", output_file_path])
                with io.open(output_file_path, "wb") as output_file:
                    output_file.write(stdoutdata)
                self.log([u"Writing audio data to file '%s'... done", output_file_path])
            else:
                self.log(u"TTS engine wrote audio data to file")

            if tmp_text_file_path is not None:
                self.log([u"Delete temporary text file '%s'", tmp_text_file_path])
                gf.delete_file(tmp_text_file_handler, tmp_text_file_path)

            self.log(u"Calling TTS ... done")
        except Exception as exc:
            self.log_exc(u"An unexpected error occurred while calling TTS engine via subprocess", exc, False, None)
            return (False, None)

        # check the file can be read
        if not gf.file_can_be_read(output_file_path):
            self.log_exc(u"Output file '%s' cannot be read" % (output_file_path), None, True, None)
            return (False, None)

        # return the duration of the output file
        try:
            # if we know the TTS outputs to PCM16 mono WAVE,
            # we can read samples directly from it,
            # without an intermediate conversion through ffmpeg
            audio_file = AudioFile(
                file_path=output_file_path,
                is_mono_wave=self.OUTPUT_MONO_WAVE,
                rconf=self.rconf,
                logger=self.logger
            )
            audio_file.read_samples_from_file()
            self.log([u"Duration of '%s': %f", output_file_path, audio_file.audio_length])
            self.log(u"Synthesizing using pure Python... done")
            return (True, (
                audio_file.audio_length,
                audio_file.audio_sample_rate,
                audio_file.audio_format,
                audio_file.audio_samples
            ))
        except (AudioFileUnsupportedFormatError, OSError) as exc:
            self.log_exc(u"An unexpected error occurred while trying to read the sythesized audio file", exc, True, None)
            return (False, None)
Exemplo n.º 19
0
    def _synthesize_multiple_c_extension(self, text_file, output_file_path, quit_after=None, backwards=False):
        """
        Synthesize all the fragments of ``text_file`` into ``output_file_path``
        by means of the ``aeneas.cew`` C extension.

        If the ``CEW_SUBPROCESS_ENABLED`` runtime option is set,
        ``aeneas.cewsubprocess`` is tried first; if that attempt fails
        (or the option is not set), ``aeneas.cew`` is called directly.

        On success return ``(True, (anchors, total_time, num_chars))``;
        return ``(False, None)`` if the direct call to ``aeneas.cew`` fails.

        :rtype: (bool, (list, :class:`~aeneas.timevalue.TimeValue`, int))
        """
        self.log(u"Synthesizing using C extension...")

        # translate the Python arguments into the plain values passed to cew
        try:
            c_quit_after = float(quit_after)
        except TypeError:
            # e.g. quit_after is None
            c_quit_after = 0.0
        c_backwards = 1 if backwards else 0
        self.log([u"output_file_path: %s", output_file_path])
        self.log([u"c_quit_after:     %.3f", c_quit_after])
        self.log([u"c_backwards:      %d", c_backwards])

        # build one (voice_code, text) pair per fragment
        self.log(u"Preparing u_text...")
        fragments = text_file.fragments
        u_text = []
        for frag in fragments:
            language = frag.language
            filtered = frag.filtered_text
            if language is None:
                language = self.DEFAULT_LANGUAGE
            voice_code = self._language_to_voice_code(language)
            u_text.append((voice_code, u"" if filtered is None else filtered))
        self.log(u"Preparing u_text... done")

        # first attempt (optional): run cew through cewsubprocess
        sr = sf = intervals = None
        if self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED]:
            self.log(u"Using cewsubprocess to call aeneas.cew")
            try:
                self.log(u"Importing aeneas.cewsubprocess...")
                from aeneas.cewsubprocess import CEWSubprocess
                self.log(u"Importing aeneas.cewsubprocess... done")
                self.log(u"Calling aeneas.cewsubprocess...")
                runner = CEWSubprocess(rconf=self.rconf, logger=self.logger)
                sr, sf, intervals = runner.synthesize_multiple(
                    output_file_path,
                    c_quit_after,
                    c_backwards,
                    u_text
                )
                self.log(u"Calling aeneas.cewsubprocess... done")
            except Exception as exc:
                # NOTE not critical: sr is still None,
                #      hence aeneas.cew will be called directly below
                self.log_exc(u"An unexpected error occurred while running cewsubprocess", exc, False, None)

        # second attempt: call cew directly, if we do not have a result yet
        if sr is None:
            self.log(u"Preparing c_text...")
            # Python 2 => pass byte strings, Python 3 => pass Unicode strings
            to_c_string = gf.safe_bytes if gf.PY2 else gf.safe_unicode
            c_text = [(to_c_string(code), to_c_string(txt)) for (code, txt) in u_text]
            self.log(u"Preparing c_text... done")

            self.log(u"Calling aeneas.cew directly")
            try:
                self.log(u"Importing aeneas.cew...")
                import aeneas.cew.cew
                self.log(u"Importing aeneas.cew... done")
                self.log(u"Calling aeneas.cew...")
                sr, sf, intervals = aeneas.cew.cew.synthesize_multiple(
                    output_file_path,
                    c_quit_after,
                    c_backwards,
                    c_text
                )
                self.log(u"Calling aeneas.cew... done")
            except Exception as exc:
                self.log_exc(u"An unexpected error occurred while running cew", exc, False, None)
                return (False, None)

        self.log([u"sr: %d", sr])
        self.log([u"sf: %d", sf])

        # pair each of the sf reported intervals with its fragment
        # NOTE anchors do not make sense if backwards == True
        anchors = []
        num_chars = 0
        current_time = TimeValue("0.000")
        ordered = fragments[::-1] if backwards else fragments
        for index in range(sf):
            fragment = ordered[index]
            interval = intervals[index]
            # begin time, fragment id and text are stored for later output
            anchors.append([
                TimeValue(interval[0]),
                fragment.identifier,
                fragment.filtered_text
            ])
            num_chars += fragment.characters
            # the end of the last processed interval is the total time so far
            current_time = TimeValue(interval[1])

        self.log([u"Returning %d time anchors", len(anchors)])
        self.log([u"Current time %.3f", current_time])
        self.log([u"Synthesized %d characters", num_chars])
        self.log(u"Synthesizing using C extension... done")
        return (True, (anchors, current_time, num_chars))
Exemplo n.º 20
0
    def perform_command(self):
        """
        Extract the MFCCs of the input audio file and save them to the output file.

        The first two actual arguments are the input and the output file paths;
        if fewer than two are given, the help is printed and its value returned.

        The matrix is saved as text with ``numpy.savetxt``
        (using the ``--format`` value, or ``%.18e`` if not given),
        unless one of ``-b/--binary``, ``-z/--npz`` or ``-n/--npy``
        is specified (checked in this order).
        ``-d/--delete-first`` drops the first row and
        ``-t/--transpose`` transposes the matrix before saving.

        Return ``NO_ERROR_EXIT_CODE`` on success, ``ERROR_EXIT_CODE`` otherwise.

        :rtype: int
        """
        if len(self.actual_arguments) < 2:
            return self.print_help()
        input_file_path = self.actual_arguments[0]
        output_file_path = self.actual_arguments[1]

        # read the options
        output_text_format = self.has_option_with_value(u"--format")
        if output_text_format is None:
            output_text_format = u"%.18e"
        output_binary = self.has_option([u"-b", u"--binary"])
        output_npz = self.has_option([u"-z", u"--npz"])
        output_npy = self.has_option([u"-n", u"--npy"])
        delete_first = self.has_option([u"-d", u"--delete-first"])
        transpose = self.has_option([u"-t", u"--transpose"])

        self.check_c_extensions("cmfcc")
        if not self.check_input_file(input_file_path):
            return self.ERROR_EXIT_CODE
        if not self.check_output_file(output_file_path):
            return self.ERROR_EXIT_CODE

        try:
            mfccs = AudioFileMFCC(input_file_path, rconf=self.rconf, logger=self.logger).all_mfcc
            if delete_first:
                mfccs = mfccs[1:, :]
            if transpose:
                mfccs = mfccs.transpose()
            if output_binary:
                # save as a raw C float64 binary file
                mapped = numpy.memmap(output_file_path, dtype="float64", mode="w+", shape=mfccs.shape)
                mapped[:] = mfccs[:]
                mapped.flush()
                del mapped
            elif output_npz:
                # save as a .npz compressed binary file
                with io.open(output_file_path, "wb") as output_file:
                    numpy.savez(output_file, mfccs)
            elif output_npy:
                # save as a .npy binary file
                with io.open(output_file_path, "wb") as output_file:
                    numpy.save(output_file, mfccs)
            else:
                # save as a text file
                # NOTE: in Python 2, passing the fmt value a Unicode string crashes NumPy
                #       hence, converting back to bytes, which works in Python 3 too
                numpy.savetxt(output_file_path, mfccs, fmt=gf.safe_bytes(output_text_format))
            self.print_info(u"MFCCs shape: %d %d" % (mfccs.shape))
            self.print_success(u"MFCCs saved to '%s'" % (output_file_path))
            return self.NO_ERROR_EXIT_CODE
        except AudioFileConverterError:
            self.print_error(u"Unable to call the ffmpeg executable '%s'" % (self.rconf[RuntimeConfiguration.FFMPEG_PATH]))
            self.print_error(u"Make sure the path to ffmpeg is correct")
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            self.print_error(u"Cannot read file '%s'" % (input_file_path))
            self.print_error(u"Check that its format is supported by ffmpeg")
        except (IOError, OSError):
            # NOTE: in Python 2, file-related failures raise IOError,
            #       which is not a subclass of OSError;
            #       in Python 3 the two names are aliases, so listing both is harmless
            self.print_error(u"Cannot write file '%s'" % (output_file_path))

        # all the handled failures end up here
        return self.ERROR_EXIT_CODE