示例#1
0
    def _extract_mfcc(self, file_path=None, file_format=None, audio_file=None):
        """
        Create an ``AudioFileMFCC`` object for the given audio source,
        optionally running VAD on it.

        The three arguments are forwarded unchanged to the
        ``AudioFileMFCC`` constructor, together with
        ``self.rconf`` and ``self.logger``.
        If ``self.rconf.mmn`` is true, ``run_vad()`` is invoked
        on the new object, using the four ``MFCC_MASK_*`` values
        read from the runtime configuration.

        :param file_path: passed as ``file_path`` to ``AudioFileMFCC``
        :param file_format: passed as ``file_format`` to ``AudioFileMFCC``
        :param audio_file: passed as ``audio_file`` to ``AudioFileMFCC``
        :rtype: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        """
        mfcc = AudioFileMFCC(
            file_path=file_path,
            file_format=file_format,
            audio_file=audio_file,
            rconf=self.rconf,
            logger=self.logger
        )
        # VAD is only run when the ``mmn`` flag of the configuration is set
        if not self.rconf.mmn:
            return mfcc

        self.log(u"Running VAD inside _extract_mfcc...")
        rconf = self.rconf
        mask_settings = {
            "log_energy_threshold": rconf[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD],
            "min_nonspeech_length": rconf[RuntimeConfiguration.MFCC_MASK_MIN_NONSPEECH_LENGTH],
            "extend_before": rconf[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_BEFORE],
            "extend_after": rconf[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_AFTER],
        }
        mfcc.run_vad(**mask_settings)
        self.log(u"Running VAD inside _extract_mfcc... done")
        return mfcc
示例#2
0
    def perform_command(self):
        """
        Run VAD on an audio file, write the requested intervals,
        and return the appropriate exit code.

        The positional arguments are, in order: the audio file path,
        the mode (``speech``, ``nonspeech`` or ``both``) and,
        optionally, the output file path.
        With fewer than two arguments, or with an unknown mode,
        the value of ``print_help()`` is returned.

        If ``-i`` or ``--index`` is given, ``time=False`` is passed
        to ``intervals()``, otherwise ``time=True``;
        the same flag is forwarded to ``write_to_file()``.

        :rtype: int
        """
        args = self.actual_arguments
        if len(args) < 2:
            return self.print_help()
        audio_file_path, mode = args[0], args[1]
        if mode not in (u"speech", u"nonspeech", u"both"):
            return self.print_help()
        output_file_path = args[2] if len(args) >= 3 else None
        output_time = not self.has_option([u"-i", u"--index"])

        self.check_c_extensions("cmfcc")
        if not self.check_input_file(audio_file_path):
            return self.ERROR_EXIT_CODE
        if output_file_path is not None:
            if not self.check_output_file(output_file_path):
                return self.ERROR_EXIT_CODE

        self.print_info(u"Reading audio...")
        try:
            mfcc = AudioFileMFCC(audio_file_path, rconf=self.rconf, logger=self.logger)
        except AudioFileConverterError:
            ffmpeg_path = self.rconf[RuntimeConfiguration.FFMPEG_PATH]
            self.print_error(u"Unable to call the ffmpeg executable '%s'" % (ffmpeg_path))
            self.print_error(u"Make sure the path to ffmpeg is correct")
            return self.ERROR_EXIT_CODE
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            self.print_error(u"Cannot read file '%s'" % (audio_file_path))
            self.print_error(u"Check that its format is supported by ffmpeg")
            return self.ERROR_EXIT_CODE
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while reading the audio file:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE
        else:
            self.print_info(u"Reading audio... done")

        self.print_info(u"Executing VAD...")
        mfcc.run_vad()
        self.print_info(u"Executing VAD... done")

        # both lists are always requested, regardless of the mode
        speech = mfcc.intervals(speech=True, time=output_time)
        nonspeech = mfcc.intervals(speech=False, time=output_time)
        if mode == u"both":
            # label each interval with its kind, then merge by sorting
            labelled = [[iv[0], iv[1], u"speech"] for iv in speech]
            labelled += [[iv[0], iv[1], u"nonspeech"] for iv in nonspeech]
            selected = sorted(labelled)
        elif mode == u"speech":
            selected = speech
        else:
            # the mode was validated above, so only "nonspeech" is left
            selected = nonspeech
        self.write_to_file(
            output_file_path,
            [tuple(iv) for iv in selected],
            output_time
        )

        return self.NO_ERROR_EXIT_CODE
示例#3
0
    def _extract_mfcc(self, file_path=None, file_format=None, audio_file=None):
        """
        Build the ``AudioFileMFCC`` object for the given audio source.

        ``file_path``, ``file_format`` and ``audio_file`` are handed
        to the ``AudioFileMFCC`` constructor as keyword arguments,
        along with ``self.rconf`` and ``self.logger``.
        When ``self.rconf.mmn`` is true, ``run_vad()`` is also called
        on the resulting object, with the ``MFCC_MASK_*`` parameters
        taken from ``self.rconf``.

        :param file_path: passed as ``file_path`` to ``AudioFileMFCC``
        :param file_format: passed as ``file_format`` to ``AudioFileMFCC``
        :param audio_file: passed as ``audio_file`` to ``AudioFileMFCC``
        :rtype: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
        """
        result = AudioFileMFCC(file_path=file_path,
                               file_format=file_format,
                               audio_file=audio_file,
                               rconf=self.rconf,
                               logger=self.logger)
        if self.rconf.mmn:
            self.log(u"Running VAD inside _extract_mfcc...")
            # read the four mask settings first, then pass them to the VAD
            config = self.rconf
            energy_threshold = config[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD]
            nonspeech_length = config[RuntimeConfiguration.MFCC_MASK_MIN_NONSPEECH_LENGTH]
            before = config[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_BEFORE]
            after = config[RuntimeConfiguration.MFCC_MASK_EXTEND_SPEECH_INTERVAL_AFTER]
            result.run_vad(log_energy_threshold=energy_threshold,
                           min_nonspeech_length=nonspeech_length,
                           extend_before=before,
                           extend_after=after)
            self.log(u"Running VAD inside _extract_mfcc... done")
        return result
示例#4
0
文件: run_sd.py 项目: ptrwtts/aeneas
    def perform_command(self):
        """
        Detect the audio interval for the given text and audio file,
        print the result, and return the appropriate exit code.

        The positional arguments are, in order: the text format,
        the text, the language, and the audio file path.
        If the text format is ``list``, the text argument is used
        as a string; for any other value in
        ``TextFileFormat.ALLOWED_VALUES`` it is checked
        with ``check_input_file()``.
        With fewer than four arguments, or with an unknown
        text format, the value of ``print_help()`` is returned.

        :rtype: int
        """
        args = self.actual_arguments
        if len(args) < 4:
            return self.print_help()
        text_format = gf.safe_unicode(args[0])
        if text_format == u"list":
            text = gf.safe_unicode(args[1])
        elif text_format not in TextFileFormat.ALLOWED_VALUES:
            return self.print_help()
        else:
            text = args[1]
            if not self.check_input_file(text):
                return self.ERROR_EXIT_CODE

        # read the text-related options, in this fixed order
        option_names = (
            u"--l1-id-regex",
            u"--l2-id-regex",
            u"--l3-id-regex",
            u"--id-regex",
            u"--class-regex",
            u"--sort",
        )
        l1_id_regex, l2_id_regex, l3_id_regex, id_regex, class_regex, sort = [
            self.has_option_with_value(option_name) for option_name in option_names
        ]
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: l1_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: l2_id_regex,
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: l3_id_regex,
            gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX: id_regex,
            gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX: class_regex,
            gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT: sort
        }
        # munparsed requires all three level regexes
        if text_format == TextFileFormat.MUNPARSED and any(
                regex is None for regex in (l1_id_regex, l2_id_regex, l3_id_regex)):
            self.print_error(u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format")
            return self.ERROR_EXIT_CODE
        # unparsed requires at least one of the two regexes
        if text_format == TextFileFormat.UNPARSED and id_regex is None and class_regex is None:
            self.print_error(u"You must specify --id-regex and/or --class-regex for unparsed format")
            return self.ERROR_EXIT_CODE

        language = gf.safe_unicode(args[2])

        audio_file_path = args[3]
        if not self.check_input_file(audio_file_path):
            return self.ERROR_EXIT_CODE

        text_file = self.get_text_file(text_format, text, parameters)
        if text_file is None:
            self.print_error(u"Unable to build a TextFile from the given parameters")
            return self.ERROR_EXIT_CODE
        if len(text_file) == 0:
            self.print_error(u"No text fragments found")
            return self.ERROR_EXIT_CODE
        text_file.set_language(language)
        self.print_info(u"Read input text with %d fragments" % (len(text_file)))

        self.print_info(u"Reading audio...")
        try:
            mfcc = AudioFileMFCC(audio_file_path, rconf=self.rconf, logger=self.logger)
        except AudioFileConverterError:
            ffmpeg_path = self.rconf[RuntimeConfiguration.FFMPEG_PATH]
            self.print_error(u"Unable to call the ffmpeg executable '%s'" % (ffmpeg_path))
            self.print_error(u"Make sure the path to ffmpeg is correct")
            return self.ERROR_EXIT_CODE
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            self.print_error(u"Cannot read file '%s'" % (audio_file_path))
            self.print_error(u"Check that its format is supported by ffmpeg")
            return self.ERROR_EXIT_CODE
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while reading the audio file:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE
        else:
            self.print_info(u"Reading audio... done")

        self.print_info(u"Running VAD...")
        mfcc.run_vad()
        self.print_info(u"Running VAD... done")

        # optional head/tail bounds; safe_float() is given None as its second argument
        min_head, max_head, min_tail, max_tail = [
            gf.safe_float(self.has_option_with_value(bound_name), None)
            for bound_name in (u"--min-head", u"--max-head", u"--min-tail", u"--max-tail")
        ]

        self.print_info(u"Detecting audio interval...")
        detector = SD(mfcc, text_file, rconf=self.rconf, logger=self.logger)
        start, end = detector.detect_interval(min_head, max_head, min_tail, max_tail)
        self.print_info(u"Detecting audio interval... done")

        self.print_result(mfcc.audio_length, start, end)
        return self.NO_ERROR_EXIT_CODE
示例#5
0
 def perform(self, input_file_path, speech_length, nonspeech_length):
     """
     Run VAD on the given audio file and check how many
     speech and nonspeech intervals are found.

     :param input_file_path: path of the audio file, resolved with
                             ``gf.absolute_path()`` against ``__file__``
     :param speech_length: expected number of speech intervals
     :param nonspeech_length: expected number of nonspeech intervals
     """
     audio_path = gf.absolute_path(input_file_path, __file__)
     mfcc = AudioFileMFCC(audio_path)
     mfcc.run_vad()
     speech_count = len(mfcc.intervals(speech=True))
     self.assertEqual(speech_count, speech_length)
     nonspeech_count = len(mfcc.intervals(speech=False))
     self.assertEqual(nonspeech_count, nonspeech_length)
示例#6
0
 def perform(self, input_file_path, speech_length, nonspeech_length):
     """
     Check the number of speech and nonspeech intervals
     detected by VAD in the given audio file.

     :param input_file_path: path of the audio file, resolved with
                             ``gf.absolute_path()`` against ``__file__``
     :param speech_length: expected number of speech intervals
     :param nonspeech_length: expected number of nonspeech intervals
     """
     analyzed = AudioFileMFCC(gf.absolute_path(input_file_path, __file__))
     analyzed.run_vad()
     # speech is checked first, then nonspeech
     found_speech = analyzed.intervals(speech=True)
     self.assertEqual(len(found_speech), speech_length)
     found_nonspeech = analyzed.intervals(speech=False)
     self.assertEqual(len(found_nonspeech), nonspeech_length)
示例#7
0
    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        The command reads an audio file, runs VAD on it, and writes
        the speech intervals, the nonspeech intervals, or both,
        depending on the mode.

        Positional arguments, in order: audio file path, mode
        (``speech``, ``nonspeech`` or ``both``), and an optional
        output file path.
        If fewer than two arguments are given, or the mode is not
        one of the three above, the value of ``print_help()``
        is returned.

        The ``-i`` / ``--index`` option sets the ``time`` argument
        of ``intervals()`` to ``False``; the same flag is forwarded
        to ``write_to_file()``.

        :rtype: int
        """
        arguments = self.actual_arguments
        allowed_modes = (u"speech", u"nonspeech", u"both")
        if len(arguments) < 2:
            return self.print_help()
        audio_file_path = arguments[0]
        mode = arguments[1]
        if mode not in allowed_modes:
            return self.print_help()
        if len(arguments) >= 3:
            output_file_path = arguments[2]
        else:
            output_file_path = None
        index_requested = self.has_option([u"-i", u"--index"])
        output_time = not index_requested

        self.check_c_extensions("cmfcc")
        if not self.check_input_file(audio_file_path):
            return self.ERROR_EXIT_CODE
        if output_file_path is not None:
            output_ok = self.check_output_file(output_file_path)
            if not output_ok:
                return self.ERROR_EXIT_CODE

        self.print_info(u"Reading audio...")
        try:
            audio = AudioFileMFCC(
                audio_file_path,
                rconf=self.rconf,
                logger=self.logger
            )
        except AudioFileConverterError:
            message = u"Unable to call the ffmpeg executable '%s'" % (
                self.rconf[RuntimeConfiguration.FFMPEG_PATH])
            self.print_error(message)
            self.print_error(u"Make sure the path to ffmpeg is correct")
            return self.ERROR_EXIT_CODE
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            message = u"Cannot read file '%s'" % (audio_file_path)
            self.print_error(message)
            self.print_error(u"Check that its format is supported by ffmpeg")
            return self.ERROR_EXIT_CODE
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while reading the audio file:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE
        self.print_info(u"Reading audio... done")

        self.print_info(u"Executing VAD...")
        audio.run_vad()
        self.print_info(u"Executing VAD... done")

        speech = audio.intervals(speech=True, time=output_time)
        nonspeech = audio.intervals(speech=False, time=output_time)
        if mode == u"speech":
            intervals = speech
        elif mode == u"nonspeech":
            intervals = nonspeech
        elif mode == u"both":
            # tag every interval with its kind, then order the merged list
            tagged_speech = [[item[0], item[1], u"speech"] for item in speech]
            tagged_nonspeech = [[item[0], item[1], u"nonspeech"] for item in nonspeech]
            intervals = tagged_speech + tagged_nonspeech
            intervals.sort()
        as_tuples = []
        for item in intervals:
            as_tuples.append(tuple(item))
        self.write_to_file(output_file_path, as_tuples, output_time)

        return self.NO_ERROR_EXIT_CODE
示例#8
0
    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        The command builds a text file object and an
        ``AudioFileMFCC`` from the arguments, runs VAD on the audio,
        asks ``SD.detect_interval()`` for the start and end
        of the audio interval, and prints the result.

        Positional arguments, in order: text format, text,
        language, audio file path.
        When the text format is ``list`` the text argument is used
        as a string; for the other values in
        ``TextFileFormat.ALLOWED_VALUES`` it is checked
        with ``check_input_file()``.
        If fewer than four arguments are given, or the text format
        is not recognized, the value of ``print_help()`` is returned.

        :rtype: int
        """
        arguments = self.actual_arguments
        if len(arguments) < 4:
            return self.print_help()

        text_format = gf.safe_unicode(arguments[0])
        is_list = (text_format == u"list")
        if (not is_list) and (text_format not in TextFileFormat.ALLOWED_VALUES):
            return self.print_help()
        if is_list:
            text = gf.safe_unicode(arguments[1])
        else:
            text = arguments[1]
            if not self.check_input_file(text):
                return self.ERROR_EXIT_CODE

        read_option = self.has_option_with_value
        l1_id_regex = read_option(u"--l1-id-regex")
        l2_id_regex = read_option(u"--l2-id-regex")
        l3_id_regex = read_option(u"--l3-id-regex")
        id_regex = read_option(u"--id-regex")
        class_regex = read_option(u"--class-regex")
        sort = read_option(u"--sort")

        # keys are inserted one by one, in this fixed order
        parameters = {}
        parameters[gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX] = l1_id_regex
        parameters[gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX] = l2_id_regex
        parameters[gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX] = l3_id_regex
        parameters[gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX] = class_regex
        parameters[gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX] = id_regex
        parameters[gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT] = sort

        if text_format == TextFileFormat.MUNPARSED:
            # all three level regexes are mandatory for munparsed
            if (l1_id_regex is None) or (l2_id_regex is None) or (l3_id_regex is None):
                self.print_error(u"You must specify --l1-id-regex and --l2-id-regex and --l3-id-regex for munparsed format")
                return self.ERROR_EXIT_CODE
        if text_format == TextFileFormat.UNPARSED:
            # at least one of the two regexes is mandatory for unparsed
            if (id_regex is None) and (class_regex is None):
                self.print_error(u"You must specify --id-regex and/or --class-regex for unparsed format")
                return self.ERROR_EXIT_CODE

        language = gf.safe_unicode(arguments[2])

        audio_file_path = arguments[3]
        if not self.check_input_file(audio_file_path):
            return self.ERROR_EXIT_CODE

        text_file = self.get_text_file(text_format, text, parameters)
        if text_file is None:
            self.print_error(u"Unable to build a TextFile from the given parameters")
            return self.ERROR_EXIT_CODE
        if len(text_file) == 0:
            self.print_error(u"No text fragments found")
            return self.ERROR_EXIT_CODE
        text_file.set_language(language)
        self.print_info(u"Read input text with %d fragments" % (len(text_file)))

        self.print_info(u"Reading audio...")
        try:
            audio = AudioFileMFCC(audio_file_path, rconf=self.rconf, logger=self.logger)
        except AudioFileConverterError:
            message = u"Unable to call the ffmpeg executable '%s'" % (
                self.rconf[RuntimeConfiguration.FFMPEG_PATH])
            self.print_error(message)
            self.print_error(u"Make sure the path to ffmpeg is correct")
            return self.ERROR_EXIT_CODE
        except (AudioFileUnsupportedFormatError, AudioFileNotInitializedError):
            message = u"Cannot read file '%s'" % (audio_file_path)
            self.print_error(message)
            self.print_error(u"Check that its format is supported by ffmpeg")
            return self.ERROR_EXIT_CODE
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while reading the audio file:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE
        self.print_info(u"Reading audio... done")

        self.print_info(u"Running VAD...")
        audio.run_vad()
        self.print_info(u"Running VAD... done")

        def float_option(option_name):
            # safe_float() receives None as its second argument
            return gf.safe_float(read_option(option_name), None)

        min_head = float_option(u"--min-head")
        max_head = float_option(u"--max-head")
        min_tail = float_option(u"--min-tail")
        max_tail = float_option(u"--max-tail")

        self.print_info(u"Detecting audio interval...")
        detector = SD(audio, text_file, rconf=self.rconf, logger=self.logger)
        interval = detector.detect_interval(min_head, max_head, min_tail, max_tail)
        start, end = interval
        self.print_info(u"Detecting audio interval... done")

        self.print_result(audio.audio_length, start, end)
        return self.NO_ERROR_EXIT_CODE