def check_cew(cls):
    """
    Check whether the Python C extension ``cew`` can be run.

    On any OS other than Linux the extension is reported as
    not available; this is only a warning and it is not
    counted as a failure.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    if gf.is_linux():
        failed = not gf.can_run_c_extension("cew")
        if failed:
            gf.print_warning(u"aeneas.cew NOT COMPILED")
            gf.print_info(
                u" You can still run aeneas but it will be a bit slower")
            gf.print_info(
                u" To compile the cew module, run %s" % SETUP_COMMAND)
        else:
            gf.print_success(u"aeneas.cew COMPILED")
        return failed

    # not on Linux: warn the user, but do not flag a failure
    gf.print_warning(u"aeneas.cew NOT AVAILABLE")
    gf.print_info(
        u" The Python C Extension cew is not available for your OS")
    gf.print_info(
        u" You can still run aeneas but it will be a bit slower (than Linux)")
    return False
def check_cew(cls):
    """
    Check whether the Python C extension ``cew`` can be run.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    # NOTE: an earlier shortcut that printed "aeneas.cew NOT AVAILABLE"
    # and returned False when gf.is_linux() was false is disabled here,
    # so the extension is probed on every OS.
    cew_works = gf.can_run_c_extension("cew")
    if not cew_works:
        gf.print_warning(u"aeneas.cew NOT COMPILED")
        gf.print_info(u" You can still run aeneas but it will be a bit slower")
        gf.print_info(u" To compile the cew module, run %s" % SETUP_COMMAND)
        return True
    gf.print_success(u"aeneas.cew COMPILED")
    return False
def _setup_dtw(self):
    """
    Build and return the DTW object for the real and synthesized waves.

    The algorithm requested in the runtime configuration is used,
    unless the synthesized wave has no more MFCC frames than
    the stripe width ``delta`` and the C extensions are
    disabled or cannot be run: in that case the EXACT algorithm
    is selected instead.

    Missing MFCC objects are reported through ``self.log_exc``
    with ``DTWAlignerNotInitialized``.

    :returns: a ``DTWExact`` or ``DTWStripe`` object
    """
    rconf = self.rconf

    # both AudioFileMFCC objects must be present and populated
    real_wave = self.real_wave_mfcc
    if (real_wave is None) or (real_wave.middle_mfcc is None):
        self.log_exc(u"The real wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)
    synt_wave = self.synt_wave_mfcc
    if (synt_wave is None) or (synt_wave.middle_mfcc is None):
        self.log_exc(u"The synt wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)

    # read the parameters
    algorithm = rconf[RuntimeConfiguration.DTW_ALGORITHM]
    delta = int(2 * rconf[RuntimeConfiguration.DTW_MARGIN] / rconf[RuntimeConfiguration.MFCC_WINDOW_SHIFT])
    mfcc2_length = self.synt_wave_mfcc.middle_length
    self.log([u"Requested algorithm: '%s'", algorithm])
    self.log([u"delta = %d", delta])
    self.log([u"m = %d", mfcc2_length])

    # the stripe is at least as wide as the synt wave
    if mfcc2_length <= delta:
        self.log(u"We have mfcc2_length <= delta")
        if rconf[RuntimeConfiguration.C_EXTENSIONS] and gf.can_run_c_extension():
            # the C code can be run: since it is still faster, keep the requested algorithm
            self.log(u"C extensions enabled and loaded: not selecting EXACT algorithm")
        else:
            self.log(u"Selecting EXACT algorithm")
            algorithm = DTWAlgorithm.EXACT

    # instantiate the selected algorithm
    if algorithm == DTWAlgorithm.EXACT:
        self.log(u"Computing with EXACT algo")
        return DTWExact(
            self.real_wave_mfcc.middle_mfcc,
            self.synt_wave_mfcc.middle_mfcc,
            rconf=self.rconf,
            logger=self.logger
        )
    self.log(u"Computing with STRIPE algo")
    return DTWStripe(
        self.real_wave_mfcc.middle_mfcc,
        self.synt_wave_mfcc.middle_mfcc,
        delta,
        rconf=self.rconf,
        logger=self.logger
    )
def main():
    """
    Entry point: load a job from a container, execute it,
    and write the output container.

    Expected arguments: ``container_path [config_string] output_dir [-v]``.
    """
    argv = sys.argv
    if len(argv) < 3:
        usage()
        return

    container_path = argv[1]
    verbose = (argv[-1] == "-v")

    # the config string is present only if there are enough arguments
    # (one more is needed when the trailing "-v" is given)
    number_of_arguments = 5 if verbose else 4
    if len(argv) >= number_of_arguments:
        config_string = argv[2]
        output_dir = argv[3]
    else:
        config_string = None
        output_dir = argv[2]

    logger = Logger(tee=verbose)
    executor = ExecuteJob(logger=logger)

    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    print("[INFO] Loading job from container...")
    loaded = executor.load_job_from_container(container_path, config_string)
    print("[INFO] Loading job from container... done")
    if not loaded:
        print("[ERRO] The job cannot be loaded from the specified container")
        return

    print("[INFO] Executing...")
    executed = executor.execute()
    print("[INFO] Executing... done")
    if not executed:
        print("[ERRO] An error occurred while executing the job")
        return

    print("[INFO] Creating output container...")
    written, path = executor.write_output_container(output_dir)
    print("[INFO] Creating output container... done")
    if not written:
        print("[ERRO] An error occurred while writing the output container")
    else:
        print("[INFO] Created %s" % path)

    executor.clean(True)
def check_cew(cls):
    """
    Check whether the Python C extension ``cew`` can be run,
    printing the outcome.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    if not gf.can_run_c_extension("cew"):
        gf.print_warning(u"aeneas.cew NOT AVAILABLE")
        gf.print_info(u" You can still run aeneas but it will be a bit slower")
        gf.print_info(u" Please refer to the installation documentation for details")
        return True
    gf.print_success(u"aeneas.cew AVAILABLE")
    return False
def check_cmfcc(cls):
    """
    Check whether the Python C extension ``cmfcc`` can be run,
    printing the outcome.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    failed = not gf.can_run_c_extension("cmfcc")
    if failed:
        gf.print_warning(u"aeneas.cmfcc NOT COMPILED")
        gf.print_info(u" You can still run aeneas but it will be significantly slower")
        gf.print_info(u" Please refer to the installation documentation for details")
    else:
        gf.print_success(u"aeneas.cmfcc COMPILED")
    return failed
def check_cmfcc(cls):
    """
    Check whether the Python C extension ``cmfcc`` can be run,
    printing the outcome and, on failure, the command to compile it.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    if not gf.can_run_c_extension("cmfcc"):
        gf.print_warning(u"aeneas.cmfcc NOT COMPILED")
        gf.print_info(u" You can still run aeneas but it will be significantly slower")
        gf.print_info(u" To compile the cmfcc module, run %s" % SETUP_COMMAND)
        return True
    gf.print_success(u"aeneas.cmfcc COMPILED")
    return False
def check_cmfcc(cls):
    """
    Probe the Python C extension ``cmfcc`` and print the result.

    When the extension cannot be run, a warning is printed
    together with the command that compiles it.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    cmfcc_works = gf.can_run_c_extension("cmfcc")
    if cmfcc_works:
        gf.print_success(u"aeneas.cmfcc COMPILED")
    else:
        gf.print_warning(u"aeneas.cmfcc NOT COMPILED")
        gf.print_info(
            u" You can still run aeneas but it will be significantly slower")
        gf.print_info(
            u" To compile the cmfcc module, run %s" % SETUP_COMMAND)
    return not cmfcc_works
def _setup_dtw(self):
    """
    Build and return the DTW object for the real and synthesized waves.

    The algorithm requested in the runtime configuration is used,
    unless the synthesized wave has no more MFCC frames than
    the stripe width ``delta`` and the C extensions are
    disabled or cannot be run: in that case the EXACT algorithm
    is selected instead.

    Missing MFCC objects are reported through ``self.log_exc``
    with ``DTWAlignerNotInitialized``.

    :returns: a ``DTWExact`` or ``DTWStripe`` object
    """
    rconf = self.rconf

    # both AudioFileMFCC objects must be present and populated
    real_wave = self.real_wave_mfcc
    if (real_wave is None) or (real_wave.middle_mfcc is None):
        self.log_exc(u"The real wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)
    synt_wave = self.synt_wave_mfcc
    if (synt_wave is None) or (synt_wave.middle_mfcc is None):
        self.log_exc(u"The synt wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)

    # read the parameters
    algorithm = rconf[RuntimeConfiguration.DTW_ALGORITHM]
    delta = int(2 * rconf[RuntimeConfiguration.DTW_MARGIN] / rconf[RuntimeConfiguration.MFCC_WINDOW_SHIFT])
    mfcc2_length = self.synt_wave_mfcc.middle_length
    self.log([u"Requested algorithm: '%s'", algorithm])
    self.log([u"delta = %d", delta])
    self.log([u"m = %d", mfcc2_length])

    # the stripe is at least as wide as the synt wave
    if mfcc2_length <= delta:
        self.log(u"We have mfcc2_length <= delta")
        if rconf[RuntimeConfiguration.C_EXTENSIONS] and gf.can_run_c_extension():
            # the C code can be run: since it is still faster, keep the requested algorithm
            self.log(u"C extensions enabled and loaded: not selecting EXACT algorithm")
        else:
            self.log(u"Selecting EXACT algorithm")
            algorithm = DTWAlgorithm.EXACT

    # instantiate the selected algorithm
    if algorithm == DTWAlgorithm.EXACT:
        self.log(u"Computing with EXACT algo")
        return DTWExact(
            self.real_wave_mfcc.middle_mfcc,
            self.synt_wave_mfcc.middle_mfcc,
            rconf=self.rconf,
            logger=self.logger
        )
    self.log(u"Computing with STRIPE algo")
    return DTWStripe(
        self.real_wave_mfcc.middle_mfcc,
        self.synt_wave_mfcc.middle_mfcc,
        delta,
        rconf=self.rconf,
        logger=self.logger
    )
def extract_mfcc(self, frame_rate=gc.MFCC_FRAME_RATE):
    """
    Extract MFCCs from the given audio file.

    If audio data is not loaded, load it, extract MFCCs,
    and then clear it.

    The ``cmfcc`` C extension is tried first, when it is enabled
    in ``gc`` and it can be loaded; if it is unavailable or it fails,
    the pure Python code is run. A failure of either implementation
    is logged as a warning and it is not propagated to the caller.

    This function works only for mono wav files!

    :param frame_rate: the MFCC frame rate, in frames per second. Default:
                       :class:`aeneas.globalconstants.MFCC_FRAME_RATE`
    :type  frame_rate: int
    """
    # remember if we have audio data
    had_audio_data = (self.audio_data is not None)
    if not had_audio_data:
        self.load_data()

    try:
        if gc.USE_C_EXTENSIONS:
            self._log("C extensions enabled in gc")
            if gf.can_run_c_extension("cmfcc"):
                self._log("C extensions enabled in gc and cmfcc can be loaded")
                try:
                    self._compute_mfcc_c_extension(frame_rate)
                    return
                except Exception:
                    # catch Exception, not everything, so that
                    # KeyboardInterrupt and SystemExit still propagate
                    self._log(
                        "An error occurred running cmfcc",
                        severity=Logger.WARNING
                    )
            else:
                self._log("C extensions enabled in gc, but cmfcc cannot be loaded")
        else:
            self._log("C extensions disabled in gc")

        self._log("Running the pure Python code")
        try:
            self._compute_mfcc_pure_python(frame_rate)
        except Exception:
            self._log(
                "An error occurred running _compute_mfcc_pure_python",
                severity=Logger.WARNING
            )
    finally:
        # release the audio data we loaded ourselves on every exit path;
        # doing it here keeps a clear_data() failure from being
        # misreported as a cmfcc failure
        if not had_audio_data:
            self.clear_data()
def compute_path(self):
    """
    Compute the path, preferring the ``cdtw`` C extension.

    The C extension is used when it is enabled in ``gc`` and
    it can be loaded. If it is unavailable, or it fails while running,
    the pure Python code is run instead.

    :returns: whatever ``_compute_path_c_extension`` or
              ``_compute_path_pure_python`` returns
    """
    if gc.USE_C_EXTENSIONS:
        self._log("C extensions enabled in gc")
        if gf.can_run_c_extension("cdtw"):
            self._log("C extensions enabled in gc and cdtw can be loaded")
            try:
                return self._compute_path_c_extension()
            except Exception:
                # catch Exception, not everything, so that
                # KeyboardInterrupt and SystemExit still propagate
                self._log(
                    "An error occurred running cdtw",
                    severity=Logger.WARNING
                )
        else:
            self._log("C extensions enabled in gc, but cdtw cannot be loaded")
    else:
        self._log("C extensions disabled in gc")

    # fallback: C extension disabled, not loadable, or failed
    self._log("Running the pure Python code")
    return self._compute_path_pure_python()
def main():
    """
    Entry point: extract the MFCCs of an audio file
    and save them to a text file.

    Expected arguments: ``file_path save_path``.
    """
    if len(sys.argv) < 3:
        usage()
        return
    file_path, save_path = sys.argv[1], sys.argv[2]

    c_extensions_ok = gf.can_run_c_extension()
    if not c_extensions_ok:
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    # load the audio, compute the MFCCs, release the audio samples
    audiofile = AudioFile(file_path)
    audiofile.load_data()
    audiofile.extract_mfcc()
    audiofile.clear_data()

    numpy.savetxt(save_path, audiofile.audio_mfcc)
    print("[INFO] MFCCs saved to %s" % (save_path))
def extract_mfcc(self, frame_rate=gc.MFCC_FRAME_RATE):
    """
    Extract MFCCs from the given audio file.

    If audio data is not loaded, load it, extract MFCCs,
    and then clear it.

    The ``cmfcc`` C extension is tried first, when it is enabled
    in ``gc`` and it can be loaded; if it is unavailable or it fails,
    the pure Python code is run. A failure of either implementation
    is logged as a warning and it is not propagated to the caller.

    This function works only for mono wav files!

    :param frame_rate: the MFCC frame rate, in frames per second. Default:
                       :class:`aeneas.globalconstants.MFCC_FRAME_RATE`
    :type  frame_rate: int
    """
    # remember if we have audio data
    had_audio_data = (self.audio_data is not None)
    if not had_audio_data:
        self.load_data()

    try:
        if gc.USE_C_EXTENSIONS:
            self._log("C extensions enabled in gc")
            if gf.can_run_c_extension("cmfcc"):
                self._log("C extensions enabled in gc and cmfcc can be loaded")
                try:
                    self._compute_mfcc_c_extension(frame_rate)
                    return
                except Exception:
                    # catch Exception, not everything, so that
                    # KeyboardInterrupt and SystemExit still propagate
                    self._log("An error occurred running cmfcc",
                              severity=Logger.WARNING)
            else:
                self._log(
                    "C extensions enabled in gc, but cmfcc cannot be loaded")
        else:
            self._log("C extensions disabled in gc")

        self._log("Running the pure Python code")
        try:
            self._compute_mfcc_pure_python(frame_rate)
        except Exception:
            self._log("An error occurred running _compute_mfcc_pure_python",
                      severity=Logger.WARNING)
    finally:
        # release the audio data we loaded ourselves on every exit path;
        # doing it here keeps a clear_data() failure from being
        # misreported as a cmfcc failure
        if not had_audio_data:
            self.clear_data()
def check_c_extensions(self, name=None):
    """
    Tell whether the Python C extensions can be run,
    emitting warnings when they cannot.

    If ``name`` is not ``None``, only the C extension
    with that name is checked.

    :param name: the name of the Python C extension to test
    :type  name: string
    :returns: ``True`` if the check succeeded, ``False`` otherwise
    :rtype: bool
    """
    if gf.can_run_c_extension(name=name):
        return True

    # pick the headline depending on whether a single extension was requested
    if name is not None:
        headline = u"Unable to load Python C Extension %s" % (name)
    else:
        headline = u"Unable to load Python C Extensions"
    self.print_warning(headline)
    self.print_warning(u"Running the slower pure Python code")
    self.print_warning(u"See the documentation for directions to compile the Python C Extensions")
    return False
def _setup_dtw(self):
    """
    Build and return the DTW object.

    The requested algorithm is ``self.algorithm``. When the
    synthesized wave has no more MFCC frames than the stripe
    width ``delta``, the EXACT algorithm is selected instead,
    provided that the C extensions are disabled or cannot be run
    and that ``gc.ALIGNER_USE_EXACT_ALGORITHM_WHEN_MARGIN_TOO_LARGE``
    is set.

    :returns: a ``DTWStripe`` or ``DTWExact`` object, or ``None``
              if the algorithm is neither STRIPE nor EXACT
    """
    algorithm = self.algorithm
    delta = self.frame_rate * (self.margin * 2)
    mfcc2_size = self.synt_wave_full_mfcc.shape[1]
    self._log(["Requested algorithm: '%s'", algorithm])
    self._log(["delta = %d", delta])
    self._log(["m = %d", mfcc2_size])

    # the stripe is at least as wide as the synt wave
    if mfcc2_size <= delta:
        self._log("We have mfcc2_size <= delta")
        if gc.USE_C_EXTENSIONS and gf.can_run_c_extension():
            # the C code can be run: since it is still faster, keep the requested algorithm
            self._log("C extensions enabled and loaded: not selecting EXACT algorithm")
        elif not gc.ALIGNER_USE_EXACT_ALGORITHM_WHEN_MARGIN_TOO_LARGE:
            self._log("Selecting EXACT algorithm disabled in gc")
        else:
            self._log("Selecting EXACT algorithm")
            algorithm = DTWAlgorithm.EXACT

    # instantiate the selected algorithm
    if algorithm == DTWAlgorithm.STRIPE:
        self._log("Computing with STRIPE algo")
        return DTWStripe(
            self.real_wave_full_mfcc,
            self.synt_wave_full_mfcc,
            delta,
            self.logger
        )
    if algorithm == DTWAlgorithm.EXACT:
        self._log("Computing with EXACT algo")
        return DTWExact(
            self.real_wave_full_mfcc,
            self.synt_wave_full_mfcc,
            self.logger
        )
    return None
def _setup_dtw(self):
    """
    Set ``self.dtw`` up, unless it has been set up already.

    The algorithm requested in the runtime configuration is used,
    unless the synthesized wave has no more MFCC frames than
    the stripe width ``delta`` and the C extensions are
    disabled or cannot be run: in that case the EXACT algorithm
    is selected instead.

    The masked MFCCs are used if ``self.rconf.mmn`` is set,
    the unmasked ones otherwise. If either selected matrix
    has zero frames, ``self.dtw`` is left set to ``None``.

    Missing MFCC objects are reported through ``self.log_exc``
    with ``DTWAlignerNotInitialized``.
    """
    # nothing to do if the DTW object has already been set up
    if self.dtw is not None:
        return

    # both AudioFileMFCC objects must be present and populated
    real_wave = self.real_wave_mfcc
    if (real_wave is None) or (real_wave.middle_mfcc is None):
        self.log_exc(u"The real wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)
    synt_wave = self.synt_wave_mfcc
    if (synt_wave is None) or (synt_wave.middle_mfcc is None):
        self.log_exc(u"The synt wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)

    # read the parameters
    algorithm = self.rconf[RuntimeConfiguration.DTW_ALGORITHM]
    delta = int(2 * self.rconf.dtw_margin / self.rconf[RuntimeConfiguration.MFCC_WINDOW_SHIFT])
    mfcc2_length = self.synt_wave_mfcc.middle_length
    self.log([u"Requested algorithm: '%s'", algorithm])
    self.log([u"delta = %d", delta])
    self.log([u"m = %d", mfcc2_length])

    # the stripe is at least as wide as the synt wave
    if mfcc2_length <= delta:
        self.log(u"We have mfcc2_length <= delta")
        if self.rconf[RuntimeConfiguration.C_EXTENSIONS] and gf.can_run_c_extension():
            # the C code can be run: since it is still faster, keep the requested algorithm
            self.log(u"C extensions enabled and loaded: not selecting EXACT algorithm")
        else:
            self.log(u"Selecting EXACT algorithm")
            algorithm = DTWAlgorithm.EXACT

    # choose between masked and unmasked MFCCs
    if self.rconf.mmn:
        self.log(u"Using masked MFCC")
        real_mfcc = self.real_wave_mfcc.masked_middle_mfcc
        synt_mfcc = self.synt_wave_mfcc.masked_middle_mfcc
    else:
        self.log(u"Using unmasked MFCC")
        real_mfcc = self.real_wave_mfcc.middle_mfcc
        synt_mfcc = self.synt_wave_mfcc.middle_mfcc
    n = real_mfcc.shape[1]
    m = synt_mfcc.shape[1]
    self.log([u" Number of MFCC frames in real wave: %d", n])
    self.log([u" Number of MFCC frames in synt wave: %d", m])

    # create the DTW object, if there is something to align
    if (n == 0) or (m == 0):
        self.log(u"Setting self.dtw to None")
        self.dtw = None
    elif algorithm == DTWAlgorithm.EXACT:
        self.log(u"Computing with EXACT algo")
        self.dtw = DTWExact(
            m1=real_mfcc,
            m2=synt_mfcc,
            rconf=self.rconf,
            logger=self.logger
        )
    else:
        self.log(u"Computing with STRIPE algo")
        self.dtw = DTWStripe(
            m1=real_mfcc,
            m2=synt_mfcc,
            delta=delta,
            rconf=self.rconf,
            logger=self.logger
        )
def _setup_dtw(self):
    """
    Set ``self.dtw`` up, unless it has been set up already.

    The algorithm requested in the runtime configuration is used,
    unless the synthesized wave has no more MFCC frames than
    the stripe width ``delta`` and the C extensions are
    disabled or cannot be run: in that case the EXACT algorithm
    is selected instead.

    The masked MFCCs are used if ``self.rconf.mmn`` is set,
    the unmasked ones otherwise. If either selected matrix
    has zero frames, ``self.dtw`` is left set to ``None``.

    Missing MFCC objects are reported through ``self.log_exc``
    with ``DTWAlignerNotInitialized``.
    """
    # nothing to do if the DTW object has already been set up
    if self.dtw is not None:
        return

    # both AudioFileMFCC objects must be present and populated
    real_wave = self.real_wave_mfcc
    if (real_wave is None) or (real_wave.middle_mfcc is None):
        self.log_exc(u"The real wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)
    synt_wave = self.synt_wave_mfcc
    if (synt_wave is None) or (synt_wave.middle_mfcc is None):
        self.log_exc(u"The synt wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)

    # read the parameters
    algorithm = self.rconf[RuntimeConfiguration.DTW_ALGORITHM]
    delta = int(2 * self.rconf.dtw_margin / self.rconf[RuntimeConfiguration.MFCC_WINDOW_SHIFT])
    mfcc2_length = self.synt_wave_mfcc.middle_length
    self.log([u"Requested algorithm: '%s'", algorithm])
    self.log([u"delta = %d", delta])
    self.log([u"m = %d", mfcc2_length])

    # the stripe is at least as wide as the synt wave
    if mfcc2_length <= delta:
        self.log(u"We have mfcc2_length <= delta")
        if self.rconf[RuntimeConfiguration.C_EXTENSIONS] and gf.can_run_c_extension():
            # the C code can be run: since it is still faster, keep the requested algorithm
            self.log(u"C extensions enabled and loaded: not selecting EXACT algorithm")
        else:
            self.log(u"Selecting EXACT algorithm")
            algorithm = DTWAlgorithm.EXACT

    # choose between masked and unmasked MFCCs
    if self.rconf.mmn:
        self.log(u"Using masked MFCC")
        real_mfcc = self.real_wave_mfcc.masked_middle_mfcc
        synt_mfcc = self.synt_wave_mfcc.masked_middle_mfcc
    else:
        self.log(u"Using unmasked MFCC")
        real_mfcc = self.real_wave_mfcc.middle_mfcc
        synt_mfcc = self.synt_wave_mfcc.middle_mfcc
    n = real_mfcc.shape[1]
    m = synt_mfcc.shape[1]
    self.log([u" Number of MFCC frames in real wave: %d", n])
    self.log([u" Number of MFCC frames in synt wave: %d", m])

    # create the DTW object, if there is something to align
    if (n == 0) or (m == 0):
        self.log(u"Setting self.dtw to None")
        self.dtw = None
    elif algorithm == DTWAlgorithm.EXACT:
        self.log(u"Computing with EXACT algo")
        self.dtw = DTWExact(
            m1=real_mfcc,
            m2=synt_mfcc,
            rconf=self.rconf,
            logger=self.logger
        )
    else:
        self.log(u"Computing with STRIPE algo")
        self.dtw = DTWStripe(
            m1=real_mfcc,
            m2=synt_mfcc,
            delta=delta,
            rconf=self.rconf,
            logger=self.logger
        )
def test_can_run_c_extension(self):
    """
    Smoke test: ``gf.can_run_c_extension`` can be called without
    arguments and with known and unknown extension names.
    The returned values are not checked.
    """
    gf.can_run_c_extension()
    for extension_name in ("cdtw", "cew", "cmfcc", "foo", "bar"):
        gf.can_run_c_extension(extension_name)
def main():
    """
    Entry point: detect the audio interval (head, text, tail)
    of an audio file with respect to a text file, and print it.

    Expected arguments:
    ``language text_file_path text_format [options...] audio_file_path``,
    where each option is either a verbosity switch
    (``v``, ``-v``, ``verbose``, ``--verbose``)
    or a ``key=value`` pair with key in
    ``id_regex``, ``class_regex``, ``sort``,
    ``min_head_length``, ``max_head_length``,
    ``min_tail_length``, ``max_tail_length``.
    """
    if len(sys.argv) < 5:
        usage()
        return
    language = sys.argv[1]
    text_file_path = sys.argv[2]
    text_format = sys.argv[3]
    audio_file_path = sys.argv[-1]

    # keys copied as strings under the corresponding gc parameter name
    text_keys = {
        "id_regex": gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX,
        "class_regex": gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX,
        "sort": gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT,
    }
    # keys stored under their own name, as floats;
    # NOTE: min_tail_length used to be stored under "min_head_length",
    # so that it was ignored and it clobbered the head minimum
    length_keys = (
        "min_head_length",
        "max_head_length",
        "min_tail_length",
        "max_tail_length",
    )

    verbose = False
    parameters = {}
    for argument in sys.argv[4:-1]:
        args = argument.split("=")
        if len(args) == 1:
            verbose = (args[0] in ["v", "-v", "verbose", "--verbose"])
        if len(args) == 2:
            key, value = args
            if key in text_keys:
                parameters[text_keys[key]] = value
            elif key in length_keys:
                parameters[key] = float(value)

    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    logger = Logger(tee=verbose)

    print("[INFO] Reading audio...")
    tmp_handler, tmp_file_path = tempfile.mkstemp(
        suffix=".wav",
        dir=gf.custom_tmp_dir()
    )
    try:
        converter = FFMPEGWrapper(logger=logger)
        converter.convert(audio_file_path, tmp_file_path)
        audio_file = AudioFile(tmp_file_path)
        print("[INFO] Reading audio... done")

        print("[INFO] Reading text...")
        if text_format == "list":
            text_file = TextFile()
            text_file.read_from_list(text_file_path.split("|"))
        else:
            text_file = TextFile(text_file_path, text_format, parameters)
        text_file.set_language(language)
        print("[INFO] Reading text... done")

        print("[INFO] Detecting audio interval...")
        sd = SD(audio_file, text_file, logger=logger)
        # command line values override the gc defaults
        min_head_length = parameters.get("min_head_length", gc.SD_MIN_HEAD_LENGTH)
        max_head_length = parameters.get("max_head_length", gc.SD_MAX_HEAD_LENGTH)
        min_tail_length = parameters.get("min_tail_length", gc.SD_MIN_TAIL_LENGTH)
        max_tail_length = parameters.get("max_tail_length", gc.SD_MAX_TAIL_LENGTH)
        start, end = sd.detect_interval(
            min_head_length,
            max_head_length,
            min_tail_length,
            max_tail_length
        )
        zero = 0
        audio_len = audio_file.audio_length
        head_len = start
        text_len = end - start
        tail_len = audio_len - end
        print("[INFO] Detecting audio interval... done")

        # intervals as numbers
        print("[INFO] ")
        print("[INFO] Head: %.3f %.3f (%.3f)" % (zero, start, head_len))
        print("[INFO] Text: %.3f %.3f (%.3f)" % (start, end, text_len))
        print("[INFO] Tail: %.3f %.3f (%.3f)" % (end, audio_len, tail_len))
        print("[INFO] ")

        # intervals formatted by gf.time_to_hhmmssmmm
        zero_h = gf.time_to_hhmmssmmm(0)
        start_h = gf.time_to_hhmmssmmm(start)
        end_h = gf.time_to_hhmmssmmm(end)
        audio_len_h = gf.time_to_hhmmssmmm(audio_len)
        head_len_h = gf.time_to_hhmmssmmm(head_len)
        text_len_h = gf.time_to_hhmmssmmm(text_len)
        tail_len_h = gf.time_to_hhmmssmmm(tail_len)
        print("[INFO] Head: %s %s (%s)" % (zero_h, start_h, head_len_h))
        print("[INFO] Text: %s %s (%s)" % (start_h, end_h, text_len_h))
        print("[INFO] Tail: %s %s (%s)" % (end_h, audio_len_h, tail_len_h))
    finally:
        # remove the temporary wav file even if one of the steps above raised
        cleanup(tmp_handler, tmp_file_path)
def main():
    """
    Entry point: detect the audio interval (head, text, tail)
    of an audio file with respect to a text file, and print it.

    Expected arguments:
    ``language text_file_path text_format [options...] audio_file_path``,
    where each option is either a verbosity switch
    (``v``, ``-v``, ``verbose``, ``--verbose``)
    or a ``key=value`` pair with key in
    ``id_regex``, ``class_regex``, ``sort``,
    ``min_head_length``, ``max_head_length``,
    ``min_tail_length``, ``max_tail_length``.
    """
    if len(sys.argv) < 5:
        usage()
        return
    language = sys.argv[1]
    text_file_path = sys.argv[2]
    text_format = sys.argv[3]
    audio_file_path = sys.argv[-1]

    # keys copied as strings under the corresponding gc parameter name
    text_keys = {
        "id_regex": gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX,
        "class_regex": gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX,
        "sort": gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT,
    }
    # keys stored under their own name, as floats;
    # NOTE: min_tail_length used to be stored under "min_head_length",
    # so that it was ignored and it clobbered the head minimum
    length_keys = (
        "min_head_length",
        "max_head_length",
        "min_tail_length",
        "max_tail_length",
    )

    verbose = False
    parameters = {}
    for argument in sys.argv[4:-1]:
        args = argument.split("=")
        if len(args) == 1:
            verbose = (args[0] in ["v", "-v", "verbose", "--verbose"])
        if len(args) == 2:
            key, value = args
            if key in text_keys:
                parameters[text_keys[key]] = value
            elif key in length_keys:
                parameters[key] = float(value)

    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    logger = Logger(tee=verbose)

    print("[INFO] Reading audio...")
    tmp_handler, tmp_file_path = tempfile.mkstemp(
        suffix=".wav",
        dir=gf.custom_tmp_dir()
    )
    try:
        converter = FFMPEGWrapper(logger=logger)
        converter.convert(audio_file_path, tmp_file_path)
        audio_file = AudioFile(tmp_file_path)
        print("[INFO] Reading audio... done")

        print("[INFO] Reading text...")
        if text_format == "list":
            text_file = TextFile()
            text_file.read_from_list(text_file_path.split("|"))
        else:
            text_file = TextFile(text_file_path, text_format, parameters)
        text_file.set_language(language)
        print("[INFO] Reading text... done")

        print("[INFO] Detecting audio interval...")
        sd = SD(audio_file, text_file, logger=logger)
        # command line values override the gc defaults
        min_head_length = parameters.get("min_head_length", gc.SD_MIN_HEAD_LENGTH)
        max_head_length = parameters.get("max_head_length", gc.SD_MAX_HEAD_LENGTH)
        min_tail_length = parameters.get("min_tail_length", gc.SD_MIN_TAIL_LENGTH)
        max_tail_length = parameters.get("max_tail_length", gc.SD_MAX_TAIL_LENGTH)
        start, end = sd.detect_interval(
            min_head_length,
            max_head_length,
            min_tail_length,
            max_tail_length
        )
        zero = 0
        audio_len = audio_file.audio_length
        head_len = start
        text_len = end - start
        tail_len = audio_len - end
        print("[INFO] Detecting audio interval... done")

        # intervals as numbers
        print("[INFO] ")
        print("[INFO] Head: %.3f %.3f (%.3f)" % (zero, start, head_len))
        print("[INFO] Text: %.3f %.3f (%.3f)" % (start, end, text_len))
        print("[INFO] Tail: %.3f %.3f (%.3f)" % (end, audio_len, tail_len))
        print("[INFO] ")

        # intervals formatted by gf.time_to_hhmmssmmm
        zero_h = gf.time_to_hhmmssmmm(0)
        start_h = gf.time_to_hhmmssmmm(start)
        end_h = gf.time_to_hhmmssmmm(end)
        audio_len_h = gf.time_to_hhmmssmmm(audio_len)
        head_len_h = gf.time_to_hhmmssmmm(head_len)
        text_len_h = gf.time_to_hhmmssmmm(text_len)
        tail_len_h = gf.time_to_hhmmssmmm(tail_len)
        print("[INFO] Head: %s %s (%s)" % (zero_h, start_h, head_len_h))
        print("[INFO] Text: %s %s (%s)" % (start_h, end_h, text_len_h))
        print("[INFO] Tail: %s %s (%s)" % (end_h, audio_len_h, tail_len_h))
    finally:
        # remove the temporary wav file even if one of the steps above raised
        cleanup(tmp_handler, tmp_file_path)
def main():
    """
    Entry point: run the VAD on an audio file and write
    the detected intervals to a file.

    Expected arguments: ``audio_file_path mode output_file_path [-v]``,
    where ``mode`` is one of ``speech``, ``nonspeech``, ``both``.

    The output file has one interval per line, as tab-separated
    begin and end values with three decimals; in ``both`` mode,
    the intervals are sorted and a third column holds
    ``speech`` or ``nonspeech``.
    """
    if len(sys.argv) < 4:
        usage()
        return
    audio_file_path = sys.argv[1]
    mode = sys.argv[2]
    output_file_path = sys.argv[3]
    verbose = (sys.argv[-1] == "-v")

    # validate the mode BEFORE creating the temporary file,
    # so that an invalid mode does not leave the file behind
    if mode not in ["speech", "nonspeech", "both"]:
        usage()
        return

    if not gf.can_run_c_extension():
        print("[WARN] Unable to load Python C Extensions")
        print("[WARN] Running the slower pure Python code")
        print("[WARN] See the README file for directions to compile the Python C Extensions")

    logger = Logger(tee=verbose)

    tmp_handler, tmp_file_path = tempfile.mkstemp(
        suffix=".wav",
        dir=gf.custom_tmp_dir()
    )
    try:
        print("[INFO] Converting audio file to mono...")
        converter = FFMPEGWrapper(logger=logger)
        converter.convert(audio_file_path, tmp_file_path)
        print("[INFO] Converting audio file to mono... done")

        vad = VAD(tmp_file_path, logger=logger)

        print("[INFO] Extracting MFCCs...")
        vad.compute_mfcc()
        print("[INFO] Extracting MFCCs... done")

        print("[INFO] Executing VAD...")
        vad.compute_vad()
        print("[INFO] Executing VAD... done")
    finally:
        # remove the temporary wav file even if one of the steps above raised
        print("[INFO] Cleaning up...")
        cleanup(tmp_handler, tmp_file_path)
        print("[INFO] Cleaning up... done")

    if mode == "speech":
        print("[INFO] Creating speech file...")
        with open(output_file_path, "w") as output_file:
            for interval in vad.speech:
                output_file.write("%.3f\t%.3f\n" % (interval[0], interval[1]))
        print("[INFO] Creating speech file... done")
    elif mode == "nonspeech":
        print("[INFO] Creating nonspeech file...")
        with open(output_file_path, "w") as output_file:
            for interval in vad.nonspeech:
                output_file.write("%.3f\t%.3f\n" % (interval[0], interval[1]))
        print("[INFO] Creating nonspeech file... done")
    else:
        # mode == "both": merge the labelled intervals and sort them
        print("[INFO] Creating speech and nonspeech file...")
        speech = [[x[0], x[1], "speech"] for x in vad.speech]
        nonspeech = [[x[0], x[1], "nonspeech"] for x in vad.nonspeech]
        both = sorted(speech + nonspeech)
        with open(output_file_path, "w") as output_file:
            for interval in both:
                output_file.write("%.3f\t%.3f\t%s\n" % (
                    interval[0],
                    interval[1],
                    interval[2]
                ))
        print("[INFO] Creating speech and nonspeech file... done")

    print("[INFO] Created file %s" % output_file_path)