def synthesize_multiple(self, text_file, ofp=None, quit_after=None, backwards=False, zero_length=False):
    """
    Synthesize ``text_file`` with the Festival wrapper and check the total time.

    :param text_file: the text file object handed to the TTS wrapper
    :param ofp: the output file path; if ``None``, a temporary ``.wav`` file is used
    :param quit_after: forwarded unchanged to ``synthesize_multiple()``
    :param backwards: forwarded unchanged to ``synthesize_multiple()``
    :param bool zero_length: if ``True``, expect a total time equal to ``0.0``;
                             otherwise expect a strictly positive total time
    """
    if ofp is None:
        handler, output_file_path = gf.tmp_file(suffix=".wav")
    else:
        handler = None
        output_file_path = ofp
    try:
        rconf = RuntimeConfiguration()
        rconf[RuntimeConfiguration.TTS] = u"festival"
        rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
        tts_engine = FESTIVALWrapper(rconf=rconf)
        # only the total time is checked; the tuple is still unpacked in full
        # so that an unexpected return shape fails loudly
        _anchors, total_time, _num_chars = tts_engine.synthesize_multiple(
            text_file, output_file_path, quit_after, backwards)
    finally:
        # always remove the output file, whatever exception (if any) was raised,
        # so that a failing synthesis never leaves a stray file behind
        gf.delete_file(handler, output_file_path)
    if zero_length:
        self.assertEqual(total_time, 0.0)
    else:
        self.assertGreater(total_time, 0.0)
def align(text_path, audio_path, align_out_path, word_align=True):
    """
    Run an aeneas alignment task and write the sync map as readable JSON.

    :param text_path: path assigned to the task as its text file
    :param audio_path: path assigned to the task as its audio file
    :param align_out_path: path of the JSON sync map to be written
    :param bool word_align: if ``True``, request a 3-level ``mplain`` task with
                            nonspeech masking enabled; otherwise a ``plain`` task
    """
    # assemble the task configuration, one "key=value" option per entry
    options = [u"task_language=hi", "os_task_file_format=json"]
    runtime_conf = None
    if word_align:
        options.append("os_task_file_levels=3")
        options.append("is_text_type=mplain")
        runtime_conf = RuntimeConfiguration()
        runtime_conf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
        runtime_conf[RuntimeConfiguration.MFCC_MASK_NONSPEECH_L3] = True
    else:
        options.append("is_text_type=plain")

    # create the Task object and point it to its input/output files
    task = Task(config_string="|".join(options))
    task.text_file_path_absolute = text_path
    task.audio_file_path_absolute = audio_path
    task.sync_map_file_path_absolute = align_out_path

    # process the Task, then output the sync map to file
    executor = ExecuteTask(task, rconf=runtime_conf)
    executor.execute()
    task.output_sync_map_file()

    # rewrite the sync map with ensure_ascii=False, so that non-ASCII text
    # is stored as-is instead of as \uXXXX escapes
    with open(align_out_path, 'r', encoding='utf8') as source:
        sync_map = json.load(source)
    with open(align_out_path, 'w', encoding='utf8') as target:
        json.dump(sync_map, target, ensure_ascii=False, indent=2)
def inner(c_ext, cew_subprocess, cache):
    """
    Run one synthesis with the given runtime switches and check its outcome.

    The names ``self``, ``ofp``, ``text_file``, ``quit_after``, ``backwards``,
    ``zero_length`` and ``expected_exc`` are taken from the enclosing scope.

    :param c_ext: value stored under ``RuntimeConfiguration.C_EXTENSIONS``
    :param cew_subprocess: value stored under ``RuntimeConfiguration.CEW_SUBPROCESS_ENABLED``
    :param cache: value stored under ``RuntimeConfiguration.TTS_CACHE``;
                  if true, the engine cache is cleared after the run
    """
    if ofp is None:
        handler, output_file_path = gf.tmp_file(suffix=".wav")
    else:
        handler = None
        output_file_path = ofp
    # bind the name before entering the try block: the except branch below
    # inspects it, and it would be unbound if the wrapper constructor raised
    tts_engine = None
    try:
        rconf = RuntimeConfiguration()
        rconf[RuntimeConfiguration.TTS] = self.TTS
        rconf[RuntimeConfiguration.TTS_PATH] = self.TTS_PATH
        rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
        rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
        rconf[RuntimeConfiguration.TTS_CACHE] = cache
        tts_engine = self.TTS_CLASS(rconf=rconf)
        anchors, total_time, num_chars = tts_engine.synthesize_multiple(
            text_file, output_file_path, quit_after, backwards)
        gf.delete_file(handler, output_file_path)
        if cache:
            tts_engine.clear_cache()
        if zero_length:
            self.assertEqual(total_time, 0.0)
        else:
            self.assertGreater(total_time, 0.0)
    except (OSError, TypeError, UnicodeDecodeError, ValueError) as exc:
        gf.delete_file(handler, output_file_path)
        if (cache) and (tts_engine is not None):
            tts_engine.clear_cache()
        # re-raise inside assertRaises, so that the test passes only if
        # the failure is of the expected exception type
        with self.assertRaises(expected_exc):
            raise exc
def test_use_cache(self):
    """
    Check that enabling ``TTS_CACHE`` yields an engine with an active cache.

    The test is a no-op when ``self.TTS`` is the empty string.
    """
    if self.TTS != u"":
        cached_conf = RuntimeConfiguration()
        cached_conf[RuntimeConfiguration.TTS_CACHE] = True
        engine = self.TTS_CLASS(rconf=cached_conf)
        self.assertTrue(engine.use_cache)
        self.assertIsNotNone(engine.cache)
def test_set_tts(self):
    """
    Check ``tts`` and ``tts_path`` after calling ``set_tts()`` for levels 1, 2 and 3.

    The same configuration object is reused across the three calls.
    """
    conf = RuntimeConfiguration()
    for level in (1, 2, 3):
        conf.set_tts(level=level)
        self.assertEqual(conf.tts, "espeak")
        self.assertEqual(conf.tts_path, None)
def test_set_granularity(self):
    """
    Check ``mmn``, ``mwl`` and ``mws`` after ``set_granularity()`` for levels 1, 2 and 3.

    The same configuration object is reused across the three calls.
    """
    # (level, expected mwl, expected mws)
    expectations = (
        (1, "0.100", "0.040"),
        (2, "0.050", "0.020"),
        (3, "0.020", "0.005"),
    )
    conf = RuntimeConfiguration()
    for level, window_length, window_shift in expectations:
        conf.set_granularity(level=level)
        self.assertEqual(conf.mmn, False)
        self.assertEqual(conf.mwl, TimeValue(window_length))
        self.assertEqual(conf.mws, TimeValue(window_shift))
def synthesize_single(self, text, language, ofp=None, zero_length=False):
    """
    Synthesize ``text`` with the Festival wrapper and check the returned value.

    :param text: the text handed to the TTS wrapper
    :param language: the language handed to the TTS wrapper
    :param ofp: the output file path; if ``None``, a temporary ``.wav`` file is used
    :param bool zero_length: if ``True``, expect a result equal to ``0``;
                             otherwise expect a strictly positive result
    """
    if ofp is None:
        handler, output_file_path = gf.tmp_file(suffix=".wav")
    else:
        handler = None
        output_file_path = ofp
    try:
        rconf = RuntimeConfiguration()
        rconf[RuntimeConfiguration.TTS] = u"festival"
        rconf[RuntimeConfiguration.TTS_PATH] = u"text2wave"
        tts_engine = FESTIVALWrapper(rconf=rconf)
        result = tts_engine.synthesize_single(text, language, output_file_path)
    finally:
        # always remove the output file, whatever exception (if any) was raised,
        # so that a failing synthesis never leaves a stray file behind
        gf.delete_file(handler, output_file_path)
    if zero_length:
        self.assertEqual(result, 0)
    else:
        self.assertGreater(result, 0)
def test_tts_path(self):
    """Check that a freshly created configuration reports ``None`` as ``tts_path``."""
    fresh_conf = RuntimeConfiguration()
    observed = fresh_conf.tts_path
    self.assertEqual(observed, None)
def test_tts(self):
    """Check that a freshly created configuration reports ``espeak`` as ``tts``."""
    fresh_conf = RuntimeConfiguration()
    observed = fresh_conf.tts
    self.assertEqual(observed, "espeak")
def test_mwl(self):
    """Check that a freshly created configuration reports ``0.100`` as ``mwl``."""
    fresh_conf = RuntimeConfiguration()
    expected = TimeValue("0.100")
    self.assertEqual(fresh_conf.mwl, expected)
def test_mmn(self):
    """Check that a freshly created configuration reports ``False`` as ``mmn``."""
    fresh_conf = RuntimeConfiguration()
    observed = fresh_conf.mmn
    self.assertEqual(observed, False)
class AbstractCLIProgram(Loggable):
    """
    This class is an "abstract" CLI program.

    To create a new CLI program, create a new class,
    derived from this one, and overload
    ``NAME``, ``HELP``, and ``perform_command()``.

    :param use_sys: if ``True``, call ``sys.exit`` when needed;
                    otherwise, never call ``sys.exit`` and
                    just return a return code/value
    :type  use_sys: bool
    :param string invoke: the CLI command to be invoked
    :param rconf: a runtime configuration. Default: ``None``, meaning that
                  default settings will be used.
    :type  rconf: :class:`aeneas.runtimeconfiguration.RuntimeConfiguration`
    :param logger: the logger object, passed on to the parent class
    """

    NAME = gf.file_name_without_extension(__file__)

    AENEAS_URL = u"http://www.readbeyond.it/aeneas/"
    DOCS_URL = u"http://www.readbeyond.it/aeneas/docs/"
    GITHUB_URL = u"https://github.com/ReadBeyond/aeneas/"
    ISSUES_URL = u"https://github.com/ReadBeyond/aeneas/issues/"
    RB_URL = u"http://www.readbeyond.it"

    NO_ERROR_EXIT_CODE = 0
    ERROR_EXIT_CODE = 1
    HELP_EXIT_CODE = 2

    HELP = {
        "description": u"An abstract CLI program",
        "synopsis": [],
        "options": [],
        "parameters": [],
        "examples": []
    }

    TAG = u"CLI"

    def __init__(self, use_sys=True, invoke=None, rconf=None, logger=None):
        super(AbstractCLIProgram, self).__init__(rconf=rconf, logger=logger)
        self.invoke = u"python -m aeneas.tools.%s" % (self.NAME) if (invoke is None) else invoke
        self.use_sys = use_sys
        self.formal_arguments_raw = []
        self.formal_arguments = []
        self.actual_arguments = []
        self.log_file_path = None
        self.verbose = False
        self.very_verbose = False

    # maps a log severity to the function used to print a message of that severity
    PREFIX_TO_PRINT_FUNCTION = {
        Logger.CRITICAL: gf.print_error,
        Logger.DEBUG: gf.print_info,
        Logger.ERROR: gf.print_error,
        Logger.INFO: gf.print_info,
        Logger.SUCCESS: gf.print_success,
        Logger.WARNING: gf.print_warning
    }

    def print_generic(self, msg, prefix=None):
        """
        Print a message and log it.

        The message is always logged; it is printed only if ``use_sys`` is set.

        :param msg: the message
        :type  msg: Unicode string
        :param prefix: the (optional) prefix
        :type  prefix: Unicode string
        """
        if prefix is None:
            self._log(msg, Logger.INFO)
        else:
            self._log(msg, prefix)
        if self.use_sys:
            if (prefix is not None) and (prefix in self.PREFIX_TO_PRINT_FUNCTION):
                self.PREFIX_TO_PRINT_FUNCTION[prefix](msg)
            else:
                gf.safe_print(msg)

    def print_error(self, msg):
        """
        Print an error message and log it.

        :param string msg: the message
        """
        self.print_generic(msg, Logger.ERROR)

    def print_info(self, msg):
        """
        Print an info message and log it.

        :param string msg: the message
        """
        self.print_generic(msg, Logger.INFO)

    def print_success(self, msg):
        """
        Print a success message and log it.

        :param string msg: the message
        """
        self.print_generic(msg, Logger.SUCCESS)

    def print_warning(self, msg):
        """
        Print a warning message and log it.

        :param string msg: the message
        """
        self.print_generic(msg, Logger.WARNING)

    def exit(self, code):
        """
        Exit with the given exit code,
        possibly with ``sys.exit()``.

        :param code: the exit code
        :type  code: int
        :rtype: int
        """
        if self.use_sys:
            sys.exit(code)
        return code

    def print_help(self, short=False):
        """
        Print help message and exit.

        :param short: print short help only
        :type  short: bool
        :rtype: int
        """
        header = [
            u"",
            u"NAME",
            u" %s - %s" % (self.NAME, self.HELP["description"]),
            u"",
        ]

        synopsis = [
            u"SYNOPSIS",
            u" %s [-h|--help|--version]" % (self.invoke)
        ]
        if "synopsis" in self.HELP:
            for syn, opt in self.HELP["synopsis"]:
                suffix = u" [OPTIONS]" if opt else u""
                synopsis.append(u" %s %s%s" % (self.invoke, syn, suffix))
        synopsis.append(u"")

        options = [
            u" -h : print short help and exit",
            u" --help : print full help and exit",
            u" --version : print the program name and version and exit",
            u" -l[=FILE], --log[=FILE] : log verbose output to tmp file or FILE if specified",
            u" -r=CONF, --runtime-configuration=CONF : apply runtime configuration CONF",
            u" -v, --verbose : verbose output",
            u" -vv, --very-verbose : verbose output, print date/time values",
        ]
        if "options" in self.HELP:
            for opt in self.HELP["options"]:
                options.append(u" %s" % (opt))
        # common and program-specific options are listed together, sorted
        options = [u"OPTIONS"] + sorted(options) + [u""]

        parameters = []
        if self.HELP.get("parameters"):
            parameters.append(u"PARAMETERS")
            for par in self.HELP["parameters"]:
                parameters.append(u" %s" % (par))
            parameters.append(u"")

        examples = []
        if self.HELP.get("examples"):
            examples.append(u"EXAMPLES")
            for exa in self.HELP["examples"]:
                examples.append(u" %s %s" % (self.invoke, exa))
            examples.append(u"")

        footer = [
            u"EXIT CODES",
            u" %d : no error" % (self.NO_ERROR_EXIT_CODE),
            u" %d : error" % (self.ERROR_EXIT_CODE),
            u" %d : help shown, no command run" % (self.HELP_EXIT_CODE),
            u"",
            u"AUTHOR",
            u" Alberto Pettarin, http://www.albertopettarin.it/",
            u"",
            u"REPORTING BUGS",
            u" Please use the GitHub Issues Web page : %s" % (self.ISSUES_URL),
            u"",
            u"COPYRIGHT",
            u" 2012-2016, Alberto Pettarin and ReadBeyond Srl",
            u" This software is available under the terms of the GNU Affero General Public License Version 3",
            u"",
            u"SEE ALSO",
            u" Code repository : %s" % (self.GITHUB_URL),
            u" Documentation : %s" % (self.DOCS_URL),
            u" Project Web page : %s" % (self.AENEAS_URL),
            u"",
        ]

        msg = header + synopsis + options + parameters + examples
        if not short:
            msg += footer
        if self.use_sys:
            self.print_generic(u"\n".join(msg))
        return self.exit(self.HELP_EXIT_CODE)

    def print_name_version(self):
        """
        Print program name and version and exit.

        :rtype: int
        """
        if self.use_sys:
            self.print_generic(u"%s v%s" % (self.NAME, aeneas_version))
        return self.exit(self.HELP_EXIT_CODE)

    def run(self, arguments, show_help=True):
        """
        Program entry point.

        Please note that the first item in ``arguments`` is discarded,
        as it is assumed to be the script/invocation name;
        pass a dummy placeholder if you call this method with
        an argument different than ``sys.argv``.

        :param arguments: the list of arguments
        :type  arguments: list
        :param show_help: if ``False``, do not show help on ``-h`` and ``--help``
        :type  show_help: bool
        :rtype: int
        """
        # convert arguments into Unicode strings
        if self.use_sys:
            # check that sys.stdin.encoding and sys.stdout.encoding are set to utf-8
            if not gf.FROZEN:
                for stream, direction in ((sys.stdin, u"input"), (sys.stdout, u"output")):
                    # encoding names are case-insensitive and may be spelled
                    # with "-", "_" or neither; the stream (or its encoding)
                    # may also be None, which is reported as "not UTF-8"
                    encoding = getattr(stream, "encoding", None) or u""
                    if encoding.upper().replace(u"-", u"").replace(u"_", u"") != u"UTF8":
                        self.print_warning(u"The default %s encoding is not UTF-8." % (direction))
                        self.print_warning(u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell.")
            # decode using sys.stdin.encoding
            args = [gf.safe_unicode_stdin(arg) for arg in arguments]
        else:
            # decode using utf-8 (but you should pass Unicode strings as parameters anyway)
            args = [gf.safe_unicode(arg) for arg in arguments]

        if show_help:
            if u"-h" in args:
                return self.print_help(short=True)

            if u"--help" in args:
                return self.print_help(short=False)

            if u"--version" in args:
                return self.print_name_version()

        # store formal arguments
        self.formal_arguments_raw = arguments
        self.formal_arguments = args

        # to obtain the actual arguments,
        # remove the first one and "special" switches
        args = args[1:]
        set_args = set(args)

        # set verbosity, if requested
        for flag in {u"-v", u"--verbose"} & set_args:
            self.verbose = True
            args.remove(flag)
        for flag in {u"-vv", u"--very-verbose"} & set_args:
            self.verbose = True
            self.very_verbose = True
            args.remove(flag)

        # set RuntimeConfiguration string, if specified
        for flag in [u"-r", u"--runtime-configuration"]:
            rconf_string = self.has_option_with_value(flag, actual_arguments=False)
            if rconf_string is not None:
                self.rconf = RuntimeConfiguration(rconf_string)
                args.remove("%s=%s" % (flag, rconf_string))

        # set log file path, if requested
        for flag in [u"-l", u"--log"]:
            log_path = self.has_option_with_value(flag, actual_arguments=False)
            if log_path is not None:
                args.remove("%s=%s" % (flag, log_path))
            elif flag in set_args:
                # flag given without a value: log to a temporary file
                _handler, log_path = gf.tmp_file(suffix=u".log", root=self.rconf[RuntimeConfiguration.TMP_PATH])
                args.remove(flag)
            # record the path while handling the flag that provided it,
            # so that a later, absent flag cannot reset it
            if log_path is not None:
                self.log_file_path = log_path

        # if no actual arguments left, print help
        if (len(args) < 1) and (show_help):
            return self.print_help(short=True)

        # store actual arguments
        self.actual_arguments = args

        # create logger
        self.logger = Logger(tee=self.verbose, tee_show_datetime=self.very_verbose)
        self.log([u"Formal arguments: %s", self.formal_arguments])
        self.log([u"Actual arguments: %s", self.actual_arguments])
        self.log([u"Runtime configuration: '%s'", self.rconf.config_string()])

        # perform command
        exit_code = self.perform_command()
        self.log([u"Execution completed with code %d", exit_code])

        # output log if requested
        if self.log_file_path is not None:
            self.log([u"User requested saving log to file '%s'", self.log_file_path])
            self.logger.write(self.log_file_path)
            if self.use_sys:
                self.print_info(u"Log written to file '%s'" % self.log_file_path)

        return self.exit(exit_code)

    def has_option(self, target):
        """
        Return ``True`` if the actual arguments include
        the specified ``target`` option or,
        if ``target`` is a list of options,
        at least one of them.

        :param target: the option or a list of options
        :type  target: Unicode string or list of Unicode strings
        :rtype: bool
        """
        if isinstance(target, list):
            target_set = set(target)
        else:
            target_set = {target}
        return not target_set.isdisjoint(self.actual_arguments)

    def has_option_with_value(self, prefix, actual_arguments=True):
        """
        Check if the actual arguments include an option
        starting with the given ``prefix`` and having a value,
        e.g. ``--format=ogg`` for ``prefix="--format"``.

        Return the value of the first matching argument,
        that is, everything after its first ``=`` character,
        or ``None`` if no argument matches.

        :param prefix: the option prefix
        :type  prefix: Unicode string
        :param actual_arguments: if ``True``, check among actual arguments;
                                 otherwise check among formal arguments
        :type  actual_arguments: bool
        :rtype: Unicode string or None
        """
        args = self.actual_arguments if actual_arguments else self.formal_arguments
        wanted = prefix + u"="
        for arg in args:
            if (arg is not None) and (arg.startswith(wanted)):
                # keep any further "=" characters as part of the value
                return arg.partition(u"=")[2]
        return None

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        self.log(u"This function should be overloaded in derived classes")
        self.log([u"Invoked with %s", self.actual_arguments])
        return self.NO_ERROR_EXIT_CODE

    def check_c_extensions(self, name=None):
        """
        If C extensions cannot be run, emit a warning
        and return ``False``. Otherwise return ``True``.
        If ``name`` is not ``None``, check just
        the C extension with that name.

        :param name: the name of the Python C extension to test
        :type  name: string
        :rtype: bool
        """
        if gf.can_run_c_extension(name=name):
            return True
        if name is None:
            self.print_warning(u"Unable to load Python C Extensions")
        else:
            self.print_warning(u"Unable to load Python C Extension %s" % (name))
        self.print_warning(u"Running the slower pure Python code")
        self.print_warning(u"See the documentation for directions to compile the Python C Extensions")
        return False

    def check_input_file_or_directory(self, path):
        """
        If the given path is neither a readable file nor a directory,
        emit an error and return ``False``. Otherwise return ``True``.

        :param path: the path of the input file or directory
        :type  path: string (path)
        :rtype: bool
        """
        if (not gf.file_can_be_read(path)) and (not os.path.isdir(path)):
            self.print_error(u"Unable to read file or directory '%s'" % (path))
            self.print_error(u"Make sure the path is written/escaped correctly and that you have read permission on it")
            return False
        return True

    def check_input_file(self, path):
        """
        If the given file cannot be read,
        emit an error and return ``False``. Otherwise return ``True``.

        :param path: the path of the input file
        :type  path: string (path)
        :rtype: bool
        """
        if not gf.file_can_be_read(path):
            self.print_error(u"Unable to read file '%s'" % (path))
            self.print_error(u"Make sure the file path is written/escaped correctly and that you have read permission on it")
            return False
        return True

    def check_output_file(self, path):
        """
        If the given path cannot be written,
        emit an error and return ``False``. Otherwise return ``True``.

        :param path: the path of the output file
        :type  path: string (path)
        :rtype: bool
        """
        if not gf.file_can_be_written(path):
            self.print_error(u"Unable to create file '%s'" % (path))
            self.print_error(u"Make sure the file path is written/escaped correctly and that you have write permission on it")
            return False
        return True

    def check_output_directory(self, path):
        """
        If the given directory cannot be written,
        emit an error and return ``False``. Otherwise return ``True``.

        :param path: the path of the output directory
        :type  path: string (path)
        :rtype: bool
        """
        if not os.path.isdir(path):
            self.print_error(u"Directory '%s' does not exist" % (path))
            return False
        # probe writability with a file name inside the directory
        test_file = os.path.join(path, u"file.test")
        if not gf.file_can_be_written(test_file):
            self.print_error(u"Unable to write inside directory '%s'" % (path))
            self.print_error(u"Make sure the directory path is written/escaped correctly and that you have write permission on it")
            return False
        return True

    def get_text_file(self, text_format, text, parameters):
        """
        Build a text file object from the given source.

        If ``text_format`` is ``list``, ``text`` is split on ``|`` and
        each item becomes an element of the text file.
        Otherwise, ``text`` is passed on as the path of a file
        in the given ``text_format``.

        :param text_format: ``list`` or one of the allowed text file formats
        :type  text_format: Unicode string
        :param text: the ``|``-separated text, or the path of the text file
        :type  text: Unicode string
        :param parameters: passed unchanged to the text file constructor
        :rtype: the text file object, or ``None`` if the format is not allowed
                or the file cannot be read
        """
        if text_format == u"list":
            text_file = TextFile(logger=self.logger)
            text_file.read_from_list(text.split(u"|"))
            return text_file
        if text_format not in TextFileFormat.ALLOWED_VALUES:
            self.print_error(u"File format '%s' is not allowed" % (text_format))
            self.print_error(u"Allowed text file formats: %s" % (" ".join(TextFileFormat.ALLOWED_VALUES)))
            return None
        try:
            return TextFile(text, text_format, parameters, logger=self.logger)
        except OSError:
            self.print_error(u"Cannot read file '%s'" % (text))
        return None

    def print_no_pafy_error(self):
        """
        Print the instructions for installing the Python modules
        required to download audio from YouTube.
        """
        self.print_error(u"You need to install Python modules youtube-dl and pafy to download audio from YouTube. Run:")
        self.print_error(u"$ pip install youtube-dl pafy")
        self.print_error(u"or, to install for all users:")
        self.print_error(u"$ sudo pip install youtube-dl pafy")
def __init__(self, logger=None, rconf=None):
    """
    Store the given logger and runtime configuration on the instance.

    :param logger: the logger to use; if ``None``, a new ``Logger`` is created
    :param rconf: the runtime configuration to use; if ``None``,
                  a new ``RuntimeConfiguration`` is created
    """
    if logger is None:
        logger = Logger()
    self.logger = logger
    if rconf is None:
        rconf = RuntimeConfiguration()
    self.rconf = rconf
def test_clone(self):
    """
    Check that ``clone()`` returns a distinct object
    whose ``config_string`` compares equal to the original one.
    """
    original = RuntimeConfiguration()
    duplicate = original.clone()
    # NOTE(review): ``config_string`` is compared as an attribute, without
    # being called -- confirm that it is a property on this class
    self.assertNotEqual(id(original), id(duplicate))
    self.assertEqual(original.config_string, duplicate.config_string)
def test_config_string(self):
    """
    Check that ``config_string`` can be accessed on a fresh configuration
    without raising.
    """
    fresh_conf = RuntimeConfiguration()
    # NOTE(review): the attribute is only accessed, not called and not
    # compared -- confirm that this is the intended check
    _ = fresh_conf.config_string
def __adjust_durations(self, subs: List[SubRipItem], audio_file_path: str, stretch_in_lang: str) -> List[SubRipItem]:
    """Re-time the given cues by running an aeneas DTW alignment task.

    The audio between the start of the first cue and the end of the last cue
    is extracted, the cue texts are written to a plain text file, and aeneas
    is run on the pair. The resulting SRT segment is loaded, re-indexed with
    the original cue indices, and shifted by the start time of the first cue.
    All intermediate files are removed before returning.

    Arguments:
        subs {list} -- The cues to be re-timed (must not be empty).
        audio_file_path {string} -- The path of the audio file to align against.
        stretch_in_lang {string} -- The value used for ``task_language``.

    Returns:
        list -- The cues loaded from the aligned subtitle segment.
    """
    from aeneas.executetask import ExecuteTask
    from aeneas.task import Task
    from aeneas.runtimeconfiguration import RuntimeConfiguration
    from aeneas.logger import Logger as AeneasLogger

    # Initialise a DTW alignment task
    task_config_string = (
        "task_language={}|os_task_file_format=srt|is_text_type=subtitles".format(stretch_in_lang)
    )
    runtime_config_string = "dtw_algorithm=stripe"  # stripe or exact
    task = Task(config_string=task_config_string)

    try:
        segment_path, _ = MediaHelper.extract_audio_from_start_to_end(
            audio_file_path,
            str(subs[0].start),
            str(subs[-1].end),
        )

        # Create a text file for DTW alignments
        root, _ = os.path.splitext(segment_path)
        text_file_path = "{}.txt".format(root)

        with open(text_file_path, "w", encoding="utf8") as text_file:
            for sub_new in subs:
                text_file.write(sub_new.text)
                # The file is opened in text mode, which already translates
                # "\n" into the platform line ending; writing os.linesep here
                # would yield "\r\r\n" on Windows
                text_file.write("\n" * 2)

        task.audio_file_path_absolute = segment_path
        task.text_file_path_absolute = text_file_path
        task.sync_map_file_path_absolute = "{}.srt".format(root)

        # Echo the aeneas log only in verbose mode, and never in quiet mode
        tee = bool(Logger.VERBOSE) and not Logger.QUIET

        with self.__lock:
            # Execute the task
            ExecuteTask(
                task=task,
                rconf=RuntimeConfiguration(config_string=runtime_config_string),
                logger=AeneasLogger(tee=tee),
            ).execute()

            # Output new subtitle segment to a file
            task.output_sync_map_file()

        # Load the above subtitle segment
        adjusted_subs = Subtitle.load(
            task.sync_map_file_path_absolute
        ).subs
        for index, sub_new_loaded in enumerate(adjusted_subs):
            sub_new_loaded.index = subs[index].index

        # The segment starts at zero: move it back to where the first cue began
        adjusted_subs.shift(
            seconds=MediaHelper.get_duration_in_seconds(
                start=None, end=str(subs[0].start)
            )
        )
        return adjusted_subs
    finally:
        # Housekeep intermediate files
        for leftover_path in (
            task.audio_file_path_absolute,
            task.text_file_path_absolute,
            task.sync_map_file_path_absolute,
        ):
            if leftover_path is not None and os.path.exists(leftover_path):
                os.remove(leftover_path)
def run(self, arguments, show_help=True):
    """
    Program entry point.

    Please note that the first item in ``arguments`` is discarded,
    as it is assumed to be the script/invocation name;
    pass a dummy placeholder if you call this method with
    an argument different than ``sys.argv``.

    :param arguments: the list of arguments
    :type  arguments: list
    :param show_help: if ``False``, do not show help on ``-h`` and ``--help``
    :type  show_help: bool
    :rtype: int
    """
    # convert arguments into Unicode strings
    if self.use_sys:
        # check that sys.stdin.encoding and sys.stdout.encoding are set to utf-8
        if not gf.FROZEN:
            for stream, direction in ((sys.stdin, u"input"), (sys.stdout, u"output")):
                # encoding names are case-insensitive and may be spelled
                # with "-", "_" or neither; the stream (or its encoding)
                # may also be None, which is reported as "not UTF-8"
                encoding = getattr(stream, "encoding", None) or u""
                if encoding.upper().replace(u"-", u"").replace(u"_", u"") != u"UTF8":
                    self.print_warning(u"The default %s encoding is not UTF-8." % (direction))
                    self.print_warning(u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell.")
        # decode using sys.stdin.encoding
        args = [gf.safe_unicode_stdin(arg) for arg in arguments]
    else:
        # decode using utf-8 (but you should pass Unicode strings as parameters anyway)
        args = [gf.safe_unicode(arg) for arg in arguments]

    if show_help:
        if u"-h" in args:
            return self.print_help(short=True)

        if u"--help" in args:
            return self.print_help(short=False)

        if u"--version" in args:
            return self.print_name_version()

    # store formal arguments
    self.formal_arguments_raw = arguments
    self.formal_arguments = args

    # to obtain the actual arguments,
    # remove the first one and "special" switches
    args = args[1:]
    set_args = set(args)

    # set verbosity, if requested
    for flag in {u"-v", u"--verbose"} & set_args:
        self.verbose = True
        args.remove(flag)
    for flag in {u"-vv", u"--very-verbose"} & set_args:
        self.verbose = True
        self.very_verbose = True
        args.remove(flag)

    # set RuntimeConfiguration string, if specified
    for flag in [u"-r", u"--runtime-configuration"]:
        rconf_string = self.has_option_with_value(flag, actual_arguments=False)
        if rconf_string is not None:
            self.rconf = RuntimeConfiguration(rconf_string)
            args.remove("%s=%s" % (flag, rconf_string))

    # set log file path, if requested
    for flag in [u"-l", u"--log"]:
        log_path = self.has_option_with_value(flag, actual_arguments=False)
        if log_path is not None:
            args.remove("%s=%s" % (flag, log_path))
        elif flag in set_args:
            # flag given without a value: log to a temporary file
            _handler, log_path = gf.tmp_file(suffix=u".log", root=self.rconf[RuntimeConfiguration.TMP_PATH])
            args.remove(flag)
        # record the path while handling the flag that provided it,
        # so that a later, absent flag cannot reset it
        if log_path is not None:
            self.log_file_path = log_path

    # if no actual arguments left, print help
    if (len(args) < 1) and (show_help):
        return self.print_help(short=True)

    # store actual arguments
    self.actual_arguments = args

    # create logger
    self.logger = Logger(tee=self.verbose, tee_show_datetime=self.very_verbose)
    self.log([u"Formal arguments: %s", self.formal_arguments])
    self.log([u"Actual arguments: %s", self.actual_arguments])
    self.log([u"Runtime configuration: '%s'", self.rconf.config_string()])

    # perform command
    exit_code = self.perform_command()
    self.log([u"Execution completed with code %d", exit_code])

    # output log if requested
    if self.log_file_path is not None:
        self.log([u"User requested saving log to file '%s'", self.log_file_path])
        self.logger.write(self.log_file_path)
        if self.use_sys:
            self.print_info(u"Log written to file '%s'" % self.log_file_path)

    return self.exit(exit_code)
class AbstractCLIProgram(Loggable): """ This class is an "abstract" CLI program. To create a new CLI program, create a new class, derived from this one, and overload ``NAME``, ``HELP``, and ``perform_command()``. :param use_sys: if ``True``, call ``sys.exit`` when needed; otherwise, never call ``sys.exit`` and just return a return code/value :type use_sys: bool :param string invoke: the CLI command to be invoked :param rconf: a runtime configuration. Default: ``None``, meaning that default settings will be used. :type rconf: :class:`aeneas.runtimeconfiguration.RuntimeConfiguration` """ NAME = gf.file_name_without_extension(__file__) AENEAS_URL = u"http://www.readbeyond.it/aeneas/" DOCS_URL = u"http://www.readbeyond.it/aeneas/docs/" GITHUB_URL = u"https://github.com/ReadBeyond/aeneas/" ISSUES_URL = u"https://github.com/ReadBeyond/aeneas/issues/" RB_URL = u"http://www.readbeyond.it" NO_ERROR_EXIT_CODE = 0 ERROR_EXIT_CODE = 1 HELP_EXIT_CODE = 2 HELP = {"description": u"An abstract CLI program", "synopsis": [], "options": [], "parameters": [], "examples": []} TAG = u"CLI" def __init__(self, use_sys=True, invoke=None, rconf=None, logger=None): super(AbstractCLIProgram, self).__init__(rconf=rconf, logger=logger) self.invoke = u"python -m aeneas.tools.%s" % (self.NAME) if (invoke is None) else invoke self.use_sys = use_sys self.formal_arguments_raw = [] self.formal_arguments = [] self.actual_arguments = [] self.log_file_path = None self.verbose = False self.very_verbose = False PREFIX_TO_PRINT_FUNCTION = { Logger.CRITICAL: gf.print_error, Logger.DEBUG: gf.print_info, Logger.ERROR: gf.print_error, Logger.INFO: gf.print_info, Logger.SUCCESS: gf.print_success, Logger.WARNING: gf.print_warning, } def print_generic(self, msg, prefix=None): """ Print a message and log it. 
:param msg: the message :type msg: Unicode string :param prefix: the (optional) prefix :type prefix: Unicode string """ if prefix is None: self._log(msg, Logger.INFO) else: self._log(msg, prefix) if self.use_sys: if (prefix is not None) and (prefix in self.PREFIX_TO_PRINT_FUNCTION): self.PREFIX_TO_PRINT_FUNCTION[prefix](msg) else: gf.safe_print(msg) def print_error(self, msg): """ Print an error message and log it. :param string msg: the message """ self.print_generic(msg, Logger.ERROR) def print_info(self, msg): """ Print an info message and log it. :param string msg: the message """ self.print_generic(msg, Logger.INFO) def print_success(self, msg): """ Print a success message and log it. :param string msg: the message """ self.print_generic(msg, Logger.SUCCESS) def print_warning(self, msg): """ Print a warning message and log it. :param string msg: the message """ self.print_generic(msg, Logger.WARNING) def exit(self, code): """ Exit with the given exit code, possibly with ``sys.exit()``. :param code: the exit code :type code: int :rtype: int """ if self.use_sys: sys.exit(code) return code def print_help(self, short=False): """ Print help message and exit. 
:param short: print short help only :type short: bool """ header = [u"", u"NAME", u" %s - %s" % (self.NAME, self.HELP["description"]), u""] synopsis = [u"SYNOPSIS", u" %s [-h|--help|--version]" % (self.invoke)] if "synopsis" in self.HELP: for syn, opt in self.HELP["synopsis"]: if opt: opt = u" [OPTIONS]" else: opt = u"" synopsis.append(u" %s %s%s" % (self.invoke, syn, opt)) synopsis.append(u"") options = [ u" -h : print short help and exit", u" --help : print full help and exit", u" --version : print the program name and version and exit", u" -l[=FILE], --log[=FILE] : log verbose output to tmp file or FILE if specified", u" -r=CONF, --runtime-configuration=CONF : apply runtime configuration CONF", u" -v, --verbose : verbose output", u" -vv, --very-verbose : verbose output, print date/time values", ] if "options" in self.HELP: for opt in self.HELP["options"]: options.append(u" %s" % (opt)) options = [u"OPTIONS"] + sorted(options) + [u""] parameters = [] if ("parameters" in self.HELP) and (len(self.HELP["parameters"]) > 0): parameters.append(u"PARAMETERS") for par in self.HELP["parameters"]: parameters.append(u" %s" % (par)) parameters.append(u"") examples = [] if ("examples" in self.HELP) and (len(self.HELP["examples"]) > 0): examples.append(u"EXAMPLES") for exa in self.HELP["examples"]: examples.append(u" %s %s" % (self.invoke, exa)) examples.append(u"") footer = [ u"EXIT CODES", u" %d : no error" % (self.NO_ERROR_EXIT_CODE), u" %d : error" % (self.ERROR_EXIT_CODE), u" %d : help shown, no command run" % (self.HELP_EXIT_CODE), u"", u"AUTHOR", u" Alberto Pettarin, http://www.albertopettarin.it/", u"", u"REPORTING BUGS", u" Please use the GitHub Issues Web page : %s" % (self.ISSUES_URL), u"", u"COPYRIGHT", u" 2012-2016, Alberto Pettarin and ReadBeyond Srl", u" This software is available under the terms of the GNU Affero General Public License Version 3", u"", u"SEE ALSO", u" Code repository : %s" % (self.GITHUB_URL), u" Documentation : %s" % (self.DOCS_URL), u" 
Project Web page : %s" % (self.AENEAS_URL),
            u"",
        ]
        msg = header + synopsis + options + parameters + examples
        if not short:
            msg += footer
        if self.use_sys:
            self.print_generic(u"\n".join(msg))
        return self.exit(self.HELP_EXIT_CODE)

    def print_name_version(self):
        """
        Print program name and version and exit.

        The text is printed only when ``self.use_sys`` is true;
        the method always exits with ``self.HELP_EXIT_CODE``.

        :rtype: int
        """
        if self.use_sys:
            self.print_generic(u"%s v%s" % (self.NAME, __version__))
        return self.exit(self.HELP_EXIT_CODE)

    def run(self, arguments, show_help=True):
        """
        Program entry point.

        Decode the arguments, handle the help/version switches,
        strip the "special" switches (verbosity, runtime configuration,
        log file), create the logger, call ``perform_command()``
        and exit with the code it returns.

        Please note that the first item in ``arguments`` is discarded,
        as it is assumed to be the script/invocation name;
        pass a "dumb" placeholder if you call this method with
        an argument different than ``sys.argv``.

        :param arguments: the list of arguments
        :type  arguments: list
        :param show_help: if ``False``, do not show help on ``-h`` and ``--help``
                          (``--version`` and the "no arguments left" help
                          are skipped as well)
        :type  show_help: bool
        :rtype: int
        """
        # convert arguments into Unicode strings
        if self.use_sys:
            # check that sys.stdin.encoding and sys.stdout.encoding are set to utf-8
            if not gf.FROZEN:
                if sys.stdin.encoding not in ["UTF-8", "UTF8"]:
                    self.print_warning(u"The default input encoding is not UTF-8.")
                    self.print_warning(u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell.")
                if sys.stdout.encoding not in ["UTF-8", "UTF8"]:
                    self.print_warning(u"The default output encoding is not UTF-8.")
                    self.print_warning(u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell.")
            # decode using sys.stdin.encoding
            args = [gf.safe_unicode_stdin(arg) for arg in arguments]
        else:
            # decode using utf-8 (but you should pass Unicode strings as parameters anyway)
            args = [gf.safe_unicode(arg) for arg in arguments]

        # help/version switches are looked up in the whole list,
        # i.e. before the first (script name) item is dropped
        if show_help:
            if u"-h" in args:
                return self.print_help(short=True)

            if u"--help" in args:
                return self.print_help(short=False)

            if u"--version" in args:
                return self.print_name_version()

        # store formal arguments
        self.formal_arguments_raw = arguments
        self.formal_arguments = args

        # to obtain the actual arguments,
        # remove the first one and "special" switches
        args = args[1:]
        set_args = set(args)

        # set verbosity, if requested
        # NOTE(review): list.remove() drops only the first occurrence,
        # so a repeated flag stays among the actual arguments
        for flag in set([u"-v", u"--verbose"]) & set_args:
            self.verbose = True
            args.remove(flag)
        for flag in set([u"-vv", u"--very-verbose"]) & set_args:
            self.verbose = True
            self.very_verbose = True
            args.remove(flag)

        # set RuntimeConfiguration string, if specified
        # (the value is looked up among the formal arguments,
        # because self.actual_arguments is not set yet)
        for flag in [u"-r", u"--runtime-configuration"]:
            rconf_string = self.has_option_with_value(flag, actual_arguments=False)
            if rconf_string is not None:
                self.rconf = RuntimeConfiguration(rconf_string)
                args.remove("%s=%s" % (flag, rconf_string))

        # set log file path, if requested:
        # "flag=path" uses the given path, a bare flag uses a temporary file
        log_path = None
        for flag in [u"-l", u"--log"]:
            log_path = self.has_option_with_value(flag, actual_arguments=False)
            if log_path is not None:
                args.remove("%s=%s" % (flag, log_path))
            elif flag in set_args:
                # NOTE(review): `handler` is never used afterwards; if gf.tmp_file()
                # returns an open OS-level descriptor, it is leaked here -- confirm
                handler, log_path = gf.tmp_file(suffix=u".log", root=self.rconf[RuntimeConfiguration.TMP_PATH])
                args.remove(flag)
            if log_path is not None:
                self.log_file_path = log_path

        # if no actual arguments left, print help
        if (len(args) < 1) and (show_help):
            return self.print_help(short=True)

        # store actual arguments
        self.actual_arguments = args

        # create logger
        self.logger = Logger(tee=self.verbose, tee_show_datetime=self.very_verbose)
        self.log([u"Formal arguments: %s", self.formal_arguments])
        self.log([u"Actual arguments: %s", self.actual_arguments])
        self.log([u"Runtime configuration: '%s'", self.rconf.config_string()])

        # perform command
        exit_code = self.perform_command()
        self.log([u"Execution completed with code %d", exit_code])

        # output log if requested
        if self.log_file_path is not None:
            self.log([u"User requested saving log to file '%s'", self.log_file_path])
            self.logger.write(self.log_file_path)
            if self.use_sys:
                self.print_info(u"Log written to file '%s'" % self.log_file_path)

        return self.exit(exit_code)

    def has_option(self, target):
        """
        Return ``True`` if the actual arguments include
        the specified ``target`` option or,
        if ``target`` is a list of options,
        at least one of them.

        :param target: the option or a list of options
        :type  target: Unicode string or list of Unicode strings
        :rtype: bool
        """
        if isinstance(target, list):
            target_set = set(target)
        else:
            target_set = set([target])
        # a non-empty intersection means at least one option was given
        return len(target_set & set(self.actual_arguments)) > 0

    def has_option_with_value(self, prefix, actual_arguments=True):
        """
        Check if the actual arguments include an option
        starting with the given ``prefix`` and having a value,
        e.g. ``--format=ogg`` for ``prefix="--format"``.

        Return the value of the first matching argument,
        or ``None`` if no argument matches.

        :param prefix: the option prefix
        :type  prefix: Unicode string
        :param actual_arguments: if ``True``, check among actual arguments;
                                 otherwise check among formal arguments
        :type  actual_arguments: bool
        :rtype: Unicode string or None
        """
        if actual_arguments:
            args = self.actual_arguments
        else:
            args = self.formal_arguments
        for arg in [arg for arg in args if (arg is not None) and (arg.startswith(prefix + u"="))]:
            lis = arg.split(u"=")
            if len(lis) >= 2:
                # re-join on "=" so that a value containing "=" is returned whole
                return u"=".join(lis[1:])
        return None

    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        This base implementation only logs the actual arguments
        and returns ``self.NO_ERROR_EXIT_CODE``.

        :rtype: int
        """
        self.log(u"This function should be overloaded in derived classes")
        self.log([u"Invoked with %s", self.actual_arguments])
        return self.NO_ERROR_EXIT_CODE

    def check_c_extensions(self, name=None):
        """
        If C extensions cannot be run, emit a warning
        and return ``False``. Otherwise return ``True``.
        If ``name`` is not ``None``, check just
        the C extension with that name.

        :param name: the name of the Python C extension to test
        :type  name: string
        :rtype: bool
        """
        if not gf.can_run_c_extension(name=name):
            if name is None:
                self.print_warning(u"Unable to load Python C Extensions")
            else:
                self.print_warning(u"Unable to load Python C Extension %s" % (name))
            self.print_warning(u"Running the slower pure Python code")
            self.print_warning(u"See the documentation for directions to compile the Python C Extensions")
            return False
        return True

    def check_input_file_or_directory(self, path):
        """
        If the given path is neither a file that can be read
        nor an existing directory, emit an error
        and return ``False``. Otherwise return ``True``.

        :param path: the path of the input file or directory
        :type  path: string (path)
        :rtype: bool
        """
        if (not gf.file_can_be_read(path)) and (not os.path.isdir(path)):
            self.print_error(u"Unable to read file or directory '%s'" % (path))
            self.print_error(u"Make sure the path is written/escaped correctly and that you have read permission on it")
            return False
        return True

    def check_input_file(self, path):
        """
        If the given path cannot be read, emit an error
        and return ``False``. Otherwise return ``True``.

        :param path: the path of the input file
        :type  path: string (path)
        :rtype: bool
        """
        if not gf.file_can_be_read(path):
            self.print_error(u"Unable to read file '%s'" % (path))
            self.print_error(
                u"Make sure the file path is written/escaped correctly and that you have read permission on it"
            )
            return False
        return True

    def check_output_file(self, path):
        """
        If the given path cannot be written, emit an error
        and return ``False``. Otherwise return ``True``.

        :param path: the path of the output file
        :type  path: string (path)
        :rtype: bool
        """
        if not gf.file_can_be_written(path):
            self.print_error(u"Unable to create file '%s'" % (path))
            self.print_error(
                u"Make sure the file path is written/escaped correctly and that you have write permission on it"
            )
            return False
        return True

    def check_output_directory(self, path):
        """
        If the given directory does not exist or cannot be written,
        emit an error and return ``False``.
        Otherwise return ``True``.

        :param path: the path of the output directory
        :type  path: string (path)
        :rtype: bool
        """
        if not os.path.isdir(path):
            self.print_error(u"Directory '%s' does not exist" % (path))
            return False
        # probe writability using a file path located inside the directory
        test_file = os.path.join(path, u"file.test")
        if not gf.file_can_be_written(test_file):
            self.print_error(u"Unable to write inside directory '%s'" % (path))
            self.print_error(
                u"Make sure the directory path is written/escaped correctly and that you have write permission on it"
            )
            return False
        return True

    def get_text_file(self, text_format, text, parameters):
        """
        Build a ``TextFile`` from the given text specification.

        If ``text_format`` is ``list``, ``text`` is split on ``|``
        and the resulting fragments are loaded with ``read_from_list()``.
        Otherwise ``text_format`` must be one of
        ``TextFileFormat.ALLOWED_VALUES`` and ``text``, ``text_format``
        and ``parameters`` are passed to the ``TextFile`` constructor.

        An error is printed and ``None`` is returned if the format
        is not allowed or if ``OSError`` is raised while creating the object.

        :param text_format: ``list`` or one of the allowed text file formats
        :type  text_format: Unicode string
        :param text: the ``|``-separated fragments (``list`` format);
                     otherwise presumably the path of the text file
        :type  text: Unicode string
        :param parameters: passed unchanged to ``TextFile`` (unused for ``list``)
        :rtype: TextFile or None
        """
        if text_format == u"list":
            text_file = TextFile(logger=self.logger)
            text_file.read_from_list(text.split(u"|"))
            return text_file
        else:
            if text_format not in TextFileFormat.ALLOWED_VALUES:
                self.print_error(u"File format '%s' is not allowed" % (text_format))
                self.print_error(u"Allowed text file formats: %s" % (" ".join(TextFileFormat.ALLOWED_VALUES)))
                return None
            try:
                return TextFile(text, text_format, parameters, logger=self.logger)
            except OSError:
                self.print_error(u"Cannot read file '%s'" % (text))
            return None

    def print_no_pafy_error(self):
        """
        Print an error explaining how to install the ``youtube-dl``
        and ``pafy`` Python modules with ``pip``.
        """
        self.print_error(u"You need to install Python modules youtube-dl and pafy to download audio from YouTube. Run:")
        self.print_error(u"$ pip install youtube-dl pafy")
        self.print_error(u"or, to install for all users:")
        self.print_error(u"$ sudo pip install youtube-dl pafy")
def test_loggable_rconf_logger(self):
    """A Loggable built with an explicit rconf and logger exposes both of them."""
    given_logger = Logger()
    given_rconf = RuntimeConfiguration()
    subject = Loggable(rconf=given_rconf, logger=given_logger)
    # both collaborators must be reachable through the public attributes
    self.assertEqual(given_rconf, subject.rconf)
    self.assertEqual(given_logger, subject.logger)
def test_loggable_rconf(self):
    """A Loggable built with only an rconf keeps it and still has a logger."""
    given_rconf = RuntimeConfiguration()
    subject = Loggable(rconf=given_rconf)
    self.assertEqual(given_rconf, subject.rconf)
    # no logger was passed, yet the attribute must not be None
    self.assertIsNotNone(subject.logger)
# Validate the language code: only "eng", "hi" and "hin" are accepted;
# anything else prints a message and terminates with exit status 1.
# NOTE(review): the message mentions only "hi" and "eng", but "hin" is accepted too.
if lang not in ["eng", "hi", "hin"]:
    print("only hi and eng allowed for language")
    exit(1)

# aeneas is imported here, i.e. only after the language check has passed
from aeneas.executetask import ExecuteTask
from aeneas.task import Task
from aeneas.runtimeconfiguration import RuntimeConfiguration

# Task configuration: subtitles-type input text, SRT sync map output
config_string = u"task_language=" + lang + u"|is_text_type=subtitles|os_task_file_format=srt"
# The sync map is written to a temporary file rather than to args.out.
# NOTE(review): mkstemp() also returns an open descriptor (tempout) that is not
# used in the visible code -- confirm it is closed further down.
tempout, tempfilename = tempfile.mkstemp()
task = Task(config_string=config_string)
task.audio_file_path_absolute = args.audio
task.text_file_path_absolute = args.txt
task.sync_map_file_path_absolute = tempfilename

rconf = RuntimeConfiguration()
# This option ignores the non-word sounds in the audio
rconf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
rconf[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD] = 2.5
# To use a different Text-to-Speech engine
#rconf[RuntimeConfiguration.TTS] = "festival"

# process Task
ExecuteTask(task, rconf=rconf).execute()

# output sync map to file
task.output_sync_map_file()

# Start the final output file with the WebVTT header line.
# NOTE(review): f is not closed in the visible code; presumably the temporary
# SRT content is appended and the file closed further down -- confirm.
f = open(args.out, "w")
f.writelines("WEBVTT\n")
def test_safety_checks(self):
    """A default-constructed RuntimeConfiguration reports safety_checks equal to True."""
    default_conf = RuntimeConfiguration()
    self.assertEqual(default_conf.safety_checks, True)
def test_sample_rate(self):
    """A default-constructed RuntimeConfiguration reports a sample_rate of 16000."""
    default_conf = RuntimeConfiguration()
    self.assertEqual(default_conf.sample_rate, 16000)
def test_set_rconf_string(self):
    """
    Build a RuntimeConfiguration from a single ``key=value`` string
    and check that looking the key up yields the expected typed value.
    """
    # each case: (configuration string, key to look up, expected value)
    cases = [
        (u"aba_nonspeech_tolerance=0.040", "aba_nonspeech_tolerance", TimeValue("0.040")),
        (u"aba_no_zero_duration=0.040", "aba_no_zero_duration", TimeValue("0.040")),
        (u"allow_unlisted_languages=True", "allow_unlisted_languages", True),
        (u"c_extensions=False", "c_extensions", False),
        (u"cdtw=False", "cdtw", False),
        (u"cew=False", "cew", False),
        (u"cmfcc=False", "cmfcc", False),
        (u"cew_subprocess_enabled=True", "cew_subprocess_enabled", True),
        (u"cew_subprocess_path=/foo/bar/python", "cew_subprocess_path", "/foo/bar/python"),
        (u"downloader_sleep=5.000", "downloader_sleep", TimeValue("5.000")),
        (u"downloader_retry_attempts=5", "downloader_retry_attempts", 5),
        (u"dtw_algorithm=exact", "dtw_algorithm", "exact"),
        (u"dtw_margin=100", "dtw_margin", TimeValue("100")),
        (u"ffmpeg_path=/foo/bar/ffmpeg", "ffmpeg_path", "/foo/bar/ffmpeg"),
        (u"ffmpeg_sample_rate=8000", "ffmpeg_sample_rate", 8000),
        (u"ffprobe_path=/foo/bar/ffprobe", "ffprobe_path", "/foo/bar/ffprobe"),
        (u"job_max_tasks=10", "job_max_tasks", 10),
        (u"mfcc_filters=100", "mfcc_filters", 100),
        (u"mfcc_size=20", "mfcc_size", 20),
        (u"mfcc_fft_order=256", "mfcc_fft_order", 256),
        (u"mfcc_lower_frequency=120.0", "mfcc_lower_frequency", 120.0),
        (u"mfcc_upper_frequency=5000.0", "mfcc_upper_frequency", 5000.0),
        (u"mfcc_emphasis_factor=1.0", "mfcc_emphasis_factor", 1.0),
        (u"mfcc_mask_nonspeech=True", "mfcc_mask_nonspeech", True),
        (u"mfcc_window_length=0.360", "mfcc_window_length", TimeValue("0.360")),
        (u"mfcc_window_shift=0.160", "mfcc_window_shift", TimeValue("0.160")),
        (u"dtw_margin_l1=100", "dtw_margin_l1", TimeValue("100")),
        (u"mfcc_mask_nonspeech_l1=True", "mfcc_mask_nonspeech_l1", True),
        (u"mfcc_window_length_l1=0.360", "mfcc_window_length_l1", TimeValue("0.360")),
        (u"mfcc_window_shift_l1=0.160", "mfcc_window_shift_l1", TimeValue("0.160")),
        (u"dtw_margin_l2=30", "dtw_margin_l2", TimeValue("30")),
        (u"mfcc_mask_nonspeech_l2=True", "mfcc_mask_nonspeech_l2", True),
        (u"mfcc_window_length_l2=0.360", "mfcc_window_length_l2", TimeValue("0.360")),
        (u"mfcc_window_shift_l2=0.160", "mfcc_window_shift_l2", TimeValue("0.160")),
        (u"dtw_margin_l3=10", "dtw_margin_l3", TimeValue("10")),
        (u"mfcc_mask_nonspeech_l3=True", "mfcc_mask_nonspeech_l3", True),
        (u"mfcc_window_length_l3=0.360", "mfcc_window_length_l3", TimeValue("0.360")),
        (u"mfcc_window_shift_l3=0.160", "mfcc_window_shift_l3", TimeValue("0.160")),
        (u"mfcc_mask_extend_speech_after=1", "mfcc_mask_extend_speech_after", 1),
        (u"mfcc_mask_extend_speech_before=1", "mfcc_mask_extend_speech_before", 1),
        (u"mfcc_mask_log_energy_threshold=0.750", "mfcc_mask_log_energy_threshold", 0.750),
        (u"mfcc_mask_min_nonspeech_length=5", "mfcc_mask_min_nonspeech_length", 5),
        (u"nuance_tts_api_id=foo", "nuance_tts_api_id", "foo"),
        (u"nuance_tts_api_key=bar", "nuance_tts_api_key", "bar"),
        (u"safety_checks=False", "safety_checks", False),
        (u"task_max_audio_length=1000", "task_max_audio_length", TimeValue("1000")),
        (u"task_max_text_length=1000", "task_max_text_length", 1000),
        (u"tmp_path=/foo/bar", "tmp_path", "/foo/bar"),
        (u"tts=festival", "tts", "festival"),
        (u"tts_path=/foo/bar/festival", "tts_path", "/foo/bar/festival"),
        (u"tts_api_sleep=5.000", "tts_api_sleep", TimeValue("5.000")),
        (u"tts_api_retry_attempts=3", "tts_api_retry_attempts", 3),
        (u"tts_voice_code=ru", "tts_voice_code", "ru"),
        (u"tts_cache=True", "tts_cache", True),
        (u"tts_l1=festival", "tts_l1", "festival"),
        (u"tts_path_l1=/foo/bar/festival", "tts_path_l1", "/foo/bar/festival"),
        (u"tts_l2=festival", "tts_l2", "festival"),
        (u"tts_path_l2=/foo/bar/festival", "tts_path_l2", "/foo/bar/festival"),
        (u"tts_l3=festival", "tts_l3", "festival"),
        (u"tts_path_l3=/foo/bar/festival", "tts_path_l3", "/foo/bar/festival"),
        (u"vad_extend_speech_after=1.000", "vad_extend_speech_after", TimeValue("1.000")),
        (u"vad_extend_speech_before=1.000", "vad_extend_speech_before", TimeValue("1.000")),
        (u"vad_log_energy_threshold=0.750", "vad_log_energy_threshold", 0.750),
        (u"vad_min_nonspeech_length=0.500", "vad_min_nonspeech_length", TimeValue("0.500")),
    ]
    for config_string, key, expected in cases:
        # a fresh configuration per case, so that cases cannot influence each other
        parsed = RuntimeConfiguration(config_string)
        self.assertEqual(parsed[key], expected)
def test_dtw_margin(self):
    """A default-constructed RuntimeConfiguration reports a dtw_margin of TimeValue("60.000")."""
    default_conf = RuntimeConfiguration()
    self.assertEqual(default_conf.dtw_margin, TimeValue("60.000"))
def test_convert_rc(self):
    """Convert every entry of ``self.FILES`` with ``ffmpeg_sample_rate`` set to 44100."""
    custom_conf = RuntimeConfiguration(u"ffmpeg_sample_rate=44100")
    for entry in self.FILES:
        self.convert(entry["path"], runtime_configuration=custom_conf)
def run(self, arguments, show_help=True):
    """
    Program entry point.

    Decode the arguments, handle the help/version switches,
    strip the "special" switches (verbosity, runtime configuration,
    log file), create the logger, call ``perform_command()``
    and exit with the code it returns.

    Please note that the first item in ``arguments`` is discarded,
    as it is assumed to be the script/invocation name;
    pass a "dumb" placeholder if you call this method with
    an argument different than ``sys.argv``.

    :param arguments: the list of arguments
    :type  arguments: list
    :param show_help: if ``False``, do not show help on ``-h`` and ``--help``
                      (``--version`` and the "no arguments left" help
                      are skipped as well)
    :type  show_help: bool
    :rtype: int
    """
    # function-scope import: only needed to release the temporary log descriptor
    import os

    # convert arguments into Unicode strings
    if self.use_sys:
        # check that sys.stdin.encoding and sys.stdout.encoding are set to utf-8
        if not gf.FROZEN:
            if sys.stdin.encoding not in ["UTF-8", "UTF8"]:
                self.print_warning(
                    u"The default input encoding is not UTF-8.")
                self.print_warning(
                    u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell."
                )
            if sys.stdout.encoding not in ["UTF-8", "UTF8"]:
                self.print_warning(
                    u"The default output encoding is not UTF-8.")
                self.print_warning(
                    u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell."
                )
        # decode using sys.stdin.encoding
        args = [gf.safe_unicode_stdin(arg) for arg in arguments]
    else:
        # decode using utf-8 (but you should pass Unicode strings as parameters anyway)
        args = [gf.safe_unicode(arg) for arg in arguments]

    # help/version switches are looked up in the whole list,
    # i.e. before the first (script name) item is dropped
    if show_help:
        if u"-h" in args:
            return self.print_help(short=True)

        if u"--help" in args:
            return self.print_help(short=False)

        if u"--version" in args:
            return self.print_name_version()

    # store formal arguments
    self.formal_arguments_raw = arguments
    self.formal_arguments = args

    # to obtain the actual arguments,
    # remove the first one and "special" switches
    args = args[1:]
    set_args = set(args)

    # set verbosity, if requested
    for flag in set([u"-v", u"--verbose"]) & set_args:
        self.verbose = True
        args.remove(flag)
    for flag in set([u"-vv", u"--very-verbose"]) & set_args:
        self.verbose = True
        self.very_verbose = True
        args.remove(flag)

    # set RuntimeConfiguration string, if specified
    # (the value is looked up among the formal arguments,
    # because self.actual_arguments is not set yet)
    for flag in [u"-r", u"--runtime-configuration"]:
        rconf_string = self.has_option_with_value(flag,
                                                  actual_arguments=False)
        if rconf_string is not None:
            self.rconf = RuntimeConfiguration(rconf_string)
            args.remove("%s=%s" % (flag, rconf_string))

    # set log file path, if requested:
    # "flag=path" uses the given path, a bare flag uses a temporary file
    log_path = None
    for flag in [u"-l", u"--log"]:
        log_path = self.has_option_with_value(flag, actual_arguments=False)
        if log_path is not None:
            args.remove("%s=%s" % (flag, log_path))
        elif flag in set_args:
            handler, log_path = gf.tmp_file(
                suffix=u".log",
                root=self.rconf[RuntimeConfiguration.TMP_PATH])
            # only the path is needed, since the log is written by path below:
            # close the descriptor right away instead of leaking it
            if handler is not None:
                os.close(handler)
            args.remove(flag)
        # record the path inside the loop, so that a value found for "-l"
        # is not lost when "--log" is examined afterwards
        if log_path is not None:
            self.log_file_path = log_path

    # if no actual arguments left, print help
    if (len(args) < 1) and (show_help):
        return self.print_help(short=True)

    # store actual arguments
    self.actual_arguments = args

    # create logger
    self.logger = Logger(tee=self.verbose,
                         tee_show_datetime=self.very_verbose)
    self.log([u"Formal arguments: %s", self.formal_arguments])
    self.log([u"Actual arguments: %s", self.actual_arguments])
    self.log([u"Runtime configuration: '%s'", self.rconf.config_string()])

    # perform command
    exit_code = self.perform_command()
    self.log([u"Execution completed with code %d", exit_code])

    # output log if requested
    if self.log_file_path is not None:
        self.log([
            u"User requested saving log to file '%s'", self.log_file_path
        ])
        self.logger.write(self.log_file_path)
        if self.use_sys:
            self.print_info(u"Log written to file '%s'" %
                            self.log_file_path)

    return self.exit(exit_code)