def test_log(self):
    """Logging one message per severity level stores all four entries."""
    log = Logger(tee=False, indentation=4)
    for text, severity in [
        (u"Message 1", Logger.DEBUG),
        (u"Message 2", Logger.INFO),
        (u"Message 3", Logger.WARNING),
        (u"Message 4", Logger.CRITICAL),
    ]:
        log.log(text, severity)
    self.assertEqual(len(log), 4)
def __init__(self, job=None, logger=None):
    """Store the job to execute; directories are assigned later."""
    self.job = job
    # working/tmp directories are created on demand, not here
    self.working_directory = None
    self.tmp_directory = None
    # fall back to a default Logger when none is injected
    self.logger = Logger() if logger is None else logger
def __init__(self, file_path, container_format=None, logger=None):
    """Wrap the container at ``file_path`` and detect its actual type."""
    # fall back to a default Logger when none is injected
    self.logger = Logger() if logger is None else logger
    self.file_path = file_path
    self.container_format = container_format
    self.actual_container = None
    self._log("Setting actual Container object")
    self._set_actual_container()
def test_tag(self):
    """The tag appears only in the pretty-printed lines that were tagged."""
    log = Logger(tee=False, indentation=4)
    log.log(u"Message 1", Logger.DEBUG, tag=u"TEST")
    log.log(u"Message 2", Logger.DEBUG)
    log.log(u"Message 3", Logger.DEBUG, tag=u"TEST")
    log.log(u"Message 4", Logger.DEBUG)
    lines = log.pretty_print(as_list=True)
    # alternating tagged / untagged messages
    for index, expected in enumerate([True, False, True, False]):
        self.assertEqual(lines[index].find(u"TEST") > -1, expected)
def test_tag(self):
    """The tag appears only in the string representations of tagged entries."""
    log = Logger(tee=False, indentation=4)
    log.log("Message 1", Logger.DEBUG, tag="TEST")
    log.log("Message 2", Logger.DEBUG)
    log.log("Message 3", Logger.DEBUG, tag="TEST")
    log.log("Message 4", Logger.DEBUG)
    lines = log.to_list_of_strings()
    # alternating tagged / untagged messages
    for index, expected in enumerate([True, False, True, False]):
        self.assertEqual(lines[index].find("TEST") > -1, expected)
def __init__(self, file_path, logger=None):
    """Remember the audio file path; properties are filled in lazily."""
    # fall back to a default Logger when none is injected
    self.logger = Logger() if logger is None else logger
    self.file_path = file_path
    # all derived properties start unset and are computed on demand
    for attribute in (
        "file_size",
        "audio_data",
        "audio_length",
        "audio_format",
        "audio_sample_rate",
        "audio_channels",
        "audio_mfcc",
    ):
        setattr(self, attribute, None)
def __init__(self, audio_file, text_file, frame_rate=gc.MFCC_FRAME_RATE, logger=None):
    """Bind the audio/text pair to be synchronized at the given frame rate."""
    self.audio_file = audio_file
    self.text_file = text_file
    self.frame_rate = frame_rate
    # computed later by the alignment step
    self.audio_speech = None
    # fall back to a default Logger when none is injected
    self.logger = Logger() if logger is None else logger
def __init__(self, job=None, logger=None):
    """
    Store the job to execute; working/tmp directories are assigned later.

    :param job: the job to be executed
    :param logger: the logger object; a default Logger is created if ``None``
    """
    self.job = job
    self.working_directory = None
    self.tmp_directory = None
    self.logger = logger
    # Fixed: compare to None with ``is`` rather than ``==`` (PEP 8 / E711);
    # also makes this block consistent with the sibling constructors.
    if self.logger is None:
        self.logger = Logger()
def __init__(self, algorithm, text_map, speech, nonspeech, value=None, logger=None):
    """Store the adjustment inputs; the text map is deep-copied so the
    original is never mutated by the adjustment algorithms."""
    # fall back to a default Logger when none is injected
    self.logger = Logger() if logger is None else logger
    self.algorithm = algorithm
    self.text_map = copy.deepcopy(text_map)
    self.speech = speech
    self.nonspeech = nonspeech
    self.value = value
    self.max_rate = self.DEFAULT_MAX_RATE
def __init__(
        self,
        file_path=None,
        file_format=None,
        parameters=None,
        logger=None
    ):
    """
    Store file path/format/parameters and, when both path and format are
    given, immediately read the file contents.

    :param file_path: path of the file to read (optional)
    :param file_format: format of the file (optional)
    :param parameters: additional parameters (optional)
    :param logger: the logger object; a default Logger is created if ``None``
    """
    self.file_path = file_path
    self.file_format = file_format
    self.parameters = parameters
    self.fragments = []
    # Fixed: only construct the default Logger when none was supplied.
    # The original built a Logger unconditionally and then discarded it
    # whenever a logger was passed in; this is now consistent with the
    # lazy-default pattern used by every sibling constructor.
    self.logger = logger
    if self.logger is None:
        self.logger = Logger()
    if (self.file_path is not None) and (self.file_format is not None):
        self._read_from_file()
def __init__(self, file_path, logger=None):
    """
    Store the audio file path and read its properties immediately.

    :param file_path: path of the audio file
    :param logger: the logger object; a default Logger is created if ``None``
    """
    self.logger = logger
    # Fixed: compare to None with ``is`` rather than ``==`` (PEP 8 / E711);
    # also makes this block consistent with the sibling constructors.
    if self.logger is None:
        self.logger = Logger()
    self.file_path = file_path
    self.file_size = None
    self.audio_length = None
    self.audio_format = None
    self.audio_sample_rate = None
    self.audio_channels = None
    self._read_properties()
def test_execute(self):
    """Execute a single unparsed-text task end-to-end and emit a SMIL sync map."""
    cfg = (
        "task_language=en"
        "|os_task_file_format=smil"
        "|os_task_file_name=p001.smil"
        "|os_task_file_smil_audio_ref=p001.mp3"
        "|os_task_file_smil_page_ref=p001.xhtml"
        "|is_text_type=unparsed"
        "|is_text_unparsed_id_regex=f[0-9]+"
        "|is_text_unparsed_id_sort=numeric"
    )
    task = Task(cfg)
    task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
    task.text_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.xhtml"
    runner = ExecuteTask(task, logger=Logger(tee=True))
    self.assertTrue(runner.execute())
    task.sync_map_file_path_absolute = "/tmp/p001.smil"
    self.assertNotEqual(task.output_sync_map_file(), None)
def test_execute(self):
    """Execute a plain-text task with head/process audio lengths and emit TXT output."""
    cfg = (
        "task_language=en"
        "|os_task_file_format=txt"
        "|os_task_file_name=output_head.txt"
        "|is_text_type=plain"
        "|is_audio_file_head_length=11.960"
        "|is_audio_file_process_length=31.640"
    )
    task = Task(cfg)
    task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
    task.text_file_path_absolute = "../aeneas/tests/res/inputtext/sonnet_plain_head_length.txt"
    runner = ExecuteTask(task, logger=Logger(tee=True))
    self.assertTrue(runner.execute())
    task.sync_map_file_path_absolute = "/tmp/output_head_length.txt"
    self.assertNotEqual(task.output_sync_map_file(), None)
def __init__(self, wave_path=None, frame_rate=gc.MFCC_FRAME_RATE, energy_threshold=gc.VAD_LOG_ENERGY_THRESHOLD, min_nonspeech_length=gc.VAD_MIN_NONSPEECH_LENGTH, extend_after=gc.VAD_EXTEND_SPEECH_INTERVAL_AFTER, extend_before=gc.VAD_EXTEND_SPEECH_INTERVAL_BEFORE, logger=None):
    """Store the VAD parameters; computed fields start unset."""
    # fall back to a default Logger when none is injected
    self.logger = Logger() if logger is None else logger
    self.wave_path = wave_path
    self.frame_rate = frame_rate
    self.energy_threshold = energy_threshold
    self.min_nonspeech_length = min_nonspeech_length
    self.extend_after = extend_after
    self.extend_before = extend_before
    # results of the VAD computation, filled in later
    for attribute in ("wave_mfcc", "wave_len", "speech", "nonspeech"):
        setattr(self, attribute, None)
def __init__(
        self,
        real_wave_path,
        synt_wave_path,
        frame_rate=gc.MFCC_FRAME_RATE,
        margin=gc.ALIGNER_MARGIN,
        algorithm=DTWAlgorithm.STRIPE,
        logger=None
    ):
    """Store the real/synthesized wave pair and the DTW parameters."""
    # fall back to a default Logger when none is injected
    self.logger = Logger() if logger is None else logger
    self.real_wave_path = real_wave_path
    self.synt_wave_path = synt_wave_path
    self.frame_rate = frame_rate
    self.margin = margin
    self.algorithm = algorithm
    # intermediate/derived values, computed by the alignment step
    for attribute in (
        "real_wave_full_mfcc",
        "synt_wave_full_mfcc",
        "real_wave_length",
        "synt_wave_length",
        "computed_path",
    ):
        setattr(self, attribute, None)
def test_execute(self):
    """Load a job from a zipped container, execute it, and write the output container."""
    runner = ExecuteJob(job=None, logger=Logger(tee=True))
    runner.load_job_from_container("../aeneas/tests/res/container/job.zip")
    self.assertNotEqual(runner.job, None)
    self.assertTrue(runner.execute())
    result, path = runner.write_output_container("/tmp/")
    self.assertTrue(result)
    self.assertTrue(os.path.exists(path))
    runner.clean()
def __init__(
        self,
        audio_file,
        text_file,
        frame_rate=gc.MFCC_FRAME_RATE,
        logger=None
    ):
    """Bind the audio/text pair to be synchronized at the given frame rate."""
    self.audio_file = audio_file
    self.text_file = text_file
    self.frame_rate = frame_rate
    # computed later by the alignment step
    self.audio_speech = None
    # fall back to a default Logger when none is injected
    self.logger = Logger() if logger is None else logger
def main():
    """
    Entry point.

    Usage (Python 2 CLI):
        prog CONTAINER [CONFIG_STRING] OUTPUT_DIR [-v]

    Loads a job from CONTAINER (optionally overriding its configuration
    with CONFIG_STRING), executes it, and writes the output container
    into OUTPUT_DIR. ``-v`` as the LAST argument enables verbose logging.
    """
    # need at least: prog, container, output_dir
    if len(sys.argv) < 3:
        usage()
        return
    container_path = sys.argv[1]
    config_string = None
    # verbose mode is signalled by "-v" as the last argument
    verbose = (sys.argv[-1] == "-v")
    # 4 args => container + config_string + output_dir are all present;
    # one more slot is needed when the trailing "-v" is also there
    number_of_arguments = 4
    if verbose:
        number_of_arguments += 1
    if len(sys.argv) >= number_of_arguments:
        config_string = sys.argv[2]
        output_dir = sys.argv[3]
    else:
        output_dir = sys.argv[2]
    logger = Logger(tee=verbose)
    executor = ExecuteJob(logger=logger)
    # warn (but continue) when the compiled C extensions are unavailable
    if not gf.can_run_c_extension():
        print "[WARN] Unable to load Python C Extensions"
        print "[WARN] Running the slower pure Python code"
        print "[WARN] See the README file for directions to compile the Python C Extensions"
    print "[INFO] Loading job from container..."
    result = executor.load_job_from_container(container_path, config_string)
    print "[INFO] Loading job from container... done"
    if not result:
        print "[ERRO] The job cannot be loaded from the specified container"
        return
    print "[INFO] Executing..."
    result = executor.execute()
    print "[INFO] Executing... done"
    if not result:
        print "[ERRO] An error occurred while executing the job"
        return
    print "[INFO] Creating output container..."
    result, path = executor.write_output_container(output_dir)
    print "[INFO] Creating output container... done"
    if result:
        print "[INFO] Created %s" % path
    else:
        print "[ERRO] An error occurred while writing the output container"
    # remove working files; True presumably also removes the job's
    # temporary directory -- TODO confirm against ExecuteJob.clean
    executor.clean(True)
def __init__(
        self,
        algorithm,
        text_map,
        speech,
        nonspeech,
        value=None,
        logger=None
    ):
    """Store the adjustment inputs; ``text_map`` is deep-copied so the
    caller's list is never mutated by the adjustment algorithms."""
    self.algorithm = algorithm
    self.text_map = copy.deepcopy(text_map)
    self.speech = speech
    self.nonspeech = nonspeech
    self.value = value
    self.max_rate = self.DEFAULT_MAX_RATE
    # fall back to a default Logger when none is injected
    self.logger = Logger() if logger is None else logger
def __init__(
        self,
        wave_path_1,
        wave_path_2,
        frame_rate=gc.ALIGNER_FRAME_RATE,
        margin=gc.ALIGNER_MARGIN,
        algorithm=DTWAlgorithm.STRIPE,
        logger=None
    ):
    """
    Store the two wave paths and the DTW parameters.

    :param wave_path_1: path of the first wave file
    :param wave_path_2: path of the second wave file
    :param frame_rate: the MFCC frame rate
    :param margin: the DTW stripe margin
    :param algorithm: the DTW algorithm to use
    :param logger: the logger object; a default Logger is created if ``None``
    """
    self.logger = logger
    # Fixed: compare to None with ``is`` rather than ``==`` (PEP 8 / E711);
    # also makes this block consistent with the sibling constructors.
    if self.logger is None:
        self.logger = Logger()
    self.wave_path_1 = wave_path_1
    self.wave_path_2 = wave_path_2
    self.frame_rate = frame_rate
    self.margin = margin
    self.algorithm = algorithm
    # intermediate/derived values, computed by the alignment step
    self.wave_mfcc_1 = None
    self.wave_mfcc_2 = None
    self.wave_len_1 = None
    self.wave_len_2 = None
    self.computed_path = None
def __init__(
        self,
        wave_path=None,
        frame_rate=gc.MFCC_FRAME_RATE,
        energy_threshold=gc.VAD_LOG_ENERGY_THRESHOLD,
        min_nonspeech_length=gc.VAD_MIN_NONSPEECH_LENGTH,
        extend_after=gc.VAD_EXTEND_SPEECH_INTERVAL_AFTER,
        extend_before=gc.VAD_EXTEND_SPEECH_INTERVAL_BEFORE,
        logger=None
    ):
    """Store the VAD parameters; computed fields start unset."""
    # fall back to a default Logger when none is injected
    self.logger = Logger() if logger is None else logger
    self.wave_path = wave_path
    self.frame_rate = frame_rate
    self.energy_threshold = energy_threshold
    self.min_nonspeech_length = min_nonspeech_length
    self.extend_after = extend_after
    self.extend_before = extend_before
    # results of the VAD computation, filled in later
    self.wave_mfcc = None
    self.wave_len = None
    self.speech = None
    self.nonspeech = None
class AnalyzeContainer(object):
    """
    Analyze a given container and build the corresponding job.

    :param container: the container to be analyzed
    :type  container: :class:`aeneas.container.Container`
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    TAG = "AnalyzeContainer"

    def __init__(self, container, logger=None):
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()
        self.container = container

    def analyze(self):
        """
        Analyze the given container and return the corresponding job object.

        On error, it will return ``None``.

        :rtype: :class:`aeneas.job.Job`
        """
        # XML config takes precedence over TXT config
        if self.container.has_config_xml:
            self._log("Analyzing container with XML config file")
            return self._analyze_xml_config(config_contents=None)
        elif self.container.has_config_txt:
            self._log("Analyzing container with TXT config file")
            return self._analyze_txt_config(config_string=None)
        else:
            self._log("No configuration file in this container, returning None")
        return None

    def analyze_from_wizard(self, config_string):
        """
        Analyze the given container using the given config string
        and return the corresponding job.

        On error, it will return ``None``.

        :param config_string: the configuration string generated by the wizard
        :type  config_string: string
        :rtype: :class:`aeneas.job.Job`
        """
        self._log("Analyzing container with config string from wizard")
        return self._analyze_txt_config(config_string=config_string)

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def _analyze_txt_config(self, config_string=None):
        """
        Analyze the given container and return the corresponding job.

        If ``config_string`` is ``None``,
        try reading it from the TXT config file inside the container.

        :param config_string: the configuration string
        :type  config_string: string
        :rtype: :class:`aeneas.job.Job`
        """
        # TODO break this function down into smaller functions
        self._log("Analyzing container with TXT config string")
        if config_string is None:
            # read the TXT config file from the container and
            # convert its contents into a config string
            self._log("Analyzing container with TXT config file")
            config_entry = self.container.entry_config_txt
            self._log(["Found TXT config entry '%s'", config_entry])
            config_dir = os.path.dirname(config_entry)
            self._log(["Directory of TXT config entry: '%s'", config_dir])
            self._log(["Reading TXT config entry: '%s'", config_entry])
            config_contents = self.container.read_entry(config_entry)
            #self._log("Removing BOM")
            #config_contents = gf.remove_bom(config_contents)
            self._log("Converting config contents to config string")
            config_string = gf.config_txt_to_string(config_contents)
        else:
            # config string given directly (e.g., by the wizard):
            # paths inside it are relative to the container root
            self._log(["Analyzing container with TXT config string '%s'", config_string])
            config_dir = ""
            #self._log("Removing BOM")
            #config_string = gf.remove_bom(config_string)
        # create the Job object to be returned
        self._log("Creating the Job object")
        job = Job(config_string)
        # get the entries in this container
        self._log("Getting entries")
        entries = self.container.entries()
        # convert the config string to dict
        self._log("Converting config string into config dict")
        parameters = gf.config_string_to_dict(config_string)
        # compute the root directory for the task assets
        self._log("Calculating the path of the tasks root directory")
        tasks_root_directory = gf.norm_join(
            config_dir,
            parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX]
        )
        self._log(["Path of the tasks root directory: '%s'", tasks_root_directory])
        # compute the root directory for the sync map files
        self._log("Calculating the path of the sync map root directory")
        sync_map_root_directory = gf.norm_join(
            config_dir,
            parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
        )
        job_os_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
        self._log(["Path of the sync map root directory: '%s'", sync_map_root_directory])
        # prepare relative path and file name regex for text and audio files
        text_file_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
        self._log(["Relative path for text file: '%s'", text_file_relative_path])
        text_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
        self._log(["Regex for text file: '%s'", parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX]])
        audio_file_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
        self._log(["Relative path for audio file: '%s'", audio_file_relative_path])
        audio_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])
        self._log(["Regex for audio file: '%s'", parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX]])
        # flat hierarchy: all text/audio files live directly under the
        # tasks root directory and are paired by file name (without extension)
        if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.FLAT:
            self._log("Looking for text/audio pairs in flat hierarchy")
            text_files = self._find_files(
                entries,
                tasks_root_directory,
                text_file_relative_path,
                text_file_name_regex
            )
            self._log(["Found text files: '%s'", text_files])
            audio_files = self._find_files(
                entries,
                tasks_root_directory,
                audio_file_relative_path,
                audio_file_name_regex
            )
            self._log(["Found audio files: '%s'", audio_files])
            self._log("Matching files in flat hierarchy...")
            matched_tasks = self._match_files_flat_hierarchy(
                text_files,
                audio_files
            )
            self._log("Matching files in flat hierarchy... done")
            for task_info in matched_tasks:
                self._log(["Creating task: '%s'", str(task_info)])
                task = self._create_task(
                    task_info,
                    config_string,
                    sync_map_root_directory,
                    job_os_hierarchy_type
                )
                job.add_task(task)
        # paged hierarchy: one subdirectory per task (page), each holding
        # exactly one text file and one audio file
        if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.PAGED:
            self._log("Looking for text/audio pairs in paged hierarchy")
            # find all subdirectories of tasks_root_directory
            # that match gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
            matched_directories = self._match_directories(
                entries,
                tasks_root_directory,
                parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
            )
            for matched_directory in matched_directories:
                # rebuild the full path
                matched_directory_full_path = gf.norm_join(
                    tasks_root_directory,
                    matched_directory
                )
                self._log(["Looking for text/audio pairs in directory '%s'", matched_directory_full_path])
                # look for text and audio files there
                text_files = self._find_files(
                    entries,
                    matched_directory_full_path,
                    text_file_relative_path,
                    text_file_name_regex
                )
                self._log(["Found text files: '%s'", text_files])
                audio_files = self._find_files(
                    entries,
                    matched_directory_full_path,
                    audio_file_relative_path,
                    audio_file_name_regex
                )
                self._log(["Found audio files: '%s'", audio_files])
                # if we have found exactly one text and one audio file,
                # create a Task
                if (len(text_files) == 1) and (len(audio_files) == 1):
                    self._log(["Exactly one text file and one audio file in '%s'", matched_directory])
                    task_info = [
                        matched_directory,
                        text_files[0],
                        audio_files[0]
                    ]
                    self._log(["Creating task: '%s'", str(task_info)])
                    task = self._create_task(
                        task_info,
                        config_string,
                        sync_map_root_directory,
                        job_os_hierarchy_type
                    )
                    job.add_task(task)
                elif len(text_files) > 1:
                    self._log(["More than one text file in '%s'", matched_directory])
                elif len(audio_files) > 1:
                    self._log(["More than one audio file in '%s'", matched_directory])
                else:
                    self._log(["No text nor audio file in '%s'", matched_directory])
        # return the Job
        return job

    def _analyze_xml_config(self, config_contents=None):
        """
        Analyze the given container and return the corresponding job.

        If ``config_contents`` is ``None``,
        try reading it from the XML config file inside the container.

        :param config_contents: the contents of the XML config file
        :type  config_contents: string
        :rtype: :class:`aeneas.job.Job`
        """
        # TODO break this function down into smaller functions
        self._log("Analyzing container with XML config string")
        if config_contents is None:
            # read the XML config file from the container
            self._log("Analyzing container with XML config file")
            config_entry = self.container.entry_config_xml
            self._log(["Found XML config entry '%s'", config_entry])
            config_dir = os.path.dirname(config_entry)
            self._log(["Directory of XML config entry: '%s'", config_dir])
            self._log(["Reading XML config entry: '%s'", config_entry])
            config_contents = self.container.read_entry(config_entry)
        else:
            # contents given directly: paths are relative to container root
            self._log("Analyzing container with XML config contents")
            config_dir = ""
        # remove BOM
        #self._log("Removing BOM")
        #config_contents = gf.remove_bom(config_contents)
        # get the job parameters and tasks parameters
        self._log("Converting config contents into job config dict")
        job_parameters = gf.config_xml_to_dict(
            config_contents,
            result=None,
            parse_job=True
        )
        self._log("Converting config contents into tasks config dict")
        tasks_parameters = gf.config_xml_to_dict(
            config_contents,
            result=None,
            parse_job=False
        )
        # compute the root directory for the sync map files
        self._log("Calculating the path of the sync map root directory")
        sync_map_root_directory = gf.norm_join(
            config_dir,
            job_parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
        )
        job_os_hierarchy_type = job_parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
        self._log(["Path of the sync map root directory: '%s'", sync_map_root_directory])
        # create the Job object to be returned
        self._log("Converting job config dict into job config string")
        config_string = gf.config_dict_to_string(job_parameters)
        job = Job(config_string)
        # create the Task objects, one per task element in the XML config
        for task_parameters in tasks_parameters:
            self._log("Converting task config dict into task config string")
            config_string = gf.config_dict_to_string(task_parameters)
            self._log(["Creating task with config string '%s'", config_string])
            try:
                custom_id = task_parameters[gc.PPN_TASK_CUSTOM_ID]
            except KeyError:
                # custom id is optional in the XML config
                custom_id = ""
            task_info = [
                custom_id,
                gf.norm_join(
                    config_dir,
                    task_parameters[gc.PPN_TASK_IS_TEXT_FILE_XML]
                ),
                gf.norm_join(
                    config_dir,
                    task_parameters[gc.PPN_TASK_IS_AUDIO_FILE_XML]
                )
            ]
            self._log(["Creating task: '%s'", str(task_info)])
            task = self._create_task(
                task_info,
                config_string,
                sync_map_root_directory,
                job_os_hierarchy_type
            )
            job.add_task(task)
        # return the Job
        return job

    def _create_task(
            self,
            task_info,
            config_string,
            sync_map_root_directory,
            job_os_hierarchy_type
        ):
        """
        Create a task object from

        1. the ``task_info`` found analyzing the container entries, and
        2. the given ``config_string``.

        :param task_info: the task information: ``[prefix, text_path, audio_path]``
        :type  task_info: list of strings
        :param config_string: the configuration string
        :type  config_string: string
        :param sync_map_root_directory: the root directory for the sync map files
        :type  sync_map_root_directory: string (path)
        :param job_os_hierarchy_type: type of job output hierarchy
        :type  job_os_hierarchy_type: :class:`aeneas.hierarchytype.HierarchyType`
        :rtype: :class:`aeneas.task.Task`
        """
        self._log("Converting config string to config dict")
        parameters = gf.config_string_to_dict(config_string)
        self._log("Creating task")
        task = Task(config_string)
        task.configuration.description = "Task %s" % task_info[0]
        self._log(["Task description: %s", task.configuration.description])
        # task-level language overrides the job-level language
        try:
            task.configuration.language = parameters[gc.PPN_TASK_LANGUAGE]
            self._log(["Set language from task: '%s'", task.configuration.language])
        except KeyError:
            task.configuration.language = parameters[gc.PPN_JOB_LANGUAGE]
            self._log(["Set language from job: '%s'", task.configuration.language])
        custom_id = task_info[0]
        task.configuration.custom_id = custom_id
        self._log(["Task custom_id: %s", task.configuration.custom_id])
        task.text_file_path = task_info[1]
        self._log(["Task text file path: %s", task.text_file_path])
        task.audio_file_path = task_info[2]
        self._log(["Task audio file path: %s", task.audio_file_path])
        task.sync_map_file_path = self._compute_sync_map_file_path(
            sync_map_root_directory,
            job_os_hierarchy_type,
            custom_id,
            task.configuration.os_file_name
        )
        self._log(["Task sync map file path: %s", task.sync_map_file_path])
        # SMIL refs may contain the task-prefix placeholder; expand it
        self._log("Replacing placeholder in os_file_smil_audio_ref")
        task.configuration.os_file_smil_audio_ref = self._replace_placeholder(
            task.configuration.os_file_smil_audio_ref,
            custom_id
        )
        self._log("Replacing placeholder in os_file_smil_page_ref")
        task.configuration.os_file_smil_page_ref = self._replace_placeholder(
            task.configuration.os_file_smil_page_ref,
            custom_id
        )
        self._log("Returning task")
        return task

    def _replace_placeholder(self, string, custom_id):
        """
        Replace the prefix placeholder
        :class:`aeneas.globalconstants.PPV_OS_TASK_PREFIX`
        with ``custom_id`` and return the resulting string.

        Returns ``None`` when ``string`` is ``None``.

        :rtype: string
        """
        if string is None:
            return None
        self._log(["Replacing '%s' with '%s' in '%s'", gc.PPV_OS_TASK_PREFIX, custom_id, string])
        return string.replace(gc.PPV_OS_TASK_PREFIX, custom_id)

    def _compute_sync_map_file_path(
            self,
            root,
            hierarchy_type,
            custom_id,
            file_name
        ):
        """
        Compute the sync map file path inside the output container.

        :param root: the root of the sync map files inside the container
        :type  root: string (path)
        :param hierarchy_type: type of job output hierarchy
        :type  hierarchy_type: :class:`aeneas.hierarchytype.HierarchyType`
        :param custom_id: the task custom id (flat) or page directory name (paged)
        :type  custom_id: string
        :param file_name: the output file name for the sync map
        :type  file_name: string
        :rtype: string (path)
        """
        prefix = root
        # in paged hierarchies each task writes into its own subdirectory
        if hierarchy_type == HierarchyType.PAGED:
            prefix = gf.norm_join(prefix, custom_id)
        file_name_joined = gf.norm_join(prefix, file_name)
        return self._replace_placeholder(file_name_joined, custom_id)

    def _find_files(self, entries, root, relative_path, file_name_regex):
        """
        Return the elements in entries that

        1. are in ``root/relative_path``, and
        2. match ``file_name_regex``.

        :param entries: the list of entries (file paths) in the container
        :type  entries: list of strings (path)
        :param root: the root directory of the container
        :type  root: string (path)
        :param relative_path: the relative path in which we must search
        :type  relative_path: string (path)
        :param file_name_regex: the regex matching the desired file names
        :type  file_name_regex: regex
        :rtype: list of strings (path)
        """
        self._log(["Finding files within root: '%s'", root])
        target = root
        if relative_path is not None:
            self._log(["Joining relative path: '%s'", relative_path])
            target = gf.norm_join(root, relative_path)
        self._log(["Finding files within target: '%s'", target])
        files = []
        target_len = len(target)
        for entry in entries:
            if entry.startswith(target):
                self._log(["Examining entry: '%s'", entry])
                # strip "target/" prefix, then match only the remainder
                entry_suffix = entry[target_len + 1:]
                self._log(["Examining entry suffix: '%s'", entry_suffix])
                if re.search(file_name_regex, entry_suffix) is not None:
                    self._log(["Match: '%s'", entry])
                    files.append(entry)
                else:
                    self._log(["No match: '%s'", entry])
        return sorted(files)

    def _match_files_flat_hierarchy(self, text_files, audio_files):
        """
        Match audio and text files in flat hierarchies.

        Two files match if their names,
        once removed the file extension,
        are the same.

        Examples: ::

            foo/text/a.txt foo/audio/a.mp3 => match: ["a", "foo/text/a.txt", "foo/audio/a.mp3"]
            foo/text/a.txt foo/audio/b.mp3 => no match
            foo/res/c.txt  foo/res/c.mp3   => match: ["c", "foo/res/c.txt", "foo/res/c.mp3"]
            foo/res/d.txt  foo/res/e.mp3   => no match

        :param text_files: the entries corresponding to text files
        :type  text_files: list of strings (path)
        :param audio_files: the entries corresponding to audio files
        :type  audio_files: list of strings (path)
        :rtype: list of lists (see above)
        """
        self._log("Matching files in flat hierarchy")
        self._log(["Text files: '%s'", text_files])
        self._log(["Audio files: '%s'", audio_files])
        # index both lists by file name without extension
        d_text = dict()
        d_audio = dict()
        for text_file in text_files:
            text_file_no_ext = gf.file_name_without_extension(text_file)
            d_text[text_file_no_ext] = text_file
            self._log(["Added text file '%s' to key '%s'", text_file, text_file_no_ext])
        for audio_file in audio_files:
            audio_file_no_ext = gf.file_name_without_extension(audio_file)
            d_audio[audio_file_no_ext] = audio_file
            self._log(["Added audio file '%s' to key '%s'", audio_file, audio_file_no_ext])
        # pair up keys present in both indexes
        tasks = []
        for key in d_text.keys():
            self._log(["Examining text key '%s'", key])
            if key in d_audio:
                self._log(["Key '%s' is also in audio", key])
                tasks.append([key, d_text[key], d_audio[key]])
                self._log(["Added pair ('%s', '%s')", d_text[key], d_audio[key]])
        return tasks

    def _match_directories(self, entries, root, regex_string):
        """
        Match directory names in paged hierarchies.

        Example: ::

            root = /foo/bar
            regex_string = [0-9]+

            /foo/bar/
                1/
                    bar
                    baz
                2/
                    bar
                3/
                    foo

            => ["/foo/bar/1", "/foo/bar/2", "/foo/bar/3"]

        :param entries: the list of entries (paths) of a container
        :type  entries: list of strings (paths)
        :param root: the root directory to search within
        :type  root: string (path)
        :param regex_string: regex string to match directory names
        :type  regex_string: string
        :rtype: list of matched directories
        """
        self._log("Matching directory names in paged hierarchy")
        self._log(["Matching within '%s'", root])
        self._log(["Matching regex '%s'", regex_string])
        regex = re.compile(r"" + regex_string)
        directories = set()
        root_len = len(root)
        for entry in entries:
            # look only inside root dir
            if entry.startswith(root):
                self._log(["Examining '%s'", entry])
                # remove common prefix root/
                entry = entry[root_len + 1:]
                # split path
                entry_splitted = entry.split(os.sep)
                # match regex: the entry must be a file INSIDE a matching
                # directory (>= 2 path components), not the directory itself
                if ((len(entry_splitted) >= 2) and
                        (re.match(regex, entry_splitted[0]) is not None)):
                    directories.add(entry_splitted[0])
                    self._log(["Match: '%s'", entry_splitted[0]])
                else:
                    self._log(["No match: '%s'", entry])
        return sorted(directories)
def test_loggable_logger(self):
    """An injected logger is exposed unchanged, and rconf gets a default."""
    injected = Logger()
    loggable = Loggable(logger=injected)
    self.assertIsNotNone(loggable.rconf)
    self.assertEqual(injected, loggable.logger)
class AdjustBoundaryAlgorithm(object):
    """
    Enumeration of the available algorithms to adjust
    the boundary point between two consecutive fragments.

    :param algorithm: the boundary adjustment algorithm to be used
    :type  algorithm: string (from :class:`aeneas.adjustboundaryalgorithm.AdjustBoundaryAlgorithm` enumeration)
    :param text_map: a text map list [[start, end, id, text], ..., []]
    :type  text_map: list
    :param speech: a list of time intervals [[s_1, e_1,], ..., [s_k, e_k]] containing speech
    :type  speech: list
    :param nonspeech: a list of time intervals [[s_1, e_1,], ..., [s_j, e_j]] not containing speech
    :type  nonspeech: list
    :param value: an optional parameter to be passed to the boundary
                  adjustment algorithm, it will be converted (to int, to float)
                  as needed, depending on the selected algorithm
    :type  value: string
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    AFTERCURRENT = "aftercurrent"
    """ Set the boundary at ``value`` seconds after the end of the current fragment """

    AUTO = "auto"
    """ Auto (no adjustment) """

    BEFORENEXT = "beforenext"
    """ Set the boundary at ``value`` seconds before the beginning of the next fragment """

    OFFSET = "offset"
    """ Offset the current boundaries by ``value`` seconds

    .. versionadded:: 1.1.0
    """

    PERCENT = "percent"
    """ Set the boundary at ``value`` percent of the nonspeech interval
    between the current and the next fragment """

    RATE = "rate"
    """ Adjust boundaries trying to respect the ``value``
    characters/second constraint """

    RATEAGGRESSIVE = "rateaggressive"
    """ Adjust boundaries trying to respect the ``value``
    characters/second constraint (aggressive mode)

    .. versionadded:: 1.1.0
    """

    ALLOWED_VALUES = [
        AFTERCURRENT,
        AUTO,
        BEFORENEXT,
        OFFSET,
        PERCENT,
        RATE,
        RATEAGGRESSIVE
    ]
    """ List of all the allowed values """

    DEFAULT_MAX_RATE = 21.0
    """ Default max rate (used only when RATE or RATEAGGRESSIVE algorithms are used) """

    TOLERANCE = 0.001
    """ Tolerance when comparing floats """

    TAG = "AdjustBoundaryAlgorithm"

    def __init__(self, algorithm, text_map, speech, nonspeech, value=None,
                 logger=None):
        self.algorithm = algorithm
        # deep copy: the adjustment algorithms mutate the map in place,
        # and the caller's copy must not be affected
        self.text_map = copy.deepcopy(text_map)
        self.speech = speech
        self.nonspeech = nonspeech
        self.value = value
        self.logger = logger
        self.max_rate = self.DEFAULT_MAX_RATE
        if self.logger is None:
            self.logger = Logger()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def adjust(self):
        """
        Adjust the boundaries of the text map,
        dispatching on ``self.algorithm``.

        :rtype: list of intervals
        :raises AttributeError: if the text map is ``None``
        """
        if self.text_map is None:
            raise AttributeError("Text map is None")
        if self.algorithm == self.AUTO:
            return self._adjust_auto()
        elif self.algorithm == self.AFTERCURRENT:
            return self._adjust_aftercurrent()
        elif self.algorithm == self.BEFORENEXT:
            return self._adjust_beforenext()
        elif self.algorithm == self.OFFSET:
            return self._adjust_offset()
        elif self.algorithm == self.PERCENT:
            return self._adjust_percent()
        elif self.algorithm == self.RATE:
            return self._adjust_rate(False)
        elif self.algorithm == self.RATEAGGRESSIVE:
            return self._adjust_rate(True)
        # unknown algorithm: return the (copied) map unchanged
        return self.text_map

    def _adjust_auto(self):
        """ AUTO algorithm: no adjustment at all. """
        self._log("Called _adjust_auto: returning text_map unchanged")
        return self.text_map

    def _adjust_offset(self):
        """ OFFSET algorithm: shift every internal boundary by ``value`` seconds. """
        self._log("Called _adjust_offset")
        try:
            value = float(self.value)
            for index in range(1, len(self.text_map)):
                current = self.text_map[index]
                previous = self.text_map[index - 1]
                if value >= 0:
                    # clamp so the current fragment cannot get negative length
                    offset = min(value, current[1] - current[0])
                else:
                    # clamp so the previous fragment cannot get negative length
                    offset = -min(-value, previous[1] - previous[0])
                previous[1] += offset
                current[0] += offset
        except:
            # NOTE(review): bare except also hides programming errors;
            # here it covers a non-numeric ``value``
            self._log(
                "Exception in _adjust_offset: returning text_map unchanged")
        return self.text_map

    def _adjust_percent(self):
        """ PERCENT algorithm: place the boundary at ``value`` percent of the nonspeech interval. """
        def new_time(current_boundary, nsi):
            # nsi is a [start, end] nonspeech interval
            duration = nsi[1] - nsi[0]
            try:
                # clamp percent into [0, 100]
                percent = max(min(int(self.value), 100), 0) / 100.0
            except:
                # bad ``value``: default to the midpoint
                percent = 0.500
            return nsi[0] + duration * percent
        return self._adjust_on_nsi(new_time)

    def _adjust_aftercurrent(self):
        """ AFTERCURRENT algorithm: boundary at ``value`` seconds after the nonspeech start. """
        def new_time(current_boundary, nsi):
            duration = nsi[1] - nsi[0]
            try:
                # clamp delay into [0, duration]
                delay = max(min(float(self.value), duration), 0)
                if delay == 0:
                    return current_boundary
                return nsi[0] + delay
            except:
                # bad ``value``: leave the boundary where it is
                return current_boundary
        return self._adjust_on_nsi(new_time)

    def _adjust_beforenext(self):
        """ BEFORENEXT algorithm: boundary at ``value`` seconds before the nonspeech end. """
        def new_time(current_boundary, nsi):
            duration = nsi[1] - nsi[0]
            try:
                # clamp delay into [0, duration]
                delay = max(min(float(self.value), duration), 0)
                if delay == 0:
                    return current_boundary
                return nsi[1] - delay
            except:
                # bad ``value``: leave the boundary where it is
                return current_boundary
        return self._adjust_on_nsi(new_time)

    def _adjust_on_nsi(self, new_time_function):
        """
        Move each internal boundary inside the nonspeech interval (nsi)
        that contains it, to the time returned by ``new_time_function``.
        Assumes ``self.nonspeech`` is sorted by start time.
        """
        nsi_index = 0
        # TODO numpy-fy this loop?
        for index in range(len(self.text_map) - 1):
            current_boundary = self.text_map[index][1]
            self._log(["current_boundary: %.3f", current_boundary])
            # advance to the first nsi ending at/after the boundary;
            # the tolerance comparison seems necessary
            while ((nsi_index < len(self.nonspeech)) and
                   (self.nonspeech[nsi_index][1] + self.TOLERANCE <=
                    current_boundary)):
                nsi_index += 1
            nsi = None
            if ((nsi_index < len(self.nonspeech)) and
                    (current_boundary >= self.nonspeech[nsi_index][0] -
                     self.TOLERANCE)):
                nsi = self.nonspeech[nsi_index]
                nsi_index += 1
            if nsi:
                self._log([" in interval %.3f %.3f", nsi[0], nsi[1]])
                new_time = new_time_function(current_boundary, nsi)
                self._log([" new_time: %.3f", new_time])
                new_start = self.text_map[index][0]
                new_end = self.text_map[index + 1][1]
                # only move the boundary if it stays within the union
                # of the current and next fragment
                if self._time_in_interval(new_time, new_start, new_end):
                    self._log([
                        " updating %.3f => %.3f", current_boundary, new_time
                    ])
                    self.text_map[index][1] = new_time
                    self.text_map[index + 1][0] = new_time
                else:
                    self._log(" new_time outside: no adjustment performed")
            else:
                self._log(
                    " no nonspeech interval found: no adjustment performed")
        return self.text_map

    def _len(self, string):
        """
        Return the length of the given string.

        If it is greater than 2 times the max_rate,
        one space will become a newline,
        and hence we do not count it
        (e.g., max_rate = 21 => max 42 chars per line).

        :param string: the string to be counted
        :type  string: string
        :rtype: int
        """
        # TODO this should depend on the number of lines
        #      in the text fragment; current code assumes
        #      at most 2 lines of at most max_rate characters each
        #      (the effect of this finesse is negligible in practice)
        if string is None:
            return 0
        length = len(string)
        if length > 2 * self.max_rate:
            length -= 1
        return length

    def _time_in_interval(self, time, start, end):
        """
        Decides whether the given time is within the given interval.

        :param time: a time value
        :type  time: float
        :param start: the start of the interval
        :type  start: float
        :param end: the end of the interval
        :type  end: float
        :rtype: bool
        """
        return (time >= start) and (time <= end)

    # TODO a more efficient search (e.g., binary) is possible
    # the tolerance comparison seems necessary
    def _find_interval_containing(self, intervals, time):
        """
        Return the interval containing the given time,
        or None if no such interval exists.

        :param intervals: a list of time intervals
                          [[s_1, e_1], ..., [s_k, e_k]]
        :type  intervals: list of lists
        :param time: a time value
        :type  time: float
        :rtype: a time interval ``[s, e]`` or ``None``
        """
        for interval in intervals:
            # widen the interval by TOLERANCE on both sides
            start = interval[0] - self.TOLERANCE
            end = interval[1] + self.TOLERANCE
            if self._time_in_interval(time, start, end):
                return interval
        return None

    def _compute_rate_raw(self, start, end, length):
        """
        Compute the rate of a fragment, that is,
        the number of characters per second.

        :param start: the start time
        :type  start: float
        :param end: the end time
        :type  end: float
        :param length: the number of character (possibly adjusted) of the text
        :type  length: int
        :rtype: float
        """
        duration = end - start
        if duration > 0:
            return length / duration
        # zero or negative duration: report rate 0
        return 0

    def _compute_rate(self, index):
        """
        Compute the rate of a fragment, that is,
        the number of characters per second.

        :param index: the index of the fragment in the text map
        :type  index: int
        :rtype: float (0 when ``index`` is out of bounds)
        """
        if (index < 0) or (index >= len(self.text_map)):
            return 0
        fragment = self.text_map[index]
        start = fragment[0]
        end = fragment[1]
        length = self._len(fragment[3])
        return self._compute_rate_raw(start, end, length)

    def _compute_slack(self, index):
        """
        Return the slack of a fragment, that is,
        the difference between the current duration
        of the fragment and the duration it should have
        if its rate was exactly self.max_rate

        If the slack is positive, the fragment
        can be shrinken; if the slack is negative,
        the fragment should be stretched.

        The returned value can be None,
        in case the index is out of self.text_map bounds.

        :param index: the index of the fragment in the text map
        :type  index: int
        :rtype: float
        """
        if (index < 0) or (index >= len(self.text_map)):
            return None
        fragment = self.text_map[index]
        start = fragment[0]
        end = fragment[1]
        length = self._len(fragment[3])
        duration = end - start
        return duration - (length / self.max_rate)

    def _adjust_rate(self, aggressive=False):
        """
        RATE / RATEAGGRESSIVE algorithm driver: find fragments faster
        than ``max_rate`` and try to fix each of them.
        """
        try:
            self.max_rate = float(self.value)
        except:
            # bad ``value``: keep the current max_rate
            pass
        if self.max_rate <= 0:
            self.max_rate = self.DEFAULT_MAX_RATE
        faster = []

        # TODO numpy-fy this loop?
        for index in range(len(self.text_map)):
            fragment = self.text_map[index]
            self._log(["Fragment %d", index])
            rate = self._compute_rate(index)
            self._log([" %.3f %.3f => %.3f", fragment[0], fragment[1], rate])
            if rate > self.max_rate:
                self._log(" too fast")
                faster.append(index)

        # with a single fragment there is nothing to steal time from
        if len(self.text_map) == 1:
            self._log("Only one fragment, and it is too fast")
            return self.text_map
        if len(faster) == 0:
            self._log(["No fragment faster than max rate %.3f", self.max_rate])
            return self.text_map

        # TODO numpy-fy this loop?
        # try fixing faster fragments
        self._log("Fixing faster fragments...")
        for index in faster:
            self._log(["Fixing faster fragment %d ...", index])
            if aggressive:
                try:
                    self._rateaggressive_fix_fragment(index)
                except:
                    # NOTE(review): bare except swallows all errors here
                    self._log("Exception in _rateaggressive_fix_fragment")
            else:
                try:
                    self._rate_fix_fragment(index)
                except:
                    # NOTE(review): bare except swallows all errors here
                    self._log("Exception in _rate_fix_fragment")
            self._log(["Fixing faster fragment %d ... done", index])
        self._log("Fixing faster fragments... done")
        return self.text_map

    def _rate_fix_fragment(self, index):
        """
        Fix index-th fragment using the rate algorithm (standard variant):
        first try to expand into the previous fragment's slack,
        then (if still needed) into the next fragment's slack.
        """
        succeeded = False
        current = self.text_map[index]
        current_start = current[0]
        current_end = current[1]
        current_rate = self._compute_rate(index)
        previous_slack = self._compute_slack(index - 1)
        current_slack = self._compute_slack(index)
        next_slack = self._compute_slack(index + 1)
        if previous_slack is not None:
            previous = self.text_map[index - 1]
            self._log([
                " previous: %.3f %.3f => %.3f", previous[0], previous[1],
                self._compute_rate(index - 1)
            ])
            self._log([" previous slack: %.3f", previous_slack])
        if current_slack is not None:
            self._log([
                " current: %.3f %.3f => %.3f", current_start, current_end,
                current_rate
            ])
            self._log([" current slack: %.3f", current_slack])
        if next_slack is not None:
            # NOTE(review): reads text_map[index] (the current fragment)
            # although the log labels it "next"; text_map[index + 1] looks
            # intended — affects logging only, confirm before changing
            nextf = self.text_map[index]
            self._log([
                " next: %.3f %.3f => %.3f", nextf[0], nextf[1],
                self._compute_rate(index + 1)
            ])
            self._log([" next slack: %.3f", next_slack])
        # try expanding into the previous fragment
        new_start = current_start
        new_end = current_end
        if (previous_slack is not None) and (previous_slack > 0):
            self._log(" can expand into previous")
            nsi = self._find_interval_containing(self.nonspeech, current[0])
            previous = self.text_map[index - 1]
            if nsi is not None:
                if nsi[0] > previous[0]:
                    self._log(
                        [" found suitable nsi: %.3f %.3f", nsi[0], nsi[1]])
                    # cannot move the boundary before the nsi start
                    previous_slack = min(current[0] - nsi[0], previous_slack)
                    self._log(
                        [" previous slack after min: %.3f", previous_slack])
                    if previous_slack + current_slack >= 0:
                        self._log(" enough slack to completely fix")
                        steal_from_previous = -current_slack
                        succeeded = True
                    else:
                        self._log(" not enough slack to completely fix")
                        steal_from_previous = previous_slack
                    new_start = current_start - steal_from_previous
                    self.text_map[index - 1][1] = new_start
                    self.text_map[index][0] = new_start
                    new_rate = self._compute_rate(index)
                    self._log([
                        " old: %.3f %.3f => %.3f", current_start, current_end,
                        current_rate
                    ])
                    self._log([
                        " new: %.3f %.3f => %.3f", new_start, new_end,
                        new_rate
                    ])
                else:
                    self._log(" nsi found is not suitable")
            else:
                self._log(" no nsi found")
        else:
            self._log(" cannot expand into previous")
        if succeeded:
            self._log(" succeeded: returning")
            return
        # recompute current fragment
        # NOTE(review): current_rate is computed twice here (duplicate line)
        current_rate = self._compute_rate(index)
        current_slack = self._compute_slack(index)
        current_rate = self._compute_rate(index)
        # try expanding into the next fragment
        new_start = current_start
        new_end = current_end
        if (next_slack is not None) and (next_slack > 0):
            self._log(" can expand into next")
            nsi = self._find_interval_containing(self.nonspeech, current[1])
            previous = self.text_map[index - 1]
            if nsi is not None:
                if nsi[0] > previous[0]:
                    self._log(
                        [" found suitable nsi: %.3f %.3f", nsi[0], nsi[1]])
                    # cannot move the boundary past the nsi end
                    next_slack = min(nsi[1] - current[1], next_slack)
                    self._log([" next slack after min: %.3f", next_slack])
                    if next_slack + current_slack >= 0:
                        self._log(" enough slack to completely fix")
                        steal_from_next = -current_slack
                        succeeded = True
                    else:
                        self._log(" not enough slack to completely fix")
                        steal_from_next = next_slack
                    new_end = current_end + steal_from_next
                    self.text_map[index][1] = new_end
                    self.text_map[index + 1][0] = new_end
                    new_rate = self._compute_rate(index)
                    self._log([
                        " old: %.3f %.3f => %.3f", current_start, current_end,
                        current_rate
                    ])
                    self._log([
                        " new: %.3f %.3f => %.3f", new_start, new_end,
                        new_rate
                    ])
                else:
                    self._log(" nsi found is not suitable")
            else:
                self._log(" no nsi found")
        else:
            self._log(" cannot expand into next")
        if succeeded:
            self._log(" succeeded: returning")
            return
        self._log(" not succeeded, returning")

    def _rateaggressive_fix_fragment(self, index):
        """
        Fix index-th fragment using the rate algorithm (aggressive variant):
        steal slack from previous and next fragments simultaneously,
        proportionally to the slack each one has available.
        """
        current = self.text_map[index]
        current_start = current[0]
        current_end = current[1]
        current_rate = self._compute_rate(index)
        previous_slack = self._compute_slack(index - 1)
        current_slack = self._compute_slack(index)
        next_slack = self._compute_slack(index + 1)
        if previous_slack is not None:
            self._log([" previous slack: %.3f", previous_slack])
        if current_slack is not None:
            self._log([" current slack: %.3f", current_slack])
        if next_slack is not None:
            self._log([" next slack: %.3f", next_slack])
        steal_from_previous = 0
        steal_from_next = 0
        if ((previous_slack is not None) and (next_slack is not None) and
                (previous_slack > 0) and (next_slack > 0)):
            self._log(" can expand into both previous and next")
            total_slack = previous_slack + next_slack
            self._log([" total slack: %.3f", total_slack])
            if total_slack + current_slack >= 0:
                self._log(" enough total slack to completely fix")
                # partition the needed slack proportionally
                previous_percentage = previous_slack / total_slack
                self._log(
                    [" previous percentage: %.3f", previous_percentage])
                steal_from_previous = -current_slack * previous_percentage
                steal_from_next = -current_slack - steal_from_previous
            else:
                self._log(" not enough total slack to completely fix")
                # consume all the available slack
                steal_from_previous = previous_slack
                steal_from_next = next_slack
        elif (previous_slack is not None) and (previous_slack > 0):
            self._log(" can expand into previous only")
            if previous_slack + current_slack >= 0:
                self._log(" enough previous slack to completely fix")
                steal_from_previous = -current_slack
            else:
                self._log(" not enough previous slack to completely fix")
                steal_from_previous = previous_slack
        elif (next_slack is not None) and (next_slack > 0):
            self._log(" can expand into next only")
            if next_slack + current_slack >= 0:
                self._log(" enough next slack to completely fix")
                steal_from_next = -current_slack
            else:
                self._log(" not enough next slack to completely fix")
                steal_from_next = next_slack
        else:
            self._log([" fragment %d cannot be fixed", index])
        self._log([" steal from previous: %.3f", steal_from_previous])
        self._log([" steal from next: %.3f", steal_from_next])
        new_start = current_start - steal_from_previous
        new_end = current_end + steal_from_next
        # guard the edges of the map when applying the new boundaries
        if index - 1 >= 0:
            self.text_map[index - 1][1] = new_start
        self.text_map[index][0] = new_start
        self.text_map[index][1] = new_end
        if index + 1 < len(self.text_map):
            self.text_map[index + 1][0] = new_end
        new_rate = self._compute_rate(index)
        self._log([
            " old: %.3f %.3f => %.3f", current_start, current_end,
            current_rate
        ])
        self._log([" new: %.3f %.3f => %.3f", new_start, new_end, new_rate])
class VAD(object):
    """
    The VAD extractor.

    :param wave_path: the path to the wav file (must be mono!)
    :type  wave_path: string (path)
    :param frame_rate: the MFCC frame rate, in frames per second.
                       Default: :class:`aeneas.globalconstants.MFCC_FRAME_RATE`
    :type  frame_rate: int
    :param energy_threshold: the threshold for the VAD algorithm to decide
                             that a given frame contains speech.
                             Note that this is the log10 of the energy
                             coefficient.
                             Default: :class:`aeneas.globalconstants.VAD_LOG_ENERGY_THRESHOLD`
    :type  energy_threshold: float
    :param min_nonspeech_length: the minimum number of nonspeech frames
                                 the VAD algorithm must encounter
                                 to create a nonspeech interval.
                                 Default: :class:`aeneas.globalconstants.VAD_MIN_NONSPEECH_LENGTH`
    :type  min_nonspeech_length: int
    :param extend_after: extend a speech interval by this many frames after.
                         Default: :class:`aeneas.globalconstants.VAD_EXTEND_SPEECH_INTERVAL_AFTER`
    :type  extend_after: int
    :param extend_before: extend a speech interval by this many frames before.
                          Default: :class:`aeneas.globalconstants.VAD_EXTEND_SPEECH_INTERVAL_BEFORE`
    :type  extend_before: int
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    TAG = "VAD"

    def __init__(
            self,
            wave_path=None,
            frame_rate=gc.MFCC_FRAME_RATE,
            energy_threshold=gc.VAD_LOG_ENERGY_THRESHOLD,
            min_nonspeech_length=gc.VAD_MIN_NONSPEECH_LENGTH,
            extend_after=gc.VAD_EXTEND_SPEECH_INTERVAL_AFTER,
            extend_before=gc.VAD_EXTEND_SPEECH_INTERVAL_BEFORE,
            logger=None
    ):
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()
        self.wave_path = wave_path
        self.frame_rate = frame_rate
        self.energy_threshold = energy_threshold
        self.min_nonspeech_length = min_nonspeech_length
        self.extend_after = extend_after
        self.extend_before = extend_before
        # computed lazily by compute_mfcc() / compute_vad()
        self.wave_mfcc = None
        self.wave_len = None
        self.speech = None
        self.nonspeech = None

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def compute_mfcc(self):
        """
        Compute the MFCCs of the wave, and store them internally.

        :raises OSError: if the input file cannot be read
        :raises IOError: if the MFCC extraction fails while reading the file
        """
        if (self.wave_path is not None) and (os.path.isfile(self.wave_path)):
            self._log("Computing MFCCs for wave...")
            try:
                wave = AudioFile(self.wave_path, logger=self.logger)
                wave.extract_mfcc(self.frame_rate)
                self.wave_mfcc = wave.audio_mfcc
                self.wave_len = wave.audio_length
            except IOError as e:
                self._log("IOError", Logger.CRITICAL)
                self._log(["Message: %s", e])
                raise e
            self._log("Computing MFCCs for wave... done")
        else:
            self._log(["Input file '%s' cannot be read", self.wave_path],
                      Logger.CRITICAL)
            raise OSError("Input file cannot be read")

    @property
    def speech(self):
        """
        Return the list of time intervals containing speech,
        as a list of lists, each being a pair of floats: ::

        [[s_1, e_1], [s_2, e_2], ..., [s_k, e_k]]

        where ``s_i`` is the time when the ``i``-th interval starts,
        and ``e_i`` is the time when it ends.

        :rtype: list of pairs of floats (see above)
        """
        return self.__speech

    @speech.setter
    def speech(self, speech):
        self.__speech = speech

    @property
    def nonspeech(self):
        """
        Return the list of time intervals not containing speech,
        as a list of lists, each being a pair of floats: ::

        [[s_1, e_1], [s_2, e_2], ..., [s_j, e_j]]

        where ``s_i`` is the time when the ``i``-th interval starts,
        and ``e_i`` is the time when it ends.

        :rtype: list of pairs of floats (see above)
        """
        return self.__nonspeech

    @nonspeech.setter
    def nonspeech(self, nonspeech):
        self.__nonspeech = nonspeech

    def compute_vad(self):
        """
        Compute the time intervals containing speech and nonspeech,
        and store them internally in the corresponding properties.
        Requires compute_mfcc() to have been called first.
        """
        if (self.wave_mfcc is not None) and (self.wave_len is not None):
            self._log("Computing VAD for wave")
            self.speech, self.nonspeech = self._compute_vad()
        else:
            # MFCCs not computed yet: silently do nothing
            # TODO raise
            pass

    def _compute_vad(self):
        """
        Core VAD: label each MFCC frame as speech/nonspeech by energy,
        smooth the labels, extend speech runs, and convert frame runs
        into time intervals.

        :rtype: (list of [start, end] speech intervals,
                 list of [start, end] nonspeech intervals)
        """
        labels = []
        # energy is the 0-th MFCC coefficient;
        # the threshold is relative to the minimum energy in the wave
        energy_vector = self.wave_mfcc[0]
        energy_threshold = numpy.min(energy_vector) + self.energy_threshold
        current_time = 0
        time_step = 1.0 / self.frame_rate
        self._log(["Time step: %.3f", time_step])
        last_index = len(energy_vector) - 1
        self._log(["Last frame index: %d", last_index])

        # decide whether each frame has speech or not,
        # based only on its energy
        self._log("Assigning initial labels")
        for current_energy in energy_vector:
            start_time = current_time
            end_time = start_time + time_step
            has_speech = False
            if current_energy >= energy_threshold:
                has_speech = True
            # label = [start_time, end_time, energy, has_speech]
            labels.append([start_time, end_time, current_energy, has_speech])
            current_time = end_time

        # to start a new nonspeech interval, there must be
        # at least self.min_nonspeech_length nonspeech frames ahead
        # spotty False values immersed in True runs are changed to True
        self._log("Smoothing labels")
        in_nonspeech = True
        if len(labels) > self.min_nonspeech_length:
            for i in range(len(labels) - self.min_nonspeech_length):
                if ((not labels[i][3]) and
                        (self._nonspeech_ahead(labels, i, in_nonspeech))):
                    labels[i][3] = False
                    in_nonspeech = True
                else:
                    labels[i][3] = True
                    in_nonspeech = False
            # deal with the tail: the last min_nonspeech_length frames
            # all get the same label (speech iff any of them is speech)
            first_index_not_set = len(labels) - self.min_nonspeech_length
            speech_at_the_end = False
            for i in range(first_index_not_set, last_index + 1):
                speech_at_the_end = speech_at_the_end or labels[i][3]
            for i in range(first_index_not_set, last_index + 1):
                labels[i][3] = speech_at_the_end

        self._log("Extending speech intervals before and after")
        self._log(["Extend before: %d", self.extend_before])
        self._log(["Extend after: %d", self.extend_after])
        # collect maximal runs of speech frames as (start, end) index pairs
        in_speech = False
        run_starts = []
        run_ends = []
        for i in range(len(labels)):
            if in_speech:
                if not labels[i][3]:
                    run_ends.append(i - 1)
                    in_speech = False
            else:
                if labels[i][3]:
                    run_starts.append(i)
                    in_speech = True
        if in_speech:
            run_ends.append(last_index)
        # widen each run, clamped to the frame index range
        adj_starts = [max(0, x - self.extend_before) for x in run_starts]
        adj_ends = [min(x + self.extend_after, last_index) for x in run_ends]
        speech_indices = zip(adj_starts, adj_ends)

        self._log("Generating speech and nonspeech list of intervals")
        speech = []
        nonspeech = []
        nonspeech_time = 0
        for speech_interval in speech_indices:
            start, end = speech_interval
            if nonspeech_time < start:
                # gap before this speech run becomes a nonspeech interval
                nonspeech.append(
                    [labels[nonspeech_time][0], labels[start - 1][1]])
            speech.append([labels[start][0], labels[end][1]])
            nonspeech_time = end + 1
        if nonspeech_time < last_index:
            # trailing nonspeech after the last speech run
            nonspeech.append(
                [labels[nonspeech_time][0], labels[last_index][1]])
        self._log("Returning speech and nonspeech list of intervals")
        return speech, nonspeech

    # TODO check if a numpy sliding window is faster
    def _nonspeech_ahead(self, array, current_index, in_nonspeech):
        """
        Return True if a nonspeech interval may start at current_index:
        either we already are in nonspeech and the frame is nonspeech,
        or the next min_nonspeech_length frames are all nonspeech.
        """
        if in_nonspeech:
            return not array[current_index][3]
        ahead = range(current_index, current_index + self.min_nonspeech_length)
        for index in ahead:
            if array[index][3]:
                return False
        return True
def test_loggable_rconf_logger(self):
    """A Loggable built with both rconf and logger must store both unchanged."""
    given_logger = Logger()
    given_rconf = RuntimeConfiguration()
    loggable_obj = Loggable(rconf=given_rconf, logger=given_logger)
    self.assertEqual(given_rconf, loggable_obj.rconf)
    self.assertEqual(given_logger, loggable_obj.logger)
def __init__(self, logger=None):
    """Initialize with an empty fragment list and a logger."""
    self.fragments = []
    # a default Logger is always built; an explicit one takes its place
    default_logger = Logger()
    self.logger = default_logger if logger is None else logger
def test_job_logger(self):
    """A Job can be constructed with an explicit Logger."""
    explicit_logger = Logger()
    created_job = Job(logger=explicit_logger)
def __init__(self, task, logger=None):
    """Store the task, reset the cleanup info, and set up logging."""
    self.task = task
    self.cleanup_info = []
    # fall back to a default Logger when none is supplied
    self.logger = Logger() if logger is None else logger
def test_task_logger(self):
    """A Task can be constructed with an explicit Logger."""
    explicit_logger = Logger()
    created_task = Task(logger=explicit_logger)
def __init__(self, container, logger=None):
    """Keep a reference to the container and set up logging."""
    # fall back to a default Logger when none is supplied
    self.logger = Logger() if logger is None else logger
    self.container = container
class FFMPEGWrapper(object):
    """
    Wrapper around ``ffmpeg`` to convert audio files.

    It will perform a call like::

        $ ffmpeg -i /path/to/input.mp3 [parameters] /path/to/output.wav

    :param parameters: list of parameters (not counting input and output
                       paths) to be passed to ``ffmpeg``.
                       If ``None``, ``FFMPEG_PARAMETERS`` will be used.
    :type  parameters: list of strings
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    FFMPEG_PARAMETERS_SAMPLE_NO_CHANGE = ["-ac", "1", "-y", "-f", "wav"]
    """ Set of parameters for ``ffmpeg`` without changing the sampling rate """

    FFMPEG_PARAMETERS_SAMPLE_22050 = ["-ac", "1", "-ar", "22050", "-y", "-f", "wav"]
    """ Set of parameters for ``ffmpeg`` with 22050 Hz sampling """

    FFMPEG_PARAMETERS_SAMPLE_44100 = ["-ac", "1", "-ar", "44100", "-y", "-f", "wav"]
    """ Set of parameters for ``ffmpeg`` with 44100 Hz sampling """

    FFMPEG_PARAMETERS = FFMPEG_PARAMETERS_SAMPLE_44100
    """ Default set of parameters for ``ffmpeg`` """

    FFMPEG_SAMPLE_22050 = ["-ar", "22050"]
    """ Single parameter for ``ffmpeg``: 22050 Hz sampling """

    FFMPEG_SAMPLE_44100 = ["-ar", "44100"]
    """ Single parameter for ``ffmpeg``: 44100 Hz sampling """

    FFMPEG_MONO = ["-ac", "1"]
    """ Single parameter for ``ffmpeg``: mono (1 channel) """

    FFMPEG_STEREO = ["-ac", "2"]
    """ Single parameter for ``ffmpeg``: stereo (2 channels) """

    FFMPEG_OVERWRITE = ["-y"]
    """ Single parameter for ``ffmpeg``: force overwriting output file """

    FFMPEG_FORMAT_WAV = ["-f", "wav"]
    """ Single parameter for ``ffmpeg``: produce output in ``wav`` format
    (must be the second to last argument to ``ffmpeg``,
    just before path of the output file) """

    TAG = "FFMPEGWrapper"

    def __init__(self, parameters=None, logger=None):
        self.parameters = parameters
        self.logger = logger
        # fixed: identity comparison with None (was ``== None``)
        if self.logger is None:
            self.logger = Logger()
        self._log("Initialized with parameters '%s'" % self.parameters)

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    @property
    def parameters(self):
        """
        The parameters to be passed to ffmpeg,
        not including ``-i input_file.mp3`` and ``output_file.wav``.
        If this property is ``None``, the default
        ``FFMPEG_PARAMETERS`` will be used.

        :rtype: list of strings
        """
        return self.__parameters

    @parameters.setter
    def parameters(self, parameters):
        self.__parameters = parameters

    def convert(
            self,
            input_file_path,
            output_file_path,
            head_length=None,
            process_length=None
    ):
        """
        Convert the audio file at ``input_file_path``
        into ``output_file_path``,
        using the parameters set in the constructor
        or through the ``parameters`` property.

        You can skip the beginning of the audio file
        by specifying ``head_length`` seconds to skip
        (if it is ``None``, start at time zero),
        and you can specify to convert
        only ``process_length`` seconds
        (if it is ``None``, process the entire input file length).

        By specifying both ``head_length`` and ``process_length``,
        you can skip a portion at the beginning and at the end
        of the original input file.

        :param input_file_path: the path of the audio file to convert
        :type  input_file_path: string
        :param output_file_path: the path of the converted audio file
        :type  output_file_path: string
        :param head_length: skip these many seconds
                            from the beginning of the audio file
        :type  head_length: float
        :param process_length: process these many seconds of the audio file
        :type  process_length: float
        :raises OSError: if the input file cannot be read
                         or the output file was not created
        """
        # test if we can read the input file
        if not os.path.isfile(input_file_path):
            msg = "Input file '%s' cannot be read" % input_file_path
            self._log(msg, Logger.CRITICAL)
            raise OSError(msg)

        # build the ffmpeg command line
        arguments = []
        arguments += [gc.FFMPEG_PATH]
        arguments += ["-i", input_file_path]
        # fixed: identity comparisons with None (was ``!= None``), and
        # fixed: subprocess argument lists must contain strings, so the
        # float second values are converted with str()
        if head_length is not None:
            arguments += ["-ss", str(head_length)]
        if process_length is not None:
            arguments += ["-t", str(process_length)]
        if self.parameters is None:
            arguments += self.FFMPEG_PARAMETERS
        else:
            arguments += self.parameters
        arguments += [output_file_path]
        self._log("Calling with arguments '%s'" % str(arguments))

        # call ffmpeg and wait for it to terminate
        proc = subprocess.Popen(
            arguments,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE)
        proc.communicate()
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()
        self._log("Call completed")

        # check if the output file exists
        if not os.path.exists(output_file_path):
            msg = "Output file '%s' cannot be read" % output_file_path
            self._log(msg, Logger.CRITICAL)
            raise OSError(msg)
        self._log("Returning output file path '%s'" % output_file_path)
        return output_file_path
def run_test_multi(self, msg):
    """Log one message on a non-teeing Logger and check exactly one entry is stored."""
    quiet_logger = Logger(tee=False)
    quiet_logger.log(msg)
    self.assertEqual(len(quiet_logger), 1)
def test_change_indentation(self):
    """The indentation property can be changed between log calls."""
    quiet_logger = Logger(tee=False, indentation=4)
    self.assertEqual(quiet_logger.indentation, 4)
    quiet_logger.log(u"Message 1", Logger.DEBUG)
    quiet_logger.log(u"Message 2", Logger.INFO)
    quiet_logger.indentation = 2
    self.assertEqual(quiet_logger.indentation, 2)
    quiet_logger.log(u"Message 3", Logger.WARNING)
    quiet_logger.log(u"Message 4", Logger.CRITICAL)
    quiet_logger.indentation = 0
    self.assertEqual(quiet_logger.indentation, 0)
def __init__(self, logger=None):
    """Set the logger, building a default one when none is given."""
    self.logger = Logger() if logger is None else logger
def __init__(self, logger=None):
    """
    Initialize, creating a default Logger when none is provided.

    :param logger: the logger object (optional)
    """
    # fixed: compare to None with ``is`` (PEP 8), not ``==``
    self.logger = logger
    if self.logger is None:
        self.logger = Logger()
class TextFile(object):
    """
    A list of text fragments.

    :param file_path: the path of the text file (optional)
    :type  file_path: string (path)
    :param file_format: the format of the text file (optional),
                        from :class:`aeneas.textfile.TextFileFormat`
    :type  file_format: string
    :param parameters: additional parameters for parsing (e.g., class/id regexes)
    :type  parameters: dict
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`

    :raise OSError: if ``file_path`` is given but cannot be read
    :raise ValueError: if ``file_format`` is given but not supported
    """

    TAG = "TextFile"

    def __init__(
            self,
            file_path=None,
            file_format=None,
            parameters=None,
            logger=None
        ):
        self.file_path = file_path
        self.file_format = file_format
        self.parameters = parameters
        self.fragments = []
        # use identity comparison (is None) instead of == None / != None
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()
        # eagerly parse only when both path and format were supplied
        if (self.file_path is not None) and (self.file_format is not None):
            self._read_from_file()

    def __len__(self):
        return len(self.fragments)

    def __str__(self):
        return "\n".join([str(f) for f in self.fragments])

    def _log(self, message, severity=Logger.DEBUG):
        """Log a message, tagged with this class name."""
        self.logger.log(message, severity, self.TAG)

    @property
    def fragments(self):
        """
        The current list of text fragments.

        :rtype: list of :class:`aeneas.textfile.TextFragment`
        """
        return self.__fragments

    @fragments.setter
    def fragments(self, fragments):
        self.__fragments = fragments

    def set_language(self, language):
        """
        Set the given language for all the text fragments.

        :param language: the language of the text fragments
        :type  language: string (from :class:`aeneas.language.Language` enumeration)
        """
        self._log("Setting language: '%s'" % language)
        for fragment in self.fragments:
            fragment.language = language

    def clear(self):
        """
        Clear the list of text fragments.
        """
        self._log("Clearing text fragments")
        self.fragments = []

    def read_from_list(self, lines):
        """
        Read text fragments from a given list of strings::

            [fragment_1, fragment_2, ..., fragment_n]

        :param lines: the text fragments
        :type  lines: list of strings
        """
        self._log("Reading text fragments from list")
        self._read_plain(lines)

    def read_from_list_with_ids(self, lines):
        """
        Read text fragments from a given list of lists::

            [[id_1, text_1], [id_2, text_2], ..., [id_n, text_n]].

        :param lines: the list of ``[id, text]`` fragments (see above)
        :type  lines: list of pairs (see above)
        """
        self._log("Reading text fragments from list with ids")
        self._create_text_fragments(lines)

    def _read_from_file(self):
        """
        Read text fragments from file.

        :raise OSError: if the file cannot be read
        :raise ValueError: if the file format is not supported
        """
        # test if we can read the given file
        if not os.path.isfile(self.file_path):
            msg = "File '%s' cannot be read" % self.file_path
            self._log(msg, Logger.CRITICAL)
            raise OSError(msg)
        if self.file_format not in TextFileFormat.ALLOWED_VALUES:
            msg = "Text file format '%s' is not supported." % self.file_format
            self._log(msg, Logger.CRITICAL)
            raise ValueError(msg)
        # read the contents of the file
        # use a with-statement so the file is closed even if readlines() raises
        self._log("Reading contents of file '%s'" % self.file_path)
        with codecs.open(self.file_path, "r", "utf-8") as text_file:
            lines = text_file.readlines()
        # clear text fragments
        self.clear()
        # parse the contents according to the declared format
        if self.file_format == TextFileFormat.PARSED:
            self._log("Reading from format PARSED")
            self._read_parsed(lines)
        if self.file_format == TextFileFormat.PLAIN:
            self._log("Reading from format PLAIN")
            self._read_plain(lines)
        if self.file_format == TextFileFormat.UNPARSED:
            self._log("Reading from format UNPARSED")
            self._read_unparsed(lines, self.parameters)
        # log the number of fragments
        self._log("Parsed %d fragments" % len(self.fragments))

    def _read_parsed(self, lines):
        """
        Read text fragments from a parsed format text file.

        Each line has the form ``id SEPARATOR text``; lines without the
        separator are silently skipped.

        :param lines: the lines of the parsed text file
        :type  lines: list of strings
        """
        self._log("Parsing fragments from parsed text format")
        pairs = []
        for line in lines:
            if gc.PARSED_TEXT_SEPARATOR in line:
                first, second = line.split(gc.PARSED_TEXT_SEPARATOR)
                identifier = first.strip()
                text = second.strip()
                pairs.append([identifier, text])
        self._create_text_fragments(pairs)

    def _read_plain(self, lines):
        """
        Read text fragments from a plain format text file.

        Fragment identifiers are synthesized as ``f000001``, ``f000002``, ...

        :param lines: the lines of the plain text file
        :type  lines: list of strings
        """
        self._log("Parsing fragments from plain text format")
        pairs = []
        # enumerate(..., 1) replaces the manual i = 1 ... i += 1 counter
        for i, line in enumerate(lines, 1):
            identifier = "f" + str(i).zfill(6)
            text = line.strip()
            pairs.append([identifier, text])
        self._create_text_fragments(pairs)

    def _read_unparsed(self, lines, parameters):
        """
        Read text fragments from an unparsed format text file.

        :param lines: the lines of the unparsed text file
        :type  lines: list of strings
        :param parameters: additional parameters for parsing
                           (e.g., class/id regex strings)
        :type  parameters: dict
        """
        #
        # TODO better and/or parametric parsing,
        # for example, removing tags but keeping text, etc.
        #
        self._log("Parsing fragments from unparsed text format")
        pairs = []
        # get filter attributes
        attributes = dict()
        if gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX in parameters:
            class_regex_string = parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX]
            if class_regex_string is not None:
                self._log("Regex for class: '%s'" % class_regex_string)
                class_regex = re.compile(r".*\b" + class_regex_string + r"\b.*")
                attributes['class'] = class_regex
        if gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX in parameters:
            id_regex_string = parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX]
            if id_regex_string is not None:
                self._log("Regex for id: '%s'" % id_regex_string)
                id_regex = re.compile(r".*\b" + id_regex_string + r"\b.*")
                attributes['id'] = id_regex
        # get id sorting algorithm
        id_sort = IDSortingAlgorithm.UNSORTED
        if gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT in parameters:
            id_sort = parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT]
        self._log("Sorting text fragments using '%s'" % id_sort)
        # transform text in a soup object
        self._log("Creating soup")
        soup = BeautifulSoup.BeautifulSoup("\n".join(lines))
        # extract according to class_regex and id_regex
        text_from_id = dict()
        ids = []
        self._log("Finding elements matching attributes '%s'" % attributes)
        nodes = soup.findAll(attrs=attributes)
        for node in nodes:
            try:
                f_id = node['id']
                f_text = node.text
                text_from_id[f_id] = f_text
                ids.append(f_id)
            except KeyError:
                # node matched the filters but carries no 'id' attribute
                self._log("KeyError while parsing a node", Logger.WARNING)
        # sort by ID as requested
        self._log("Sorting text fragments")
        sorted_ids = IDSortingAlgorithm(id_sort).sort(ids)
        # append to fragments
        self._log("Appending fragments")
        for key in sorted_ids:
            pairs.append([key, text_from_id[key]])
        self._create_text_fragments(pairs)

    def _create_text_fragments(self, pairs):
        """
        Create text fragment objects and append them to this list.

        :param pairs: a list of lists, each being [id, text]
        :type  pairs: list of lists of two strings
        """
        self._log("Creating TextFragment objects")
        for pair in pairs:
            fragment = TextFragment(identifier=pair[0], text=pair[1])
            self.fragments.append(fragment)
class SD(object):
    """
    The SD extractor.

    Detects the head (leading silence/noise before the spoken text) and the
    tail of an audio file by synthesizing a query from the text and aligning
    it against the audio MFCCs with DTW.

    :param audio_file: the audio file
    :type audio_file: :class:`aeneas.audiofile.AudioFile`
    :param text_file: the text file
    :type text_file: :class:`aeneas.textfile.TextFile`
    :param frame_rate: the MFCC frame rate, in frames per second. Default: :class:`aeneas.globalconstants.MFCC_FRAME_RATE`
    :type frame_rate: int
    :param logger: the logger object
    :type logger: :class:`aeneas.logger.Logger`
    """

    TAG = "SD"

    # TODO eliminate these magic numbers
    # early-exit thresholds and query/audio window sizing factors
    # used by _detect_start()
    MAX_RUNS_NO_IMPROVEMENT = 20
    MAX_RUNS_WITH_MIN_LENGTH = 20
    QUERY_FACTOR = 2.0
    AUDIO_FACTOR = 6.0

    def __init__(self, audio_file, text_file, frame_rate=gc.MFCC_FRAME_RATE, logger=None):
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()
        self.audio_file = audio_file
        self.text_file = text_file
        self.frame_rate = frame_rate
        # speech intervals computed lazily by _extract_speech()
        self.audio_speech = None

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def detect_interval(
            self,
            min_head_length=gc.SD_MIN_HEAD_LENGTH,
            max_head_length=gc.SD_MAX_HEAD_LENGTH,
            min_tail_length=gc.SD_MIN_TAIL_LENGTH,
            max_tail_length=gc.SD_MAX_TAIL_LENGTH,
            metric=SDMetric.VALUE
        ):
        """
        Detect the audio interval, that is, the ``(begin, end)`` pair
        delimiting the spoken text inside the audio file.

        Falls back to ``(0.0, 0.0)`` when the detected head/tail produce
        an inconsistent interval.

        :param min_head_length: estimated minimum head length
        :type min_head_length: float
        :param max_head_length: estimated maximum head length
        :type max_head_length: float
        :param min_tail_length: estimated minimum tail length
        :type min_tail_length: float
        :param max_tail_length: estimated maximum tail length
        :type max_tail_length: float
        :param metric: the metric to be used when comparing candidates
        :type metric: :class:`aeneas.sd.SDMetric`
        :rtype: (float, float)
        """
        head = self.detect_head(min_head_length, max_head_length, metric)
        tail = self.detect_tail(min_tail_length, max_tail_length, metric)
        begin = head
        end = self.audio_file.audio_length - tail
        self._log(["Audio length: %.3f", self.audio_file.audio_length])
        self._log(["Head length: %.3f", head])
        self._log(["Tail length: %.3f", tail])
        self._log(["Begin: %.3f", begin])
        self._log(["End: %.3f", end])
        if (begin >= 0) and (end > begin):
            self._log(["Returning %.3f %.3f", begin, end])
            return (begin, end)
        self._log("Returning (0.0, 0.0)")
        return (0.0, 0.0)

    def detect_head(
            self,
            min_head_length=gc.SD_MIN_HEAD_LENGTH,
            max_head_length=gc.SD_MAX_HEAD_LENGTH,
            metric=SDMetric.VALUE
        ):
        """
        Detect the audio head.

        Returns 0.0 when detection fails (errors are logged, not raised).

        :param min_head_length: estimated minimum head length
        :type min_head_length: float
        :param max_head_length: estimated maximum head length
        :type max_head_length: float
        :param metric: the metric to be used when comparing candidates
        :type metric: :class:`aeneas.sd.SDMetric`
        :rtype: float
        """
        self._extract_mfcc()
        self._extract_speech()
        # free raw audio data once MFCCs are computed
        self.audio_file.clear_data()
        head = 0.0
        try:
            head = self._detect_start(min_head_length, max_head_length, metric, False)
        except Exception as e:
            self._log("Error while detecting head", Logger.CRITICAL)
            self._log(["  Message: %s", str(e)], Logger.CRITICAL)
        return head

    def detect_tail(
            self,
            min_tail_length=gc.SD_MIN_TAIL_LENGTH,
            max_tail_length=gc.SD_MAX_TAIL_LENGTH,
            metric=SDMetric.VALUE
        ):
        """
        Detect the audio tail.

        The audio is reversed so the tail can be detected as a "head",
        then reversed back. Returns 0.0 when detection fails.

        :param min_tail_length: estimated minimum tail length
        :type min_tail_length: float
        :param max_tail_length: estimated maximum tail length
        :type max_tail_length: float
        :param metric: the metric to be used when comparing candidates
        :type metric: :class:`aeneas.sd.SDMetric`
        :rtype: float
        """
        self.audio_file.reverse()
        self._extract_mfcc()
        self._extract_speech()
        # restore original sample order before discarding data
        self._log("Reversing audio")
        self.audio_file.reverse()
        self.audio_file.clear_data()
        tail = 0.0
        try:
            tail = self._detect_start(min_tail_length, max_tail_length, metric, True)
        except Exception as e:
            self._log("Error while detecting tail", Logger.CRITICAL)
            self._log(["  Message: %s", str(e)], Logger.CRITICAL)
        return tail

    def _detect_start(self, min_start_length, max_start_length, metric, backwards=False):
        """
        Detect start time (in seconds) of the spoken text.

        Synthesizes a query wave from the text, extracts its MFCCs, then
        slides over the admissible speech intervals aligning query vs audio
        with DTW, keeping the candidate minimizing the chosen metric.
        """
        self._log(["Min start length: %.3f", min_start_length])
        self._log(["Max start length: %.3f", max_start_length])
        self._log(["Metric: %s", metric])
        self._log(["Backwards: %s", str(backwards)])
        # characters-per-second rate of the real audio
        audio_rate = self.text_file.characters / self.audio_file.audio_length
        self._log(["Audio rate: %.3f", audio_rate])
        self._log("Synthesizing query...")
        tmp_handler, tmp_file_path = tempfile.mkstemp(suffix=".wav", dir=gf.custom_tmp_dir())
        synt = Synthesizer(logger=self.logger)
        # synthesize more than max_start_length to have enough query material
        synt_duration = max_start_length * self.QUERY_FACTOR
        self._log(["Synthesizing %.3f seconds", synt_duration])
        result = synt.synthesize(self.text_file, tmp_file_path, quit_after=synt_duration, backwards=backwards)
        self._log("Synthesizing query... done")
        query_file = AudioFile(tmp_file_path)
        if backwards:
            self._log("Reversing query")
            query_file.reverse()
        self._log("Extracting MFCCs for query...")
        query_file.extract_mfcc(frame_rate=self.frame_rate)
        query_file.clear_data()
        self._log("Extracting MFCCs for query... done")
        self._log("Cleaning up...")
        self._cleanup(tmp_handler, tmp_file_path)
        self._log("Cleaning up... done")
        # result is (anchors, current_time, num_chars); see Synthesizer.synthesize
        query_characters = result[2]
        query_len = query_file.audio_length
        query_mfcc = query_file.audio_mfcc
        query_rate = query_characters / query_len
        # stretch factor maps query frames to expected real-audio frames
        stretch_factor = max(1, query_rate / audio_rate)
        self._log(["Audio rate: %.3f", audio_rate])
        self._log(["Query rate: %.3f", query_rate])
        self._log(["Stretch factor: %.3f", stretch_factor])
        audio_mfcc = self.audio_file.audio_mfcc
        self._log(["Actual audio has %d frames", audio_mfcc.shape[1]])
        # only search the beginning of the audio, not the whole file
        audio_mfcc_end_index = int(max_start_length * self.AUDIO_FACTOR * self.frame_rate)
        self._log(["Limiting audio to first %d frames", audio_mfcc_end_index])
        audio_mfcc_end_index = min(audio_mfcc_end_index, audio_mfcc.shape[1])
        audio_mfcc = audio_mfcc[:, 0:audio_mfcc_end_index]
        self._log(["Limited audio has %d frames", audio_mfcc.shape[1]])
        # o = number of audio frames, n = number of query frames
        l, o = audio_mfcc.shape
        l, n = query_mfcc.shape
        # minimum length of a matched interval in the real audio
        stretched_match_minimum_length = int(n * stretch_factor)
        self._log(["Audio has %d frames == %.3f seconds", o, self._i2t(o)])
        self._log(["Query has %d frames == %.3f seconds", n, self._i2t(n)])
        self._log(["Stretch factor: %.3f", stretch_factor])
        self._log(["Required minimum length: %.3f", stretched_match_minimum_length])
        self._log("Speech intervals:")
        for interval in self.audio_speech:
            self._log([" %d %d == %.3f %.3f", self._t2i(interval[0]), self._t2i(interval[1]), interval[0], interval[1]])
        # candidate start points: speech intervals whose start lies in
        # [min_start_length, max_start_length]
        admissible_intervals = [x for x in self.audio_speech if ((x[0] >= min_start_length) and (x[0] <= max_start_length))]
        self._log("AdmissibleSpeech intervals:")
        for interval in admissible_intervals:
            self._log([" %d %d == %.3f %.3f", self._t2i(interval[0]), self._t2i(interval[1]), interval[0], interval[1]])
        candidates = []
        runs_with_min_length = 0
        runs_no_improvement = 0
        runs_min_distortion = numpy.inf
        runs_min_value = numpy.inf
        for interval in admissible_intervals:
            # early exits to bound the number of DTW computations
            if runs_no_improvement >= self.MAX_RUNS_NO_IMPROVEMENT:
                self._log("  Breaking: too many runs without improvement")
                break
            if runs_with_min_length >= self.MAX_RUNS_WITH_MIN_LENGTH:
                self._log("  Breaking: too many runs with minimum required length")
                break
            start_time = interval[0]
            start_index = self._t2i(start_time)
            self._log(["Evaluating interval starting at %d == %.3f ", start_index, start_time])
            if start_index > o:
                self._log("  Breaking: start index outside audio window")
                break
            req_end_index = start_index + stretched_match_minimum_length
            req_end_time = self._i2t(req_end_index)
            if req_end_index > o:
                self._log("  Breaking: not enough audio left in shifted window")
                break
            # align against at most 2*n audio frames from this start point
            end_index = min(start_index + 2 * n, o)
            end_time = self._i2t(end_index)
            self._log([" Start %d == %.3f", start_index, start_time])
            self._log([" Req end %d == %.3f", req_end_index, req_end_time])
            self._log([" Eff end %d == %.3f", end_index, end_time])
            audio_mfcc_sub = audio_mfcc[:, start_index:end_index]
            l, m = audio_mfcc_sub.shape
            self._log("Computing DTW...")
            aligner = DTWAligner(None, None, frame_rate=self.frame_rate, logger=self.logger)
            aligner.real_wave_full_mfcc = audio_mfcc_sub
            aligner.synt_wave_full_mfcc = query_mfcc
            aligner.real_wave_length = self._i2t(m)
            aligner.synt_wave_length = self._i2t(n)
            acm = aligner.compute_accumulated_cost_matrix()
            # transpose, so we have an n x m accumulated cost matrix
            acm = acm.transpose()
            last_row = acm[-1, :]
            self._log("Computing DTW... done")
            # find the minimum, but its index must be >= stretched_match_minimum_length
            candidate_argmin_index = numpy.argmin(last_row[stretched_match_minimum_length:])
            candidate_length_index = stretched_match_minimum_length + candidate_argmin_index
            candidate_length_time = self._i2t(candidate_length_index)
            candidate_value = last_row[candidate_length_index]
            candidate_end_index = start_index + candidate_length_index
            candidate_end_time = self._i2t(candidate_end_index)
            # distortion = cost normalized by matched length
            candidate_distortion = candidate_value / candidate_length_index
            # check if the candidate has minimum length
            if candidate_length_index == stretched_match_minimum_length:
                runs_with_min_length += 1
            else:
                runs_with_min_length = 0
            # check if the candidate improved the global minimum value
            if metric == SDMetric.VALUE:
                if candidate_value < runs_min_value:
                    runs_min_value = candidate_value
                    runs_no_improvement = 0
                else:
                    runs_no_improvement += 1
            if metric == SDMetric.DISTORTION:
                if candidate_distortion < runs_min_distortion:
                    runs_min_distortion = candidate_distortion
                    runs_no_improvement = 0
                else:
                    runs_no_improvement += 1
            # append to the list of candidates
            self._log([" Interval start: %d == %.6f", start_index, start_time])
            self._log([" Interval end: %d == %.6f", end_index, end_time])
            self._log([" Candidate start: %d == %.6f", start_index, start_time])
            self._log([" Candidate end: %d == %.6f", candidate_end_index, candidate_end_time])
            self._log([" Candidate length: %d == %.6f", candidate_length_index, candidate_length_time])
            self._log([" Candidate value: %.6f", candidate_value])
            self._log([" Candidate distortion: %.6f", candidate_distortion])
            candidates.append({
                "start_index": start_index,
                "length": candidate_length_index,
                "value": candidate_value,
                "distortion": candidate_distortion
            })
        # select best candidate and return its start time
        # if we have no best candidate, return 0.0
        best_candidate = self._select_best_candidate(candidates, metric)
        if best_candidate is None:
            return 0.0
        sd_time = self._i2t(max(best_candidate["start_index"], 0))
        self._log(["Returning time %.3f", sd_time])
        return sd_time

    def _cleanup(self, handler, path):
        """ Remove temporary handler/file """
        # NOTE(review): bare except deliberately makes cleanup best-effort,
        # but it also swallows KeyboardInterrupt/SystemExit — consider
        # narrowing to ``except OSError``
        if handler is not None:
            try:
                os.close(handler)
            except:
                pass
        if path is not None:
            try:
                os.remove(path)
            except:
                pass

    def _extract_mfcc(self):
        """ Extract MFCCs for audio """
        self._log("Extracting MFCCs for audio...")
        self.audio_file.extract_mfcc(frame_rate=self.frame_rate)
        self._log("Extracting MFCCs for audio... done")

    def _extract_speech(self):
        """ Extract speech intervals """
        self._log("Running VAD...")
        vad = VAD(frame_rate=self.frame_rate, logger=self.logger)
        vad.wave_len = self.audio_file.audio_length
        vad.wave_mfcc = self.audio_file.audio_mfcc
        vad.compute_vad()
        self.audio_speech = vad.speech
        self._log("Running VAD... done")

    def _i2t(self, index):
        """ Frame index to (start) time """
        return index * 1.0 / self.frame_rate

    def _t2i(self, time):
        """ Frame (start) time to index """
        return int(time * self.frame_rate)

    def _select_best_candidate(self, candidates, metric):
        """ Select the best candidate (or None if no one is found) """
        self._log(["Using metric '%s'", metric])
        self._log("Candidates:")
        for candidate in candidates:
            self._log([" %s", str(candidate)])
        tuples = []
        # primary sort key is the chosen metric; the other metric breaks ties
        if metric == SDMetric.VALUE:
            tuples = [(v["value"], v["distortion"], v) for v in candidates]
        if metric == SDMetric.DISTORTION:
            tuples = [(v["distortion"], v["value"], v) for v in candidates]
        if len(tuples) == 0:
            return None
        return min(tuples)[2]
class ExecuteJob(object):
    """
    Execute a job, that is, execute all of its tasks
    and generate the output container
    holding the generated sync maps.

    If you do not provide a job object in the constructor,
    you must manually set it later, or load it from a container
    with ``load_job_from_container``.

    In the first case, you are responsible for setting
    the absolute audio/text/sync map paths of each task of the job,
    to their actual absolute location on the computing machine.
    Moreover, you are responsible for cleaning up
    any temporary files you might have generated around.

    In the second case, you are responsible for
    calling ``clean`` at the end of the job execution,
    to delete the working directory
    created by ``load_job_from_container``
    when creating the job object.

    :param job: the job to be executed
    :type  job: :class:`aeneas.job.Job`
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    TAG = "ExecuteJob"

    def __init__(self, job=None, logger=None):
        self.job = job
        self.working_directory = None
        self.tmp_directory = None
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def load_job(self, job):
        """
        Load the given job.

        NOTE: no sanity check is perfomed by this call,
        and it will always return ``True``.

        :param job: the job to load
        :type  job: :class:`aeneas.job.Job`
        :rtype: bool
        """
        self.job = job
        return True

    def load_job_from_container(self, container_path, config_string=None):
        """
        Validate the given container, and, if it is well formed,
        load the job from it.

        If ``config_string`` is ``None``,
        the container must contain a configuration file;
        otherwise use the provided config string
        (i.e., the wizard case).

        Return ``True`` if the job has been loaded successfully,
        ``False`` otherwise.

        :param container_path: the path to the input container
        :type  container_path: string (path)
        :param config_string: the configuration string (from wizard)
        :type  config_string: string
        :rtype: bool
        """
        self._log("Loading job from container...")
        # validate container
        self._log("Validating container...")
        validator = Validator(logger=self.logger)
        if config_string is None:
            validator_result = validator.check_container(container_path)
        else:
            validator_result = validator.check_container_from_wizard(container_path, config_string)
        if not validator_result.passed:
            self._log("Validating container: failed")
            self._log("Loading job from container: failed")
            return False
        self._log("Validating container: succeeded")
        try:
            # create working directory where the input container
            # will be decompressed
            self.working_directory = tempfile.mkdtemp(dir=gf.custom_tmp_dir())
            self._log(["Created working directory '%s'", self.working_directory])
            # decompress
            self._log("Decompressing input container...")
            input_container = Container(container_path, logger=self.logger)
            input_container.decompress(self.working_directory)
            self._log("Decompressing input container... done")
            # create job from the working directory
            self._log("Creating job from working directory...")
            working_container = Container(self.working_directory, logger=self.logger)
            analyzer = AnalyzeContainer(working_container, logger=self.logger)
            if config_string is None:
                self.job = analyzer.analyze()
            else:
                self.job = analyzer.analyze_from_wizard(config_string)
            self._log("Creating job from working directory... done")
            # set absolute path for text file and audio file
            # for each task in the job
            self._log("Setting absolute paths for tasks...")
            for task in self.job.tasks:
                task.text_file_path_absolute = gf.norm_join(self.working_directory, task.text_file_path)
                task.audio_file_path_absolute = gf.norm_join(self.working_directory, task.audio_file_path)
            self._log("Setting absolute paths for tasks... done")
            # return
            self._log("Loading job from container: succeeded")
            return True
        except Exception:
            # was a bare ``except:``, which also swallowed
            # KeyboardInterrupt/SystemExit; narrowed to Exception
            # failure: clean and return
            self.clean()
            self._log("Loading job from container: failed")
            return False

    def write_output_container(self, output_directory_path):
        """
        Write the output container for this job.

        Return a pair ``(bool, string)``, where the bool
        indicates whether the execution succeeded,
        and the string is the path to output container.

        :param output_directory_path: the path to a directory where
                                      the output container must be created
        :type  output_directory_path: string (path)
        :rtype: (bool, string)
        """
        self._log("Writing output container for this job")
        # check if the job has tasks
        if self.job is None:
            self._log("job is None")
            return (False, None)
        if len(self.job) == 0:
            self._log("The job has no tasks")
            return (False, None)
        try:
            # create temporary directory where the sync map files
            # will be created
            # this temporary directory will be compressed into
            # the output container
            self.tmp_directory = tempfile.mkdtemp(dir=gf.custom_tmp_dir())
            self._log(["Created temporary directory '%s'", self.tmp_directory])
            for task in self.job.tasks:
                custom_id = task.configuration.custom_id
                # check if the task has sync map and sync map file path
                if task.sync_map_file_path is None:
                    self._log(["Task '%s' has sync_map_file_path not set", custom_id])
                    return (False, None)
                if task.sync_map is None:
                    self._log(["Task '%s' has sync_map not set", custom_id])
                    return (False, None)
                # output sync map
                self._log(["Outputting sync map for task '%s'...", custom_id])
                task.output_sync_map_file(self.tmp_directory)
                self._log(["Outputting sync map for task '%s'... done", custom_id])
            # get output container info
            output_container_format = self.job.configuration.os_container_format
            self._log(["Output container format: '%s'", output_container_format])
            output_file_name = self.job.configuration.os_file_name
            if ((output_container_format != ContainerFormat.UNPACKED) and
                    (not output_file_name.endswith(output_container_format))):
                self._log("Adding extension to output_file_name")
                output_file_name += "." + output_container_format
            self._log(["Output file name: '%s'", output_file_name])
            output_file_path = gf.norm_join(output_directory_path, output_file_name)
            self._log(["Output file path: '%s'", output_file_path])
            # create output container
            self._log("Compressing...")
            container = Container(output_file_path, output_container_format, logger=self.logger)
            container.compress(self.tmp_directory)
            self._log("Compressing... done")
            self._log(["Created output file: '%s'", output_file_path])
            # clean and return
            self.clean(False)
            return (True, output_file_path)
        except Exception:
            # narrowed from a bare ``except:`` (see load_job_from_container)
            self.clean(False)
            return (False, None)

    def execute(self):
        """
        Execute the job, that is, execute all of its tasks.

        Each produced sync map will be stored
        inside the corresponding task object.

        Return ``True`` if the execution succeeded,
        ``False`` otherwise.

        :rtype: bool
        """
        self._log("Executing job")
        # check if the job has tasks
        if self.job is None:
            self._log("job is None")
            return False
        if len(self.job) == 0:
            self._log("The job has no tasks")
            return False
        self._log(["Number of tasks: '%d'", len(self.job)])
        # execute tasks; stop at the first failure
        for task in self.job.tasks:
            custom_id = task.configuration.custom_id
            self._log(["Executing task '%s'...", custom_id])
            executor = ExecuteTask(task, logger=self.logger)
            result = executor.execute()
            self._log(["Executing task '%s'... done", custom_id])
            if not result:
                self._log("Executing task: failed")
                return False
            self._log("Executing task: succeeded")
        # return
        self._log("Executing job: succeeded")
        return True

    def clean(self, remove_working_directory=True):
        """
        Remove the temporary directory.
        If ``remove_working_directory`` is True
        remove the working directory as well,
        otherwise just remove the temporary directory.

        :param remove_working_directory: if ``True``, remove
                                         the working directory as well
        :type  remove_working_directory: bool
        """
        if remove_working_directory:
            self._log("Removing working directory... ")
            self._clean(self.working_directory)
            self.working_directory = None
            self._log("Removing working directory... done")
        self._log("Removing temporary directory... ")
        self._clean(self.tmp_directory)
        self.tmp_directory = None
        self._log("Removing temporary directory... done")

    def _clean(self, path):
        """
        Remove the directory ``path``.

        :param path: the path of the directory to be removed
        :type  path: string (path)
        """
        if (path is not None) and (os.path.isdir(path)):
            try:
                self._log(["Removing directory '%s'...", path])
                shutil.rmtree(path)
                self._log("Succeeded")
            except Exception:
                # best-effort removal; narrowed from a bare ``except:``
                self._log("Failed")
class Synthesizer(object):
    """
    A class to synthesize text fragments into
    a single ``wav`` file,
    along with the corresponding time anchors.

    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    TAG = "Synthesizer"

    def __init__(self, logger=None):
        self.logger = logger
        # PEP 8: identity comparison (``is None``) instead of ``== None``
        if self.logger is None:
            self.logger = Logger()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def synthesize(self, text_file, audio_file_path):
        """
        Synthesize the text contained in the given fragment list
        into a ``wav`` file.

        Return the list of time anchors, one per fragment, each being
        ``[start_time, fragment_identifier, fragment_text]``.

        :param text_file: the text file to be synthesized
        :type  text_file: :class:`aeneas.textfile.TextFile`
        :param audio_file_path: the path to the output audio file
        :type  audio_file_path: string (path)
        """
        # time anchors
        anchors = []
        # initialize time
        current_time = 0.0
        # waves is used to concatenate all the fragments WAV files
        waves = numpy.array([])
        # espeak wrapper
        espeak = ESPEAKWrapper(logger=self.logger)
        num = 0
        # for each fragment, synthesize it and concatenate it
        for fragment in text_file.fragments:
            # synthesize and get the duration of the output file
            self._log("Synthesizing fragment %d" % num)
            handler, tmp_destination = tempfile.mkstemp(
                suffix=".wav",
                dir=gf.custom_tmp_dir()
            )
            duration = espeak.synthesize(
                text=fragment.text,
                language=fragment.language,
                output_file_path=tmp_destination
            )
            # store for later output
            anchors.append([current_time, fragment.identifier, fragment.text])
            # concatenate to buffer
            self._log("Fragment %d starts at: %f" % (num, current_time))
            if duration > 0:
                self._log("Fragment %d duration: %f" % (num, duration))
                current_time += duration
                data, sample_frequency, encoding = wavread(tmp_destination)
                #
                # TODO this might result in memory swapping
                # if we have a large number of fragments
                # is there a better way?
                #
                # waves = numpy.concatenate((waves, data))
                #
                # append seems faster than concatenate, as it should
                waves = numpy.append(waves, data)
            else:
                self._log("Fragment %d has zero duration" % num)
            # remove temporary file
            self._log("Removing temporary file '%s'" % tmp_destination)
            os.close(handler)
            os.remove(tmp_destination)
            num += 1
        # output WAV file, concatenation of synthesized fragments
        # NOTE(review): if no fragment had positive duration,
        # sample_frequency and encoding are unbound here and this call
        # raises UnboundLocalError — confirm whether an empty text file
        # can reach this point
        self._log("Writing audio file '%s'" % audio_file_path)
        wavwrite(waves, audio_file_path, sample_frequency, encoding)
        # return the time anchors
        self._log("Returning %d time anchors" % len(anchors))
        return anchors
class Synthesizer(object):
    """
    A class to synthesize text fragments into
    a single ``wav`` file,
    along with the corresponding time anchors.

    :param logger: the logger object
    :type logger: :class:`aeneas.logger.Logger`
    """

    TAG = "Synthesizer"

    def __init__(self, logger=None):
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def synthesize(self, text_file, audio_file_path, quit_after=None, backwards=False):
        """
        Synthesize the text contained in the given fragment list
        into a ``wav`` file.

        Returns a triple
        ``(anchors, current_time, num_chars)``: the per-fragment time
        anchors, the total synthesized duration, and the number of
        characters synthesized.

        :param text_file: the text file to be synthesized
        :type text_file: :class:`aeneas.textfile.TextFile`
        :param audio_file_path: the path to the output audio file
        :type audio_file_path: string (path)
        :param quit_after: stop synthesizing as soon as
                           reaching this many seconds
        :type quit_after: float
        :param backwards: synthesizing from the end of the text file
        :type backwards: bool
        """
        # time anchors
        anchors = []
        # initialize time
        current_time = 0.0
        # waves is used to concatenate all the fragments WAV files
        waves = numpy.array([])
        # espeak wrapper
        espeak = ESPEAKWrapper(logger=self.logger)
        if quit_after is not None:
            self._log(["Quit after reaching %.3f", quit_after])
        if backwards:
            self._log("Synthesizing backwards")
        # for each fragment, synthesize it and concatenate it
        num = 0
        num_chars = 0
        fragments = text_file.fragments
        # when backwards, process fragments last-to-first
        if backwards:
            fragments = fragments[::-1]
        for fragment in fragments:
            # synthesize and get the duration of the output file
            self._log(["Synthesizing fragment %d", num])
            handler, tmp_destination = tempfile.mkstemp(
                suffix=".wav",
                dir=gf.custom_tmp_dir()
            )
            duration = espeak.synthesize(
                text=fragment.text,
                language=fragment.language,
                output_file_path=tmp_destination
            )
            # store for later output
            anchors.append([current_time, fragment.identifier, fragment.text])
            # increase the character counter
            num_chars += fragment.characters
            # concatenate to buffer
            self._log(["Fragment %d starts at: %f", num, current_time])
            if duration > 0:
                self._log(["Fragment %d duration: %f", num, duration])
                current_time += duration
                data, sample_frequency, encoding = wavread(tmp_destination)
                #
                # TODO this might result in memory swapping
                # if we have a large number of fragments
                # is there a better way?
                #
                # NOTE since append cannot be in place,
                # it seems that the only alternative is pre-allocating
                # the destination array,
                # possibly truncating or extending it as needed
                #
                # prepend when backwards so the final wave plays forward
                if backwards:
                    waves = numpy.append(data, waves)
                else:
                    waves = numpy.append(waves, data)
            else:
                self._log(["Fragment %d has zero duration", num])
            # remove temporary file
            self._log(["Removing temporary file '%s'", tmp_destination])
            os.close(handler)
            os.remove(tmp_destination)
            num += 1
            # stop early once enough audio has been synthesized
            if (quit_after is not None) and (current_time > quit_after):
                self._log(["Quitting after reached duration %.3f", current_time])
                break
        # output WAV file, concatenation of synthesized fragments
        # NOTE(review): sample_frequency/encoding are unbound here if no
        # fragment had positive duration — confirm empty input cannot
        # reach this point
        self._log(["Writing audio file '%s'", audio_file_path])
        wavwrite(waves, audio_file_path, sample_frequency, encoding)
        # return the time anchors
        # TODO anchors do not make sense if backwards == True
        self._log(["Returning %d time anchors", len(anchors)])
        self._log(["Current time %.3f", current_time])
        self._log(["Synthesized %d characters", num_chars])
        return (anchors, current_time, num_chars)
class ExecuteTask(object):
    """
    Execute a task, that is, compute the sync map for it.

    :param task: the task to be executed
    :type  task: :class:`aeneas.task.Task`
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    TAG = "ExecuteTask"

    def __init__(self, task, logger=None):
        self.task = task
        # list of [handler, path] pairs of temp files to remove in _cleanup()
        self.cleanup_info = []
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def execute(self):
        """
        Execute the task.
        The sync map produced will be stored inside the task object.

        Return ``True`` if the execution succeeded,
        ``False`` if an error occurred.

        :rtype: bool
        """
        self._log("Executing task")

        # check that we have the AudioFile object
        if self.task.audio_file is None:
            self._log("The task does not seem to have its audio file set", Logger.WARNING)
            return False
        if ((self.task.audio_file.audio_length is None) or
                (self.task.audio_file.audio_length <= 0)):
            self._log("The task seems to have an invalid audio file", Logger.WARNING)
            return False

        # check that we have the TextFile object
        if self.task.text_file is None:
            self._log("The task does not seem to have its text file set", Logger.WARNING)
            return False
        if len(self.task.text_file) == 0:
            self._log("The task seems to have no text fragments", Logger.WARNING)
            return False

        self._log("Both audio and text input file are present")
        self.cleanup_info = []

        #TODO refactor what follows

        # Terminology used in the steps below:
        # real full wave    = the real audio file, converted to WAVE format
        # real trimmed wave = real full wave, possibly with head and/or tail trimmed off
        # synt wave         = WAVE file synthesized from text;
        #                     it will be aligned to real trimmed wave

        # STEP 0 : convert audio file to real full wave
        self._log("STEP 0 BEGIN")
        result, real_full_handler, real_full_path = self._convert()
        self.cleanup_info.append([real_full_handler, real_full_path])
        if not result:
            self._log("STEP 0 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 0 END")

        # STEP 1 : extract MFCCs from real full wave
        self._log("STEP 1 BEGIN")
        result, real_full_wave_full_mfcc, real_full_wave_length = self._extract_mfcc(
            real_full_path)
        if not result:
            self._log("STEP 1 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 1 END")

        # STEP 2 : cut head and/or tail off
        #          detecting head/tail if requested, and
        #          overwriting real_path
        #          at the end, read_path will not have the head/tail
        self._log("STEP 2 BEGIN")
        result = self._cut_head_tail(real_full_path)
        # the trimmed wave overwrites the full wave file in place
        real_trimmed_path = real_full_path
        if not result:
            self._log("STEP 2 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 2 END")

        # STEP 3 : synthesize text to wave
        self._log("STEP 3 BEGIN")
        result, synt_handler, synt_path, synt_anchors = self._synthesize()
        self.cleanup_info.append([synt_handler, synt_path])
        if not result:
            self._log("STEP 3 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 3 END")

        # STEP 4 : align waves
        self._log("STEP 4 BEGIN")
        result, wave_map = self._align_waves(real_trimmed_path, synt_path)
        if not result:
            self._log("STEP 4 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 4 END")

        # STEP 5 : align text
        self._log("STEP 5 BEGIN")
        result, text_map = self._align_text(wave_map, synt_anchors)
        if not result:
            self._log("STEP 5 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 5 END")

        # STEP 6 : translate the text_map, possibly putting back the head/tail
        self._log("STEP 6 BEGIN")
        result, translated_text_map = self._translate_text_map(
            text_map,
            real_full_wave_length)
        if not result:
            self._log("STEP 6 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 6 END")

        # STEP 7 : adjust boundaries
        self._log("STEP 7 BEGIN")
        result, adjusted_map = self._adjust_boundaries(
            translated_text_map,
            real_full_wave_full_mfcc,
            real_full_wave_length)
        if not result:
            self._log("STEP 7 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 7 END")

        # STEP 8 : create syncmap and add it to task
        self._log("STEP 8 BEGIN")
        result = self._create_syncmap(adjusted_map)
        if not result:
            self._log("STEP 8 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 8 END")

        # STEP 9 : cleanup
        self._log("STEP 9 BEGIN")
        self._cleanup()
        self._log("STEP 9 END")
        self._log("Execution completed")
        return True

    def _cleanup(self):
        """
        Remove all temporary files.
        """
        for info in self.cleanup_info:
            handler, path = info
            if handler is not None:
                try:
                    self._log(["Closing handler '%s'...", handler])
                    os.close(handler)
                    self._log("Succeeded")
                except:
                    # NOTE(review): bare except is deliberate best-effort
                    # cleanup here, but it also hides unexpected errors
                    self._log("Failed")
            if path is not None:
                try:
                    self._log(["Removing path '%s'...", path])
                    os.remove(path)
                    self._log("Succeeded")
                except:
                    self._log("Failed")
        self.cleanup_info = []

    def _convert(self):
        """
        Convert the entire audio file into a ``wav`` file.

        (Head/tail will be cut off later.)

        Return a triple:

        1. a success bool flag
        2. handler of the generated wave file
        3. path of the generated wave file
        """
        self._log("Converting real audio to wav")
        handler = None
        path = None
        try:
            self._log("Creating an output tempfile")
            handler, path = tempfile.mkstemp(
                suffix=".wav",
                dir=gf.custom_tmp_dir()
            )
            self._log("Creating a FFMPEGWrapper")
            ffmpeg = FFMPEGWrapper(logger=self.logger)
            self._log("Converting...")
            ffmpeg.convert(
                input_file_path=self.task.audio_file_path_absolute,
                output_file_path=path)
            self._log("Converting... done")
            self._log("Converting real audio to wav: succeeded")
            return (True, handler, path)
        except Exception as e:
            # handler/path may be non-None here so the caller can clean up
            self._log("Converting real audio to wav: failed")
            self._log(["Message: %s", str(e)])
            return (False, handler, path)

    def _extract_mfcc(self, audio_file_path):
        """
        Extract the MFCCs of the real full wave.

        Return a triple: a success bool flag, the MFCC matrix
        (or ``None``), and the audio length (or ``None``).
        """
        self._log("Extracting MFCCs from real full wave")
        try:
            audio_file = AudioFile(audio_file_path, logger=self.logger)
            audio_file.extract_mfcc()
            self._log("Extracting MFCCs from real full wave: succeeded")
            return (True, audio_file.audio_mfcc, audio_file.audio_length)
        except Exception as e:
            self._log("Extracting MFCCs from real full wave: failed")
            self._log(["Message: %s", str(e)])
            return (False, None, None)

    def _cut_head_tail(self, audio_file_path):
        """
        Set the audio file head or tail,
        suitably cutting the audio file on disk,
        and setting the corresponding parameters in the task configuration.

        Return a success bool flag
        """
        self._log("Setting head and/or tail")
        try:
            configuration = self.task.configuration
            head_length = configuration.is_audio_file_head_length
            process_length = configuration.is_audio_file_process_length
            detect_head_min = configuration.is_audio_file_detect_head_min
            detect_head_max = configuration.is_audio_file_detect_head_max
            detect_tail_min = configuration.is_audio_file_detect_tail_min
            detect_tail_max = configuration.is_audio_file_detect_tail_max

            # explicit head or process?
            explicit = (head_length is not None) or (process_length is not None)

            # at least one detect parameter?
            detect = (
                (detect_head_min is not None) or
                (detect_head_max is not None) or
                (detect_tail_min is not None) or
                (detect_tail_max is not None)
            )

            if explicit or detect:
                # we need to load the audio data
                audio_file = AudioFile(audio_file_path, logger=self.logger)
                audio_file.load_data()

                if explicit:
                    # explicit values win: skip detection entirely
                    self._log("Explicit head or process")
                else:
                    self._log("No explicit head or process => detecting head/tail")

                    head = 0.0
                    if (detect_head_min is not None) or (detect_head_max is not None):
                        self._log("Detecting head...")
                        # missing bound falls back to the global default
                        detect_head_min = gf.safe_float(detect_head_min, gc.SD_MIN_HEAD_LENGTH)
                        detect_head_max = gf.safe_float(detect_head_max, gc.SD_MAX_HEAD_LENGTH)
                        self._log(["detect_head_min is %.3f", detect_head_min])
                        self._log(["detect_head_max is %.3f", detect_head_max])
                        sd = SD(audio_file, self.task.text_file, logger=self.logger)
                        head = sd.detect_head(detect_head_min, detect_head_max)
                        self._log(["Detected head: %.3f", head])

                    tail = 0.0
                    if (detect_tail_min is not None) or (detect_tail_max is not None):
                        self._log("Detecting tail...")
                        detect_tail_max = gf.safe_float(detect_tail_max, gc.SD_MAX_TAIL_LENGTH)
                        detect_tail_min = gf.safe_float(detect_tail_min, gc.SD_MIN_TAIL_LENGTH)
                        self._log(["detect_tail_min is %.3f", detect_tail_min])
                        self._log(["detect_tail_max is %.3f", detect_tail_max])
                        sd = SD(audio_file, self.task.text_file, logger=self.logger)
                        tail = sd.detect_tail(detect_tail_min, detect_tail_max)
                        self._log(["Detected tail: %.3f", tail])

                    # sanity check
                    head_length = max(0, head)
                    process_length = max(0, audio_file.audio_length - tail - head)

                    # we need to set these values
                    # in the config object for later use
                    self.task.configuration.is_audio_file_head_length = head_length
                    self.task.configuration.is_audio_file_process_length = process_length
                    self._log(["Set head_length: %.3f", head_length])
                    self._log(["Set process_length: %.3f", process_length])

                if head_length is not None:
                    # in case we are reading from config object
                    head_length = float(head_length)
                if process_length is not None:
                    # in case we are reading from config object
                    process_length = float(process_length)
                # note that str() is necessary, as one might be None
                self._log(["is_audio_file_head_length is %s", str(head_length)])
                self._log(["is_audio_file_process_length is %s", str(process_length)])

                self._log("Trimming audio data...")
                audio_file.trim(head_length, process_length)
                self._log("Trimming audio data... done")
                self._log("Writing audio file...")
                # overwrite the original file with the trimmed audio
                audio_file.write(audio_file_path)
                self._log("Writing audio file... done")
                audio_file.clear_data()
            else:
                # nothing to do
                self._log("No explicit head/process or detect head/tail")

            self._log("Setting head and/or tail: succeeded")
            return True
        except Exception as e:
            self._log("Setting head and/or tail: failed")
            self._log(["Message: %s", str(e)])
            return False

    def _synthesize(self):
        """
        Synthesize text into a ``wav`` file.

        Return a quadruple:

        1. a success bool flag
        2. handler of the generated wave file
        3. path of the generated wave file
        4. the list of anchors, that is, a list of floats
           each representing the start time of the corresponding
           text fragment in the generated wave file
           ``[start_1, start_2, ..., start_n]``
        """
        self._log("Synthesizing text")
        handler = None
        path = None
        anchors = None
        try:
            self._log("Creating an output tempfile")
            handler, path = tempfile.mkstemp(
                suffix=".wav",
                dir=gf.custom_tmp_dir()
            )
            self._log("Creating Synthesizer object")
            synt = Synthesizer(logger=self.logger)
            self._log("Synthesizing...")
            result = synt.synthesize(self.task.text_file, path)
            # only the anchors are needed; duration/char count are discarded
            anchors = result[0]
            self._log("Synthesizing... done")
            self._log("Synthesizing text: succeeded")
            return (True, handler, path, anchors)
        except Exception as e:
            self._log("Synthesizing text: failed")
            self._log(["Message: %s", str(e)])
            return (False, handler, path, anchors)

    def _align_waves(self, real_path, synt_path):
        """
        Align two ``wav`` files.

        Return a pair:

        1. a success bool flag
        2. the computed alignment map, that is,
           a list of pairs of floats, each representing
           corresponding time instants
           in the real and synt wave, respectively
           ``[real_time, synt_time]``
        """
        self._log("Aligning waves")
        try:
            self._log("Creating DTWAligner object")
            aligner = DTWAligner(real_path, synt_path, logger=self.logger)
            self._log("Computing MFCC...")
            aligner.compute_mfcc()
            self._log("Computing MFCC... done")
            self._log("Computing path...")
            aligner.compute_path()
            self._log("Computing path... done")
            self._log("Computing map...")
            computed_map = aligner.computed_map
            self._log("Computing map... done")
            self._log("Aligning waves: succeeded")
            return (True, computed_map)
        except Exception as e:
            self._log("Aligning waves: failed")
            self._log(["Message: %s", str(e)])
            return (False, None)

    def _align_text(self, wave_map, synt_anchors):
        """
        Align the text with the real wave,
        using the ``wave_map`` (containing the mapping
        between real and synt waves) and ``synt_anchors``
        (containing the start times of text fragments
        in the synt wave).

        Return a pair:

        1. a success bool flag
        2. the computed interval map, that is,
           a list of triples ``[start_time, end_time, fragment_id]``
        """
        self._log("Aligning text")
        self._log(["Number of frames: %d", len(wave_map)])
        self._log(["Number of fragments: %d", len(synt_anchors)])
        try:
            real_times = numpy.array([t[0] for t in wave_map])
            synt_times = numpy.array([t[1] for t in wave_map])
            real_anchors = []
            anchor_index = 0
            # TODO numpy-fy this loop
            for anchor in synt_anchors:
                time, fragment_id, fragment_text = anchor
                self._log("Looking for argmin index...")
                # TODO allow an user-specified function instead of min
                #      partially solved by AdjustBoundaryAlgorithm
                # map each synt anchor to the closest synt time in the
                # alignment path, then read the corresponding real time
                index = (numpy.abs(synt_times - time)).argmin()
                self._log("Looking for argmin index... done")
                real_time = real_times[index]
                real_anchors.append([real_time, fragment_id, fragment_text])
                self._log(["Time for anchor %d: %f", anchor_index, real_time])
                anchor_index += 1

            # dummy last anchor, starting at the real file duration
            real_anchors.append([real_times[-1], None, None])

            # compute map
            self._log("Computing interval map...")
            # TODO numpy-fy this loop
            computed_map = []
            for i in range(len(real_anchors) - 1):
                fragment_id = real_anchors[i][1]
                fragment_text = real_anchors[i][2]
                start = real_anchors[i][0]
                end = real_anchors[i + 1][0]
                computed_map.append([start, end, fragment_id, fragment_text])
            self._log("Computing interval map... done")
            self._log("Aligning text: succeeded")
            return (True, computed_map)
        except Exception as e:
            self._log("Aligning text: failed")
            self._log(["Message: %s", str(e)])
            return (False, None)

    def _translate_text_map(self, text_map, real_full_wave_length):
        """
        Translate the text_map by adding head and tail dummy fragments

        Return a pair: a success bool flag and the translated map,
        where every fragment is shifted by the head length and
        dummy head/tail entries are prepended/appended.
        """
        if len(text_map) == 0:
            self._log("No fragments in the text_map", Logger.CRITICAL)
            return (False, None)
        translated = []
        head = gf.safe_float(self.task.configuration.is_audio_file_head_length, 0)
        # dummy head fragment covering [0, head]
        translated.append([0, head, None, None])
        end = 0
        for element in text_map:
            start, end, fragment_id, fragment_text = element
            start += head
            end += head
            translated.append([start, end, fragment_id, fragment_text])
        # dummy tail fragment from the last fragment end to the wave end
        translated.append([end, real_full_wave_length, None, None])
        return (True, translated)

    def _adjust_boundaries(self, text_map, real_wave_full_mfcc, real_wave_length):
        """
        Adjust the boundaries between consecutive fragments.

        Return a pair:

        1. a success bool flag
        2. the computed interval map, that is,
           a list of triples ``[start_time, end_time, fragment_id]``
        """
        self._log("Adjusting boundaries")
        algo = self.task.configuration.adjust_boundary_algorithm
        value = None
        if algo is None:
            self._log("No adjust boundary algorithm specified: returning")
            return (True, text_map)
        elif algo == AdjustBoundaryAlgorithm.AUTO:
            self._log("Requested adjust boundary algorithm AUTO: returning")
            return (True, text_map)
        elif algo == AdjustBoundaryAlgorithm.AFTERCURRENT:
            value = self.task.configuration.adjust_boundary_aftercurrent_value
        elif algo == AdjustBoundaryAlgorithm.BEFORENEXT:
            value = self.task.configuration.adjust_boundary_beforenext_value
        elif algo == AdjustBoundaryAlgorithm.OFFSET:
            value = self.task.configuration.adjust_boundary_offset_value
        elif algo == AdjustBoundaryAlgorithm.PERCENT:
            value = self.task.configuration.adjust_boundary_percent_value
        elif algo == AdjustBoundaryAlgorithm.RATE:
            value = self.task.configuration.adjust_boundary_rate_value
        elif algo == AdjustBoundaryAlgorithm.RATEAGGRESSIVE:
            # RATE and RATEAGGRESSIVE share the same configuration value
            value = self.task.configuration.adjust_boundary_rate_value
        self._log(["Requested algo %s and value %s", algo, value])

        try:
            # VAD output (speech/nonspeech intervals) drives the adjustment
            self._log("Running VAD...")
            vad = VAD(logger=self.logger)
            vad.wave_mfcc = real_wave_full_mfcc
            vad.wave_len = real_wave_length
            vad.compute_vad()
            self._log("Running VAD... done")
        except Exception as e:
            self._log("Adjusting boundaries: failed")
            self._log(["Message: %s", str(e)])
            return (False, None)

        self._log("Creating AdjustBoundaryAlgorithm object")
        adjust_boundary = AdjustBoundaryAlgorithm(
            algorithm=algo,
            text_map=text_map,
            speech=vad.speech,
            nonspeech=vad.nonspeech,
            value=value,
            logger=self.logger
        )
        self._log("Adjusting boundaries...")
        adjusted_map = adjust_boundary.adjust()
        self._log("Adjusting boundaries... done")
        self._log("Adjusting boundaries: succeeded")
        return (True, adjusted_map)

    def _create_syncmap(self, adjusted_map):
        """
        Create a sync map out of the provided interval map,
        and store it in the task object.

        Return a success bool flag.
        """
        self._log("Creating sync map")
        self._log(["Number of fragments in adjusted map (including HEAD and TAIL): %d", len(adjusted_map)])

        # adjusted map has 2 elements (HEAD and TAIL) more than text_file
        if len(adjusted_map) != len(self.task.text_file.fragments) + 2:
            self._log("The number of sync map fragments does not match the number of text fragments (+2)", Logger.CRITICAL)
            return False

        try:
            sync_map = SyncMap()
            head = adjusted_map[0]
            tail = adjusted_map[-1]

            # get language
            language = Language.EN
            self._log(["Language set to default: %s", language])
            if len(self.task.text_file.fragments) > 0:
                language = self.task.text_file.fragments[0].language
                self._log(["Language read from text_file: %s", language])

            # get head/tail format
            head_tail_format = self.task.configuration.os_file_head_tail_format
            # note that str() is necessary, as head_tail_format might be None
            self._log(["Head/tail format: %s", str(head_tail_format)])

            # add head sync map fragment if needed
            if head_tail_format == SyncMapHeadTailFormat.ADD:
                head_frag = TextFragment(u"HEAD", language, [u""])
                sync_map_frag = SyncMapFragment(head_frag, head[0], head[1])
                sync_map.append(sync_map_frag)
                self._log(["Adding head (ADD): %.3f %.3f", head[0], head[1]])

            # stretch first and last fragment timings if needed
            if head_tail_format == SyncMapHeadTailFormat.STRETCH:
                self._log(["Stretching (STRETCH): %.3f => %.3f (head) and %.3f => %.3f (tail)", adjusted_map[1][0], head[0], adjusted_map[-2][1], tail[1]])
                adjusted_map[1][0] = head[0]
                adjusted_map[-2][1] = tail[1]

            # one sync map fragment per text fragment, starting at index 1
            # (index 0 is the head interval)
            i = 1
            for fragment in self.task.text_file.fragments:
                start = adjusted_map[i][0]
                end = adjusted_map[i][1]
                sync_map_frag = SyncMapFragment(fragment, start, end)
                sync_map.append(sync_map_frag)
                i += 1

            # add tail sync map fragment if needed
            if head_tail_format == SyncMapHeadTailFormat.ADD:
                tail_frag = TextFragment(u"TAIL", language, [u""])
                sync_map_frag = SyncMapFragment(tail_frag, tail[0], tail[1])
                sync_map.append(sync_map_frag)
                self._log(["Adding tail (ADD): %.3f %.3f", tail[0], tail[1]])

            self.task.sync_map = sync_map
            self._log("Creating sync map: succeeded")
            return True
        except Exception as e:
            self._log("Creating sync map: failed")
            self._log(["Message: %s", str(e)])
            return False
class Container(object):
    """
    An abstraction for different archive formats like ZIP or TAR,
    exposing common functions like extracting all files or
    a single file, listing the files, etc.

    An (uncompressed) directory can be used in lieu of a compressed file.

    :param file_path: the path to the container file (or directory)
    :type  file_path: string (path)
    :param container_format: the format of the container
    :type  container_format: :class:`aeneas.container.ContainerFormat`
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    TAG = "Container"

    def __init__(self, file_path, container_format=None, logger=None):
        self.file_path = file_path
        self.container_format = container_format
        # delegate object (_ContainerZIP/_ContainerTAR/_ContainerUnpacked)
        self.actual_container = None
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()
        self._log("Setting actual Container object")
        self._set_actual_container()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    @property
    def file_path(self):
        """
        The path of this container.

        :rtype: string (path)
        """
        return self.__file_path

    @file_path.setter
    def file_path(self, file_path):
        self.__file_path = file_path

    @property
    def container_format(self):
        """
        The format of this container.

        :rtype: :class:`aeneas.container.ContainerFormat`
        """
        return self.__container_format

    @container_format.setter
    def container_format(self, container_format):
        self.__container_format = container_format

    @property
    def has_config_xml(self):
        """
        Return ``True`` if there is an XML config file in this container,
        ``False`` otherwise.

        :rtype: bool
        """
        return self.find_entry(gc.CONFIG_XML_FILE_NAME, exact=False) is not None

    @property
    def entry_config_xml(self):
        """
        Return the entry (path inside the container)
        of the XML config file in this container,
        or ``None`` if not present.

        :rtype: string (path)
        """
        return self.find_entry(gc.CONFIG_XML_FILE_NAME, exact=False)

    @property
    def has_config_txt(self):
        """
        Return ``True`` if there is a TXT config file in this container,
        ``False`` otherwise.

        :rtype: bool
        """
        return self.find_entry(gc.CONFIG_TXT_FILE_NAME, exact=False) is not None

    @property
    def entry_config_txt(self):
        """
        Return the entry (path inside the container)
        of the TXT config file in this container,
        or ``None`` if not present.

        :rtype: string (path)
        """
        return self.find_entry(gc.CONFIG_TXT_FILE_NAME, exact=False)

    @property
    def is_safe(self):
        """
        Return ``True`` if the container can be safely extracted,
        that is, if all its entries are safe, ``False`` otherwise.

        :rtype: bool
        """
        self._log("Checking if this container is safe")
        entries = self.entries()
        for entry in entries:
            if not self.is_entry_safe(entry):
                self._log(["This container is not safe: found unsafe entry '%s'", entry])
                return False
        self._log("This container is safe")
        return True

    def is_entry_safe(self, entry):
        """
        Return ``True`` if ``entry`` can be safely extracted,
        that is, if it does start with ``/`` or ``../``
        after path normalization, ``False`` otherwise.

        :rtype: bool
        """
        # normalization collapses sequences like "a/../../b" so that
        # the prefix check catches escapes from the extraction directory
        normalized = os.path.normpath(entry)
        if normalized.startswith("/") or normalized.startswith("../"):
            self._log(["Entry '%s' is not safe", entry])
            return False
        self._log(["Entry '%s' is safe", entry])
        return True

    def entries(self):
        """
        Return the sorted list of entries in this container,
        each represented by its full path inside the container.

        :rtype: list of strings (path)
        """
        self._log("Getting entries")
        if (self.actual_container is not None) and (self.exists()):
            try:
                return self.actual_container.entries()
            except:
                # NOTE(review): bare except silently maps any error to []
                self._log("An error occurred while getting entries")
        return []

    def find_entry(self, entry, exact=True):
        """
        Return the full path to the first entry whose file name equals
        the given ``entry`` path.

        Return ``None`` if the entry cannot be found.

        If ``exact`` is ``True``, the path must be exact,
        otherwise the comparison is done only on the file name.

        Example: ::

            entry = "config.txt"

        might match: ::

            config.txt
            foo/config.txt (if exact = False)
            foo/bar/config.txt (if exact = False)

        :param entry: the entry name to be searched for
        :type  entry: string (path)
        :param exact: look for the exact entry path
        :type  exact: bool
        :rtype: string (path)
        """
        if exact:
            self._log(["Finding entry '%s' with exact=True", entry])
            if entry in self.entries():
                self._log(["Found entry '%s'", entry])
                return entry
        else:
            self._log(["Finding entry '%s' with exact=False", entry])
            for ent in self.entries():
                # compare on the basename only
                if os.path.basename(ent) == entry:
                    self._log(["Found entry '%s'", ent])
                    return ent
        self._log(["Entry '%s' not found", entry])
        return None

    def read_entry(self, entry):
        """
        Read the contents of an entry in this container,
        and return them as a string.

        Return ``None`` if the entry is not safe
        or it cannot be found.

        :rtype: string
        """
        if not self.is_entry_safe(entry):
            self._log(["Accessing entry '%s' is not safe", entry])
            return None

        if not entry in self.entries():
            self._log(["Entry '%s' not found in this container", entry])
            return None

        self._log(["Reading contents of entry '%s'", entry])
        try:
            return self.actual_container.read_entry(entry)
        except:
            # NOTE(review): bare except silently maps any read error to None
            self._log(["An error occurred while reading the contents of '%s'", entry])
            return None

    def decompress(self, output_path):
        """
        Decompress the entire container into the given directory.

        :param output_path: path of the destination directory
        :type  output_path: string (path)
        """
        self._log(["Decompressing the container into '%s'", output_path])
        if self.actual_container is None:
            self._log("Actual container not set, aborting")
            return
        if not self.exists():
            self._log("The container path is not set or not existing, aborting")
            return
        if not self.is_safe:
            # refuse to extract containers with path-escaping entries
            self._log("The container contains unsafe entries")
            return
        try:
            self.actual_container.decompress(output_path)
            self._log(["Decompressing the container into '%s': succeeded", output_path])
        except:
            self._log(["Decompressing the container into '%s': failed", output_path])

    def compress(self, input_path):
        """
        Compress the contents of the given directory.

        :param input_path: path of the input directory
        :type  input_path: string (path)
        """
        self._log(["Compressing '%s' into this container", input_path])

        if self.actual_container is None:
            self._log("Actual container not set, aborting")
            return

        if self.file_path is None:
            self._log("The container path is not set, aborting")
            return

        if not os.path.isdir(input_path):
            self._log(["The input path '%s' is not a directory, aborting", input_path])
            return

        try:
            self.actual_container.compress(input_path)
            self._log(["Compressing '%s' into this container: succeeded", input_path])
        except:
            self._log(["Compressing '%s' into this container: failed", input_path])

    def exists(self):
        """
        Return ``True`` if the container has its path set
        and it exists, ``False`` otherwise.

        :rtype: boolean
        """
        return (self.file_path is not None) and os.path.exists(self.file_path)

    def _set_actual_container(self):
        """
        Set the actual container, based on the specified container format.

        If the container format is not specified,
        infer it from the (lowercased) extension of the file path.
        If the format cannot be inferred, it is assumed to be
        of type :class:`aeneas.container.ContainerFormat.UNPACKED`
        (unpacked directory).
        """
        self._log("Setting actual container")

        # infer container format
        if self.container_format is None:
            self._log("Inferring actual container format")
            path_lowercased = self.file_path.lower()
            self._log(["Lowercased file path: '%s'", path_lowercased])
            if path_lowercased.endswith(ContainerFormat.ZIP):
                self.container_format = ContainerFormat.ZIP
            elif path_lowercased.endswith(ContainerFormat.EPUB):
                self.container_format = ContainerFormat.EPUB
            elif path_lowercased.endswith(ContainerFormat.TAR):
                self.container_format = ContainerFormat.TAR
            elif path_lowercased.endswith(ContainerFormat.TAR_GZ):
                self.container_format = ContainerFormat.TAR_GZ
            elif path_lowercased.endswith(ContainerFormat.TAR_BZ2):
                self.container_format = ContainerFormat.TAR_BZ2
            else:
                # unknown extension => assume unpacked directory
                self.container_format = ContainerFormat.UNPACKED
            self._log(["Inferred format: '%s'", self.container_format])

        # set the actual container
        self._log("Setting actual container")
        if self.container_format == ContainerFormat.ZIP:
            self.actual_container = _ContainerZIP(self.file_path)
        elif self.container_format == ContainerFormat.EPUB:
            # EPUB files are ZIP archives
            self.actual_container = _ContainerZIP(self.file_path)
        elif self.container_format == ContainerFormat.TAR:
            self.actual_container = _ContainerTAR(self.file_path, "")
        elif self.container_format == ContainerFormat.TAR_GZ:
            self.actual_container = _ContainerTAR(self.file_path, ":gz")
        elif self.container_format == ContainerFormat.TAR_BZ2:
            self.actual_container = _ContainerTAR(self.file_path, ":bz2")
        elif self.container_format == ContainerFormat.UNPACKED:
            self.actual_container = _ContainerUnpacked(self.file_path)
        self._log(["Actual container format: '%s'", self.container_format])
        self._log("Actual container set")
class SyncMap(object):
    """
    A synchronization map, that is, a list of
    :class:`aeneas.syncmap.SyncMapFragment`
    objects.
    """

    TAG = "SyncMap"

    def __init__(self, logger=None):
        # list of SyncMapFragment objects, in insertion order
        self.fragments = []
        self.logger = Logger()
        if logger is not None:
            self.logger = logger

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def __len__(self):
        return len(self.fragments)

    def __str__(self):
        return "\n".join([str(f) for f in self.fragments])

    def append(self, fragment):
        """
        Append the given sync map fragment.

        :param fragment: the sync map fragment to be appended
        :type fragment: :class:`aeneas.syncmap.SyncMapFragment`
        """
        self.fragments.append(fragment)

    @property
    def fragments(self):
        """
        The current list of sync map fragments.

        :rtype: list of :class:`aeneas.syncmap.SyncMapFragment`
        """
        return self.__fragments

    @fragments.setter
    def fragments(self, fragments):
        self.__fragments = fragments

    def clear(self):
        """
        Clear the sync map.
        """
        self._log("Clearing sync map")
        self.fragments = []

    def read(self, sync_map_format, input_file_path, parameters=None):
        """
        Read sync map fragments from the given file in the specified format,
        and append them the current (this) sync map.

        Return ``True`` if the call succeeded,
        ``False`` if an error occurred.

        :param sync_map_format: the format of the sync map
        :type sync_map_format: string (from :class:`aeneas.syncmap.SyncMapFormat` enumeration)
        :param input_file_path: the path to the input file to read
        :type input_file_path: string (path)
        :param parameters: additional parameters (e.g., for SMIL input)
        :type parameters: dict
        :rtype: bool
        """
        self._log(["Input format: '%s'", sync_map_format])
        self._log(["Input path: '%s'", input_file_path])
        self._log(["Input parameters: '%s'", parameters])
        try:
            # open the input file for reading
            # NOTE(review): log string below says "output" but this is the
            # input file — the string is left unchanged here on purpose
            self._log("Opening output file")
            input_file = codecs.open(input_file_path, "r", "utf-8")
            # dispatch to the reader for the requested format;
            # *H variants parse hh:mm:ss.mmm timings, the others ss.mmm
            if sync_map_format == SyncMapFormat.CSV:
                self._read_csv(input_file, gf.time_from_ssmmm)
            elif sync_map_format == SyncMapFormat.CSVH:
                self._read_csv(input_file, gf.time_from_hhmmssmmm)
            elif sync_map_format == SyncMapFormat.CSVM:
                self._read_csv(input_file, gf.time_from_ssmmm)
            elif sync_map_format == SyncMapFormat.JSON:
                self._read_json(input_file)
            elif sync_map_format == SyncMapFormat.RBSE:
                self._read_rbse(input_file)
            elif sync_map_format == SyncMapFormat.SMIL:
                self._read_smil(input_file)
            elif sync_map_format == SyncMapFormat.SMILH:
                self._read_smil(input_file)
            elif sync_map_format == SyncMapFormat.SMILM:
                self._read_smil(input_file)
            elif sync_map_format == SyncMapFormat.SRT:
                self._read_srt(input_file)
            elif sync_map_format == SyncMapFormat.SSV:
                self._read_ssv(input_file, gf.time_from_ssmmm)
            elif sync_map_format == SyncMapFormat.SSVH:
                self._read_ssv(input_file, gf.time_from_hhmmssmmm)
            elif sync_map_format == SyncMapFormat.SSVM:
                self._read_ssv(input_file, gf.time_from_ssmmm)
            elif sync_map_format == SyncMapFormat.TAB:
                self._read_tsv(input_file, gf.time_from_ssmmm)
            elif sync_map_format == SyncMapFormat.TSV:
                self._read_tsv(input_file, gf.time_from_ssmmm)
            elif sync_map_format == SyncMapFormat.TSVH:
                self._read_tsv(input_file, gf.time_from_hhmmssmmm)
            elif sync_map_format == SyncMapFormat.TSVM:
                self._read_tsv(input_file, gf.time_from_ssmmm)
            elif sync_map_format == SyncMapFormat.TTML:
                self._read_ttml(input_file)
            elif sync_map_format == SyncMapFormat.TXT:
                self._read_txt(input_file, gf.time_from_ssmmm)
            elif sync_map_format == SyncMapFormat.TXTH:
                self._read_txt(input_file, gf.time_from_hhmmssmmm)
            elif sync_map_format == SyncMapFormat.TXTM:
                self._read_txt(input_file, gf.time_from_ssmmm)
            elif sync_map_format == SyncMapFormat.VTT:
                self._read_vtt(input_file)
            elif sync_map_format == SyncMapFormat.XML:
                self._read_xml(input_file)
            elif sync_map_format == SyncMapFormat.XML_LEGACY:
                self._read_xml_legacy(input_file)
            else:
                # unknown format: close and signal failure
                input_file.close()
                return False
            # overwrite language if requested
            if (parameters is not None) and (gc.PPN_SYNCMAP_LANGUAGE in parameters):
                for fragment in self.fragments:
                    fragment.text_fragment.language = parameters[gc.PPN_SYNCMAP_LANGUAGE]
            # close file and return
            input_file.close()
            return True
        except Exception as e:
            self._log("Exception while reading sync map from file", Logger.CRITICAL)
            self._log(["Message: %s", str(e)], Logger.CRITICAL)
            return False

    def write(self, sync_map_format, output_file_path, parameters=None):
        """
        Write the current sync map to file in the required format.

        Return ``True`` if the call succeeded,
        ``False`` if an error occurred.

        :param sync_map_format: the format of the sync map
        :type sync_map_format: string (from :class:`aeneas.syncmap.SyncMapFormat` enumeration)
        :param output_file_path: the path to the output file to write
        :type output_file_path: string (path)
        :param parameters: additional parameters (e.g., for SMIL output)
        :type parameters: dict
        :rtype: bool
        """
        self._log(["Output format: '%s'", sync_map_format])
        self._log(["Output path: '%s'", output_file_path])
        self._log(["Output parameters: '%s'", parameters])
        # create dir hierarchy, if needed
        parent_directory = os.path.dirname(os.path.abspath(output_file_path))
        if not os.path.exists(parent_directory):
            self._log(["Creating directory '%s'", parent_directory])
            os.makedirs(parent_directory)
        # SMIL output needs text/audio ref parameters: validate them up front
        if sync_map_format in [SyncMapFormat.SMIL, SyncMapFormat.SMILH, SyncMapFormat.SMILM]:
            required_parameters = [
                gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF,
                gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF
            ]
            if parameters is None:
                self._log(["No parameters while requesting %s format", sync_map_format], severity=Logger.CRITICAL)
                return False
            for required_parameter in required_parameters:
                if not required_parameter in parameters:
                    self._log(["Required key %s not present in parameters", required_parameter], severity=Logger.CRITICAL)
                    return False
                if parameters[required_parameter] is None:
                    self._log(["Required key %s has None value", required_parameter], severity=Logger.CRITICAL)
                    return False
        try:
            # open file for writing
            self._log("Opening output file")
            output_file = codecs.open(output_file_path, "w", "utf-8")
            # dispatch to the writer for the requested format;
            # *H variants emit hh:mm:ss.mmm timings, the others ss.mmm
            if sync_map_format == SyncMapFormat.CSV:
                self._write_csv(output_file, gf.time_to_ssmmm)
            elif sync_map_format == SyncMapFormat.CSVH:
                self._write_csv(output_file, gf.time_to_hhmmssmmm)
            elif sync_map_format == SyncMapFormat.CSVM:
                self._write_csv(output_file, gf.time_to_ssmmm)
            elif sync_map_format == SyncMapFormat.JSON:
                self._write_json(output_file)
            elif sync_map_format == SyncMapFormat.RBSE:
                self._write_rbse(output_file)
            elif sync_map_format == SyncMapFormat.SMIL:
                self._write_smil(output_file, gf.time_to_hhmmssmmm, parameters)
            elif sync_map_format == SyncMapFormat.SMILH:
                self._write_smil(output_file, gf.time_to_hhmmssmmm, parameters)
            elif sync_map_format == SyncMapFormat.SMILM:
                self._write_smil(output_file, gf.time_to_ssmmm, parameters)
            elif sync_map_format == SyncMapFormat.SRT:
                self._write_srt(output_file)
            elif sync_map_format == SyncMapFormat.SSV:
                self._write_ssv(output_file, gf.time_to_ssmmm)
            elif sync_map_format == SyncMapFormat.SSVH:
                self._write_ssv(output_file, gf.time_to_hhmmssmmm)
            elif sync_map_format == SyncMapFormat.SSVM:
                self._write_ssv(output_file, gf.time_to_ssmmm)
            elif sync_map_format == SyncMapFormat.TAB:
                self._write_tsv(output_file, gf.time_to_ssmmm)
            elif sync_map_format == SyncMapFormat.TSV:
                self._write_tsv(output_file, gf.time_to_ssmmm)
            elif sync_map_format == SyncMapFormat.TSVH:
                self._write_tsv(output_file, gf.time_to_hhmmssmmm)
            elif sync_map_format == SyncMapFormat.TSVM:
                self._write_tsv(output_file, gf.time_to_ssmmm)
            elif sync_map_format == SyncMapFormat.TTML:
                self._write_ttml(output_file, parameters)
            elif sync_map_format == SyncMapFormat.TXT:
                self._write_txt(output_file, gf.time_to_ssmmm)
            elif sync_map_format == SyncMapFormat.TXTH:
                self._write_txt(output_file, gf.time_to_hhmmssmmm)
            elif sync_map_format == SyncMapFormat.TXTM:
                self._write_txt(output_file, gf.time_to_ssmmm)
            elif sync_map_format == SyncMapFormat.VTT:
                self._write_vtt(output_file)
            elif sync_map_format == SyncMapFormat.XML:
                self._write_xml(output_file)
            elif sync_map_format == SyncMapFormat.XML_LEGACY:
                self._write_xml_legacy(output_file)
            else:
                # unknown format: close and signal failure
                output_file.close()
                return False
            # close file and return
            output_file.close()
            return True
        except Exception as e:
            self._log("Exception while writing sync map to file", Logger.CRITICAL)
            self._log(["Message: %s", str(e)], Logger.CRITICAL)
            return False

    def _read_csv(self, input_file, parse_time):
        """ Read from CSV file (id,begin,end,"text") """
        for line in input_file.readlines():
            line = line.strip()
            split = line.split(",")
            identifier = split[0]
            begin = parse_time(split[1])
            end = parse_time(split[2])
            # rejoin the (possibly comma-containing) text and strip the quotes
            text = (",".join(split[3:]))[1:-1]
            text_fragment = TextFragment(identifier=identifier, lines=[text])
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)

    def _write_csv(self, output_file, format_time):
        """ Write to CSV file (id,begin,end,"text") """
        for fragment in self.fragments:
            text = fragment.text_fragment
            output_file.write("%s,%s,%s,\"%s\"\n" % (
                text.identifier,
                format_time(fragment.begin),
                format_time(fragment.end),
                text.text
            ))

    def _read_json(self, input_file):
        """ Read from JSON file """
        contents = input_file.read()
        contents_dict = json.loads(contents)
        for fragment in contents_dict["fragments"]:
            identifier = fragment["id"]
            language = fragment["language"]
            begin = gf.time_from_ssmmm(fragment["begin"])
            end = gf.time_from_ssmmm(fragment["end"])
            lines = []
            for line in fragment["lines"]:
                lines.append(line)
            text_fragment = TextFragment(identifier=identifier, language=language, lines=lines)
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)

    def _write_json(self, output_file):
        """ Write to JSON file """
        output_fragments = []
        for fragment in self.fragments:
            text = fragment.text_fragment
            output_fragment = {}
            output_fragment["id"] = text.identifier
            output_fragment["language"] = text.language
            output_fragment["lines"] = text.lines
            output_fragment["begin"] = gf.time_to_ssmmm(fragment.begin)
            output_fragment["end"] = gf.time_to_ssmmm(fragment.end)
            output_fragments.append(output_fragment)
        output_dict = {"fragments": output_fragments}
        output_file.write(json.dumps(output_dict, indent=1, sort_keys=True))

    def _read_rbse(self, input_file):
        """ Read from RBSE file """
        contents = input_file.read()
        contents_dict = json.loads(contents)
        for fragment in contents_dict["smil_data"]:
            identifier = fragment["id"]
            begin = gf.time_from_ssmmm(fragment["begin"])
            end = gf.time_from_ssmmm(fragment["end"])
            # TODO read text from additional text_file?
            text = u""
            text_fragment = TextFragment(identifier=identifier, lines=[text])
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)

    def _write_rbse(self, output_file):
        """ Write to RBSE file """
        output_dict = {}
        smil_data = []
        smil_ids = []
        for fragment in self.fragments:
            text = fragment.text_fragment
            output_fragment = {}
            output_fragment["id"] = text.identifier
            output_fragment["begin"] = gf.time_to_ssmmm(fragment.begin)
            output_fragment["end"] = gf.time_to_ssmmm(fragment.end)
            smil_ids.append(text.identifier)
            smil_data.append(output_fragment)
        output_dict = {
            "smil_ids": smil_ids,
            "smil_data": smil_data
        }
        output_file.write(json.dumps(output_dict, indent=1, sort_keys=True))

    def _read_smil(self, input_file):
        """
        Read from SMIL file.

        Limitations:
        1. parses only <par> elements, in order
        2. timings must have hh:mm:ss.mmm or ss.mmm format (autodetected)
        3. both clipBegin and clipEnd attributes of <audio> must be populated
        """
        smil_ns = "{http://www.w3.org/ns/SMIL}"
        contents = input_file.read()
        root = etree.fromstring(contents.encode("utf-8"))
        for par in root.iter(smil_ns + "par"):
            for child in par:
                if child.tag == (smil_ns + "text"):
                    # fragment id is the URL fragment of the text src
                    identifier = gf.split_url(child.get("src"))[1]
                elif child.tag == (smil_ns + "audio"):
                    # try hh:mm:ss.mmm first, fall back to ss.mmm
                    begin = gf.time_from_hhmmssmmm(child.get("clipBegin"))
                    if begin is None:
                        begin = gf.time_from_ssmmm(child.get("clipBegin"))
                    end = gf.time_from_hhmmssmmm(child.get("clipEnd"))
                    if end is None:
                        end = gf.time_from_ssmmm(child.get("clipEnd"))
            # TODO read text from additional text_file?
            text = u""
            text_fragment = TextFragment(identifier=identifier, lines=[text])
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)

    # (removed: superseded commented-out string-based SMIL writer;
    #  the lxml-based _write_smil below replaces it)

    def _write_smil(self, output_file, format_time, parameters=None):
        """ Write to SMIL file """
        # we are sure we have them (validated in write())
        text_ref = parameters[gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF]
        audio_ref = parameters[gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF]
        # namespaces
        smil_ns = "http://www.w3.org/ns/SMIL"
        epub_ns = "http://www.idpf.org/2007/ops"
        ns_map = {None : smil_ns, "epub" : epub_ns}
        # build tree
        smil_elem = etree.Element("{%s}smil" % smil_ns, nsmap=ns_map)
        smil_elem.attrib["version"] = "3.0"
        body_elem = etree.SubElement(smil_elem, "{%s}body" % smil_ns)
        seq_elem = etree.SubElement(body_elem, "{%s}seq" % smil_ns)
        seq_elem.attrib["id"] = "s" + str(1).zfill(6)
        seq_elem.attrib["{%s}textref" % epub_ns] = text_ref
        i = 1
        for fragment in self.fragments:
            text = fragment.text_fragment
            par_elem = etree.SubElement(seq_elem, "{%s}par" % smil_ns)
            par_elem.attrib["id"] = "p" + str(i).zfill(6)
            text_elem = etree.SubElement(par_elem, "{%s}text" % smil_ns)
            text_elem.attrib["src"] = "%s#%s" % (text_ref, text.identifier)
            audio_elem = etree.SubElement(par_elem, "{%s}audio" % smil_ns)
            audio_elem.attrib["src"] = audio_ref
            audio_elem.attrib["clipBegin"] = format_time(fragment.begin)
            audio_elem.attrib["clipEnd"] = format_time(fragment.end)
            i += 1
        # write tree
        self._write_tree_to_file(smil_elem, output_file, xml_declaration=False)

    def _read_srt(self, input_file):
        """ Read from SRT file """
        lines = input_file.readlines()
        i = 0
        while i < len(lines):
            line = lines[i].strip()
            if len(line) > 0:
                # cue index line
                identifier_index = int(line)
                identifier = "f" + str(identifier_index).zfill(6)
                i += 1
                if i < len(lines):
                    # timing line "begin --> end" (comma decimal separator)
                    line = lines[i].strip()
                    timings = line.split(" --> ")
                    if len(timings) == 2:
                        begin = gf.time_from_hhmmssmmm(timings[0], decimal_separator=",")
                        end = gf.time_from_hhmmssmmm(timings[1], decimal_separator=",")
                        # collect text lines until the next blank line
                        fragment_lines = []
                        while (i + 1 < len(lines)) and (len(line) > 0):
                            i += 1
                            line = lines[i].strip()
                            if len(line) > 0:
                                fragment_lines.append(line)
                        # should never happen, but just in case...
                        if len(fragment_lines) == 0:
                            fragment_lines = [""]
                        text_fragment = TextFragment(identifier=identifier, lines=fragment_lines)
                        sm_fragment = SyncMapFragment(text_fragment, begin, end)
                        self.append(sm_fragment)
            i += 1

    def _write_srt(self, output_file):
        """ Write to SRT file """
        i = 1
        for fragment in self.fragments:
            text = fragment.text_fragment
            output_file.write("%d\n" % i)
            output_file.write("%s --> %s\n" % (
                gf.time_to_srt(fragment.begin),
                gf.time_to_srt(fragment.end)
            ))
            for line in text.lines:
                output_file.write("%s\n" % line)
            output_file.write("\n")
            i += 1

    def _read_ssv(self, input_file, parse_time):
        """ Read from SSV file (begin end id "text", space-separated) """
        for line in input_file.readlines():
            line = line.strip()
            split = line.split(" ")
            begin = parse_time(split[0])
            end = parse_time(split[1])
            identifier = split[2]
            # rejoin the text and strip the quotes
            text = (" ".join(split[3:]))[1:-1]
            text_fragment = TextFragment(identifier=identifier, lines=[text])
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)

    def _write_ssv(self, output_file, format_time):
        """ Write to SSV file (begin end id "text", space-separated) """
        for fragment in self.fragments:
            text = fragment.text_fragment
            output_file.write("%s %s %s \"%s\"\n" % (
                format_time(fragment.begin),
                format_time(fragment.end),
                text.identifier,
                text.text
            ))

    def _read_tsv(self, input_file, parse_time):
        """ Read from TSV file (begin, end, id, tab-separated) """
        for line in input_file.readlines():
            line = line.strip()
            split = line.split("\t")
            begin = parse_time(split[0])
            end = parse_time(split[1])
            identifier = split[2]
            # TODO read text from additional text_file?
            text = u""
            text_fragment = TextFragment(identifier=identifier, lines=[text])
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)

    def _write_tsv(self, output_file, format_time):
        """ Write to TSV file (begin, end, id, tab-separated) """
        for fragment in self.fragments:
            text = fragment.text_fragment
            output_file.write("%s\t%s\t%s\n" % (
                format_time(fragment.begin),
                format_time(fragment.end),
                text.identifier
            ))

    def _read_ttml(self, input_file):
        """ Read from TTML file """
        ttml_ns = "{http://www.w3.org/ns/ttml}"
        xml_ns = "{http://www.w3.org/XML/1998/namespace}"
        contents = input_file.read()
        root = etree.fromstring(contents.encode("utf-8"))
        # document-level language applies to all fragments
        language = root.get(xml_ns + "lang")
        for elem in root.iter(ttml_ns + "p"):
            identifier = elem.get(xml_ns + "id")
            begin = gf.time_from_ttml(elem.get("begin"))
            end = gf.time_from_ttml(elem.get("end"))
            lines = self._get_lines_from_node_text(elem)
            text_fragment = TextFragment(identifier=identifier, language=language, lines=lines)
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)

    # (removed: superseded commented-out string-based TTML writer;
    #  the lxml-based _write_ttml below replaces it)

    def _write_ttml(self, output_file, parameters):
        """ Write to TTML file """
        # get language: explicit parameter wins, else first fragment's, else ""
        language = None
        if (parameters is not None) and ("language" in parameters):
            language = parameters["language"]
        elif len(self.fragments) > 0:
            language = self.fragments[0].text_fragment.language
        if language is None:
            language = ""
        # namespaces
        ttml_ns = "http://www.w3.org/ns/ttml"
        xml_ns = "http://www.w3.org/XML/1998/namespace"
        ns_map = {None : ttml_ns}
        # build tree
        tt_elem = etree.Element("{%s}tt" % ttml_ns, nsmap=ns_map)
        tt_elem.attrib["{%s}lang" % xml_ns] = language
        # TODO add metadata from parameters here?
        #head_elem = etree.SubElement(tt_elem, "{%s}head" % ttml_ns)
        body_elem = etree.SubElement(tt_elem, "{%s}body" % ttml_ns)
        div_elem = etree.SubElement(body_elem, "{%s}div" % ttml_ns)
        for fragment in self.fragments:
            text = fragment.text_fragment
            # build the <p> as a string so <br/> separators are real elements
            p_string = u"<p xml:id=\"%s\" begin=\"%s\" end=\"%s\">%s</p>" % (
                text.identifier,
                gf.time_to_ttml(fragment.begin),
                gf.time_to_ttml(fragment.end),
                u"<br/>".join(text.lines)
            )
            p_elem = etree.fromstring(p_string)
            div_elem.append(p_elem)
        # write tree
        self._write_tree_to_file(tt_elem, output_file)

    def _read_txt(self, input_file, parse_time):
        """ Read from TXT file (id begin end "text", space-separated) """
        for line in input_file.readlines():
            line = line.strip()
            split = line.split(" ")
            identifier = split[0]
            begin = parse_time(split[1])
            end = parse_time(split[2])
            # rejoin the text and strip the quotes
            text = (" ".join(split[3:]))[1:-1]
            text_fragment = TextFragment(identifier=identifier, lines=[text])
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)

    def _write_txt(self, output_file, format_time):
        """ Write to TXT file (id begin end "text", space-separated) """
        for fragment in self.fragments:
            text = fragment.text_fragment
            output_file.write("%s %s %s \"%s\"\n" % (
                text.identifier,
                format_time(fragment.begin),
                format_time(fragment.end),
                text.text
            ))

    def _read_vtt(self, input_file):
        """ Read from WebVTT file """
        lines = input_file.readlines()
        # ignore the first line containing "WEBVTT" and the following blank line
        i = 2
        while i < len(lines):
            line = lines[i].strip()
            if len(line) > 0:
                # cue index line
                identifier_index = int(line)
                identifier = "f" + str(identifier_index).zfill(6)
                i += 1
                if i < len(lines):
                    # timing line "begin --> end" (dot decimal separator)
                    line = lines[i].strip()
                    timings = line.split(" --> ")
                    if len(timings) == 2:
                        begin = gf.time_from_hhmmssmmm(timings[0])
                        end = gf.time_from_hhmmssmmm(timings[1])
                        # collect text lines until the next blank line
                        fragment_lines = []
                        while (i + 1 < len(lines)) and (len(line) > 0):
                            i += 1
                            line = lines[i].strip()
                            if len(line) > 0:
                                fragment_lines.append(line)
                        # should never happen, but just in case...
                        if len(fragment_lines) == 0:
                            fragment_lines = [""]
                        text_fragment = TextFragment(identifier=identifier, lines=fragment_lines)
                        sm_fragment = SyncMapFragment(text_fragment, begin, end)
                        self.append(sm_fragment)
            i += 1

    def _write_vtt(self, output_file):
        """ Write to WebVTT file """
        output_file.write("WEBVTT\n\n")
        i = 1
        for fragment in self.fragments:
            text = fragment.text_fragment
            output_file.write("%d\n" % i)
            output_file.write("%s --> %s\n" % (
                gf.time_to_hhmmssmmm(fragment.begin),
                gf.time_to_hhmmssmmm(fragment.end)
            ))
            for line in text.lines:
                output_file.write("%s\n" % line)
            output_file.write("\n")
            i += 1

    def _read_xml(self, input_file):
        """ Read from XML file """
        contents = input_file.read()
        root = etree.fromstring(contents.encode("utf-8"))
        for frag in root:
            identifier = frag.get("id")
            begin = gf.time_from_ssmmm(frag.get("begin"))
            end = gf.time_from_ssmmm(frag.get("end"))
            lines = []
            for child in frag:
                if child.tag == "line":
                    lines.append(child.text)
            text_fragment = TextFragment(identifier=identifier, lines=lines)
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)

    def _write_xml(self, output_file):
        """ Write to XML file """
        map_elem = etree.Element("map")
        for fragment in self.fragments:
            text = fragment.text_fragment
            fragment_elem = etree.SubElement(map_elem, "fragment")
            fragment_elem.attrib["id"] = text.identifier
            fragment_elem.attrib["begin"] = gf.time_to_ssmmm(fragment.begin)
            fragment_elem.attrib["end"] = gf.time_to_ssmmm(fragment.end)
            for line in text.lines:
                line_elem = etree.SubElement(fragment_elem, "line")
                line_elem.text = line
        self._write_tree_to_file(map_elem, output_file)

    def _read_xml_legacy(self, input_file):
        """ Read from XML file (legacy format) """
        contents = input_file.read()
        root = etree.fromstring(contents.encode("utf-8"))
        for frag in root:
            for child in frag:
                if child.tag == "identifier":
                    identifier = child.text
                elif child.tag == "start":
                    begin = gf.time_from_ssmmm(child.text)
                elif child.tag == "end":
                    end = gf.time_from_ssmmm(child.text)
            # TODO read text from additional text_file?
            text = ""
            text_fragment = TextFragment(identifier=identifier, lines=[text])
            sm_fragment = SyncMapFragment(text_fragment, begin, end)
            self.append(sm_fragment)

    def _write_xml_legacy(self, output_file):
        """ Write to XML file (legacy format) """
        output_file.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n")
        output_file.write("<map>\n")
        for fragment in self.fragments:
            text = fragment.text_fragment
            output_file.write(" <fragment>\n")
            output_file.write(" <identifier>%s</identifier>\n" % text.identifier)
            output_file.write(" <start>%s</start>\n" % gf.time_to_ssmmm(fragment.begin))
            output_file.write(" <end>%s</end>\n" % gf.time_to_ssmmm(fragment.end))
            output_file.write(" </fragment>\n")
        output_file.write("</map>")

    def _write_tree_to_file(self, root_element, output_file,
                            pretty_print=True, xml_declaration=True):
        """ Write an lxml tree to the given output file """
        tree = etree.ElementTree(root_element)
        tree.write(
            output_file,
            pretty_print=pretty_print,
            xml_declaration=xml_declaration
        )

    def _get_lines_from_node_text(self, node):
        """
        Given an lxml node, get lines from node.text,
        where the line separator is "<br xmlns=... />".
        """
        # interleave node.text, serialized children, and their tails
        parts = ([node.text] +
                 list(chain(*([etree.tostring(c, with_tail=False), c.tail] for c in node.getchildren()))) +
                 [node.tail])
        # drop the <br .../> separators, keep the text runs
        parts = [p.strip() for p in parts if not p.startswith("<br ")]
        parts = [p for p in parts if len(p) > 0]
        return parts
class ESPEAKWrapper(object):
    """
    Wrapper around ``espeak`` to synthesize text into a ``wav`` audio file.

    It will perform a call like ::

        $ espeak -v language_code -w /tmp/output_file.wav < text

    :param logger: the logger object
    :type logger: :class:`aeneas.logger.Logger`
    """

    TAG = "ESPEAKWrapper"

    def __init__(self, logger=None):
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def _replace_language(self, language):
        """
        Mock support for a given language by
        synthesizing using a similar language.

        :param language: the requested language
        :type language: string (from :class:`aeneas.language.Language` enumeration)
        :rtype: string (from :class:`aeneas.language.Language` enumeration)
        """
        # Ukrainian is synthesized with the Russian voice
        if language == Language.UK:
            self._log(["Replaced '%s' with '%s'", Language.UK, Language.RU])
            return Language.RU
        return language

    def synthesize(self, text, language, output_file_path):
        """
        Create a ``wav`` audio file containing the synthesized text.

        The ``text`` must be a unicode string encodable with UTF-8,
        otherwise ``espeak`` might fail.

        Return the duration of the synthesized audio file, in seconds.

        :param text: the text to synthesize
        :type text: unicode
        :param language: the language to use
        :type language: string (from :class:`aeneas.language.Language` enumeration)
        :param output_file_path: the path of the output audio file
        :type output_file_path: string
        :raises OSError: if the output file was not created by ``espeak``
        :rtype: float
        """
        self._log(["Synthesizing text: '%s'", text])
        self._log(["Synthesizing language: '%s'", language])
        self._log(["Synthesizing to file: '%s'", output_file_path])
        # return 0 if no text is given
        if (text is None) or (len(text) == 0):
            self._log("Text is None or it has zero length")
            return 0
        # return 0 if the requested language is not listed in language.py
        # NOTE disabling this check to allow testing new languages
        # TODO put it back, add an option in gc to allow unlisted languages
        #if language not in Language.ALLOWED_VALUES:
        #    self._log(["Language %s is not allowed", language])
        #    return 0
        # replace language
        language = self._replace_language(language)
        self._log(["Using language: '%s'", language])
        # call espeak, feeding the text via stdin
        arguments = []
        arguments += [gc.ESPEAK_PATH]
        arguments += ["-v", language]
        arguments += ["-w", output_file_path]
        self._log(["Calling with arguments '%s'", " ".join(arguments)])
        self._log(["Calling with text '%s'", text])
        proc = subprocess.Popen(
            arguments,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True)
        # NOTE(review): passing UTF-8 bytes while universal_newlines=True
        # is Python-2 behavior; on Python 3 a text-mode pipe expects str —
        # confirm the target interpreter before changing
        proc.communicate(input=text.encode('utf-8'))
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()
        self._log("Call completed")
        # check if the output file exists
        if not os.path.exists(output_file_path):
            self._log(["Output file '%s' cannot be read", output_file_path], Logger.CRITICAL)
            raise OSError("Output file cannot be read")
        # return the duration of the output file
        self._log(["Calling wavread to analyze file '%s'", output_file_path])
        duration = 0
        try:
            data, sample_frequency, encoding = wavread(output_file_path)
            # duration in seconds = number of samples / sample rate
            duration = len(data) / float(sample_frequency)
            self._log(["Duration of '%s': %f", output_file_path, duration])
        except IOError as e:
            # best-effort: log and return 0 if the wav cannot be analyzed
            self._log("IOError while trying reading the generated file")
            self._log(["Message: %s", e])
        return duration
class AudioFile(object):
    """
    A class representing an audio file.

    The properties of the audio file
    (length, format, etc.)
    will be set by the constructor
    invoking an audio file probe.
    (Currently, :class:`aeneas.ffprobewrapper.FFPROBEWrapper`)

    :param file_path: the path to the audio file
    :type file_path: string (path)
    :param logger: the logger object
    :type logger: :class:`aeneas.logger.Logger`
    :raises OSError: if ``file_path`` does not point to an existing file
    """

    TAG = "AudioFile"

    def __init__(self, file_path, logger=None):
        self.logger = logger
        # fixed: was ``== None``; identity comparison is the correct idiom
        if self.logger is None:
            self.logger = Logger()
        self.file_path = file_path
        self.file_size = None
        self.audio_length = None
        self.audio_format = None
        self.audio_sample_rate = None
        self.audio_channels = None
        self._read_properties()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def __str__(self):
        accumulator = ""
        accumulator += "File path: %s\n" % self.file_path
        accumulator += "File size (bytes): %s\n" % gf.safe_int(self.file_size)
        accumulator += "Audio length (s): %s\n" % gf.safe_float(self.audio_length)
        accumulator += "Audio format: %s\n" % self.audio_format
        accumulator += "Audio sample rate: %s\n" % gf.safe_int(self.audio_sample_rate)
        accumulator += "Audio channels: %s" % gf.safe_int(self.audio_channels)
        return accumulator

    @property
    def file_path(self):
        """
        The path of the audio file.

        :rtype: string
        """
        return self.__file_path

    @file_path.setter
    def file_path(self, file_path):
        self.__file_path = file_path

    @property
    def file_size(self):
        """
        The size, in bytes, of the audio file.

        :rtype: int
        """
        return self.__file_size

    @file_size.setter
    def file_size(self, file_size):
        self.__file_size = file_size

    @property
    def audio_length(self):
        """
        The length, in seconds, of the audio file.

        :rtype: float
        """
        return self.__audio_length

    @audio_length.setter
    def audio_length(self, audio_length):
        self.__audio_length = audio_length

    @property
    def audio_format(self):
        """
        The format of the audio file.

        :rtype: string
        """
        return self.__audio_format

    @audio_format.setter
    def audio_format(self, audio_format):
        self.__audio_format = audio_format

    @property
    def audio_sample_rate(self):
        """
        The sample rate of the audio file.

        :rtype: int
        """
        return self.__audio_sample_rate

    @audio_sample_rate.setter
    def audio_sample_rate(self, audio_sample_rate):
        self.__audio_sample_rate = audio_sample_rate

    @property
    def audio_channels(self):
        """
        The number of channels of the audio file.

        :rtype: int
        """
        return self.__audio_channels

    @audio_channels.setter
    def audio_channels(self, audio_channels):
        self.__audio_channels = audio_channels

    def _read_properties(self):
        """
        Populate this object by reading
        the audio properties of the file at the given path.

        Currently this function uses
        :class:`aeneas.ffprobewrapper.FFPROBEWrapper`
        to get the audio file properties.

        :raises OSError: if the file cannot be read
        """
        self._log("Reading properties")
        # check the file can be read
        if not os.path.isfile(self.file_path):
            msg = "File '%s' cannot be read" % self.file_path
            self._log(msg, Logger.CRITICAL)
            raise OSError(msg)
        # get the file size
        # fixed: log calls below used eager %-formatting; converted to the
        # lazy list-style arguments used by every other _log call in this file
        self._log(["Getting file size for '%s'", self.file_path])
        self.file_size = os.path.getsize(self.file_path)
        self._log(["File size for '%s' is '%d'", self.file_path, self.file_size])
        # get the audio properties via ffprobe
        self._log("Reading properties with FFPROBEWrapper...")
        prober = FFPROBEWrapper(logger=self.logger)
        properties = prober.read_properties(self.file_path)
        self._log("Reading properties with FFPROBEWrapper... done")
        # save relevant properties in results inside the audiofile object
        self.audio_length = gf.safe_float(properties[FFPROBEWrapper.STDOUT_DURATION])
        self._log(["Stored audio_length: '%s'", self.audio_length])
        self.audio_format = properties[FFPROBEWrapper.STDOUT_CODEC_NAME]
        self._log(["Stored audio_format: '%s'", self.audio_format])
        self.audio_sample_rate = gf.safe_int(properties[FFPROBEWrapper.STDOUT_SAMPLE_RATE])
        self._log(["Stored audio_sample_rate: '%s'", self.audio_sample_rate])
        self.audio_channels = gf.safe_int(properties[FFPROBEWrapper.STDOUT_CHANNELS])
        self._log(["Stored audio_channels: '%s'", self.audio_channels])
def test_synthesize_logger(self): logger = Logger() self.perform("res/inputtext/sonnet_plain.txt", 15, logger=logger)
class TextFile(object):
    """
    A list of text fragments, either built programmatically
    or read from a text file in one of the supported formats.

    :param file_path: the path of the text file to read (optional)
    :type  file_path: string (path)
    :param file_format: the format of the text file
                        (from :class:`aeneas.textfile.TextFileFormat`)
    :type  file_format: string
    :param parameters: additional parameters used while parsing
                       (e.g., class/id regexes for the unparsed format)
    :type  parameters: dict
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    TAG = "TextFile"

    def __init__(
            self,
            file_path=None,
            file_format=None,
            parameters=None,
            logger=None
        ):
        self.file_path = file_path
        self.file_format = file_format
        self.parameters = parameters
        self.fragments = []
        # fall back to a default Logger when none is provided
        self.logger = logger if logger is not None else Logger()
        if (self.file_path is not None) and (self.file_format is not None):
            self._read_from_file()

    def __len__(self):
        return len(self.fragments)

    def __str__(self):
        return "\n".join([str(f) for f in self.fragments])

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    @property
    def characters(self):
        """
        The number of characters in this text,
        summed over all fragments.

        :rtype: int
        """
        chars = 0
        for fragment in self.fragments:
            chars += fragment.characters
        return chars

    @property
    def fragments(self):
        """
        The current list of text fragments.

        :rtype: list of :class:`aeneas.textfile.TextFragment`
        """
        return self.__fragments

    @fragments.setter
    def fragments(self, fragments):
        self.__fragments = fragments

    def append_fragment(self, fragment):
        """
        Append the given text fragment to the current list.

        :param fragment: the text fragment to be appended
        :type  fragment: :class:`aeneas.textfile.TextFragment`
        """
        self.fragments.append(fragment)

    def get_slice(self, start=None, end=None):
        """
        Return a new TextFile with fragments
        indexed from start (included) to end (excluded).

        :param start: the start index
        :type  start: int
        :param end: the end index
        :type  end: int
        :rtype: :class:`aeneas.textfile.TextFile`
        """
        # clamp indices to valid values;
        # end is forced to at least start + 1, so the slice is never empty
        if start is not None:
            start = min(max(0, start), len(self) - 1)
        else:
            start = 0
        if end is not None:
            end = min(max(0, end), len(self))
            end = max(end, start + 1)
        else:
            end = len(self)
        new_text = TextFile()
        for fragment in self.fragments[start:end]:
            new_text.append_fragment(fragment)
        return new_text

    def set_language(self, language):
        """
        Set the given language for all the text fragments.

        :param language: the language of the text fragments
        :type  language: string (from :class:`aeneas.language.Language` enumeration)
        """
        self._log(["Setting language: '%s'", language])
        for fragment in self.fragments:
            fragment.language = language

    def clear(self):
        """
        Clear the list of text fragments.
        """
        self._log("Clearing text fragments")
        self.fragments = []

    def read_from_list(self, lines):
        """
        Read text fragments from a given list of strings::

            [fragment_1, fragment_2, ..., fragment_n]

        :param lines: the text fragments
        :type  lines: list of strings
        """
        self._log("Reading text fragments from list")
        self._read_plain(lines)

    def read_from_list_with_ids(self, lines):
        """
        Read text fragments from a given list of lists::

            [[id_1, text_1], [id_2, text_2], ..., [id_n, text_n]].

        :param lines: the list of ``[id, text]`` fragments (see above)
        :type  lines: list of pairs (see above)
        """
        self._log("Reading text fragments from list with ids")
        pairs = [[line[0], [line[1]]] for line in lines]
        self._create_text_fragments(pairs)

    def _read_from_file(self):
        """
        Read text fragments from file.

        :raise OSError: if the file cannot be read
        :raise ValueError: if the file format is not supported
        """
        # test if we can read the given file
        if not os.path.isfile(self.file_path):
            self._log(["File '%s' cannot be read", self.file_path], Logger.CRITICAL)
            raise OSError("Input file cannot be read")

        if self.file_format not in TextFileFormat.ALLOWED_VALUES:
            self._log(["Text file format '%s' is not supported.", self.file_format], Logger.CRITICAL)
            raise ValueError("Text file format not supported")

        # read the contents of the file;
        # the context manager closes the handle even if readlines() raises
        self._log(["Reading contents of file '%s'", self.file_path])
        with codecs.open(self.file_path, "r", "utf-8") as text_file:
            lines = text_file.readlines()

        # clear text fragments
        self.clear()

        # parse the contents; the formats are mutually exclusive
        if self.file_format == TextFileFormat.SUBTITLES:
            self._log("Reading from format SUBTITLES")
            self._read_subtitles(lines)
        elif self.file_format == TextFileFormat.PARSED:
            self._log("Reading from format PARSED")
            self._read_parsed(lines)
        elif self.file_format == TextFileFormat.PLAIN:
            self._log("Reading from format PLAIN")
            self._read_plain(lines)
        elif self.file_format == TextFileFormat.UNPARSED:
            self._log("Reading from format UNPARSED")
            self._read_unparsed(lines, self.parameters)

        # log the number of fragments
        self._log(["Parsed %d fragments", len(self.fragments)])

    def _read_subtitles(self, lines):
        """
        Read text fragments from a subtitles format text file.

        Fragments are blank-line-separated groups of non-empty lines;
        each gets a generated identifier ``f000001``, ``f000002``, ...

        :param lines: the lines of the subtitles text file
        :type  lines: list of strings
        """
        self._log("Parsing fragments from subtitles text format")
        lines = [line.strip() for line in lines]
        pairs = []
        i = 1
        current = 0
        while current < len(lines):
            line_text = lines[current]
            if len(line_text) > 0:
                # accumulate consecutive non-empty lines into one fragment
                fragment_lines = [line_text]
                following = current + 1
                while (following < len(lines) and (len(lines[following]) > 0)):
                    fragment_lines.append(lines[following])
                    following += 1
                identifier = "f" + str(i).zfill(6)
                pairs.append([identifier, fragment_lines])
                current = following
                i += 1
            current += 1
        self._create_text_fragments(pairs)

    def _read_parsed(self, lines):
        """
        Read text fragments from a parsed format text file,
        one ``id SEPARATOR text`` pair per line.

        :param lines: the lines of the parsed text file
        :type  lines: list of strings
        """
        self._log("Parsing fragments from parsed text format")
        pairs = []
        for line in lines:
            pieces = line.split(gc.PARSED_TEXT_SEPARATOR)
            if len(pieces) == 2:
                identifier = pieces[0].strip()
                text = pieces[1].strip()
                # lines with an empty identifier are silently skipped
                if len(identifier) > 0:
                    pairs.append([identifier, [text]])
        self._create_text_fragments(pairs)

    def _read_plain(self, lines):
        """
        Read text fragments from a plain format text file,
        one fragment per line, with generated identifiers.

        :param lines: the lines of the plain text file
        :type  lines: list of strings
        """
        self._log("Parsing fragments from plain text format")
        pairs = []
        for i, line in enumerate(lines, 1):
            identifier = "f" + str(i).zfill(6)
            pairs.append([identifier, [line.strip()]])
        self._create_text_fragments(pairs)

    def _read_unparsed(self, lines, parameters):
        """
        Read text fragments from an unparsed format text file,
        extracting elements matching the class/id regexes
        given in ``parameters``.

        :param lines: the lines of the unparsed text file
        :type  lines: list of strings
        :param parameters: additional parameters for parsing
                           (e.g., class/id regex strings)
        :type  parameters: dict
        """
        #
        # TODO better and/or parametric parsing,
        #      for example, removing tags but keeping text, etc.
        #
        self._log("Parsing fragments from unparsed text format")
        pairs = []

        # get filter attributes
        attributes = dict()
        if gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX in parameters:
            class_regex_string = parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX]
            if class_regex_string is not None:
                self._log(["Regex for class: '%s'", class_regex_string])
                class_regex = re.compile(r".*\b" + class_regex_string + r"\b.*")
                attributes['class'] = class_regex
        if gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX in parameters:
            id_regex_string = parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX]
            if id_regex_string is not None:
                self._log(["Regex for id: '%s'", id_regex_string])
                id_regex = re.compile(r".*\b" + id_regex_string + r"\b.*")
                attributes['id'] = id_regex

        # get id sorting algorithm
        id_sort = IDSortingAlgorithm.UNSORTED
        if gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT in parameters:
            id_sort = parameters[gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT]
        self._log(["Sorting text fragments using '%s'", id_sort])

        # transform text in a soup object
        self._log("Creating soup")
        soup = BeautifulSoup.BeautifulSoup("\n".join(lines))

        # extract according to class_regex and id_regex
        text_from_id = dict()
        ids = []
        self._log(["Finding elements matching attributes '%s'", attributes])
        nodes = soup.findAll(attrs=attributes)
        for node in nodes:
            try:
                f_id = node['id']
                f_text = node.text
                text_from_id[f_id] = f_text
                ids.append(f_id)
            except KeyError:
                # node matched the filters but has no 'id' attribute
                self._log("KeyError while parsing a node", Logger.WARNING)

        # sort by ID as requested
        self._log("Sorting text fragments")
        sorted_ids = IDSortingAlgorithm(id_sort).sort(ids)

        # append to fragments
        self._log("Appending fragments")
        for key in sorted_ids:
            pairs.append([key, [text_from_id[key]]])
        self._create_text_fragments(pairs)

    def _create_text_fragments(self, pairs):
        """
        Create text fragment objects and append them to this list.

        :param pairs: a list of lists, each being [id, [line_1, ..., line_n]]
        :type  pairs: list of lists (see above)
        """
        self._log("Creating TextFragment objects")
        for pair in pairs:
            fragment = TextFragment(identifier=pair[0], lines=pair[1])
            self.append_fragment(fragment)
class ExecuteJob(object):
    """
    Execute a job, that is, execute all of its tasks
    and generate the output container
    holding the generated sync maps.

    If you do not provide a job object in the constructor,
    you must manually set it later, or load it from a container
    with ``load_job_from_container``.

    In the first case, you are responsible for setting
    the absolute audio/text/sync map paths of each task of the job,
    to their actual absolute location on the computing machine.
    Moreover, you are responsible for cleaning up
    any temporary files you might have generated around.

    In the second case, you are responsible for
    calling ``clean`` at the end of the job execution,
    to delete the working directory
    created by ``load_job_from_container``
    when creating the job object.

    :param job: the job to be executed
    :type  job: :class:`aeneas.job.Job`
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    TAG = "ExecuteJob"

    def __init__(self, job=None, logger=None):
        self.job = job
        self.working_directory = None
        self.tmp_directory = None
        # "is None" (not "== None") is the correct singleton test
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def load_job(self, job):
        """
        Load the given job.

        NOTE: no sanity check is perfomed by this call,
        and it will always return ``True``.

        :param job: the job to load
        :type  job: :class:`aeneas.job.Job`
        :rtype: bool
        """
        self.job = job
        return True

    def load_job_from_container(self, container_path, config_string=None):
        """
        Validate the given container, and, if it is well formed,
        load the job from it.

        If ``config_string`` is ``None``,
        the container must contain a configuration file;
        otherwise use the provided config string
        (i.e., the wizard case).

        Return ``True`` if the job has been loaded successfully,
        ``False`` otherwise.

        :param container_path: the path to the input container
        :type  container_path: string (path)
        :param config_string: the configuration string (from wizard)
        :type  config_string: string
        :rtype: bool
        """
        self._log("Loading job from container...")

        # validate container
        self._log("Validating container...")
        validator = Validator(logger=self.logger)
        if config_string is None:
            validator_result = validator.check_container(container_path)
        else:
            validator_result = validator.check_container_from_wizard(
                container_path,
                config_string
            )
        if not validator_result.passed:
            self._log("Validating container: failed")
            self._log("Loading job from container: failed")
            return False
        self._log("Validating container: succeeded")

        try:
            # create working directory where the input container
            # will be decompressed
            self.working_directory = tempfile.mkdtemp(dir=gf.custom_tmp_dir())
            self._log("Created working directory '%s'" % self.working_directory)

            # decompress
            self._log("Decompressing input container...")
            input_container = Container(container_path, logger=self.logger)
            input_container.decompress(self.working_directory)
            self._log("Decompressing input container... done")

            # create job from the working directory
            self._log("Creating job from working directory...")
            working_container = Container(
                self.working_directory,
                logger=self.logger
            )
            analyzer = AnalyzeContainer(working_container, logger=self.logger)
            if config_string is None:
                self.job = analyzer.analyze()
            else:
                self.job = analyzer.analyze_from_wizard(config_string)
            self._log("Creating job from working directory... done")

            # set absolute path for text file and audio file
            # for each task in the job
            self._log("Setting absolute paths for tasks...")
            for task in self.job.tasks:
                task.text_file_path_absolute = gf.norm_join(
                    self.working_directory,
                    task.text_file_path
                )
                task.audio_file_path_absolute = gf.norm_join(
                    self.working_directory,
                    task.audio_file_path
                )
            self._log("Setting absolute paths for tasks... done")

            # return
            self._log("Loading job from container: succeeded")
            return True
        except Exception:
            # failure: clean and return
            # (narrowed from a bare "except:" so that e.g. KeyboardInterrupt
            # is not silently swallowed)
            self.clean()
            self._log("Loading job from container: failed")
            return False

    def write_output_container(self, output_directory_path):
        """
        Write the output container for this job.

        Return a pair ``(bool, string)``, where the bool
        indicates whether the execution succeeded,
        and the string is the path to output container.

        :param output_directory_path: the path to a directory where
                                      the output container must be created
        :type  output_directory_path: string (path)
        :rtype: (bool, string)
        """
        self._log("Writing output container for this job")

        # check if the job has tasks
        if self.job is None:
            self._log("job is None")
            return (False, None)
        if len(self.job) == 0:
            self._log("The job has no tasks")
            return (False, None)

        try:
            # create temporary directory where the sync map files
            # will be created
            # this temporary directory will be compressed into
            # the output container
            self.tmp_directory = tempfile.mkdtemp(dir=gf.custom_tmp_dir())
            self._log("Created temporary directory '%s'" % self.tmp_directory)

            for task in self.job.tasks:
                custom_id = task.configuration.custom_id

                # check if the task has sync map and sync map file path
                # NOTE(review): returning here leaves self.tmp_directory
                # on disk; the caller may invoke clean(False)
                if task.sync_map_file_path is None:
                    self._log("Task '%s' has sync_map_file_path not set" % custom_id)
                    return (False, None)
                if task.sync_map is None:
                    self._log("Task '%s' has sync_map not set" % custom_id)
                    return (False, None)

                # output sync map
                self._log("Outputting sync map for task '%s'..." % custom_id)
                task.output_sync_map_file(self.tmp_directory)
                self._log("Outputting sync map for task '%s'... done" % custom_id)

            # get output container info
            output_container_format = self.job.configuration.os_container_format
            self._log("Output container format: '%s'" % output_container_format)
            output_file_name = self.job.configuration.os_file_name
            if ((output_container_format != ContainerFormat.UNPACKED) and
                    (not output_file_name.endswith(output_container_format))):
                self._log("Adding extension to output_file_name")
                output_file_name += "." + output_container_format
            self._log("Output file name: '%s'" % output_file_name)
            output_file_path = gf.norm_join(
                output_directory_path,
                output_file_name
            )
            self._log("Output file path: '%s'" % output_file_path)

            # create output container
            self._log("Compressing...")
            container = Container(
                output_file_path,
                output_container_format,
                logger=self.logger
            )
            container.compress(self.tmp_directory)
            self._log("Compressing... done")
            self._log("Created output file: '%s'" % output_file_path)

            # clean and return
            self.clean(False)
            return (True, output_file_path)
        except Exception:
            self.clean(False)
            return (False, None)

    def execute(self):
        """
        Execute the job, that is, execute all of its tasks.

        Each produced sync map will be stored
        inside the corresponding task object.

        Return ``True`` if the execution succeeded,
        ``False`` otherwise.

        :rtype: bool
        """
        self._log("Executing job")

        # check if the job has tasks
        if self.job is None:
            self._log("job is None")
            return False
        if len(self.job) == 0:
            self._log("The job has no tasks")
            return False
        self._log("Number of tasks: '%s'" % len(self.job))

        # execute tasks
        for task in self.job.tasks:
            custom_id = task.configuration.custom_id
            self._log("Executing task '%s'..." % custom_id)
            executor = ExecuteTask(task, logger=self.logger)
            result = executor.execute()
            self._log("Executing task '%s'... done" % custom_id)
            if not result:
                self._log("Executing task: failed")
                return False
            self._log("Executing task: succeeded")

        # return
        self._log("Executing job: succeeded")
        return True

    def clean(self, remove_working_directory=True):
        """
        Remove the temporary directory.
        If ``remove_working_directory`` is True
        remove the working directory as well,
        otherwise just remove the temporary directory.

        :param remove_working_directory: if ``True``, remove
                                         the working directory as well
        :type  remove_working_directory: bool
        """
        if remove_working_directory:
            self._log("Removing working directory... ")
            self._clean(self.working_directory)
            self.working_directory = None
            self._log("Removing working directory... done")
        self._log("Removing temporary directory... ")
        self._clean(self.tmp_directory)
        self.tmp_directory = None
        self._log("Removing temporary directory... done")

    def _clean(self, path):
        """
        Remove the directory ``path`` (best-effort: failures are logged,
        not raised).

        :param path: the path of the directory to be removed
        :type  path: string (path)
        """
        if (path is not None) and (os.path.isdir(path)):
            try:
                self._log("Removing directory '%s'..." % path)
                shutil.rmtree(path)
                self._log("Succeeded")
            except Exception:
                self._log("Failed")
class ExecuteTask(object):
    """
    Execute a task, that is, compute the sync map for it.

    :param task: the task to be executed
    :type  task: :class:`aeneas.task.Task`
    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    TAG = "ExecuteTask"

    def __init__(self, task, logger=None):
        self.task = task
        # list of [handler, path] pairs of temporary files to remove
        self.cleanup_info = []
        # "is None" (not "== None") is the correct singleton test
        self.logger = logger
        if self.logger is None:
            self.logger = Logger()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def execute(self):
        """
        Execute the task.
        The sync map produced will be stored inside the task object.

        Return ``True`` if the execution succeeded,
        ``False`` if an error occurred.

        :rtype: bool
        """
        self._log("Executing task")

        # check that we have the AudioFile object
        if self.task.audio_file is None:
            self._log("The task does not seem to have its audio file set", Logger.WARNING)
            return False
        if (self.task.audio_file.audio_length is None) or (self.task.audio_file.audio_length <= 0):
            self._log("The task seems to have an invalid audio file", Logger.WARNING)
            return False

        # check that we have the TextFile object
        if self.task.text_file is None:
            self._log("The task does not seem to have its text file set", Logger.WARNING)
            return False
        if len(self.task.text_file) == 0:
            self._log("The task seems to have no text fragments", Logger.WARNING)
            return False

        self._log("Both audio and text input file are present")
        self.cleanup_info = []

        # STEP 1 : convert (real) audio to wave
        self._log("STEP 1 BEGIN")
        result, real_handler, real_path = self._convert()
        self.cleanup_info.append([real_handler, real_path])
        if not result:
            self._log("STEP 1 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 1 END")

        # STEP 2 : synthesize text to wave
        self._log("STEP 2 BEGIN")
        result, synt_handler, synt_path, synt_anchors = self._synthesize()
        self.cleanup_info.append([synt_handler, synt_path])
        if not result:
            self._log("STEP 2 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 2 END")

        # STEP 3 : align waves
        self._log("STEP 3 BEGIN")
        result, wave_map = self._align_waves(real_path, synt_path)
        if not result:
            self._log("STEP 3 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 3 END")

        # STEP 4 : align text
        self._log("STEP 4 BEGIN")
        result, text_map = self._align_text(wave_map, synt_anchors)
        if not result:
            self._log("STEP 4 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 4 END")

        # STEP 5 : create syncmap and add it to task
        self._log("STEP 5 BEGIN")
        result = self._create_syncmap(text_map)
        if not result:
            self._log("STEP 5 FAILURE")
            self._cleanup()
            return False
        self._log("STEP 5 END")

        # STEP 6 : cleanup
        self._log("STEP 6 BEGIN")
        self._cleanup()
        self._log("STEP 6 END")
        self._log("Execution completed")
        return True

    def _cleanup(self):
        """
        Remove all temporary files (best-effort: failures are logged,
        not raised).
        """
        for info in self.cleanup_info:
            handler, path = info
            if handler is not None:
                try:
                    self._log("Closing handler '%s'..." % handler)
                    os.close(handler)
                    self._log("Succeeded")
                except Exception:
                    self._log("Failed")
            if path is not None:
                try:
                    self._log("Removing path '%s'..." % path)
                    os.remove(path)
                    self._log("Succeeded")
                except Exception:
                    self._log("Failed")
        self.cleanup_info = []

    def _convert(self):
        """
        Convert the audio file into a ``wav`` file.

        Return a triple:

        1. a success bool flag
        2. handler of the generated wave file
        3. path of the generated wave file
        """
        self._log("Converting real audio to wav")
        handler = None
        path = None
        try:
            self._log("Creating an output tempfile")
            handler, path = tempfile.mkstemp(
                suffix=".wav",
                dir=gf.custom_tmp_dir()
            )
            self._log("Creating a FFMPEGWrapper")
            ffmpeg = FFMPEGWrapper(logger=self.logger)
            self._log("Converting...")
            ffmpeg.convert(
                input_file_path=self.task.audio_file_path_absolute,
                output_file_path=path,
                head_length=self.task.configuration.is_audio_file_head_length,
                process_length=self.task.configuration.is_audio_file_process_length)
            self._log("Converting... done")
            self._log("Converting real audio to wav: succeeded")
            return (True, handler, path)
        except Exception:
            self._log("Converting real audio to wav: failed")
            return (False, handler, path)

    def _synthesize(self):
        """
        Synthesize text into a ``wav`` file.

        Return a quadruple:

        1. a success bool flag
        2. handler of the generated wave file
        3. path of the generated wave file
        4. the list of anchors, that is, a list of floats
           each representing the start time of the corresponding
           text fragment in the generated wave file
           ``[start_1, start_2, ..., start_n]``
        """
        self._log("Synthesizing text")
        handler = None
        path = None
        anchors = None
        try:
            self._log("Creating an output tempfile")
            handler, path = tempfile.mkstemp(
                suffix=".wav",
                dir=gf.custom_tmp_dir()
            )
            self._log("Creating Synthesizer object")
            synt = Synthesizer(logger=self.logger)
            self._log("Synthesizing...")
            anchors = synt.synthesize(self.task.text_file, path)
            self._log("Synthesizing... done")
            self._log("Synthesizing text: succeeded")
            return (True, handler, path, anchors)
        except Exception:
            self._log("Synthesizing text: failed")
            return (False, handler, path, anchors)

    def _align_waves(self, real_path, synt_path):
        """
        Align two ``wav`` files.

        Return a pair:

        1. a success bool flag
        2. the computed alignment map, that is,
           a list of pairs of floats, each representing
           corresponding time instants
           in the real and synt wave, respectively
           ``[real_time, synt_time]``
        """
        self._log("Aligning waves")
        try:
            self._log("Creating DTWAligner object")
            aligner = DTWAligner(real_path, synt_path, logger=self.logger)
            self._log("Computing MFCC...")
            aligner.compute_mfcc()
            self._log("Computing MFCC... done")
            self._log("Computing path...")
            aligner.compute_path()
            self._log("Computing path... done")
            self._log("Computing map...")
            computed_map = aligner.computed_map
            self._log("Computing map... done")
            return (True, computed_map)
        except Exception:
            return (False, None)

    def _align_text(self, wave_map, synt_anchors):
        """
        Align the text with the real wave,
        using the ``wave_map`` (containing the mapping
        between real and synt waves) and ``synt_anchors``
        (containing the start times of text fragments
        in the synt wave).

        Return a pair:

        1. a success bool flag
        2. the computed interval map, that is,
           a list of triples ``[start_time, end_time, fragment_id]``
        """
        self._log("Align text")
        self._log("Number of frames: %d" % len(wave_map))
        self._log("Number of fragments: %d" % len(synt_anchors))
        try:
            real_times = numpy.array([t[0] for t in wave_map])
            synt_times = numpy.array([t[1] for t in wave_map])
            real_anchors = []
            anchor_index = 0
            for anchor in synt_anchors:
                # the third element (fragment text) is not needed here
                synt_time, fragment_id, _ = anchor
                self._log("Looking for argmin index...")
                # TODO improve this by allowing an arbitrary
                #      user-specified function instead of min
                index = (numpy.abs(synt_times - synt_time)).argmin()
                self._log("Looking for argmin index... done")
                real_time = real_times[index]
                real_anchors.append([real_time, fragment_id])
                self._log("Time for anchor %d: %f" % (anchor_index, real_time))
                anchor_index += 1

            # dummy last anchor, starting at the real file duration
            real_anchors.append([real_times[-1], None])

            # compute map
            self._log("Computing interval map...")
            computed_map = []
            for i in range(len(real_anchors) - 1):
                fragment_id = real_anchors[i][1]
                start = real_anchors[i][0]
                end = real_anchors[i + 1][0]
                computed_map.append([start, end, fragment_id])
            self._log("Computing interval map... done")

            # return computed map
            self._log("Returning interval map")
            return (True, computed_map)
        except Exception:
            return (False, None)

    def _create_syncmap(self, text_map):
        """
        Create a sync map out of the provided interval map,
        and store it in the task object.

        Return a success bool flag.
        """
        self._log("Creating SyncMap")
        self._log("Number of fragments: %d" % len(text_map))
        # the interval map must cover all text fragments
        if len(text_map) != len(self.task.text_file.fragments):
            return False
        try:
            sync_map = SyncMap()
            i = 0
            # offset all intervals by the audio head length, if set
            head = 0
            if self.task.configuration.is_audio_file_head_length is not None:
                head = gf.safe_float(self.task.configuration.is_audio_file_head_length, 0)
            for fragment in self.task.text_file.fragments:
                start = head + text_map[i][0]
                end = head + text_map[i][1]
                sync_map_frag = SyncMapFragment(fragment, start, end)
                sync_map.append(sync_map_frag)
                i += 1
            self.task.sync_map = sync_map
            return True
        except Exception:
            return False
class FFPROBEWrapper(object):
    """
    Wrapper around ``ffprobe`` to read the properties of an audio file.

    It will perform a call like::

        $ ffprobe -select_streams a -show_streams /path/to/audio/file.mp3

    and it will parse the first ``[STREAM]`` element returned, e.g.::

        [STREAM]
        index=0
        codec_name=mp3
        sample_rate=44100
        channels=1
        duration=109.487188
        ...
        [/STREAM]

    :param logger: the logger object
    :type  logger: :class:`aeneas.logger.Logger`
    """

    FFPROBE_PARAMETERS = [
        "-select_streams",
        "a",
        "-show_streams"
    ]
    """ ``ffprobe`` parameters """

    STDERR_DURATION_REGEX = r"Duration: ([0-9]*):([0-9]*):([0-9]*)\.([0-9]*)"
    """ Regex to match ``ffprobe`` stderr duration values """

    STDOUT_BEGIN_STREAM = "[STREAM]"
    """ ``ffprobe`` stdout begin stream tag """

    STDOUT_CHANNELS = "channels"
    """ ``ffprobe`` stdout channels keyword """

    STDOUT_CODEC_NAME = "codec_name"
    """ ``ffprobe`` stdout codec name (format) keyword """

    STDOUT_END_STREAM = "[/STREAM]"
    """ ``ffprobe`` stdout end stream tag """

    STDOUT_DURATION = "duration"
    """ ``ffprobe`` stdout duration keyword """

    STDOUT_SAMPLE_RATE = "sample_rate"
    """ ``ffprobe`` stdout sample rate keyword """

    TAG = "FFPROBEWrapper"

    def __init__(self, logger=None):
        self.logger = logger
        if logger is None:
            self.logger = Logger()

    def _log(self, message, severity=Logger.DEBUG):
        """ Log """
        self.logger.log(message, severity, self.TAG)

    def read_properties(self, audio_file_path):
        """
        Read the properties of an audio file
        and return them as a dictionary, keyed by the ``ffprobe``
        property names, e.g.::

            d["codec_name"]=mp3
            d["sample_rate"]=44100
            d["channels"]=1
            d["duration"]=109.487188
            ...

        The ``duration`` value is converted to float;
        all other values are returned as strings.

        :param audio_file_path: the path of the audio file to analyze
        :type  audio_file_path: string (path)
        :rtype: dict

        :raise OSError: if the file cannot be read
        :raise Exception: if ``ffprobe`` produced no output
        :raise ValueError: if the duration cannot be determined
        """
        # test if we can read the file at audio_file_path
        if not os.path.isfile(audio_file_path):
            self._log(["Input file '%s' cannot be read", audio_file_path], Logger.CRITICAL)
            raise OSError("Input file cannot be read")

        # call ffprobe
        arguments = []
        arguments += [gc.FFPROBE_PATH]
        arguments += self.FFPROBE_PARAMETERS
        arguments += [audio_file_path]
        self._log(["Calling with arguments '%s'", arguments])
        proc = subprocess.Popen(
            arguments,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            stderr=subprocess.PIPE)
        (stdoutdata, stderrdata) = proc.communicate()
        proc.stdout.close()
        proc.stdin.close()
        proc.stderr.close()
        self._log("Call completed")

        # if no output, raise error
        # BUG FIX: the original tested len(stderrdata), but the intent
        # (and the error message) refer to stdout; as written it raised
        # whenever ffprobe's stderr happened to be empty
        if (stdoutdata is None) or (len(stdoutdata) == 0):
            msg = "No output for '%s'" % audio_file_path
            self._log(msg, Logger.CRITICAL)
            raise Exception(msg)

        # dictionary for the results
        results = dict()
        results[self.STDOUT_CHANNELS] = None
        results[self.STDOUT_CODEC_NAME] = None
        results[self.STDOUT_DURATION] = None
        results[self.STDOUT_SAMPLE_RATE] = None

        # scan the first audio stream the ffprobe stdout output
        # TODO more robust parsing
        # TODO deal with multiple audio streams
        for line in stdoutdata.splitlines():
            if line == self.STDOUT_END_STREAM:
                self._log("Reached end of the stream")
                break
            elif len(line.split("=")) == 2:
                key, value = line.split("=")
                results[key] = value
                self._log(["Found property '%s'='%s'", key, value])

        # convert duration to float
        if self.STDOUT_DURATION in results:
            self._log(["Found duration: '%s'", results[self.STDOUT_DURATION]])
            results[self.STDOUT_DURATION] = gf.safe_float(
                results[self.STDOUT_DURATION],
                None
            )
        else:
            self._log("No duration found in stdout")

        # if audio_length is still None, try scanning ffprobe stderr output
        try:
            if results[self.STDOUT_DURATION] is None:
                pattern = re.compile(self.STDERR_DURATION_REGEX)
                for line in stderrdata.splitlines():
                    match = pattern.search(line)
                    if match is not None:
                        self._log(["Found matching line '%s'", line])
                        v_h = int(match.group(1))
                        v_m = int(match.group(2))
                        v_s = int(match.group(3))
                        v_f = float("0." + match.group(4))
                        v_length = v_h * 3600 + v_m * 60 + v_s + v_f
                        results[self.STDOUT_DURATION] = v_length
                        self._log(["Extracted duration '%f'", v_length])
                        break
        except ValueError:
            self._log("ValueError exception while parsing stderr")
        except TypeError:
            self._log("TypeError exception while parsing stderr")

        if results[self.STDOUT_DURATION] is None:
            self._log("No duration found in stdout or stderr", Logger.CRITICAL)
            raise ValueError("Cannot determine duration of the input file")

        # return dictionary
        self._log("Returning dict")
        return results
def __init__(self, parameters=None, logger=None): self.parameters = parameters self.logger = logger if self.logger == None: self.logger = Logger() self._log("Initialized with parameters '%s'" % self.parameters)
def __init__(self, task, logger=None): self.task = task self.cleanup_info = [] self.logger = logger if self.logger == None: self.logger = Logger()