def check_job_configuration(self, config_string):
    """
    Validate a job configuration string.

    The string is first handed to ``check_string_well_encoded``.
    If that check does not pass, its result is returned as is.
    Otherwise the string is parsed into a dictionary and checked for
    the presence of ``gc.PPN_JOB_LANGUAGE``,
    ``gc.PPN_JOB_OS_FILE_NAME`` and ``gc.PPN_JOB_OS_CONTAINER_FORMAT``.

    :param config_string: the string to be checked
    :type  config_string: string
    :rtype: :class:`aeneas.validator.ValidatorResult`
    """
    self._log(["Checking job configuration '%s'", config_string])

    # step 1: encoding; the result object is reused by the next step
    self._log("Checking that string is well encoded")
    outcome = self.check_string_well_encoded(config_string)

    if outcome.passed:
        # step 2: required parameters
        self._log("Checking required parameters")
        mandatory_keys = [
            gc.PPN_JOB_LANGUAGE,
            gc.PPN_JOB_OS_FILE_NAME,
            gc.PPN_JOB_OS_CONTAINER_FORMAT,
        ]
        config_dict = gf.config_string_to_dict(config_string, outcome)
        self._check_required_parameters(mandatory_keys, config_dict, outcome)
        self._log(["Checking job configuration: returning %s", outcome.passed])
    else:
        self._log("Failed")

    return outcome
def check_config_txt(self, contents, is_config_string=False):
    """
    Validate the contents of a TXT config file, or a TXT config string.

    The outcome is stored in ``self.result`` and also returned.
    Byte-string input is first checked with ``check_raw_string`` and
    converted with ``gf.safe_unicode``. Unless ``is_config_string`` is
    ``True``, the contents are converted to a config string. The
    resulting string is then checked against
    ``self.TXT_REQUIRED_PARAMETERS``.

    :param string contents: the TXT config file contents or TXT config string
    :param bool is_config_string: if ``True``, contents is a config string
    :rtype: :class:`~aeneas.validator.ValidatorResult`
    """
    self.log(u"Checking contents TXT config file")
    self.result = ValidatorResult()

    # nothing else to do when safety checks are switched off
    if self._are_safety_checks_disabled(u"check_config_txt"):
        return self.result

    if gf.is_bytes(contents):
        self.log(u"Checking that contents is well formed")
        self.check_raw_string(contents, is_bstring=True)
        if not self.result.passed:
            return self.result
        contents = gf.safe_unicode(contents)

    if is_config_string:
        config_string = contents
    else:
        self.log(u"Converting file contents to config string")
        config_string = gf.config_txt_to_string(contents)

    self.log(u"Checking required parameters")
    mandatory_keys = self.TXT_REQUIRED_PARAMETERS
    config_dict = gf.config_string_to_dict(config_string, self.result)
    self._check_required_parameters(mandatory_keys, config_dict)

    self.log([u"Checking contents: returning %s", self.result.passed])
    return self.result
def __init__(self, config_string=None):
    """
    Set up the configuration from ``self.FIELDS`` and, optionally,
    from a config string.

    Each entry of ``self.FIELDS`` is a pair ``(field, info)`` where
    ``info`` unpacks to ``(default, type, aliases)``. Values found in
    ``config_string`` override the defaults; keys that are not fields
    are ignored.

    :param string config_string: the configuration string, or ``None``
    :raises TypeError: if ``config_string`` is not ``None``
                       and it is not a Unicode string
    """
    given = config_string is not None
    if given and (not gf.is_unicode(config_string)):
        raise TypeError(u"config_string is not a Unicode string")

    # per-field storage: current value, declared type, alias -> field
    self.data = {}
    self.types = {}
    self.aliases = {}
    for field, (default_value, value_type, alias_names) in self.FIELDS:
        self.data[field] = default_value
        self.types[field] = value_type
        for alias_name in alias_names:
            self.aliases[alias_name] = field

    if not given:
        return

    # drop one pair of matching surrounding quotes, if present
    quote_chars = [u"\"", u"'"]
    if config_string:
        first_char = config_string[0]
        if (first_char == config_string[-1]) and (first_char in quote_chars):
            config_string = config_string[1:-1]

    # copy over only the values whose key is a known field
    parsed = gf.config_string_to_dict(config_string)
    for key in parsed:
        if key in self.data:
            self.data[key] = parsed[key]
def __init__(self, config_string=None):
    """
    Create the task configuration.

    ``self.field_names`` lists the recognised keys and ``self.fields``
    maps each of them to its value, initially ``None``. When a config
    string is given, the values of its recognised keys are stored;
    any other key is ignored.

    :param config_string: the configuration string, or ``None``
    """
    # recognised task keys
    self.field_names = [
        gc.PPN_TASK_DESCRIPTION,
        gc.PPN_TASK_LANGUAGE,
        gc.PPN_TASK_CUSTOM_ID,

        gc.PPN_TASK_ADJUST_BOUNDARY_ALGORITHM,
        gc.PPN_TASK_ADJUST_BOUNDARY_AFTERCURRENT_VALUE,
        gc.PPN_TASK_ADJUST_BOUNDARY_BEFORENEXT_VALUE,
        gc.PPN_TASK_ADJUST_BOUNDARY_OFFSET_VALUE,
        gc.PPN_TASK_ADJUST_BOUNDARY_PERCENT_VALUE,
        gc.PPN_TASK_ADJUST_BOUNDARY_RATE_VALUE,

        gc.PPN_TASK_IS_AUDIO_FILE_DETECT_HEAD_MIN,
        gc.PPN_TASK_IS_AUDIO_FILE_DETECT_HEAD_MAX,
        gc.PPN_TASK_IS_AUDIO_FILE_DETECT_TAIL_MIN,
        gc.PPN_TASK_IS_AUDIO_FILE_DETECT_TAIL_MAX,
        gc.PPN_TASK_IS_AUDIO_FILE_HEAD_LENGTH,
        gc.PPN_TASK_IS_AUDIO_FILE_PROCESS_LENGTH,

        gc.PPN_TASK_IS_TEXT_FILE_FORMAT,
        gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX,
        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX,
        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT,

        gc.PPN_TASK_OS_FILE_FORMAT,
        gc.PPN_TASK_OS_FILE_NAME,
        gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF,
        gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF,
        gc.PPN_TASK_OS_FILE_HEAD_TAIL_FORMAT
    ]

    # every recognised key starts out unset
    self.fields = dict.fromkeys(self.field_names)

    if config_string is None:
        return

    # override with the recognised keys present in the config string
    parsed = gf.config_string_to_dict(config_string)
    self.fields.update(
        (key, parsed[key]) for key in parsed if key in self.field_names
    )
def __init__(self, config_string=None):
    """
    Create the job configuration.

    ``self.field_names`` lists the recognised keys and ``self.fields``
    maps each of them to its value, initially ``None``. When a config
    string is given, the values of its recognised keys are stored;
    any other key is ignored.

    :param config_string: the configuration string, or ``None``
    """
    # recognised job keys
    self.field_names = [
        gc.PPN_JOB_DESCRIPTION,
        gc.PPN_JOB_LANGUAGE,

        gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX,
        gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH,
        gc.PPN_JOB_IS_HIERARCHY_PREFIX,
        gc.PPN_JOB_IS_HIERARCHY_TYPE,
        gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX,
        gc.PPN_JOB_IS_TEXT_FILE_FORMAT,
        gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX,
        gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH,
        gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX,
        gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX,
        gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT,

        gc.PPN_JOB_OS_FILE_NAME,
        gc.PPN_JOB_OS_CONTAINER_FORMAT,
        gc.PPN_JOB_OS_HIERARCHY_TYPE,
        gc.PPN_JOB_OS_HIERARCHY_PREFIX,
    ]

    # every recognised key starts out unset
    self.fields = dict.fromkeys(self.field_names)

    if config_string is None:
        return

    # override with the recognised keys present in the config string
    parsed = gf.config_string_to_dict(config_string)
    self.fields.update(
        (key, parsed[key]) for key in parsed if key in self.field_names
    )
def check_task_configuration(self, config_string):
    """
    Validate a task configuration string.

    The string is first handed to ``check_string_well_encoded``.
    If that check does not pass, its result is returned as is.
    Otherwise the string is parsed into a dictionary and checked for
    the presence of ``gc.PPN_TASK_IS_TEXT_FILE_FORMAT``,
    ``gc.PPN_TASK_LANGUAGE``, ``gc.PPN_TASK_OS_FILE_NAME`` and
    ``gc.PPN_TASK_OS_FILE_FORMAT``.

    :param config_string: the string to be checked
    :type  config_string: string
    :rtype: :class:`aeneas.validator.ValidatorResult`
    """
    self._log(["Checking task configuration '%s'", config_string])

    # encoding first: a badly encoded string is not parsed at all
    self._log("Checking that string is well encoded")
    outcome = self.check_string_well_encoded(config_string)
    if not outcome.passed:
        self._log("Failed")
        return outcome

    # then the parameters every task must declare
    self._log("Checking required parameters")
    mandatory_keys = [
        gc.PPN_TASK_IS_TEXT_FILE_FORMAT,
        gc.PPN_TASK_LANGUAGE,
        gc.PPN_TASK_OS_FILE_NAME,
        gc.PPN_TASK_OS_FILE_FORMAT,
    ]
    config_dict = gf.config_string_to_dict(config_string, outcome)
    self._check_required_parameters(mandatory_keys, config_dict, outcome)

    self._log(["Checking task configuration: returning %s", outcome.passed])
    return outcome
def __init__(self, config_string=None):
    """
    Set up the configuration from ``self.FIELDS`` and, optionally,
    from a config string.

    Each entry of ``self.FIELDS`` is a pair ``(field, info)`` where
    ``info`` unpacks to ``(default, type, aliases, description)``.
    Values found in ``config_string`` override the defaults; keys that
    are not fields are ignored.

    :param string config_string: the configuration string, or ``None``
    :raises TypeError: if ``config_string`` is not ``None``
                       and it is not a Unicode string
    """
    given = config_string is not None
    if given and (not gf.is_unicode(config_string)):
        raise TypeError(u"config_string is not a Unicode string")

    # per-field storage: value, declared type, description, alias -> field
    self.data = {}
    self.types = {}
    self.aliases = {}
    self.desc = {}
    for field, (default_value, value_type, alias_names, description) in self.FIELDS:
        self.data[field] = default_value
        self.types[field] = value_type
        self.desc[field] = description
        for alias_name in alias_names:
            self.aliases[alias_name] = field

    if not given:
        return

    # drop one pair of matching surrounding quotes, if present
    quote_chars = [u"\"", u"'"]
    if config_string:
        first_char = config_string[0]
        if (first_char == config_string[-1]) and (first_char in quote_chars):
            config_string = config_string[1:-1]

    # copy over only the values whose key is a known field
    parsed = gf.config_string_to_dict(config_string)
    for key in parsed:
        if key in self.data:
            self.data[key] = parsed[key]
def __init__(self, config_string=None):
    """
    Create the task configuration.

    ``self.field_names`` lists the recognised keys and ``self.fields``
    maps each of them to its value, initially ``None``. When a config
    string is given, the values of its recognised keys are stored;
    any other key is ignored.

    :param config_string: the configuration string, or ``None``
    """
    # task fields
    self.field_names = [
        gc.PPN_TASK_DESCRIPTION,
        gc.PPN_TASK_LANGUAGE,
        gc.PPN_TASK_CUSTOM_ID,

        gc.PPN_TASK_IS_AUDIO_FILE_HEAD_LENGTH,
        gc.PPN_TASK_IS_AUDIO_FILE_PROCESS_LENGTH,

        gc.PPN_TASK_IS_TEXT_FILE_FORMAT,
        gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX,
        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX,
        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT,

        gc.PPN_TASK_OS_FILE_FORMAT,
        gc.PPN_TASK_OS_FILE_NAME,
        gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF,
        gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF
    ]

    # every recognised key starts out unset
    self.fields = dict()
    for key in self.field_names:
        self.fields[key] = None

    # populate values from config_string
    # (identity test: ``!= None`` would go through ``__ne__`` and is
    # not the idiomatic way to compare against the ``None`` singleton)
    if config_string is not None:
        properties = gf.config_string_to_dict(config_string)
        for key in properties:
            if key in self.field_names:
                self.fields[key] = properties[key]
def _create_task(
        self,
        task_info,
        config_string,
        sync_map_root_directory,
        job_os_hierarchy_type
):
    """
    Build a task from the container analysis and a config string.

    The task is created from ``config_string``; its description and
    custom id are derived from ``task_info[0]``, its text and audio
    paths from ``task_info[1]`` and ``task_info[2]``. The language is
    taken from ``gc.PPN_TASK_LANGUAGE`` and, if a ``KeyError`` is
    raised, from ``gc.PPN_JOB_LANGUAGE``.

    :param list task_info: the task information: ``[prefix, text_path, audio_path]``
    :param string config_string: the configuration string
    :param string sync_map_root_directory: the root directory for the sync map files
    :param job_os_hierarchy_type: type of job output hierarchy
    :type  job_os_hierarchy_type: :class:`~aeneas.hierarchytype.HierarchyType`
    :rtype: :class:`~aeneas.task.Task`
    """
    self.log(u"Converting config string to config dict")
    config_dict = gf.config_string_to_dict(config_string)

    self.log(u"Creating task")
    new_task = Task(config_string, logger=self.logger)

    new_task.configuration["description"] = "Task %s" % task_info[0]
    self.log([u"Task description: %s", new_task.configuration["description"]])

    # task-level language wins; the job-level one is the fallback
    try:
        new_task.configuration["language"] = config_dict[gc.PPN_TASK_LANGUAGE]
        self.log([u"Set language from task: '%s'", new_task.configuration["language"]])
    except KeyError:
        new_task.configuration["language"] = config_dict[gc.PPN_JOB_LANGUAGE]
        self.log([u"Set language from job: '%s'", new_task.configuration["language"]])

    task_id = task_info[0]
    new_task.configuration["custom_id"] = task_id
    self.log([u"Task custom_id: %s", new_task.configuration["custom_id"]])

    new_task.text_file_path = task_info[1]
    self.log([u"Task text file path: %s", new_task.text_file_path])

    new_task.audio_file_path = task_info[2]
    self.log([u"Task audio file path: %s", new_task.audio_file_path])

    new_task.sync_map_file_path = self._compute_sync_map_file_path(
        sync_map_root_directory,
        job_os_hierarchy_type,
        task_id,
        new_task.configuration["o_name"]
    )
    self.log([u"Task sync map file path: %s", new_task.sync_map_file_path])

    # the SMIL references may contain a placeholder for the custom id
    self.log(u"Replacing placeholder in os_file_smil_audio_ref")
    audio_ref = new_task.configuration["o_smil_audio_ref"]
    new_task.configuration["o_smil_audio_ref"] = self._replace_placeholder(audio_ref, task_id)

    self.log(u"Replacing placeholder in os_file_smil_page_ref")
    page_ref = new_task.configuration["o_smil_page_ref"]
    new_task.configuration["o_smil_page_ref"] = self._replace_placeholder(page_ref, task_id)

    self.log(u"Returning task")
    return new_task
def check_contents_txt_config_file(self, config_contents, convert_to_string=True):
    """
    Check whether the given TXT config contents (or config string)
    is well formed and contains all the requested parameters.

    :param config_contents: the TXT config file contents, or a config
                            string if ``convert_to_string`` is ``False``
    :type  config_contents: string
    :param convert_to_string: the ``config_contents`` must be converted
                              to a config string
    :type  convert_to_string: bool
    :rtype: :class:`aeneas.validator.ValidatorResult`
    """
    self._log("Checking contents TXT config file")
    result = ValidatorResult()

    if convert_to_string:
        self._log("Converting file contents to config string")
        config_string = gf.config_txt_to_string(config_contents)
    else:
        # the caller passed a config string already: use it directly
        # (this branch used to leave ``config_string`` unbound, so the
        # next statement raised an UnboundLocalError)
        config_string = config_contents

    # check if it is well encoded
    # NOTE: the sibling checks read ``.passed`` on the object returned by
    # ``check_string_well_encoded``; testing the object itself would be
    # always true unless it defines its own truth value.
    # A plain boolean return value is still honoured.
    self._log("Checking that string is well encoded")
    encoding_check = self.check_string_well_encoded(config_string)
    if not getattr(encoding_check, "passed", encoding_check):
        msg = "The TXT config is not well encoded"
        result.passed = False
        result.add_error(msg)
        self._log(msg)
        return result

    # check required parameters
    self._log("Checking required parameters")
    required_parameters = [
        gc.PPN_JOB_IS_HIERARCHY_TYPE,
        gc.PPN_JOB_IS_HIERARCHY_PREFIX,
        gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH,
        gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX,
        gc.PPN_JOB_IS_TEXT_FILE_FORMAT,
        gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH,
        gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX,
        gc.PPN_JOB_OS_FILE_NAME,
        gc.PPN_JOB_OS_CONTAINER_FORMAT,
        gc.PPN_JOB_OS_HIERARCHY_TYPE,
        gc.PPN_JOB_OS_HIERARCHY_PREFIX,
        gc.PPN_TASK_OS_FILE_NAME,
        gc.PPN_TASK_OS_FILE_FORMAT,
        gc.PPN_JOB_LANGUAGE
    ]
    parameters = gf.config_string_to_dict(config_string, result)
    self._check_required_parameters(required_parameters, parameters, result)

    # return result
    self._log(["Checking contents TXT config file: returning %s", result.passed])
    return result
def _create_task(self, task_info, config_string, sync_map_root_directory, job_os_hierarchy_type):
    """
    Build a task from the container analysis and a config string.

    The task is created from ``config_string``; its description and
    custom id are derived from ``task_info[0]``, its text and audio
    paths from ``task_info[1]`` and ``task_info[2]``. The language is
    taken from ``gc.PPN_TASK_LANGUAGE`` and, if a ``KeyError`` is
    raised, from ``gc.PPN_JOB_LANGUAGE``.

    :param task_info: the task information: ``[prefix, text_path, audio_path]``
    :type  task_info: list of strings
    :param config_string: the configuration string
    :type  config_string: string
    :param sync_map_root_directory: the root directory for the sync map files
    :type  sync_map_root_directory: string (path)
    :param job_os_hierarchy_type: type of job output hierarchy
    :type  job_os_hierarchy_type: :class:`aeneas.hierarchytype.HierarchyType`
    :rtype: :class:`aeneas.task.Task`
    """
    self._log("Converting config string to config dict")
    config_dict = gf.config_string_to_dict(config_string)

    self._log("Creating task")
    new_task = Task(config_string)

    new_task.configuration.description = "Task %s" % task_info[0]
    self._log(["Task description: %s", new_task.configuration.description])

    # task-level language wins; the job-level one is the fallback
    try:
        new_task.configuration.language = config_dict[gc.PPN_TASK_LANGUAGE]
        self._log(["Set language from task: '%s'", new_task.configuration.language])
    except KeyError:
        new_task.configuration.language = config_dict[gc.PPN_JOB_LANGUAGE]
        self._log(["Set language from job: '%s'", new_task.configuration.language])

    task_id = task_info[0]
    new_task.configuration.custom_id = task_id
    self._log(["Task custom_id: %s", new_task.configuration.custom_id])

    new_task.text_file_path = task_info[1]
    self._log(["Task text file path: %s", new_task.text_file_path])

    new_task.audio_file_path = task_info[2]
    self._log(["Task audio file path: %s", new_task.audio_file_path])

    new_task.sync_map_file_path = self._compute_sync_map_file_path(
        sync_map_root_directory,
        job_os_hierarchy_type,
        task_id,
        new_task.configuration.os_file_name
    )
    self._log(["Task sync map file path: %s", new_task.sync_map_file_path])

    # the SMIL references may contain a placeholder for the custom id
    self._log("Replacing placeholder in os_file_smil_audio_ref")
    audio_ref = new_task.configuration.os_file_smil_audio_ref
    new_task.configuration.os_file_smil_audio_ref = self._replace_placeholder(audio_ref, task_id)

    self._log("Replacing placeholder in os_file_smil_page_ref")
    page_ref = new_task.configuration.os_file_smil_page_ref
    new_task.configuration.os_file_smil_page_ref = self._replace_placeholder(page_ref, task_id)

    self._log("Returning task")
    return new_task
def test_config_string_to_dict(self):
    """
    ``gf.config_string_to_dict`` must return the expected dictionary
    for each ``(config string, expected dictionary)`` pair below.
    """
    cases = (
        # no input at all
        (None, {}),
        (u"", {}),
        # single pair, with stray separators
        (u"k1=v1", {u"k1": u"v1"}),
        (u"k1=v1|", {u"k1": u"v1"}),
        (u"|k1=v1|", {u"k1": u"v1"}),
        (u"|k1=v1", {u"k1": u"v1"}),
        # repeated key: the last value is expected
        (u"k1=v1|k1=v2", {u"k1": u"v2"}),
        (u"k1=v1|k2=v2", {u"k1": u"v1", u"k2": u"v2"}),
        (u"k1=v1|k2=v2|k1=v3", {u"k1": u"v3", u"k2": u"v2"}),
        (u"k1=v1||k2=v2", {u"k1": u"v1", u"k2": u"v2"}),
        (u"k1=v1|k2=v2|k3=v3", {u"k1": u"v1", u"k2": u"v2", u"k3": u"v3"}),
        # pairs with an empty value or an empty key are expected to be dropped
        (u"k1=v1|k2=|k3=v3", {u"k1": u"v1", u"k3": u"v3"}),
        (u"k1=v1|=v2|k3=v3", {u"k1": u"v1", u"k3": u"v3"}),
    )
    for config_string, expected in cases:
        self.assertEqual(gf.config_string_to_dict(config_string), expected)
def test_config_string_to_dict(self):
    """
    Compare the output of ``gf.config_string_to_dict`` with the
    expected dictionary, for a table of config strings.
    """
    # (input config string, expected dictionary)
    table = [
        (None, {}),
        (u"", {}),
        (u"k1=v1", {u"k1": u"v1"}),
        (u"k1=v1|", {u"k1": u"v1"}),
        (u"|k1=v1|", {u"k1": u"v1"}),
        (u"|k1=v1", {u"k1": u"v1"}),
        (u"k1=v1|k1=v2", {u"k1": u"v2"}),
        (u"k1=v1|k2=v2", {u"k1": u"v1", u"k2": u"v2"}),
        (u"k1=v1|k2=v2|k1=v3", {u"k1": u"v3", u"k2": u"v2"}),
        (u"k1=v1||k2=v2", {u"k1": u"v1", u"k2": u"v2"}),
        (u"k1=v1|k2=v2|k3=v3", {u"k1": u"v1", u"k2": u"v2", u"k3": u"v3"}),
        (u"k1=v1|k2=|k3=v3", {u"k1": u"v1", u"k3": u"v3"}),
        (u"k1=v1|=v2|k3=v3", {u"k1": u"v1", u"k3": u"v3"}),
    ]
    for raw, wanted in table:
        actual = gf.config_string_to_dict(raw)
        self.assertEqual(actual, wanted)
def check_configuration_string(
        self,
        config_string,
        is_job=True,
        external_name=False
):
    """
    Validate a job or task configuration string.

    The outcome is stored in ``self.result`` and also returned.
    Byte-string input is first checked with ``check_raw_string`` and
    converted with ``gf.safe_unicode``. The string is then parsed and
    checked against one of ``self.JOB_REQUIRED_PARAMETERS``,
    ``self.TASK_REQUIRED_PARAMETERS_EXTERNAL_NAME`` or
    ``self.TASK_REQUIRED_PARAMETERS``.

    :param string config_string: the byte string or Unicode string to be checked
    :param bool is_job: if ``True``, ``config_string`` is a job config string
    :param bool external_name: if ``True``, the task name is provided externally,
                               and it is not required to appear
                               in the config string
    :rtype: :class:`~aeneas.validator.ValidatorResult`
    """
    self.log(
        u"Checking job configuration string" if is_job
        else u"Checking task configuration string"
    )
    self.result = ValidatorResult()

    # nothing else to do when safety checks are switched off
    if self._are_safety_checks_disabled(u"check_configuration_string"):
        return self.result

    # pick the list of mandatory keys for this kind of string
    if is_job:
        mandatory_keys = self.JOB_REQUIRED_PARAMETERS
    else:
        if external_name:
            mandatory_keys = self.TASK_REQUIRED_PARAMETERS_EXTERNAL_NAME
        else:
            mandatory_keys = self.TASK_REQUIRED_PARAMETERS

    if gf.is_bytes(config_string):
        self.log(u"Checking that config_string is well formed")
        self.check_raw_string(config_string, is_bstring=True)
        if not self.result.passed:
            return self.result
        config_string = gf.safe_unicode(config_string)

    self.log(u"Checking required parameters")
    config_dict = gf.config_string_to_dict(config_string, self.result)
    self._check_required_parameters(mandatory_keys, config_dict)

    self.log([u"Checking config_string: returning %s", self.result.passed])
    return self.result
def _analyze_txt_config(self, config_string=None):
    """
    Build a job from the container, driven by a TXT configuration.

    When ``config_string`` is ``None``, the configuration is read from
    the container entry ``self.container.entry_config_txt`` and the
    directory of that entry is used as the base for the hierarchy
    prefixes. Otherwise the given string is used and the base is the
    empty string.

    Text/audio pairs are then looked up according to the value of
    ``gc.PPN_JOB_IS_HIERARCHY_TYPE`` (``HierarchyType.FLAT`` or
    ``HierarchyType.PAGED``), and a task is added to the job for
    each pair found.

    :param string config_string: the configuration string
    :rtype: :class:`~aeneas.job.Job`
    """
    self.log(u"Analyzing container with TXT config string")
    if config_string is not None:
        self.log([u"Analyzing container with TXT config string '%s'", config_string])
        base_dir = ""
    else:
        self.log(u"Analyzing container with TXT config file")
        txt_entry = self.container.entry_config_txt
        self.log([u"Found TXT config entry '%s'", txt_entry])
        base_dir = os.path.dirname(txt_entry)
        self.log([u"Directory of TXT config entry: '%s'", base_dir])
        self.log([u"Reading TXT config entry: '%s'", txt_entry])
        raw_contents = self.container.read_entry(txt_entry)
        self.log(u"Converting config contents to config string")
        config_string = gf.config_txt_to_string(gf.safe_unicode(raw_contents))

    self.log(u"Creating the Job object")
    job = Job(config_string)

    self.log(u"Getting entries")
    entries = self.container.entries

    self.log(u"Converting config string into config dict")
    params = gf.config_string_to_dict(config_string)

    # input side: where the task assets live
    self.log(u"Calculating the path of the tasks root directory")
    tasks_root = gf.norm_join(base_dir, params[gc.PPN_JOB_IS_HIERARCHY_PREFIX])
    self.log([u"Path of the tasks root directory: '%s'", tasks_root])

    # output side: where the sync maps go
    self.log(u"Calculating the path of the sync map root directory")
    sync_root = gf.norm_join(base_dir, params[gc.PPN_JOB_OS_HIERARCHY_PREFIX])
    out_hierarchy = params[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self.log([u"Path of the sync map root directory: '%s'", sync_root])

    # relative path and file name regex, for text and for audio
    text_rel_path = params[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
    self.log([u"Relative path for text file: '%s'", text_rel_path])
    text_pattern = params[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX]
    text_regex = re.compile(r"" + text_pattern)
    self.log([u"Regex for text file: '%s'", text_pattern])

    audio_rel_path = params[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
    self.log([u"Relative path for audio file: '%s'", audio_rel_path])
    audio_pattern = params[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX]
    audio_regex = re.compile(r"" + audio_pattern)
    self.log([u"Regex for audio file: '%s'", audio_pattern])

    in_hierarchy = params[gc.PPN_JOB_IS_HIERARCHY_TYPE]

    if in_hierarchy == HierarchyType.FLAT:
        self.log(u"Looking for text/audio pairs in flat hierarchy")
        text_files = self._find_files(entries, tasks_root, text_rel_path, text_regex)
        self.log([u"Found text files: '%s'", text_files])
        audio_files = self._find_files(entries, tasks_root, audio_rel_path, audio_regex)
        self.log([u"Found audio files: '%s'", audio_files])
        self.log(u"Matching files in flat hierarchy...")
        pairs = self._match_files_flat_hierarchy(text_files, audio_files)
        self.log(u"Matching files in flat hierarchy... done")
        for task_info in pairs:
            self.log([u"Creating task: '%s'", str(task_info)])
            job.add_task(
                self._create_task(task_info, config_string, sync_root, out_hierarchy)
            )

    if in_hierarchy == HierarchyType.PAGED:
        self.log(u"Looking for text/audio pairs in paged hierarchy")
        # subdirectories of the tasks root matching
        # gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
        page_dirs = self._match_directories(
            entries,
            tasks_root,
            params[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
        )
        for page_dir in page_dirs:
            page_path = gf.norm_join(tasks_root, page_dir)
            self.log([u"Looking for text/audio pairs in directory '%s'", page_path])

            text_files = self._find_files(entries, page_path, text_rel_path, text_regex)
            self.log([u"Found text files: '%s'", text_files])
            audio_files = self._find_files(entries, page_path, audio_rel_path, audio_regex)
            self.log([u"Found audio files: '%s'", audio_files])

            # a task is created only for exactly one text + one audio file
            n_text = len(text_files)
            n_audio = len(audio_files)
            if (n_text == 1) and (n_audio == 1):
                self.log([u"Exactly one text file and one audio file in '%s'", page_dir])
                task_info = [page_dir, text_files[0], audio_files[0]]
                self.log([u"Creating task: '%s'", str(task_info)])
                job.add_task(
                    self._create_task(task_info, config_string, sync_root, out_hierarchy)
                )
            elif n_text > 1:
                self.log([u"More than one text file in '%s'", page_dir])
            elif n_audio > 1:
                self.log([u"More than one audio file in '%s'", page_dir])
            else:
                self.log([u"No text nor audio file in '%s'", page_dir])

    return job
def _analyze_txt_config(self, config_string=None):
    """
    Analyze the container according to a TXT configuration
    and return the resulting job.

    If ``config_string`` is ``None``, the configuration is read from
    the container entry ``self.container.entry_config_txt`` and the
    hierarchy prefixes are joined to the directory of that entry.
    Otherwise they are joined to the empty string.

    Depending on ``gc.PPN_JOB_IS_HIERARCHY_TYPE``, text/audio pairs
    are searched in the tasks root directory
    (``HierarchyType.FLAT``) or in each matching subdirectory
    (``HierarchyType.PAGED``); one task is added to the job per pair.

    :param string config_string: the configuration string
    :rtype: :class:`~aeneas.job.Job`
    """
    self.log(u"Analyzing container with TXT config string")
    read_from_container = config_string is None
    if read_from_container:
        self.log(u"Analyzing container with TXT config file")
        entry = self.container.entry_config_txt
        self.log([u"Found TXT config entry '%s'", entry])
        root = os.path.dirname(entry)
        self.log([u"Directory of TXT config entry: '%s'", root])
        self.log([u"Reading TXT config entry: '%s'", entry])
        contents = self.container.read_entry(entry)
        self.log(u"Converting config contents to config string")
        contents = gf.safe_unicode(contents)
        config_string = gf.config_txt_to_string(contents)
    else:
        self.log([
            u"Analyzing container with TXT config string '%s'",
            config_string
        ])
        root = ""

    self.log(u"Creating the Job object")
    job = Job(config_string)
    self.log(u"Getting entries")
    entries = self.container.entries
    self.log(u"Converting config string into config dict")
    cfg = gf.config_string_to_dict(config_string)

    self.log(u"Calculating the path of the tasks root directory")
    tasks_dir = gf.norm_join(root, cfg[gc.PPN_JOB_IS_HIERARCHY_PREFIX])
    self.log([u"Path of the tasks root directory: '%s'", tasks_dir])

    self.log(u"Calculating the path of the sync map root directory")
    sync_dir = gf.norm_join(root, cfg[gc.PPN_JOB_OS_HIERARCHY_PREFIX])
    os_hierarchy = cfg[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self.log([u"Path of the sync map root directory: '%s'", sync_dir])

    text_rel = cfg[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
    self.log([u"Relative path for text file: '%s'", text_rel])
    text_re = re.compile(r"" + cfg[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    self.log([u"Regex for text file: '%s'", cfg[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX]])

    audio_rel = cfg[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
    self.log([u"Relative path for audio file: '%s'", audio_rel])
    audio_re = re.compile(r"" + cfg[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])
    self.log([u"Regex for audio file: '%s'", cfg[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX]])

    # flat hierarchy: all the files are under the tasks root directory
    if cfg[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.FLAT:
        self.log(u"Looking for text/audio pairs in flat hierarchy")
        texts = self._find_files(entries, tasks_dir, text_rel, text_re)
        self.log([u"Found text files: '%s'", texts])
        audios = self._find_files(entries, tasks_dir, audio_rel, audio_re)
        self.log([u"Found audio files: '%s'", audios])
        self.log(u"Matching files in flat hierarchy...")
        matched = self._match_files_flat_hierarchy(texts, audios)
        self.log(u"Matching files in flat hierarchy... done")
        for info in matched:
            self.log([u"Creating task: '%s'", str(info)])
            created = self._create_task(info, config_string, sync_dir, os_hierarchy)
            job.add_task(created)

    # paged hierarchy: one subdirectory per task
    if cfg[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.PAGED:
        self.log(u"Looking for text/audio pairs in paged hierarchy")
        subdirs = self._match_directories(
            entries,
            tasks_dir,
            cfg[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
        )
        for subdir in subdirs:
            full_path = gf.norm_join(tasks_dir, subdir)
            self.log([
                u"Looking for text/audio pairs in directory '%s'",
                full_path
            ])
            texts = self._find_files(entries, full_path, text_rel, text_re)
            self.log([u"Found text files: '%s'", texts])
            audios = self._find_files(entries, full_path, audio_rel, audio_re)
            self.log([u"Found audio files: '%s'", audios])

            # exactly one text file and one audio file: create the task
            if (len(texts) == 1) and (len(audios) == 1):
                self.log([
                    u"Exactly one text file and one audio file in '%s'",
                    subdir
                ])
                info = [subdir, texts[0], audios[0]]
                self.log([u"Creating task: '%s'", str(info)])
                created = self._create_task(info, config_string, sync_dir, os_hierarchy)
                job.add_task(created)
                continue

            # otherwise, just report why this directory was skipped
            if len(texts) > 1:
                self.log([u"More than one text file in '%s'", subdir])
                continue
            if len(audios) > 1:
                self.log([u"More than one audio file in '%s'", subdir])
                continue
            self.log([u"No text nor audio file in '%s'", subdir])

    return job
def check_contents_txt_config_file(
        self,
        config_contents,
        convert_to_string=True
):
    """
    Check whether the given TXT config contents (or config string)
    is well formed and contains all the requested parameters.

    :param config_contents: the TXT config file contents, or a config
                            string if ``convert_to_string`` is ``False``
    :type  config_contents: string
    :param convert_to_string: the ``config_contents`` must be converted
                              to a config string
    :type  convert_to_string: bool
    :rtype: :class:`aeneas.validator.ValidatorResult`
    """
    self._log("Checking contents TXT config file")
    result = ValidatorResult()

    if convert_to_string:
        self._log("Converting file contents to config string")
        config_string = gf.config_txt_to_string(config_contents)
    else:
        # the caller passed a config string already: use it directly
        # (this branch used to leave ``config_string`` unbound, so the
        # next statement raised an UnboundLocalError)
        config_string = config_contents

    # check if it is well encoded
    # NOTE: the sibling checks read ``.passed`` on the object returned by
    # ``check_string_well_encoded``; testing the object itself would be
    # always true unless it defines its own truth value.
    # A plain boolean return value is still honoured.
    self._log("Checking that string is well encoded")
    encoding_check = self.check_string_well_encoded(config_string)
    if not getattr(encoding_check, "passed", encoding_check):
        msg = "The TXT config is not well encoded"
        result.passed = False
        result.add_error(msg)
        self._log(msg)
        return result

    # check required parameters
    self._log("Checking required parameters")
    required_parameters = [
        gc.PPN_JOB_IS_HIERARCHY_TYPE,
        gc.PPN_JOB_IS_HIERARCHY_PREFIX,
        gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH,
        gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX,
        gc.PPN_JOB_IS_TEXT_FILE_FORMAT,
        gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH,
        gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX,
        gc.PPN_JOB_OS_FILE_NAME,
        gc.PPN_JOB_OS_CONTAINER_FORMAT,
        gc.PPN_JOB_OS_HIERARCHY_TYPE,
        gc.PPN_JOB_OS_HIERARCHY_PREFIX,
        gc.PPN_TASK_OS_FILE_NAME,
        gc.PPN_TASK_OS_FILE_FORMAT,
        gc.PPN_JOB_LANGUAGE
    ]
    parameters = gf.config_string_to_dict(config_string, result)
    self._check_required_parameters(required_parameters, parameters, result)

    # return result
    self._log(["Checking contents TXT config file: returning %s", result.passed])
    return result
def _analyze_txt_config(self, config_string=None):
    """
    Analyze the given container and return the corresponding job.

    If ``config_string`` is ``None``,
    try reading it from the TXT config file inside the container;
    in that case the hierarchy prefixes are joined to the directory
    of the TXT config entry, otherwise to the empty string.

    Depending on ``gc.PPN_JOB_IS_HIERARCHY_TYPE``, text/audio pairs
    are searched in the tasks root directory
    (``HierarchyType.FLAT``) or in each matching subdirectory
    (``HierarchyType.PAGED``); one task is added to the job per pair.

    :param config_string: the configuration string
    :type  config_string: string
    :rtype: :class:`aeneas.job.Job`
    """
    # TODO break this function down into smaller functions
    self._log("Analyzing container with TXT config string")

    # identity test against the ``None`` singleton (was ``== None``)
    if config_string is None:
        self._log("Analyzing container with TXT config file")
        config_entry = self.container.entry_config_txt
        self._log("Found TXT config entry '%s'" % config_entry)
        config_dir = os.path.dirname(config_entry)
        self._log("Directory of TXT config entry: '%s'" % config_dir)
        self._log("Reading TXT config entry: '%s'" % config_entry)
        config_contents = self.container.read_entry(config_entry)
        self._log("Converting config contents to config string")
        config_string = gf.config_txt_to_string(config_contents)
    else:
        self._log("Analyzing container with TXT config string '%s'" % config_string)
        config_dir = ""

    # create the Job object to be returned
    self._log("Creating the Job object")
    job = Job(config_string)

    # get the entries in this container
    self._log("Getting entries")
    entries = self.container.entries()

    # convert the config string to dict
    self._log("Converting config string into config dict")
    parameters = gf.config_string_to_dict(config_string)

    # compute the root directory for the task assets
    self._log("Calculating the path of the tasks root directory")
    tasks_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX]
    )
    self._log("Path of the tasks root directory: '%s'" % tasks_root_directory)

    # compute the root directory for the sync map files
    self._log("Calculating the path of the sync map root directory")
    sync_map_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
    )
    job_os_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self._log("Path of the sync map root directory: '%s'" % sync_map_root_directory)

    # prepare relative path and file name regex for text and audio files
    text_file_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
    self._log("Relative path for text file: '%s'" % text_file_relative_path)
    text_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    self._log("Regex for text file: '%s'" % parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    audio_file_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
    self._log("Relative path for audio file: '%s'" % audio_file_relative_path)
    audio_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])
    self._log("Regex for audio file: '%s'" % parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])

    # flat hierarchy
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.FLAT:
        self._log("Looking for text/audio pairs in flat hierarchy")
        text_files = self._find_files(
            entries,
            tasks_root_directory,
            text_file_relative_path,
            text_file_name_regex
        )
        self._log("Found text files: '%s'" % str(text_files))
        audio_files = self._find_files(
            entries,
            tasks_root_directory,
            audio_file_relative_path,
            audio_file_name_regex
        )
        self._log("Found audio files: '%s'" % str(audio_files))

        self._log("Matching files in flat hierarchy...")
        matched_tasks = self._match_files_flat_hierarchy(
            text_files,
            audio_files
        )
        self._log("Matching files in flat hierarchy... done")

        for task_info in matched_tasks:
            self._log("Creating task: '%s'" % str(task_info))
            task = self._create_task(
                task_info,
                config_string,
                sync_map_root_directory,
                job_os_hierarchy_type
            )
            job.add_task(task)

    # paged hierarchy
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.PAGED:
        self._log("Looking for text/audio pairs in paged hierarchy")
        # find all subdirectories of tasks_root_directory
        # that match gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
        matched_directories = self._match_directories(
            entries,
            tasks_root_directory,
            parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
        )
        for matched_directory in matched_directories:
            # rebuild the full path
            matched_directory_full_path = gf.norm_join(
                tasks_root_directory,
                matched_directory
            )
            self._log("Looking for text/audio pairs in directory '%s'" % matched_directory_full_path)

            # look for text and audio files there
            text_files = self._find_files(
                entries,
                matched_directory_full_path,
                text_file_relative_path,
                text_file_name_regex
            )
            self._log("Found text files: '%s'" % str(text_files))
            audio_files = self._find_files(
                entries,
                matched_directory_full_path,
                audio_file_relative_path,
                audio_file_name_regex
            )
            self._log("Found audio files: '%s'" % str(audio_files))

            # if we have found exactly one text and one audio file,
            # create a Task
            if (len(text_files) == 1) and (len(audio_files) == 1):
                self._log("Exactly one text file and one audio file in '%s'" % matched_directory)
                task_info = [
                    matched_directory,
                    text_files[0],
                    audio_files[0]
                ]
                self._log("Creating task: '%s'" % str(task_info))
                task = self._create_task(
                    task_info,
                    config_string,
                    sync_map_root_directory,
                    job_os_hierarchy_type
                )
                job.add_task(task)
            elif len(text_files) > 1:
                self._log("More than one text file in '%s'" % matched_directory)
            elif len(audio_files) > 1:
                self._log("More than one audio file in '%s'" % matched_directory)
            else:
                self._log("No text nor audio file in '%s'" % matched_directory)

    # return the Job
    return job