def main(_):
    """Select the problems requested via flags and generate data for each.

    Candidates are the keys of ``_SUPPORTED_PROBLEM_GENERATORS`` plus the
    names returned by ``registry.list_problems()``. The candidates are
    narrowed by ``--exclude_problems`` (comma-separated substrings) and then
    by ``--problem``, which may be an exact name, a comma-separated list of
    exact names, or a prefix terminated by ``*``.

    Args:
        _: Unused by this function.

    Raises:
        ValueError: If no problem is left after filtering.
    """
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Calculate the list of problems to generate.
    all_problems = sorted(
        list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())
    # Empty tokens (e.g. from an empty flag value) must not exclude anything.
    excludes = [e for e in FLAGS.exclude_problems.split(",") if e]
    problems = [p for p in all_problems if not any(e in p for e in excludes)]

    requested = FLAGS.problem
    if requested and requested.endswith("*"):
        # Trailing "*" selects every problem sharing the prefix.
        prefix = requested[:-1]
        problems = [p for p in problems if p.startswith(prefix)]
    elif requested and "," in requested:
        # Split once rather than once per candidate.
        wanted = set(requested.split(","))
        problems = [p for p in problems if p in wanted]
    elif requested:
        problems = [p for p in problems if p == requested]
    else:
        problems = []

    # Remove TIMIT if paths are not given. getattr() tolerates the flag not
    # being defined at all; a missing or empty value means "not given".
    if not getattr(FLAGS, "timit_paths", None):
        problems = [p for p in problems if "timit" not in p]
    # Remove parsing if paths are not given.
    if not getattr(FLAGS, "parsing_path", None):
        problems = [p for p in problems if "parsing_english_ptb" not in p]

    if not problems:
        problems_str = "\n * ".join(all_problems)
        error_msg = ("You must specify one of the supported problems to "
                     "generate data for:\n * " + problems_str + "\n")
        error_msg += ("TIMIT and parsing need data_sets specified with "
                      "--timit_paths and --parsing_path.")
        raise ValueError(error_msg)

    if not FLAGS.data_dir:
        # Fall back to the system temp directory, but tell the user.
        FLAGS.data_dir = tempfile.gettempdir()
        tf.logging.warning(
            "It is strongly recommended to specify --data_dir. "
            "Data will be written to default data_dir=%s.", FLAGS.data_dir)
    FLAGS.data_dir = os.path.expanduser(FLAGS.data_dir)
    tf.gfile.MakeDirs(FLAGS.data_dir)

    tf.logging.info(
        "Generating problems:\n%s",
        registry.display_list_by_prefix(problems, starting_spaces=4))
    if FLAGS.only_list:
        # Listing was all that was asked for; skip generation.
        return

    for problem in problems:
        # Re-seed before every problem.
        set_random_seed()
        if problem in _SUPPORTED_PROBLEM_GENERATORS:
            generate_data_for_problem(problem)
        else:
            generate_data_for_registered_problem(problem)
def main(_):
    """Resolve the flag-selected problems and run data generation for them.

    The candidate list is built from the keys of
    ``_SUPPORTED_PROBLEM_GENERATORS`` and ``registry.list_problems()``,
    filtered by ``--exclude_problems`` (comma-separated substrings), and then
    matched against ``--problem``: a ``*``-terminated prefix, a
    comma-separated list of exact names, or one exact name.

    Args:
        _: Unused by this function.

    Raises:
        ValueError: If the filters leave no problem to generate.
    """
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Calculate the list of problems to generate.
    supported = sorted(
        list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())
    problems = supported
    for exclude in FLAGS.exclude_problems.split(","):
        if exclude:  # skip empty tokens so they do not exclude everything
            problems = [p for p in problems if exclude not in p]

    selector = FLAGS.problem
    if selector and selector.endswith("*"):
        # A trailing "*" means "every problem starting with this prefix".
        stem = selector[:-1]
        problems = [p for p in problems if p.startswith(stem)]
    elif selector and "," in selector:
        # Build the lookup set once instead of re-splitting per candidate.
        names = frozenset(selector.split(","))
        problems = [p for p in problems if p in names]
    elif selector:
        problems = [p for p in problems if p == selector]
    else:
        problems = []

    # Remove TIMIT if paths are not given. The flag may be undefined, hence
    # getattr(); an undefined or empty flag counts as "not given".
    if not getattr(FLAGS, "timit_paths", None):
        problems = [p for p in problems if "timit" not in p]
    # Remove parsing if paths are not given.
    if not getattr(FLAGS, "parsing_path", None):
        problems = [p for p in problems if "parsing_english_ptb" not in p]

    if not problems:
        problems_str = "\n * ".join(supported)
        error_msg = ("You must specify one of the supported problems to "
                     "generate data for:\n * " + problems_str + "\n")
        error_msg += ("TIMIT and parsing need data_sets specified with "
                      "--timit_paths and --parsing_path.")
        raise ValueError(error_msg)

    if not FLAGS.data_dir:
        # No directory requested: default to the system temp dir and warn.
        FLAGS.data_dir = tempfile.gettempdir()
        tf.logging.warning("It is strongly recommended to specify --data_dir. "
                           "Data will be written to default data_dir=%s.",
                           FLAGS.data_dir)
    FLAGS.data_dir = os.path.expanduser(FLAGS.data_dir)
    tf.gfile.MakeDirs(FLAGS.data_dir)

    tf.logging.info("Generating problems:\n%s",
                    registry.display_list_by_prefix(problems,
                                                    starting_spaces=4))
    if FLAGS.only_list:
        # Only the listing was requested.
        return

    for problem in problems:
        # Re-seed before each problem is generated.
        set_random_seed()
        if problem in _SUPPORTED_PROBLEM_GENERATORS:
            generate_data_for_problem(problem)
        else:
            generate_data_for_registered_problem(problem)
def main(_):
    """Pick the problems named by the command-line flags and generate them.

    Candidates come from ``_SUPPORTED_PROBLEM_GENERATORS`` and
    ``registry.list_problems()``. ``--exclude_problems`` removes candidates
    containing any of its comma-separated substrings; ``--problem`` then
    keeps either one exact name or, when it ends in ``*``, every name with
    that prefix. TIMIT and PTB-parsing problems are dropped unless
    ``--timit_paths`` / ``--parsing_path`` are set.

    Args:
        _: Unused by this function.

    Raises:
        ValueError: If nothing is left to generate.
    """
    # NOTE(review): per the original author's note this does nothing when
    # --t2t_usr_dir is unset; confirm against usr_dir.import_usr_dir.
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Calculate the list of problems to generate: the built-in generators
    # merged with the registered problems, in sorted order.
    candidates = list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems()
    candidates.sort()

    # Problems can be excluded through a flag; empty tokens are ignored.
    banned = [token for token in FLAGS.exclude_problems.split(",") if token]
    selected = [
        name for name in candidates
        if not any(token in name for token in banned)
    ]

    wanted = FLAGS.problem
    if not wanted:
        # Nothing requested means nothing selected.
        selected = []
    elif wanted[-1] == "*":
        # A trailing "*" selects every problem that starts with the prefix.
        prefix = wanted[:-1]
        selected = [name for name in selected if name.startswith(prefix)]
    else:
        # Without "*", only the single exactly-named problem is kept.
        selected = [name for name in selected if name == wanted]

    # Two special tasks need extra flags; without them they cannot proceed.
    # Remove TIMIT if paths are not given.
    if not FLAGS.timit_paths:
        selected = [name for name in selected if "timit" not in name]
    # Remove parsing if paths are not given.
    if not FLAGS.parsing_path:
        selected = [
            name for name in selected if "parsing_english_ptb" not in name
        ]

    # No problems left: report every supported name and fail.
    if not selected:
        supported = sorted(
            list(_SUPPORTED_PROBLEM_GENERATORS) + registry.list_problems())
        listing = "\n * ".join(supported)
        raise ValueError(
            "You must specify one of the supported problems to "
            "generate data for:\n * " + listing + "\n"
            + "TIMIT and parsing need data_sets specified with "
            "--timit_paths and --parsing_path.")

    # With no data directory given, warn and fall back to the default one.
    if not FLAGS.data_dir:
        FLAGS.data_dir = tempfile.gettempdir()
        tf.logging.warning(
            "It is strongly recommended to specify --data_dir. "
            "Data will be written to default data_dir=%s.",
            FLAGS.data_dir)
    # Expand "~" in the path, then create the directory.
    FLAGS.data_dir = os.path.expanduser(FLAGS.data_dir)
    tf.gfile.MakeDirs(FLAGS.data_dir)

    overview = registry.display_list_by_prefix(selected, starting_spaces=4)
    tf.logging.info("Generating problems:\n%s" % overview)

    # --only_list switches between "just show the problems" and
    # "show them and also generate their data".
    if FLAGS.only_list:
        return

    for name in selected:
        # Set the random seed first.
        set_random_seed()
        # Problems from the two sources are generated by different functions.
        generate = (
            generate_data_for_problem
            if name in _SUPPORTED_PROBLEM_GENERATORS
            else generate_data_for_registered_problem
        )
        generate(name)