'--lmfile', dest="lm_file", required=True, nargs=1, help="Holmes.lm_format.questions.txt file path") parser.add_argument('-mf', '--mffile', dest="mf_file", required=True, nargs=1, help="Holmes.machine_format.questions.txt file path") parser.add_argument('-o', '--ofile', dest="output_file", required=True, nargs=1, help="Output file name") args = parser.parse_args() lmf_file = args.lm_file mf_file = args.mf_file ofile = args.output_file[0] lmf_questions = tools.Sentences(lmf_file) mf_sentences = tools.Sentences(mf_file) sg_questions = make_sentences(mf_sentences) sentences = merge_lm_questions_qwords(lmf_questions, sg_questions) tools.write_collection(sentences, ofile)
'--files', dest="file_names", required=True, nargs="*", help="Input file names") parser.add_argument('-o', '--outputdir', dest="output_dir", required=True, nargs=1, help="Output dir") args = parser.parse_args() file_names = args.file_names output_dir = args.output_dir[0] for file_name in file_names: if path.isfile(file_name): file_basename = path.basename(file_name) out_file = path.join(output_dir, file_basename) print('Processing ' + file_name) file_content = preprocess_text(file_name) file_content = format_text(file_content) sents = make_sententens(file_content) tools.write_collection(sents, out_file) else: print(file_name + ' is not found')
for sent in sentences: sent = process_sent_sg(sent) sents.append(sent) return sents if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-l', '--lmfile', dest="lm_file", required=True, nargs=1, help="Holmes.lm_format.questions.txt file path") parser.add_argument('-mf', '--mffile', dest="mf_file", required=True, nargs=1, help="Holmes.machine_format.questions.txt file path") parser.add_argument('-o', '--ofile', dest="output_file", required=True, nargs=1, help="Output file name") args = parser.parse_args() lmf_file = args.lm_file mf_file = args.mf_file ofile = args.output_file[0] lmf_questions = tools.Sentences(lmf_file) mf_sentences = tools.Sentences(mf_file) sg_questions = make_sentences(mf_sentences) sentences = merge_lm_questions_qwords(lmf_questions, sg_questions) tools.write_collection(sentences, ofile)
file_content = re.sub(r"\bMr\.|mr\.\b", "mr", file_content) file_content = re.sub(r"\bMrs\.|mrs\.\b", "mrs", file_content) file_content = re.sub(r"\bDr\.|dr\.\b", "dr", file_content) return file_content if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-f", "--files", dest="file_names", required=True, nargs="*", help="Input file names") parser.add_argument("-o", "--outputdir", dest="output_dir", required=True, nargs=1, help="Output dir") args = parser.parse_args() file_names = args.file_names output_dir = args.output_dir[0] for file_name in file_names: if path.isfile(file_name): file_basename = path.basename(file_name) out_file = path.join(output_dir, file_basename) print("Processing " + file_name) file_content = preprocess_text(file_name) file_content = format_text(file_content) sents = make_sententens(file_content) tools.write_collection(sents, out_file) else: print(file_name + " is not found")