示例#1
0
                        '--lmfile',
                        dest="lm_file",
                        required=True,
                        nargs=1,
                        help="Holmes.lm_format.questions.txt file path")
    parser.add_argument('-mf',
                        '--mffile',
                        dest="mf_file",
                        required=True,
                        nargs=1,
                        help="Holmes.machine_format.questions.txt file path")
    parser.add_argument('-o',
                        '--ofile',
                        dest="output_file",
                        required=True,
                        nargs=1,
                        help="Output file name")

    args = parser.parse_args()
    lmf_file = args.lm_file
    mf_file = args.mf_file
    ofile = args.output_file[0]

    lmf_questions = tools.Sentences(lmf_file)
    mf_sentences = tools.Sentences(mf_file)
    sg_questions = make_sentences(mf_sentences)

    sentences = merge_lm_questions_qwords(lmf_questions, sg_questions)

    tools.write_collection(sentences, ofile)
示例#2
0
                        '--files',
                        dest="file_names",
                        required=True,
                        nargs="*",
                        help="Input file names")
    parser.add_argument('-o',
                        '--outputdir',
                        dest="output_dir",
                        required=True,
                        nargs=1,
                        help="Output dir")

    args = parser.parse_args()
    file_names = args.file_names
    output_dir = args.output_dir[0]

    for file_name in file_names:
        if path.isfile(file_name):
            file_basename = path.basename(file_name)
            out_file = path.join(output_dir, file_basename)

            print('Processing ' + file_name)

            file_content = preprocess_text(file_name)
            file_content = format_text(file_content)
            sents = make_sententens(file_content)

            tools.write_collection(sents, out_file)
        else:
            print(file_name + ' is not found')
    for sent in sentences:
        sent = process_sent_sg(sent)
        sents.append(sent)
    
    return sents

if __name__ == '__main__':
    # Script entry point: take the two question files and the output name
    # from the command line, build the question collections, combine them
    # with merge_lm_questions_qwords and write the result out.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-l',
        '--lmfile',
        dest="lm_file",
        required=True,
        nargs=1,
        help="Holmes.lm_format.questions.txt file path",
    )
    arg_parser.add_argument(
        '-mf',
        '--mffile',
        dest="mf_file",
        required=True,
        nargs=1,
        help="Holmes.machine_format.questions.txt file path",
    )
    arg_parser.add_argument(
        '-o',
        '--ofile',
        dest="output_file",
        required=True,
        nargs=1,
        help="Output file name",
    )
    cli_args = arg_parser.parse_args()

    # With nargs=1 argparse stores every option as a one-element list.  Only
    # the output name is unwrapped; the two input values are handed to
    # tools.Sentences exactly as parsed (i.e. still wrapped in a list).
    # NOTE(review): confirm tools.Sentences really expects a list of paths.
    lm_source = cli_args.lm_file
    mf_source = cli_args.mf_file
    destination = cli_args.output_file[0]

    lm_questions = tools.Sentences(lm_source)
    machine_sentences = tools.Sentences(mf_source)
    sg_questions = make_sentences(machine_sentences)

    tools.write_collection(
        merge_lm_questions_qwords(lm_questions, sg_questions),
        destination,
    )






    file_content = re.sub(r"\bMr\.|mr\.\b", "mr", file_content)
    file_content = re.sub(r"\bMrs\.|mrs\.\b", "mrs", file_content)
    file_content = re.sub(r"\bDr\.|dr\.\b", "dr", file_content)

    return file_content


if __name__ == "__main__":
    # Script entry point: every input path that is an existing file is run
    # through preprocess_text -> format_text -> make_sententens, and the
    # resulting collection is written to a file of the same base name inside
    # the output directory.  Other paths are reported and skipped.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-f",
        "--files",
        dest="file_names",
        required=True,
        nargs="*",
        help="Input file names",
    )
    arg_parser.add_argument(
        "-o",
        "--outputdir",
        dest="output_dir",
        required=True,
        nargs=1,
        help="Output dir",
    )
    cli_args = arg_parser.parse_args()

    # nargs=1 yields a one-element list, hence the [0].
    target_dir = cli_args.output_dir[0]

    for source_path in cli_args.file_names:
        # Guard clause: anything that is not a regular file is only reported.
        if not path.isfile(source_path):
            print(source_path + " is not found")
            continue

        destination = path.join(target_dir, path.basename(source_path))

        print("Processing " + source_path)

        processed = format_text(preprocess_text(source_path))
        tools.write_collection(make_sententens(processed), destination)