def modifyShakespeare(file_in, file_out):
    """Run a fixed sequence of ``FileBuilder`` clean-up steps on a text.

    A ``FileBuilder`` is created from ``file_in`` and ``file_out`` and the
    steps below are invoked on it in order.  The function has no return
    statement, so it returns ``None``.

    NOTE(review): ``line_first`` and ``line_last`` are not defined inside
    this function -- presumably module-level line indices; confirm.
    NOTE(review): the exact effect of each step is defined by
    ``FileBuilder``, which is not visible here; the comments below only
    restate what the call names and arguments suggest.
    """
    # Tokens handed to ``to_words`` (presumably each is split off as its
    # own word -- verify against FileBuilder.to_words).
    punctuation_tokens = [
        '!', '"', '&', '(', ')', ',', '-', '.', ':', ';', '?', '[', ']',
        '\n',
    ]

    text = FileBuilder(file_in, file_out)

    # The tail range is removed before the head range; presumably so that
    # ``line_last`` is applied before any earlier lines are dropped.
    text.remove_lines(line_last, len(text))
    text.remove_lines(0, line_first)

    # Line-level clean-up.
    text.remove_repeated_empty_lines()
    text.remove_lines_within("<<", ">>")
    text.remove_number_lines(6)

    # Character-level clean-up: the first pattern's matches are replaced
    # with nothing, the second pattern's matches with a single space.
    text.sub('[<}`\r]', '')
    text.sub('[|_]', ' ')

    # Word-level normalisation.
    text.to_words(punctuation_tokens)
    text.to_words_apostrophes()
    text.to_lowercase()
    text.strip_lines()
# Example #2
0
def test_to_words():
    """Check ``FileBuilder.to_words`` against the ``to_words.txt`` fixture.

    After calling ``to_words`` with the token list below, the first three
    indexed entries of the builder must equal the expected strings.

    NOTE(review): ``outfile`` is not defined inside this function --
    presumably a module-level output path; confirm.
    """
    tokens = ['!', '@', '#', '$', '%', '^', '&', '(', ')', '\n']
    expected_lines = (
        'abcdefg \n',
        '$ % # & ( ) \n',
        '   ^   \n',
    )

    fixture = FileBuilder("../test_text/to_words.txt", outfile)
    fixture.to_words(tokens)

    # Compare entry by entry, in index order, stopping at the first mismatch.
    for index, expected in enumerate(expected_lines):
        assert fixture[index] == expected