def modifyShakespeare(file_in, file_out):
    """Apply the Shakespeare clean-up pipeline through a ``FileBuilder``.

    A ``FileBuilder`` is created from ``file_in`` and ``file_out`` and a
    fixed sequence of its editing methods is invoked on it.  The steps are
    order-dependent and must not be rearranged.

    ``line_first`` and ``line_last`` are not defined in this function; they
    are expected to be provided by the enclosing module.

    Args:
        file_in: Passed as the first argument to ``FileBuilder``.
        file_out: Passed as the second argument to ``FileBuilder``.

    Returns:
        None.  Nothing is returned explicitly; any output is whatever
        ``FileBuilder`` itself produces.
    """
    fb = FileBuilder(file_in, file_out)

    # Cut everything from ``line_last`` to the end, then everything before
    # ``line_first``.  NOTE(review): presumably this strips the header and
    # trailer surrounding the actual text -- confirm against the input file.
    fb.remove_lines(line_last, len(fb))
    fb.remove_lines(0, line_first)

    # Line-level clean-up.
    fb.remove_repeated_empty_lines()
    fb.remove_lines_within("<<", ">>")
    fb.remove_number_lines(6)

    # Character-level clean-up: the first set of characters is replaced by
    # nothing, the second set by a single space.
    # NOTE(review): the patterns look like regex character classes --
    # confirm how ``FileBuilder.sub`` interprets them.
    fb.sub('[<}`\r]', '')
    fb.sub('[|_]', ' ')

    # Characters handed to ``to_words``.
    separators = [
        '!',
        '"',
        '&',
        '(',
        ')',
        ',',
        '-',
        '.',
        ':',
        ';',
        '?',
        '[',
        ']',
        '\n',
    ]
    fb.to_words(separators)
    fb.to_words_apostrophes()

    # Final normalisation.
    fb.to_lowercase()
    fb.strip_lines()
def test_to_words():
    """Check the first three lines produced by ``FileBuilder.to_words``.

    The builder is created from the ``to_words.txt`` fixture; ``outfile`` is
    not defined in this function and is expected to come from the enclosing
    module.
    """
    separators = ['!', '@', '#', '$', '%', '^', '&', '(', ')', '\n']
    expected_lines = [
        'abcdefg \n',
        '$ % # & ( ) \n',
        ' ^ \n',
    ]

    fb = FileBuilder("../test_text/to_words.txt", outfile)
    fb.to_words(separators)

    # Compare line by line, in order, stopping at the first mismatch.
    for index, expected in enumerate(expected_lines):
        assert fb[index] == expected