def test_apostrophes():
    """Check the lines produced by ``to_words_apostrophes`` on the apostrophes fixture."""
    fixture = FileBuilder("../test_text/apostrophes.txt", outfile)
    fixture.to_words_apostrophes()

    # Expected content of the first five lines, in order.
    expected_lines = [
        "' hello '\n",
        " ' hello '\n",
        "can't\n",
        "' can't '\n",
        "' ' ' '\n",
    ]
    for index, expected in enumerate(expected_lines):
        assert fixture[index] == expected
def modifyShakespeare(file_in, file_out):
    """Run the Shakespeare clean-up pipeline on a ``FileBuilder``.

    A ``FileBuilder`` is created from ``file_in`` and ``file_out`` and the
    following steps are applied to it, in this order: drop the lines from
    ``line_last`` to the end, drop the lines from 0 to ``line_first``,
    collapse repeated empty lines, remove lines enclosed by ``<<`` / ``>>``,
    remove number lines, apply two character substitutions, split into
    words (punctuation first, then apostrophes), lowercase, and strip lines.

    ``line_first`` and ``line_last`` are module-level names defined outside
    this function.
    """
    text = FileBuilder(file_in, file_out)

    # The tail is removed before the head; presumably so that ``line_last``
    # is not invalidated by the head removal shifting indices -- confirm
    # against FileBuilder.remove_lines.
    total_lines = len(text)
    text.remove_lines(line_last, total_lines)
    text.remove_lines(0, line_first)

    text.remove_repeated_empty_lines()
    text.remove_lines_within("<<", ">>")
    text.remove_number_lines(6)

    # Ordered (pattern, replacement) pairs: the first set of characters is
    # deleted, the second set is replaced by a single space.
    substitutions = (
        ('[<}`\r]', ''),
        ('[|_]', ' '),
    )
    for pattern, replacement in substitutions:
        text.sub(pattern, replacement)

    # Characters handed to ``to_words``.
    word_chars = [
        '!', '"', '&', '(', ')', ',', '-',
        '.', ':', ';', '?', '[', ']', '\n',
    ]
    text.to_words(word_chars)
    text.to_words_apostrophes()

    text.to_lowercase()
    text.strip_lines()