_START_MARKER = '*** START OF THIS PROJECT GUTENBERG EBOOK'
_END_MARKER = '*** END OF THIS PROJECT GUTENBERG EBOOK'


def get_word_list(file_name):
    """Return the lower-cased words of a Project Gutenberg book.

    If ``file_name`` does not exist locally, the book is downloaded from
    gutenberg.org and cached under that name first.  The download URL is
    built from ``file_name[2:-4]`` (the name without its first two and
    last four characters) followed by the file name itself, so a name of
    the form ``pg<ID>.txt`` is presumably expected -- confirm with callers.

    Only the text between the Gutenberg START and END marker lines is
    kept.  Every punctuation character except the apostrophe is replaced
    by a space, the text is lower-cased, and it is split on whitespace.

    Args:
        file_name: Path of the local (cached) book text file.

    Returns:
        A list of the words in the book body, in order of appearance.

    Raises:
        ValueError: If the START or END marker is not found in the text.
    """
    if not exists(file_name):
        # Download *before* opening the cache file for writing, so that a
        # failed download does not leave an empty file behind that would
        # break every later call.
        page = URL('http://www.gutenberg.org/cache/epub/' + file_name[2:-4] + '/' + file_name)
        contents = page.read().strip()
        with open(file_name, 'w') as cache:
            cache.write(contents)

    # The context manager guarantees the handle is closed after reading.
    with open(file_name, 'r') as book:
        txt = book.read()

    start = txt.index(_START_MARKER)
    end = txt.index(_END_MARKER)

    # Skip the remainder of the START marker line (it carries the title and
    # closing asterisks) so that header words do not leak into the result.
    line_end = txt.find('\n', start)
    if line_end == -1 or line_end > end:
        # No line break between the two markers: just skip the marker text.
        body_start = start + len(_START_MARKER)
    else:
        body_start = line_end + 1
    txt = txt[body_start:end]

    # Apostrophes are kept so contractions such as "don't" stay one word.
    for dot in string.punctuation:
        if dot != "'":
            txt = txt.replace(dot, " ")

    return txt.lower().split()