class PykuEbooks:
    """Produce haikus from a Markov Chain fed with text taken from Reddit"""

    def __init__(self, ml, subreddit, location):
        """Read text from a subreddit, train the chain and attach a writer

        ml: Handed to MarkovChain as its max_links
        subreddit: The subreddit whose text feeds the chain
        location: "posts"|"comments" - Take the text from the bodies of the
                  subreddit's posts (noted by the original author as the
                  faster option) or from the bodies of the comments under
                  those posts (noted as able to return more text).

        Raises TypeError when `location` is anything else.
        """
        reader = RedditReader(subreddit)

        # Pick the fetch method first, then query the reader exactly once.
        if location == "posts":
            fetch_bodies = reader.get_many_post_bodies
        elif location == "comments":
            fetch_bodies = reader.get_many_comment_bodies
        else:
            raise TypeError('`location` must be either "posts" or "comments"')
        bodies = fetch_bodies()

        # The chain is published on the instance before it is trained.
        chain = MarkovChain(ml)
        self.mc = chain
        for body in bodies:
            chain.add_text(body)

        self.w = Writer(chain)

    def haiku(self):
        """Return whatever the writer's haiku() call produces"""
        return self.w.haiku()
filterer = self.__filterer(maximum, before, first, last) word_tuples = self.markov_chain.chain_from_words(before) return [wt for wt in word_tuples if filterer(wt)] def __filterer(self, maximum, before, first, last): """Create a word filterer that can be used for many words""" def filt(wt): """Determine if this word tuple is appropriate or not""" w, s = wt if s > maximum: return False if first and (not self.FIRST_QUERY.match(w)): return False if last and s == maximum and (not self.LAST_QUERY.search(w)): return False if not last and s == maximum and (not self.LINE_END_QUERY.search(w)): return False return True return filt if __name__ == "__main__": mc = MarkovChain(2) mc.add_text("This is a haiku. Written today (of all days). And it's not finished. This is a haiku. Written today (of all days). And it's not finished.") w = Writer(mc) print(w.haiku())