class MarkovOnTopic(object):
    '''
    Wrapper around a MarkovChain database that prints generated strings.

    TODO: ngrams bumped to four or five-grams
    TODO: first ngram is the technical noun topic of the tweet
    TODO: stem the topics to narrow them. or even better find a way to
          standardize synonyms to the same word and then stem
    TODO: db key has no hashmarks but the values do
    TODO: db key in lowercase, but values are original
    '''

    def __init__(self, db_path='markov.db'):
        '''
        Open the Markov chain stored at db_path, seeding a new one on failure.

        db_path -- path handed to MarkovChain and, on failure, to seed_db.
        '''
        try:
            self.mc = MarkovChain(db_path, verbose=False)
        except Exception:
            # Any ordinary failure is treated as "database missing".
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate instead of triggering a re-seed.
            print('No database found at path. Creating new database.')
            self.mc = seed_db(db_path)

    def generate_db(self, docs, filename=None):
        '''
        Remember docs on the instance. Stub: no database is generated yet.

        docs     -- stored unchanged as self.docs.
        filename -- currently unused.
        '''
        self.docs = docs

    def generate_topics(self):
        '''Placeholder; not implemented yet.'''
        pass

    def generate_string(self, seed=None):
        '''
        Print one generated string that drop() does not reject.

        seed -- optional seed passed to generateStringWithSeed; any falsy
                value (None, '') selects unseeded generation instead.

        Generation is retried until drop() returns a falsy value, so this
        does not return while drop() keeps rejecting the candidates.
        Returns None; the accepted text is only printed.
        '''
        while True:
            if seed:
                gen_text = self.mc.generateStringWithSeed(seed)
            else:
                gen_text = self.mc.generateString()
            if not drop(gen_text):
                # Call form prints the same text on Python 2 and Python 3.
                print(gen_text)
                return
# NOTE(review): an identical MarkovOnTopic definition appears immediately
# before this one; this later definition rebinds the name. Confirm whether
# the duplicate is intentional.
class MarkovOnTopic(object):
    '''
    Wrapper around a MarkovChain database that prints generated strings.

    TODO: ngrams bumped to four or five-grams
    TODO: first ngram is the technical noun topic of the tweet
    TODO: stem the topics to narrow them. or even better find a way to
          standardize synonyms to the same word and then stem
    TODO: db key has no hashmarks but the values do
    TODO: db key in lowercase, but values are original
    '''

    def __init__(self, db_path='markov.db'):
        '''
        Open the Markov chain stored at db_path, seeding a new one on failure.

        db_path -- path handed to MarkovChain and, on failure, to seed_db.
        '''
        try:
            self.mc = MarkovChain(db_path, verbose=False)
        except Exception:
            # Any ordinary failure is treated as "database missing".
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate instead of triggering a re-seed.
            print('No database found at path. Creating new database.')
            self.mc = seed_db(db_path)

    def generate_db(self, docs, filename=None):
        '''
        Remember docs on the instance. Stub: no database is generated yet.

        docs     -- stored unchanged as self.docs.
        filename -- currently unused.
        '''
        self.docs = docs

    def generate_topics(self):
        '''Placeholder; not implemented yet.'''
        pass

    def generate_string(self, seed=None):
        '''
        Print one generated string that drop() does not reject.

        seed -- optional seed passed to generateStringWithSeed; any falsy
                value (None, '') selects unseeded generation instead.

        Generation is retried until drop() returns a falsy value, so this
        does not return while drop() keeps rejecting the candidates.
        Returns None; the accepted text is only printed.
        '''
        while True:
            if seed:
                gen_text = self.mc.generateStringWithSeed(seed)
            else:
                gen_text = self.mc.generateString()
            if not drop(gen_text):
                # Call form prints the same text on Python 2 and Python 3.
                print(gen_text)
                return
def generate_markov_sentence(original_sentence):
    '''
    Generate a Markov sentence seeded with the opening words of a sentence.

    A fresh MarkovChain is built from the space-joined output of get_text().
    The tag-stripped original_sentence supplies the seed: its first three
    words are tried first, then its first two.

    original_sentence -- text whose leading words seed the generator.

    Returns '' when check_blacklist() flags the generated sentence, otherwise
    the sentence passed through sentence_case(). If both seeded attempts
    raise, the result of generate_seedless_markov_sentence() is returned
    as-is, without the blacklist check or sentence-casing applied here.
    '''
    mc = MarkovChain(verbose=False)
    mc.generateDatabase(' '.join(get_text()))
    words = strip_tags(original_sentence).split()

    # Longest seed first. Slicing never raises, so a failure here comes from
    # the generator itself (presumably a seed the chain cannot continue
    # from -- TODO confirm which exception MarkovChain raises and narrow it).
    for seed_width in (3, 2):
        seed = ' '.join(words[:seed_width])
        try:
            sent = mc.generateStringWithSeed(seed)
        except Exception:
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate instead of being treated as a bad seed.
            continue
        break
    else:
        # Neither seed worked; fall back to unseeded generation.
        return generate_seedless_markov_sentence()

    if check_blacklist(sent):
        return ''
    return sentence_case(sent)