def generate_seedless_markov_sentence():
    """Generate one Markov sentence without a seed phrase.

    A fresh ``MarkovChain`` is trained on the space-joined output of
    ``get_text()`` and asked for a single string.

    Returns:
        ``''`` when ``check_blacklist`` flags the generated string,
        otherwise the string passed through ``sentence_case``.
    """
    chain = MarkovChain(verbose=False)
    corpus = ' '.join(get_text())
    chain.generateDatabase(corpus)

    candidate = chain.generateString()
    # Blacklisted output is replaced by an empty string rather than retried.
    if check_blacklist(candidate):
        return ''
    return sentence_case(candidate)
def generate_topic_markov_sentence(texts, index):
    """Generate one Markov sentence steered by topics.

    The topics come from ``get_topics(texts, index)``; the chain itself is
    trained on the space-joined output of ``get_text()``.

    Args:
        texts: Forwarded unchanged to ``get_topics``.
        index: Forwarded unchanged to ``get_topics``.

    Returns:
        ``''`` when ``check_blacklist`` flags the generated string,
        otherwise the string passed through ``sentence_case``.
    """
    topic_list = get_topics(texts, index)

    chain = MarkovChain(verbose=False)
    corpus = ' '.join(get_text())
    chain.generateDatabase(corpus)

    candidate = chain.generateStringWithTopics(topic_list)
    # Blacklisted output is replaced by an empty string rather than retried.
    if check_blacklist(candidate):
        return ''
    return sentence_case(candidate)
def generate_database(self, captured_text_path='captured_raw_text.txt'):
    """Build the Markov database from a file of captured text and dump it.

    Each line of the file is run through ``PrepareText.prepare``. Empty
    results and results rejected by ``self._drop`` are discarded, and the
    survivors are de-duplicated via a set before training.

    Args:
        captured_text_path: Path of the text file to read, one entry per
            line.

    Side effects:
        Prints progress messages, calls ``dumpdb()`` on a chain created
        with ``self.db_path``, and stores that chain on ``self.markov``.
    """
    preparer = PrepareText()
    with open(captured_text_path) as source:
        lines = source.readlines()

    print('Preparing texts')
    progress = ProgressBar()
    prepared = [preparer.prepare(line) for line in progress(lines)]
    # Falsy entries are skipped before self._drop is ever consulted.
    unique_texts = {
        text for text in prepared if text and not self._drop(text)
    }

    print('Generating database')
    chain = MarkovChain(self.db_path, verbose=False)
    chain.generateDatabase('\n'.join(unique_texts), n=4, make_lowercase=True)
    chain.dumpdb()
    self.markov = chain
def generate_database(self, captured_text_path='captured_raw_text.txt'):
    """Train and dump a Markov database from captured raw text.

    Reads every line of ``captured_text_path``, prepares each with
    ``PrepareText.prepare``, keeps the non-empty results that
    ``self._drop`` does not reject (collected in a set, so duplicates
    collapse), and trains a ``MarkovChain`` on them joined by newlines.

    Args:
        captured_text_path: File to read the raw text lines from.

    Side effects:
        Prints two progress messages, calls ``dumpdb()`` on the chain
        created with ``self.db_path``, and assigns it to ``self.markov``.
    """
    text_preparer = PrepareText()
    with open(captured_text_path) as handle:
        raw_lines = handle.readlines()

    print('Preparing texts')
    bar = ProgressBar()
    prepared_items = [text_preparer.prepare(entry) for entry in bar(raw_lines)]

    kept = set()
    for item in prepared_items:
        # Falsy items are discarded without calling self._drop on them.
        if not item or self._drop(item):
            continue
        kept.add(item)

    print('Generating database')
    training_text = '\n'.join(kept)
    markov_chain = MarkovChain(self.db_path, verbose=False)
    markov_chain.generateDatabase(training_text, n=4, make_lowercase=True)
    markov_chain.dumpdb()
    self.markov = markov_chain
def generate_markov_sentence(original_sentence):
    """Generate a Markov sentence seeded from the start of another sentence.

    The chain is trained on the space-joined output of ``get_text()``.
    ``original_sentence`` is passed through ``strip_tags`` and its first
    three words are tried as the seed; if generation fails, the first two
    words are tried; if that fails as well, the result of
    ``generate_seedless_markov_sentence()`` is returned instead.

    Args:
        original_sentence: Text whose leading words provide the seed.

    Returns:
        ``''`` when ``check_blacklist`` flags the seeded string, otherwise
        the string passed through ``sentence_case``. When both seeded
        attempts fail, whatever ``generate_seedless_markov_sentence``
        returns.
    """
    mc = MarkovChain(verbose=False)
    mc.generateDatabase(' '.join(get_text()))
    stripped = strip_tags(original_sentence)

    # Longest seed first, then a shorter one.
    for seed_length in (3, 2):
        try:
            seed = ' '.join(stripped.split()[:seed_length])
            sent = mc.generateStringWithSeed(seed)
        except Exception:
            # Still best-effort, but unlike a bare ``except:`` this lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
        break
    else:
        # Reached only when no seed length produced a string.
        return generate_seedless_markov_sentence()

    if check_blacklist(sent):
        return ''
    return sentence_case(sent)