示例#1
0
class MarkovOnTopic(object):
    '''
    Wrapper around a ``MarkovChain`` (kept on ``self.mc``) that prints
    generated strings which pass the ``drop`` filter.

    TODO: ngrams bumped to four or five-grams
    TODO: first ngram is the technical noun topic of the tweet
    TODO: stem the topics to narrow them. or even better find a way to standardize synonyms to the same word and then stem
    TODO: db key has no hashmarks but the values do
    TODO: db key in lowercase, but values are original
    '''
    def __init__(self, db_path='markov.db'):
        '''
        Open the chain at ``db_path``; if that raises, build one with ``seed_db``.

        db_path -- passed to ``MarkovChain`` and, on failure, to ``seed_db``.
        '''
        try:
            self.mc = MarkovChain(db_path, verbose=False)
        except Exception:
            # ``Exception`` rather than a bare ``except:`` so Ctrl-C and
            # SystemExit are not mistaken for a missing database.
            print('No database found at path. Creating new database.')
            self.mc = seed_db(db_path)

    def generate_db(self, docs, filename=None):
        '''
        Remember ``docs`` on ``self.docs``.

        Database generation itself is not implemented yet; ``filename`` is
        accepted but currently unused.
        '''
        self.docs = docs

    def generate_topics(self):
        '''Placeholder: not implemented yet, does nothing.'''
        pass

    def generate_string(self, seed=None):
        '''
        Print one generated string that ``drop`` does not reject.

        seed -- optional; when truthy the string is produced with
                ``generateStringWithSeed(seed)``, otherwise with
                ``generateString()``.

        Nothing is returned. Candidates are regenerated until ``drop`` gives
        a falsy result, so this does not terminate if every candidate is
        dropped.
        '''
        while True:
            if seed:
                gen_text = self.mc.generateStringWithSeed(seed)
            else:
                gen_text = self.mc.generateString()
            if not drop(gen_text):
                # Call form prints the same text on Python 2 and, unlike the
                # old ``print gen_text`` statement, also parses on Python 3.
                print(gen_text)
                break
示例#2
0
class MarkovOnTopic(object):
    '''
    Wrapper around a ``MarkovChain`` (kept on ``self.mc``) that prints
    generated strings which pass the ``drop`` filter.

    TODO: ngrams bumped to four or five-grams
    TODO: first ngram is the technical noun topic of the tweet
    TODO: stem the topics to narrow them. or even better find a way to standardize synonyms to the same word and then stem
    TODO: db key has no hashmarks but the values do
    TODO: db key in lowercase, but values are original
    '''
    def __init__(self, db_path='markov.db'):
        '''
        Open the chain at ``db_path``; if that raises, build one with ``seed_db``.

        db_path -- passed to ``MarkovChain`` and, on failure, to ``seed_db``.
        '''
        try:
            self.mc = MarkovChain(db_path, verbose=False)
        except Exception:
            # ``Exception`` rather than a bare ``except:`` so Ctrl-C and
            # SystemExit are not mistaken for a missing database.
            print('No database found at path. Creating new database.')
            self.mc = seed_db(db_path)

    def generate_db(self, docs, filename=None):
        '''
        Remember ``docs`` on ``self.docs``.

        Database generation itself is not implemented yet; ``filename`` is
        accepted but currently unused.
        '''
        self.docs = docs

    def generate_topics(self):
        '''Placeholder: not implemented yet, does nothing.'''
        pass

    def generate_string(self, seed=None):
        '''
        Print one generated string that ``drop`` does not reject.

        seed -- optional; when truthy the string is produced with
                ``generateStringWithSeed(seed)``, otherwise with
                ``generateString()``.

        Nothing is returned. Candidates are regenerated until ``drop`` gives
        a falsy result, so this does not terminate if every candidate is
        dropped.
        '''
        while True:
            if seed:
                gen_text = self.mc.generateStringWithSeed(seed)
            else:
                gen_text = self.mc.generateString()
            if not drop(gen_text):
                # Call form prints the same text on Python 2 and, unlike the
                # old ``print gen_text`` statement, also parses on Python 3.
                print(gen_text)
                break
示例#3
0
def generate_seedless_markov_sentence():
    '''
    Generate one unseeded sentence from the text supplied by ``get_text()``.

    A ``MarkovChain`` is created, fed the space-joined output of
    ``get_text()``, and asked for a single string.

    Returns ``''`` when ``check_blacklist`` flags the generated string,
    otherwise the string passed through ``sentence_case``.
    '''
    chain = MarkovChain(verbose=False)
    corpus = ' '.join(get_text())
    chain.generateDatabase(corpus)
    candidate = chain.generateString()
    return '' if check_blacklist(candidate) else sentence_case(candidate)