def test_monus():
    rules = {'1*1': '*', '*1': '*', '*': '.1'}
    m = Markov(rules)
    assert m.evaluate('11111*111') == '111'
    assert m.evaluate('11*11111') == '1'
    assert m.evaluate('11*11') == '1'
def Recall(text, archivist):
    lines = text.splitlines()
    version = parse(lines[0]).strip()
    version = version if len(version.strip()) > 1 else lines[4]
    archivist.logger.info(
        "Dictionary version: {} ({} lines)".format(version, len(lines))
    )
    if version == "v4":
        chatlog = Chatlog.loadl(lines[0:9])
        cache = "\n".join(lines[10:])
        parrot = Markov.loads(cache)
    elif version == "v3":
        chatlog = Chatlog.loadl(lines[0:8])
        cache = "\n".join(lines[9:])
        parrot = Markov.loads(cache)
    elif version == "v2":
        chatlog = Chatlog.loadl(lines[0:7])
        cache = "\n".join(lines[8:])
        parrot = Markov.loads(cache)
    elif version == "dict:":
        chatlog = Chatlog.loadl(lines[0:6])
        cache = "\n".join(lines[6:])
        parrot = Markov.loads(cache)
    else:
        chatlog = Chatlog.loadl(lines[0:4])
        cache = lines[4:]
        parrot = Markov(load=cache, mode=Markov.ModeList)
        # raise SyntaxError("Scribe: Chatlog format unrecognized.")
    s = Scribe(chatlog, archivist)
    s.parrot = parrot
    return s
def main(params, tracklist):
    # twitter info
    consumer_key = params.get('twitter', 'consumer_key')
    consumer_secret = params.get('twitter', 'consumer_secret')
    access_token = params.get('twitter', 'access_token')
    access_token_secret = params.get('twitter', 'access_secret')
    # auth & api
    auth = tweepy.auth.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    # markov
    corpus = params.get('markov', 'corpus')
    is_pickled = params.getboolean('markov', 'pickled')
    m = Markov(corpus)
    if is_pickled:
        m.load_db()
    else:
        m.make_db()
    # listener
    print 'Starting listener for "{}"...'.format(', '.join(tracklist))
    listener = Listener(m, is_pickled, api=api)
    stream = tweepy.Stream(auth, listener)
    stream.filter(track=tracklist)
def get_city():
    num_countries = 1
    while random.randint(1, 30) == 1:
        num_countries += 1
    countries = []
    corpus = ""
    with open('countrycodes.csv', 'r', encoding='utf-8') as f:
        lines = f.read().splitlines()
    while len(countries) < num_countries:
        try:
            num_cities = 0
            # keep drawing countries until one has at least 100 cities
            while num_cities < 100:
                country_name, rand_country_code = random.choice(
                    lines).split(',')
                cities = df[df['Country'] == rand_country_code.lower()]
                num_cities = len(cities)
            corpus += cities.iloc[:, 2].str.cat(sep='\n') + '\n'
            countries.append(country_name)
        except Exception:
            pass
    model = Markov(corpus)
    city_name = None
    while not city_name:
        city_name = model.get_word()
    mashup_edition = ''
    if num_countries > 1:
        mashup_edition = 'Special Mashup Edition:\n'
    tweet_str = mashup_edition + city_name + ', ' + '-'.join(countries)
    return tweet_str
def markov_test():
    print("### Markov Test")
    m = Markov(3, "word")
    m.train("../data/texts/lotr_clean.txt")
    generated = m.generate(10)
    pp.pprint(generated)
    print("\n")
def test_markov_with_higher_level(size=100, file_name="sample1.txt"):
    markov = Markov()
    mark = markov.train_model_higher(
        normalize(os.getcwd() + "\\samples\\" + file_name))
    text = markov.generate_sentence(size, mark)
    save_text(text)
    save_long_file(text, file_name)
    print(text)
def test_fold_in(self):
    """It should create a parody text"""
    markov = Markov(corpus=self.sample_page1)
    key = markov.get_key()
    print(key)
    print(markov.next(key))
def load_markov(filename):
    """Create a Markov object and load its data from filename."""
    markov = Markov()
    try:
        markov.load(filename)
    except IOError as e:
        print(format_error(e))
    return markov
def markov_generate_text(filename):
    '''My method of markov chain text generation'''
    markov = Markov(filename)
    result = None
    while result is None:
        result = markov.generate_text()
    return result
def loadParrot(self, scribe):
    newParrot = False
    self.parrot = self.archivist.wakeParrot(scribe.cid())
    if self.parrot is None:
        newParrot = True
        self.parrot = Markov()
        scribe.teachParrot(self.parrot)
        self.store(scribe)
    return newParrot
def index():
    words_list = read_file('seinfeld.txt')
    num_words = int(request.args.get('num_words', 10))
    chain = Markov(words_list)
    sampled_sentence = chain.create_sentence(num_words)
    return render_template('index.html',
                           random_sentence=sampled_sentence,
                           num_words=num_words)
def test_replace_identical_subs_with_single_letter():
    rules = {
        'aa': 'a',
        'bb': 'b',
        'cc': 'c',
    }
    m = Markov(rules)
    assert m.evaluate('abbcacccaabb') == 'abcacab'
def lambda_handler(event, context):
    """Entrypoint for AWS Lambda.

    Event contains the payload from the AWS Lambda call."""
    user_text = event['userText']
    mixin = event['mixin']
    length = int(event['length'])
    full_text = user_text + load_mixin(mixin)
    markover = Markov(full_text)
    return markover.generate(length)
class Parser:
    """
    Base class for a lexer/parser that has the rules defined as methods
    """
    tokens = ()
    precedence = ()

    def __init__(self, **kw):
        self.debug = kw.get('debug', 0)
        self.sentences = []
        self.markov = Markov()
        self.clause_starter = {}
        self.para_starter = []
        self.words = kw.get('words', None)
        try:
            modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
        except:
            modname = "parser" + "_" + self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"
        #print self.debugfile, self.tabmodule

        # Build the lexer and parser
        lex.lex(module=self, debug=self.debug)
        yacc.yacc(module=self,
                  debug=self.debug,
                  debugfile=self.debugfile,
                  tabmodule=self.tabmodule)

    def run(self):
        s = sys.stdin.read()
        s = s.replace('\n\n', '\x00')
        s = s.replace('\x00\x00', '\x00')
        s = s.replace('\n\n', '')
        s = s.replace('\n', ' ')
        s = s.replace('  ', ' ')
        yacc.parse(s)
        print self.sentences
        self.markov.printout()
        print
        print "clause starters"
        keys = self.clause_starter.keys()
        keys.sort()
        for k in keys:
            v = self.clause_starter[k]
            print "\t", repr(k), v
        print
        print "para starters", self.para_starter
        print
        self.markov.prepare()
        sentence = random_sentence(self.markov, 800,
                                   starters=self.clause_starter,
                                   para_starters=self.para_starter)
        print_sentence(sentence, word_filter=self.words)
def markovComments(comments):
    words = comments.split(' ')
    m = Markov(words)
    startWord = random.choice(m.getStartingUnits())
    chain = m.getNextSentence(startWord, 20)
    sentence = startWord
    for word in chain:
        sentence += ' ' + word
    return sentence
def test_sort():
    rules = {
        'ba': 'ab',
        'ca': 'ac',
        'da': 'ad',
        'cb': 'bc',
        'db': 'bd',
        'dc': 'cd'
    }
    m = Markov(rules)
    assert m.evaluate('cbadb') == 'abbcd'
def test_replace():
    a = 'abcd'
    b = 'x'
    assert Markov.replace(a, b, 1, 2) == 'axd'

    a = 'abcd'
    b = 'xx'
    assert Markov.replace(a, b, 1, 2) == 'axxd'

    a = 'abcde'
    b = 'x'
    assert Markov.replace(a, b, 1, 3) == 'axe'
def test_iteration_count_gets_reset():
    rules = {
        'ba': 'ab',
        'ca': 'ac',
        'da': 'ad',
        'cb': 'bc',
        'db': 'bd',
        'dc': 'cd'
    }
    m = Markov(rules, max_iter=6)
    for i in range(2):
        assert m.evaluate('cbadb') == 'abbcd'
def send():
    if request.method == 'POST':
        username = request.form['username']
        Mgen = Markov()
        res = Mgen.markov(username)
        error = None
        return render_template('markovres.html', res=res, author=username, error=error)
    return render_template('index.html')
def newTweet():
    tweet_data = Markov(prefix="tweets")
    tweetline = tweet_data.generate()
    tweet = ""
    for word in tweetline:
        tweet += word + " "
    tweet = tweet.rstrip()
    if len(tweet) > 140:
        return newTweet()
    else:
        return tweet
def caption(username, start, end):
    scraper = Scraper(username, start, end)
    text = scraper.text
    try:
        markov = Markov(text)
    except:
        if len(text) < 1:
            return "Username not found or private account"
        return "Not enough captions found to generate new text"
    text = markov.generate()
    if text is None or len(text) < 1:
        text = "Not enough captions found to generate new text"
    return text
def tweet(userName):
    twitter = OAuth1Session(settings.CONSUMER_KEY,
                            settings.CONSUMER_SECRET,
                            settings.ACCESS_TOKEN,
                            settings.ACCESS_TOKEN_SECRET)
    markov = Markov()
    tweets = markov.generate()
    randomtweet = tweets
    #randomtweet = tweets[random.randrange(len(tweets))]
    params = {"status": randomtweet}
    #params = {"status": '@' + userName + ' ' + randomtweet + ' ' + str(datetime.datetime.today())}
    req = twitter.post("https://api.twitter.com/1.1/statuses/update.json", params=params)
def main():
    """Set up application and send love."""
    api = get_handle()
    markov = Markov(get_corpus())
    people = People(api)
    love = markov.generate_markov_text(random.randrange(15, 20))
    to_email = people.get_random()
    print to_email
    print love
    if api.send_love(to_email, love):
        return 0
    else:
        return -1
class TestMarkov(unittest.TestCase):
    """Test class for markov.py."""

    def setUp(self):
        print('*** setup ***')
        self.markov = Markov()

    def test_add_sentence(self):
        self.__add_sentence_bocchan()
        self.assertTrue(len(self.markov.starts) > 0)
        self.assertTrue(len(self.markov.dic) > 0)

    def test_generate(self):
        self.__add_sentence_bocchan()
        input_texts = [
            '初めまして、坊ちゃん',
            'あら、ご病気ですか',
            'あらあら、大変ですね',
            'いたずらして病気になっちゃったんですか?',
            'そんな威張らなくてもいいでしょう',
            'はあ、そんなもんですか',
            '遅刻しちゃだめですね',
            'よく覚えてないんですか?',
            'ターナー?',
            'どなたですか?'
        ]
        for input_text in input_texts:
            with self.subTest():
                tokens = Morph.analyze(input_text)
                keyword = 'N/A'
                for token in tokens:
                    if Morph.is_keyword(token):
                        keyword = token.surface
                generated = self.markov.generate(keyword)
                print('you > ' + input_text)
                print('generated > ' + generated)
                print('************')
                self.assertTrue(len(generated) > 0)

    # private method
    def __add_sentence_bocchan(self):
        sample_file = '../KOISURU_PROGRAM/sample/markov/bocchan.txt'
        content = ''
        original_content = codecs.open(sample_file, 'r', 'shift_jis')
        for row in original_content:
            content += row.rstrip()
        original_content.close()
        texts = re.split(r'[。??!! ]+', content)
        for text in texts:
            if text == '':
                continue
            tokens = Morph.analyze(text)
            self.markov.add_sentence(tokens)
            print('.', end='')
        print('')
def main():
    """Entry point for the code execution."""
    f = open('sample.txt', 'r')
    text = f.read()
    clean_text_list = prep_text(text)
    m = Markov()
    m.add_text(clean_text_list)
    text = generate_paragraph(m, 5, 10)
    print(text)
    f.close()
def get_all_words(nightmare: Nightmare):
    nightmare._cursor.execute('select comments.body from comments;')
    data = nightmare._cursor.fetchall()
    words = ''
    for d in data:
        words += d[0]
    n = 10
    l = words.split()
    c = [l[i:i + n] for i in range(0, len(l), n)]
    print(c)
    m = Markov(c)
    # print(words.split())
    return m.generate_markov_text()
def generate_hardcoded(states, matrix, sim_length, dest_path):
    """
    states     : list of playable notes
    matrix     : the Markov matrix representing transition probabilities
    sim_length : number of notes in the final result
    dest_path  : where to save the final result
    """
    note_markov = Markov()
    note_markov.set_markov_matrix(states, matrix)
    sim_notes = note_markov.simulate_markov_process(sim_length)
    sim_durs = [1 for el in sim_notes]
    creator = Midi_Creator()
    creator.create_mid(sim_notes, sim_durs)
    creator.save(dest_path)
def __init__(self, name, bus, user_id=1):
    self.name = name
    self.bus = bus
    # Set the weather API key from env vars
    # For openweathermap API key
    # self.api_key = "&APPID={}".format(os.environ.get('APPID'))
    # For Wunderground API key
    self.api_key = os.environ.get('WUNDERGROUND_API_KEY')
    # self.server_path = 'http://localhost:5001/api'
    self.user_id = user_id
    self.marky = Markov(
        limit=600,
        ngram=6,
        paths=['static/markov_text/alice_in_wonderland.txt',
               'static/markov_text/through_the_looking_glass.txt'])
class Test_Markov(unittest.TestCase):

    def setUp(self):
        self.test_text = "I have bought this bonnet.\r\nI am Mr. Darcy.\r\nI am.\r\n"
        with open("in.txt", 'w') as f:
            f.write(self.test_text)
        self.test_outfile = "out.txt"
        ospl.process_data("in.txt", self.test_outfile, True)
        self.test_markov = Markov(self.test_outfile)

    def test_unigrams(self):
        self.assertEqual(type(self.test_markov.unigrams), defaultdict)
        self.assertEqual(round(sum(self.test_markov.unigrams.values()), 5), 1.0)
        self.assertEqual(self.test_markov.unigrams["**Beginning**"], 0.15)

    def test_bigrams(self):
        self.assertEqual(type(self.test_markov.bigrams), defaultdict)
        self.assertEqual(round(sum(self.test_markov.bigrams["I"].values()), 5), 1.0)
        self.assertEqual(self.test_markov.bigrams["**Beginning**"]["I"], 1.0)
        self.assertEqual(self.test_markov.bigrams["I"]["have"], 1/3.)

    def test_bigrams_counter(self):
        bigram_freqs = self.test_markov.make_ngram_freqs_dict(2)
        self.assertEqual(type(bigram_freqs), defaultdict)
        self.assertEqual(bigram_freqs["I"]["am"], 2)
        self.assertEqual(bigram_freqs["."]["**End**"], 3)

    def test_trigrams(self):
        self.assertEqual(type(self.test_markov.trigrams), defaultdict)
        self.assertEqual(round(sum(self.test_markov.trigrams[("I", "am")].values()), 5), 1.0)
        self.assertEqual(self.test_markov.trigrams[("**Beginning**", "I")]["am"], 2/3.)
        self.assertEqual(self.test_markov.trigrams[("I", "have")]["rocketships"], 1e-5)

    def test_trigrams_counter(self):
        trigram_freqs = self.test_markov.make_ngram_freqs_dict(3)
        self.assertEqual(type(trigram_freqs), defaultdict)
        self.assertEqual(trigram_freqs[("Darcy", ".")]["**End**"], 1)
        self.assertEqual(trigram_freqs[("**Beginning**", "I")]["am"], 2)

    def test_make_sentence(self):
        self.test_markov.make_ngram_sentence(n=2)
        self.test_markov.make_ngram_sentence(n=3)
        self.assertRaises(AssertionError, self.test_markov.make_ngram_sentence, n=5)

    def test_score_sentence(self):
        """Make up a nonsense sentence and make sure it scores worse than a
        Markov sentence. Also make up a sentence with the same words as the
        corpus but in a completely bogus order; that should also score lower."""
        nonsense = "Vestiges of cetaceous gentility."
        nonsense_score = self.test_markov.score_sentence(nonsense)
        real_trigram_sent = self.test_markov.make_ngram_sentence()
        real_trigram_score = self.test_markov.score_sentence(real_trigram_sent)
        weird_order = "Darcy . I bonnet am bought have am"
        weird_order_score = self.test_markov.score_sentence(weird_order)
        self.assertGreater(nonsense_score, real_trigram_score)
        self.assertGreater(weird_order_score, real_trigram_score)

    def tearDown(self):
        os.remove("in.txt")
        os.remove("out.txt")
def main():
    global database
    global markov
    global updater
    global chat_states

    # This is safe as long as we only access the db within the dispatcher
    # callbacks. If not then we need locks.
    database = Database(sqlite3.connect(config.DBFILE, check_same_thread=False))
    database.initialize()
    markov = Markov(database)
    chat_states = ChatStates(database)
    updater = Updater(config.TOKEN)
    updater.last_update_id = database.get_parameter('last_update', -1) + 1
    admin = Admin(database, markov, updater, chat_states, config.ADMIN_LIST)

    dp = updater.dispatcher
    admin.register_handlers(dp)
    dp.add_handler(MessageHandler(Filters.sticker, on_sticker), 0)
    dp.add_handler(MessageHandler(Filters.all, on_message), 0)
    # Commit updates after being handled. This avoids messages being handled
    # twice or updates being missed.
    dp.add_handler(MessageHandler(Filters.all, on_post_message), 1)
    dp.add_handler(AllUpdateHandler(on_post_update), 1)
    dp.add_error_handler(on_error)

    updater.start_polling()
    updater.idle()
    os._exit(0)
def setUp(self):
    self.test_text_one = "I have bought this bonnet.\r\nI am Mr. Darcy.\r\nI am.\r\n"
    with open("in_one.txt", 'w') as f:
        f.write(self.test_text_one)
    self.test_outfile_one = "out_one.txt"
    ospl.process_data("in_one.txt", self.test_outfile_one, True)
    self.test_markov_one = Markov(self.test_outfile_one)

    self.test_text_two = "Stubb is whale.\r\nStarbuck is THE whale.\r\nI am a porpoise.\r\n"
    with open("in_two.txt", 'w') as f:
        f.write(self.test_text_two)
    self.test_outfile_two = "out_two.txt"
    ospl.process_data("in_two.txt", self.test_outfile_two, True)
    self.test_markov_two = Markov(self.test_outfile_two)

    self.test_markov = DoubleMarkov(self.test_markov_one, self.test_markov_two)
class ClumsyRobot(discord.Client):

    def __init__(self):
        # load the markov chain data
        print('*** Loading markov data from {}...'.format(
            MARKOV_DATA_SAVE_LOCATION))
        try:
            with open(MARKOV_DATA_SAVE_LOCATION, 'rb') as markov_file:
                self._markov = Markov(pickle.load(markov_file))
        except IOError:
            print('*** Unable to load file!')
            self._markov = Markov()
        self._messages_since_last_autosave = 0
        super().__init__()

    # Connected as a discord client
    async def on_ready(self):
        print('*** Connected as {}.'.format(self.user))

    # Message received
    async def on_message(self, message):
        # don't respond to or learn from our own messages
        if message.author == self.user:
            return

        # learn and occasionally engage in discussion
        content = message.content.lower()
        self._markov.DigestInput(content)
        if random() < RESPONSE_FREQUENCY:
            response = self._markov.GenerateChain(content)
            await message.channel.send(response)

        # save the markov data sometimes
        self._messages_since_last_autosave += 1
        if self._messages_since_last_autosave == MESSAGES_PER_AUTOSAVE:
            print('*** Saving markov data to {}...'.format(
                MARKOV_DATA_SAVE_LOCATION))
            try:
                with open(MARKOV_DATA_SAVE_LOCATION, 'wb') as markov_file:
                    pickle.dump(self._markov.GetData(), markov_file)
                self._messages_since_last_autosave = 0
            except IOError:
                print('*** Unable to save file!')
def get_branches(self, order, nb_nodes, branching=None):
    assert nb_nodes > 0
    m = Markov(self.p0, self.p1)
    b = [m() for i in xrange(nb_nodes)]
    b[0] = 0
    b[-1] = 0
    return b
def main():
    twitter = Twython(KEY, access_token=TOKEN)
    try:
        tweets = twitter.get_user_timeline(screen_name=raw_input("twitter username: "))
    except:
        print "User does not exist."
        return
    try:
        count = int(raw_input("number of tweets to generate: "))
        text = ""
        for tw in tweets:
            text += re.sub("https?:\/\/([^\s]+)", "", tw['text']) + ". "
        mk = Markov(text)
        sentences = ""
        for i in range(0, count):
            print mk.sentence()
    except:
        print "Please enter a valid number."
class TestParsing(unittest.TestCase):

    def setUp(self):
        self.markov = Markov()

    def test_basic_tokenize(self):
        result = self.markov.tokenize("grraaaaa brainss monkeybutter")
        self.assertEquals(result, ["grraaaaa", "brainss", "monkeybutter"])

    def test_basic_scan(self):
        self.markov.scan(['a', 'b', 'c', 'd', 'e'])
        self.assertTrue('a' in self.markov.chains)
        self.assertTrue('b' in self.markov.chains)
        self.assertTrue('c' in self.markov.chains)
        self.assertTrue('b' in self.markov.chains['a']['next'])
        self.assertTrue('c' in self.markov.chains['a']['next']['b']['next'])
        self.assertTrue('d' in self.markov.chains['b']['next']['c']['next'])

    def test_count(self):
        m = Markov(2)
        m.scan(['a', 'b', 'b', 'b', 'c'])
        self.assertEquals(m.chains['a']['count'], 1)
        self.assertEquals(m.chains['b']['count'], 3)
        self.assertEquals(m.chains['a']['next']['b']['count'], 1)
        self.assertEquals(m.chains['b']['next']['b']['count'], 2)
        self.assertEquals(m.chains['b']['next']['c']['count'], 1)

    def test_probabilities(self):
        m = Markov(2)
        m.scan(['bacon', 'fish', 'bacon', 'lung', 'bacon', 'lung', 'bacon', 'lung', '.'])
        result = m.compute_probabilities(3)
        self.assertEquals(len(result), 3)
        # least likely to most
        self.assertEquals(result[0]['word'], 'fish')
        self.assertEquals(result[0]['chance'], 1.0/8.0 * (1.0 / 0.5))
        self.assertEquals(result[1]['word'], 'lung')
        self.assertEquals(result[1]['chance'], 3.0/8.0 * (1.0 / 0.5))
        self.assertEquals(result[2]['word'], 'bacon')
        self.assertEquals(result[2]['chance'], 0.5 * (1.0 / 0.5))
def __init__(self, jid, password, room, nick, fileName):
    sleekxmpp.ClientXMPP.__init__(self, jid, password)
    self.room = room
    self.nick = nick
    # initialize markov chain with input file
    inputFile = open(fileName)
    self.markov = Markov(inputFile)
    self.add_event_handler("session_start", self.start)
    self.add_event_handler("groupchat_message", self.messageHandler)
def main():
    r = praw.Reddit(user_agent="Markov Comment Generator by /u/officialdovahkiin")
    name = raw_input("reddit username: ")
    user = r.get_redditor(name)
    if user:
        try:
            comment_amount = int(raw_input("number of comments to consider: "))
            sentence_amount = int(raw_input("number of sentences to generate: "))
            comments = user.get_comments(limit=(None if comment_amount <= 0 else comment_amount))
            text = ""
            for c in comments:
                text += format_comment(c.body) + ". "
            mk = Markov(text)
            sentences = ""
            for i in range(0, sentence_amount):
                sentences += mk.sentence() + ". "
            print sentences
        except:
            print "Please enter a valid number"
    else:
        print "No comments found for /u/" + name
def send_markov(user, password, recipient, subject, host, files,
                key_size, limit, limit_words, paragraph):
    # Initialize Markov chain.
    m = Markov(files, key_size)

    # Build message.
    verse = m.generate_text(limit, not limit_words, paragraph)
    message = MIMEText(verse)
    message["subject"] = subject
    message["to"] = recipient
    message = message.as_string()

    # Connect.
    smtp = SMTP(host)
    smtp.starttls()
    smtp.login(user, password)

    # Send.
    smtp.sendmail(user, recipient, message)

    # Disconnect.
    smtp.close()
class FullTextMarkovEvaluator(HaikuEvaluator):
    """
    Evaluate the full text of a haiku using Markov chains to determine how
    well it scores against the trained data.
    """
    def __init__(self, weight=1, prefix=None):
        self.prefix = prefix or getattr(settings, "MARKOV_DATA_PREFIX", "goodlines")
        # use the resolved prefix, which falls back to settings
        self.data = Markov(prefix=self.prefix, **getattr(settings, 'REDIS', {}))
        super(FullTextMarkovEvaluator, self).__init__(weight=weight)

    def evaluate(self, haiku):
        haiku_text = []
        for line in haiku.get_lines():
            haiku_text += line.split()
        score = self.data.score_for_line(haiku_text)
        return score
def index_feed(target):
    # load config
    if os.path.isfile("credentials.json"):
        try:
            with open("credentials.json", "r") as f:
                credentials = json.load(f)
        except ValueError:
            print "credentials.json is malformed."
            exit()
    else:
        empty_credentials = {u'consumer-key': u'',
                             u'consumer-secret': u'',
                             u'access-token': u'',
                             u'access-token-secret': '',
                             u'user-name': u''}
        with open("credentials.json", "w") as f:
            json.dump(empty_credentials, f, indent=4, separators=(',', ':'))
        print "credentials.json is missing. i've created a blank one for you."
        exit()

    # authenticate
    auth = tweepy.OAuthHandler(credentials["consumer-key"], credentials["consumer-secret"])
    auth.set_access_token(credentials["access-token"], credentials["access-token-secret"])

    # connect to the api
    api = tweepy.API(auth)

    # add tweets to redis
    tweet_data = Markov(target)

    # show my tweets
    try:
        my_tweets = api.user_timeline(target, count=20000)
        for tweet in my_tweets:
            cleaned_tweet = clean_tweet(tweet.text)
            tweet_string = " ".join(cleaned_tweet)
            print 'indexing tweet: "' + tweet_string + '"'
            tweet_data.add_line_to_index(cleaned_tweet)
    except Exception as e:
        print e
class MarkovTweet(object):

    def __init__(self, url):
        "Generate the markov chain stuff first."
        file_ = open(url, 'r')
        self.mkov = Markov(file_)

    def tweet(self, words=('', '')):
        "Capitalise the first word and trim to the end of a sentence."
        twit = self.mkov.generate_markov_text(50, words)
        twit = twit[0].upper() + twit[1:]
        while twit != '' and twit[-1] not in ';:?.,!':
            twit = twit[:-1]
        if twit == '':
            twit = self.tweet()
        else:
            if twit[-1] in ';:,':
                twit = twit[:-1] + '.'
        return twit
class TestMarkovClass(unittest.TestCase):
    """
    Test that the Markov wrapper class behaves as expected
    """
    def setUp(self):
        self.markov = Markov(prefix="testclass", db=11)

    def test_add_line_to_index(self):
        line = ['i', 'ate', 'a', 'peach']
        line1 = ['i', 'ate', 'one', 'peach']
        line2 = ['i', 'ate', 'a', 'sandwich']
        self.markov.add_line_to_index(line)
        self.markov.add_line_to_index(line1)
        self.markov.add_line_to_index(line2)
        self.assertEqual(self.markov.client.zscore("testclass:i:ate", "a"), 2.0)
        self.assertEqual(self.markov.client.zscore("testclass:ate:a", "peach"), 1.0)

    def test_score_for_line(self):
        self.test_add_line_to_index()
        line = ['i', 'ate', 'a', 'peach']
        self.assertEqual(self.markov.score_for_line(line), 100)

    def test_generate(self):
        self.test_add_line_to_index()
        generated = self.markov.generate(max_words=3)
        assert len(generated) >= 2
        assert len(generated) <= 3
        generated = self.markov.generate(seed=['ate', 'one'], max_words=3)
        assert 'peach' in generated
        assert 'sandwich' not in generated

    def tearDown(self):
        """Clean up our redis keys."""
        keys = self.markov.client.keys(self.markov.prefix + "*")
        for key in keys:
            self.markov.client.delete(key)
def main(params):
    consumer_key = params.get('twitter', 'consumer_key')
    consumer_secret = params.get('twitter', 'consumer_secret')
    access_token = params.get('twitter', 'access_token')
    access_token_secret = params.get('twitter', 'access_secret')

    auth = tweepy.auth.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    corpus = params.get('markov', 'corpus')
    is_pickled = params.getboolean('markov', 'pickled')
    m = Markov(corpus)
    if is_pickled:
        m.load_db()
    else:
        m.make_db()

    text = m.gen(climit=140)
    api.update_status(text)
class MarkovPlugin(object):

    def __init__(self):
        self.markov = Markov(prefix='irc')
        self.silent = False
        self.dumb = False
        self.throttles = []

    @admin_command
    def silence(self, connection, event):
        self.silent = True

    @admin_command
    def unsilence(self, connection, event):
        self.silent = False

    @admin_command
    def dumb_markov(self, connection, event):
        self.dumb = True

    @admin_command
    def learn_markov(self, connection, event):
        self.dumb = False

    @admin_command
    def flush_brain(self, connection, event):
        connection.logger.info('FLUSHING MARKOV BRAIN!!!')
        self.markov.flush()

    @admin_command
    def set_brain(self, connection, event):
        prefix = event.command_params[0]
        connection.logger.info('SETTING MARKOV BRAIN {prefix: %s}' % prefix)
        self.markov.prefix = prefix

    def is_ignored(self, user):
        t = self.get_throttle(user)
        return t and t.count > 3

    @plugin_hook
    def on_privmsg(self, connection, event):
        user = connection.users[event.nick]
        if self.is_ignored(user):
            connection.logger.warning('IGNORED {nick: %s}' % event.nick)
            self.do_throttle(connection.bot.ioloop, user, timedelta(minutes=1))
            return
        m = re.match(QUERY_REGEX.format(connection.nick), event.text)
        if not self.silent and m:
            tokens = tokenize_line(m.group('message'))
            self.do_reply(connection, event, tokens)
            ioloop = connection.bot.ioloop
            self.do_throttle(ioloop, user)
        elif not m and not self.dumb and not event.command:
            connection.logger.info('Learning {message: %s}' % event.text)
            message = event.text
            tokens = tokenize_line(message)
            self.learn_message(tokens)

    def do_reply(self, connection, event, tokens):
        connection.logger.info('Loading reply {tokens: %s}' % repr(tokens))
        reply = load_reply_from_markov(self.markov, event.nick, tokens)
        if reply:
            connection.reply_with_nick(event, reply)
        else:
            connection.reply_with_nick(event, ('I have nothing to'
                                               ' say yet. Teach me more!'))

    def do_throttle(self, ioloop, user, timeout=TIMEOUT):
        throttle = self.get_throttle(user)
        if throttle:
            ioloop.remove_timeout(throttle.io_timeout)
            throttle.count += 1
        else:
            throttle = MarkovThrottle(user)
            self.throttles.append(throttle)
        rem_user_thrtl = partial(self.remove_throttle, user)
        throttle.io_timeout = ioloop.add_timeout(timeout, rem_user_thrtl)

    def remove_throttle(self, user):
        for t in list(self.throttles):
            if t.user is user:
                self.throttles.remove(t)

    def get_throttle(self, user):
        for t in self.throttles:
            if t.user is user:
                return t
        else:
            return None

    def learn_message(self, tokens):
        self.markov.add_line_to_index(tokens)
class AveryBot(SingleServerIRCBot):

    def __init__(self, mindfile, blfile, logfile, agefile, real_id, real,
                 ident, birthday, friend):
        SingleServerIRCBot.__init__(self, [(ident.server, ident.port)],
                                    ident.nickname, ident.realname)
        self.mindfile = mindfile
        self.blfile = blfile
        self.logfile = logfile
        self.agefile = agefile
        # load mind
        try:
            self.mind = pickle.load(open(mindfile, 'rb'))
        except IOError:
            print("No markov file (" + mindfile + ") found; making a blank one")
            self.mind = Markov(2)
        # words that will highlight some nicks, in the form of a dictionary
        # from words to the nicks they hilight.
        try:
            self.blacklist = pickle.load(open(self.blfile, "rb"))
        except FileNotFoundError:
            self.blacklist = []

        class State(dict):
            def __getitem__(self, key):
                if key not in self:
                    super(State, self).__setitem__(key, random.getstate())
                return super(State, self).__getitem__(key)

        # the handle generator
        self.handlegen = Markov(2)
        for line in open("nicks", "r"):
            line = line[:-1]  # strip newl
            n = []
            for c in line:
                n.append(MarkovElem(c))
            #n = map(MarkovElem, line)
            n[0].tags["pos"] = "BEGIN"
            n[-1].tags["pos"] = "END"
            self.handlegen.learn(n)
        try:
            self.states = pickle.load(open("rstate", "rb"))
        except FileNotFoundError:
            self.states = State()
        self.waiting_for_friend = queue.Queue()  # channels to send lurkers to
        self.nick = ident.nickname
        self.password = ident.password
        self.channel = ident.channel  # active channels
        self.cafe = ident.cafe
        self.rstate = random.getstate()  # random state
        self.real_id = real_id  # whether self.real is a user or a nick
        self.real = real  # real user she imitates (i.e. avery)
        #self.save_counter = 0 # write to disk every 100 talks
        self.friend = friend
        self.birthday = birthday

    def at_bday(self, birthday):
        now = datetime.datetime.today()
        bday = self.birthday.replace(year=now.year)
        if bday <= now:
            bday = bday.replace(year=bday.year + 1)
        print("next birthday:", bday)
        bday_action = threading.Timer((bday - now).total_seconds(), birthday)
        bday_action.daemon = True
        bday_action.start()

    def on_welcome(self, c, e):
        c.mode(self.nick, '+B-x')
        c.privmsg("NickServ", "IDENTIFY " + self.password)
        for channel in self.channel:
            c.join(channel)
        c.join(self.cafe)

        def birthday():
            age = int(open(self.agefile, 'r').read()) + 1
            c.action(self.cafe, "is %s years old today!" % age)
            print(age, file=open(self.agefile, 'w'))
            self.at_bday(birthday)

        self.at_bday(birthday)

    # cache each channel's mode
    def on_join(self, c, e):
        if e.source.nick == self.nick:
            c.mode(e.target, "")

    # basically borrowed from irc.bot.py's _on_mode(), since this library is a
    # god damn piece of shit
    def on_channelmodeis(self, c, e):
        print("debugging mode change:", e.arguments[1])
        modes = irc.modes.parse_channel_modes(e.arguments[1])
        t = e.arguments[0]
        ch = self.channels[t]
        for mode in modes:
            if mode[0] == '+':
                f = self.channels[t].set_mode(mode[1], mode[2])
            else:
                f = self.channels[t].clear_mode(mode[1], mode[2])

    def talk(self, args, diag=False):
        for i in range(3):
            if len(args) == 0:
                sentence = markov.prettify(self.mind.gen(), diag)
            elif len(args) == 1:
                k = self.mind.find_context(args[0])
                if k is not None:
                    sentence = markov.prettify(self.mind.gen_out(k), diag)
                else:
                    return "i don't know anything about " + args[0]
            elif len(args) == 2:
                k = self.mind.find_context(args)
                if k is not None:
                    sentence = markov.prettify(self.mind.gen_out(k), diag)
                else:
                    return "i don't know anything about " + " ".join(args)
            else:
                return "i can only talk about one (or maybe two) things at a time"
            for channel in self.channel:
                if channel not in self.channels:
                    print("AVEBOT ERROR: oh f**k this shouldn't actually happen")
                    return "i broke"
            # catch line length
            if len(sentence) > 450:  # this should probably be good
                print("message too long. retrying")
                continue
            # prevent convoing
            if sentence.startswith("!"):
                continue
            # prevent hilights
            tryagain = False
            for nope in self.blacklist:
                # generate non-blacklisted nick
                try_again = True
                while try_again:
                    new = "".join(map(str, self.handlegen.gen()))
                    try_again = False
                    for bad in self.blacklist:
                        if new.lower() == bad.lower():
                            try_again = True
                print("replacing", nope, "with", new)
                sentence = re.compile(re.escape(nope),
                                      re.IGNORECASE).sub("\x02" + new + "\x02", sentence)
            return sentence
        return "it's too hard :("

    def on_privmsg(self, c, e):
        print("RECEIVED:", e.arguments[0])
        if e.source.nick == self.friend:
            if not self.waiting_for_friend.empty():
                c.privmsg(self.waiting_for_friend.get(), e.arguments[0])
            else:
                print("somebody's lurking!...")
        else:
            # friends don't tell friends what to do
            self.do_shit(c, e, e.source.nick)

    def on_action(self, c, e):
        print(e.target)
        if irc.client.is_channel(e.target):  # this irc library is a PoFS
            return
        if e.source.nick == self.friend:
            if not self.waiting_for_friend.empty():
                c.action(self.waiting_for_friend.get(), e.arguments[0])
            else:
                print("somebody's lurking!...")
        else:
            # friends don't tell friends what to do
            self.do_shit(c, e, e.source.nick)

    def on_pubmsg(self, c, e):
        self.do_shit(c, e, e.target)

    def do_shit(self, c, e, target):
        text = e.arguments[0]
        print(repr(text))
        command = ""
        if target == self.cafe:
            if text.split()[0].strip(string.punctuation) in ["averbot", "averybot"]:
                command = text.split()[1]
                args = text.split()[2:]
        else:
            if len(text) and text[0] == '@':
                command = text.split()[0][1:]
                args = text.split()[1:]
        if command:
            #command = text.split()[0]
            #args = text.split()[1:]
            if command == "talk":
                self.states[target] = random.getstate()
                c.privmsg(target, self.talk(args))
            elif command == "sing":
                if target in self.channels and self.channels[target].has_mode("c"):
                    c.privmsg(target, "Aesthetic freedom is like free speech; it is, indeed, a form of free speech.")
                    c.privmsg(target, "and when the rights of free speech and free press are at risk, all of the other liberties we hold dear are endangered.")
                self.states[target] = random.getstate()
                raw = self.talk(args)
                out = []
                for word in raw.split():
                    out.append("\x03" + str(random.randrange(16)) + word)
                c.privmsg(target, " ".join(out) + "\x03")
            elif command == "bees":
                self.states[target] = random.getstate()
                c.privmsg(target, self.talk(["swamp", "monsters"]).replace("bees", "\x02bees\x02"))
            elif command == "sin":
                self.states[target] = random.getstate()
                sin = random.choice(["lust", "gluttony", "greed", "sloth",
                                     "wrath", "envy", "pride"])
                sin = "".join("[" + "".join(j) + "]" for j in zip(sin.lower(), sin.upper()))
                c.privmsg(target, self.talk([sin]))
            elif command == "bee":
                c.privmsg(target, "there is never just one...")
            elif command == "send":
                e.arguments = [" ".join(args[1:])]
                self.do_shit(c, e, args[0])
            elif command == "don't":
                self.blacklist.append(e.source.nick)
                pickle.dump(self.blacklist, open(self.blfile, 'wb'))
            elif command == "do":
                if e.source.nick in self.blacklist:
                    self.blacklist.remove(e.source.nick)
                    pickle.dump(self.blacklist, open(self.blfile, 'wb'))
            elif command in ["blacklist", "bl"]:
                c.privmsg(e.source.nick, ", ".join(self.blacklist))
            elif command == "diag":
                c.privmsg(target, self.mind.diags)
            elif command == "vtalk":
                self.states[target] = random.getstate()
                c.privmsg(target, self.talk(args, True) + " [" + str(self.mind.diags) + "]")
            elif command == "freeze":
                pickle.dump(self.states, open("rstate", 'wb'))
            elif command == "thaw":
                try:
                    self.states = pickle.load(open("rstate", 'rb'))
                    random.setstate(self.rstate)
                except FileNotFoundError:
                    # TODO this is shit
                    pass
            elif command in ["repeat", "again"]:
                temp = random.getstate()
                random.setstate(self.states[e.source.nick])
                c.privmsg(target, self.talk(args))
                random.setstate(temp)
            elif command in ["vrepeat", "vagain", "v****a"]:
                temp = random.getstate()
                random.setstate(self.states[e.source.nick])
                c.privmsg(target, self.talk(args, True) + " [" + str(self.mind.diags) + "]")
                random.setstate(temp)
            elif command == "save":
                pickle.dump(self.mind, open(self.mindfile, 'wb'))
            elif command == "load":
                self.mind = pickle.load(open(self.mindfile, 'rb'))
            elif command in ["quit", "die", "bye", "byebye", "f**k"]:
                pickle.dump(self.mind, open(self.mindfile, 'wb'))
                msg = ":(" if command == "@f**k" else "byebye"
                self.die(msg)  # bug: "byebye" doesn't always do
            elif command == "help":
                c.privmsg(target, "naw, but feel free to check out my @source ;)")
            elif command == "source":
                c.privmsg(target, "https://github.com/anachrome/averybot")
            elif command == "george":
                c.privmsg(target, "".join(i + "\x02" if i != 'g' else i
                                          for i in "wow i'm a color hating fascist"))
            elif command == "nbsp":
                c.privmsg(target, "!convo grep Е")
            elif command in ["snut", "Snut"]:
                c.action(target, "scrollin")
                for line in open("snut", 'r'):
                    c.privmsg(target, line.rstrip())
            elif command in ["convo", "hug", "static", "fm", "alert"]:
                print(self.friend, "!" + command + " " + " ".join(args))
                self.waiting_for_friend.put(target)
                c.privmsg(self.friend, ("!" + command + " " + " ".join(args)).strip())
            elif command[0] == "!":
                # ignore lurkers
                pass
            else:
                command = ""
        if not command:  # to prevent learning commands
            if self.real_id == "user":
                source = e.source.user
            elif self.real_id == "nick":
                source = e.source.nick
            if self.real in source.lower():  # extremely f*****g liberal
                self.mind.learn(markov.sanitize(text))
                print(text, file=open(self.logfile, 'a'))
                pickle.dump(self.mind, open(self.mindfile, 'wb'))
#!/usr/bin/env python

from markov import Markov

mm = Markov("heiseticker-text.txt")

print("###############")
print("### bigrams ###")
print("###############")
print("Markov chain text of size {}:\n{}\n".format(10, mm.generate_markov_text_bigrams(10)))
print("Markov chain text of size {}:\n{}\n".format(20, mm.generate_markov_text_bigrams(20)))
print("Markov chain text of size {}:\n{}\n".format(30, mm.generate_markov_text_bigrams(30)))
print("Markov chain text of size {}:\n{}\n".format(40, mm.generate_markov_text_bigrams(40)))

print("################")
print("### trigrams ###")
print("################")
print("Markov chain text of size {}:\n{}\n".format(10, mm.generate_markov_text_trigrams(10)))
print("Markov chain text of size {}:\n{}\n".format(20, mm.generate_markov_text_trigrams(20)))
print("Markov chain text of size {}:\n{}\n".format(30, mm.generate_markov_text_trigrams(30)))
print("Markov chain text of size {}:\n{}\n".format(40, mm.generate_markov_text_trigrams(40)))

print("###################")
print("### quadrograms ###")
print("###################")
print("Markov chain text of size {}:\n{}\n".format(10, mm.generate_markov_text_quadrograms(10)))
print("Markov chain text of size {}:\n{}\n".format(20, mm.generate_markov_text_quadrograms(20)))