import logging
from argparse import ArgumentParser

log = logging.getLogger(__name__)


def main():
    import config
    from pymarkovchain import MarkovChain

    optp = ArgumentParser()
    optp.add_argument("-d", "--database", dest="database",
                      help="Where to store the database")
    optp.add_argument("-f", "--file", dest="file",
                      help="File source to use for generating the database")
    opts = optp.parse_args()

    # Set up logging.
    logformat = "%(levelname)-8s %(name)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=logformat)

    if opts.database is None:
        try:
            opts.database = config.markovbrainfile
        except AttributeError:  # config has no markovbrainfile setting
            log.critical("I require a brainfile to write into!")
            exit(1)
    if opts.file is None:
        log.critical("I require an input file to learn from!")
        exit(1)

    mc = MarkovChain(opts.database)
    with open(opts.file) as source:
        mc.generateDatabase(source.read())
def init_model(self, texts):
    from pymarkovchain import MarkovChain
    model = MarkovChain()
    model.generateDatabase(
        '\n'.join(map(lambda text: text.payload, texts)), '\n')
    return model
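A note on the second argument above: generateDatabase's second positional parameter is the sentence-separator pattern used to split the training text (several snippets below pass '\n' the same way), so here each joined payload line becomes its own training sentence. A minimal sketch under that assumption, with placeholder text standing in for the real payloads:

# Minimal sketch, assuming generateDatabase(text, sentenceSep) semantics as
# described above: with '\n' as the separator, each line is one sentence.
from pymarkovchain import MarkovChain

model = MarkovChain()
model.generateDatabase("first line of text\nsecond line\nthird line", '\n')
print(model.generateString())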
class EuroMarkov:
    def __init__(self):
        self.mc = MarkovChain("./markovdata")

    def generateCountryList(self):
        countryList = []
        for filename in os.listdir("json_lyrics/2015"):
            countryList.append(os.path.splitext(filename)[0])
        return countryList

    def loadFiles(self, startYear, endYear, countryList):
        model = ""
        for year in range(startYear, endYear + 1):
            for country in countryList:
                fname = "json_lyrics/" + str(year) + "/" + country + ".json"
                if os.path.isfile(fname):
                    with open(fname, "r") as myfile:
                        data = json.load(myfile)
                        model += data['lyrics'] + '\n'
        return model

    def runMarkov(self, model):
        self.mc.generateDatabase(model)

    def generateString(self):
        return self.mc.generateString()
class TextGenerator:
    def __init__(self, generatorName, trainString, prefixLength):
        self.generatorName = generatorName
        self.chain = MarkovChain()
        self.chain.generateDatabase(trainString, n=prefixLength)
        self.currState = []
        self.hyphenator = Hyphenator('en_US')
        self.syllableQ = Queue()
        self.stripPattern = re.compile(r'[\W_]+')
        while len(self.currState) < prefixLength:
            self.currState = self.chain.generateString().split()[-(prefixLength + 1):-1]

    def load_next_word(self):
        nextword = ""
        try:
            while nextword == "":
                nextword = self.stripPattern.sub('', self.chain._nextWord(self.currState))
            self.currState = self.currState[1:]
            self.currState.append(nextword)
            if len(nextword) < 4:  # because hyphenator doesn't work for words shorter than 4 letters
                self.syllableQ.put(nextword)
            else:
                for syllable in self.hyphenator.syllables(nextword):
                    self.syllableQ.put(syllable)
        except UnicodeEncodeError:
            print("unicode error")

    def get_next_syllable(self):
        if self.syllableQ.empty():
            self.load_next_word()
        return self.syllableQ.get()
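TextGenerator steps the chain one word at a time through pymarkovchain's private _nextWord(state) instead of asking for whole sentences, keeping a sliding window of prefixLength words as the current state. A stripped-down sketch of that stepping loop follows; it is illustrative only, since _nextWord is an internal API whose exact behaviour may differ between versions (the empty-result guard mirrors the retry loop above, where an empty word marks a sentence boundary):

# Sketch of the word-by-word stepping used by TextGenerator above. _nextWord
# is a private pymarkovchain method, so treat this as an assumption-laden demo.
from pymarkovchain import MarkovChain

chain = MarkovChain()
chain.generateDatabase("the cat sat on the mat. the cat ran up the tree.", n=2)

# Seed the state with the first two words of a generated sentence, which form
# a prefix pair that is guaranteed to exist in the database.
state = chain.generateString().split()[:2]
for _ in range(10):
    word = chain._nextWord(state)
    if not word:  # sentence boundary / no continuation
        break
    print(word)
    state = state[1:] + [word]  # slide the prefix window forward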
def lyrics():
    artist = request.form['artist']
    lines = int(request.form['lines'])
    if not artist:
        return redirect(url_for('index'))

    # Get a response of sample lyrics from the provided artist
    uri = "http://api.lyricsnmusic.com/songs"
    params = {
        'api_key': API_KEY,
        'artist': artist,
    }
    response = requests.get(uri, params=params)
    lyric_list = response.json()

    # Parse results into a long string of lyrics
    lyrics = ''
    for lyric_dict in lyric_list:
        lyrics += lyric_dict['snippet'].replace('...', '') + ' '

    # Generate a Markov model
    mc = MarkovChain()
    mc.generateDatabase(lyrics)

    result = []
    for line in range(0, lines):
        result.append(mc.generateString())
    return render_template('lyrics.html', result=result, artist=artist)
class BeerMarkov():
    def __init__(self, reviews_file, markov_dir):
        self._reviews_file = reviews_file
        self._markov_dir = markov_dir
        self._markov = MarkovChain(markov_dir + '/beer_desc')
        self._name_markov = MarkovChain(markov_dir + '/beer_name')
        self.refresh_database()

    def refresh_database(self):
        with open(self._reviews_file, 'r') as review_data:
            reviews = json.load(review_data)
        reviews_string = [r['desc'] for r in reviews]
        names_string = [r['name'] for r in reviews]
        new_markov = MarkovChain(self._markov_dir + '/beer_desc')
        new_markov.generateDatabase(' '.join(reviews_string))
        new_name_markov = MarkovChain(self._markov_dir + '/beer_name')
        new_name_markov.generateDatabase('.'.join(names_string))
        self._markov = new_markov
        self._name_markov = new_name_markov

    def get_review(self):
        return self._markov.generateString() + '. ' + \
            self._markov.generateString()
def main():
    auth = tweepy.OAuthHandler(key, secret)
    auth.set_access_token(token, token_secret)
    client = tweepy.API(auth)

    mc = MarkovChain(markDirectory)
    superString = createSuperString('trump.txt')
    mc.generateDatabase(superString)

    while True:
        phrase = mc.generateString()
        try:
            print(phrase)
        except UnicodeEncodeError:
            continue
        try:
            answer = input()
            if answer == 'y':
                client.update_status(phrase)
        except tweepy.TweepError:
            continue
def fetch_lyrics(artist, lines):
    API_KEY = os.environ.get('API_KEY')
    uri = "http://api.lyricsnmusic.com/songs"
    params = {
        'api_key': API_KEY,
        'artist': artist,
    }
    response = requests.get(uri, params=params)
    lyric_list = response.json()

    lyrics = ''
    for lyric_dict in lyric_list:
        lyrics += lyric_dict['snippet'].replace('...', '') + ' '

    # Generate a Markov model
    mc = MarkovChain('./markov')
    mc.generateDatabase(lyrics)

    # Add lines of lyrics
    result = []
    for line in range(0, lines):
        line_string = mc.generateString()
        result.append(line_string)
    return result
class Michiov(object):
    def __init__(self, autogen=True,
                 markovdb=os.path.expanduser("~/markov"),
                 twcreds=os.path.expanduser("~/.michiov_twitter_credentials"),
                 twappcreds=os.path.expanduser("~/.michiov_twitter_appdata")):
        self.mc = MarkovChain(markovdb)
        self.reload()
        if not os.path.exists(twappcreds):
            print("Lack of app creds")
            sys.exit(1)
        twcons = json.loads(open(twappcreds).read())
        conskey = twcons['key']
        conssec = twcons['secret']
        while not os.path.exists(twcreds):
            twitter.oauth_dance("MPRZ Tech Labs", conskey, conssec, twcreds)
        oauth_token, oauth_secret = twitter.read_token_file(twcreds)
        self.t = twitter.Twitter(auth=twitter.OAuth(oauth_token, oauth_secret, conskey, conssec))

    def should(self):
        ret = input("Should I send it? (y/N) ")
        return "y" in ret or "Y" in ret

    def qas(self):
        idea = self.mc.generateString()
        print("Generated: %s" % idea)
        if self.should():
            self.t.statuses.update(status=idea)

    def loop(self):
        try:
            while True:
                self.qas()
                # self.reload()
        except KeyboardInterrupt:
            pass

    def reload(self):
        with open("markovpredb.txt") as file:
            self.mc.generateDatabase(file.read())
def mkdb():
    mc = MarkovChain('./markov')
    with open('yaks.txt') as input:
        mc.generateDatabase(input.read())
    with open('yaks.txt') as input:
        yaks = [l.strip() for l in input]
    return mc, yaks
def lyrics():
    artist = request.form['artist']
    lines = int(request.form['lines'])
    if not artist:
        return redirect(url_for('index'))

    # Get a response of sample lyrics from the artist
    uri = "http://api.lyricsnmusic.com/songs"
    params = {
        'api_key': API_KEY,
        'artist': artist,
    }
    response = requests.get(uri, params=params)
    lyric_list = response.json()

    # Parse results into a long string of lyrics
    lyrics = ''
    for lyric_dict in lyric_list:
        lyrics += lyric_dict['snippet'].replace('...', '') + ' '

    # Generate a Markov model
    mc = MarkovChain()
    mc.generateDatabase(lyrics)

    # Add lines of lyrics
    result = []
    for line in range(0, lines):
        result.append(mc.generateString())
    return render_template('lyrics.html', result=result, artist=artist)
def markov(msg, botName, channel, db):
    if msg.rawMatchRe(r'!markov (?P<source>#?[a-zA-Z]\S*)\s*$') or \
            msg.rawMatchRe(r'what (would|does) (the )?(?P<source>#?[a-zA-Z]\S+) say\??'):
        m = msg.getRegExpResult()
        source = m.group('source')
        if source[0] == '#':
            logsList = db.getLogs(chan=source, lines=2000)
        else:
            logsList = db.getLogs(nick=source, lines=2000)
        if len(logsList) < 100:
            hexchat.command("msg %s Not enough data for %s" % (channel, source))
        else:
            mc = MarkovChain("./markov_db")
            ircText = ''
            for line in logsList:
                # Disqualify lines that are too short or are certain bot
                # functions that start with '!'
                if len(line.split(' ')) >= 5 and line[0] != '!':
                    ircText += line.replace('.', '') + '. '
            mc.generateDatabase(ircText)
            markovOutput = mc.generateString().capitalize()
            hexchat.command('msg %s "%s" --%s' % (channel, markovOutput, source))
        return True
    return False
def __init__(self):
    self.eightball = EightBall()
    self.excuses = Excuses()
    self.commands = Commands()
    self.straws = Straws("/", "=", "/")
    self.chain = MarkovChain("./markovdb")
    self.chain.db = _db_factory()
    with open("markovsource", "r") as markov_file:
        self.chain.generateDatabase(markov_file.readline())
def markov():
    """A simple markov function"""
    mc = MarkovChain("./tempchain")
    with open(CORPUS, 'r') as f:
        data = f.read()
    mc.generateDatabase(data)
    return mc.generateString()
async def markov(self, ctx):
    """Get a response generated from a Markov chain trained on the channel's recent text"""
    results = ''
    async for message in self.bot.logs_from(ctx.message.channel, limit=10):
        results += message.content + "\n"
    mc = MarkovChain()
    mc.generateDatabase(results)  # train directly on the collected text
    msg = mc.generateString()
    await self.bot.say(msg)
def markov(messages):
    # Create an instance of the markov chain. By default, it uses MarkovChain.py's
    # location to store and load its database files to. You probably want to give
    # it another location, like so:
    mc = MarkovChain("./markov")
    # To generate the markov chain's language model, in case it's not present:
    # mc.generateDatabase("\n".join(messages))
    # To let the markov chain generate some text, execute:
    for i in xrange(100):
        print mc.generateString()
def prepare_dict(self):
    if self.dictfile is None:
        print "error: no dictfile"
        return
    # Now build the markov database, just using pymarkovchain's default settings
    # for now. This will fail if it doesn't have write access to $PWD.
    chain = MarkovChain("./markov")
    source = self.build_source()
    chain.generateDatabase(source)
    # We seem to need to do this to reload the database after generating it
    self.chain = MarkovChain("./markov")
def main(args):
    markov_filename = "./" + args.subreddit + ".mcd"
    # This check must come before the creation of the Markov chain
    new_chain = not os.path.isfile(markov_filename)
    mc = MarkovChain(markov_filename)
    if args.new or new_chain:
        titles = getTitles(getSubmissions(100, args.subreddit))
        training_data = str.join('.', titles)
        mc.generateDatabase(training_data)
    N = args.num_submissions
    while N > 0:
        print(mc.generateString())
        N -= 1
def joke_with_seed(seed):
    """
    Same as joke(), but takes a seed to feed into the Markov chain.
    """
    if not os.path.isfile('markovdb'):
        generate_database()
    chain = MarkovChain()
    generated_joke = ''
    while len(generated_joke) < MIN_LENGTH:
        generated_joke = chain.generateStringWithSeed(seed)
    return generated_joke
def joke():
    """
    Produces a joke based on the existing database (creates the database if one
    doesn't already exist).
    """
    if not os.path.isfile('markovdb'):
        generate_database()
    chain = MarkovChain()
    generated_joke = ''
    while len(generated_joke) < MIN_LENGTH:
        generated_joke = chain.generateString()
    return generated_joke
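The seeded variant above can fail in a way the unseeded one cannot: pymarkovchain raises StringContinuationImpossibleError when the seed never starts a sentence in the training data (the Tumblr-bot snippet further down catches exactly this exception). A hedged sketch of a fallback wrapper, reusing the markovdb/MIN_LENGTH conventions of these two functions:

# Sketch: seeded generation with an unseeded fallback. Assumes the same
# 'markovdb' database and MIN_LENGTH convention as joke()/joke_with_seed().
from pymarkovchain import MarkovChain, StringContinuationImpossibleError

def safe_joke_with_seed(seed):
    chain = MarkovChain()
    try:
        generated_joke = ''
        while len(generated_joke) < MIN_LENGTH:
            generated_joke = chain.generateStringWithSeed(seed)
    except StringContinuationImpossibleError:
        # Seed absent from the corpus: fall back to an unseeded sentence.
        generated_joke = joke()
    return generated_joke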
class markovbuild(object):
    '''Builds a markov chain DB and outputs data'''

    def __init__(self, target, data, lines=5):
        self.database = '/tmp/markov_%s.db' % target
        self.lines = lines
        self.data = '\n'.join(data)
        self.mchain = MarkovChain(self.database)

    def build(self):
        '''Builds a markov chain'''
        self.mchain.generateDatabase(self.data)

    def output(self):
        '''Outputs markov chain data'''
        self.build()
        return [self.mchain.generateString() for x in xrange(0, self.lines)]
def poem():
    story = str(request.form['story'].encode('ascii', 'ignore'))
    lines = int(request.form['lines'])
    if not story:
        return redirect(url_for('index'))
    mc = MarkovChain()
    mc.generateDatabase(story)
    result = []
    for line in range(0, lines):
        new_line = mc.generateString()
        if new_line not in result:
            result.append(new_line)
    return render_template('poem.html', result=result, story=story)
def refresh_board(board):
    mc_path = './data/{}-data'.format(board)
    mc = MarkovChain(mc_path)
    images = analyze_board(mc, board)
    return mc, images
def analyze(self):  # GenerateModel
    """
    Generate a Markov chain based on retrieved strings.
    """
    mc = MarkovChain()
    mc.generateDatabase(self.text)
    result = r''
    print "Generating:"
    for i in range(0, 10):  # create 10 sentences
        print "Sentence %d" % i
        sentence = mc.generateString()
        result += sentence.capitalize() + '. '
    return result
class Haley(object):
    def __init__(self, backend):
        self.backend = backend
        self.mc = MarkovChain("markov.db")

    def loop(self):
        self.backend.connect()
        while True:
            for event in self.backend.update():
                try:
                    if event["type"] == "text":
                        times = re.search(r"(?P<nm>\d+) times", event["content"].lower())
                        if times:
                            if int(times.group("nm")) > 0:
                                times = min(5, int(times.group("nm")))
                            else:
                                self.backend.say("Okay, I won't say anything... Baka.")
                                continue
                        else:
                            times = 1
                        for i in range(times):
                            if "hi" in detox(event["content"].lower()).split() or "hello" in detox(event["content"].lower()).split():
                                self.backend.say(random.choice(["%s! Tutturuuu!", "Hello, %s, so it was you making the noise up there!"]) % event["by"])
                                continue
                            if "nano" in event["content"].lower() or "hakase" in event["content"].lower():
                                self.backend.say("%s%s" % ("HAKASE" * len(re.findall("nano", event["content"].lower())),
                                                           "NANO" * len(re.findall("hakase", event["content"].lower()))))
                                continue
                            if event["mentioned"]:
                                if "roll" in detox(event["content"].lower()).split():
                                    numb = re.search(r"(d|k)(?P<nm>\d+)", event["content"].lower())
                                    if numb and int(numb.group("nm")) > 0:
                                        self.backend.say("Aaaand... %d!" % (random.randrange(1, int(numb.group("nm")) + 1)))
                                        continue
                                    else:
                                        self.backend.say("Who do you think you are, rolling impossible dice... Baka.")
                                        continue
                                if "say" in detox(event["content"].lower()).split():
                                    if "something" in detox(event["content"].lower()).split():
                                        tosay = self.mc.generateString()
                                    elif "name" in detox(event["content"].lower()).split():
                                        tosay = self.backend.get_name(event["by"])
                                    self.backend.say(tosay)
                                    continue
                                if "xkcd" in detox(event["content"].lower()).split():
                                    if "random" in detox(event["content"].lower()).split():
                                        x = xkcd.getRandomComic()
                                    else:
                                        numb = re.search(r"(?P<nm>\d+)", event["content"])
                                        if numb:
                                            x = xkcd.Comic(int(numb.group("nm")))
                                        else:
                                            x = xkcd.getLatestComic()
                                    self.backend.say("*%s* - %s - _%s_" % (x.getTitle(), x.getImageLink(), x.getAltText()))
                                    continue
                                self.backend.say("Hmm?")
                                continue
                except:
                    self.backend.say(str(sys.exc_info()[0]))
class SaulBotFactory(protocol.ClientFactory):
    protocol = SaulBot

    def __init__(self, reactor, channel='', nickname=''):
        self.channel = channel
        self.nickname = nickname
        self.markov = MarkovChain("./tempchain")
        self.reactor = reactor
        with open('corpus.txt', 'r') as f:
            self.markov.generateDatabase(f.read())

    def clientConnectionLost(self, connector, reason):
        print "Lost connection (%s), reconnecting." % (reason,)
        connector.connect()

    def clientConnectionFailed(self, connector, reason):
        print "Could not connect: %s" % (reason,)
def __init__(self, bus):
    self.bus = bus
    self.eightball = EightBall()
    self.excuses = Excuses()
    self.commands = Commands()
    self.chain = MarkovChain("./markovdb")
    self.chain.db = _db_factory()
    with open("markovsource", "r") as markov_file:
        self.chain.generateDatabase(markov_file.readline())
class MarkovBot(BotPlugin):
    def __init__(self):
        self.markov = MarkovChain()

    @botcmd
    def talk(self, mess, args):
        """Generate a sentence based on database"""
        return self.markov.generateString()

    @botcmd
    def complete(self, mess, args):
        """Try to complete a sentence"""
        return self.markov.generateStringWithSeed(args)

    @botcmd
    def gendbfromfile(self, mess, args):
        """Generate markov chain word database"""
        try:
            with open(args) as txtFile:
                txt = txtFile.read()
        except IOError as e:
            return 'Error: could not open text file'
        # At this point, we've got the file contents
        if self.markov.generateDatabase(txt):
            return 'Done.'
        else:
            return 'Error: Could not generate database'

    @botcmd
    def gendbfromstring(self, mess, args):
        if self.markov.generateDatabase(args):
            return 'Done.'
        else:
            return 'Error: Could not generate database from String'

    @botcmd
    def gendbfromurl(self, mess, args):
        req = requests.get(args)
        if req.ok and self.markov.generateDatabase(req.content):
            return 'Done.'
        else:
            return 'Error: Could not generate database from URL'
async def snakeme(self, ctx: Context):
    """
    How would I talk if I were a snake?
    :param ctx: context
    :return: you, snakified based on your Discord message history
    """
    mentions = list(filter(lambda m: m.id != self.bot.user.id, ctx.message.mentions))
    author = ctx.message.author if (len(mentions) == 0) else ctx.message.mentions[0]
    channel: discord.TextChannel = ctx.channel

    channels = [
        channel for channel in ctx.message.guild.channels
        if isinstance(channel, discord.TextChannel)
    ]
    channels_messages = [
        await channel.history(limit=10000).flatten() for channel in channels
    ]
    msgs = [
        msg for channel_messages in channels_messages for msg in channel_messages
    ][:MSG_MAX]

    my_msgs = list(filter(lambda msg: msg.author.id == author.id, msgs))
    my_msgs_content = "\n".join(list(map(lambda x: x.content, my_msgs)))

    mc = MarkovChain()
    mc.generateDatabase(my_msgs_content)
    sentence = mc.generateString()

    snakeme = discord.Embed()
    snakeme.set_author(
        name="{0}#{1}".format(author.name, author.discriminator),
        icon_url="https://cdn.discordapp.com/avatars/{0}/{1}".format(author.id, author.avatar)
        if author.avatar is not None
        else "https://img00.deviantart.net/eee3/i/2017/168/3/4/"
             "discord__app__avatar_rev1_by_nodeviantarthere-dbd2tp9.png")
    snakeme.description = "*{0}*".format(
        snakify(sentence) if sentence is not None else ":question: Not enough messages")
    await channel.send(embed=snakeme)
def main(): with open("test.txt", "r") as myfile: data = myfile.read().replace('\n', '') mc = MarkovChain("./markovdb") # Start a session so we can have persistant cookies session = requests.Session() # This is the form data that the page sends when logging in login_data = { 'user_email': EMAIL, 'user_password': PASSWORD, 'login': '******', } # Authenticate r = session.post(URL, data=login_data) mc.generateDatabase(data) for x in range(0, 5): r = os.urandom(16).encode('hex') title = "Report#" + str(x) + " " + str(r) description = mc.generateString() #europe only because americans are fags y, x = uniform(-17, 43), uniform(28, 55) print (title) # Create new report based on random content report_data = { 'title': title, 'category': "2", 'description': description, 'latitude': x, 'longitude': y, 'newreport': "1", } r = session.post(newRep, data=report_data)
def load_or_train_board(board):
    mc_path = './data/{}-data'.format(board)
    images_path = './data/{}-images'.format(board)
    mc = MarkovChain(mc_path)
    if not os.path.isfile(mc_path) or not os.path.isfile(images_path):
        images = analyze_board(mc, board)
    else:
        with open(images_path, 'rb') as images_file:
            images = pickle.load(images_file)
    return mc, images
def __init__(self):
    self.presenter = ""
    self.title = ""
    self.slide_count = 0
    self.slide_min = 15
    self.slide_max = 25
    self.console = None
    self.output_dir = ""

    with open("terms.json", "r") as f:
        self.terms = json.load(f)
    with open(os.path.join("GIFs", "hashes.json"), "r") as f:
        self.gifs = json.load(f)
    with open(os.path.join("Images", "hashes.json"), "r") as f:
        self.images = json.load(f)

    # Load up the proverb data
    with open(os.path.join("Proverbs", "facts"), "r") as f:
        self.proverb_lines = f.readlines()
    self.proverbs = map(string.strip, self.proverb_lines)
    self.proverb_markov = MarkovChain("markov.db")
    self.proverb_markov.generateDatabase("".join(self.proverb_lines), n=1)

    # Make the text data
    # self.my_face = comptroller.face(self.title)
    # self.slide_titles = self.my_face.get_titles(50)
    # self.slide_bullets = self.my_face.get_bullets(100)
    self.my_face = Face("")
    self.slide_titles = ["shit", "balls", "butts"]
    self.slide_bullets = ["butts", "do", "stuff", "f***s", "more f***s"]

    self.ppt = Presentation()
    self.slide_weights = SlideWeights()
def __init__(self):
    self.presenter = ""
    self.title = ""
    self.slide_count = 0
    self.slide_min = 15
    self.slide_max = 25
    self.console = None
    self.output_dir = ""

    with open("terms.json", "r") as f:
        self.terms = json.load(f)

    gifpath = os.path.join('gifs', 'hashes.json')
    if os.path.exists(gifpath):
        log.info('Loading previously farmed gifs')
        with open(gifpath, "r") as f:
            self.gifs = json.load(f)
    else:
        log.info('No gifs found')
        self.gifs = dict()

    imgpath = os.path.join('images', 'hashes.json')
    if os.path.exists(imgpath):
        log.info('loading previous saved images')
        with open(imgpath, "r") as f:
            self.images = json.load(f)
    else:
        log.info('no farmed images found')
        self.images = dict()

    # Load up the proverb data
    with open(os.path.join("proverbs", "facts"), "r") as f:
        self.proverb_lines = f.readlines()
    self.proverbs = [x.strip() for x in self.proverb_lines]
    self.proverb_markov = MarkovChain("markov.db")
    self.proverb_markov.generateDatabase("".join(self.proverb_lines), n=1)

    self.my_face = Face("")
    self.slide_titles = ["shit", "balls", "butts"]
    self.slide_bullets = ["butts", "do", "stuff", "f***s", "more f***s"]

    self.ppt = Presentation()
    self.slide_weights = SlideWeights()
def __init__(self):
    self.presenter = ""
    self.title = ""
    self.GIPHY_API_KEY = 'FILL API KEY HERE'
    self.slide_count = 0
    self.slide_min = 15
    self.slide_max = 25
    self.console = None
    self.output_dir = ""

    with open("terms.json", "r") as f:
        self.terms = json.load(f)

    try:
        with open(os.path.join("GIFs", "hashes.json"), "r") as f:
            self.gifs = json.load(f)
    except:
        self.gifs = {}
        with open(os.path.join("GIFs", "hashes.json"), "w") as f:
            json.dump(self.gifs, f, indent=2)

    with open(os.path.join("Images", "hashes.json"), "r") as f:
        self.images = json.load(f)

    # Load up the proverb data
    with open(os.path.join("Proverbs", "facts"), "r") as f:
        self.proverb_lines = f.readlines()
    self.proverbs = map(string.strip, self.proverb_lines)
    self.proverb_markov = MarkovChain("markov.db")
    self.proverb_markov.generateDatabase("".join(self.proverb_lines), n=1)

    # Make the text data
    # self.my_face = comptroller.face(self.title)
    # self.slide_titles = self.my_face.get_titles(50)
    # self.slide_bullets = self.my_face.get_bullets(100)
    self.my_face = Face("")
    self.slide_titles = ["shit", "balls", "butts"]
    self.slide_bullets = ["butts", "do", "stuff", "f***s", "more f***s"]

    self.ppt = Presentation()
    self.slide_weights = SlideWeights()
def generate_database():
    """
    Generates the database that the Markov chain will use to make its word-by-
    word predictions. It will attempt to create this file in the same directory
    as where the script is currently located.
    """
    currpath = os.path.dirname(__file__)
    path_to_data = os.path.join(currpath, 'in.txt')
    chain = MarkovChain()
    with open(path_to_data) as f:
        chain.generateDatabase(f.read())
    chain.dumpdb()
    print(chain.generateString())
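The dumpdb() call is what lets the joke()/joke_with_seed() functions above run without retraining: once the pickled database exists, constructing MarkovChain pointed at the same location loads it. A minimal companion sketch, assuming the default database location described in the README-style comments elsewhere in this collection:

# Minimal sketch: reuse the database written by generate_database() above.
# Assumes MarkovChain() resolves to the same default database location.
from pymarkovchain import MarkovChain

chain = MarkovChain()           # loads the previously dumped database
print(chain.generateString())   # no generateDatabase() call needed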
def main():
    df = fill_song_pd()
    lyrics = ""  # going to be one huge string
    db_name = './markov/' + genre
    mc = MarkovChain(db_name)

    # Create a new markov data set if one doesn't exist yet
    if not os.path.isfile(db_name):
        print("creating new data set based on the " + str(genre) + " genre...")
        for index, row in df.iterrows():
            if row['genre'] == genre_dict[genre]:
                lyrics += row["lyrics"] + " "
        mc.generateDatabase(lyrics)
        mc.dumpdb()

    for i in range(int(lines) + 1):
        print(mc.generateString())
# Generates text using a Markov chain.
# Uses the PyMarkovChain implementation: https://pypi.python.org/pypi/PyMarkovChain/
# To install on Linux, run "pip install PyMarkovChain" in the shell.

# import function
from pymarkovchain import MarkovChain
import re, string

# First, read in the file with the training text data (I made mine by getting all
# my blog text as a .xml, grabbing only the nodes with the actual blog text in
# them and then scrubbing all html tags)
f = open("extractedText.txt", "r")

# Tidy up our text input a bit
textToScrub = f.read()
text = re.sub(r'^https?:\/\/.*[\r\n]*', '', textToScrub, flags=re.MULTILINE)  # get rid of urls/links
text = text.replace(u'\xa0', u' ')  # throw out those pesky non-breaking spaces

# Then create the markov chain generator
mc = MarkovChain("./markov")
mc.generateDatabase(text)

# Finally, generate some text -- run this loop to produce multiple text strings
for num in range(1, 10):
    print(mc.generateString())
# https://github.com/TehMillhouse/PyMarkovChain
# pip install PyMarkovChain
from pymarkovchain import MarkovChain

mc = MarkovChain("./am_m")
f = open('cap_short.txt', 'r')
mc.generateDatabase(f.read())

for x in range(0, 20):
    print(mc.generateString())
import importlib
import os
import time
import os.path
import re
from datetime import datetime
from pymarkovchain import MarkovChain
from glob import glob
from random import randint

import discord

bot = discord.Client()
bot.login('USERNAME', 'PASSWORD')

# Comment out the next 5 lines unless you have a markov database
importlib.import_module("plugins")
mc = MarkovChain()
with open(r'C:\\Python35\\discordbot\\logpruned.txt', 'r', encoding="utf8") as log:
    thelog = log.read()
mc.generateDatabase(thelog)

# for plugin in glob("C:/Python35/discordbot/plugins/[!_]*.py"):
#     module = 'plugins.' + plugin[31:-3]
#     print(module)
#     print(plugin)
#     try:
#         importlib.import_module(module)
#     except Exception as e:
#         print('Failed to import {0}: {1}'.format(plugin, e))
#!/usr/bin/env python
from music21 import *
from pymarkovchain import MarkovChain
from random import randrange
import copy
import os
import inspect

mc = MarkovChain("./markov")

files = [
    '/Users/telenardo/Downloads/midi0.mid',
    '/Users/telenardo/Downloads/midi1.mid',
    '/Users/telenardo/Downloads/midi2.mid',
    '/Users/telenardo/Downloads/midi3.mid',
    '/Users/telenardo/Downloads/midi4.mid'
]

db = ''
assoc = {}
path = 'MidiMelodies'

# for s in files:
for filename in os.listdir(path):
    s = converter.parse(path + '/' + filename)
    part = s.parts[0]
    for cur_note in part.notes:
        name = cur_note.fullName
description="Generate Tumblr posts from a Markov chain database.") PARSER.add_argument("filename", metavar="CORPUS", type=str, help="The corpus to use in generating text.") PARSER.add_argument("number", metavar="NUMBER", type=int, help="The number of strings to generate.") PARSER.add_argument('--minlen', metavar="LENGTH", type=int, help="Throw out strings shorter than this.", default=3) PARSER.add_argument('--notags', action="store_true", help="Don't generate tags (legacy database compat behaviour)") ARGS = PARSER.parse_args() FILENAME = ARGS.filename NUMBER = ARGS.number BOT = MarkovChain(FILENAME) VALID_SENTENCES = 0 while VALID_SENTENCES < NUMBER: SENTENCE = BOT.generateString() if len(SENTENCE.split()) < ARGS.minlen: continue VALID_SENTENCES += 1 print(SENTENCE) if not ARGS.notags: try: TAGS=BOT.generateStringWithSeed("#") print(TAGS) print(" --- ") except pymarkovchain.StringContinuationImpossibleError as e:
# Helper functions
def ucfirst(sentence):
    return sentence[0].upper() + sentence[1:]

def file_get_contents(filename):
    with open(filename) as f:
        return f.read()

def validate_tweet(status):
    num_words = len(status.split(" ")) + 1
    if num_words < 3:
        return False
    return True

# Setup
mc = MarkovChain("./markov")
api = twitter.Api(consumer_key=consumer_key,
                  consumer_secret=consumer_secret,
                  access_token_key=access_token,
                  access_token_secret=access_secret)

# Pull train_text
train_text = ""
for meta in src_dirs:
    _dir = meta[0]
    num_files = meta[1]
    for root, dirs, files in os.walk(_dir, topdown=False):
        random.shuffle(files)
        for name in files:
            num_files -= 1
            if num_files > 0:
#!/usr/bin/env python
from pymarkovchain import MarkovChain

# Create an instance of the markov chain. By default, it uses MarkovChain.py's
# location to store and load its database files to. You probably want to give it
# another location, like so:
mc = MarkovChain("C:/Users/Andrew/OneDrive/Documents/Northwestern/Courses/495-Data-Science/Final Project")

# To generate the markov chain's language model, in case it's not present
mc.generateDatabase("It is nice to meet you. I would like to meet your friend.")

# To let the markov chain generate some text, execute
for i in range(10):
    print(mc.generateString())
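As written, this README-style example retrains on every run; the snippets in this collection that keep their model around (generate_database(), the genre-lyrics script, the tag-corpus bot) follow generateDatabase() with an explicit dumpdb(). A hedged variant with persistence added, using an illustrative path:

# Sketch: the same flow with dumpdb() added so a later run can construct
# MarkovChain("./markov-example") and skip training. The path is illustrative.
from pymarkovchain import MarkovChain

mc = MarkovChain("./markov-example")
mc.generateDatabase("It is nice to meet you. I would like to meet your friend.")
mc.dumpdb()  # pickle the model to ./markov-example for later runs
for i in range(10):
    print(mc.generateString())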
inputText = filehandler.loadTextFile(sample)
if args.upper_case:
    inputText = inputText.upper()
if args.lower_case:
    inputText = inputText.lower()
if args.title_case:
    inputText = inputText.title()

# Generate a markov-chain-based text from the input
if args.generate and args.generate > 0:
    # Disable the error message about the on-the-fly database
    logging.disable(logging.WARNING)
    mc = MarkovChain("./markov-chain-database")
    mc.generateDatabase(inputText)
    # Reinstate logging
    logging.disable(logging.NOTSET)

    generatedText = ""
    while len(generatedText) < args.generate:
        if generatedText != "":
            generatedText = generatedText + " "
        generatedText = generatedText + mc.generateString()
    inputText = generatedText

if args.filter_punctuation:
    inputText = text.removePunctuation(inputText)
class BuzzFeeder(object):
    def __init__(self, **kwargs):
        self.chain = MarkovChain("%s/static/markov" % dirname(__file__))
        self.proceed = True
        for k, v in kwargs.items():
            if k not in defaults:
                raise ValueError
            setattr(self, k, kwargs.get(v, defaults[k]))
        with open(self.filename) as f:
            self.data = load(f)
        if not getattr(self, 'seed'):
            self.seed = False

    @property
    def titles(self):
        return map(lambda x: x['title'], filter(lambda y: y, self.data))

    @property
    def text(self):
        return rc(r'[%s]' % escape(punctuation)) \
            .sub(" b", "\n".join(self.titles).lower())

    def generate_database(self):
        self.chain.generateDatabase(self.text)

    def ask(self, prompt, opts=[]):
        prompt = ">>> " + prompt
        if opts:
            prompt += " [%s]" % "|".join(opts)
        response = raw_input(prompt).lower()
        if 'x' in response:
            self.proceed = False
            return self.proceed
        if opts and response not in opts:
            raise ValueError
        return response

    def prompt(self, candidate):
        print ">>> '%s'" % candidate
        if not self.proceed:
            return False
        q = self.ask("Tweet this text?", opts=['y', 'n'])
        if not q:
            return False
        if 'y' in q:
            return candidate
        if 'n' in q:
            if 'y' in self.ask("Edit this text?", opts=['y', 'n']):
                return self.ask("Enter edited text: ")
            else:
                return True

    def generate(self):
        if not self.seed:
            yielder = self.chain.generateString
        else:
            yielder = self.chain.generateStringWithSeed
        yargs = [] if not self.seed else [self.seed]
        while self.proceed:
            yield yielder(*yargs) \
                .split(".py")[-1] \
                .strip() \
                .title()

    def run(self):
        print "[ press X to stop at any time ]"
        with open(self.output, "a") as tweets:
            for candidate in self.generate():
                response = self.prompt(candidate)
                if not response:
                    break
                if not isinstance(response, bool):
                    tweets.write(response.encode('ascii', 'ignore'))
                    tweets.write('\n')
        print ''
lyrics = scraper.scrape(chorus, song_list)

# Clean up lyrics
lyrics = lyrics.replace('(', '').replace(')', '')
lyrics = lyrics.replace('"', '')
lyrics = lyrics.lower()
# print(lyrics)

# Import lyrics
# else:
#     with open(db_path, 'r') as inputfile:
#         for line in inputfile:
#             lyrics += line + '\n'

# Create model instance
mc = MarkovChain('db/' + db_name)

# Generate db
mc.generateDatabase(lyrics)
mc.dumpdb()

# -------------------------------------------------------------------------------
# Write lyrics
f = open(folder + db_name + '.txt', 'w')
for i in range(0, int(number_of_phrases)):
    seed = str(input('seed: '))
    f.write(mc.generateStringWithSeed(seed) + '\n')
    # f.write(mc.generateString() + '\n')
f.close()
if "//" in t: continue if "cw: " in t: continue # Prune short tags if ARGS.prune and len(t) <= 3: continue # Tags which are just numbers should not be in the corpus try: int(t.strip()) continue except ValueError: pass if ARGS.nohash: CORPUS += t + " " else: CORPUS += '#' + t + " " CORPUS += "\n" if ARGS.debug: print(CORPUS) exit(1) print("Generating database...") BOT = MarkovChain(TARGET_FILE) BOT.generateDatabase(CORPUS) print("Dumping database to {}".format(TARGET_FILE)) BOT.dumpdb()
def main(username):
    r = praw.Reddit(user_agent='trollolol v0.1')
    r.config.decode_html_entities = True
    m = MarkovChain('markov-data/%s.chain' % username)

    last_comment = None
    try:
        last_comment = Node.objects(username=username).order_by('-created').first()
        if last_comment:
            print("Checking for new messages.")
            comments = r.get_redditor(username).get_comments(
                limit=500, params={'after': last_comment.node_id})
        else:
            raise
    except:
        print("No messages fetched yet, doing initial import")
        comments = r.get_redditor(username).get_comments(limit=500)

    for comment in comments:
        try:
            node = Node.objects.get(node_id=comment.name)
        except:
            node = Node(node_id=comment.name, parent_id=comment.parent_id,
                        body=comment.body, created=comment.created,
                        username=username)
            node.save()

    first_comment = Node.objects(username=username).order_by('+created').first()
    if first_comment:
        print("Checking for messages before %s." % first_comment.node_id)
        comments = r.get_redditor(username).get_comments(
            limit=500, params={'before': first_comment.node_id})
        for comment in comments:
            try:
                node = Node.objects.get(node_id=comment.name)
            except:
                node = Node(node_id=comment.name, parent_id=comment.parent_id,
                            body=comment.body, created=comment.created,
                            username=username)
                node.save()

    comments = Node.objects(username=username).all()
    corpus = []
    for comment in comments:
        corpus.append(comment.body)
    shuffle(corpus)

    if len(corpus) > 0:
        print("We have %i messages to work with. Building new markov corpus now." % len(corpus))
        m.generateDatabase(" ".join(corpus))
        print("Looking for acceptable output for first round of transforms.")
        output = []
        tries = 0
        while len(output) < 10:
            tries = tries + 1
            result = m.generateString()
            if tries < 100:
                if len(result.split(" ")) >= 10:
                    sys.stdout.write("x")
                    output.append(result)
                else:
                    sys.stdout.write(".")
        print("")
        response = ""
        for result in output:
            response = response + " " + result
        print response
    else:
        print("No comments found.")
lyrics_directory = "data1/" files = glob.glob(lyrics_directory + '*.txt') # iterate over the list getting each file all_lyrics = "" for file in files: # open the file and then call .read() to get the text print(file) with open(file) as f: text = f.read() verse_lyrics = parse_file(text) verse_lyrics = re.sub("[\[\]\(\)\"]", " ", verse_lyrics) verse_lyrics = re.sub(" +", " ", verse_lyrics) all_lyrics += verse_lyrics mc = MarkovChain("test") mc.generateDatabase(all_lyrics) output_directory = "generated_lyrics/" if not os.path.exists(output_directory): os.makedirs(output_directory) number_of_phrases = 8 num_files = 1000 for i in range(num_files): # Printing a string with open(output_directory + "{}.txt".format(i), "w") as f: for i in range(0, int(number_of_phrases)): while True: line = mc.generateString()
from pymarkovchain import MarkovChain

mc = MarkovChain("./markov")

texts = [
    "text/confessions.txt",
    "text/discourses-and-social-contract.txt",
    "text/emile.txt"
]

entire_string = ""
for text_url in texts:
    f = open(text_url, 'r')
    entire_string += f.read()
    entire_string += "\n"
    f.close()

test = open("test.txt", 'w')
test.write(entire_string)
test.close()

mc.generateDatabase(entire_string, '\n')
print(mc.generateString())

for i in range(10000):
    f = open("output/{0}.txt".format(i), 'w')
    f.write(mc.generateString().strip())
    f.close()
from pymarkovchain import MarkovChain

API_URI = "http://lyrics.wikia.com/api.php?action=lyrics&fmt=realjson"

if __name__ == '__main__':
    if len(sys.argv) != 3:
        sys.exit("Usage: python3 py-simple-lyric-generator \"[artist_name]\" [number_of_phrases_to_generate]")
    artist_name = sys.argv[1]
    number_of_phrases = sys.argv[2]
    params = {'artist': artist_name}

    # Generate a Markov chain model
    db_name_hashed = "db/" + hashlib.md5(artist_name.lower().encode('utf-8')).hexdigest()
    mc = MarkovChain(db_name_hashed)

    # Check whether the database already exists; if so, use the cache instead of another API call
    if not os.path.isfile(db_name_hashed):
        print("No data cached. Please be patient while we search the lyrics of %s." % artist_name)

        # Accumulate all lyrics into a single giant string
        lyrics = ''

        # Parse each lyric from this artist.
        # [http://api.wikia.com/wiki/LyricWiki_API]
        artist = requests.get(API_URI, params=params).json()
        for album in artist['albums']:
            for song in album['songs']:
    objlist = [img, rect, name, label0, label1, label2]
    return objlist

def moveAll(shapeList, dx, dy):
    for shape in shapeList:
        shape.move(dx, dy)

def moveAllOnLine(shapeList, dx, dy, repetitions, delay):
    for i in range(repetitions):
        moveAll(shapeList, dx, dy)
        time.sleep(delay)

writeFile = makeWriteFile()

mcTrump = MarkovChain("db/trumpdb")
mcCruz = MarkovChain("db/cruzdb")
mcRubio = MarkovChain("db/rubiodb")
mcKasich = MarkovChain("db/kasichdb")
mcCarson = MarkovChain("db/carsondb")
mcJeb = MarkovChain("db/jebdb")
mcChristie = MarkovChain("db/christiedb")

# List of candidates to be included in the debate
candidateList = ["Trump", "Rubio", "Christie", "Cruz", "Carson", "Jeb", "Kasich"]
candidateInfo = {}
z = 0
for name in candidateList:
    if name == "Trump":
        info = [mcTrump, ["Donald", "Donald's", "Trump", "Trump's", "Don"], "Mr. Trump", "TRUMP: "]
        if len(markovString) > 50 and len(markovString) < 140:
            return markovString

# twitter_poster posts the string to Twitter.
# See http://www.dototot.com/how-to-write-a-twitter-bot-with-python-and-tweepy/ for details
def twitter_poster(string):
    # Enter the corresponding information from your Twitter application:
    CONSUMER_KEY = 'consumerkey'        # keep the quotes; replace with your consumer key
    CONSUMER_SECRET = 'consumersecret'  # keep the quotes; replace with your consumer secret key
    ACCESS_KEY = 'accesskey'            # keep the quotes; replace with your access token
    ACCESS_SECRET = 'accesssecret'      # keep the quotes; replace with your access token secret
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
    api = tweepy.API(auth)
    api.update_status(string)

# The main script will:
# 1. connect to a Markov database
# 2. generate a word pool and a database if it doesn't exist already
# 3. generate a string from the markovChain
# 4. post the string to Twitter
if __name__ == '__main__':
    databaseName = 'database.p'
    markovChain = MarkovChain(databaseName)
    if not os.path.isfile(databaseName):
        wordpool = wordpool_generator()
        database_generator(markovChain, wordpool)
    else:
        print('Database already exists, skipping database creation...')
    string = string_generator(markovChain)
    twitter_poster(string)
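The top of string_generator is cut off in this snippet; judging from the surviving tail, it loops until the chain produces something inside a tweet-friendly length window. A hypothetical reconstruction consistent with that tail:

# Hypothetical reconstruction of the truncated string_generator above: sample
# until the sentence fits the 50-140 character window checked in its tail.
def string_generator(markovChain):
    while True:
        markovString = markovChain.generateString()
        if len(markovString) > 50 and len(markovString) < 140:
            return markovString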