def main():
    import logging
    from argparse import ArgumentParser

    import config
    from pymarkovchain import MarkovChain

    optp = ArgumentParser()

    optp.add_argument("-d", "--database", dest="database", help="Where to store the database")
    optp.add_argument("-f", "--file", dest="file", help="File source to use for generating the database")
    opts = optp.parse_args()

    # Set up logging.
    logformat = "%(levelname)-8s %(name)s %(message)s"
    logging.basicConfig(level=logging.INFO, format=logformat)
    log = logging.getLogger(__name__)

    if opts.database is None:
        try:
            opts.database = config.markovbrainfile
        except AttributeError:  # config module has no markovbrainfile
            log.critical("I require a brainfile to write into!")
            exit(1)
    if opts.file is None:
        log.critical("I require an imput file to learn from!")
        exit(1)

    mc = MarkovChain(opts.database)

    # generateDatabase expects the corpus text, not a path
    with open(opts.file) as f:
        mc.generateDatabase(f.read())
    mc.dumpdb()  # persist the brainfile for later reuse
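Once the brainfile has been dumped, a separate run can reload it by handing MarkovChain the same path and sampling from it; a minimal sketch (the path is whatever was passed as --database, "./markovbrain" here is purely illustrative):

from pymarkovchain import MarkovChain

# Reload a previously dumped brainfile (hypothetical path for illustration)
mc = MarkovChain("./markovbrain")
for _ in range(5):
    print(mc.generateString())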
Example #2
    def init_model(self, texts):
        from pymarkovchain import MarkovChain
        model = MarkovChain()
        model.generateDatabase(
            '\n'.join(map(lambda text: text.payload, texts)), '\n')

        return model
Example #3
class EuroMarkov:
    def __init__(self):
        self.mc = MarkovChain("./markovdata")

    def generateCountryList(self):
        countryList = []
        for filename in os.listdir("json_lyrics/2015"):
            countryList.append(os.path.splitext(filename)[0])
        return countryList

    def loadFiles(self, startYear, endYear, countryList):
        model = ""
        for year in range(startYear, endYear + 1):
            for country in countryList:
                fname = "json_lyrics/" + str(year) + "/" + country + ".json"
                if os.path.isfile(fname):
                    with open(fname, "r") as myfile:
                        data = json.load(myfile)
                        model += data['lyrics'] + '\n'
        return model

    def runMarkov(self,model):
        self.mc.generateDatabase(model)

    def generateString(self):
        return self.mc.generateString()
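A possible driver for EuroMarkov, assuming the json_lyrics/<year>/<country>.json layout the class expects (the year range is illustrative):

if __name__ == "__main__":
    em = EuroMarkov()
    countries = em.generateCountryList()
    model = em.loadFiles(2010, 2015, countries)  # illustrative year range
    em.runMarkov(model)
    print(em.generateString())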
Example #4
class TextGenerator:
	def __init__(self, generatorName, trainString, prefixLength):
		self.generatorName = generatorName
		self.chain = MarkovChain()
		self.chain.generateDatabase(trainString, n=prefixLength)
		self.currState = []
		self.hyphenator = Hyphenator('en_US')
		self.syllableQ = Queue()
		self.stripPattern = re.compile(r'[\W_]+')
		while (len(self.currState) < prefixLength):
			self.currState = self.chain.generateString().split()[-(prefixLength+1):-1]
	
	def load_next_word(self):
		nextword = ""
		try:
			while nextword == "":
				nextword = self.stripPattern.sub('', self.chain._nextWord(self.currState))
				self.currState = self.currState[1:]
				self.currState.append(nextword)
			if len(nextword) < 4: # hyphenator doesn't work for words shorter than 4 letters
				self.syllableQ.put(nextword)
			else: 
				for syllable in self.hyphenator.syllables(nextword):
					self.syllableQ.put(syllable)
		except UnicodeEncodeError:
			print("unicode error")
		
	def get_next_syllable(self):
		if (self.syllableQ.empty()):
			self.load_next_word()
		return self.syllableQ.get()
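A hypothetical driver for the generator above, assuming a plain-text corpus file (corpus.txt is illustrative) and the imports the class relies on (Hyphenator, Queue, re):

# Hypothetical usage of TextGenerator: stream syllables from a trained chain
with open("corpus.txt") as f:  # corpus.txt is an assumed example file
    gen = TextGenerator("demo", f.read(), prefixLength=2)
for _ in range(12):
    print(gen.get_next_syllable())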
Example #5
def lyrics():
    artist = request.form['artist']
    lines = int(request.form['lines'])

    if not artist:
        return redirect(url_for('index'))

    # Get a response of sample lyrics from the provided artist
    uri = "http://api.lyricsnmusic.com/songs"
    params = {
        'api_key': API_KEY,
        'artist': artist,
    }
    response = requests.get(uri, params=params)
    lyric_list = response.json()
    # Parse results into a long string of lyrics
    lyrics = ''
    for lyric_dict in lyric_list:
        lyrics += lyric_dict['snippet'].replace('...', '') + ' '

    # Generate a Markov model
    mc = MarkovChain()
    mc.generateDatabase(lyrics)

    result = []
    for line in range(0, lines):
        result.append(mc.generateString())

    return render_template('lyrics.html', result=result, artist=artist)
Example #6
class BeerMarkov():

    def __init__(self, reviews_file, markov_dir):
        self._reviews_file = reviews_file
        self._markov_dir = markov_dir
        self._markov = MarkovChain(markov_dir + '/beer_desc')
        self._name_markov = MarkovChain(markov_dir + '/beer_name')
        self.refresh_database()

    def refresh_database(self):
        with open(self._reviews_file, 'r') as review_data:
            reviews = json.load(review_data)

        reviews_string = [r['desc'] for r in reviews]
        names_string = [r['name'] for r in reviews]

        new_markov = MarkovChain(self._markov_dir + '/beer_desc')
        new_markov.generateDatabase(' '.join(reviews_string))

        new_name_markov = MarkovChain(self._markov_dir + '/beer_name')
        new_name_markov.generateDatabase('.'.join(names_string))

        self._markov = new_markov
        self._name_markov = new_name_markov

    def get_review(self):
        return self._markov.generateString() + '. ' + \
            self._markov.generateString()
Example #7
def main():
    auth = tweepy.OAuthHandler(key, secret)
    auth.set_access_token(token, token_secret)
    client = tweepy.API(auth)

    mc = MarkovChain(markDirectory)
    superString = createSuperString('trump.txt')
    mc.generateDatabase(superString)

    while (True):
        phrase = mc.generateString()
        try:
            print(phrase)
        except UnicodeEncodeError:
            continue
        try:
            answer = input()
            if (answer == 'y'):
                client.update_status(phrase)
        except tweepy.TweepError:
            continue
Example #8
def fetch_lyrics(artist, lines):
    API_KEY = os.environ.get('API_KEY')

    uri = "http://api.lyricsnmusic.com/songs"
    params = {
        'api_key': API_KEY,
        'artist': artist,
    }
    response = requests.get(uri, params=params)
    lyric_list = response.json()

    lyrics = ''
    for lyric_dict in lyric_list:
        lyrics += lyric_dict['snippet'].replace('...', '') + ' '

    # Generate a Markov model
    mc = MarkovChain('./markov')
    mc.generateDatabase(lyrics)

    # Add lines of lyrics
    result = []
    for line in range(0, lines):
        line_string = mc.generateString()
        result.append(line_string)
    return result
Example #9
File: reqas.py Project: MPRZLabs/icarus
class Michiov(object):
  def __init__(self, autogen=True, markovdb=os.path.expanduser("~/markov"), twcreds=os.path.expanduser("~/.michiov_twitter_credentials"),twappcreds=os.path.expanduser("~/.michiov_twitter_appdata")):
    self.mc = MarkovChain(markovdb)
    self.reload()
    if not os.path.exists(twappcreds):
      print("Lack of app creds")
      sys.exit(1)
    twcons = json.loads(open(twappcreds).read())
    conskey = twcons['key']
    conssec = twcons['secret']
    while not os.path.exists(twcreds):
      twitter.oauth_dance("MPRZ Tech Labs", conskey, conssec, twcreds)
    oauth_token, oauth_secret = twitter.read_token_file(twcreds)
    self.t = twitter.Twitter(auth=twitter.OAuth(oauth_token, oauth_secret, conskey, conssec))
  def should(self):
    ret = input("Should I send it? (y/N) ")
    return ("y" in ret or "Y" in ret)
  def qas(self):
    idea = self.mc.generateString()
    print("Generated: %s" % idea)
    if self.should():
      self.t.statuses.update(status=idea)
  def loop(self):
    try:
      while True:
        self.qas()
        #self.reload()
    except KeyboardInterrupt:
      pass
  def reload(self):
    with open("markovpredb.txt") as file:
      self.mc.generateDatabase(file.read())
Example #10
def mkdb():
  mc = MarkovChain('./markov')
  with open('yaks.txt') as infile:  # avoid shadowing the built-in input
    mc.generateDatabase(infile.read())
  with open('yaks.txt') as infile:
    yaks = [l.strip() for l in infile]
  return mc, yaks
Example #12
def markov(msg, botName, channel, db):
  if msg.rawMatchRe(r'!markov (?P<source>#?[a-zA-Z]\S*)\s*$') or msg.rawMatchRe(r'what (would|does) (the )?(?P<source>#?[a-zA-Z]\S+) say\??'):
    m = msg.getRegExpResult()
    source = m.group('source')

    if source[0] == '#':
      logsList = db.getLogs(chan=source, lines=2000)
    else:
      logsList = db.getLogs(nick=source, lines=2000)
    
    if len(logsList) < 100:
      hexchat.command("msg %s Not enough data for %s" % (channel, source))
      
    else:
      mc = MarkovChain("./markov_db")
      ircText = ''
      
      for line in logsList:
        # disqualify lines that are too short or are certain bot functions that start with '!'
        if len(line.split(' ')) >= 5 and line[0] != '!':
          ircText += line.replace('.','') + '. '
          
      mc.generateDatabase(ircText)
      markovOutput = mc.generateString().capitalize()
      hexchat.command('msg %s "%s"  --%s' % (channel, markovOutput, source))
      
    return True
  return False
Example #13
 def __init__(self):
     self.eightball = EightBall()
     self.excuses = Excuses()
     self.commands = Commands()
     self.straws = Straws("/", "=", "/")
     self.chain = MarkovChain("./markovdb")
     self.chain.db = _db_factory()
     with open("markovsource", "r") as markov_file:
         self.chain.generateDatabase(markov_file.readline())
Example #14
File: saulbot.py Project: twopir/saulbot
def markov():
    """A simple markov function"""
    mc = MarkovChain("./tempchain")

    with open(CORPUS, 'r') as f:
        data = f.read()

    mc.generateDatabase(data)

    return mc.generateString()
Example #15
 async def markov(self, ctx):
   """Get a response built from the channel's recent text using a markov chain"""
   results = ''
   async for message in self.bot.logs_from(ctx.message.channel, limit=10):
     line = message.content
     results += line + "\n"
   # generateDatabase takes the corpus text itself; the constructor takes a db path
   mc = MarkovChain()
   mc.generateDatabase(results)
   msg = mc.generateString()
   await self.bot.say(msg)
Example #16
File: commitz.py Project: ksikka/commitz
def markov(messages):
    # Create an instance of the markov chain. By default, it uses MarkovChain.py's location to
    # store and load its database files to. You probably want to give it another location, like so:
    mc = MarkovChain("./markov")

    # To generate the markov chain's language model, in case it's not present
    # mc.generateDatabase("\n".join(messages))

    # To let the markov chain generate some text, execute
    for i in xrange(100):
        print mc.generateString()
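The commented-out generateDatabase call hints at the usual caching pattern: rebuild the model only when no database file has been dumped yet. A sketch of that guard, reusing the ./markov path and messages list assumed above:

import os

def markov_cached(messages):
    mc = MarkovChain("./markov")
    if not os.path.isfile("./markov"):
        # no dumped database yet: build the model and persist it
        mc.generateDatabase("\n".join(messages))
        mc.dumpdb()
    return mc.generateString()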
Example #17
 def __init__(self, **kwargs):
     self.chain = MarkovChain("%s/static/markov" % dirname(__file__))
     self.proceed = True
     for k, v in kwargs.items():
         if k not in defaults:
             raise ValueError("unknown option: %s" % k)
         setattr(self, k, v)
     with open(self.filename) as f:
         self.data = load(f)
     if not getattr(self, 'seed', None):
         self.seed = False
Example #18
    def prepare_dict(self):
        if self.dictfile is None:
            print "error: no dictfile"
            return
        # now build the markov database. just using pymarkovchain's default settings for now. will fail if it doesn't
        # have write access to $PWD.
        chain = MarkovChain("./markov")

        source = self.build_source()
        chain.generateDatabase(source)

        # seem to need to do this to reload the database after generating it
        self.chain = MarkovChain("./markov")
Example #19
def main(args):
	markov_filename = "./" + args.subreddit + ".mcd"
	new_chain = not os.path.isfile(markov_filename)  # this must come before the creation of the Markov Chain
	mc = MarkovChain(markov_filename)

	if args.new or new_chain:
		titles = getTitles(getSubmissions(100, args.subreddit))
		training_data = str.join('.', titles)
		mc.generateDatabase(training_data)

	N = args.num_submissions
	while N > 0:
		print(mc.generateString())
		N -= 1
Example #20
def joke_with_seed(seed):
    """
    Same as joke(), but takes a seed to feed into the Markov Chain.
    """
    if not os.path.isfile('markovdb'):
        generate_database()

    chain = MarkovChain()
    generated_joke = ''

    while len(generated_joke) < MIN_LENGTH:
        generated_joke = chain.generateStringWithSeed(seed)

    return generated_joke
Example #21
def joke():
    """
    Produces a joke based on the existing database (creates database if 
    one doesn't already exist).
    """
    if not os.path.isfile('markovdb'):
        generate_database()

    chain = MarkovChain()
    generated_joke = ''

    while len(generated_joke) < MIN_LENGTH:
        generated_joke = chain.generateString()

    return generated_joke
Example #22
class markovbuild(object):
    '''Builds a markov chain DB and outputs data'''
    def __init__(self, target, data, lines=5):
        self.database = '/tmp/markov_%s.db' % target
        self.lines = lines
        self.data = '\n'.join(data)
        self.mchain = MarkovChain(self.database)

    def build(self):
        '''Builds a markov chain'''
        self.mchain.generateDatabase(self.data)

    def output(self):
        '''Outputs markov chain data'''
        self.build()
        return [self.mchain.generateString() for _ in xrange(self.lines)]
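Hypothetical use of the class above (Python 2, to match the snippet's xrange); the target name and input lines are illustrative:

mb = markovbuild("demo", ["the quick brown fox.", "jumps over the lazy dog."], lines=3)
for sentence in mb.output():
    print sentence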
Example #23
File: app.py Project: mathur/PoemRemixer
def poem():
    story = str(request.form['story'].encode('ascii', 'ignore'))
    lines = int(request.form['lines'])

    if not story:
        return redirect(url_for('index'))

    mc = MarkovChain()
    mc.generateDatabase(story)

    result = []
    for line in range(0, lines):
        new_line = mc.generateString()
        if new_line not in result:
            result.append(new_line)

    return render_template('poem.html', result=result, story=story)
Example #24
def refresh_board(board):
    mc_path = './data/{}-data'.format(board)

    mc = MarkovChain(mc_path)

    images = analyze_board(mc, board)

    return mc, images
Example #25
    def analyze(self):
        # GenerateModel
        """ Generate a Markov chain based on retrieved strings. """

        mc = MarkovChain()
        mc.generateDatabase(self.text)
        result = ''

        print "Generating:"

        for i in range(0, 10):
            print "Sentence %d" % i
            # Create 10 sentences
            sentence = mc.generateString()
            result += sentence.capitalize() + '. '

        return result
Example #27
class Haley(object):
    def __init__(self, backend):
        self.backend = backend
        self.mc = MarkovChain("markov.db")
    def loop(self):
        self.backend.connect()
        while True:
            for event in self.backend.update():
                try:
                    if event["type"] == "text":
                        times = re.search(r"(?P<nm>\d+) times", event["content"].lower())
                        if times:
                            if int(times.group("nm")) > 0:
                                times = min(5,int(times.group("nm")))
                            else:
                                self.backend.say("Okay, I won't say anything... Baka.")
                                continue
                        else:
                            times = 1
                        for i in range(times):
                            if "hi" in detox(event["content"].lower()).split() or "hello" in detox(event["content"].lower()).split():
                                self.backend.say(random.choice(["%s! Tutturuuu!","Hello, %s, so it was you making the noise up there!"]) % event["by"])
                                continue
                            if "nano" in event["content"].lower() or "hakase" in event["content"].lower():
                                self.backend.say("%s%s"%("HAKASE"*len(re.findall("nano", event["content"].lower())),"NANO"*len(re.findall("hakase", event["content"].lower()))))
                                continue
                            if event["mentioned"]:
                                if "roll" in detox(event["content"].lower()).split():
                                    numb = re.search(r"(d|k)(?P<nm>\d+)", event["content"].lower())
                                    if numb and int(numb.group("nm")) > 0:
                                        self.backend.say("Aaaand... %d!" % (random.randrange(1,int(numb.group("nm"))+1)))
                                        continue
                                    else:
                                        self.backend.say("Who do you think you are, rolling impossible dice... Baka.")
                                        continue
                                if "say" in detox(event["content"].lower()).split():
                                    if "something" in detox(event["content"].lower()).split():
                                        tosay = self.mc.generateString()
                                    elif "name" in detox(event["content"].lower()).split():
                                        tosay = self.backend.get_name(event["by"])
                                    self.backend.say(tosay)
                                    continue
                                if "xkcd" in detox(event["content"].lower()).split():
                                    if "random" in detox(event["content"].lower()).split():
                                        x = xkcd.getRandomComic()
                                    else:
                                        numb = re.search(r"(?P<nm>\d+)", event["content"])
                                        if numb:
                                            x = xkcd.Comic(int(numb.group("nm")))
                                        else:
                                            x = xkcd.getLatestComic()
                                    self.backend.say("*%s* - %s - _%s_" % (x.getTitle(), x.getImageLink(), x.getAltText()))
                                    continue
                                self.backend.say("Hmm?")
                                continue
                except:
                    self.backend.say(str(sys.exc_info()[0]))
Example #28
class SaulBotFactory(protocol.ClientFactory):
    protocol = SaulBot

    def __init__(self, reactor, channel='', nickname=''):
        self.channel = channel
        self.nickname = nickname
        self.markov = MarkovChain("./tempchain")
        self.reactor = reactor

        with open('corpus.txt', 'r') as f:
            self.markov.generateDatabase(f.read())

    def clientConnectionLost(self, connector, reason):
        print "Lost connection (%s), reconnecting." % (reason,)
        connector.connect()

    def clientConnectionFailed(self, connector, reason):
        print "Could not connect: %s" % (reason,)
Example #29
 def __init__(self, bus):
     self.bus = bus
     self.eightball = EightBall()
     self.excuses = Excuses()
     self.commands = Commands()
     self.chain = MarkovChain("./markovdb")
     self.chain.db = _db_factory()
     with open("markovsource", "r") as markov_file:
         self.chain.generateDatabase(markov_file.readline())
Example #30
class MarkovBot(BotPlugin):

    def __init__(self):
        self.markov = MarkovChain()

    @botcmd
    def talk(self, mess, args):
        """ Generate a sentence based on database """
        return self.markov.generateString()

    @botcmd
    def complete(self, mess, args):
        """ Try to complete a sentence """
        return self.markov.generateStringWithSeed(args)

    @botcmd
    def gendbfromfile(self, mess, args):
        """ Generate markov chain word database """
        try:
            with open(args) as txtFile:
                txt = txtFile.read()
        except IOError as e:
            return 'Error: could not open text file'
        # At this point, we've got the file contents
        if self.markov.generateDatabase(txt):
            return 'Done.'
        else:
            return 'Error: Could not generate database'

    @botcmd
    def gendbfromstring(self, mess, args):
        if self.markov.generateDatabase(args):
            return 'Done.'
        else:
            return 'Error: Could not generate database from String'

    @botcmd
    def gendbfromurl(self, mess, args):
        req = requests.get(args)
        if req.ok and self.markov.generateDatabase(req.text):  # .text yields str; .content is bytes
            return 'Done.'
        else:
            return 'Error: Could not generate database from URL'
Example #31
    async def snakeme(self, ctx: Context):
        """
        How would I talk if I were a snake?
        :param ctx: context
        :return: you, snakified based on your Discord message history
        """
        mentions = list(
            filter(lambda m: m.id != self.bot.user.id, ctx.message.mentions))
        author = ctx.message.author if (len(mentions)
                                        == 0) else ctx.message.mentions[0]
        channel: discord.TextChannel = ctx.channel

        channels = [
            channel for channel in ctx.message.guild.channels
            if isinstance(channel, discord.TextChannel)
        ]
        channels_messages = [
            await channel.history(limit=10000).flatten()
            for channel in channels
        ]
        msgs = [
            msg for channel_messages in channels_messages
            for msg in channel_messages
        ][:MSG_MAX]

        my_msgs = list(filter(lambda msg: msg.author.id == author.id, msgs))
        my_msgs_content = "\n".join(list(map(lambda x: x.content, my_msgs)))

        mc = MarkovChain()
        mc.generateDatabase(my_msgs_content)
        sentence = mc.generateString()

        snakeme = discord.Embed()
        snakeme.set_author(
            name="{0}#{1}".format(author.name, author.discriminator),
            icon_url="https://cdn.discordapp.com/avatars/{0}/{1}".format(
                author.id, author.avatar) if author.avatar is not None else
            "https://img00.deviantart.net/eee3/i/2017/168/3/4/"
            "discord__app__avatar_rev1_by_nodeviantarthere-dbd2tp9.png")
        snakeme.description = "*{0}*".format(
            snakify(sentence)
            if sentence is not None else ":question: Not enough messages")
        await channel.send(embed=snakeme)
Example #35
def main():
    with open("test.txt", "r") as myfile:
        data = myfile.read().replace('\n', '')
    mc = MarkovChain("./markovdb")

    # Start a session so we can have persistent cookies
    session = requests.Session()

    # This is the form data that the page sends when logging in
    login_data = {
        'user_email': EMAIL,
        'user_password': PASSWORD,
        'login': '******',
    }

    # Authenticate
    r = session.post(URL, data=login_data)

    mc.generateDatabase(data)

    for x in range(0, 5):
        r = os.urandom(16).encode('hex')
        title = "Report#" + str(x) + " " + str(r)
        description = mc.generateString()

        # restrict coordinates to Europe
        y, x = uniform(-17, 43), uniform(28, 55)

        print (title)

        # Create new report based on random content
        report_data = {
            'title': title,
            'category': "2",
            'description': description,
            'latitude': x,
            'longitude': y,
            'newreport': "1",
        }

        r = session.post(newRep, data=report_data)
Example #37
def load_or_train_board(board):
    mc_path = './data/{}-data'.format(board)
    images_path = './data/{}-images'.format(board)

    mc = MarkovChain(mc_path)

    if not os.path.isfile(mc_path) or not os.path.isfile(images_path):
        images = analyze_board(mc, board)
    else:
        with open(images_path, 'rb') as images_file:
            images = pickle.load(images_file)

    return mc, images
Example #38
    def __init__(self):
        self.presenter = ""
        self.title = ""

        self.slide_count = 0
        self.slide_min = 15
        self.slide_max = 25

        self.console = None
        self.output_dir = ""

        with open("terms.json", "r") as f:
            self.terms = json.load(f)

        with open(os.path.join("GIFs", "hashes.json"), "r") as f:
            self.gifs = json.load(f)

        with open(os.path.join("Images", "hashes.json"), "r") as f:
            self.images = json.load(f)

        # Load up the proverb data
        with open(os.path.join("Proverbs", "facts"), "r") as f:
            self.proverb_lines = f.readlines()
        self.proverbs = map(string.strip, self.proverb_lines)
        self.proverb_markov = MarkovChain("markov.db")
        self.proverb_markov.generateDatabase("".join(self.proverb_lines), n=1)

        # Make the text data
        # self.my_face = comptroller.face(self.title)
        # self.slide_titles = self.my_face.get_titles(50)
        # self.slide_bullets = self.my_face.get_bullets(100)

        self.my_face = Face("")

        self.slide_titles = ["shit", "balls", "butts"]
        self.slide_bullets = ["butts", "do", "stuff", "f***s", "more f***s"]

        self.ppt = Presentation()
        self.slide_weights = SlideWeights()
Example #39
    def __init__(self):
        self.presenter = ""
        self.title = ""

        self.slide_count = 0
        self.slide_min = 15
        self.slide_max = 25

        self.console = None
        self.output_dir = ""

        with open("terms.json", "r") as f:
            self.terms = json.load(f)

        gifpath = os.path.join('gifs', 'hashes.json')
        if os.path.exists(gifpath):
            log.info('Loading previously farmed gifs')
            with open(gifpath, "r") as f:
                self.gifs = json.load(f)
        else:
            log.info('No gifs found')
            self.gifs = dict()

        imgpath = os.path.join('images', 'hashes.json')
        if os.path.exists(imgpath):
            log.info('loading previous saved images')
            with open(imgpath, "r") as f:
                self.images = json.load(f)
        else:
            log.info('no farmed images found')
            self.images = dict()

        # Load up the proverb data
        with open(os.path.join("proverbs", "facts"), "r") as f:
            self.proverb_lines = f.readlines()
        self.proverbs = [x.strip() for x in self.proverb_lines]
        self.proverb_markov = MarkovChain("markov.db")
        self.proverb_markov.generateDatabase("".join(self.proverb_lines), n=1)

        self.my_face = Face("")

        self.slide_titles = ["shit", "balls", "butts"]
        self.slide_bullets = ["butts", "do", "stuff", "f***s", "more f***s"]

        self.ppt = Presentation()
        self.slide_weights = SlideWeights()
Example #40
    def __init__(self):
        self.presenter = ""
        self.title = ""

        self.GIPHY_API_KEY = 'FILL API KEY HERE'

        self.slide_count = 0
        self.slide_min = 15
        self.slide_max = 25

        self.console = None
        self.output_dir = ""

        with open("terms.json", "r") as f:
            self.terms = json.load(f)

        try:
            with open(os.path.join("GIFs", "hashes.json"), "r") as f:
                self.gifs = json.load(f)
        except (IOError, ValueError):
            self.gifs = {}
            with open(os.path.join("GIFs", "hashes.json"), "w") as f:
                json.dump(self.gifs, f, indent=2)

        with open(os.path.join("Images", "hashes.json"), "r") as f:
            self.images = json.load(f)

        # Load up the proverb data
        with open(os.path.join("Proverbs", "facts"), "r") as f:
            self.proverb_lines = f.readlines()
        self.proverbs = map(string.strip, self.proverb_lines)
        self.proverb_markov = MarkovChain("markov.db")
        self.proverb_markov.generateDatabase("".join(self.proverb_lines), n=1)

        # Make the text data
        # self.my_face = comptroller.face(self.title)
        # self.slide_titles = self.my_face.get_titles(50)
        # self.slide_bullets = self.my_face.get_bullets(100)

        self.my_face = Face("")

        self.slide_titles = ["shit", "balls", "butts"]
        self.slide_bullets = ["butts", "do", "stuff", "f***s", "more f***s"]

        self.ppt = Presentation()
        self.slide_weights = SlideWeights()
Example #41
def generate_database():
    """
    Generates the database that the Markov Chain will use to make its word-by-
    word predictions. It will attempt to create this file in the same directory
    as where the script is currently located.
    """
    currpath = os.path.dirname(__file__)
    path_to_data = os.path.join(currpath, 'in.txt')

    chain = MarkovChain()

    with open(path_to_data) as f:
        chain.generateDatabase(f.read())
        chain.dumpdb()

    print(chain.generateString())
Example #42
def main():
    df = fill_song_pd()
    lyrics = ""  #going to be one huge string
    db_name = './markov/' + genre
    mc = MarkovChain(db_name)
    #creating new markov dataset if it doesn't exist
    if not os.path.isfile(db_name):
        print("creating new data set based on the " + str(genre) + " genre...")
        for index, row in df.iterrows():
            if row['genre'] == genre_dict[genre]:
                lyrics += row["lyrics"] + " "
        # build and persist the database once all lyrics are collected
        mc.generateDatabase(lyrics)
        mc.dumpdb()

    for i in range(int(lines) + 1):
        print(mc.generateString())
Example #44
# generates text using a Markov chain
# uses the PyMarkovChain implementation: https://pypi.python.org/pypi/PyMarkovChain/
# to install on linux, run "pip install PyMarkovChain" in the shell

# imports
from pymarkovchain import MarkovChain
import re, string

# first, read in file with training text data (I made mine by getting all my blog text as a .xml, grabbing only the nodes with the actual blog text in them and then scrubbing all html tags)
f = open("extractedText.txt", "r")

# tidy up our text input a bit
textToScrub = f.read()
text = re.sub(r'^https?:\/\/.*[\r\n]*', '', textToScrub,
              flags=re.MULTILINE)  #get rid of urls/links
text = text.replace(u'\xa0', u' ')  #throw out those pesky non-breaking spaces

# then create the markov chain generator
mc = MarkovChain("./markov")
mc.generateDatabase(text)

# finally, generate some text -- each iteration prints a newly generated string
for num in range(1, 10):
    print(mc.generateString())
Example #45
# https://github.com/TehMillhouse/PyMarkovChain
# pip install PyMarkovChain
from pymarkovchain import MarkovChain

mc = MarkovChain("./am_m")
f = open('cap_short.txt', 'r')
mc.generateDatabase(f.read())
for x in range(0, 20):
    print(mc.generateString())
Example #46
import discord
import importlib
import os
import time
import os.path
import re
from datetime import datetime
from pymarkovchain import MarkovChain
from glob import glob
from random import randint

bot = discord.Client()
bot.login('USERNAME', 'PASSWORD')

# comment out the next 5 lines unless you have a markov database
importlib.import_module("plugins")
mc = MarkovChain()
with open(r'C:\Python35\discordbot\logpruned.txt', 'r',
          encoding="utf8") as log:
    thelog = log.read()
mc.generateDatabase(thelog)

#for plugin in glob("C:/Python35/discordbot/plugins/[!_]*.py"):
#	module = 'plugins.' + plugin[31:-3]
#	print(module)
#	print(plugin)
#	try:
#		importlib.import_module(module)
#	except Exception as e:
#		print('Failed to import {0}: {1}'.format(plugin, e))

Example #47
#!/usr/bin/env python

from music21 import *
from pymarkovchain import MarkovChain
from random import randrange
import copy
import os
import inspect
mc = MarkovChain("./markov")

files = [
    '/Users/telenardo/Downloads/midi0.mid',
    '/Users/telenardo/Downloads/midi1.mid',
    '/Users/telenardo/Downloads/midi2.mid',
    '/Users/telenardo/Downloads/midi3.mid',
    '/Users/telenardo/Downloads/midi4.mid'
]
db = ''
assoc = {}

path = 'MidiMelodies'

#for s in files:

for filename in os.listdir(path):
    s = converter.parse(path + '/' + filename)
    part = s.parts[0]

    for cur_note in part.notes:

        name = cur_note.fullName
Example #48
        description="Generate Tumblr posts from a Markov chain database.")
PARSER.add_argument("filename", metavar="CORPUS", type=str,
        help="The corpus to use in generating text.")
PARSER.add_argument("number", metavar="NUMBER", type=int,
        help="The number of strings to generate.")
PARSER.add_argument('--minlen', metavar="LENGTH", type=int,
        help="Throw out strings shorter than this.", default=3)
PARSER.add_argument('--notags', action="store_true",
        help="Don't generate tags (legacy database compat behaviour)")

ARGS = PARSER.parse_args()

FILENAME = ARGS.filename
NUMBER = ARGS.number

BOT = MarkovChain(FILENAME)

VALID_SENTENCES = 0
while VALID_SENTENCES < NUMBER:
    SENTENCE = BOT.generateString()
    if len(SENTENCE.split()) < ARGS.minlen:
        continue
    VALID_SENTENCES += 1
    print(SENTENCE)

    if not ARGS.notags:
        try:
            TAGS=BOT.generateStringWithSeed("#")
            print(TAGS)    
            print(" --- ")
        except pymarkovchain.StringContinuationImpossibleError as e:
Example #49
#helper functions
def ucfirst(sentence):
    return sentence[0].upper() + sentence[1:]

def file_get_contents(filename):
    with open(filename) as f:
        return f.read()

def validate_tweet(status):
    num_words = len(status.split(" ")) + 1
    if num_words < 3:
        return False
    return True

#setup 
mc = MarkovChain("./markov")
api = twitter.Api(consumer_key=consumer_key,
                  consumer_secret=consumer_secret,
                  access_token_key=access_token,
                  access_token_secret=access_secret)

#pull train_text
train_text = ""
for meta in src_dirs:
    _dir = meta[0]
    num_files = meta[1]
    for root, dirs, files in os.walk(_dir, topdown=False):
        random.shuffle(files)
        for name in files:
            num_files -= 1
            if num_files > 0:
Example #50
#!/usr/bin/env python

from pymarkovchain import MarkovChain
# Create an instance of the markov chain. By default, it uses MarkovChain.py's location to
# store and load its database files to. You probably want to give it another location, like so:
mc = MarkovChain("C:/Users/Andrew/OneDrive/Documents/Northwestern/Courses/495-Data-Science/Final Project")
# To generate the markov chain's language model, in case it's not present
mc.generateDatabase("It is nice to meet you.  I would like to meet your friend.")
# To let the markov chain generate some text, execute
for i in range(10):
        print(mc.generateString())
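The comments above note that MarkovChain stores and loads its database at the given path, but the generated model only reaches disk when it is dumped; a short sketch of persisting and reloading (./example-db is an illustrative path):

from pymarkovchain import MarkovChain

mc = MarkovChain("./example-db")  # illustrative path
mc.generateDatabase("It is nice to meet you. I would like to meet your friend.")
mc.dumpdb()  # write the model to ./example-db

mc2 = MarkovChain("./example-db")  # loads the dumped database
print(mc2.generateString())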
Example #51
    inputText = filehandler.loadTextFile(sample)    

    if args.upper_case:
        inputText = inputText.upper()

    if args.lower_case:
        inputText = inputText.lower()

    if args.title_case:
        inputText = inputText.title()
    
    # generate a markov chain based text from the input
    if args.generate and args.generate > 0:
        # disable error message about on-the-fly database
        logging.disable(logging.WARNING)
        mc = MarkovChain("./markov-chain-database")
        mc.generateDatabase(inputText)

        # reinstate logging
        logging.disable(logging.NOTSET)

        generatedText = ""
        while len(generatedText) < args.generate:
            if generatedText is not "":
                generatedText = generatedText + " "
            generatedText = generatedText + mc.generateString()
        inputText = generatedText

    if args.filter_punctuation:
        inputText = text.removePunctuation(inputText)
Example #52
class BuzzFeeder(object):
    def __init__(self, **kwargs):
        self.chain = MarkovChain("%s/static/markov" % dirname(__file__))
        self.proceed = True
        for k, v in kwargs.items():
            if k not in defaults:
                raise ValueError("unknown option: %s" % k)
            setattr(self, k, v)
        with open(self.filename) as f:
            self.data = load(f)
        if not getattr(self, 'seed', None):
            self.seed = False

    @property
    def titles(self):
        return map(lambda x: x['title'], filter(lambda y: y, self.data))

    @property
    def text(self):
        return rc(r'[%s]' % escape(punctuation)) \
            .sub(" b", "\n".join(self.titles).lower())

    def generate_database(self):
        self.chain.generateDatabase(self.text)

    def ask(self, prompt, opts=[]):
        prompt = ">>>  " + prompt
        if opts:
            prompt += " [%s]" % "|".join(opts)
        response = raw_input(prompt).lower()
        if 'x' in response:
            self.proceed = False
            return self.proceed
        if opts and response not in opts:
            raise ValueError
        return response

    def prompt(self, candidate):
        print ">>> '%s'" % candidate
        if not self.proceed:
            return False
        q = self.ask("Tweet this text?", opts=['y', 'n'])
        if not q:
            return False
        if 'y' in q:
            return candidate
        if 'n' in q:
            if 'y' in self.ask("Edit this text?", opts=['y', 'n']):
                return self.ask("Enter edited text: ")
            else:
                return True

    def generate(self):
        if not self.seed:
            yielder = self.chain.generateString
        else:
            yielder = self.chain.generateStringWithSeed
        yargs = [] if not self.seed else [self.seed]
        while self.proceed:
            yield yielder(*yargs) \
                .split(".py")[-1] \
                .strip() \
                .title()

    def run(self):
        print "[ press X to stop at any time ]"
        with open(self.output, "a") as tweets:
            for candidate in self.generate():
                response = self.prompt(candidate)
                if not response:
                    break
                if not isinstance(response, bool):
                    tweets.write(response.encode('ascii', 'ignore'))
                    tweets.write('\n')
                print ''
Example #53
    lyrics = scraper.scrape(chorus, song_list)

    # Clean up lyrics
    lyrics = lyrics.replace('(', '').replace(')', '')
    lyrics = lyrics.replace('"', '')
    lyrics = lyrics.lower()
    #print(lyrics)

# Import lyrics
#else:
#    with open(db_path, 'r') as inputfile:
#        for line in inputfile:
#            lyrics += line + '\n'

# Create model instance
mc = MarkovChain('db/' + db_name)

# Generate db
mc.generateDatabase(lyrics)
mc.dumpdb()

#-------------------------------------------------------------------------------
# Write lyrics
f = open(folder + db_name + '.txt', 'w')

for i in range(0, int(number_of_phrases)):
    seed = str(input('seed: '))
    f.write(mc.generateStringWithSeed(seed) + '\n')
#f.write(mc.generateString() + '\n')

f.close()
Example #54
        if "//" in t:
            continue
        if "cw: " in t:
            continue

        # Prune short tags
        if ARGS.prune and len(t) <= 3:
            continue

        # Tags which are just numbers should not be in the corpus
        try:
            int(t.strip())
            continue
        except ValueError:
            pass

        if ARGS.nohash:
            CORPUS += t + " "
        else:
            CORPUS += '#' + t + " "
    CORPUS += "\n"

if ARGS.debug:
    print(CORPUS)
    exit(1)
print("Generating database...")
BOT = MarkovChain(TARGET_FILE)
BOT.generateDatabase(CORPUS)
print("Dumping database to {}".format(TARGET_FILE))
BOT.dumpdb()
Example #55
def main(username):
    r = praw.Reddit(user_agent='trollolol v0.1')
    r.config.decode_html_entities = True

    m = MarkovChain('markov-data/%s.chain' % username)

    last_comment = None
    try:
        last_comment = Node.objects(
            username=username).order_by('-created').first()
        if last_comment:
            print("Checking for new messages.")
            comments = r.get_redditor(username).get_comments(
                limit=500, params={'after': last_comment.node_id})
        else:
            raise
    except:
        print("No messages fetched yet, doing inital import")
        comments = r.get_redditor(username).get_comments(limit=500)

    for comment in comments:
        try:
            node = Node.objects.get(node_id=comment.name)
        except:
            node = Node(node_id=comment.name,
                        parent_id=comment.parent_id,
                        body=comment.body,
                        created=comment.created,
                        username=username)
            node.save()

    first_comment = Node.objects(
        username=username).order_by('+created').first()
    if first_comment:
        print("Checking for messages before %s." % first_comment.node_id)
        comments = r.get_redditor(username).get_comments(
            limit=500, params={'before': first_comment.node_id})

        for comment in comments:
            try:
                node = Node.objects.get(node_id=comment.name)
            except:
                node = Node(node_id=comment.name,
                            parent_id=comment.parent_id,
                            body=comment.body,
                            created=comment.created,
                            username=username)
                node.save()

    comments = Node.objects(username=username).all()

    corpus = []
    for comment in comments:
        corpus.append(comment.body)

    shuffle(corpus)
    if len(corpus) > 0:
        print(
            "We have %i messages to work with. Building new markov corpus now."
            % len(corpus))
        m.generateDatabase(" ".join(corpus))

        print("Looking for acceptable output for first round of transforms.")
        output = []
        tries = 0
        while len(output) < 10 and tries < 100:  # cap attempts to avoid spinning forever
            tries = tries + 1
            result = m.generateString()
            if len(result.split(" ")) >= 10:
                sys.stdout.write("x")
                output.append(result)
            else:
                sys.stdout.write(".")

        print("")

        response = ""
        for result in output:
            response = response + " " + result

        print(response)
    else:
        print("No comments found.")
Example #56
lyrics_directory = "data1/"
files = glob.glob(lyrics_directory + '*.txt')
# iterate over the list getting each file
all_lyrics = ""
for file in files:
    # open the file and then call .read() to get the text
    print(file)
    with open(file) as f:
        text = f.read()
        verse_lyrics = parse_file(text)
        verse_lyrics = re.sub("[\[\]\(\)\"]", " ", verse_lyrics)
        verse_lyrics = re.sub(" +", " ", verse_lyrics)
        all_lyrics += verse_lyrics

mc = MarkovChain("test")
mc.generateDatabase(all_lyrics)

output_directory = "generated_lyrics/"
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

number_of_phrases = 8
num_files = 1000
for i in range(num_files):
    # Printing a string
    with open(output_directory + "{}.txt".format(i), "w") as f:
        for i in range(0, int(number_of_phrases)):

            while True:
                line = mc.generateString()
Example #57
from pymarkovchain import MarkovChain

mc = MarkovChain("./markov")

texts = [
    "text/confessions.txt",
    "text/discourses-and-social-contract.txt",
    "text/emile.txt"
]

entire_string = ""

for text_url in texts:
    f = open(text_url, 'r')
    entire_string += f.read()
    entire_string += "\n"
    f.close()

test = open("test.txt", 'w')
test.write(entire_string)
test.close()

mc.generateDatabase(entire_string, '\n')

print(mc.generateString())

for i in range(10000):
    f = open("output/{0}.txt".format(i), 'w')
    f.write(mc.generateString().strip())
    f.close()
Example #58
from pymarkovchain import MarkovChain

API_URI = "http://lyrics.wikia.com/api.php?action=lyrics&fmt=realjson"
if __name__ == '__main__':

    if len(sys.argv) != 3:
        raise "Usage: python3 py-simple-lyric-generator \"[artist_name]\" [number_of_phrases_to_generate]"

    artist_name = sys.argv[1]
    number_of_phrases = sys.argv[2]
    params = {'artist': artist_name}

    # Generating a Markov Chain Model
    db_name_hashed = "db/" + hashlib.md5(
        artist_name.lower().encode('utf-8')).hexdigest()
    mc = MarkovChain(db_name_hashed)

    # Checking if the database already exists, if so uses the cache instead another API call
    if not os.path.isfile(db_name_hashed):
        print(
            "No data cached. Please be patient while we search the lyrics of %s."
            % artist_name)

        # Adding lyrics to a single gigant string
        lyrics = ''

        # Parsing each lyric from this artist.
        # [http://api.wikia.com/wiki/LyricWiki_API]
        artist = requests.get(API_URI, params=params).json()
        for album in artist['albums']:
            for song in album['songs']:
Example #59
	objlist = [img, rect, name, label0, label1, label2]
	return objlist

def moveAll(shapeList, dx, dy):
    for shape in shapeList: 
        shape.move(dx, dy)

def moveAllOnLine(shapeList, dx, dy, repetitions, delay):
    for i in range(repetitions):
        moveAll(shapeList, dx, dy)
        time.sleep(delay)

writeFile = makeWriteFile()

mcTrump = MarkovChain("db/trumpdb")
mcCruz = MarkovChain("db/cruzdb")
mcRubio = MarkovChain("db/rubiodb")
mcKasich = MarkovChain("db/kasichdb")
mcCarson = MarkovChain("db/carsondb")
mcJeb = MarkovChain("db/jebdb")
mcChristie = MarkovChain("db/christiedb")

#List of candidates to be included in Debate
candidateList = ["Trump", "Rubio", "Christie", "Cruz", "Carson", "Jeb", "Kasich"]
candidateInfo = {}

z = 0
for name in candidateList:
	if name == "Trump":
		info = [mcTrump, ["Donald", "Donald's", "Trump", "Trump's", "Don"], "Mr. Trump", "TRUMP: "]
Example #60
		if len(markovString) > 50 and len(markovString) < 140:
			return markovString

# twitterPoster posts the string to Twitter
# See http://www.dototot.com/how-to-write-a-twitter-bot-with-python-and-tweepy/ for details
def twitter_poster(string):
	#enter the corresponding information from your Twitter application:
	CONSUMER_KEY = 'consumerkey'#keep the quotes, replace this with your consumer key
	CONSUMER_SECRET = 'consumersecret'#keep the quotes, replace this with your consumer secret key
	ACCESS_KEY = 'accesskey'#keep the quotes, replace this with your access token
	ACCESS_SECRET = 'accesssecret'#keep the quotes, replace this with your access token secret
	auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
	auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
	api = tweepy.API(auth)
	api.update_status(string)

# The main script will 
# 1. connect to a Markov database
# 2. generate a word pool and a database if it doesn't exist already
# 3. generate a string from the markovChain
# 4. post the string to Twitter
if __name__ == '__main__':
	databaseName = 'database.p'
	markovChain = MarkovChain(databaseName)
	if not os.path.isfile(databaseName):
		wordpool = wordpool_generator()
		database_generator(markovChain, wordpool)
	else:
		print('Database already exists, skipping database creation...')
	string = string_generator(markovChain)
	twitter_poster(string)