Exemplo n.º 1
0
class Reddit:
    """Look through Reddit comments for text that can be laid out as a 5-7-5 haiku.

    Attributes:
        agent: ``praw.Reddit`` client created with the "cs_haiku_bot" user agent.
        haiku: ``Haiku`` instance created alongside the client (not used by the
            methods of this class).
        sc: ``SyllableCounter`` used for every syllable count.
    """

    def __init__(self):
        """Create the Reddit client, the haiku generator and the syllable counter."""
        self.agent = praw.Reddit(user_agent="cs_haiku_bot")
        self.haiku = Haiku()
        self.sc = SyllableCounter()

    def allComments(self):
        """Check the comments returned by ``get_all_comments(limit=300)``."""
        self._processComments(self.agent.get_all_comments(limit=300))

    def subredditComments(self, subreddit):
        """Check the comments returned by ``get_comments(limit=300)`` of *subreddit*."""
        sub = self.agent.get_subreddit(subreddit)
        self._processComments(sub.get_comments(limit=300))

    def _processComments(self, comments):
        """Run the candidate check and haiku layout on the body of each comment."""
        # The bodies are consumed one at a time; no intermediate list is built.
        for comment in comments:
            body = comment.body
            if self.isHaikuCandidate(body):
                self.prepareHaiku(body)

    def isHaikuCandidate(self, comment):
        """Return True when the line counts of *comment* add up to 17.

        Returns False when ``getLineCounts`` yields nothing (empty or None).
        """
        counts = self.sc.getLineCounts(comment)
        return bool(counts) and sum(counts) == 17

    def prepareHaiku(self, comment):
        """Lay *comment* out as lines of 5, 7 and 5 syllables and print it.

        Words are taken in order and a new line is started every time the
        running syllable count hits the target of the current line.

        Returns:
            The haiku text (lines joined with a newline), or None when the
            words cannot be broken exactly on the 5/7/5 boundaries. Nothing
            is printed in the None case.
        """
        lineCounts = [5, 7, 5]
        lastLine = len(lineCounts) - 1

        lines = [[]]
        count = 0

        for word in comment.split():
            word = str(word)
            lineIndex = len(lines) - 1

            # The current line is full: open the next one. On the last line
            # there is nothing to open, so a trailing token stays there and
            # is judged by the overshoot check below (a zero-syllable token
            # is kept, anything else rejects the comment).
            if count == lineCounts[lineIndex] and lineIndex < lastLine:
                lines.append([])
                lineIndex += 1
                count = 0

            lines[lineIndex].append(word)
            count += self.sc.syllableCount(word)

            # A word straddling a line boundary means the text is not a haiku.
            # NOTE(review): the negative check is kept from the earlier code;
            # presumably syllableCount can report a negative value - confirm.
            if count < 0 or count > lineCounts[lineIndex]:
                return None

        # All three lines must exist and the last one must be complete.
        if len(lines) != len(lineCounts) or count != lineCounts[lastLine]:
            return None

        haiku = "\n".join(" ".join(line) for line in lines)

        print("Haiku: \n")
        print(haiku)
        return haiku
Exemplo n.º 2
0
	def __init__(self,grammar,season):
		"""Store the grammar and season and open the haiku database.

		grammar and season are stored unchanged on the instance.
		"""
		self.syllableCounter = SyllableCounter()
		self.grammar = grammar
		self.season = season

		# Keep the connection on the instance (not just the cursor) so that it
		# can be committed or closed explicitly later.
		self.connection = sqlite3.connect("data/haiku.db")
		self.cursor = self.connection.cursor()
Exemplo n.º 3
0
	def __init__(self):
		"""Open data/haiku.db, reset the counters and load the tag conversion table."""
		self.connection = sqlite3.connect('data/haiku.db')
		self.cursor = self.connection.cursor()
		self.syllableCounter = SyllableCounter()

		path = "data/ngrams"

		# Ordered 2-grams .. 5-grams.
		self.fileList = [path+"/2grams.txt",
				path+"/3grams.txt",
				path+"/4grams.txt",
				path+"/5grams.txt"]

		self.fiveCount = 0
		self.sevenCount = 0
		self.segmentList = {}
		self.posList = {}
		self.bigramList = {}
		self.conversion = {}

		# Each CSV row maps its first column to its second column. The file is
		# opened in a with-block so the handle is closed once the rows are read.
		with open("data/tagset/conversion.csv","rb") as conversionFile:
			for row in csv.reader(conversionFile):
				self.conversion[row[0]] = row[1]
Exemplo n.º 4
0
 def __init__(self):
     """Set up the syllable counter, the grammar and the season."""
     self.syllableCounter = SyllableCounter()
     # The value returned by setGrammar() is kept as the instance grammar.
     self.grammar = self.setGrammar()
     # Called for its effect only; no return value is used here.
     self.setSeason()
Exemplo n.º 5
0
class Haiku:
    """Build haiku from the segment and bigram tables of the haiku database.

    Class attributes:
        dbPath: location of the SQLite database opened by the generators.
        grammarPath: location of the grammar file read by setGrammar().

    Instance attributes:
        syllableCounter: SyllableCounter used by isHaiku().
        grammar: list of lines, each a list of tokens, chosen by setGrammar().
        season: one of "winter", "fall", "summer", "spring".
    """

    dbPath = "data/haiku.db"
    grammarPath = "data/haiku_grammar/grammar_copy"

    def __init__(self):
        """Create the syllable counter, then choose a grammar and a season."""
        self.syllableCounter = SyllableCounter()
        self.grammar = self.setGrammar()
        self.setSeason()

    def isHaiku(self, text):
        """Return True when the line counts of *text* are exactly 5, 7, 5."""
        counts = self.syllableCounter.getLineCounts(text)
        return bool(counts) and list(counts) == [5, 7, 5]

    def createNaiveHaiku(self):
        """Return three random segments (five / seven / five) joined by newlines.

        Ids are drawn between 1 and the row count of each table, so the
        tables are expected to hold ids 1..count without gaps.
        """
        fiveQuery = "select segment from FiveSyllables where id = ?"
        sevenQuery = "select segment from SevenSyllables where id = ?"

        connection = sqlite3.connect(self.dbPath)
        try:
            cursor = connection.cursor()
            cursor.execute("select count(*) from FiveSyllables")
            numFiveRecords = cursor.fetchone()[0]
            cursor.execute("select count(*) from SevenSyllables")
            numSevenRecords = cursor.fetchone()[0]

            # All three ids are drawn before any segment is fetched.
            firstIndex = int(randint(1, numFiveRecords))
            secondIndex = int(randint(1, numSevenRecords))
            thirdIndex = int(randint(1, numFiveRecords))

            haiku = []
            for query, index in ((fiveQuery, firstIndex),
                                 (sevenQuery, secondIndex),
                                 (fiveQuery, thirdIndex)):
                cursor.execute(query, (index,))
                haiku.append(cursor.fetchone()[0])
        finally:
            # Close the connection even when a query or lookup fails.
            connection.close()

        return "\n".join(haiku)

    def createEvolvedHaiku(self):
        """Mutate a naive haiku 5000 times, printing each step and the best one.

        Returns:
            The haiku with the highest fitness seen (the initial haiku when
            no mutation scores higher).
        """
        # Individual takes both the grammar and the season.
        individual = Individual(self.grammar, self.season)
        initialHaiku = self.createNaiveHaiku()

        bestHaiku = initialHaiku
        bestFitness = individual.fitness(initialHaiku)

        for i in range(5000):
            initialHaiku = individual.naiveMutate(initialHaiku)
            fitnessLevel = individual.fitness(initialHaiku)

            if fitnessLevel > bestFitness:
                bestHaiku = initialHaiku
                bestFitness = fitnessLevel

            print(initialHaiku + "\n")
            print("count is: " + str(i))

        print("Best Haiku is :\n" + bestHaiku)
        print("\nEvaluation is:\n" + str(bestFitness))
        return bestHaiku

    def _randomRow(self, rows, tag):
        """Return a random element of *rows*; raise ValueError when it is empty."""
        if not rows:
            raise ValueError("no PoeticBigrams entry for part of speech %r" % (tag,))
        return rows[int(randint(0, len(rows) - 1))]

    def grammarHaiku(self):
        """Fill every slot of the grammar with a word from PoeticBigrams.

        Punctuation tokens are copied as they are. The first word of a line
        is a word of the right tag that is followed by the next tag when such
        a pair exists. Every later word is, in order of preference: a word
        that follows the previous word with the right tag, a word that
        follows the previous tag with the right tag, or any word recorded
        with the tag.

        Returns:
            The lines joined with newlines, words separated by single spaces.

        Raises:
            ValueError: when no word is recorded for a tag.
        """
        grammar = self.grammar
        newGrammar = [[None] * len(tags) for tags in grammar]

        pairQuery = "select firstWord,secondWord from PoeticBigrams where firstPos=? and secondPos=?"
        firstQuery = "select firstWord from PoeticBigrams where firstPos=?"
        afterWordQuery = "select secondWord from PoeticBigrams where firstWord = ? and secondPos = ?"
        afterPosQuery = "select secondWord from PoeticBigrams where firstPos = ? and secondPos = ?"
        anyQuery = "select firstWord from PoeticBigrams where firstPos = ?"

        connection = sqlite3.connect(self.dbPath)
        try:
            cursor = connection.cursor()

            for line, tags in enumerate(grammar):
                for index, tag in enumerate(tags):
                    if tag in string.punctuation:
                        newGrammar[line][index] = tag
                        continue

                    if index == 0:
                        wordList = []
                        # A one-token line has no following tag to pair with.
                        if len(tags) > 1:
                            wordList = cursor.execute(pairQuery, (tag, tags[1])).fetchall()
                        if not wordList:
                            wordList = cursor.execute(firstQuery, (tag,)).fetchall()
                    else:
                        wordList = cursor.execute(
                            afterWordQuery, (newGrammar[line][index - 1], tag)
                        ).fetchall()
                        if not wordList:
                            wordList = cursor.execute(afterPosQuery, (tags[index - 1], tag)).fetchall()
                        if not wordList:
                            wordList = cursor.execute(anyQuery, (tag,)).fetchall()

                    # Only the first column is used: the second word of a
                    # pair is always chosen by its own iteration.
                    newGrammar[line][index] = str(self._randomRow(wordList, tag)[0])
        finally:
            connection.close()

        return "\n".join(" ".join(line) for line in newGrammar)

    def setGrammar(self):
        """Pick one grammar at random from the grammar file and return it.

        The file is read in groups: every fourth line closes the group
        collected so far and becomes the first line of the next group. A
        leading line of at most one character is dropped from the chosen
        group.
        NOTE(review): lines after the last multiple of four are never used;
        the file presumably ends with a separator line - confirm.

        Returns:
            A list of lines, each split on single spaces.
        """
        count = 0
        grammar = ""
        grammars = []

        with open(self.grammarPath) as grammarFile:
            for line in grammarFile:
                count += 1
                if count % 4 == 0:
                    grammars.append(grammar)
                    grammar = ""
                grammar += line

        randIndex = int(randint(0, len(grammars) - 1))

        splitLines = grammars[randIndex].splitlines()
        if len(splitLines[0]) <= 1:
            splitLines = splitLines[1:]

        return [line.split(" ") for line in splitLines]

    def getGrammar(self):
        """Return the grammar chosen for this instance."""
        return self.grammar

    def setSeason(self):
        """Choose the season at random among winter, fall, summer and spring."""
        seasons = ["winter", "fall", "summer", "spring"]
        self.season = choice(seasons)

    def getSeason(self):
        """Return the season chosen for this instance."""
        return self.season

    def evolvedGrammarHaiku(self):
        """Mutate a grammar haiku 25 times, printing every step and the best ones.

        Only haiku scoring above 0 and above every earlier score are kept.

        Returns:
            The best haiku seen, or "" when no mutation scored above 0.
        """
        bestHaiku = ""
        bestFitness = 0

        individual = Individual(self.grammar, self.season)
        newHaiku = self.grammarHaiku()

        best = []

        for i in range(25):
            print(i)
            newHaiku = individual.mutate(newHaiku)
            currFitness = individual.fitness(newHaiku)

            print(newHaiku)
            print(currFitness)
            print("\n")

            if currFitness > bestFitness:
                best.append((currFitness, newHaiku))
                bestHaiku = newHaiku
                bestFitness = currFitness

        print("Best haiku is:")
        print(bestHaiku)
        print(bestFitness)
        print(self.grammar)

        print("Best Haiku: ")
        print(sorted(best))
        return bestHaiku
Exemplo n.º 6
0
class DatabaseInit:
    """Create and fill the tables of data/haiku.db from the data files.

    Attributes:
        connection, cursor: SQLite connection to data/haiku.db and its cursor.
        syllableCounter: SyllableCounter used for all syllable counts.
        fileList: the 2-gram to 5-gram files, in that order.
        fiveCount, sevenCount: last id used in FiveSyllables / SevenSyllables.
        segmentList: lower-cased segments already seen (used as a set).
        posList: empty dict created here; not used by the methods of this class.
        bigramList: first word -> {second word: [firstPos, secondPos, count]},
            plus a "totCount" entry holding the summed count per first word.
        conversion: tag conversion table loaded from conversion.csv.
    """

    def __init__(self):
        """Open the database, reset the counters and load the tag conversion table."""
        self.connection = sqlite3.connect('data/haiku.db')
        self.cursor = self.connection.cursor()
        self.syllableCounter = SyllableCounter()

        path = "data/ngrams"

        self.fileList = [path + "/2grams.txt",
                         path + "/3grams.txt",
                         path + "/4grams.txt",
                         path + "/5grams.txt"]

        self.fiveCount = 0
        self.sevenCount = 0
        self.segmentList = {}
        self.posList = {}
        self.bigramList = {}
        self.conversion = {}

        # Each row maps its first column to its second column; the with-block
        # closes the file once the rows are read.
        with open("data/tagset/conversion.csv", "rb") as conversionFile:
            for row in csv.reader(conversionFile):
                self.conversion[row[0]] = row[1]

    def TablesInit(self):
        """Create all tables and populate them from the data files.

        Raises whatever sqlite3 raises when a table already exists.
        """
        self.cursor.execute('''CREATE TABLE FiveSyllables
                    (id integer primary key, segment text)''')

        self.cursor.execute('''CREATE TABLE SevenSyllables
                    (id integer primary key, segment text)''')

        self.cursor.execute('''CREATE TABLE Bigrams
                    (id integer primary key, firstWord text, firstPos text, secondWord text,
                    secondPos text,frequency float, firstSyllables integer,secondSyllables integer)''')

        self.cursor.execute('''CREATE TABLE PoeticBigrams
                    (firstWord text, firstPos text, secondWord text,
                    secondPos text, firstSyllables integer,secondSyllables integer)''')

        self.cursor.execute('''CREATE TABLE WordAssociations
                    (cue text,target text,FSG float,BSG float, cuePos text, tarPos text)''')
        self.PoeticBigrams()
        self.WordAssociationInit()

        for fileNumber, fileName in enumerate(self.fileList):
            with open(fileName) as ngramFile:
                for line in ngramFile:
                    self.SyllableTablesInit(line, fileNumber)

                    if fileNumber == 3:  # only use 5grams data since using everything takes too long
                        self.BuildBigramTable(line)

            self.connection.commit()

        self.BigramTablesInit()

    def SyllableTablesInit(self, line, fileNumber):
        """Add the segment of one n-gram line to FiveSyllables or SevenSyllables.

        The segment is made of the tab-separated fields 1 .. fileNumber + 2
        of *line*. A segment already seen (ignoring case) is skipped. The
        first line count decides the table: 5 or 7, anything else is ignored.
        """
        fields = line.split("\t")[1:(fileNumber + 3)]
        culledLine = " ".join(word.strip("\r\n") for word in fields)

        key = culledLine.lower()
        if key in self.segmentList:
            return
        self.segmentList[key] = 0

        syllableCounts = self.syllableCounter.getLineCounts(culledLine)
        if not syllableCounts:
            return

        if syllableCounts[0] == 5:
            self.fiveCount += 1
            self.cursor.execute("INSERT into FiveSyllables VALUES (?,?)", (self.fiveCount, culledLine))
        elif syllableCounts[0] == 7:
            self.sevenCount += 1
            self.cursor.execute("INSERT into SevenSyllables VALUES (?,?)", (self.sevenCount, culledLine))

    def BuildBigramTable(self, line):
        """Accumulate the four adjacent word pairs of one 5-gram line in bigramList.

        Field 0 of *line* is the count, fields 1-5 the words and fields 6-10
        the tags. Each pair adds the count to its own entry and to the
        "totCount" entry of its first word.
        """
        splitLine = [word.strip("\t\r\n ") for word in line.split("\t")]
        count = int(splitLine[0])

        for i in range(1, 5):
            firstWord = splitLine[i]
            secondWord = splitLine[i + 1]
            firstPos = self.convertPos(splitLine[i + 5])
            secondPos = self.convertPos(splitLine[i + 6])

            followers = self.bigramList.setdefault(firstWord, {})
            if secondWord in followers:
                followers[secondWord][2] += count
            else:
                followers[secondWord] = [firstPos, secondPos, count]

            followers["totCount"] = followers.get("totCount", 0) + count

    def BigramTablesInit(self):
        """Write every accumulated pair to Bigrams with its relative frequency.

        The stored count of each pair is replaced by count / totCount of its
        first word, rounded to two decimals. Commits when done.
        """
        count = 0
        for key, followers in self.bigramList.items():
            total = followers["totCount"] + 0.0
            keySyllables = self.syllableCounter.syllableCount(key)

            for word, information in followers.items():
                if word == "totCount":
                    continue

                count += 1
                wordSyllables = self.syllableCounter.syllableCount(word)

                information[2] = float("{0:.2f}".format(information[2] / total))

                self.cursor.execute("INSERT into Bigrams VALUES (?,?,?,?,?,?,?,?)",
                                    (count, key, information[0], word, information[1], information[2],
                                     keySyllables, wordSyllables))

        self.connection.commit()

    def PoeticBigrams(self):
        """Fill PoeticBigrams from the word/TAG tokens of the tagged haiku file.

        NOTE(review): which lines are skipped, treated as a line start
        ("S==>") or split into pairs follows the modulo-3 / modulo-4 cadence
        kept exactly as found; it depends on the layout of the tagged file,
        which is not visible here.
        """
        insert = "INSERT into PoeticBigrams VALUES (?,?,?,?,?,?)"
        count = 0

        with open("data/haiku_samples/tagged_haiku") as taggedFile:
            for line in taggedFile:
                splits = line.split(" ")

                if count % 3 == 0 or line == "\n":
                    count += 1
                    continue

                if count % 4 == 0:
                    parts = splits[0].split("/")
                    self.cursor.execute(insert,
                                        ("S==>", "S==>", parts[0], parts[1],
                                         0, self.syllableCounter.syllableCount(parts[0])))
                else:
                    # The last element of splits never starts or ends a pair.
                    for index in range(len(splits) - 2):
                        first = splits[index].split("/")
                        second = splits[index + 1].split("/")
                        firstWord, firstPos = first[0], first[1]
                        secondWord, secondPos = second[0], second[1]

                        firstSyllables = self.syllableCounter.syllableCount(firstWord)
                        secondSyllables = self.syllableCounter.syllableCount(secondWord)

                        self.cursor.execute(insert,
                                            (firstWord, firstPos, secondWord, secondPos,
                                             firstSyllables, secondSyllables))

                count += 1

    def _toFloat(self, text):
        """Return float(text), or 0.0 when *text* is not a number."""
        try:
            return float(text)
        except ValueError:
            return 0.0

    def WordAssociationInit(self):
        """Fill WordAssociations from the association CSV files.

        Columns used: 0 cue, 1 target, 5 FSG, 6 BSG, 17 cue tag, 26 target
        tag. Rows too short to hold column 26 (blank lines, for instance)
        are skipped. A non-numeric FSG or BSG is stored as 0.0.
        """
        fileList = ["AB.csv",
                    "C.csv",
                    "LO.csv",
                    "PR.csv",
                    "TZ.csv",
                    "DF.csv",
                    "GK.csv",
                    "S.csv"]

        path = "data/associations/"

        for files in fileList:
            with open(path + files) as associationFile:
                for row in csv.reader(associationFile):
                    if len(row) < 27:
                        continue

                    FSG = self._toFloat(row[5])
                    BSG = self._toFloat(row[6])

                    cue = row[0].lower().strip(" ")
                    target = row[1].lower().strip(" ")
                    cuePos = row[17].strip(" ")
                    tarPos = row[26].strip(" ")

                    self.cursor.execute("INSERT into WordAssociations VALUES (?,?,?,?,?,?)",
                                        (cue, target, FSG, BSG, cuePos, tarPos))

    def convertPos(self, pos):
        """Return the converted form of the upper-cased tag, or the upper-cased tag itself."""
        pos = pos.upper()
        return self.conversion.get(pos, pos)
Exemplo n.º 7
0
	def __init__(self):
		"""Create the Reddit client, the haiku generator and the syllable counter."""
		# praw client identified by the "cs_haiku_bot" user agent.
		self.agent = praw.Reddit(user_agent="cs_haiku_bot")
		self.haiku = Haiku()
		# Kept under the short name sc.
		self.sc = SyllableCounter()
Exemplo n.º 8
0
class Individual:
    """A haiku candidate that can be mutated and scored.

    Attributes:
        syllableCounter: SyllableCounter used by isHaiku().
        grammar: list of lines, each a list of tags, aligned with the haiku.
        season: key selecting the word list used by mutate().
        connection, cursor: SQLite connection to the haiku database and its cursor.
    """

    def __init__(self, grammar, season, dbPath="data/haiku.db"):
        """Store the grammar and season and open the database at *dbPath*."""
        self.syllableCounter = SyllableCounter()
        self.grammar = grammar
        self.season = season

        # The connection is kept (not only the cursor) so it can be closed.
        self.connection = sqlite3.connect(dbPath)
        self.cursor = self.connection.cursor()

    def _randomSegment(self, countQuery, segmentQuery):
        """Return the segment of a random row; ids are drawn from 1 to the row count."""
        self.cursor.execute(countQuery)
        upperBound = self.cursor.fetchone()[0]
        # Ids start at 1: drawing 0 would select no row at all.
        randomIndex = int(randint(1, upperBound))
        self.cursor.execute(segmentQuery, (randomIndex,))
        return self.cursor.fetchone()[0]

    def naiveMutate(self, haiku):
        """Replace one line of *haiku* with a random segment of the same length.

        A draw below 6 (out of 1..11) replaces the first or the third line
        with a five-syllable segment; otherwise the second line is replaced
        with a seven-syllable segment.

        Returns:
            The mutated haiku, lines joined with newlines.
        """
        lines = haiku.split("\n")

        randomSelection = randint(1, 11)

        if randomSelection < 6:
            segment = self._randomSegment("Select count(*) from FiveSyllables",
                                          "Select segment from FiveSyllables where id = ?")
            lines[choice([0, 2])] = segment
        else:
            lines[1] = self._randomSegment("Select count(*) from SevenSyllables",
                                           "Select segment from SevenSyllables where id = ?")

        return "\n".join(lines)

    def mutate(self, haiku):
        """Replace the nouns of *haiku* with words associated with the season.

        Only the word list of the current season is built.
        """
        splitHaiku = [line.split(" ") for line in haiku.splitlines()]

        seasonList = {"summer": Summer, "winter": Winter, "fall": Fall, "spring": Spring}
        associations = seasonList[self.season]()

        splitHaiku = self.replaceNouns(splitHaiku, associations)

        return "\n".join(" ".join(line) for line in splitHaiku)

    def replaceNouns(self, splitHaiku, seed):
        """Put a random word of *seed* in every slot whose tag contains "NN".

        Each word used is removed from *seed*, so *seed* is modified in place
        and no word is used twice. When the Bigrams table knows a suitable
        neighbour for the new word, the word before and/or after it is
        replaced too. Replacement stops for a line once *seed* is empty.

        Returns:
            *splitHaiku*, modified in place.
        """
        for i, line in enumerate(splitHaiku):
            for j in range(len(line)):
                # NOTE(review): the second test compares one word with the
                # whole seed list, so it presumably always holds; it is kept
                # as found - confirm whether "not in seed" was meant.
                if "NN" in self.grammar[i][j] and line[j] != seed:

                    if len(seed) == 0:
                        break

                    replacement = seed[randint(0, len(seed) - 1)]
                    seed.remove(replacement)

                    if j != 0:
                        pre = self.cursor.execute('''select firstWord from Bigrams
                        where firstPos = ? and secondWord =?''',
                                                  (self.grammar[i][j - 1], replacement)).fetchall()

                        if len(pre) > 0:
                            index = randint(0, len(pre) - 1)
                            line[j - 1] = str(pre[index][0])

                    if j < len(line) - 1:
                        pos = self.cursor.execute('''select secondWord from Bigrams
                        where secondPos = ? and firstWord = ?''',
                                                  (self.grammar[i][j + 1], replacement)).fetchall()

                        if len(pos) > 0:
                            index = randint(0, len(pos) - 1)
                            line[j + 1] = str(pos[index][0])

                    line[j] = replacement

        return splitHaiku

    def fitness(self, haiku):
        """Score *haiku*: connectedness of its nouns plus the 5-7-5 bonus.

        For every word WordNet recognises as a noun, connectedness() with
        every word of the haiku (itself included) is added up; isHaiku()
        then adds 200 for a 5-7-5 layout.
        """
        splits = [line.split(" ") for line in haiku.split("\n")]
        words = [str(word) for word in itertools.chain(*splits)]

        similarity = 0

        for word in words:
            if wn.morphy(word, wn.NOUN):
                for secondWord in words:
                    similarity += connectedness(word, secondWord)

        similarity += self.isHaiku(haiku)

        return similarity

    def isHaiku(self, haiku):
        """Return 200 when the lines of *haiku* count 5, 7 and 5 syllables, else 0."""
        lineCounts = [
            sum(self.syllableCounter.syllableCount(word) for word in line.split(" "))
            for line in haiku.split("\n")
        ]

        return 200 if lineCounts == [5, 7, 5] else 0