class Reddit:
    """Scans Reddit comments for text that can be laid out as a 5-7-5 haiku."""

    def __init__(self):
        """Create the praw client, a Haiku generator and a syllable counter."""
        self.agent = praw.Reddit(user_agent="cs_haiku_bot")
        self.haiku = Haiku()
        self.sc = SyllableCounter()

    def allComments(self):
        """Scan the comments returned by ``get_all_comments(limit=300)``."""
        self._scanComments(self.agent.get_all_comments(limit=300))

    def subredditComments(self, subreddit):
        """Scan the comments returned for ``subreddit`` (limit 300).

        subreddit -- name passed to ``agent.get_subreddit``.
        """
        sub = self.agent.get_subreddit(subreddit)
        self._scanComments(sub.get_comments(limit=300))

    def _scanComments(self, comments):
        """Print a haiku for every comment whose body is a haiku candidate."""
        for comment in comments:
            body = comment.body
            if self.isHaikuCandidate(body):
                self.prepareHaiku(body)

    def isHaikuCandidate(self, comment):
        """Return True when the comment's line counts sum to 17 syllables.

        The syllable counter is consulted once; a falsy result from
        ``getLineCounts`` means the text is not a candidate.
        """
        counts = self.sc.getLineCounts(comment)
        return bool(counts) and sum(counts) == 17

    def prepareHaiku(self, comment):
        """Re-flow ``comment`` into 5/7/5 lines, print it and return it.

        Returns the haiku text, or None (printing nothing) when the words
        cannot be broken exactly on the 5/7/5 syllable boundaries.
        """
        splits = [line.split(" ") for line in comment.splitlines()]
        words = [str(word) for word in itertools.chain(*splits)]
        haiku = self._arrangeWords(words)
        if haiku is None:
            return None
        print("Haiku: \n")
        print(haiku)
        return haiku

    def _arrangeWords(self, words):
        """Lay ``words`` out on three lines of 5, 7 and 5 syllables.

        Returns the text (each word followed by a space, lines separated by
        a newline) or None when a line overshoots its target, a syllable
        count goes negative, or the three lines are not all completed.
        """
        lineCounts = [5, 7, 5]
        lastLine = len(lineCounts) - 1
        pieces = []
        count = 0
        lineIndex = 0
        for word in words:
            if count > lineCounts[lineIndex] or count < 0:
                return None
            if count == lineCounts[lineIndex]:
                if lineIndex == lastLine:
                    # All three lines are full. Stop here instead of
                    # advancing past the end of lineCounts, which used to
                    # raise IndexError on trailing empty tokens.
                    break
                pieces.append("\n")
                lineIndex += 1
                count = 0
            pieces.append(word + " ")
            count += self.sc.syllableCount(word)
        # The last word is never re-checked inside the loop, so validate the
        # final state before accepting the layout.
        if lineIndex != lastLine or count != lineCounts[lastLine]:
            return None
        return "".join(pieces)
def __init__(self, grammar, season):
    """Store the grammar and season and open a cursor on data/haiku.db.

    grammar -- part-of-speech template, stored as ``self.grammar``.
    season  -- season name, stored as ``self.season``.

    The connection is kept on ``self.connection`` (previously it was only a
    local) so that it can be committed or closed explicitly.
    """
    self.syllableCounter = SyllableCounter()
    self.grammar = grammar
    self.season = season
    self.connection = sqlite3.connect("data/haiku.db")
    self.cursor = self.connection.cursor()
def __init__(self):
    """Open data/haiku.db and load the part-of-speech conversion table.

    Sets up the connection and cursor, the syllable counter, the list of
    n-gram input files (2grams.txt .. 5grams.txt under data/ngrams), the
    row-id counters for the two syllable tables, and the working
    dictionaries. ``self.conversion`` maps column 0 to column 1 of
    data/tagset/conversion.csv.
    """
    self.connection = sqlite3.connect('data/haiku.db')
    self.cursor = self.connection.cursor()
    self.syllableCounter = SyllableCounter()
    path = "data/ngrams"
    self.fileList = [path + "/%dgrams.txt" % size for size in (2, 3, 4, 5)]
    self.fiveCount = 0
    self.sevenCount = 0
    self.segmentList = {}
    self.posList = {}
    self.bigramList = {}
    self.conversion = {}
    # Close the CSV deterministically instead of leaking the handle.
    with open("data/tagset/conversion.csv", "rb") as conversionFile:
        for row in csv.reader(conversionFile):
            self.conversion[row[0]] = row[1]
def __init__(self):
    """Create the syllable counter, then choose a grammar and a season."""
    counter = SyllableCounter()
    self.syllableCounter = counter
    # setGrammar() returns the template; setSeason() stores its own result.
    chosenGrammar = self.setGrammar()
    self.grammar = chosenGrammar
    self.setSeason()
class Haiku:
    """Builds haiku from the syllable and bigram tables in data/haiku.db."""

    def __init__(self):
        """Create the syllable counter, then choose a grammar and a season."""
        self.syllableCounter = SyllableCounter()
        self.grammar = self.setGrammar()
        self.setSeason()

    def isHaiku(self, text):
        """Return True when ``text`` has exactly three lines of 5/7/5 syllables."""
        counts = self.syllableCounter.getLineCounts(text)
        if not counts:
            return False
        return list(counts) == [5, 7, 5]

    def createNaiveHaiku(self):
        """Return three random segments (five, seven, five) joined by newlines.

        Row ids are drawn from 1 up to the row count of each table.
        """
        fiveQuery = "select segment from FiveSyllables where id = ?"
        sevenQuery = "select segment from SevenSyllables where id = ?"
        connection = sqlite3.connect("data/haiku.db")
        try:
            cursor = connection.cursor()
            cursor.execute("select count(*) from FiveSyllables")
            numFiveRecords = cursor.fetchone()[0]
            cursor.execute("select count(*) from SevenSyllables")
            numSevenRecords = cursor.fetchone()[0]
            # All three ids are drawn before any segment is fetched.
            picks = [
                (fiveQuery, int(randint(1, numFiveRecords))),
                (sevenQuery, int(randint(1, numSevenRecords))),
                (fiveQuery, int(randint(1, numFiveRecords))),
            ]
            haiku = []
            for query, rowId in picks:
                cursor.execute(query, (rowId,))
                haiku.append(cursor.fetchone()[0])
        finally:
            # Release the connection even when a lookup fails.
            connection.close()
        return "\n".join(haiku)

    def createEvolvedHaiku(self):
        """Mutate a naive haiku 5000 times and print the fittest one found."""
        # Individual requires both grammar and season; season was missing.
        individual = Individual(self.grammar, self.season)
        candidate = self.createNaiveHaiku()
        bestHaiku = candidate
        bestFitness = individual.fitness(candidate)
        for i in range(5000):
            candidate = individual.naiveMutate(candidate)
            fitnessLevel = individual.fitness(candidate)
            if fitnessLevel > bestFitness:
                bestHaiku = candidate
                bestFitness = fitnessLevel
                # NOTE(review): progress output assumed to belong to the
                # improvement branch -- confirm against the original layout.
                print(candidate + "\n")
                print("count is: " + str(i))
        print("Best Haiku is :\n" + bestHaiku)
        print("\nEvaluation is:\n" + str(bestFitness))

    def grammarHaiku(self):
        """Fill every slot of ``self.grammar`` with a word from PoeticBigrams.

        Returns the lines joined by newlines, words joined by spaces.
        Raises ValueError when no word exists for a part-of-speech tag.
        """
        connection = sqlite3.connect("data/haiku.db")
        try:
            cursor = connection.cursor()
            newGrammar = [self._fillLine(cursor, posLine) for posLine in self.grammar]
        finally:
            connection.close()
        return "\n".join(" ".join(line) for line in newGrammar)

    def _fillLine(self, cursor, posLine):
        """Return one word (or the punctuation itself) per tag in ``posLine``."""
        words = []
        for index, pos in enumerate(posLine):
            if pos in string.punctuation:
                words.append(pos)
                continue
            rows = []
            if index == 0:
                # Prefer an opening word that pairs with the next tag. A
                # one-token line has no next tag and goes straight to the
                # fallback (this used to raise IndexError).
                if len(posLine) > 1:
                    rows = cursor.execute(
                        "select firstWord,secondWord from PoeticBigrams where firstPos=? and secondPos=?",
                        (pos, posLine[1]),
                    ).fetchall()
                if not rows:
                    rows = cursor.execute(
                        "select firstWord from PoeticBigrams where firstPos=?", (pos,)
                    ).fetchall()
            else:
                # Try progressively looser matches: previous word, then
                # previous tag, then any word carrying this tag.
                rows = cursor.execute(
                    "select secondWord from PoeticBigrams where firstWord = ? and secondPos = ?",
                    (words[index - 1], pos),
                ).fetchall()
                if not rows:
                    rows = cursor.execute(
                        "select secondWord from PoeticBigrams where firstPos = ? and secondPos = ?",
                        (posLine[index - 1], pos),
                    ).fetchall()
                if not rows:
                    rows = cursor.execute(
                        "select firstWord from PoeticBigrams where firstPos = ?", (pos,)
                    ).fetchall()
            words.append(self._pickWord(rows, pos))
        return words

    def _pickWord(self, rows, pos):
        """Return the first column of a randomly chosen row as a str."""
        if not rows:
            raise ValueError("no PoeticBigrams entry for part of speech %r" % (pos,))
        return str(rows[int(randint(0, len(rows) - 1))][0])

    def setGrammar(self):
        """Pick a random grammar from data/haiku_grammar/grammar_copy.

        Returns a list of lines, each a list of space-separated tags.
        """
        grammars = []
        grammar = ""
        with open("data/haiku_grammar/grammar_copy") as grammarFile:
            for count, line in enumerate(grammarFile, 1):
                # Every fourth line closes the group collected so far and
                # becomes the first line of the next group.
                if count % 4 == 0:
                    grammars.append(grammar)
                    grammar = ""
                grammar += line
        # NOTE(review): lines after the last multiple of four are never
        # appended to ``grammars`` -- confirm this matches the file layout.
        randIndex = int(randint(0, len(grammars) - 1))
        chosen = grammars[randIndex].splitlines()
        if len(chosen[0]) <= 1:
            # Drop the leading separator line carried over from the split.
            chosen = chosen[1:]
        return [line.split(" ") for line in chosen]

    def getGrammar(self):
        """Return the grammar chosen for this instance."""
        return self.grammar

    def setSeason(self):
        """Store a randomly chosen season name on ``self.season``."""
        seasons = ["winter", "fall", "summer", "spring"]
        self.season = choice(seasons)

    def getSeason(self):
        """Return the season chosen for this instance."""
        return self.season

    def evolvedGrammarHaiku(self):
        """Mutate a grammar-based haiku 25 times, printing every step."""
        bestHaiku = ""
        bestFitness = 0
        individual = Individual(self.grammar, self.season)
        newHaiku = self.grammarHaiku()
        best = []
        for i in range(25):
            print(i)
            newHaiku = individual.mutate(newHaiku)
            currFitness = individual.fitness(newHaiku)
            print(newHaiku)
            print(currFitness)
            print("\n")
            if currFitness > bestFitness:
                best.append((currFitness, newHaiku))
                bestHaiku = newHaiku
                bestFitness = currFitness
        # NOTE(review): the summary is assumed to be printed once, after the
        # loop -- confirm against the original layout.
        print("Best haiku is:")
        print(bestHaiku)
        print(bestFitness)
        print(self.grammar)
        print("Best Haiku: ")
        print(sorted(best))
class DatabaseInit:
    """Creates and populates the tables of data/haiku.db."""

    def __init__(self):
        """Open data/haiku.db and load the part-of-speech conversion table."""
        self.connection = sqlite3.connect('data/haiku.db')
        self.cursor = self.connection.cursor()
        self.syllableCounter = SyllableCounter()
        path = "data/ngrams"
        self.fileList = [path + "/%dgrams.txt" % size for size in (2, 3, 4, 5)]
        self.fiveCount = 0
        self.sevenCount = 0
        self.segmentList = {}
        self.posList = {}
        self.bigramList = {}
        self.conversion = {}
        # Close the CSV deterministically instead of leaking the handle.
        with open("data/tagset/conversion.csv", "rb") as conversionFile:
            for row in csv.reader(conversionFile):
                self.conversion[row[0]] = row[1]

    def TablesInit(self):
        """Create all tables and fill them from the data files."""
        schema = (
            "CREATE TABLE FiveSyllables (id integer primary key, segment text)",
            "CREATE TABLE SevenSyllables (id integer primary key, segment text)",
            "CREATE TABLE Bigrams (id integer primary key, firstWord text, firstPos text, secondWord text, secondPos text,frequency float, firstSyllables integer,secondSyllables integer)",
            "CREATE TABLE PoeticBigrams (firstWord text, firstPos text, secondWord text, secondPos text, firstSyllables integer,secondSyllables integer)",
            "CREATE TABLE WordAssociations (cue text,target text,FSG float,BSG float, cuePos text, tarPos text)",
        )
        for statement in schema:
            self.cursor.execute(statement)
        self.PoeticBigrams()
        self.WordAssociationInit()
        for fileNumber, fileName in enumerate(self.fileList):
            # ``with`` closes the n-gram file even if a row fails to parse.
            with open(fileName) as ngramFile:
                for line in ngramFile:
                    self.SyllableTablesInit(line, fileNumber)
                    if fileNumber == 3:
                        # only use 5grams data since using everything takes too long
                        self.BuildBigramTable(line)
                # NOTE(review): one commit per input file is assumed --
                # confirm against the original layout.
                self.connection.commit()
        self.BigramTablesInit()

    def SyllableTablesInit(self, line, fileNumber):
        """Insert the segment of ``line`` into FiveSyllables or SevenSyllables.

        line       -- one tab-separated n-gram record.
        fileNumber -- index into ``fileList``; fields 1 .. fileNumber+2 of the
                      record form the segment.

        A segment already seen (compared case-insensitively) is skipped.
        """
        fields = line.split("\t")[1:(fileNumber + 3)]
        culledLine = " ".join(word.strip("\r\n") for word in fields)
        key = culledLine.lower()
        if key in self.segmentList:
            return
        self.segmentList[key] = 0
        syllableCounts = self.syllableCounter.getLineCounts(culledLine)
        if not syllableCounts:
            return
        if syllableCounts[0] == 5:
            self.fiveCount += 1
            self.cursor.execute("INSERT into FiveSyllables VALUES (?,?)",
                                (self.fiveCount, culledLine))
        elif syllableCounts[0] == 7:
            self.sevenCount += 1
            self.cursor.execute("INSERT into SevenSyllables VALUES (?,?)",
                                (self.sevenCount, culledLine))

    def BuildBigramTable(self, line):
        """Accumulate the four adjacent word pairs of a 5-gram record.

        Field 0 of the tab-separated record is the count, fields 1-5 are read
        as words and fields 6-10 as their tags. ``bigramList[first][second]``
        becomes ``[firstPos, secondPos, count]`` and
        ``bigramList[first]["totCount"]`` the summed count for ``first``.
        """
        splitLine = [word.strip("\t\r\n ") for word in line.split("\t")]
        count = int(splitLine[0])
        for i in range(1, 5):
            firstWord = splitLine[i]
            secondWord = splitLine[i + 1]
            firstPos = self.convertPos(splitLine[i + 5])
            secondPos = self.convertPos(splitLine[i + 6])
            followers = self.bigramList.setdefault(firstWord, {})
            if secondWord in followers:
                followers[secondWord][2] += count
            else:
                followers[secondWord] = [firstPos, secondPos, count]
            followers["totCount"] = followers.get("totCount", 0) + count

    def BigramTablesInit(self):
        """Write the accumulated bigrams to the Bigrams table and commit.

        Each pair's count is replaced in place by its frequency relative to
        the first word's total, formatted to two decimals.
        """
        count = 0
        for key, followers in self.bigramList.items():
            total = followers["totCount"] + 0.0
            keySyllables = self.syllableCounter.syllableCount(key)
            for word, information in followers.items():
                if word == "totCount":
                    continue
                count += 1
                wordSyllables = self.syllableCounter.syllableCount(word)
                information[2] = float("{0:.2f}".format(information[2] / total))
                self.cursor.execute(
                    "INSERT into Bigrams VALUES (?,?,?,?,?,?,?,?)",
                    (count, key, information[0], word, information[1],
                     information[2], keySyllables, wordSyllables))
        self.connection.commit()

    def PoeticBigrams(self):
        """Fill PoeticBigrams from data/haiku_samples/tagged_haiku.

        Tokens are read as ``word/tag`` separated by single spaces.
        """
        insert = "INSERT into PoeticBigrams VALUES (?,?,?,?,?,?)"
        count = 0
        with open("data/haiku_samples/tagged_haiku") as taggedFile:
            for line in taggedFile:
                splits = line.split(" ")
                if count % 3 == 0 or line == "\n":
                    count += 1
                    continue
                if count % 4 == 0:
                    # Record the first token after a start-of-haiku marker.
                    word = splits[0].split("/")[0]
                    pos = splits[0].split("/")[1]
                    self.cursor.execute(
                        insert,
                        ("S==>", "S==>", word, pos,
                         0, self.syllableCounter.syllableCount(word),))
                else:
                    for index in range(len(splits) - 2):
                        firstWord = splits[index].split("/")[0]
                        secondWord = splits[index + 1].split("/")[0]
                        firstPos = splits[index].split("/")[1]
                        secondPos = splits[index + 1].split("/")[1]
                        firstSyllables = self.syllableCounter.syllableCount(firstWord)
                        secondSyllables = self.syllableCounter.syllableCount(secondWord)
                        self.cursor.execute(
                            insert,
                            (firstWord, firstPos, secondWord, secondPos,
                             firstSyllables, secondSyllables,))
                # NOTE(review): the counter is assumed to advance once per
                # processed line -- confirm against the original layout.
                count += 1

    def WordAssociationInit(self):
        """Fill WordAssociations from the CSV files in data/associations/.

        Columns 5 and 6 are stored as floats (0.0 when missing or not
        numeric); columns 0, 1, 17 and 26 supply cue, target and their tags.
        """
        fileList = ["AB.csv", "C.csv", "LO.csv", "PR.csv", "TZ.csv", "DF.csv", "GK.csv", "S.csv"]
        path = "data/associations/"
        for files in fileList:
            with open(path + files) as associationFile:
                for row in csv.reader(associationFile):
                    # Only conversion failures fall back to 0.0; a bare
                    # ``except`` would also swallow KeyboardInterrupt.
                    try:
                        FSG = float(row[5])
                    except (ValueError, IndexError):
                        FSG = 0.0
                    try:
                        BSG = float(row[6])
                    except (ValueError, IndexError):
                        BSG = 0.0
                    cue = row[0].lower().strip(" ")
                    target = row[1].lower().strip(" ")
                    cuePos = row[17].strip(" ")
                    tarPos = row[26].strip(" ")
                    self.cursor.execute(
                        "INSERT into WordAssociations VALUES (?,?,?,?,?,?)",
                        (cue, target, FSG, BSG, cuePos, tarPos))

    def convertPos(self, pos):
        """Return the converted form of ``pos`` upper-cased, or the upper-cased tag itself."""
        pos = pos.upper()
        return self.conversion.get(pos, pos)
def __init__(self):
    """Create the praw client, a Haiku generator and a syllable counter."""
    client = praw.Reddit(user_agent="cs_haiku_bot")
    self.agent = client
    generator = Haiku()
    self.haiku = generator
    counter = SyllableCounter()
    self.sc = counter
class Individual:
    """Mutates haiku and scores them for the evolutionary search."""

    def __init__(self, grammar, season):
        """Store the grammar and season and open a cursor on data/haiku.db.

        The connection is kept on ``self.connection`` so that it can be
        closed explicitly.
        """
        self.syllableCounter = SyllableCounter()
        self.grammar = grammar
        self.season = season
        self.connection = sqlite3.connect("data/haiku.db")
        self.cursor = self.connection.cursor()

    def naiveMutate(self, haiku):
        """Replace one line of ``haiku`` with a random stored segment.

        A draw below 6 out of 1..11 swaps line 0 or 2 for a FiveSyllables
        segment; otherwise line 1 is swapped for a SevenSyllables segment.
        Returns the new haiku text.
        """
        lines = haiku.split("\n")
        if randint(1, 11) < 6:
            self.cursor.execute("Select count(*) from FiveSyllables")
            upperBound = self.cursor.fetchone()[0]
            # Segment ids start at 1; drawing 0 matched no row and made
            # fetchone() return None.
            randomIndex = int(randint(1, upperBound))
            self.cursor.execute("Select segment from FiveSyllables where id = ?", (randomIndex,))
            target = choice([0, 2])
        else:
            self.cursor.execute("Select count(*) from SevenSyllables")
            upperBound = self.cursor.fetchone()[0]
            randomIndex = int(randint(1, upperBound))
            self.cursor.execute("Select segment from SevenSyllables where id = ?", (randomIndex,))
            target = 1
        lines[target] = self.cursor.fetchone()[0]
        return "\n".join(lines)

    def mutate(self, haiku):
        """Return ``haiku`` with its nouns replaced by words for the season."""
        splitHaiku = [line.split(" ") for line in haiku.splitlines()]
        # Build only the association list for this season instead of
        # constructing all four and discarding three.
        seasonList = {"summer": Summer, "winter": Winter, "fall": Fall, "spring": Spring}
        associations = seasonList[self.season]()
        splitHaiku = self.replaceNouns(splitHaiku, associations)
        return "\n".join(" ".join(line) for line in splitHaiku)

    def replaceNouns(self, splitHaiku, seed):
        """Replace words in "NN" grammar slots with random entries of ``seed``.

        splitHaiku -- list of lines, each a list of words; modified in place.
        seed       -- collection of replacement words; each chosen word is
                      removed from it.

        Where the Bigrams table has a match, the neighbours of a replaced
        word are swapped for words that pair with the replacement.
        Returns ``splitHaiku``.
        """
        for i, line in enumerate(splitHaiku):
            for j in range(len(line)):
                # NOTE(review): ``line[j] != seed`` compares a word with the
                # whole seed collection; ``not in`` may have been intended.
                # Kept as written -- confirm before changing.
                if not ("NN" in self.grammar[i][j] and line[j] != seed):
                    continue
                if len(seed) == 0:
                    break
                replacement = seed[randint(0, len(seed) - 1)]
                seed.remove(replacement)
                if j != 0:
                    pre = self.cursor.execute(
                        "select firstWord from Bigrams where firstPos = ? and secondWord =?",
                        (self.grammar[i][j - 1], replacement)).fetchall()
                    if len(pre) > 0:
                        index = randint(0, len(pre) - 1)
                        line[j - 1] = str(pre[index][0])
                if j < len(line) - 1:
                    pos = self.cursor.execute(
                        "select secondWord from Bigrams where secondPos = ? and firstWord = ?",
                        (self.grammar[i][j + 1], replacement)).fetchall()
                    if len(pos) > 0:
                        index = randint(0, len(pos) - 1)
                        line[j + 1] = str(pos[index][0])
                line[j] = replacement
        return splitHaiku

    def fitness(self, haiku):
        """Score ``haiku``: pairwise noun connectedness plus the 5/7/5 bonus.

        Every ordered pair of nouns where neither word contains the other
        contributes ``connectedness(word, secondWord)``.
        """
        splits = [line.split(" ") for line in haiku.split("\n")]
        words = [str(word) for word in itertools.chain(*splits)]
        # Look each word up once instead of once per pair.
        nouns = [word for word in words if wn.morphy(word, wn.NOUN)]
        similarity = 0
        for word in nouns:
            for secondWord in nouns:
                if word not in secondWord and secondWord not in word:
                    # NOTE(review): assumed to run once per noun pair; the
                    # former synset loop only tested membership and did
                    # nothing, so it has been removed -- confirm.
                    similarity += connectedness(word, secondWord)
        similarity += self.isHaiku(haiku)
        return similarity

    def isHaiku(self, haiku):
        """Return 200 when the lines of ``haiku`` count 5/7/5 syllables, else 0."""
        lineCounts = [
            sum(self.syllableCounter.syllableCount(word) for word in line.split(" "))
            for line in haiku.split("\n")
        ]
        return 200 if lineCounts == [5, 7, 5] else 0