def __init__(self, author):
    """Set up a poem CSP from an author's statistics.

    Reads ``author['author']``, ``author['typeTokenCount']``,
    ``author['linesPerPoem']``, ``author['wordDomain']`` and
    ``author['wordsPerLine']``.  The token count, the line count and the
    per-line word counts are each drawn with ``util.weightedRandomChoice``.
    """
    util.CSP.__init__(self)
    self.author = author['author']
    self.token_num = util.weightedRandomChoice(author['typeTokenCount'])
    self.line_num = util.weightedRandomChoice(author['linesPerPoem'])
    self.word_num = {}

    # Restrict the domain to a random subset of ``token_num`` words.
    word_domain = author['wordDomain']
    chosen_words = random.sample(word_domain, self.token_num)
    self.domain = dict((word, word_domain[word]) for word in chosen_words)

    # Draw a word count for every line of the poem.
    words_per_line = author['wordsPerLine']
    for line_id in xrange(self.line_num):
        self.word_num[line_id] = util.weightedRandomChoice(words_per_line)
def observe(self, agentX, agentY, observedDist):
    """Reweight every occupied tile by the sonar reading, then resample.

    Each tile's particle count is multiplied by the Gaussian density
    ``util.pdf(distance, Const.SONAR_STD, observedDist)``, where
    ``distance`` is the Euclidean distance from ``(agentX, agentY)`` to the
    tile's location.  ``self.NUM_PARTICLES`` particles are then redrawn with
    ``util.weightedRandomChoice`` and ``self.updateBelief()`` is called.
    """
    # BEGIN_YOUR_CODE (our solution is 12 lines of code, but don't worry if you deviate from this)
    # Reweight: particle count on the tile times the emission density.
    reweighted = collections.defaultdict(int)
    for tile, count in self.particles.items():
        row, col = tile
        tile_x, tile_y = util.colToX(col), util.rowToY(row)
        distance = math.sqrt((agentX - tile_x) ** 2 + (agentY - tile_y) ** 2)
        reweighted[tile] = count * util.pdf(distance, Const.SONAR_STD, observedDist)

    # Resample: draw a fresh particle set from the reweighted tiles.
    resampled = collections.defaultdict(int)
    self.particles = resampled
    for _ in range(self.NUM_PARTICLES):
        resampled[util.weightedRandomChoice(reweighted)] += 1
    # END_YOUR_CODE
    self.updateBelief()
def observe(self, agentX, agentY, observedDist):
    """Reweight occupied tiles by the observed distance and resample.

    Only tiles that currently hold particles are reweighted.  A tile's new
    weight is ``util.pdf(distance, Const.SONAR_STD, observedDist)`` times
    its particle count; ``self.NUM_PARTICLES`` particles are then redrawn
    with ``util.weightedRandomChoice`` into a plain dict.
    """
    # BEGIN_YOUR_CODE (our solution is 22 lines of code, but don't worry if you deviate from this)
    weights = {}
    for tile, count in self.particles.items():
        row, col = tile[0], tile[1]
        # Convert the tile's row/col into coordinates.
        x, y = util.colToX(col), util.rowToY(row)
        distance = math.sqrt((x - agentX) ** 2 + (y - agentY) ** 2)
        emission = util.pdf(distance, Const.SONAR_STD, observedDist)
        # The particle count stands in for the old probability.
        weights[(row, col)] = emission * count

    # Replace the particle set with a weighted-random resample.
    resampled = {}
    self.particles = resampled
    for _ in range(self.NUM_PARTICLES):
        tile = util.weightedRandomChoice(weights)
        if tile in resampled:
            resampled[tile] += 1
        else:
            resampled[tile] = 1
    # END_YOUR_CODE
    self.updateBelief()
def elapseTime(self) -> None:
    """Move every particle to a tile sampled from its transition weights.

    For each particle on a tile, a destination is drawn with
    ``util.weightedRandomChoice(self.transProbDict[tile])``; the counts per
    destination replace ``self.particles``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    moved = collections.defaultdict(int)
    for tile in self.particles:
        for _ in range(self.particles[tile]):
            destination = util.weightedRandomChoice(self.transProbDict[tile])
            moved[destination] += 1
    self.particles = moved
def next(self):
    """Return the seed phrase on the first call, a sampled successor after.

    Returns:
        * When ``self.seed_key`` is ``None``: a seed n-gram is drawn from
          ``self.frequency_map`` with ``util.weightedRandomChoice``, stored
          in ``self.seed_key``, and its words are appended space-separated
          to ``self.seed``; the stripped ``self.seed`` is returned.
        * ``None`` when ``self.seed_key`` has no entry in ``self.word_map``
          (the grammar is stuck).
        * Otherwise a word drawn with ``util.weightedRandomChoice`` from
          ``self.word_map[self.seed_key]``.
    """
    # No seed yet: pick one and return it as a phrase.
    if self.seed_key is None:
        self.seed_key = util.weightedRandomChoice(self.frequency_map)
        appended = "".join(word + " " for word in self.seed_key)
        self.seed = (self.seed + appended).strip()  # trim whitespace
        return self.seed

    # Grammar is stuck.
    if self.seed_key not in self.word_map:
        return None

    # Pick a random choice from the successors of the seed.
    return util.weightedRandomChoice(self.word_map[self.seed_key])
def observe(self, agentX, agentY, observedDist):
    """Weight each occupied tile by the sonar density and resample.

    A tile's weight is ``util.pdf(observedDist, Const.SONAR_STD, distance)``
    times its particle count, with ``distance`` computed by ``math.hypot``
    from the agent to the tile.  ``self.NUM_PARTICLES`` particles are then
    redrawn into a ``Counter`` and ``self.updateBelief()`` is called.
    """
    # BEGIN_YOUR_CODE (around 15 lines of code expected)
    weights = {}
    for tile in self.particles:
        dy = util.rowToY(tile[0]) - agentY
        dx = util.colToX(tile[1]) - agentX
        density = util.pdf(observedDist, Const.SONAR_STD, math.hypot(dy, dx))
        weights[tile] = density * self.particles[tile]

    resampled = collections.Counter()
    for _ in range(self.NUM_PARTICLES):
        chosen = util.weightedRandomChoice(weights)
        resampled[chosen] += 1
    self.particles = resampled
    # END_YOUR_CODE
    self.updateBelief()
def _sample_topic(self, m, word):
    """Sample a topic from the normalised full conditional of ``(m, word)``.

    The array returned by ``self._full_conditional`` is normalised in place
    and handed to ``weightedRandomChoice``.
    """
    distribution = self._full_conditional(m, word)
    distribution /= distribution.sum()  # in-place normalisation
    return weightedRandomChoice(distribution)
def _sample_topic_predict(self, m, word, n_mk, n_m, n_kt, n_k):
    """Sample a topic from the normalised predictive full conditional.

    All arguments are forwarded unchanged to
    ``self._full_conditional_predict``; its result is normalised in place
    and handed to ``weightedRandomChoice``.
    """
    distribution = self._full_conditional_predict(m, word, n_mk, n_m, n_kt, n_k)
    distribution /= distribution.sum()  # in-place normalisation
    return weightedRandomChoice(distribution)
def elapseTime(self):
    """Advance every particle one step through the transition model.

    Each particle on a tile is replaced by a destination drawn with
    ``util.weightedRandomChoice(self.transProbDict[tile])``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    advanced = collections.defaultdict(int)
    for origin in self.particles:
        for _ in xrange(self.particles[origin]):
            destination = util.weightedRandomChoice(self.transProbDict[origin])
            advanced[destination] += 1
    self.particles = advanced
def elapseTime(self):
    """Resample each particle's tile from its transition weights.

    Particles on tiles that have no entry in ``self.transProbDict`` are
    dropped; every other particle moves to a tile drawn with
    ``util.weightedRandomChoice``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    previous = self.particles
    self.particles = collections.defaultdict(int)
    for tile in previous:
        if tile not in self.transProbDict:
            continue
        for _ in range(previous[tile]):
            self.particles[util.weightedRandomChoice(self.transProbDict[tile])] += 1
def elapseTime(self):
    """Move every particle to a tile sampled from its transition weights.

    For each particle on a tile, a destination is drawn with
    ``util.weightedRandomChoice(self.transProbDict[tile])`` and the counts
    per destination replace ``self.particles``.

    Particles on a tile with no entry in ``self.transProbDict`` are dropped.
    Earlier, ``None`` was passed to the sampler for such tiles.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    particles = collections.defaultdict(int)
    for tile, count in self.particles.items():
        transition_distribution = self.transProbDict.get(tile)
        if transition_distribution is None:
            # No known transitions out of this tile: nothing to sample from.
            continue
        for _ in range(count):
            particles[util.weightedRandomChoice(transition_distribution)] += 1
    self.particles = particles
def elapseTime(self):
    """Propagate all particles through the transition model.

    Each particle moves to a tile drawn with ``util.weightedRandomChoice``
    from ``self.transProbDict`` for its current tile.  The belief is then
    refreshed via ``self.updateBelief()``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    relocated = collections.defaultdict(int)
    for source, count in self.particles.items():
        for _ in range(count):
            target = util.weightedRandomChoice(self.transProbDict[source])
            relocated[target] += 1
    self.particles = relocated
    self.updateBelief()
def elapseTime(self):
    """Sample a successor tile for every particle.

    Destinations are drawn with ``util.weightedRandomChoice`` from
    ``self.transProbDict`` for the particle's current tile and tallied into
    a new ``defaultdict(int)`` that replaces ``self.particles``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    tally = collections.defaultdict(int)
    for tile, count in self.particles.items():
        for _ in range(count):  # once for every particle on the tile
            successor = util.weightedRandomChoice(self.transProbDict[tile])
            tally[successor] += 1
    self.particles = tally
def elapseTime(self):
    """Shift every particle according to its tile's transition weights.

    The weights for a tile are looked up once in ``self.transProbDict``;
    one destination per particle is then drawn with
    ``util.weightedRandomChoice`` and tallied into a ``Counter``.
    """
    # BEGIN_YOUR_CODE (around 10 lines of code expected)
    shifted = collections.Counter()
    for tile in self.particles:
        transitions = self.transProbDict[tile]
        for _ in range(self.particles[tile]):
            shifted[util.weightedRandomChoice(transitions)] += 1
    self.particles = shifted
def resample(self):
    """Redraw ``self.NUM_PARTICLES`` particles from the current weights.

    Each new particle is a tile drawn with
    ``util.weightedRandomChoice(self.particles)``.  The tallies replace
    ``self.particles`` and ``self.updateBelief()`` is called.
    """
    newParticles = collections.Counter()
    # BEGIN_YOUR_CODE (around 3 lines of code expected)
    for _ in range(self.NUM_PARTICLES):
        # The draw must be live code: the tally below needs `particle` bound.
        particle = util.weightedRandomChoice(self.particles)
        newParticles[particle] += 1
    # END_YOUR_CODE
    self.particles = newParticles
    self.updateBelief()
def elapseTime(self):
    """Draw a new tile for every particle from the transition weights.

    Transition weights come from ``self.transProbDict``; each draw uses
    ``util.weightedRandomChoice`` and is tallied into a ``defaultdict(int)``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    successors = collections.defaultdict(int)
    for tile, count in self.particles.items():
        for _ in range(count):
            # Sample where this particle ends up.
            landed = util.weightedRandomChoice(self.transProbDict[tile])
            successors[landed] += 1
    self.particles = successors
def elapseTime(self):
    """Replace each particle by a tile sampled from its transition weights.

    The weights are read from ``self.transProbDict`` for every individual
    particle and sampled with ``util.weightedRandomChoice``.
    """
    # BEGIN_YOUR_CODE (around 10 lines of code expected)
    following = collections.Counter()
    for tile, count in self.particles.items():
        for _ in range(count):
            transitions = self.transProbDict[tile]
            destination = util.weightedRandomChoice(transitions)
            following[destination] += 1
    self.particles = following
def elapseTime(self):
    """Step all particles forward using ``self.transProbDict``.

    For each occupied tile the particle count and transition weights are
    read once; one destination per particle is then drawn with
    ``util.weightedRandomChoice``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    stepped = collections.Counter()
    for tile in self.particles:
        how_many, transitions = self.particles[tile], self.transProbDict[tile]
        for _ in range(how_many):
            stepped[util.weightedRandomChoice(transitions)] += 1
    self.particles = stepped
def elapseTime(self):
    """Propose a new tile for every particle.

    Each particle's ``(row, col)`` tile is replaced by one drawn with
    ``util.weightedRandomChoice(self.transProbDict[tile])``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    proposed = collections.defaultdict(int)
    for tile, count in self.particles.items():  # tile = (row, col)
        for _ in range(count):
            proposed[util.weightedRandomChoice(self.transProbDict[tile])] += 1
    self.particles = proposed
def elapseTime(self):
    """Transition every particle to a sampled successor tile.

    Successors are drawn with ``util.weightedRandomChoice`` from the entry
    of ``self.transProbDict`` for the particle's current tile.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    updated = collections.defaultdict(int)
    for current in self.particles:
        remaining = range(self.particles[current])
        for _ in remaining:
            successor = util.weightedRandomChoice(self.transProbDict[current])
            updated[successor] += 1
    self.particles = updated
def elapseTime(self):
    """Move particles along sampled transitions and refresh the belief.

    Particles on tiles absent from ``self.transProbDict`` are dropped; all
    others move to a tile drawn with ``util.weightedRandomChoice``.
    ``self.updateBelief()`` is called afterwards.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    moved = collections.defaultdict(int)
    for tile in self.particles:
        if tile not in self.transProbDict:
            continue
        for _ in range(self.particles[tile]):
            transitions = self.transProbDict[tile]
            moved[util.weightedRandomChoice(transitions)] += 1
    self.particles = moved
    self.updateBelief()
def elapseTime(self):
    """Sample the next tile of every particle into a fresh ``Counter``.

    Next tiles are drawn with ``util.weightedRandomChoice`` from
    ``self.transProbDict`` for the particle's current tile.
    """
    # BEGIN_YOUR_CODE (around 10 lines of code expected)
    tally = collections.Counter()
    for tile, count in self.particles.items():
        for _ in range(count):
            # A Counter starts missing keys at 0, so one increment suffices.
            tally[util.weightedRandomChoice(self.transProbDict[tile])] += 1
    self.particles = tally
def observe(self, agentX: int, agentY: int, observedDist: float) -> None:
    """Scale particle counts by the sonar density in place, then resample.

    Every entry of ``self.particles`` is multiplied by
    ``util.pdf(distance, Const.SONAR_STD, observedDist)`` for the tile's
    distance from the agent.  ``self.NUM_PARTICLES`` particles are then
    drawn from those weights and ``self.updateBelief()`` is called.
    """
    # BEGIN_YOUR_CODE (our solution is 10 lines of code, but don't worry if you deviate from this)
    for tile in list(self.particles):
        row, col = tile
        dx = agentX - util.colToX(col)
        dy = agentY - util.rowToY(row)
        distance = math.sqrt((dx ** 2) + (dy ** 2))
        self.particles[(row, col)] *= util.pdf(distance, Const.SONAR_STD, observedDist)

    resampled = collections.defaultdict(int)
    for _ in range(self.NUM_PARTICLES):
        chosen = util.weightedRandomChoice(self.particles)
        resampled[chosen] += 1
    self.particles = resampled
    # END_YOUR_CODE
    self.updateBelief()
def elapseTime(self):
    """Let every particle take one sampled transition.

    A tile holding more particles is sampled from more often.  Each draw
    uses ``util.weightedRandomChoice(self.transProbDict[tile])``.
    """
    # BEGIN_YOUR_CODE (around 10 lines of code expected)
    landed = collections.Counter()
    for start, count in self.particles.items():
        for _ in range(count):
            end = util.weightedRandomChoice(self.transProbDict[start])
            # Wherever the particle lands, bump that tile's counter.
            landed[end] += 1
    self.particles = landed
def elapseTime(self):
    """Resample every particle's tile from the transition weights.

    Draws use ``util.weightedRandomChoice`` on ``self.transProbDict`` for
    the particle's tile; results are counted in a ``defaultdict(int)``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    counts = collections.defaultdict(int)
    for tile, how_many in self.particles.items():
        for _ in range(how_many):
            drawn = util.weightedRandomChoice(self.transProbDict[tile])
            # Missing keys start at 0 in a defaultdict(int).
            counts[drawn] += 1
    self.particles = counts
def elapseTime(self):
    """Pick a random transition for every particle.

    For each occupied tile the count and the transition weights from
    ``self.transProbDict`` are read once, then one destination per particle
    is drawn with ``util.weightedRandomChoice``.
    """
    newParticles = collections.Counter()
    # BEGIN_YOUR_CODE (around 7 lines of code expected)
    for tile in self.particles:
        count, transitions = self.particles[tile], self.transProbDict[tile]
        for _ in range(count):
            newParticles[util.weightedRandomChoice(transitions)] += 1
    # END_YOUR_CODE
    self.particles = newParticles
def elapseTime(self):
    """Move particles along sampled transitions, then update the belief.

    Tiles without an entry in ``self.transProbDict`` contribute nothing;
    every other particle moves to a tile drawn with
    ``util.weightedRandomChoice``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    moved = collections.defaultdict(int)
    for tile, count in self.particles.items():
        if tile in self.transProbDict:
            for _ in range(count):
                transitions = self.transProbDict[tile]
                destination = util.weightedRandomChoice(transitions)
                moved[destination] += 1
    self.particles = moved
    self.updateBelief()
def generate(frequency_map, word_map):
    """Generate text by walking an n-gram model from a random seed.

    A seed n-gram is drawn from ``frequency_map``; up to
    ``48 - options.ngrams`` successors are then drawn from ``word_map``,
    sliding the seed window by one word each time.  A newline follows every
    sixth word and a space follows all others.  Generation stops early when
    the current seed has no entry in ``word_map``.
    """
    seed_key = util.weightedRandomChoice(frequency_map)
    seed = "".join(word + " " for word in seed_key)
    count = len(seed_key)
    output = seed

    for _ in range(48 - options.ngrams):
        if seed_key not in word_map:
            break
        successor = util.weightedRandomChoice(word_map[seed_key])
        count += 1
        separator = "\n" if count % 6 == 0 else " "
        output += successor + separator

        # Slide the window: drop the oldest word, append the new one.
        window = seed.split()
        del window[0]
        window.append(successor)
        seed = ' '.join(window)
        seed_key = tuple(window)
    return output
def elapseTime(self):
    """Tally a sampled next tile for every particle.

    Next tiles come from ``util.weightedRandomChoice`` applied to
    ``self.transProbDict`` for the particle's tile; the tallies are kept in
    a ``Counter`` that replaces ``self.particles``.
    """
    # BEGIN_YOUR_CODE (around 10 lines of code expected)
    upcoming = collections.Counter()
    for tile in self.particles:
        for _ in range(self.particles[tile]):
            chosen = util.weightedRandomChoice(self.transProbDict[tile])
            upcoming[chosen] += 1
    self.particles = upcoming
def elapseTime(self):
    """Move every particle in ``self.randomPart`` along a sampled transition.

    Each particle on ``key`` moves to a destination drawn with
    ``util.weightedRandomChoice(self.probdic[key])``; the per-destination
    counts replace ``self.randomPart``.

    ``self.realtemp`` is reset to ``{}`` and gets the entry ``{1: 1}`` only
    when some destination was drawn more than once.  When nothing repeats
    it stays empty; earlier that case raised ``UnboundLocalError``.
    """
    moved = {}
    self.realtemp = {}
    saw_repeat = False
    for key in self.randomPart:
        for _ in range(self.randomPart[key]):
            destination = util.weightedRandomChoice(self.probdic[key])
            if destination in moved:
                moved[destination] += 1
                saw_repeat = True
            else:
                moved[destination] = 1
    self.randomPart = moved
    if saw_repeat:
        self.realtemp[1] = 1
def poem_weighted_random_choice(poem, var, assignment):
    """Return a new value for ``var`` sampled from its neighbours' factors.

    Given a ``poem`` csp, a word variable ``var`` and the current
    ``assignment``, each ``(neighbor, factor_func)`` in
    ``poem.neighborFactors[var]`` contributes the distribution
    ``factor_func(assignment[neighbor])``.  The distributions are add-one
    smoothed over the union of their words, normalised, multiplied
    element-wise, renormalised and sampled with
    ``util.weightedRandomChoice``.

    When the neighbours propose no words at all, a uniformly random word
    from ``poem.domain`` is returned instead.
    """
    distributions = []
    poss_words = set()

    # Collect one private copy of each neighbour's distribution and the
    # union of the words they mention.
    for neighbor, factor_func in poem.neighborFactors[var]:
        cur_distribution = copy.deepcopy(factor_func(assignment[neighbor]))
        distributions.append(cur_distribution)
        poss_words |= set(cur_distribution.keys())

    # No possible words: reseed with a uniformly random choice.
    # list() keeps this working when keys() is a view (Python 3).
    if not poss_words:
        return random.choice(list(poem.domain))

    # Add-one smoothing over the union.  Indexing a word the distribution
    # has not seen relies on it supplying a default (assumes defaultdict-like
    # distributions -- TODO confirm).
    for distribution in distributions:
        for possibility in poss_words:
            distribution[possibility] += 1

    # Joint distribution: normalise each factor and multiply element-wise.
    # The first factor is reused as the accumulator; it is already a copy.
    probability = {}
    for distribution in distributions:
        normalize_distribution(distribution)
        if not probability:
            probability = distribution
        else:
            for elem in probability:
                probability[elem] *= distribution[elem]

    normalize_distribution(probability)
    return util.weightedRandomChoice(probability)
def observe(self, agentX, agentY, observedDist):
    """Reweight occupied tiles by the sonar density and resample.

    A tile's weight is its particle count times
    ``util.pdf(observedDist, Const.SONAR_STD, distance)``;
    ``self.NUM_PARTICLES`` particles are redrawn from those weights into a
    ``Counter`` and ``self.updateBelief()`` is called.
    """
    weights = collections.Counter()
    resampled = collections.Counter()
    for tile in self.particles:
        dx = agentX - util.colToX(tile[1])
        dy = agentY - util.rowToY(tile[0])
        distance = math.sqrt(dx ** 2 + dy ** 2)
        weights[tile] = self.particles[tile] * util.pdf(observedDist, Const.SONAR_STD, distance)
    for _ in range(self.NUM_PARTICLES):
        resampled[util.weightedRandomChoice(weights)] += 1
    self.particles = resampled
    # END_YOUR_CODE
    self.updateBelief()
def elapseTime(self):
    """Sample where each particle ends up under the transition model.

    Keys of ``self.particles`` are unpacked as ``(row, col)``; each
    particle's new tile is drawn with ``util.weightedRandomChoice`` from
    ``self.transProbDict[(row, col)]`` and tallied into a ``Counter``.
    """
    # BEGIN_YOUR_CODE (our solution is 6 lines of code, but don't worry if you deviate from this)
    tally = collections.Counter()
    for tile, count in self.particles.items():
        row, col = tile
        for _ in range(count):
            tally[util.weightedRandomChoice(self.transProbDict[(row, col)])] += 1
    self.particles = tally
def observe(self, agentX, agentY, observedDist):
    """Weight particles by the observation likelihood and resample them.

    The weight of a tile is its particle count times
    ``util.pdf(distance, Const.SONAR_STD, observedDist)``.
    ``self.particles`` is replaced by ``self.NUM_PARTICLES`` draws from
    those weights, then ``self.updateBelief()`` is called.
    """
    # BEGIN_YOUR_CODE (our solution is 12 lines of code, but don't worry if you deviate from this)
    weights = collections.defaultdict(float)
    for tile, count in self.particles.items():
        tile_x = util.colToX(tile[1])
        tile_y = util.rowToY(tile[0])
        distance = ((agentX - tile_x) ** 2 + (agentY - tile_y) ** 2) ** 0.5
        likelihood = util.pdf(distance, Const.SONAR_STD, observedDist)
        weights[tile] = count * likelihood

    # Resample into a fresh particle set.
    self.particles = collections.defaultdict(int)
    for _ in range(self.NUM_PARTICLES):
        chosen = util.weightedRandomChoice(weights)
        self.particles[chosen] += 1
    # END_YOUR_CODE
    self.updateBelief()
def observe(self, agentX, agentY, observedDist):
    """Reweight ``(row, col)`` tiles by the sonar density and resample.

    Each tile's count is multiplied by
    ``util.pdf(distance, Const.SONAR_STD, observedDist)``;
    ``self.NUM_PARTICLES`` particles are then drawn from the result with
    ``util.weightedRandomChoice``.
    """
    # BEGIN_YOUR_CODE (our solution is 10 lines of code, but don't worry if you deviate from this)
    weights = collections.defaultdict(float)
    for tile in self.particles:
        row, col = tile
        dx = util.colToX(col) - agentX
        dy = util.rowToY(row) - agentY
        density = util.pdf(math.sqrt(dx ** 2 + dy ** 2), Const.SONAR_STD, observedDist)
        weights[(row, col)] = self.particles[(row, col)] * density

    resampled = collections.defaultdict(int)
    for _ in range(self.NUM_PARTICLES):
        resampled[util.weightedRandomChoice(weights)] += 1
    self.particles = resampled
    # END_YOUR_CODE
    self.updateBelief()
def observe(self, agentX, agentY, observedDist):
    """Scale particle counts by the sonar density in place, then resample.

    Each entry of ``self.particles`` is multiplied by
    ``util.pdf(distance, Const.SONAR_STD, observedDist)``; afterwards
    ``self.NUM_PARTICLES`` particles are drawn from those weights and
    ``self.updateBelief()`` is called.
    """
    # BEGIN_YOUR_CODE (our solution is 22 lines of code, but don't worry if you deviate from this)
    for tile in list(self.particles):
        tile_x = util.colToX(tile[1])
        tile_y = util.rowToY(tile[0])
        dx, dy = agentX - tile_x, agentY - tile_y
        distance = math.sqrt(dx ** 2 + dy ** 2)
        self.particles[tile] *= util.pdf(distance, Const.SONAR_STD, observedDist)

    resampled = collections.defaultdict(int)
    for _ in range(self.NUM_PARTICLES):
        resampled[util.weightedRandomChoice(self.particles)] += 1
    self.particles = resampled
    # END_YOUR_CODE
    self.updateBelief()
def observe(self, agentX, agentY, observedDist):
    """Weight every occupied tile by the sonar density and resample.

    A tile's weight is ``util.pdf(observedDist, Const.SONAR_STD, distance)``
    times the number of particles on it, where ``distance`` is the Euclidean
    distance between the agent and the tile.  ``self.NUM_PARTICLES``
    particles are then redrawn into a ``Counter``.
    """
    # BEGIN_YOUR_CODE (around 15 lines of code expected)
    weights = {}
    for tile in self.particles:
        row, col = tile
        tile_x, tile_y = util.colToX(col), util.rowToY(row)
        distance = math.sqrt((tile_y - agentY) ** 2 + (tile_x - agentX) ** 2)
        density = util.pdf(observedDist, Const.SONAR_STD, distance)
        weights[tile] = density * self.particles[tile]

    resampled = collections.Counter()
    for _ in range(self.NUM_PARTICLES):
        resampled[util.weightedRandomChoice(weights)] += 1
    self.particles = resampled
    # END_YOUR_CODE
    self.updateBelief()
def observe(self, agentX, agentY, observedDist):
    """Update particle weights from the observed distance and resample.

    Each tile's count is multiplied by
    ``util.pdf(distance, Const.SONAR_STD, observedDist)``.
    ``self.particles`` then becomes a ``Counter`` of ``self.NUM_PARTICLES``
    draws from those weights and ``self.updateBelief()`` is called.
    """
    # BEGIN_YOUR_CODE (around 15 lines of code expected)
    weights = collections.Counter()
    for tile, count in self.particles.items():
        tile_y = util.rowToY(tile[0])
        tile_x = util.colToX(tile[1])
        distance = math.sqrt((agentX - tile_x) ** 2 + (agentY - tile_y) ** 2)
        weights[tile] = count * util.pdf(distance, Const.SONAR_STD, observedDist)

    self.particles = collections.Counter()
    for _ in range(self.NUM_PARTICLES):
        drawn = util.weightedRandomChoice(weights)
        self.particles[drawn] += 1
    # END_YOUR_CODE
    self.updateBelief()
def observe(self, agentX, agentY, observedDist):
    """Reweight tiles by the sonar reading and draw a new particle set.

    The weight of a tile is
    ``util.pdf(observedDist, Const.SONAR_STD, distance) * count``, with
    ``distance`` obtained from ``math.hypot``.  ``self.NUM_PARTICLES``
    draws from those weights are tallied into a ``Counter``.
    """
    # BEGIN_YOUR_CODE (around 15 lines of code expected)
    weights = {}
    for tile, count in self.particles.items():
        row, col = tile[0], tile[1]
        separation = math.hypot(util.rowToY(row) - agentY, util.colToX(col) - agentX)
        weights[tile] = util.pdf(observedDist, Const.SONAR_STD, separation) * count

    redrawn = collections.Counter()
    for _ in range(self.NUM_PARTICLES):
        redrawn[util.weightedRandomChoice(weights)] += 1
    self.particles = redrawn
    # END_YOUR_CODE
    self.updateBelief()
def succAndCost(self, state):
    """Expand a ``(poem, seed)`` search state into its successors.

    Args:
        state: ``(poem, seed)`` -- the poem built so far and the n-gram
            tuple used to pick the next word.  A falsy ``seed`` marks the
            initial call.

    Returns:
        A list of ``(action, (new_poem, new_seed), cost)`` triples.

        * Initial call: ``self.beginseeds`` entries whose action is a seed
          n-gram sampled from ``self.grammar.frequency_map`` and whose cost
          is 0.  ``self.poem`` is set to the last poem created.
        * ``poem.isFirst()``: one candidate per sampled start word, with the
          word's weight in ``self.grammar.begin_map`` as cost.
        * Otherwise: one candidate per successor of ``seed`` in
          ``self.grammar.word_map`` (``'-BEGIN-'`` excluded), with the
          successor's frequency as cost.

        Only words that the current line accepts yield a successor.  The
        list is in weighted-random order when ``self.probabilistic`` is
        set, otherwise sorted by descending cost; it is cut to
        ``options.branching`` entries when that option is truthy.
    """
    poem, seed = state
    numSeeds = self.beginseeds

    # Initial call, seed needs to be initialized.
    # -------------------------------------------
    if not seed:
        toReturn = []
        # Assumption: the initial seed will fit on the first line.
        for _ in range(numSeeds):
            seed = util.weightedRandomChoice(self.grammar.frequency_map)
            while (seed[0] not in self.grammar.begin_map) or ('-BEGIN-' in seed):
                seed = util.weightedRandomChoice(self.grammar.frequency_map)
            new_poem = copy.deepcopy(poem)  # every successor needs its own poem
            for token in seed:  # push the seed words into the first line
                new_poem.getLine().add(token)
            self.poem = new_poem
            toReturn.append((seed, (new_poem, seed), 0))
        return toReturn

    # Branching calls
    # -------------------------------------------
    # A dict (successor -> weight) supports weighted sampling below; a list
    # is enough when the successors are simply sorted.
    if self.probabilistic:
        result = {}
    else:
        result = []

    def finish_line(new_poem, line, word):
        """Hand ``word`` to unconstrained paired lines once ``line`` is done."""
        if line:
            return  # a truthy line is not finished yet
        if line.propagator:
            for line_i in line.paired_indices:
                if new_poem[line_i].constraint == "":  # no previous constraint
                    new_poem[line_i].constraint = word
        # NOTE(review): nesting was reconstructed from whitespace-collapsed
        # source; confirm iterate() belongs at this level rather than inside
        # the propagator branch.
        new_poem.iterate()

    def record(word, new_poem, new_seed, cost):
        """Store one successor in the container chosen above."""
        successor = (word, (new_poem, new_seed), cost)
        if self.probabilistic:
            result[successor] = cost
        else:
            result.append(successor)

    if options.verbose > 0:
        print(poem)

    if poem.isFirst():
        if options.verbose > 1:
            print("[ ] poem isFirst so we get new sentence seeds")
        # This is the number of starting seeds this returns.
        for _ in range(numSeeds):
            startWord = util.weightedRandomChoice(self.grammar.begin_map)
            seed = tuple(['-BEGIN-'] * (self.ngrams - 1) + [startWord])
            new_poem = copy.deepcopy(poem)
            curr = new_poem.getLine()
            if curr and curr.add(startWord):
                if options.verbose > 1:
                    print("[ ] reseeded grammar with:  %s" % (startWord,))
                finish_line(new_poem, curr, startWord)
                record(startWord, new_poem, seed, self.grammar.begin_map[startWord])
    # IMPORTANT NOTE
    # For every successor word, consider all possible children nodes.
    # Pruning to meet rhyming and syllabic constraints of actions needs to be
    # performed here. There is no error checking in the Line or Poetry objects.
    # The rhyming word needs to be passed back and forth between Poetry and Line
    # objects once they are completed (not implemented)
    elif seed in self.grammar.word_map:
        # word: the word that follows the current seed in the n-gram model
        # frequency: how often that word occurs after the seed
        for word, frequency in self.grammar.word_map[seed].items():
            if word == '-BEGIN-':
                continue
            new_poem = copy.deepcopy(poem)  # every successor needs its own poem
            curr = new_poem.getLine()
            if curr and curr.add(word):  # the word fits on the current line
                if options.verbose > 1:
                    # Report the word just added; `startWord` is never bound
                    # on this path.
                    print("[ ] added seed  %s" % (word,))
                new_seed = tuple(seed)[1:] + (word,)
                finish_line(new_poem, curr, word)
                record(word, new_poem, new_seed, frequency)

    # Idea: sort by descending frequencies so that it looks down more likely
    # paths first, or make the ordering probabilistic.
    if self.probabilistic:
        # Draw without replacement: each pick is removed before the next draw.
        toReturn = []
        for _ in range(len(result)):
            toAdd = util.weightedRandomChoice(result)
            toReturn.append(toAdd)
            del result[toAdd]
    else:
        result.sort(key=operator.itemgetter(2), reverse=True)
        toReturn = result

    if options.branching:
        return toReturn[:options.branching]
    return toReturn
def gibbs(self, poem):
    """Resample the words of ``poem`` until a sweep changes few of them.

    Every variable starts with an unweighted random word from
    ``poem.values[variable]``.  Each sweep then visits every
    ``(line_id, word_id)`` variable and resamples it:

    1. from the words that follow the previous word's assignment in
       ``self.authors[poem.author]['wordPairs']``, if a previous neighbour
       exists and has such pairs;
    2. otherwise from the words that precede the succeeding word's
       assignment, if a succeeding neighbour exists and has such pairs;
    3. otherwise from ``poem.values[variable]``.

    Candidate weights are ``util.get_delta_weight(...) + 1``.  Sweeps stop
    once a sweep makes at most ``epsilon`` (100) sampled changes.

    Returns:
        The final assignment, a dict mapping each variable to its word.
    """
    epsilon = 100

    # Assign an unweighted random word to each variable.
    assignment = {}
    for variable in poem.variables:
        assignment[variable] = random.choice(list(poem.values[variable].keys()))

    def neighbor_weights(variable, anchor, anchor_slot, fill_slot):
        """Weight the words paired with ``anchor``'s current word."""
        weights = {}
        for word_pair, _count in self.authors[poem.author]['wordPairs'].items():
            if word_pair[anchor_slot] == assignment[anchor]:
                # The delta weight is computed with `variable` unassigned.
                assignment.pop(variable, None)
                candidate = word_pair[fill_slot]
                # FIXME: Smoothing?? (every weight is shifted by +1)
                weights[candidate] = util.get_delta_weight(
                    poem, assignment, variable, candidate) + 1
        return weights

    def assign_from(variable, weights):
        """Assign ``variable`` from ``weights``; return 1 if sampled, else 0."""
        if len(weights) == 1:
            # A single candidate is forced and not counted as a change.
            assignment[variable] = next(iter(weights))
            return 0
        assignment[variable] = util.weightedRandomChoice(weights)
        return 1

    num_changes = epsilon + 1
    loop_count = 0
    while num_changes > epsilon:
        num_changes = 0
        loop_count += 1
        print("INFO: Gibbs Sampling, Loop {}".format(loop_count))
        for (line_id, word_id) in poem.variables:
            variable = (line_id, word_id)
            print("\n\n")
            print("INFO: Sampling word {}".format(variable))
            print("INFO: Current assignment is {}".format(assignment))
            context = poem.get_neighbor_vars(variable)

            # Previous word: same line one position back, else the first
            # neighbour on the line above.  An empty context yields None
            # instead of failing on an index lookup.
            if (line_id, word_id - 1) in context:
                prev = (line_id, word_id - 1)
            else:
                prev = next((nb for nb in context if nb[0] == line_id - 1), None)

            if isinstance(prev, tuple):
                weights = neighbor_weights(variable, prev, 0, 1)
                if weights:
                    num_changes += assign_from(variable, weights)
                    continue

            # No usable preceding word: try the succeeding word.
            succ = None
            if (line_id, word_id + 1) in context:
                succ = (line_id, word_id + 1)
            elif (line_id + 1, 0) in context:
                succ = (line_id + 1, 0)

            if isinstance(succ, tuple):
                weights = neighbor_weights(variable, succ, 1, 0)
                if weights:
                    num_changes += assign_from(variable, weights)
                    continue

            # No preceding or succeeding word found: choose a random word.
            # NOTE(review): `.values` is passed without being called; if
            # poem.values[variable] is a plain dict this hands the sampler a
            # bound method -- confirm the intended argument.
            assignment[variable] = util.weightedRandomChoice(poem.values[variable].values)
            num_changes += 1

    return assignment
def __init__(self, author):
    """Build a poem CSP and its word-pair distributions from ``author``.

    Reads ``author['author']``, ``author['linesPerPoem']``,
    ``author['wordDomain']``, ``author['wordsPerLine']`` and
    ``author['wordPairs']``.

    The word domain is filled with distinct non-empty words drawn by
    ``util.weightedRandomChoice`` until it holds ``self.token_num`` words.
    ``self.token_num`` starts at 70 and is lowered to the number of
    drawable words when the author has fewer, so the loop always ends.
    """
    util.CSP.__init__(self)
    self.author = author['author']
    # Fixed size rather than a draw from author['typeTokenCount'].
    self.token_num = 70
    self.line_num = util.weightedRandomChoice(author['linesPerPoem'])
    self.word_num = {}
    self.domain = {}  # Domain of our word variables

    # Next two are dicts of dicts.  The top-level dict is keyed by every
    # word in our reduced domain; each inner dict is keyed by domain words
    # again and holds pair counts.
    self.prev_distribution = {}  # count of inner key coming after outer key
    self.post_distribution = {}  # reverse of the above
    # The same two distributions normalised into probabilities.
    self.prev_prob = {}
    self.post_prob = {}
    self.smoothing = 1  # Laplacian smoothing for prev/post distributions
    # self.neighborFactors[var] is a list of (neighbor variable, factor
    # function) tuples; the function returns a distribution given the
    # neighbor's assignment.
    self.neighborFactors = {}

    # Create the domain of our variables.  Cap the target at the number of
    # words that can actually be drawn (assumes the weights are numeric and
    # that zero-weight words are never sampled -- TODO confirm against
    # util.weightedRandomChoice).
    word_domain = author['wordDomain']
    drawable = sum(1 for word, weight in word_domain.items()
                   if word != '' and weight > 0)
    self.token_num = min(self.token_num, drawable)
    while len(self.domain) < self.token_num:
        add_word = util.weightedRandomChoice(word_domain)
        if add_word != '' and add_word not in self.domain:
            self.domain[add_word] = word_domain[add_word]

    # Randomly select the number of words per line.
    for line_id in range(self.line_num):
        self.word_num[line_id] = util.weightedRandomChoice(author['wordsPerLine'])

    # Create the prev/post dicts.
    word_pairs = author['wordPairs']
    for word in self.domain:
        total_prev_pairs = 0
        total_post_pairs = 0

        self.prev_distribution[word] = collections.defaultdict(int)
        self.prev_prob[word] = collections.defaultdict(float)
        for next_word in self.domain:
            count = word_pairs[word, next_word]
            if count > 0:
                self.prev_distribution[word][next_word] = count
                total_prev_pairs += count

        self.post_distribution[word] = collections.defaultdict(int)
        self.post_prob[word] = collections.defaultdict(float)
        for before_word in self.domain:
            count = word_pairs[(before_word, word)]
            if count > 0:
                self.post_distribution[word][before_word] = count
                total_post_pairs += count

        # Normalise the counts into probabilities.  Only positive counts are
        # stored, so a total is non-zero whenever there is something to
        # divide.
        self.prev_prob[word].update(
            (next_word, count / float(total_prev_pairs))
            for next_word, count in self.prev_distribution[word].items())
        self.post_prob[word].update(
            (before_word, count / float(total_post_pairs))
            for before_word, count in self.post_distribution[word].items())