def makeZipfianLexiconData(lexicon, word, context, n=100, s=1.0, alpha=0.9, verbose=False): # TODO remove word param from Shift files data = [] true_set = lexicon.make_true_data(context) all_poss_speakers = [ t[1] for t in true_set ] p = [ zipf(t, s, context, len(context.objects)) for t in all_poss_speakers ] for i in xrange(n): if flip(alpha): speaker = weighted_sample(all_poss_speakers, probs=p) bagR = {w : lexicon(w, context, set([speaker])) for w in lexicon.all_words()} uniqR = [] for w in lexicon.all_words(): uniqR.extend(bagR[w]) p1 = [ zipf(t, s, context, len(context.objects)) for t in uniqR ] referent = weighted_sample(uniqR, probs=p1) word = sample1([w for w in lexicon.all_words() if referent in bagR[w]]) if verbose: print "True data:", i, word, speaker, referent data.append(KinshipData(word, speaker, referent, context)) else: word = sample1(lexicon.all_words()) x = sample1(context.objects) y = sample1(context.objects) if verbose: print "Noise data:", i, word, x, y data.append(KinshipData(word, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data
def makeUniformLexiconData(lexicon, context, n=1000, alpha=0.9, verbose=False): ''' For w in words: L(w) --> {(W,S,R)} :param lexicon: the target lexicon :param context: the context :param n: the number of data points :param alpha: the reliability parameter. Noise = 1 - alpha :param verbose: print the generated data points :return: list of KinshipData objects ''' output = [] data = {w: [] for w in lexicon.all_words()} trueset = lexicon.make_true_data(context) for dp in trueset: data[dp[0]].extend([KinshipData(dp[0], dp[1], dp[2], context)]) gos = int(n * alpha) for w in lexicon.all_words(): for s in xrange(gos): output.append(sample1(data[w])) if verbose: print 'True Data:', s, output[-1].word, output[-1].X, output[ -1].Y for s in xrange(n - gos): output.append( KinshipData(w, sample1(context.objects), sample1(context.objects), context)) if verbose: print 'Noise Data:', s, output[-1].word, output[-1].X, output[ -1].Y return output
def makeVariableLexiconData(lexicon, word, context, n=100, s=1.0, alpha=0.9, verbose=False): data = [] true_set = lexicon.make_true_data(context) all_poss_speakers = [t[1] for t in true_set] p = [zipf(t, s, context, len(context.objects)) for t in all_poss_speakers] for i in xrange(n): if flip(alpha): speaker = weighted_sample(all_poss_speakers, probs=p) referents = lexicon(word, context, set([speaker])) p1 = [zipf(t, s, context, len(context.objects)) for t in referents] referent = weighted_sample(referents, probs=p1) if verbose: print "True data:", i, word, speaker, referent data.append(KinshipData(word, speaker, referent, context)) else: x = sample1(context.objects) y = sample1(context.objects) if verbose: print "Noise data:", i, word, x, y data.append(KinshipData(word, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data
def makeUniformData(lexicon, context, n=1000, alpha=0.9):
    """
    For every word, draw int(n * alpha) true data points followed by the remaining
    n - int(n * alpha) noise data points.

    :param lexicon: the target lexicon
    :param context: the context
    :param n: the number of data points (per word)
    :param alpha: fraction of each word's points that are drawn from its true data
    :return: list of KinshipData objects
    """
    # group the true data by word
    by_word = {}
    for w in lexicon.all_words():
        by_word[w] = []
    for dp in lexicon.make_true_data(context):
        by_word[dp[0]].append(KinshipData(dp[0], dp[1], dp[2], context))

    n_true = int(n * alpha)
    n_noise = n - n_true

    output = []
    for w in lexicon.all_words():
        k = 0
        while k < n_true:
            output.append(sample1(by_word[w]))
            k += 1
        k = 0
        while k < n_noise:
            speaker = sample1(context.objects)
            referent = sample1(context.objects)
            output.append(KinshipData(w, speaker, referent, context))
            k += 1
    return output
def makeRandomData(context, word='Word', n=3, ego=None, verbose=False): data = [] for i in xrange(n): if isinstance(word, list): w = sample1(word) else: w = word if ego is not None: data.append(KinshipData(w, ego, sample1(context.objects), context)) if verbose: print 'Data: ', i, data[-1] else: data.append(KinshipData(w, sample1(context.objects), sample1(context.objects), context)) if verbose: print 'Data: ', i, data[-1] return data
def makeTreeLexiconData(lexicon, context, n=100, alpha=0.9, epsilon=0.9, verbose=False): ''' L() --> {(W,S,R)} data ~ uniform( L() ) :param lexicon: the target lexicon :param context: the context :param n: the number of data points :param alpha: the reliability parameter. Noise = 1 - alpha :param epsilon: the ego-centric probability. :param verbose: print the generated data points :return: list of KinshipData objects ''' data = [] tree_truth = lexicon.make_true_data(context) ego_truth = lexicon.make_true_data(context, fixX=context.ego) for s in xrange(n): if flip(alpha): if flip(epsilon): t = sample1(ego_truth) if verbose: print "True data:", s, t[0], t[1], t[2] data.append(KinshipData(t[0], t[1], t[2], context)) else: t = sample1(tree_truth) if verbose: print "True data:", s, t[0], t[1], t[2] data.append(KinshipData(t[0], t[1], t[2], context)) else: x = sample1(context.objects) y = sample1(context.objects) word = sample1(lexicon.all_words()) if verbose: print "Noise data:", s, word, x, y data.append(KinshipData(word, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data
def propose(self, epsilon=1e-10):
    """
    Propose a new hypothesis by changing exactly one entry of a deep copy of self.value.

    With probability FullGrammarHypothesis.P_PROPOSE_RULEP the entry is the 'rulep'
    value of a nonterminal drawn from self.nts; otherwise it is one of 'alpha', 'beta',
    'likelihood_temperature', 'prior_temperature'. The new entry comes from that
    entry's own propose().

    :param epsilon: not used by this method
    :return: (proposal, fb), where fb is the second value returned by the entry's propose()
    """
    proposal = type(self)(self.Counts, self.L, self.GroupLength, self.prior_offset, self.Nyes,
                          self.Ntrials, self.ModelResponse, value=deepcopy(self.value))

    if random() < FullGrammarHypothesis.P_PROPOSE_RULEP:
        # propose to the rule parameters of one nonterminal
        key = sample1(self.nts)
        container = proposal.value['rulep']
    else:
        # propose to one of the other grammar variables
        key = sample1(['alpha', 'beta', 'likelihood_temperature', 'prior_temperature'])
        container = proposal.value

    updated, fb = container[key].propose()
    container[key] = updated
    return proposal, fb
def propose(self, epsilon=1e-10):
    """
    Propose a new hypothesis by re-proposing the value of one nonterminal.

    A nonterminal is drawn from self.nts with sample1, and its entry in a deep copy of
    self.value is replaced by the result of that entry's own propose().

    :param epsilon: not used by this method
    :return: (proposal, fb), where fb is the second value returned by the entry's propose()
    """
    build = type(self)
    proposal = build(self.Counts, self.L, self.GroupLength, self.prior_offset, self.Nyes,
                     self.Ntrials, self.ModelResponse, value=deepcopy(self.value))

    key = sample1(self.nts)  # which do we propose to?
    updated, fb = proposal.value[key].propose()
    proposal.value[key] = updated
    return proposal, fb
def propose(self, **kwargs):
    """
    Propose a new hypothesis using either insert_delete_proposal or regeneration_proposal.

    One of the two proposal functions is drawn with sample1 and applied to
    (self.grammar, self.value, **kwargs). Whenever it raises ProposalFailedException a
    fresh draw is made, so this only returns once a proposal succeeds.

    :param kwargs: forwarded unchanged to the chosen proposal function
    :return: (copy of self holding the proposed value, fb)
    """
    while True:  # keep trying to propose
        try:
            proposer = sample1([insert_delete_proposal, regeneration_proposal])
            new_value, fb = proposer(self.grammar, self.value, **kwargs)
        except ProposalFailedException:
            continue
        else:
            return self.__copy__(value=new_value), fb
def propose_tree(self, grammar, tree, resampleProbability=lambdaOne):
    """
    Return a copy of `tree` with a new FunctionNode inserted above a sampled node.

    The insertion site is sampled with weight
    can_insert_FunctionNode(node, grammar) * resampleProbability(node). A rule accepted
    by can_insert_GrammarRule is then chosen, one of its arguments of the site's
    returntype receives a copy of the site, and the remaining arguments are generated
    from the grammar.

    :raises ProposalFailedException: if no node can be sampled, no rule qualifies, or
                                     the chosen rule has no argument of the right type
    """
    new_t = copy(tree)

    def insertion_weight(node):
        # weight handed to sample_subnode for choosing where to insert
        return can_insert_FunctionNode(node, grammar) * resampleProbability(node)

    try:
        ni, _ = new_t.sample_subnode(insertion_weight)
    except NodeSamplingException:
        raise ProposalFailedException

    # is there a rule that expands from ni.returntype to some ni.returntype?
    candidates = [rule for rule in grammar.rules[ni.returntype] if can_insert_GrammarRule(rule)]
    if not candidates:
        raise ProposalFailedException
    chosen_rule = sample1(candidates)

    # the FunctionNode we are building; its args start out as the rule's argument types
    stub = chosen_rule.make_FunctionNodeStub(grammar, ni.parent)

    # positions where the existing node could go
    slots = [idx for idx, argtype in enumerate(stub.args) if argtype == ni.returntype]
    if len(slots) <= 0:  # should never happen
        raise ProposalFailedException
    keep_at = sample1(slots)

    # expand the other args, with the right rules in the grammar
    with BVRuleContextManager(grammar, stub, recurse_up=True):
        for i, a in enumerate(stub.args):
            if i == keep_at:
                stub.args[i] = copy(ni)
            else:
                stub.args[i] = grammar.generate(a)

    # perform the insertion
    ni.setto(stub)
    return new_t
def makeLexiconData(lexicon, context, n=100, alpha=0.9, verbose=False): data = [] if context.ego is None: tree_truth = lexicon.make_true_data(context) else: tree_truth = lexicon.make_true_data(context, fixX=context.ego) for s in xrange(n): if flip(alpha): t = sample1(tree_truth) if verbose: print "True data:", s, t data.append(KinshipData(t[0], t[1], t[2], context)) else: x = sample1(context.objects) y = sample1(context.objects) word = sample1(lexicon.all_words()) if verbose: print "Noise data:", s, word, x, y data.append(KinshipData(word, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data
def propose_tree(self,grammar,tree,resampleProbability=lambdaOne):
    """
    Return a copy of `tree` with a new FunctionNode inserted above a sampled node.

    The insertion site is sampled using can_insert_FunctionNode. A rule accepted by
    can_insert_GrammarRule is chosen, one of its arguments of the site's returntype
    receives a copy of the site, and the other arguments are generated from the grammar.

    NOTE(review): resampleProbability is accepted but not used here -- confirm whether
    the node sampling was meant to take it into account.

    :raises ProposalFailedException: if no node can be sampled, no rule qualifies, or
                                     the chosen rule has no argument of the right type
    """
    proposal = copy(tree)

    try:  # to choose a node to insert on
        site, _ = proposal.sample_subnode(can_insert_FunctionNode)
    except NodeSamplingException:
        raise ProposalFailedException

    # rules that expand from site.returntype to some site.returntype
    usable_rules = filter(can_insert_GrammarRule, grammar.rules[site.returntype])
    if not usable_rules:
        raise ProposalFailedException

    rule = sample1(usable_rules)
    node = rule.make_FunctionNodeStub(grammar, site.parent)  # the FunctionNode we are building

    # which args could hold the existing site
    positions = [k for k in xrange(len(node.args)) if node.args[k] == site.returntype]
    if not positions:  # should never happen
        raise ProposalFailedException
    target = sample1(positions)

    # expand the other args, with the right rules in the grammar
    with BVRuleContextManager(grammar, node, recurse_up=True):
        for i, a in enumerate(node.args):
            if i != target:
                node.args[i] = grammar.generate(a)
            else:
                node.args[i] = copy(site)

    site.setto(node)  # perform the insertion
    return proposal
def makeVariableLexiconData(lexicon, word, context, n=100, s=1.0, alpha=0.9, verbose=False): data = [] true_set = lexicon.make_true_data(context) all_poss_speakers = [ t[1] for t in true_set ] p = [ zipf(t, s, context, len(context.objects)) for t in all_poss_speakers ] for i in xrange(n): if flip(alpha): speaker = weighted_sample(all_poss_speakers, probs=p) referents = lexicon(word, context, set([speaker])) p1 = [ zipf(t, s, context, len(context.objects)) for t in referents ] referent = weighted_sample(referents, probs=p1) if verbose: print "True data:", i, word, speaker, referent data.append(KinshipData(word, speaker, referent, context)) else: x = sample1(context.objects) y = sample1(context.objects) if verbose: print "Noise data:", i, word, x, y data.append(KinshipData(word, x, y, context)) if verbose: print lexicon.compute_likelihood(data) return data
def propose_tree(self,grammar,tree,resampleProbability=lambdaOne):
    """
    Return a copy of `tree` in which a sampled node is replaced by one of its
    replicating children (a deletion).

    :param grammar: not used by this method
    :param tree: the tree to copy and modify
    :param resampleProbability: wrapped by dp_rp to weight the node sampling
    :raises ProposalFailedException: if no node can be sampled, or the sampled node has
                                     no replicating children
    """
    proposal = copy(tree)

    try:  # to choose a node to delete
        sampled = proposal.sample_subnode(dp_rp(resampleProbability))
        node, _ = sampled
    except NodeSamplingException:
        raise ProposalFailedException

    # children that list_replicating_children reports for this node
    candidates = list_replicating_children(node)
    if not candidates:
        raise ProposalFailedException

    # who to promote; NOTE: not done via any weighting
    promoted = sample1(candidates)

    node.setto(promoted)  # perform the deletion
    return proposal
def propose_tree(self, grammar, tree, resampleProbability=lambdaOne):
    """
    Deletion move: copy `tree`, sample a node, and overwrite it with one of the
    children returned by list_replicating_children.

    :param grammar: not used by this method
    :param tree: the tree to copy and modify
    :param resampleProbability: wrapped by dp_rp to weight the node sampling
    :raises ProposalFailedException: if sampling a node fails or there is no child to promote
    """
    result = copy(tree)

    try:
        victim, _ = result.sample_subnode(dp_rp(resampleProbability))
    except NodeSamplingException:
        raise ProposalFailedException

    kids = list_replicating_children(victim)
    if kids:
        # the promoted child is chosen by sample1, without any weighting
        victim.setto(sample1(kids))
        return result

    raise ProposalFailedException
def propose(self):
    """
    Propose a new value by redrawing one coordinate from a beta distribution and
    renormalizing.

    NOTE(review): `ret` comes from copy(self); if that copy is shallow, ret.value and
    self.value are the same object and the assignments below also change self.value --
    confirm how this class is copied.

    :return: (ret, fb); a length-1 value is returned unchanged with fb = 0.0
    """
    ret = copy(self)
    if len(ret.value) == 1:
        return ret, 0.0  # handle singleton rules

    inx = sample1(range(0, self.alpha.shape[0]))

    scale = self.proposal_scale
    first_shape = self.value[inx] * scale
    ret.value[inx] = numpy.random.beta(first_shape, scale - first_shape)

    # add a tiny bit of smoothing away from 0/1
    smoothing = DirichletDistribution.SMOOTHING
    ret.value[inx] = (1.0 - smoothing) * ret.value[inx] + smoothing / 2.0

    v = sum(ret.value)

    # the four terms are combined left to right, exactly as a + b - c - d
    forward_elements = sum(gamma.logpdf(ret.value, self.value))
    forward_total = gamma.logpdf(v, 1)
    backward_elements = sum(gamma.logpdf(self.value, ret.value))
    backward_total = gamma.logpdf(1, v)
    fb = forward_elements + forward_total - backward_elements - backward_total

    # and renormalize it, slightly breaking MCMC
    ret.value = ret.value / sum(ret.value)

    return ret, fb
def propose(self):
    """
    Redraw one coordinate of the value from a beta distribution, smooth it away from
    0/1, compute fb from gamma log-densities, and renormalize the value.

    NOTE(review): copy(self) may be shallow, in which case writes to ret.value are
    also visible through self.value -- confirm against the class's copy behavior.

    :return: (ret, fb); when the value has a single element it is returned as is with
             fb = 0.0
    """
    ret = copy(self)

    # handle singleton rules
    if len(ret.value) == 1:
        return ret, 0.0

    n_coords = self.alpha.shape[0]
    inx = sample1(range(0, n_coords))

    a = self.value[inx] * self.proposal_scale
    b = self.proposal_scale - self.value[inx] * self.proposal_scale
    draw = numpy.random.beta(a, b)
    ret.value[inx] = draw

    # add a tiny bit of smoothing away from 0/1
    ret.value[inx] = ((1.0 - DirichletDistribution.SMOOTHING) * ret.value[inx]
                      + DirichletDistribution.SMOOTHING / 2.0)

    v = sum(ret.value)
    fb = (sum(gamma.logpdf(ret.value, self.value)) + gamma.logpdf(v, 1)
          - sum(gamma.logpdf(self.value, ret.value)) - gamma.logpdf(1, v))

    # and renormalize it, slightly breaking MCMC
    total = sum(ret.value)
    ret.value = ret.value / total

    return ret, fb
def propose_tree(self, t):
    """
    Propose a copy of `t` changed by either an inverse-inlining or an inlining move,
    each chosen with probability 0.5.

    Delete:
        - find an apply
        - take the interior of the lambdathunk and sub it in for the lambdaarg everywhere, remove the apply
    Insert:
        - Find a node
        - Find a subnode s
        - Remove all repetitions of s, create a lambda
        - and add an apply

    Returns [newt, f - b], where f and b are the forward and backward log
    probabilities computed below. Raises ProposalFailedException whenever
    sample_subnode raises NodeSamplingException.
    """
    newt = copy(t)
    f, b = 0.0, 0.0
    # ------------------
    if random() < 0.5:  # An inverse-inlining move
        # where the lambda goes
        # NOTE(review): the local name `np` would shadow a module-level numpy alias, if the file has one
        try:
            n, np = newt.sample_subnode(
                resampleProbability=self.can_abstract_at)
        except NodeSamplingException:
            raise ProposalFailedException
        # print "# INVERSE-INLINE"
        # Pick the rule we will use
        ir = self.insertable_rules[n.returntype]
        ar, lr = sample1(ir)  # the apply and lambda rules
        assert ar.nt == n.returntype
        assert lr.nt == ar.to[0]
        # what the argument is. Must have a returntype equal to the second apply type
        arg_predicate = lambda z: z.returntype == ar.to[
            1] and self.is_valid_argument(n, z)  # how do we choose args?
        try:
            argval, _ = n.sample_subnode(resampleProbability=arg_predicate)
        except NodeSamplingException:
            raise ProposalFailedException
        argval = copy(
            argval
        )  # necessary since the argval in the tree gets overwritten
        below = copy(n)  # necessary since n gets setto the new apply rule
        # now make the function nodes.
        n.setto(ar.make_FunctionNodeStub(self.grammar,
                                         None))  # n's parent is preserved
        lambdafn = lr.make_FunctionNodeStub(
            self.grammar, n
        )  ## this must be n, not applyfn, since n will eventually be setto applyfn
        bvfn = lambdafn.added_rule.make_FunctionNodeStub(
            self.grammar,
            None)  # this takes the place of argval everywhere below
        below.replace_subnodes(lambda x: x == argval,
                               bvfn)  # substitute below the lambda
        lambdafn.args[0] = below
        below.parent = lambdafn
        argval.parent = n
        # build our little structure
        n.args = lambdafn, argval
        assert self.can_inline_at(n)  # this had better be true
        # to go forward, you choose a node, a rule, and an argument
        f = np + (-log(len(ir))) + lp_sample_equal_to(
            n, argval, resampleProbability=arg_predicate)
        # to go backward, you choose the new apply node among everything that can be inlined
        newZ = newt.sample_node_normalizer(self.can_inline_at)
        b = (log(self.can_inline_at(n) * 1.0) - log(newZ))
    else:  # An inlining move
        try:
            n, np = newt.sample_subnode(
                resampleProbability=self.can_inline_at)
        except NodeSamplingException:
            raise ProposalFailedException
        # print "# INLINE"
        # Replace the subnodes
        newn = n.args[0].args[0]  # what's below the lambda
        argval = n.args[1]
        bvn = n.args[0].added_rule.name  # the name of the added variable
        # every use of the bound variable below the lambda is replaced by the argument
        newn.replace_subnodes(
            lambda x: isinstance(x, BVUseFunctionNode) and x.name == bvn,
            argval)
        n.setto(newn)
        assert self.can_abstract_at(n)  # this had better be true
        # figure out which rule we are supposed to use
        possible_rules = [
            r for r in self.grammar.rules[n.returntype]
            if r.name == n.name and tuple(r.to) == tuple(n.argTypes())
        ]
        assert len(possible_rules) == 1  # for now?
        n.rule = possible_rules[0]
        ir = self.insertable_rules[
            n.returntype]  # for the backward probability
        f = np  # just the probability of choosing this apply
        # choose n, choose a, choose the rule
        arg_predicate = lambda z: (z.returntype == argval.returntype
                                   ) and self.is_valid_argument(newn, z)
        new_nZ = newt.sample_node_normalizer(
            self.can_abstract_at)  # prob of choosing n
        argvalp = lp_sample_equal_to(newn,
                                     argval,
                                     resampleProbability=arg_predicate)
        assert len(ir) > 0
        b = (log(self.can_abstract_at(newn)) -
             log(new_nZ)) + argvalp + (-log(len(ir)))
    assert newt.check_parent_refs(
    )  # Can comment out -- here for debugging
    return [newt, f - b]
def propose_tree(self, t):
    """
    Propose a copy of `t` changed by either an inverse-inlining or an inlining move,
    each chosen with probability 0.5.

    Delete:
        - find an apply
        - take the interior of the lambdathunk and sub it in for the lambdaarg everywhere, remove the apply
    Insert:
        - Find a node
        - Find a subnode s
        - Remove all repetitions of s, create a lambda
        - and add an apply

    Returns [newt, f - b], where f and b are the forward and backward log
    probabilities computed below. Raises ProposalFailedException whenever
    sample_subnode raises NodeSamplingException.
    """
    newt = copy(t)
    f, b = 0.0, 0.0
    # ------------------
    if random() < 0.5:  # An inverse-inlining move
        # where the lambda goes
        # NOTE(review): the local name `np` would shadow a module-level numpy alias, if the file has one
        try:
            n, np = newt.sample_subnode(resampleProbability=self.can_abstract_at)
        except NodeSamplingException:
            raise ProposalFailedException
        # print "# INVERSE-INLINE"
        # Pick the rule we will use
        ir = self.insertable_rules[n.returntype]
        ar, lr = sample1(ir)  # the apply and lambda rules
        assert ar.nt == n.returntype
        assert lr.nt == ar.to[0]
        # what the argument is. Must have a returntype equal to the second apply type
        arg_predicate = lambda z: z.returntype == ar.to[1] and self.is_valid_argument(
            n, z
        )  # how do we choose args?
        try:
            argval, _ = n.sample_subnode(resampleProbability=arg_predicate)
        except NodeSamplingException:
            raise ProposalFailedException
        argval = copy(argval)  # necessary since the argval in the tree gets overwritten
        below = copy(n)  # necessary since n gets setto the new apply rule
        # now make the function nodes.
        n.setto(ar.make_FunctionNodeStub(self.grammar, None))  # n's parent is preserved
        lambdafn = lr.make_FunctionNodeStub(
            self.grammar, n
        )  ## this must be n, not applyfn, since n will eventually be setto applyfn
        bvfn = lambdafn.added_rule.make_FunctionNodeStub(
            self.grammar, None
        )  # this takes the place of argval everywhere below
        below.replace_subnodes(lambda x: x == argval, bvfn)  # substitute below the lambda
        lambdafn.args[0] = below
        below.parent = lambdafn
        argval.parent = n
        # build our little structure
        n.args = lambdafn, argval
        assert self.can_inline_at(n)  # this had better be true
        # to go forward, you choose a node, a rule, and an argument
        f = np + (-log(len(ir))) + lp_sample_equal_to(n, argval, resampleProbability=arg_predicate)
        # to go backward, you choose the new apply node among everything that can be inlined
        newZ = newt.sample_node_normalizer(self.can_inline_at)
        b = log(self.can_inline_at(n) * 1.0) - log(newZ)
    else:  # An inlining move
        try:
            n, np = newt.sample_subnode(resampleProbability=self.can_inline_at)
        except NodeSamplingException:
            raise ProposalFailedException
        # print "# INLINE"
        # Replace the subnodes
        newn = n.args[0].args[0]  # what's below the lambda
        argval = n.args[1]
        bvn = n.args[0].added_rule.name  # the name of the added variable
        # every use of the bound variable below the lambda is replaced by the argument
        newn.replace_subnodes(lambda x: isinstance(x, BVUseFunctionNode) and x.name == bvn, argval)
        n.setto(newn)
        assert self.can_abstract_at(n)  # this had better be true
        # figure out which rule we are supposed to use
        possible_rules = [
            r for r in self.grammar.rules[n.returntype] if r.name == n.name and tuple(r.to) == tuple(n.argTypes())
        ]
        assert len(possible_rules) == 1  # for now?
        n.rule = possible_rules[0]
        ir = self.insertable_rules[n.returntype]  # for the backward probability
        f = np  # just the probability of choosing this apply
        # choose n, choose a, choose the rule
        arg_predicate = lambda z: (z.returntype == argval.returntype) and self.is_valid_argument(newn, z)
        new_nZ = newt.sample_node_normalizer(self.can_abstract_at)  # prob of choosing n
        argvalp = lp_sample_equal_to(newn, argval, resampleProbability=arg_predicate)
        assert len(ir) > 0
        b = (log(self.can_abstract_at(newn)) - log(new_nZ)) + argvalp + (-log(len(ir)))
    assert newt.check_parent_refs()  # Can comment out -- here for debugging
    return [newt, f - b]
def sample_string(self):
    """Return one element of self.strings, chosen by sample1."""
    candidates = self.strings
    return sample1(candidates)
def insert_delete_proposal(grammar, t):
    """
    Propose a copy of `t` changed by an insert or a delete move, each chosen with
    probability 0.5.

    Insert: sample a node with can_insert_FunctionNode, wrap it in a new FunctionNode
    built from a rule accepted by can_insert_GrammarRule, and generate the other
    arguments from the grammar.
    Delete: sample a node with can_delete_FunctionNode and replace it with one of its
    FunctionNode children that has the same returntype.

    Returns [newt, f - b], where f and b are the forward and backward log
    probabilities computed below. Raises ProposalFailedException when no node can be
    sampled or the sampled node does not admit the move.
    """
    newt = copy(t)

    if random() < 0.5:  # insert!

        # Choose a node at random to insert on
        # TODO: We could precompute the nonterminals we can do this move on, if we wanted
        try:
            ni, lp = newt.sample_subnode(can_insert_FunctionNode)
        except NodeSamplingException:
            raise ProposalFailedException

        # is there a rule that expands from ni.returntype to some ni.returntype?
        replicating_rules = filter(can_insert_GrammarRule, grammar.rules[ni.returntype])
        if len(replicating_rules) == 0:
            raise ProposalFailedException

        # sample a rule
        r = sample1(replicating_rules)

        # the functionNode we are building
        fn = r.make_FunctionNodeStub(grammar, ni.parent)

        # figure out which arg will be the existing ni
        replicatingindices = filter(lambda i: fn.args[i] == ni.returntype, xrange(len(fn.args)))
        if len(replicatingindices) <= 0:  # should never happen
            raise ProposalFailedException

        replace_i = sample1(replicatingindices)  # choose the one to replace

        ## Now expand the other args, with the right rules in the grammar
        with BVRuleContextManager(grammar, fn, recurse_up=True):

            for i, a in enumerate(fn.args):
                if i == replace_i:
                    fn.args[i] = copy(ni)  # the one we replace
                else:
                    fn.args[i] = grammar.generate(a)  # else generate like normal

        # we need a count of how many kids are the same afterwards
        after_same_children = sum([x == ni for x in fn.args])

        # perform the insertion
        ni.setto(fn)

        # TODO: fix the fact that there are potentially multiple backward steps to give the equivalent tree
        # need to use the right grammar for log_probability calculations
        with BVRuleContextManager(grammar, fn, recurse_up=True):

            # what is the prob mass of the new stuff?
            new_lp_below = sum(
                [
                    grammar.log_probability(fn.args[i]) if (i != replace_i and isFunctionNode(fn.args[i])) else 0.0
                    for i in xrange(len(fn.args))
                ]
            )

            # What is the new normalizer?
            newZ = newt.sample_node_normalizer(can_delete_FunctionNode)
            assert newZ > 0

            # forward: choose the node ni, choose the replicating rule, choose which "to" to expand, and generate the rest of the tree
            f = (
                lp
                - nicelog(len(replicating_rules))
                + (nicelog(after_same_children) - nicelog(len(replicatingindices)))
                + new_lp_below
            )

            # backward: choose the inserted node, choose one of the children identical to the original ni, and deterministically delete
            b = (nicelog(1.0 * can_delete_FunctionNode(fn)) - nicelog(newZ)) + (
                nicelog(after_same_children) - nicelog(len(replicatingindices))
            )

    else:  # delete!

        try:  # sample a node at random
            ni, lp = newt.sample_subnode(can_delete_FunctionNode)  # this could raise exception

            if ni.args is None:  # doesn't have children to promote
                raise NodeSamplingException

        except NodeSamplingException:
            raise ProposalFailedException

        # Figure out which of my children have the same type as me
        replicating_kid_indices = filter(
            lambda i: isFunctionNode(ni.args[i]) and ni.args[i].returntype == ni.returntype, range(len(ni.args))
        )

        nrk = len(replicating_kid_indices)  # how many replicating kids
        if nrk == 0:
            raise ProposalFailedException

        # only the number of these rules is used, in the backward probability
        replicating_rules = filter(can_delete_GrammarRule, grammar.rules[ni.returntype])
        assert len(replicating_rules) > 0  # better be some or where did ni come from?

        samplei = sample1(replicating_kid_indices)  # who to promote; NOTE: not done via any weighting

        # We need to be in the right grammar state to evaluate log_probability
        with BVRuleContextManager(grammar, ni.args[samplei], recurse_up=True):

            # Now we must count the multiple ways we could go forward or back
            # Here, we could have sampled any of them equivalent to ni.args[samplei]
            before_same_children = sum([x == ni.args[samplei] for x in ni.args])  # how many are the same before?

            # the lp of everything we'd have to create going backwards
            old_lp_below = sum(
                [
                    grammar.log_probability(ni.args[i]) if (i != samplei and isFunctionNode(ni.args[i])) else 0.0
                    for i in xrange(len(ni.args))
                ]
            )

            # and replace it
            ni.setto(ni.args[samplei])

            newZ = newt.sample_node_normalizer(resampleProbability=can_insert_FunctionNode)

            # forward: choose the node, and then from all equivalent children
            # (plain log here, nicelog elsewhere; both counts are at least 1 at this point)
            f = lp + (log(before_same_children) - log(nrk))

            # backward: choose the node, choose the replicating rule, choose where to put it, and generate the rest of the tree
            b = (
                (nicelog(1.0 * can_insert_FunctionNode(ni)) - nicelog(newZ))
                - nicelog(len(replicating_rules))
                + (nicelog(before_same_children) - nicelog(nrk))
                + old_lp_below
            )

    return [newt, f - b]
def propose_tree(self, t):
    """
    Propose a copy of `t` changed by either an inverse-inlining or an inlining move,
    each chosen with probability 0.5.

    Delete:
        - find an apply
        - take the interior of the lambdathunk and sub it in for the lambdaarg everywhere, remove the apply
    Insert:
        - Find a node
        - Find a subnode s
        - Remove all repetitions of s, create a lambda
        - and add an apply

    Returns [newt, f-b], where f and b are the forward and backward log
    probabilities computed below. When sample_subnode raises NodeSamplingException
    the unchanged copy is returned as [newt, 0.0].
    """
    newt = copy(t)
    f,b = 0.0, 0.0
    # ------------------
    if random() < 0.5 : # An inverse-inlining move
        # where the lambda goes
        # NOTE(review): the local name `np` would shadow a module-level numpy alias, if the file has one
        try:
            n, np = newt.sample_subnode(resampleProbability=self.can_abstract_at)
        except NodeSamplingException:
            return [newt, 0.0]
        # Pick the rule we will use
        ir = self.insertable_rules[n.returntype]
        ar,lr = sample1(ir) # the apply and lambda rules
        assert ar.nt == n.returntype
        assert lr.nt == ar.to[0]
        # what the argument is. Must have a returntype equal to the second apply type
        arg_predicate = lambda z: z.returntype == ar.to[1] and self.is_valid_argument(n, z) # how do we choose args?
        try:
            argval, _ = n.sample_subnode(resampleProbability=arg_predicate )
        except NodeSamplingException:
            return [newt, 0.0]
        argval = copy(argval) # necessary since the argval in the tree gets overwritten
        below = copy(n) # necessary since n gets setto the new apply rule
        # now make the function nodes. The generation_probabilities will be reset later, as will the parents for applyfn and bvfn
        n.setto(ar.make_FunctionNodeStub(self.grammar, 0.0, None)) # n's parent is preserved
        lambdafn = lr.make_FunctionNodeStub(self.grammar, 0.0, n) ## this must be n, not applyfn, since n will eventually be setto applyfn
        bvfn = lambdafn.added_rule.make_FunctionNodeStub(self.grammar, 0.0, None) # this takes the place of argval everywhere below
        below.replace_subnodes(lambda x:x==argval, bvfn) # substitute below the lambda
        lambdafn.args[0] = below
        below.parent = lambdafn
        argval.parent = n
        # build our little structure
        n.args = lambdafn, argval
        assert self.can_inline_at(n) # this had better be true
        #assert newt.check_parent_refs()
        # to go forward, you choose a node, a rule, and an argument
        f = np + (-log(len(ir))) + lp_sample_equal_to(n, argval, resampleProbability=arg_predicate)
        # to go backward, you choose the new apply node among everything that can be inlined
        newZ = newt.sample_node_normalizer(self.can_inline_at)
        b = (log(self.can_inline_at(n)*1.0) - log(newZ))
    else: # An inlining move
        try:
            n, np = newt.sample_subnode(resampleProbability=self.can_inline_at)
        except NodeSamplingException:
            return [newt, 0.0]
        #print "CHOOSING n=", n
        #print "PARENT n=", n.parent
        # Replace the subnodes
        newn = n.args[0].args[0] # what's below the lambda
        argval = n.args[1]
        bvn = n.args[0].added_rule.name # the name of the added variable
        # NOTE(review): this matches any subnode whose name equals bvn, with no check on the node's type -- confirm that is intended
        newn.replace_subnodes(lambda x: x.name == bvn, argval)
        n.setto(newn)
        assert self.can_abstract_at(n) # this had better be true
        ir = self.insertable_rules[n.returntype] # for the backward probability
        # just the probability of choosing this apply
        f = np
        # choose n, choose a, choose the rule
        arg_predicate = lambda z: (z.returntype == argval.returntype) and self.is_valid_argument(newn, z)
        new_nZ = newt.sample_node_normalizer(self.can_abstract_at) # prob of choosing n
        argvalp = lp_sample_equal_to(newn, argval, resampleProbability=arg_predicate)
        assert len(ir)>0
        b = (log(self.can_abstract_at(newn)) - log(new_nZ)) + argvalp + (-log(len(ir)))
    ## and fix the generation probabilities, because otherwise they are ruined by all the mangling above
    newt.recompute_generation_probabilities(self.grammar)
    assert newt.check_parent_refs() # Can comment out -- here for debugging
    return [newt, f-b]
def propose_tree(self, t):
    """
    Propose a copy of `t` changed by an insert or a delete move, or defer to the
    regeneration proposal.

    With probability 1 - self.insert_delete_probability the result of
    self.my_regeneration_proposal.propose_tree(t) is returned directly. Otherwise an
    insert or a delete move is attempted, each with probability 0.5, on a node
    yielded by self.grammar.sample_node_via_iterate.

    Returns [newt, fb]. If no move could be applied, newt is an unchanged copy and
    fb is 0.0.
    """
    # Default regeneration proposal with some probability
    if random() >= self.insert_delete_probability:
        return self.my_regeneration_proposal.propose_tree(t)

    newt = copy(t)
    fb = 0.0 # the forward/backward prob we return
    sampled=False # so we can see if we didn't do it

    if random() < 0.5: # So we insert

        # first sample a node (through sample_node_via_iterate, which handles everything well)
        for ni, di, resample_p, resample_Z in self.grammar.sample_node_via_iterate(newt):
            if ni.args is None: continue # Can't deal with these TODO: CHECK THIS?

            # Since it's an insert, see if there is a (replicating) rule that expands
            # from ni.returntype to some ni.returntype
            replicating_rules = filter(lambda x: x.name != 'lambda' and (x.to is not None) and any([a==ni.returntype for a in x.to]), self.grammar.rules[ni.returntype])

            # If there are none, then we can't insert!
            if len(replicating_rules) == 0: continue

            # choose a replicating rule
            # NOTE(review): the rule is drawn with weights x.p here, while f below uses -log(len(replicating_rules)), i.e. a uniform choice -- confirm which is intended
            r, gp = weighted_sample(replicating_rules, probs=lambda x: x.p, return_probability=True, log=False)
            # NOTE(review): this subtracts the raw sum of rule probabilities from a log; log(sum(...)) looks intended -- confirm
            gp = log(r.p) - sum([x.p for x in self.grammar.rules[ni.returntype]]) # this is the probability overall in the grammar, not my prob of sampling

            # Now take the rule and expand the children:

            # choose who gets to be ni
            nrhs = len( [ x for x in r.to if x == ni.returntype] ) # how many on the rhs are there?
            if nrhs == 0: continue
            replace_i = randint(0,nrhs-1) # choose the one to replace

            ## Now expand args but only for the one we don't sample...
            args = []
            for x in r.to:
                if x == ni.returntype:
                    if replace_i == 0: args.append( copy(ni) ) # if it's the one we replace into
                    else: args.append( self.grammar.generate(x, d=di+1) ) # else generate like normal
                    replace_i -= 1 # counts down so that exactly one matching slot sees 0
                else:
                    args.append( self.grammar.generate(x, d=di+1) ) # else generate like normal

            # Now we must count the multiple ways we could go forward or back
            after_same_children = [ x for x in args if x==ni] # how many are the same after?
            #backward_resample_p = sum([ x.resample_p for x in after_same_children]) # if you go back, you can choose any identical kids

            # create the new node
            sampled = True
            ni.setto( FunctionNode(returntype=r.nt, name=r.name, args=args, generation_probability=gp, bv_name=None, bv_args=None, ruleid=r.rid, resample_p=r.resample_p ) )

        if sampled:

            new_lp_below = sum(map(lambda z: z.log_probability(), filter(isFunctionNode, args))) - ni.log_probability()

            newZ = self.grammar.resample_normalizer(newt)
            # To sample forward: choose the node ni, choose the replicating rule, choose which "to" to expand (we could have put it on any of the replicating rules that are identical), and generate the rest of the tree
            f = (log(resample_p) - log(resample_Z)) + -log(len(replicating_rules)) + (log(len(after_same_children))-log(nrhs)) + new_lp_below
            # To go backwards, choose the inserted rule, and any of the identical children, out of all replicators
            b = (log(ni.resample_p) - log(newZ)) + (log(len(after_same_children)) - log(nrhs))
            fb = f-b

    else: # A delete move!
        for ni, di, resample_p, resample_Z in self.grammar.sample_node_via_iterate(newt):
            if ni.name == 'lambda': continue # can't do anything
            if ni.args is None: continue # Can't deal with these TODO: CHECK THIS?

            # Figure out which of my children have the same type as me
            replicating_kid_indices = [ i for i in xrange(len(ni.args)) if isFunctionNode(ni.args[i]) and ni.args[i].returntype==ni.returntype]

            nrk = len(replicating_kid_indices) # how many replicating kids
            if nrk == 0: continue # if no replicating rules here

            ## We need to compute a few things for the backwards probability
            replicating_rules = filter(lambda x: (x.to is not None) and any([a==ni.returntype for a in x.to]), self.grammar.rules[ni.returntype])
            if len(replicating_rules) == 0: continue

            i = sample1(replicating_kid_indices) # who to promote; NOTE: not done via any weighting

            # Now we must count the multiple ways we could go forward or back
            # Here, we could have sampled any of them equivalent to ni.args[i]
            before_same_children = [ x for x in ni.args if x==ni.args[i] ] # how many are the same before?

            # the lp of everything we'd have to create going backwards
            old_lp_below = sum(map(lambda z: z.log_probability(), filter(isFunctionNode, ni.args) )) - ni.args[i].log_probability()

            # and replace it
            sampled = True
            ni.setto( copy(ni.args[i]) ) # TODO: copy not necessary here, I think?

        if sampled:

            newZ = self.grammar.resample_normalizer(newt)
            # To go forward, choose the node, and then from all equivalent children
            f = (log(resample_p) - log(resample_Z)) + (log(len(before_same_children)) - log(nrk))
            # To go back, choose the node, choose the replicating rule, choose where to put it, and generate the rest of the tree
            b = (log(ni.resample_p) - log(newZ)) + -log(len(replicating_rules)) + (log(len(before_same_children)) - log(nrk)) + old_lp_below
            fb = f-b

    # and fix the bound variables, whose depths may have changed
    if sampled: newt.fix_bound_variables()

    return [newt, fb]
def propose_tree(self, t):
    """Propose a tree near `t` using a single insert or delete move.

    The move is applied to `copy(t)`. With probability 0.5 it is an insert:
    a node `ni` is drawn with `sample_subnode`, a replicating rule for
    `ni.returntype` is chosen uniformly, and `ni` is replaced by a new
    function node built from that rule, in which one same-typed argument is
    a copy of the old `ni` and the remaining arguments are generated by the
    grammar. Otherwise it is a delete: a drawn node is replaced by one of
    its own FunctionNode children that has the same return type.

    :param t: the tree to propose from
    :return: [newt, f - b] -- the proposed tree, and the forward log
        probability minus the backward log probability of the move
    :raises ProposalFailedException: if no node can be sampled, if an insert
        finds no replicating rule for the sampled node's type, or if a
        delete finds no same-typed FunctionNode child to promote
    """
    newt = copy(t)

    if random() < 0.5:  # So we insert

        # Choose a node at random to insert on
        # TODO: We could precompute the nonterminals we can do this move on, if we wanted
        try:
            ni, lp = newt.sample_subnode(isNotBVAddFunctionNode)
        except NodeSamplingException:
            raise ProposalFailedException

        # Since it's an insert, see if there is a (replicating) rule that expands
        # from ni.returntype to some ni.returntype
        replicating_rules = [rule for rule in self.grammar.rules[ni.returntype]
                             if is_replicating_GrammarRule(rule)]

        # If there are none, we can't insert. Fail the same way the delete
        # branch does (there is no forward/backward value to return here).
        if len(replicating_rules) == 0:
            raise ProposalFailedException

        # sample a rule uniformly (not under the predicate)
        r = sample1(replicating_rules)

        # the functionNode we are building
        fn = r.make_FunctionNodeStub(self, ni.parent)

        # figure out which arg will be the existing ni: in the stub, the
        # args are still compared against return types
        replicatingindices = [i for i, a in enumerate(fn.args) if a == ni.returntype]
        # since that's what a replicating rule is
        assert len(replicatingindices) > 0

        replace_i = sample1(replicatingindices)  # choose the one to replace
        fn.args[replace_i] = copy(ni)  # the one we replace

        ## Now expand the other args, with the right rules in the grammar
        with BVRuleContextManager(self.grammar, fn, recurse_up=True):
            # and generate the args below
            for i, a in enumerate(fn.args):
                if i != replace_i:
                    fn.args[i] = self.grammar.generate(a)

        # we need a count of how many kids are the same afterwards
        # (counted before ni is overwritten by fn)
        after_same_children = sum([x == ni for x in fn.args])

        ni.setto(fn)

        with BVRuleContextManager(self.grammar, fn, recurse_up=True):

            # what is the prob mass of the new stuff? (everything but the
            # argument we re-used)
            new_lp_below = sum([self.grammar.log_probability(arg)
                                if (i != replace_i and isFunctionNode(arg)) else 0.
                                for i, arg in enumerate(fn.args)])

            # What is the new normalizer?
            newZ = newt.sample_node_normalizer(isNotBVAddFunctionNode)
            assert newZ > 0

            # To sample forward: choose the node ni, choose the replicating
            # rule, choose which "to" to expand (we could have put it on any
            # of the replicating slots that are identical), and generate the
            # rest of the tree
            f = lp + (-log(len(replicating_rules))) \
                + (log(after_same_children) - log(len(replicatingindices))) \
                + new_lp_below

            # To go backwards, choose the inserted rule, and any of the
            # identical children, out of all replicators
            # NOTE(review): the delete move's forward term divides by the
            # number of same-typed FunctionNode children (nrk), while this
            # term divides by len(fn.args) -- confirm these are meant to differ.
            b = (log(1.0 * isNotBVAddFunctionNode(fn)) - log(newZ)) \
                + (log(after_same_children) - log(len(fn.args)))

    else:  # A delete move!

        # Sample a node at random
        try:
            ni, lp = newt.sample_subnode(isNotBVAddFunctionNode)  # this could raise exception

            # Really, it had to be not None
            if ni.args is None:
                raise NodeSamplingException
        except NodeSamplingException:
            raise ProposalFailedException

        # Figure out which of my children have the same type as me
        replicating_kid_indices = [i for i, kid in enumerate(ni.args)
                                   if isFunctionNode(kid) and kid.returntype == ni.returntype]

        nrk = len(replicating_kid_indices)  # how many replicating kids
        if nrk == 0:
            raise ProposalFailedException

        replicating_rules = [rule for rule in self.grammar.rules[ni.returntype]
                             if is_replicating_GrammarRule(rule)]
        assert len(replicating_rules) > 0  # better be some or where did ni come from?

        samplei = sample1(replicating_kid_indices)  # who to promote; NOTE: not done via any weighting

        # We need to be in the right grammar state to evaluate log_probability
        with BVRuleContextManager(self.grammar, ni.args[samplei], recurse_up=True):

            # Now we must count the multiple ways we could go forward or back
            # Here, we could have sampled any of them equivalent to ni.args[samplei]
            before_same_children = sum([x == ni.args[samplei] for x in ni.args])

            # the lp of everything we'd have to create going backwards
            old_lp_below = sum([self.grammar.log_probability(kid)
                                if (i != samplei and isFunctionNode(kid)) else 0.
                                for i, kid in enumerate(ni.args)])

            # and replace it
            ni.args[samplei].parent = ni.parent  # update this first ;; TODO: IS THIS NECESSARY?
            ni.setto(ni.args[samplei])

        # And compute f/b probs
        newZ = newt.sample_node_normalizer(resampleProbability=isNotBVAddFunctionNode)

        # To go forward, choose the node, and then from all equivalent children
        f = lp + (log(before_same_children) - log(nrk))

        # To go back, choose the node, choose the replicating rule, choose
        # where to put it, and generate the rest of the tree
        b = (log(1.0 * isNotBVAddFunctionNode(ni)) - log(newZ)) \
            + -log(len(replicating_rules)) \
            + (log(before_same_children) - log(nrk)) \
            + old_lp_below

    return [newt, f - b]
def propose(self):
    """Return a modified copy of this object with one `word_idx` entry replaced.

    A position in `self.word_idx` is picked with `sample1`, and
    `distance_based_proposer` supplies the replacement value for that
    position. `self` is not written to; the change is made on `self.copy()`.

    :return: (proposal, fb) -- the modified copy, and the second value
        returned by `distance_based_proposer` (presumably the
        forward/backward term of the move -- confirm against that helper)
    """
    proposal = self.copy()

    # pick which position to change
    positions = range(len(self.word_idx))
    slot = sample1(positions)

    # ask the proposer for the new value at that position
    replacement, fb = distance_based_proposer(slot)
    proposal.word_idx[slot] = replacement

    return proposal, fb
def propose_tree(self, t):
    """Make one insert-or-delete proposal on a copy of `t`.

    Insert (taken when `random() < 0.5`): draw a node `ni` via
    `sample_subnode`, pick uniformly among the replicating rules for
    `ni.returntype`, build a function node stub from that rule, put a copy
    of `ni` into one of the stub's same-typed argument slots, generate the
    other arguments from the grammar, and overwrite `ni` with the result.

    Delete (otherwise): draw a node, pick uniformly among its FunctionNode
    children whose return type equals its own, and overwrite the node with
    that child.

    :param t: the tree to propose from
    :return: [newt, f - b], where `newt` is the modified copy, `f` is the
        forward log probability of the move and `b` the backward one
    :raises ProposalFailedException: if node sampling fails, if there is no
        replicating rule to insert with, or if there is no same-typed
        FunctionNode child to promote in a delete
    """
    newt = copy(t)

    if random() < 0.5:  # So we insert

        # Choose a node at random to insert on
        # TODO: We could precompute the nonterminals we can do this move on, if we wanted
        try:
            ni, lp = newt.sample_subnode(isNotBVAddFunctionNode)
        except NodeSamplingException:
            raise ProposalFailedException

        # Since it's an insert, see if there is a (replicating) rule that expands
        # from ni.returntype to some ni.returntype
        replicating_rules = [rule for rule in self.grammar.rules[ni.returntype]
                             if is_replicating_GrammarRule(rule)]

        # Nothing to insert with: report a failed proposal, as the delete
        # branch does when it has nothing to promote.
        if len(replicating_rules) == 0:
            raise ProposalFailedException

        # sample a rule uniformly (not under the predicate)
        r = sample1(replicating_rules)

        # the functionNode we are building
        fn = r.make_FunctionNodeStub(self, ni.parent)

        # figure out which arg will be the existing ni; the stub's args are
        # compared against return types here
        replicatingindices = [i for i, a in enumerate(fn.args) if a == ni.returntype]
        # a replicating rule must have at least one such slot
        assert len(replicatingindices) > 0

        replace_i = sample1(replicatingindices)  # choose the one to replace
        fn.args[replace_i] = copy(ni)  # the one we replace

        ## Now expand the other args, with the right rules in the grammar
        with BVRuleContextManager(self.grammar, fn, recurse_up=True):
            for i, a in enumerate(fn.args):
                if i != replace_i:
                    fn.args[i] = self.grammar.generate(a)

        # how many kids equal the old ni afterwards (must be counted before
        # ni is overwritten below)
        after_same_children = sum([x == ni for x in fn.args])

        ni.setto(fn)

        with BVRuleContextManager(self.grammar, fn, recurse_up=True):

            # prob mass of the newly generated arguments only
            new_lp_below = sum([self.grammar.log_probability(arg)
                                if (i != replace_i and isFunctionNode(arg)) else 0.
                                for i, arg in enumerate(fn.args)])

            # What is the new normalizer?
            newZ = newt.sample_node_normalizer(isNotBVAddFunctionNode)
            assert newZ > 0

            # Forward: choose the node ni, choose the replicating rule,
            # choose which slot to fill (any identical one would do), and
            # generate the rest of the tree
            f = lp + (-log(len(replicating_rules))) \
                + (log(after_same_children) - log(len(replicatingindices))) \
                + new_lp_below

            # Backward: choose the inserted node, and any of the identical
            # children, out of all replicators
            # NOTE(review): the delete branch normalizes its child choice by
            # nrk (same-typed FunctionNode children) whereas this uses
            # len(fn.args) -- confirm the asymmetry is intended.
            b = (log(1.0 * isNotBVAddFunctionNode(fn)) - log(newZ)) \
                + (log(after_same_children) - log(len(fn.args)))

    else:  # A delete move!

        # Sample a node at random
        try:
            ni, lp = newt.sample_subnode(isNotBVAddFunctionNode)  # this could raise exception

            # Really, it had to be not None
            if ni.args is None:
                raise NodeSamplingException
        except NodeSamplingException:
            raise ProposalFailedException

        # Figure out which of my children have the same type as me
        replicating_kid_indices = [i for i, kid in enumerate(ni.args)
                                   if isFunctionNode(kid) and kid.returntype == ni.returntype]

        nrk = len(replicating_kid_indices)  # how many replicating kids
        if nrk == 0:
            raise ProposalFailedException

        replicating_rules = [rule for rule in self.grammar.rules[ni.returntype]
                             if is_replicating_GrammarRule(rule)]
        assert len(replicating_rules) > 0  # better be some or where did ni come from?

        samplei = sample1(replicating_kid_indices)  # who to promote; NOTE: not done via any weighting

        # We need to be in the right grammar state to evaluate log_probability
        with BVRuleContextManager(self.grammar, ni.args[samplei], recurse_up=True):

            # Count the multiple ways we could go forward or back: any child
            # equal to ni.args[samplei] would have given the same result
            before_same_children = sum([x == ni.args[samplei] for x in ni.args])

            # the lp of everything we'd have to create going backwards
            old_lp_below = sum([self.grammar.log_probability(kid)
                                if (i != samplei and isFunctionNode(kid)) else 0.
                                for i, kid in enumerate(ni.args)])

            # and replace it
            ni.args[samplei].parent = ni.parent  # update this first ;; TODO: IS THIS NECESSARY?
            ni.setto(ni.args[samplei])

        # And compute f/b probs
        newZ = newt.sample_node_normalizer(resampleProbability=isNotBVAddFunctionNode)

        # To go forward, choose the node, and then from all equivalent children
        f = lp + (log(before_same_children) - log(nrk))

        # To go back, choose the node, choose the replicating rule, choose
        # where to put it, and generate the rest of the tree
        b = (log(1.0 * isNotBVAddFunctionNode(ni)) - log(newZ)) \
            + -log(len(replicating_rules)) \
            + (log(before_same_children) - log(nrk)) \
            + old_lp_below

    return [newt, f - b]
def propose_tree(self, t):
    """Propose a tree from `t` by regeneration, insertion or deletion.

    With probability `1 - self.insert_delete_probability` the call is handed
    to `self.my_regeneration_proposal.propose_tree(t)` and its result is
    returned unchanged. Otherwise an insert or a delete (probability 0.5
    each) is attempted on `copy(t)`:

    * insert: the sampled node `ni` is replaced by a new FunctionNode built
      from a rule whose right-hand side contains `ni.returntype`; one of
      those same-typed slots receives a copy of `ni`, all other slots are
      generated by the grammar.
    * delete: the sampled node is replaced by a copy of one of its
      FunctionNode children that has the same return type.

    If the sampled node does not admit the chosen move, the copy is returned
    untouched with a forward/backward value of 0.0.

    :param t: the tree to propose from
    :return: [newt, fb] -- the proposed tree and the forward minus backward
        log probability (or whatever the regeneration proposal returns)
    """
    # Default regeneration proposal with some probability
    if random() >= self.insert_delete_probability:
        return self.my_regeneration_proposal.propose_tree(t)

    newt = copy(t)
    fb = 0.0         # the forward/backward prob we return
    sampled = False  # so we can see if we didn't do it

    if random() < 0.5:  # So we insert

        # first sample a node through sample_node_via_iterate
        # (presumably it yields a single node; the values computed after
        # the loop are those of the last iteration)
        for ni, di, resample_p, resample_Z in self.grammar.sample_node_via_iterate(newt):
            if ni.args is None:
                continue  # Can't deal with these TODO: CHECK THIS?

            # Since it's an insert, see if there is a (replicating) rule that expands
            # from ni.returntype to some ni.returntype
            replicating_rules = [rule for rule in self.grammar.rules[ni.returntype]
                                 if rule.name != 'lambda'
                                 and (rule.to is not None)
                                 and any([a == ni.returntype for a in rule.to])]

            # If there are none, then we can't insert!
            if len(replicating_rules) == 0:
                continue

            # choose a replicating rule; the sampling probability returned
            # by weighted_sample is not used.
            # NOTE(review): this draw is weighted by rule.p, but `f` below
            # accounts for it as uniform (-log(len(replicating_rules))) --
            # confirm which one is intended.
            r, _ = weighted_sample(replicating_rules, probs=lambda x: x.p,
                                   return_probability=True, log=False)

            # log probability of the rule overall in the grammar (not the
            # probability of sampling it above): log(p) - log(sum of p), so
            # the normalizer must be logged as well
            gp = log(r.p) - log(sum([rule.p for rule in self.grammar.rules[ni.returntype]]))

            # Now take the rule and expand the children:
            # choose who gets to be ni
            nrhs = len([x for x in r.to if x == ni.returntype])  # how many on the rhs are there?
            if nrhs == 0:
                continue
            replace_i = randint(0, nrhs - 1)  # choose the one to replace

            ## Now expand args but only for the one we don't sample...
            args = []
            for x in r.to:
                if x == ni.returntype:
                    if replace_i == 0:
                        args.append(copy(ni))  # the slot we replace into
                    else:
                        args.append(self.grammar.generate(x, d=di + 1))
                    replace_i -= 1  # count down through the same-typed slots
                else:
                    args.append(self.grammar.generate(x, d=di + 1))

            # Now we must count the multiple ways we could go forward or back
            after_same_children = [x for x in args if x == ni]  # how many are the same after?

            # create the new node
            sampled = True
            ni.setto(FunctionNode(returntype=r.nt, name=r.name, args=args,
                                  generation_probability=gp, bv_name=None, bv_args=None,
                                  ruleid=r.rid, resample_p=r.resample_p))

        if sampled:
            new_lp_below = sum(map(lambda z: z.log_probability(),
                                   filter(isFunctionNode, args))) - ni.log_probability()

            newZ = self.grammar.resample_normalizer(newt)

            # To sample forward: choose the node ni, choose the replicating
            # rule, choose which "to" to expand (we could have put it on any
            # of the slots that are identical), and generate the rest of the tree
            f = (log(resample_p) - log(resample_Z)) \
                + -log(len(replicating_rules)) \
                + (log(len(after_same_children)) - log(nrhs)) \
                + new_lp_below

            # To go backwards, choose the inserted rule, and any of the
            # identical children, out of all replicators
            b = (log(ni.resample_p) - log(newZ)) \
                + (log(len(after_same_children)) - log(nrhs))

            fb = f - b

    else:  # A delete move!

        for ni, di, resample_p, resample_Z in self.grammar.sample_node_via_iterate(newt):
            if ni.name == 'lambda':
                continue  # can't do anything
            if ni.args is None:
                continue  # Can't deal with these TODO: CHECK THIS?

            # Figure out which of my children have the same type as me
            replicating_kid_indices = [idx for idx, kid in enumerate(ni.args)
                                       if isFunctionNode(kid) and kid.returntype == ni.returntype]

            nrk = len(replicating_kid_indices)  # how many replicating kids
            if nrk == 0:
                continue  # if no replicating kids here

            ## We need to compute a few things for the backwards probability
            # NOTE(review): unlike the insert branch, 'lambda' rules are not
            # excluded here -- confirm the two counts are meant to differ.
            replicating_rules = [rule for rule in self.grammar.rules[ni.returntype]
                                 if (rule.to is not None)
                                 and any([a == ni.returntype for a in rule.to])]
            if len(replicating_rules) == 0:
                continue

            i = sample1(replicating_kid_indices)  # who to promote; NOTE: not done via any weighting

            # Now we must count the multiple ways we could go forward or back
            # Here, we could have sampled any of them equivalent to ni.args[i]
            before_same_children = [x for x in ni.args if x == ni.args[i]]

            # the lp of everything we'd have to create going backwards
            old_lp_below = sum(map(lambda z: z.log_probability(),
                                   filter(isFunctionNode, ni.args))) - ni.args[i].log_probability()

            # and replace it
            sampled = True
            ni.setto(copy(ni.args[i]))  # TODO: copy not necessary here, I think?

        if sampled:
            newZ = self.grammar.resample_normalizer(newt)

            # To go forward, choose the node, and then from all equivalent children
            f = (log(resample_p) - log(resample_Z)) \
                + (log(len(before_same_children)) - log(nrk))

            # To go back, choose the node, choose the replicating rule,
            # choose where to put it, and generate the rest of the tree
            b = (log(ni.resample_p) - log(newZ)) \
                + -log(len(replicating_rules)) \
                + (log(len(before_same_children)) - log(nrk)) \
                + old_lp_below

            fb = f - b

    # and fix the bound variables, whose depths may have changed
    if sampled:
        newt.fix_bound_variables()

    return [newt, fb]