Example #1
def makeZipfianLexiconData(lexicon, word, context, n=100, s=1.0, alpha=0.9, verbose=False): # TODO remove word param from Shift files
    data = []
    true_set = lexicon.make_true_data(context)
    all_poss_speakers = [ t[1] for t in true_set ]
    p = [ zipf(t, s, context, len(context.objects)) for t in all_poss_speakers ]

    for i in xrange(n):
        if flip(alpha):
            speaker = weighted_sample(all_poss_speakers, probs=p)

            bagR = {w : lexicon(w, context, set([speaker])) for w in lexicon.all_words()}
            uniqR = []
            for w in lexicon.all_words():
                uniqR.extend(bagR[w])

            p1 = [ zipf(t, s, context, len(context.objects)) for t in uniqR ]
            referent = weighted_sample(uniqR, probs=p1)

            word = sample1([w for w in lexicon.all_words() if referent in bagR[w]])

            if verbose:
                print "True data:", i, word, speaker, referent
            data.append(KinshipData(word, speaker, referent, context))
        else:
            word = sample1(lexicon.all_words())
            x = sample1(context.objects)
            y = sample1(context.objects)
            if verbose:
                print "Noise data:", i, word, x, y
            data.append(KinshipData(word, x, y, context))
    if verbose:
        print lexicon.compute_likelihood(data)
    return data
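The zipf weight function is not defined on this page; the calls above suggest it maps an object to an (unnormalized) Zipfian weight with exponent s. A minimal sketch under that assumption -- the rank-from-distance detail is hypothetical, not LOTlib's actual code:

def zipf(t, s, context, N):
    # HYPOTHETICAL sketch: weight item t in inverse proportion to its rank
    # raised to the exponent s. Rank is assumed to come from the context's
    # distance ordering; N (the number of objects) is accepted only for
    # signature compatibility with the calls above.
    rank = context.distance[t] + 1  # assumed: 0-based distance doubles as rank
    return rank ** (-s)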
Example #2
def makeVariableLexiconData(lexicon,
                            word,
                            context,
                            n=100,
                            s=1.0,
                            alpha=0.9,
                            verbose=False):
    data = []
    true_set = lexicon.make_true_data(context)
    all_poss_speakers = [t[1] for t in true_set]
    p = [zipf(t, s, context, len(context.objects)) for t in all_poss_speakers]

    for i in xrange(n):
        if flip(alpha):
            speaker = weighted_sample(all_poss_speakers, probs=p)
            referents = lexicon(word, context, set([speaker]))
            p1 = [zipf(t, s, context, len(context.objects)) for t in referents]
            referent = weighted_sample(referents, probs=p1)
            if verbose:
                print "True data:", i, word, speaker, referent
            data.append(KinshipData(word, speaker, referent, context))
        else:
            x = sample1(context.objects)
            y = sample1(context.objects)
            if verbose:
                print "Noise data:", i, word, x, y
            data.append(KinshipData(word, x, y, context))
    if verbose:
        print lexicon.compute_likelihood(data)
    return data
Example #3
def genetic_algorithm(make_hypothesis, data, mutate, crossover, population_size=100, generations=100000):

    population = [make_hypothesis() for _ in xrange(population_size)]
    for h in population:
        h.compute_posterior(data)

    for g in xrange(generations):

        nextpopulation = []

        while len(nextpopulation) < population_size:
            # sample proportional to fitness
            mom = weighted_sample(population, probs=[v.posterior_score for v in population], log=True)
            dad = weighted_sample(population, probs=[v.posterior_score for v in population], log=True)

            try:
                kid = mutate(crossover(mom, dad))
            except (ProposalFailedException, NodeSamplingException):
                continue

            kid.compute_posterior(data)
            yield kid

            nextpopulation.append(kid)

            # # if MH_acceptance(population[i].posterior_score, kid.posterior_score, 0.0):
            # if kid.posterior_score > population[i].posterior_score:
            #     population[i] = kid
            #     yield kid
        population = nextpopulation
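Here weighted_sample is called with log=True because posterior_score is a log-probability. For intuition, a self-contained sketch of one way to sample in proportion to exponentiated log-weights (the Gumbel-max trick); this is an illustration, not LOTlib's implementation:

from math import log
from random import random

def weighted_sample_log(items, logprobs):
    # Gumbel-max trick: add independent Gumbel noise to each log-weight and
    # take the argmax; this samples each item in proportion to its
    # (normalized) weight without ever exponentiating the scores.
    best, best_score = None, None
    for item, lp in zip(items, logprobs):
        g = -log(-log(random()))  # a standard Gumbel(0, 1) draw
        if best_score is None or lp + g > best_score:
            best, best_score = item, lp + g
    return best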
Example #4
    def sample_utterance(self, possible_utterances, context):

        t, f, others = self.partition_utterances(possible_utterances, context)

        m = set(t).union(f)

        if flip(self.palpha) and len(m) > 0:  # sample from utterances whose presupposition is true
            if flip(self.alpha) and len(t) > 0:
                return weighted_sample(t, probs=map(lambda u: self.weightfunction(u, context), t), log=False)
            else:
                return weighted_sample(m, probs=map(lambda u: self.weightfunction(u, context), m), log=False)
        else:
            # otherwise sample from all utterances, true or not
            return weighted_sample(possible_utterances, probs=map(lambda u: self.weightfunction(u, context), possible_utterances), log=False)
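Both branches gate on flip, which in LOTlib is simply a Bernoulli draw; for reference, a one-line sketch consistent with how it is used here:

from random import random

def flip(p):
    # return True with probability p
    return random() < p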
Example #5
def propose(current_state, bag=lexicon, probs=L):
    mod = len(current_state.all_words())
    proposal = copy(current_state)
    # deterministically cycle through the words, resampling the value of one
    # word at a time from its bag, weighted by its log-probability
    proposal.value[words[propose.inx % mod]].value = weighted_sample(bag[words[propose.inx % mod]],
                                                                     probs=probs[words[propose.inx % mod]], log=True).value
    propose.inx += 1
    return proposal
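Note that propose keeps its cycling position in a function attribute, which must be set before the first call -- an assumed setup, matching how the counter is used above:

propose.inx = 0  # start cycling through the words from the beginning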
Example #6
    def sample_data(self, n):
        """
        Return a dictionary of {string: count} that is a sample from this language
        """
        return weighted_sample(self.str_sets,
                               N=n,
                               probs=self.string_log_probability,
                               log=True)
Example #7
def sample_sets_of_objects(N, objs):
    """
    Makes a set of size N appropriate for using "set" functions on -- it must contain copies, not duplicate references
    """
    s = weighted_sample(objs, N=N, returnlist=True)  # the set of objects
    return map(
        deepcopy, s
    )  # the set must NOT be just the pointers sampled, since then set() operations will collapse them!
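The deepcopy is essential because these objects compare by identity: sampling the same object twice yields two references to one object, which a set would collapse. A small demonstration of the failure mode (assuming default identity-based equality and hashing):

from copy import deepcopy

class Obj(object):
    pass

o = Obj()
assert len(set([o, o])) == 1             # two references to one object collapse
assert len(set([o, deepcopy(o)])) == 2   # distinct copies survive set()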
Example #8
def generate_data(data_size):
    """
    Sample some data according to the target
    """
    data = []
    for i in range(data_size):
        # how many in this set
        set_size = weighted_sample( range(1,10+1), probs=[7187, 1484, 593, 334, 297, 165, 151, 86, 105, 112] )
        # get the objects in the current set
        s = set(sample_sets_of_objects(set_size, all_objects))

        # sample according to the target
        if random() < ALPHA: r = WORDS[len(s)-1]
        else:                r = weighted_sample( WORDS )

        # and append the sampled utterance
        data.append(FunctionData(input=[s], output=r))  # convert to "FunctionData" and store
    return data
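The probs passed here are raw frequency counts rather than probabilities, so weighted_sample is evidently expected to normalize internally. The equivalent explicit normalization, shown only for clarity:

counts = [7187, 1484, 593, 334, 297, 165, 151, 86, 105, 112]
total = float(sum(counts))
norm = [c / total for c in counts]  # the distribution actually sampled from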
Example #9
def distance_based_proposer(x):
    y, lp = weighted_sample(proposal_to[x, :],
                            probs=proposal_probs[x, :],
                            Z=proposal_Z[x],
                            return_probability=True,
                            log=False)
    bp = lp + log(proposal_Z[x]) - log(
        proposal_Z[y]
    )  # the distance d is the same, but the normalizer differs
    return y, lp - bp
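The returned correction term is what makes this proposer valid inside Metropolis-Hastings. Because the unnormalized distance weight between x and y is symmetric, the forward and backward log-probabilities differ only in their row normalizers -- a sketch of the algebra:

# forward:   log q(y|x) = lp = log w(x, y) - log Z[x]
# backward:  log q(x|y) = log w(x, y) - log Z[y] = lp + log Z[x] - log Z[y] = bp
# so the Hastings correction returned is
#   lp - bp = log Z[y] - log Z[x]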
Example #10
def make_data(data_size=300, alpha=0.75):
    """
    Sample some data according to the target
    """
    data = []
    for i in range(data_size):
        # how many in this set
        set_size = weighted_sample(
            range(1, 10 + 1),
            probs=[7187, 1484, 593, 334, 297, 165, 151, 86, 105, 112])
        # get the objects in the current set
        s = set(sample_sets_of_objects(set_size, all_objects))

        # sample according to the target
        if random() < alpha: r = WORDS[len(s) - 1]
        else: r = weighted_sample(WORDS)

        # and append the sampled utterance
        data.append(FunctionData(input=[s], output=r, alpha=alpha))
    return data
Example #11
    def propose(self):
        """
        Default proposal to a lexicon -- now at least one, plus some coin flips
        :return:
        """

        new = copy(self) ## Now we just copy the whole thing

        # Propose one for sure
        w = weighted_sample(self.value.keys()) # the word to change
        p, fb = self.value[w].propose()
        new.set_word(w, p)

        for x in self.all_words():
            if w != x and flip(self.propose_p):
                xp, xfb = self.value[x].propose()
                new.set_word(x, xp)
                fb += xfb

        return new, fb
Example #12
    def sample_output(self, datum):
        # return a sample of my output given the input in datum
        if random() < datum.alpha:
            return self(*datum.input)
        else:
            return weighted_sample(WORDS)  # uniform sample
Example #13
	def propose_tree(self, t):
		
		# Default regeneration proposal with some probability
		if random() >= self.insert_delete_probability: 
			return self.my_regeneration_proposal.propose_tree(t)
		
		newt = copy(t)
		fb = 0.0 # the forward/backward prob we return
		sampled=False # so we can see if we didn't do it
		
		if random() < 0.5: # So we insert
			
			# first sample a node (through sample_node_via_iterate, which handles everything well)
			for ni, di, resample_p, resample_Z in self.grammar.sample_node_via_iterate(newt):
				if ni.args is None: continue # Can't deal with these TODO: CHECK THIS?
				
				# Since it's an insert, see if there is a (replicating) rule that expands
				# from ni.returntype to some ni.returntype
				replicating_rules = filter(lambda x: x.name != 'lambda' and (x.to is not None) and any([a==ni.returntype for a in x.to]), self.grammar.rules[ni.returntype])
				
				# If there are none, then we can't insert!
				if len(replicating_rules) == 0: continue
				
				# choose a replicating rule; NOTE: this is done uniformly in this step, for simplicity
				r, gp = weighted_sample(replicating_rules, probs=lambda x: x.p, return_probability=True, log=False)
				gp = log(r.p) - sum([x.p for x in self.grammar.rules[ni.returntype]]) # this is the probability overall in the grammar, not my prob of sampling
				
				# Now take the rule and expand the children:
				
				# choose who gets to be ni
				nrhs = len( [ x for x in r.to if x == ni.returntype] ) # how many on the rhs are there?
				if nrhs == 0: continue
				replace_i = randint(0,nrhs-1) # choose the one to replace
				
				## Now expand args but only for the one we don't sample...
				args = []
				for x in r.to:
					if x == ni.returntype:
						if replace_i == 0: args.append( copy(ni) ) # if it's the one we replace into
						else:              args.append( self.grammar.generate(x, d=di+1) ) # else generate like normal
						replace_i -= 1
					else:              
						args.append( self.grammar.generate(x, d=di+1) ) #else generate like normal	
							
				# Now we must count the multiple ways we could go forward or back
				after_same_children = [ x for x in args if x==ni] # how many are the same after?
				#backward_resample_p = sum([ x.resample_p for x in after_same_children]) # if you go back, you can choose any identical kids
				
				# create the new node
				sampled = True
				ni.setto( FunctionNode(returntype=r.nt, name=r.name, args=args, generation_probability=gp, bv_name=None, bv_args=None, ruleid=r.rid, resample_p=r.resample_p ) )
				
			if sampled:
				
				new_lp_below = sum(map(lambda z: z.log_probability(), filter(isFunctionNode, args))) - ni.log_probability()
				
				newZ = self.grammar.resample_normalizer(newt)
				# To sample forward: choose the node ni, choose the replicating rule, choose which "to" to expand (we could have put it on any of the replicating rules that are identical), and generate the rest of the tree
				f = (log(resample_p) - log(resample_Z)) + -log(len(replicating_rules)) + (log(len(after_same_children))-log(nrhs)) + new_lp_below
				# To go backwards, choose the inserted rule, and any of the identical children, out of all replicators
				b = (log(ni.resample_p) - log(newZ)) + (log(len(after_same_children)) - log(nrhs))
				fb = f-b
				
		else: # A delete move!
			for ni, di, resample_p, resample_Z in self.grammar.sample_node_via_iterate(newt):
				if ni.name == 'lambda': continue # can't do anything
				if ni.args is None: continue # Can't deal with these TODO: CHECK THIS?
				
				# Figure out which of my children have the same type as me
				replicating_kid_indices = [ i for i in xrange(len(ni.args)) if isFunctionNode(ni.args[i]) and ni.args[i].returntype==ni.returntype]
				
				nrk = len(replicating_kid_indices) # how many replicating kids
				if nrk == 0: continue # if no replicating rules here
				
				## We need to compute a few things for the backwards probability
				replicating_rules = filter(lambda x: (x.to is not None) and any([a==ni.returntype for a in x.to]), self.grammar.rules[ni.returntype])
				if len(replicating_rules) == 0: continue
				
				i = sample1(replicating_kid_indices) # who to promote; NOTE: not done via any weighting
				
				# Now we must count the multiple ways we could go forward or back
				# Here, we could have sampled any of them equivalent to ni.args[i]
				
				before_same_children = [ x for x in ni.args if x==ni.args[i] ] # how many are the same before?
				
				# the lp of everything we'd have to create going backwards
				old_lp_below = sum(map(lambda z: z.log_probability(), filter(isFunctionNode, ni.args)  )) - ni.args[i].log_probability()
				
				# and replace it
				sampled = True
				ni.setto( copy(ni.args[i]) ) # TODO: copy not necessary here, I think?
				
			if sampled:
				
				newZ = self.grammar.resample_normalizer(newt)
				# To go forward, choose the node, and then from all equivalent children
				f = (log(resample_p) - log(resample_Z)) + (log(len(before_same_children)) - log(nrk))
				# To go back, choose the node, choose the replicating rule, choose where to put it, and generate the rest of the tree
				b = (log(ni.resample_p) - log(newZ))  + -log(len(replicating_rules)) + (log(len(before_same_children)) - log(nrk)) + old_lp_below
				fb = f-b
		
		# and fix the bound variables, whose depths may have changed
		if sampled: newt.fix_bound_variables()
		
		return [newt, fb]
Example #14
NYes = [0] * (DATASET_SIZE * NDATASETS)  # number of yes/no responses for each datum
NNo = [0] * (DATASET_SIZE * NDATASETS)

di = 0
for datasi, data in enumerate(datas):
    print "# Simulating data for ", datasi
    for i in xrange(len(data)):

        # update the posterior
        for h in hypotheses:
            h.compute_posterior([data[j] for j in xrange(i)])
        probs = [x.posterior_score for x in hypotheses]
        # each simulated person samples a hypothesis from the posterior and responds with it
        for person in break_ctrlc(xrange(NPEOPLE)):
            h = weighted_sample(hypotheses, probs=probs, log=True)

            if random() < ALPHA:
                r = h(*data[i].input)  # and use it to respond to the next one
            else:
                r = random() < BETA

            if r: NYes[di] += 1
            else: NNo[di] += 1

        di += 1

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Take into account the likelihoods in our inference
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
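With log=True, weighted_sample treats the posterior_score values as log-probabilities, i.e. it samples from the softmax of the scores. Done by hand, with the usual max-subtraction for numerical stability (a sketch, not LOTlib's code):

from math import exp

def normalize_log_scores(scores):
    # subtract the max before exponentiating so exp() cannot underflow everything
    m = max(scores)
    ws = [exp(s - m) for s in scores]
    z = sum(ws)
    return [w / z for w in ws]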
Example #15
	def propose_tree(self, t):
		p = weighted_sample(self.proposals, probs=self.probs, log=False)
		
		return p.propose_tree(t)
Example #16
    def propose_tree(self, grammar, tree, resampleProbability=lambdaOne):
        """ sample a sub-proposer and propose from it """
        chosen_proposer = weighted_sample(self.proposers, probs=self.proposer_weights)
        return chosen_proposer.propose_tree(grammar, tree, resampleProbability)
Example #17
    def sample_string(self):
        return weighted_sample(self.strings, probs=lambda s: pow(2.0, -len(s)))  # sample inversely with length
Example #18
    def sample_string(self):  # FIXME: this is not context-free
        return weighted_sample(self.strings, probs=self.probs)
Example #19
def makeZipfianLexiconData(lexicon,
                           context,
                           dfreq=None,
                           n=100,
                           s=1.0,
                           alpha=0.9,
                           epsilon=0.8,
                           verbose=False):
    '''

    L() --> P(W) [ eps * P(S|W) * P(R|W) + (1 - eps) * P(S|W) * P(R|SW) ]
    P(W) ~ dfreq or defaults to uniform
    P(S|W)  ~ Zipf(s) domain: all speakers that can use that word
    P(R|W)  ~ Zipf(s) domain: all people the learner has a word for
    P(R|SW) ~ Zipf(s) domain: all referents the speaker can use the word to refer to

    :param lexicon: the target lexicon
    :param context: the context
    :param dfreq: dictionary[word] = frequency weight (float)
    :param n: the number of data points
    :param s: the zipfian exponent parameter
    :param alpha: the reliability parameter. Noise = 1 - alpha
    :param epsilon: the ego-centric probability
    :param verbose: print the generated data points
    :return: list of KinshipData objects
    '''
    assert context.distance is not None, "There are no distances in the context!"
    if dfreq is not None:
        assert set(lexicon.all_words()).issubset(set(
            dfreq.keys())), "Words in lexicon without frequencies"
        freq = lambda w: dfreq[w]
    else:
        freq = None
    data = []
    speakers = dict()
    egoRef = dict()
    for w in lexicon.all_words():
        speakers[w] = [t[1] for t in lexicon.make_word_data(w, context)]
        egoRef[w] = [
            t[2] for t in lexicon.make_word_data(w, context, fixX=context.ego)
        ]

    for i in xrange(n):
        if flip(alpha):
            wrd = weighted_sample(lexicon.all_words(), probs=freq)
            speaker = weighted_sample(
                speakers[wrd],
                probs=lambda x: zipf(x, s, context, len(context.objects)))
            if flip(epsilon):
                referent = weighted_sample(
                    egoRef[wrd],
                    probs=lambda x: zipf(x, s, context, len(context.objects)))
                eps = 'Ego'
            else:
                referent = weighted_sample(
                    lexicon(wrd, context, set([speaker])),
                    probs=lambda x: zipf(x, s, context, len(context.objects)))
                eps = 'Speaker'
            if verbose:
                print "True data:", i, wrd, speaker, referent, eps
            data.append(KinshipData(wrd, speaker, referent, context))
        else:
            wrd = weighted_sample(lexicon.all_words(), probs=freq)
            x = weighted_sample(
                context.objects,
                probs=lambda x: zipf(x, s, context, len(context.objects)))
            y = weighted_sample(
                context.objects,
                probs=lambda x: zipf(x, s, context, len(context.objects)))
            if verbose:
                print "Noise data:", i, wrd, x, y
            data.append(KinshipData(wrd, x, y, context))
    if verbose:
        print lexicon.compute_likelihood(data)
    return data
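A hypothetical call, assuming a target lexicon and a context with distances have already been built elsewhere (the names target and ctx are placeholders, not LOTlib API):

dfreq = {w: 1.0 for w in target.all_words()}  # uniform weights, equivalent to dfreq=None
data = makeZipfianLexiconData(target, ctx, dfreq=dfreq, n=500, s=1.0,
                              alpha=0.9, epsilon=0.8, verbose=True)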