Example #1
def run(llt=1.0):
    # Starting hypothesis: a CCG lexicon over all_words, at likelihood temperature llt.
    # make_hypothesis, all_words, data and SAMPLES are defined elsewhere in the module.
    h0 = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9, likelihood_temperature=llt)

    # Keep only the top 10 hypotheses, scored by posterior.
    fbs = FiniteBestSet(N=10)
    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in lot_iter(mh_sample(h0, data, SAMPLES)):
        fbs.add(h, h.posterior_score)

    return fbs
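A minimal driver for the snippet above is sketched below; it assumes the same module-level definitions (data, SAMPLES, make_hypothesis, all_words) and uses get_all, the accessor that also appears in Example #6 below.

if __name__ == "__main__":
    fbs = run(llt=1.0)
    # Print the retained hypotheses with their posterior scores (Python 2 print, as in these examples).
    for h in fbs.get_all(sorted=True):
        print h.posterior_score, h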
Example #2
File: Run.py  Project: moverlan/LOTlib
def run(*args):
    """The running function."""
    # starting hypothesis -- here this generates at random
    h0 = GaussianLOTHypothesis(grammar)

    # We store the top 100 from each run
    pq = FiniteBestSet(N=100, max=True, key="posterior_score")
    pq.add(MHSampler(h0, data, STEPS, skip=SKIP))

    return pq
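Here FiniteBestSet.add consumes the sampler directly and scores each sample via key="posterior_score". An equivalent explicit loop, written in the style of Example #1 and under the same assumptions about grammar, data, STEPS and SKIP, is sketched below.

pq = FiniteBestSet(N=100, max=True)
for h in MHSampler(h0, data, STEPS, skip=SKIP):
    pq.add(h, h.posterior_score)  # add each sample with an explicit score, as in Example #1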
Example #3
def run(*args):

    # starting hypothesis -- here this generates at random
    h0 = GaussianLOTHypothesis(grammar, prior_temperature=PRIOR_TEMPERATURE)

    # We store the top 100 from each run
    pq = FiniteBestSet(100, max=True, key="posterior_score")
    pq.add(mh_sample(h0, data, STEPS, skip=SKIP))

    return pq
Example #4
File: Run.py  Project: moverlan/LOTlib
import pickle

from LOTlib import lot_iter  # Ctrl-C-aware iteration (called break_ctrlc in newer LOTlib)
from LOTlib.FiniteBestSet import FiniteBestSet
from LOTlib.Inference.MetropolisHastings import MHSampler
from Model import *


NDATA = 50 # How many total data points?
NSTEPS = 10000
BEST_N = 100 # How many from each hypothesis to store
OUTFILE = "hypotheses.pkl"

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = FiniteBestSet()

if __name__ == "__main__":
    # Now loop over each target concept and get a set of hypotheses
    for i, f in enumerate(TARGET_CONCEPTS):

        # Set up the hypothesis
        h0 = LOTHypothesis(grammar, start='START', args=['x'])

        # Set up some data
        data = generate_data(NDATA, f)

        # Now run some MCMC
        fs = FiniteBestSet(N=BEST_N, key="posterior_score")
        fs.add(lot_iter(MHSampler(h0, data, steps=NSTEPS, trace=False)))

        all_hypotheses.merge(fs)

    pickle.dump(all_hypotheses, open(OUTFILE, 'w'))
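The pickle written above can be read back later and inspected; a minimal sketch, assuming the same Python 2 environment and using get_all as in Example #6 below.

import pickle

all_hypotheses = pickle.load(open("hypotheses.pkl"))
for h in all_hypotheses.get_all(sorted=True):
    print h.posterior_score, h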
Example #5
File: Run.py  Project: pratiksha/LOTlib
import pickle

from LOTlib import break_ctrlc
from LOTlib.FiniteBestSet import FiniteBestSet
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from Model import *
from TargetConcepts import TargetConcepts

NDATA = 20  # How many data points for each function?
NSTEPS = 100000
BEST_N = 500  # How many from each hypothesis to store

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = FiniteBestSet()

if __name__ == "__main__":
    # Now loop over each target concept and get a set of hypotheses
    for i, f in enumerate(TargetConcepts):

        # Set up the hypothesis
        h0 = make_hypothesis()

        # Set up some data
        data = make_data(NDATA, f)

        # Now run some MCMC
        fs = FiniteBestSet(N=BEST_N, key="posterior_score")
        fs.add(break_ctrlc(MHSampler(h0, data, steps=NSTEPS, trace=False)))

        all_hypotheses.merge(fs)

    pickle.dump(all_hypotheses, open("hypotheses.pkl", "w"))
Example #6
def novelty_search(h0s, data, grammar, props=10, novelty_advantage=100):
	"""
		Search through hypotheses, maintaining a priority queue of good ones. We propose
		from a hypothesis based on its posterior and on how novel its proposals have been
		in the past. See heapweight(h) below -- it determines how we trade off posterior
		score and novelty in the search.

		SO: you are searched further if you are good AND in a "novel" part of the space.

		TODO: We could make this track the performance of proposals from a given hypothesis.
	"""
	
	novelty = defaultdict(float) # last time we proposed here, what proportion were novel? If we haven't done any, set to 1.0
	froms = defaultdict(int) # how many times did we propose from this?
	tos   = defaultdict(int) # how many to this?
	FS = FiniteBestSet(N=10)
	
	# When we add something to the heap, what weight does it have?
	# This should prefer high log probability, but also it should 
	# keep us from looking at things too much
	def heapweight(h):
		return -h.lp - novelty[h]*novelty_advantage
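	# Worked example (illustrative numbers, not from the original code): with
	# novelty_advantage=100, a hypothesis with lp = -50 and novelty = 0.3 gets
	# heapweight 50 - 30 = 20, while an equally good but fully novel one (novelty = 1.0)
	# gets 50 - 100 = -50, so the min-heap pops the more novel hypothesis first.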

	openset = []
	for h0 in h0s:
		if h0 not in novelty:
			h0.compute_posterior(data)
			heapq.heappush( openset, (heapweight(h0),h0) )
			novelty[h0] = 1.0 # treat as totally novel
			FS.add(h0, h0.lp)
	
	
	while not LOTlib.SIG_INTERRUPTED:
		lph, h = heapq.heappop(openset)
		
		froms[h] += 1
		
		#proposals_from[h] += props
		print "\n"
		print len(openset), "\t", h.lp, "\t", heapweight(h), "\t", novelty[h], "\t", froms[h], tos[h], "\t", q(h)
		for x in FS.get_all(sorted=True):
			print "\t", x.lp, "\t", heapweight(x), "\t", novelty[x], "\t", q(get_knower_pattern(h)), "\t", froms[x], tos[x],"\t", q(x)
		
		# Store all together so we know who to update (we make their novelty the same as their parent's)
		proposals = [ h.propose()[0] for i in xrange(props) ]
		new_proposals = [] # which are new?
		
		novelprop = 0
		for p in proposals:
			if p not in novelty:
				p.compute_posterior(data)
				FS.add(p, p.lp)
				novelty[p] = "ERROR" # just keep track -- should be overwritten later
				novelprop += 1
				new_proposals.append(p)
			tos[p] += 1
		
		novelty[h] = float(novelprop) / float(props)
		
		# use the novelty from the parent
		for p in new_proposals: 
			novelty[p] = random() * novelty[h]
			heapq.heappush(openset, (heapweight(p), p) )
		
		# and put myself back on the heap, but with the new proposal numbers
		heapq.heappush(openset, (heapweight(h), h) )
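A hypothetical driver for novelty_search, borrowing make_hypothesis, make_data and TargetConcepts from Example #5 and assuming grammar comes from the same Model module; the pool of 10 starting hypotheses and the keyword values are illustrative.

if __name__ == "__main__":
	# Illustrative setup reusing Example #5's helpers (an assumption, not part of this file).
	data = make_data(NDATA, TargetConcepts[0])
	h0s = [make_hypothesis() for _ in xrange(10)]
	# Runs until LOTlib.SIG_INTERRUPTED is set (e.g. by Ctrl-C).
	novelty_search(h0s, data, grammar, props=10, novelty_advantage=100)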