Example #1
def run(llt=1.0):
    h0 = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9, likelihood_temperature=llt)

    fbs = FiniteBestSet(N=10)
    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in lot_iter(mh_sample(h0, data, SAMPLES)):
        fbs.add(h, h.posterior_score)

    return fbs
Example #2
    def run(*args):

        # starting hypothesis -- here this generates at random
        h0 = GaussianLOTHypothesis(grammar, prior_temperature=PRIOR_TEMPERATURE)

        # We store the top 100 from each run
        pq = FiniteBestSet(100, max=True, key="posterior_score")
        pq.add( mh_sample(h0, data, STEPS, skip=SKIP)  )

        return pq
Example #3
    def run(*args):

        # starting hypothesis -- here this generates at random
        h0 = GaussianLOTHypothesis(grammar,
                                   prior_temperature=PRIOR_TEMPERATURE)

        # We store the top 100 from each run
        pq = FiniteBestSet(100, max=True, key="posterior_score")
        pq.add(mh_sample(h0, data, STEPS, skip=SKIP))

        return pq
Example #4
def ptaboo_search(h0, data, steps, skip=0, noisy_memoize=1000, seen_penalty=1.0):

	seen_count = defaultdict(int)
	
	# define a wrapper class that overwrites prior with our penalized version
	class WrapperClass(type(h0)):
	
		def compute_prior(self):
	
			self.rawprior =  type(h0).compute_prior(self) # save the prior for use if we want to convert back
			self.prior = self.rawprior - seen_count[self]*seen_penalty
			self.lp = self.prior + self.likelihood
			return self.prior
		
		def fixlp(self):
			"""
				Temporarily fix our log probability returned
			"""
			self.prior = self.rawprior
			self.lp = self.prior + self.likelihood
	
	myh0 = WrapperClass(h0.grammar, v=h0.value) ## TODO: NOTE HERE WE ASSUME G IS TAKEN!
	
	# Now just run standard MCMC:
	for h in mh_sample(myh0, data, steps, skip=skip):
		if LOTlib.SIG_INTERRUPTED: break
		# THIS IS VERY BIZARRE: 
		# We don't yield a copy, so we fixlp, yield, and then re-compute the prior to restore the lp
		# to the current sample
		#h.fixlp()
		#yield h
		#h.compute_prior() # 
		
		# Slower way to do it, just copy the value
		h0.set_value(h.value)
		h0.compute_posterior(data)
		yield h0
		
		seen_count[h] += 1
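
A minimal usage sketch for ptaboo_search above, not taken from the source: it assumes h0 and data are set up as in the other examples and reuses the FiniteBestSet/posterior_score pattern from Example #1; the step count and seen_penalty are illustrative values.

from copy import copy

# ptaboo_search and FiniteBestSet are assumed to already be in scope, as in the examples above
best = FiniteBestSet(N=10)
for h in ptaboo_search(h0, data, steps=10000, seen_penalty=10.0):
    # the sampler re-yields the same underlying hypothesis object each step, so copy before storing
    best.add(copy(h), h.posterior_score)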
Example #5
File: run.py Project: sa-/LOTlib
                http://www.mit.edu/~ndg/papers/RRfinal3.pdf

        This script scatters our imports around to show where each part comes from
"""

from Shared import *

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create an initial hypothesis. Here we use a RationalRulesLOTHypothesis, which
# is defined in LOTlib.Hypotheses and wraps LOTHypothesis with the rational rules prior

from LOTlib.Hypotheses.RationalRulesLOTHypothesis import RationalRulesLOTHypothesis

h0 = RationalRulesLOTHypothesis(grammar=DNF, rrAlpha=1.0)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Run the MH

from LOTlib.Inference.MetropolisHastings import mh_sample

# Run the vanilla sampler. If no step count is given, it will run forever.
# This prints the posterior (posterior_score), prior, tree grammar probability, likelihood,
# and the quoted hypothesis.
for h in mh_sample(h0, data, 10000, skip=100):
    print h.posterior_score, h.prior, h.value.log_probability(), h.likelihood, q(h)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This yields data like below.
#-10.1447997767 -9.93962659915 -12.2377573418 -0.20517317755 'and_(not_(is_shape_(x, 'triangle')), not_(is_color_(x, 'blue')))'
#-11.9260879461 -8.77647578935 -12.2377573418 -3.14961215672 'and_(not_(is_shape_(x, 'triangle')), not_(is_shape_(x, 'triangle')))'
Example #6
"""
from random import randint, sample

from LOTlib.Inference.MetropolisHastings import mh_sample
from Shared import *

NDATA = 50  # How many total data points?
NSTEPS = 10000
BEST_N = 100  # How many from each hypothesis to store
OUTFILE = "hypotheses.pkl"

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = FiniteBestSet()

# Now loop over each target concept and get a set of hypotheses
for i, f in enumerate(TARGET_CONCEPTS):

    # Set up the hypothesis
    h0 = LOTHypothesis(grammar, start='START', args=['x'])

    # Set up some data
    data = generate_data(NDATA, f)

    # Now run some MCMC
    fs = FiniteBestSet(N=BEST_N, key="posterior_score")
    fs.add(mh_sample(h0, data, steps=NSTEPS, trace=False))

    all_hypotheses.merge(fs)

pickle_save(all_hypotheses, OUTFILE)
Example #7
        if value is None: value = numpy.array([0.0, 0.0])
        VectorHypothesis.__init__(self, value=value, N=2, proposal=numpy.eye(2)*0.1)

    """
            MCMC plays nicest if we have defined prior and likelihood, and just don't touch compute_posterior
    """
    def compute_likelihood(self, data):
        self.likelihood = 0.0
        self.posterior_score = self.prior + self.likelihood
        return self.likelihood

    def compute_prior(self):
        x,y = self.value
        self.prior = -((1.0-x)**2.0 + 100.0*(y-x**2.0)**2.0)
        self.posterior_score = self.prior + self.likelihood
        return self.prior


    def propose(self):
        ## NOTE: Does not copy proposal
        newv = numpy.random.multivariate_normal(self.value, self.proposal)
        return RosenbrockSampler(value=newv), 0.0 # from symmetric proposals

if __name__ == "__main__":

    N = 1
    initial_hyp = RosenbrockSampler()

    for x in mh_sample(initial_hyp, [], 1000000, skip=100, trace=False):
        print x, x.posterior_score
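
The docstring above describes the intended division of labor: a hypothesis defines compute_prior and compute_likelihood and leaves compute_posterior alone. Below is a minimal sketch of the same pattern against a different target density (an isotropic 2-D Gaussian); the VectorHypothesis import path and its constructor keyword (N=2 here, n=2 in a later example) are assumptions based on the excerpts in this listing.

import numpy
from LOTlib.Hypotheses.VectorHypothesis import VectorHypothesis  # assumed import path
from LOTlib.Inference.MetropolisHastings import mh_sample

class GaussianSampler(VectorHypothesis):
    """Same pattern as RosenbrockSampler: the target log-density lives in compute_prior."""

    def __init__(self, value=None):
        if value is None: value = numpy.array([0.0, 0.0])
        VectorHypothesis.__init__(self, value=value, N=2, proposal=numpy.eye(2)*0.1)

    def compute_likelihood(self, data):
        self.likelihood = 0.0
        self.posterior_score = self.prior + self.likelihood
        return self.likelihood

    def compute_prior(self):
        x, y = self.value
        self.prior = -0.5*(x**2.0 + y**2.0)  # log of an isotropic Gaussian, up to a constant
        self.posterior_score = self.prior + self.likelihood
        return self.prior

    def propose(self):
        newv = numpy.random.multivariate_normal(self.value, self.proposal)
        return GaussianSampler(value=newv), 0.0  # symmetric proposal: no Hastings correction

if __name__ == "__main__":
    for x in mh_sample(GaussianSampler(), [], 100000, skip=100, trace=False):
        print x, x.posterior_score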
Example #8
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.MetropolisHastings import mh_sample
from LOTlib.Examples.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # To use it as a LOTHypothesis, each data point needs an "output" field that is True/False
    # depending on whether it is the target word. LOTHypothesis.compute_likelihood then checks
    # whether we match that output, ignoring the other words -- that's why it's a pseudolikelihood.
    for di in data:
        di.output = (di.word == W)
        #print (di.word == W)

    FBS = FiniteBestSet(max=True, N=100)

    H = LOTHypothesis(grammar, args=['A', 'B', 'S'], ALPHA=ALPHA)
    # Now just run the sampler with a LOTHypothesis
    for s in mh_sample(H, data, SAMPLES, skip=10):
        #print s.lp, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.push(s, s.lp)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.lp, k.prior, k.likelihood, k
Example #9
from Data import generate_data
from Grammar import grammar, NCONSTANTS

STEPS = 500000
SKIP = 0
data_sd = 0.1  # the SD of the data
NDATA = 50
MEMOIZE = 1000  # 0 means don't memoize

## The target function for symbolic regression
target = lambda x: 3. * x + sin(4.3 / x)

# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# starting hypothesis -- here this generates at random

data = generate_data(target, NDATA, data_sd)  # generate some data
h0 = MAPSymbolicRegressionHypothesis(grammar)
h0.CONSTANT_VALUES = numpy.zeros(NCONSTANTS)  ## TODO: Move this to an initializer

from LOTlib.Inference.MetropolisHastings import mh_sample
for h in mh_sample(h0,
                   data,
                   STEPS,
                   skip=SKIP,
                   trace=False,
                   debug=False,
                   memoize=MEMOIZE):
    print h.posterior_score, h.likelihood, h.prior, h.CONSTANT_VALUES, qq(h)
Example #10
#for i in xrange(100):
	#print LOTHypothesis(G, args=['S'])
	

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Or real inference:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData, Obj # for nicely managing data
from LOTlib.Inference.MetropolisHastings import mh_sample # for running MCMC


# Make up some data -- here just one set containing {red, red, green} colors
data = [ FunctionData(input=[ {Obj(color='red'), Obj(color='red'), Obj(color='green')} ], \
	              output=True) ]

# Create an initial hypothesis
h0 = LOTHypothesis(G, args=['S'])

# OR if we want to specify and use insert/delete proposals
#from LOTlib.Proposals import *
#h0 = LOTHypothesis(G, proposal_function=MixtureProposal(G, [RegenerationProposal(G), InsertDeleteProposal(G)] ) )


# MCMC!
for h in mh_sample(h0, data, 4000): # run sampler
#for h in unique(mh_sample(h0, data, 4000)): # get unique samples
	# hypotheses' .prior, .likelihood, and .posterior_score are set in mh_sample
	print h.likelihood, h.prior, h.posterior_score, h

	
Example #11
def run_one(r):
	if LOTlib.SIG_INTERRUPTED: return

	h0 = NumberExpression(G)
	
	#sampler = tempered_transitions_sample(copy(h0), data, TEST_SAMPLES, skip=0, temperatures=[1.0, 1.25, 1.5])
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="TemperedTransitions-1.5\t"+str(r), output=output )
	
	#sampler = tempered_transitions_sample(copy(h0), data, TEST_SAMPLES, skip=0, temperatures=[1.0, 1.05, 1.1])
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="TemperedTransitions-1.1\t"+str(r), output=output )
	
	#sampler = tempered_transitions_sample(copy(h0), data, TEST_SAMPLES, skip=0, temperatures=[1.0, 1.025, 1.05])
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="TemperedTransitions-1.05\t"+str(r), output=output )
	
	
	
	#sampler = parallel_tempering_sample(copy(h0), data, TEST_SAMPLES, within_steps=10, temperatures=[1.0, 1.25, 1.5], swaps=1)
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="ParallelTempering-1.5\t"+str(r), output=output )
	
	#sampler = parallel_tempering_sample(copy(h0), data, TEST_SAMPLES, within_steps=10, temperatures=[1.0, 1.05, 1.1], swaps=1)
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="ParallelTempering-1.1\t"+str(r), output=output )
	
	#sampler = parallel_tempering_sample(copy(h0), data, TEST_SAMPLES, within_steps=10, temperatures=[1.0, 1.025, 1.05], swaps=1)
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="ParallelTempering-1.05\t"+str(r), output=output )
	
	
	inner_steps=10
	sampler = datawise_optimize(copy(h0), data, TEST_SAMPLES*inner_steps, inner_steps=inner_steps, data_weight=1.0)
	evaluate_sampler(target, sampler, steps=TEST_SAMPLES*inner_steps, name="DatawiseOptimize-1.0\t"+str(r), output=output )
	
	sampler = datawise_optimize(copy(h0), data, TEST_SAMPLES*inner_steps, inner_steps=inner_steps, data_weight=0.1)
	evaluate_sampler(target, sampler, steps=TEST_SAMPLES*inner_steps, name="DatawiseOptimize-0.1\t"+str(r), output=output )

	sampler = datawise_optimize(copy(h0), data, TEST_SAMPLES*inner_steps, inner_steps=inner_steps, data_weight=0.01)
	evaluate_sampler(target, sampler, steps=TEST_SAMPLES*inner_steps, name="DatawiseOptimize-0.01\t"+str(r), output=output )
	
	
	#sampler = ptaboo_search( copy(h0), data, steps=TEST_SAMPLES, skip=0, seen_penalty=1.0)
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="PtabooSearch-1.0\t"+str(r), trace=False, output=output )

	#sampler = ptaboo_search( copy(h0), data, steps=TEST_SAMPLES, skip=0, seen_penalty=10.0)
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="PtabooSearch-10.0\t"+str(r), trace=False, output=output )

	#sampler = ptaboo_search( copy(h0), data, steps=TEST_SAMPLES, skip=0, seen_penalty=100.0)
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="PtabooSearch-100.0\t"+str(r), trace=False, output=output )



	#sampler = increase_temperature_mh_sample( copy(h0), data, steps=TEST_SAMPLES, skip=0, increase_amount=1.01)
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="IncreaseTemperature-1.01\t"+str(r), trace=False, output=output)

	#sampler = increase_temperature_mh_sample( copy(h0), data, steps=TEST_SAMPLES, skip=0, increase_amount=1.1)
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="IncreaseTemperature-1.1\t"+str(r), trace=False, output=output)
	
	#sampler = increase_temperature_mh_sample( copy(h0), data, steps=TEST_SAMPLES, skip=0, increase_amount=1.5)
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="IncreaseTemperature-1.5\t"+str(r), trace=False, output=output)
	
	#sampler = increase_temperature_mh_sample( copy(h0), data, steps=TEST_SAMPLES, skip=0, increase_amount=2.0)
	#evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="IncreaseTemperature-2.0\t"+str(r), trace=False, output=output)

	
	
	sampler = mh_sample( copy(h0), data, steps=TEST_SAMPLES, skip=0)
	evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="BasicSampler\t"+str(r), trace=False, output=output)
	
	sampler = mh_sample( copy(h0), data, steps=TEST_SAMPLES, skip=0, temperature=1.01)
	evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="BasicSampler-T1.01\t"+str(r), trace=False, output=output)
	
	sampler = mh_sample( copy(h0), data, steps=TEST_SAMPLES, skip=0, temperature=1.05)
	evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="BasicSampler-T1.05\t"+str(r), trace=False, output=output )
	
	sampler = mh_sample( copy(h0), data, steps=TEST_SAMPLES, skip=0, temperature=1.1)
	evaluate_sampler(target, sampler, steps=TEST_SAMPLES, name="BasicSampler-T1.1\t"+str(r), trace=False, output=output )
Example #12
        VectorHypothesis.__init__(self, value=value, n=2, proposal=numpy.eye(2)*0.1)

    """
    MCMC plays nicest if we have defined prior and likelihood, and just don't touch compute_posterior.

    """
    def compute_likelihood(self, data, **kwargs):
        self.likelihood = 0.0
        self.posterior_score = self.prior + self.likelihood
        return self.likelihood

    def compute_prior(self):
        x,y = self.value
        self.prior = -((1.0-x)**2.0 + 100.0*(y-x**2.0)**2.0)
        self.posterior_score = self.prior + self.likelihood
        return self.prior

    def propose(self):
        ## NOTE: Does not copy proposal
        newv = numpy.random.multivariate_normal(self.value, self.proposal)
        return RosenbrockSampler(value=newv), 0.0 # from symmetric proposals


if __name__ == "__main__":

    N = 1
    initial_hyp = RosenbrockSampler()

    for x in lot_iter(mh_sample(initial_hyp, [], 1000000, skip=100, trace=False)):
        print x, x.posterior_score
Example #13
                                  proposal=numpy.eye(2) * 0.1)

    """
            MCMC plays nicest if we have defined prior and likelihood, and just don't touch compute_posterior
    """

    def compute_likelihood(self, data):
        self.likelihood = 0.0
        self.posterior_score = self.prior + self.likelihood
        return self.likelihood

    def compute_prior(self):
        x, y = self.value
        self.prior = -((1.0 - x)**2.0 + 100.0 * (y - x**2.0)**2.0)
        self.posterior_score = self.prior + self.likelihood
        return self.prior

    def propose(self):
        ## NOTE: Does not copy proposal
        newv = numpy.random.multivariate_normal(self.value, self.proposal)
        return RosenbrockSampler(value=newv), 0.0  # from symmetric proposals


if __name__ == "__main__":

    N = 1
    initial_hyp = RosenbrockSampler()

    for x in mh_sample(initial_hyp, [], 1000000, skip=100, trace=False):
        print x, x.posterior_score
Example #14
# Or we can make them as hypotheses (functions of S):
#for i in xrange(100):
#print LOTHypothesis(grammar, args=['S'])

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Or real inference:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData, Obj  # for nicely managing data
from LOTlib.Inference.MetropolisHastings import mh_sample  # for running MCMC

# Make up some data -- here just one set containing {red, red, green} colors
data = [ FunctionData(input=[ {Obj(color='red'), Obj(color='red'), Obj(color='green')} ], \
                      output=True) ]

# Create an initial hypothesis
h0 = LOTHypothesis(grammar, args=['S'])

# OR if we want to specify and use insert/delete proposals
#from LOTlib.Proposals import *
#h0 = LOTHypothesis(grammar, proposal_function=MixtureProposal(grammar, [RegenerationProposal(grammar), InsertDeleteProposal(grammar)] ) )

if __name__ == "__main__":

    # MCMC!
    for h in mh_sample(h0, data, 4000):  # run sampler
        #for h in unique(mh_sample(h0, data, 4000)): # get unique samples
        # hypotheses' .prior, .likelihood, and .posterior_score are set in mh_sample
        print h.likelihood, h.prior, h.posterior_score, h
Example #15
                 FunctionData(input=[ "n2", "n1" ], output=False),
                 FunctionData(input=[ "n2", "n2" ], output=False),
                 FunctionData(input=[ "n2", "p1" ], output=True),
                 FunctionData(input=[ "n2", "p2" ], output=True)]

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Standard exports
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis

def make_ho(value=None):
    return LOTHypothesis(grammar, value=value, args=['x', 'y'], ALPHA=0.999) # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary

if __name__ == "__main__":
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Run mcmc
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    from LOTlib.Proposals.RegenerationProposal import *
    #mp = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar)] )
    mp = RegenerationProposal(grammar)

    h0 = LOTHypothesis(grammar, args=['x', 'y'], ALPHA=0.999, proposal_function=mp) # alpha here trades off with the amount of data. Currently assuming no noise, but that's not necessary


    from LOTlib.Inference.MetropolisHastings import mh_sample
    for h in mh_sample(h0, data, 4000000, skip=100):
        print h.posterior_score, h.likelihood, h.prior,  cleanFunctionNodeString(h)
        print map( lambda d: h(*d.input), data)
        print "\n"
Example #16

import numpy
from math import sin

from LOTlib.DataAndObjects import FunctionData
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Miscellaneous import qq

from MAPSymbolicRegressionHypothesis import MAPSymbolicRegressionHypothesis, grammar

from Data import generate_data
from Grammar import grammar, NCONSTANTS

STEPS = 500000
SKIP = 0
data_sd = 0.1 # the SD of the data
NDATA = 50
MEMOIZE = 1000 # 0 means don't memoize

## The target function for symbolic regression
target = lambda x: 3.*x + sin(4.3/x)

# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# starting hypothesis -- here this generates at random

data = generate_data(target, NDATA, data_sd) # generate some data
h0 = MAPSymbolicRegressionHypothesis(grammar)
h0.CONSTANT_VALUES = numpy.zeros(NCONSTANTS) ## TODO: Move this to an initializer

from LOTlib.Inference.MetropolisHastings import mh_sample
for h in mh_sample(h0, data, STEPS, skip=SKIP, trace=False, debug=False, memoize=MEMOIZE):
    print h.posterior_score, h.likelihood, h.prior, h.CONSTANT_VALUES, qq(h)