Пример #1
0
def run(*args):
    """Re-score the stored hypotheses on fresh data and write summary rows.

    args[0] is the data amount handed to generate_data. Every hypothesis in
    the module-level `hypotheses` collection has its posterior recomputed on
    that data. Rows are then emitted through fprintn to two files whose names
    are built from options.OUT_PATH and get_rank():

    * "-hypotheses.<rank>.txt": one row per word of each hypothesis returned
      by a FiniteBestSet created with N=25.
    * "-stats.<rank>.txt": one row per word in `words`, carrying the six
      posterior-weighted totals accumulated below.

    Always returns 0.
    """
    data_size = args[0]

    # Per-word accumulators. The p_representation* totals collect the
    # posterior mass of hypotheses whose agreement value is exactly 1.0; the
    # p_response* totals collect the posterior-weighted agreement itself.
    p_representation = defaultdict(int)
    p_response = defaultdict(int)
    p_representation_literal = defaultdict(int)
    p_response_literal = defaultdict(int)
    p_representation_presup = defaultdict(int)
    p_response_presup = defaultdict(int)

    data = generate_data(data_size)

    # Recompute the posteriors. Only the side effect matters, so use a plain
    # loop rather than building a throwaway list.
    for h in hypotheses:
        h.compute_posterior(data)

    # Log normalizer over all hypotheses
    Z = logsumexp([h.posterior_score for h in hypotheses])

    # and output the top hypotheses
    qq = FiniteBestSet(max=True, N=25)
    for h in hypotheses:
        qq.push(h, h.posterior_score)
    for i, h in enumerate(qq.get_all(sorted=True)):
        for w in h.all_words():
            fprintn(8, data_size, i, w, h.posterior_score, q(h.value[w]), f=options.OUT_PATH+"-hypotheses."+str(get_rank())+".txt")

    # and compute the probability of being correct
    for h in hypotheses:
        hstr = str(h)
        # Normalized posterior of h; it is the same for every word, so it is
        # computed once per hypothesis instead of once per (hypothesis, word).
        p = exp(h.posterior_score - Z)
        for w in words:
            key = w + ":" + hstr

            # if we always agree with the target, then we count as the right rep.
            p_representation[w] += p * (agree_pct[key] == 1.)
            p_representation_presup[w] += p * (agree_pct_presup[key] == 1.)
            p_representation_literal[w] += p * (agree_pct_literal[key] == 1.)

            # and just how often does the hypothesis agree?
            p_response[w] += p * agree_pct[key]
            p_response_presup[w] += p * agree_pct_presup[key]
            p_response_literal[w] += p * agree_pct_literal[key]

    for w in words:
        fprintn(10, str(get_rank()), q(w), data_size, p_representation[w], p_representation_presup[w], p_representation_literal[w], p_response[w], p_response_presup[w], p_response_literal[w], f=options.OUT_PATH+"-stats."+str(get_rank())+".txt")

    return 0
Пример #2
0
def run(llt=1.0):
    """Sample CCG lexica with MH and return the collected best set.

    llt is forwarded to CCGLexicon as likelihood_temperature. Every sample
    yielded by the chain is added, scored by its posterior_score, to a
    FiniteBestSet created with N=10, which is returned.
    """
    from LOTlib.Inference.MetropolisHastings import mh_sample

    start = CCGLexicon(make_hypothesis, words=all_words, alpha=0.9, palpha=0.9, likelihood_temperature=llt)
    best = FiniteBestSet(N=10)

    for sample in lot_iter(mh_sample(start, data, SAMPLES)):
        best.add(sample, sample.posterior_score)

    return best
Пример #3
0
def run(llt=1.0):
    """Run one MH chain over CCG lexica at likelihood temperature `llt`.

    Returns the FiniteBestSet (created with N=10) that received every sample
    of the chain together with its posterior_score.
    """
    initial_lexicon = CCGLexicon(
        make_hypothesis,
        words=all_words,
        alpha=0.9,
        palpha=0.9,
        likelihood_temperature=llt
    )
    top_samples = FiniteBestSet(N=10)

    from LOTlib.Inference.MetropolisHastings import mh_sample
    chain = lot_iter(mh_sample(initial_lexicon, data, SAMPLES))
    for lexicon in chain:
        top_samples.add(lexicon, lexicon.posterior_score)

    return top_samples
Пример #4
0
def run(*args):
    """Run one sampler from a fresh hypothesis and return what it collected.

    Positional arguments are accepted but never read. The returned
    FiniteBestSet is created with N=100 and keyed on "posterior_score".
    """
    # Container for this run's samples
    best = FiniteBestSet(N=100, max=True, key="posterior_score")

    # Starting hypothesis built from the grammar alone
    start = GaussianLOTHypothesis(grammar)
    sampler = MHSampler(start, data, STEPS, skip=SKIP)
    best.add(sampler)

    return best
Пример #5
0
    def run(*args):
        """Run one MH chain and return its samples in a FiniteBestSet.

        The arguments are ignored; they only let the function be used with map.
        """
        # The chain starts from a hypothesis built from the grammar
        start = GaussianLOTHypothesis(grammar, prior_temperature=PRIOR_TEMPERATURE)
        chain = mh_sample(start, data, STEPS, skip=SKIP)

        # Collected in a set created with size 100, keyed on posterior_score
        top = FiniteBestSet(100, max=True, key="posterior_score")
        top.add(chain)
        return top
Пример #6
0
    def run(*args):
        """Sample from a fresh GaussianLOTHypothesis and collect the results.

        Nothing in *args is used. Returns the FiniteBestSet that was fed the
        output of mh_sample.
        """
        initial = GaussianLOTHypothesis(
            grammar, prior_temperature=PRIOR_TEMPERATURE)

        # Created with size 100, keyed on posterior_score
        results = FiniteBestSet(100, max=True, key="posterior_score")
        samples = mh_sample(initial, data, STEPS, skip=SKIP)
        results.add(samples)

        return results
Пример #7
0
def load_finite_trees(f):
	"""
		Load a list of finite trees from the pickle file at path f.
		The pickled object can come in two formats -- a "finite sample" of hypotheses, or a "finite sample" of lexica.
		In the latter case (the first stored item is a VectorizedLexicon or SimpleLexicon), the values of each
		lexicon's dexpr are collected into a new FiniteBestSet (all pushed with score 0.0) and its contents are returned.
		Otherwise the contents of the loaded sample are returned as they are.
	"""
	# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
	# The with-block closes the file even if unpickling fails.
	with open(f) as inh:
		fs = pickle.load(inh)

	# An empty sample has no first element to inspect; return its (empty) contents
	if len(fs.Q) == 0:
		return fs.get_all()

	if isinstance(fs.Q[0], (VectorizedLexicon, SimpleLexicon)):
		# We want to extract the trees from this sample of lexica
		upq = FiniteBestSet()
		for l in fs.get_all():
			for e in l.dexpr.values():
				upq.push(e, 0.0)
		return upq.get_all()

	return fs.get_all()
Пример #8
0
def run(data_size):

    print "Running ", data_size

    # We store the top 100 from each run
    hypset = FiniteBestSet(TOP_COUNT, max=True)

    # initialize the data
    data = generate_data(data_size)

    # starting hypothesis -- here this generates at random
    learner = GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)

    # We will defautly generate from null the grammar if no value is specified
    for w in target.all_words(): learner.set_word(w)

    # populate the finite sample by running the sampler for this many steps
    for x in mh_sample(learner, data, SAMPLES, skip=0):
        hypset.push(x, x.posterior_score)

    return hypset
Пример #9
0
def run(data_size):

    print "Running ", data_size

    # We store the top 100 from each run
    hypset = FiniteBestSet(TOP_COUNT, max=True)

    # initialize the data
    data = generate_data(data_size)

    # starting hypothesis -- here this generates at random
    learner = GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)

    # We will defautly generate from null the grammar if no value is specified
    for w in target.all_words():
        learner.set_word(w)

    # populate the finite sample by running the sampler for this many steps
    for x in mh_sample(learner, data, SAMPLES, skip=0):
        hypset.push(x, x.posterior_score)

    return hypset
Пример #10
0
    learner = GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)

    # By default, a word is generated from the grammar if no value is specified
    for w in target.all_words():
        learner.set_word(w)

    # populate the finite sample by running the sampler for this many steps
    for x in mh_sample(learner, data, SAMPLES, skip=0):
        hypset.push(x, x.posterior_score)

    return hypset


if __name__ == "__main__":

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # MPI interface

    # Map. SimpleMPI will use a normal MAP if we are not running in MPI
    allret = MPI_map(run, map(lambda x: [x],
                              DATA_AMOUNTS * CHAINS))  # this many chains

    ## combine into a single hypothesis set and save
    outhyp = FiniteBestSet(max=True)
    for r in allret:
        print "# Merging ", len(r)
        outhyp.merge(r)

    import pickle
    pickle.dump(outhyp, open(OUT_PATH, 'w'))
Пример #11
0
def run(*args):
    """Evaluate the stored hypotheses on newly generated data.

    args[0] is the data amount given to generate_data. Each element of the
    module-level `hypotheses` collection gets its posterior recomputed on
    that data, after which two kinds of rows are emitted through fprintn
    (file names are built from options.OUT_PATH and get_rank()):

    * "-hypotheses.<rank>.txt": one row per word of each hypothesis returned
      by a FiniteBestSet created with N=25.
    * "-stats.<rank>.txt": one row per word in `words` with the six totals
      accumulated below.

    Always returns 0.
    """
    data_size = args[0]

    # Per-word totals. "representation": posterior mass of hypotheses whose
    # agreement value equals 1.0. "response": posterior-weighted agreement.
    p_representation = defaultdict(int)
    p_response = defaultdict(int)
    p_representation_literal = defaultdict(int)
    p_response_literal = defaultdict(int)
    p_representation_presup = defaultdict(int)
    p_response_presup = defaultdict(int)

    data = generate_data(data_size)

    # Recompute the posteriors; a plain loop, since only the side effect on
    # each hypothesis is wanted and no list of results is needed.
    for h in hypotheses:
        h.compute_posterior(data)

    # Log normalizer of the posterior over all hypotheses
    Z = logsumexp([h.posterior_score for h in hypotheses])

    # and output the top hypotheses
    qq = FiniteBestSet(max=True, N=25)
    for h in hypotheses:
        qq.push(h, h.posterior_score)  # get the tops
    for i, h in enumerate(qq.get_all(sorted=True)):
        for w in h.all_words():
            fprintn(8,
                    data_size,
                    i,
                    w,
                    h.posterior_score,
                    q(h.value[w]),
                    f=options.OUT_PATH + "-hypotheses." + str(get_rank()) +
                    ".txt")

    # and compute the probability of being correct
    for h in hypotheses:
        hstr = str(h)
        # The normalized posterior does not depend on the word, so compute it
        # once per hypothesis rather than once per (hypothesis, word) pair.
        p = exp(h.posterior_score - Z)
        for w in words:
            key = w + ":" + hstr

            # if we always agree with the target, then we count as the right rep.
            p_representation[w] += p * (agree_pct[key] == 1.)
            p_representation_presup[w] += p * (agree_pct_presup[key] == 1.)
            p_representation_literal[w] += p * (agree_pct_literal[key] == 1.)

            # and just how often does the hypothesis agree?
            p_response[w] += p * agree_pct[key]
            p_response_presup[w] += p * agree_pct_presup[key]
            p_response_literal[w] += p * agree_pct_literal[key]

    for w in words:
        fprintn(10,
                str(get_rank()),
                q(w),
                data_size,
                p_representation[w],
                p_representation_presup[w],
                p_representation_literal[w],
                p_response[w],
                p_response_presup[w],
                p_response_literal[w],
                f=options.OUT_PATH + "-stats." + str(get_rank()) + ".txt")

    return 0
Пример #12
0
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.Samplers.MetropolisHastings import mh_sample
from LOTlib.Examples.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # Now to use it as a LOTHypothesis, we need data to have an "output" field which is true/false for whether its the target word. This is then used by LOTHypothesis.compute_likelihood to see if we match or not with whether a word was said (ignoring the other words -- that's why its a pseudolikelihood)
    for di in data:
        di.output = (di.word == W)
        #print (di.word == W)

    FBS = FiniteBestSet(max=True, N=100)

    H = LOTHypothesis(grammar, args=['A', 'B', 'S'], ALPHA=ALPHA)
    # Now just run the sampler with a LOTHypothesis
    for s in mh_sample(H, data, SAMPLES, skip=10):
        #print s.lp, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.push(s, s.lp)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.lp, k.prior, k.likelihood, k
Пример #13
0

def make_h0(value=None):
    """Return a GaussianLOTHypothesis over `grammar`, passing `value` through."""
    h0 = GaussianLOTHypothesis(grammar, value=value)
    return h0


if __name__ == "__main__":

    # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # the running function

    def run(*args):

        # starting hypothesis -- here this generates at random
        h0 = GaussianLOTHypothesis(grammar,
                                   prior_temperature=PRIOR_TEMPERATURE)

        # We store the top 100 from each run
        pq = FiniteBestSet(100, max=True, key="posterior_score")
        pq.add(mh_sample(h0, data, STEPS, skip=SKIP))

        return pq

    finitesample = FiniteBestSet(max=True)  # the finite sample of all
    results = map(run, [[None]] * CHAINS)  # Run on a single core
    finitesample.merge(results)

    ## and display
    for r in finitesample.get_all(decreasing=False, sorted=True):
        print r.posterior_score, r.prior, r.likelihood, qq(str(r))
Пример #14
0
        fbs.add(h, h.posterior_score)

    return fbs


## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### MPI map
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from SimpleMPI.MPI_map import MPI_map, is_master_process

allret = MPI_map(run, map(lambda x: [x], [0.01, 0.1, 1.0] * 100 ))

if is_master_process():

    allfbs = FiniteBestSet(max=True)
    allfbs.merge(allret)

    H = allfbs.get_all()

    for h in H:
        h.likelihood_temperature = 0.01 # on what set of data we want?
        h.compute_posterior(data)

    # show the *average* ll for each hypothesis
    for h in sorted(H, key=lambda h: h.posterior_score):
        print h.posterior_score, h.prior, h.likelihood, h.likelihood_temperature
        print h

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Play around with some different inference schemes
Пример #15
0
from Data import data
from Grammar import grammar
from Utilities import make_h0


def run(*args):
    """Run a single MHSampler chain and return its FiniteBestSet.

    Whatever is passed in *args is ignored. STEPS and SKIP are read from
    module scope when the function is called.
    """
    initial = GaussianLOTHypothesis(grammar)
    chain = MHSampler(initial, data, STEPS, skip=SKIP)

    # Created with N=100 and keyed on posterior_score
    collected = FiniteBestSet(N=100, max=True, key="posterior_score")
    collected.add(chain)
    return collected


if __name__ == "__main__":

    CHAINS = 10
    STEPS = 10000000
    SKIP = 0

    finitesample = FiniteBestSet(max=True) # the finite sample of all
    results = map(run, [ [None] ] * CHAINS ) # Run on a single core
    finitesample.merge(results)

    ## and display
    for r in finitesample.get_all(decreasing=False, sorted=True):
        print r.posterior_score, r.prior, r.likelihood, qq(str(r))
Пример #16
0
def novelty_search(h0s, data, grammar, props=10, novelty_advantage=100):
	"""
		Search through hypotheses, maintaining a queue of good ones. We propose 
		to ones based on their posterior and how much they've been proposed to in the past. 
		See heapweight(h) below -- it determines how we trade off posterior and prior search there...
		
		SO: You are searched further if you are good, and in a "novel" part of the space
		
		TODO: We could make this track the performance of proposals from a given hypothesis?
	"""
	
	novelty = defaultdict(float) # last time we proposed here, what proportion were novel? If we haven't done any, set to 1.0
	froms = defaultdict(int) # how many times did we propose from this?
	tos   = defaultdict(int) # how many to this?
	FS = FiniteBestSet(N=10)
	
	# When we add something to the heap, what weight does it have?
	# This should prefer high log probability, but also it should 
	# keep us from looking at things too much
	def heapweight(h):
		return -h.lp - novelty[h]*novelty_advantage

	openset = []
	for h0 in h0s:
		if h0 not in novelty:
			h0.compute_posterior(data)
			heapq.heappush( openset, (heapweight(h0),h0) )
			novelty[h0] = 1.0 # treat as totally novel
			FS.add(h0, h0.lp)
	
	
	while not LOTlib.SIG_INTERRUPTED:
		lph, h = heapq.heappop(openset)
		
		froms[h] += 1
		
		#proposals_from[h] += props
		print "\n"
		print len(openset), "\t", h.lp, "\t", heapweight(h), "\t", novelty[h], "\t", froms[h], tos[h], "\t", q(h)
		for x in FS.get_all(sorted=True):
			print "\t", x.lp, "\t", heapweight(x), "\t", novelty[x], "\t", q(get_knower_pattern(h)), "\t", froms[x], tos[x],"\t", q(x)
		
		# Store all together so we know who to update (we make their novelty the same as their parent's)
		proposals = [ h.propose()[0] for i in xrange(props) ]
		new_proposals = [] # which are new?
		
		novelprop = 0
		for p in proposals:
			if p not in novelty:
				p.compute_posterior(data)
				FS.add(p, p.lp)
				novelty[p] = "ERROR" # just keep track -- should be overwritten later
				novelprop += 1
				new_proposals.append(p)
			tos[p] += 1
		
		novelty[h] = float(novelprop) / float(props)
		
		# use the novelty from the parent
		for p in new_proposals: 
			novelty[p] = random() * novelty[h]
			heapq.heappush(openset, (heapweight(p), p) )
		
		# and put myself back on the heap, but with the new proposal numbers
		heapq.heappush(openset, (heapweight(h), h) )
Пример #17
0
Run inference on each target concept and save the output

"""
import pickle
from LOTlib import break_ctrlc
from LOTlib.FiniteBestSet import FiniteBestSet
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from Model import *
from TargetConcepts import TargetConcepts

NDATA = 20  # How many data points for each function? (first argument of make_data)
NSTEPS = 100000  # Passed to MHSampler as `steps` for every concept
BEST_N = 500  # Passed as N to the FiniteBestSet built for each concept

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = FiniteBestSet()

if __name__ == "__main__":
    # Now loop over each target concept and get a set of hypotheses
    for i, f in enumerate(TargetConcepts):

        # Set up the hypothesis: a fresh one for every concept
        h0 = make_hypothesis()

        # Set up some data: NDATA points for concept f
        data = make_data(NDATA, f)

        # Now run some MCMC; the sampler output, wrapped in break_ctrlc,
        # is added to a set keyed on posterior_score
        fs = FiniteBestSet(N=BEST_N, key="posterior_score")
        fs.add(break_ctrlc(MHSampler(h0, data, steps=NSTEPS, trace=False)))
Пример #18
0
    ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ### MPI map
    ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    try:
        from SimpleMPI.MPI_map import MPI_map, is_master_process
    except ImportError:
        MPI_map = map
        is_master_process = lambda: True

    allret = MPI_map(run, map(lambda x: [x], [0.01, 0.1, 1.0] * 100 ))

    if is_master_process():

        allfbs = FiniteBestSet(max=True)
        allfbs.merge(allret)

        H = allfbs.get_all()

        for h in H:
            h.likelihood_temperature = 0.01 # on what set of data we want?
            h.compute_posterior(data)

        # show the *average* ll for each hypothesis
        for h in sorted(H, key=lambda h: h.posterior_score):
            print h.posterior_score, h.prior, h.likelihood, h.likelihood_temperature
            print h

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ## Play around with some different inference schemes
Пример #19
0
"""
import pickle
from LOTlib import lot_iter
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.FiniteBestSet import FiniteBestSet
from LOTlib.Inference.MetropolisHastings import MHSampler
from Model import *


NDATA = 50 # How many total data points? (first argument of generate_data)
NSTEPS = 10000 # Passed to MHSampler as `steps` for every concept
BEST_N = 100 # Passed as N to the FiniteBestSet built for each concept
OUTFILE = "hypotheses.pkl" # Not referenced in the lines shown here

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = FiniteBestSet()

if __name__ == "__main__":
    # Now loop over each target concept and get a set of hypotheses
    for i, f in enumerate(TARGET_CONCEPTS):

        # Set up the hypothesis: a fresh LOTHypothesis for every concept
        h0 = LOTHypothesis(grammar, start='START', args=['x'])

        # Set up some data: NDATA points for concept f
        data = generate_data(NDATA, f)

        # Now run some MCMC; the sampler output, wrapped in lot_iter,
        # is added to a set keyed on posterior_score
        fs = FiniteBestSet(N=BEST_N, key="posterior_score")
        fs.add(lot_iter(MHSampler(h0, data, steps=NSTEPS, trace=False)))
Пример #20
0
    data = generate_data(data_size)

    # starting hypothesis -- here this generates at random
    learner = GriceanQuantifierLexicon(make_my_hypothesis, my_weight_function)

    # By default, a word is generated from the grammar if no value is specified
    for w in target.all_words(): learner.set_word(w)

    # populate the finite sample by running the sampler for this many steps
    for x in mh_sample(learner, data, SAMPLES, skip=0):
        hypset.push(x, x.posterior_score)

    return hypset

if __name__ == "__main__":

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # MPI interface

    # Map. SimpleMPI will use a normal MAP if we are not running in MPI
    allret = MPI_map(run, map(lambda x: [x], DATA_AMOUNTS * CHAINS)) # this many chains

    ## combine into a single hypothesis set and save
    outhyp = FiniteBestSet(max=True)
    for r in allret:
        print "# Merging ", len(r)
        outhyp.merge(r)

    import pickle
    pickle.dump(outhyp, open(OUT_PATH, 'w'))