Example #1
def run(data_amount):
    print "Starting chain on %s data points" % data_amount
    data = makeTreeLexiconData(target,
                               the_context,
                               n=data_amount,
                               alpha=options.alpha,
                               epsilon=options.epsilon,
                               verbose=True)

    h0 = KinshipLexicon(alpha=options.alpha)
    for w in target_words:
        h0.set_word(
            w, LOTHypothesis(my_grammar, display='lambda recurse_, C, X: %s'))

    hyps = TopN(N=options.top_count)

    mhs = MHSampler(h0,
                    data,
                    options.steps,
                    likelihood_temperature=options.llt,
                    prior_temperature=options.prior_temp)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        if samples_yielded % 100 == 0:
            print h.prior, h.likelihood, h
        hyps.add(h)

    import pickle
    # Name the output file once, so the printed name matches what is written
    out_path = 'Chains/' + data[0].X + data[0].Y + str(data_amount) + data[0].word + '.pkl'
    print 'Writing ' + out_path
    with open(out_path, 'w') as f:
        pickle.dump(hyps, f)

    return hyps
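
# A minimal reader's sketch (an assumption, not part of the original file):
# reload a saved chain and inspect the best hypotheses with the same
# TopN/pickle API used above.
def inspect_chain(path):
    import pickle
    with open(path) as f:
        hyps = pickle.load(f)
    for h in hyps.get_all(sorted=True):
        print h.posterior_score, h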
Example #2
File: Run.py Project: TerryLew/BinLOTlib
"""
Run inference on each target concept and save the output

"""
import pickle
from LOTlib import break_ctrlc
from LOTlib.TopN import TopN
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from Model import *
from TargetConcepts import TargetConcepts

NDATA = 20       # How many data points for each target concept?
NSTEPS = 100000  # MCMC steps per concept
BEST_N = 500     # How many of the best hypotheses to store per concept

# Where we keep track of all hypotheses (across concepts)
all_hypotheses = TopN(N=BEST_N)

if __name__ == "__main__":
    # Now loop over each target concept and get a set of hypotheses
    for i, f in enumerate(TargetConcepts):

        # Set up the hypothesis
        h0 = make_hypothesis()

        # Set up some data
        data = make_data(NDATA, f)

        # Now run some MCMC
        fs = TopN(N=BEST_N, key="posterior_score")
        fs.add(break_ctrlc(MHSampler(h0, data, steps=NSTEPS, trace=False)))
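
        # The excerpt is cut off here; presumably (an assumption, consistent
        # with the comment on all_hypotheses above) each concept's results are
        # merged into the global collection:
        all_hypotheses.update(fs)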
Example #3
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Hypothesis
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.Hypotheses.RationalRulesLOTHypothesis import RationalRulesLOTHypothesis


def make_hypothesis(grammar=grammar, **kwargs):
    return RationalRulesLOTHypothesis(grammar=grammar, rrAlpha=1.0, **kwargs)


if __name__ == "__main__":

    from LOTlib.TopN import TopN
    hyps = TopN(N=1000)

    from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
    from LOTlib import break_ctrlc
    mhs = MHSampler(make_hypothesis(),
                    make_data(),
                    1000000,
                    likelihood_temperature=1.,
                    prior_temperature=1.)

    for samples_yielded, h in break_ctrlc(enumerate(mhs)):
        h.ll_decay = 0.
        hyps.add(h)

    import pickle
    with open('HypothesisSpace.pkl', 'w') as f:
        pickle.dump(hyps, f)  # the excerpt ends here; the dump is implied by the import above
Example #4
    d.update(pickle.load(f))

if options.filename2 is not None:
    print "Loading Space 2: " + options.filename2
    with open(options.filename2, 'r') as f:
        d.update(pickle.load(f))

Mass = set()

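# Data amounts: 1, 3, ..., 23, then 25, 50, ..., 250. Note range(...) + range(...)
# is a Python 2 idiom; Python 3 needs list(range(1, 25, 2)) + list(range(25, 251, 25)).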
for a in range(1, 25, 2) + range(25, 251, 25):
    print "Grabbing Top " + str(options.Nsize) + " from " + str(a) + ' dp'
    data = makeZipfianLexiconData(target,
                                  four_gen_tree_context,
                                  n=a,
                                  alpha=0.9,
                                  s=0.0,
                                  epsilon=0.0)
    simplicity_mass = TopN(N=options.Nsize)
    reuse_mass = TopN(N=options.Nsize)
    for h in d:
        # Score under the reuse prior: set the posterior by hand as
        # likelihood + reuse prior, then record the top hypotheses
        h.posterior_score = h.compute_likelihood(data) + compute_reuse_prior(h)
        reuse_mass.add(h)
        # Re-score under the grammar's default (simplicity) prior
        h.compute_posterior(data)
        simplicity_mass.add(h)
    Mass.update(simplicity_mass)
    Mass.update(reuse_mass)

print "Writing output file for " + str(len(Mass)) + ' hypotheses.'
with open(options.out_path, 'w') as f:
    pickle.dump(Mass, f)
Example #5
    c1 = vanilla_conditions(True, False)[0:2]
    c2 = vanilla_conditions(False, True)[0:1]

    for to_seq in c1:
        for from_seq in c2:
            print_star("")
            print from_seq, to_seq
            data = [
                FunctionData(alpha=ALPHA,
                             input=[from_seq],
                             output={to_seq: len(to_seq)})
            ]
            h0 = MyHypothesis()
            step = 0
            tn = TopN(N=N_H)
            # Stream from the sampler to a printer
            for h in MHSampler(h0,
                               data,
                               steps=STEPS,
                               acceptance_temperature=5.):
                tn.add(h)

            print

            for h in tn.get_all(sorted=True):
                out = h(from_seq)
                if len(out) >= len(to_seq):
                    # Compare the output's prefix against the target sequence
                    hd = hamming_distance(out[:len(to_seq)], to_seq)
                else:
                    hd = 15  # fixed penalty when the output is too short to compare
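
# `hamming_distance` is not shown in this excerpt. A minimal version consistent
# with its use above on equal-length sequences (an assumption, not necessarily
# the project's actual helper):
def hamming_distance(xs, ys):
    return sum(x != y for x, y in zip(xs, ys))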
Example #6
    databundle = [response, weights]
    generator = MHSampler(learner, databundle, STEPS, skip=SKIP)
    for g in generator:
        hypset.add(VectorizedLexicon_to_SimpleLexicon(g), g.posterior_score)
    return hypset


####################################################################################
## Main running
####################################################################################

# Load the trees from a file
my_finite_trees = pickle.load(open(IN_PATH))
print "# Done loading", len(my_finite_trees), "trees"

# Gives index to each hypothesis
for i, h in enumerate(my_finite_trees):
    hyp2index[h] = i

# Compute or load proposals
get_proposal_dist(my_finite_trees)

DATA_AMOUNTS = range(0, 2050, 100)
allret = map(run, DATA_AMOUNTS)

# combine into a single hypothesis set and save
outhyp = TopN()
for r in allret:
    outhyp.update(r)
pickle.dump(outhyp, open(OUT_PATH, 'w'))
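
# Variant sketch (an assumption, not in the original file): the runs for
# different data amounts are independent, so the serial `map` above could be
# replaced with a process pool, provided `run` and the hypotheses pickle
# cleanly across processes.
def run_parallel(amounts, nprocs=4):
    from multiprocessing import Pool
    return Pool(nprocs).map(run, amounts)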
Example #7
def runparts(size, x, p):
    # Problem: right now we only record the last partition, never saving the others.
    print "Start: " + str(x) + " on this many: " + str(size)
    try:
        # Make a new TopN for each data amount
        topn = TopN(N=200, key="posterior_score")
        print "Starting on partition ", p

        # Now we have to go in and fill in the nodes that are nonterminals
        # We can do this with generate
        v = grammar.generate(copy(p))

        h0 = MyHypothesis(grammar, value=v)
        # Every observed syllable gets the same count, `size`; build the
        # output dictionary compactly instead of listing each pair.
        syllables = [
            'n i k', 'h i N', 'f a n', 'g i f', 'm a N', 'f a m',
            'g i k', 'k a n', 'f a f', 'g i n', 'g i m', 'g i s',
            's i f', 's i n', 'n i s', 's i m', 's i k', 'h a N',
            'f i N', 'h i m', 'h i n', 'h a m', 'n i N', 'h i k',
            'f a s', 'f i n', 'h i f', 'n i m', 'g i N', 'h a g',
            's i N', 'n i n', 'f i m', 's i s', 'h i s', 'n a s',
            'k a s', 'f i s', 'n i f', 'm i n', 's a s', 'f a g',
            'k a g', 'k a f', 's a m', 'n a f', 'n a g', 'm i N',
            's a g', 'f i k', 'k a m', 'n a n', 's a f', 'n a m',
            'm a s', 'h a f', 'h a s', 'n a N', 'm i s', 's a n',
            's a N', 'm i k', 'f a N', 'm i m', 'm a g', 'm a f',
            'f i f', 'k a N', 'h a n', 'm a n', 'm a m', 'm i f',
        ]
        data = [FunctionData(input=[], output=dict.fromkeys(syllables, size))]

        for h in break_ctrlc(
                MHSampler(h0, data, steps=options.steps, trace=False)):
            # print "\t", h.posterior_score, h
            topn.add(h)

        return size, set(topn)

    except Exception as e:
        print "*** Exception ignored: ", e
        # If sampling fails, return an empty set for this partition
        return size, set()
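
# Hypothetical driver (an assumption; the excerpt does not show how runparts
# is called, and `sizes`/`partitions` are placeholders): run every partition
# at every data amount and pool the results, which would also address the
# "only recording last partition" note above.
def run_all(sizes, partitions):
    results = {}
    for size in sizes:
        for x, p in enumerate(partitions):
            sz, hyps = runparts(size, x, p)
            results.setdefault(sz, set()).update(hyps)
    return results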
Example #8
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from LOTlib.Projects.Quantifier.Model import *

ALPHA = 0.9
SAMPLES = 100000
DATA_SIZE = 1000

if __name__ == "__main__":

    ## sample the target data
    data = generate_data(DATA_SIZE)

    W = 'every'

    # To use this as a LOTHypothesis, each datum needs an "output" field that is
    # True/False for whether its utterance is the target word. LOTHypothesis.compute_likelihood
    # then scores whether we match that judgment while ignoring all other words,
    # which is why this is a pseudolikelihood.
    for di in data:
        di.output = (di.utterance == W)
        # print (di.utterance == W)

    FBS = TopN(N=100)

    H = LOTHypothesis(grammar, display='lambda A,B,S: %s', ALPHA=ALPHA)
    # Now just run the sampler with a LOTHypothesis
    for s in MHSampler(H, data, SAMPLES, skip=10):
        #print s.lp, "\t", s.prior, "\t", s.likelihood, "\n", s, "\n\n"
        FBS.push(s, s.lp)

    for k in reversed(FBS.get_all(sorted=True)):
        print k.lp, k.prior, k.likelihood, k
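
    # Note: push(x, score) and .lp look like the older LOTlib API (FiniteBestSet).
    # A sketch of the same loop against the current TopN API (an assumption):
    #   for s in MHSampler(H, data, SAMPLES, skip=10):
    #       FBS.add(s)
    #   for k in FBS.get_all(sorted=True):
    #       print k.posterior_score, k.prior, k.likelihood, k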
Example #9
from LOTlib.DataAndObjects import FunctionData

# Now our data takes input x=3 and maps it to 12
# What could the function be?
data = [FunctionData(input=[3], output=12, alpha=0.95)]

########################################
## Actually run
########################################
from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
from LOTlib.SampleStream import *
## First let's make a bunch of hypotheses
from LOTlib.TopN import TopN

tn = TopN(1000)

h0 = MyHypothesisX()
for h in MHSampler(h0, data, steps=100000):  # run more steps
    tn.add(h)

# store these in a list (tn.get_all returns a generator by default)
hypotheses = list(tn.get_all())

# Compute the normalizing constant
from LOTlib.Miscellaneous import logsumexp
z = logsumexp([h.posterior_score for h in hypotheses])

## Now compute a matrix of how likely each input is to go
## to each output
M = 20  # an MxM matrix of values
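
# A sketch of the matrix computation (an assumption about how this truncated
# example continues; numpy is not imported in the original):
import numpy as np
from math import exp

probs = np.zeros((M, M))            # probs[x, y] ~ P(output=y | input=x)
for h in hypotheses:
    w = exp(h.posterior_score - z)  # normalized posterior weight of h
    for x in range(M):
        try:
            y = h(x)                # run the hypothesis on input x
        except Exception:
            continue
        if isinstance(y, int) and 0 <= y < M:
            probs[x, y] += w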
Example #10
        collapsed_prob = grammar.log_probability(collapsed_forms[resps])
        collapsed_forms[resps].my_log_probability = logplusexp(collapsed_prob, tprior)
        if tprior > collapsed_forms[resps].display_tree_probability: # display the most concise form
            collapsed_forms[resps] = t
            collapsed_forms[resps].display_tree_probability = tprior
    else:
        collapsed_forms[resps] = t
        collapsed_forms[resps].display_tree_probability = tprior
        t.my_log_probability = tprior # FunctionNode uses this value when we call log_probability()
        print ">>", all_tree_count, len(collapsed_forms),  t, tprior

############################################
### Now actually enumerate trees
for t in grammar.enumerate(d=DEPTH):
    if 'presup_(False' in str(t):
        continue
    if not check_expansion(t):
        continue
    if t.count_subnodes() <= MAX_NODES:
        add_to_collapsed_trees(t)
        all_tree_count += 1
        print ">", t, grammar.log_probability(t)

## For kinder saving and loading:
upq = TopN()
for k in collapsed_forms.values():
    upq.add(LOTHypothesis(grammar, k, display='lambda context: %s'), 0.0)
pickle.dump(upq, open(OUT, 'w'))

print "Total tree count: ", all_tree_count
Example #11
        if self.nsamples in self.adapt_at: ## TODO: Maybe make this faster?
            self.adapt_temperatures()

        return ret


if __name__ == "__main__":

    from LOTlib import break_ctrlc
    from LOTlib.Examples.Number2015.Model import generate_data, make_h0
    data = generate_data(300)

    from LOTlib.TopN import TopN
    z = Z(unique=True)
    tn = TopN(N=10)

    from LOTlib.Miscellaneous import logrange

    sampler = AdaptiveParallelTemperingSampler(make_h0, data, steps=1000000,
                                               yield_only_t0=False,
                                               whichtemperature='acceptance_temperature',
                                               temperatures=logrange(1.0, 10.0, 10))

    # z and tn act as sample streams: each sample updates the running estimate
    # of the normalizer and is offered to the TopN as it passes through.
    for h in break_ctrlc(tn(z(sampler))):
        # print sampler.chain_idx, h.posterior_score, h
        pass

    for x in tn.get_all(sorted=True):
        print x.posterior_score, x

    print z
Example #12
# from LOTlib.TopN import TopN
# topChoice = TopN(N=10)

# print "\n\n\n\n\n\n_________1 data point____________\n"
# for h in MHSampler(h0, data, steps=5000):
#     print h.posterior_score
#     topChoice.add(h)
# else:
#     print "\n\n\n\n\n_________2 data point____________\n"
#     for h in MHSampler(h0, data_1, steps=5000):
#         print h.posterior_score

# Running and show only the top choice
from LOTlib.TopN import TopN

topChoice = TopN(N=10)
posProbs = []
stepNum = 40000

for step, h in enumerate(MHSampler(h0, make_data(data_size=1), steps=stepNum)):
    if step % 5000 == 0:
        print('current step: %d, current posterior score: %f' %
              (step, h.posterior_score))
    posProbs.append(h.posterior_score)
    topChoice.add(h)
    h0 = h  # keep the last sample so a follow-up run (like the commented one below) can start from it

# for step, h in enumerate(MHSampler(h0, make_data(data_size=2), steps=stepNum)):
#     if step % 5000 == 0:
#         print ('current step: %d, current posterior score: %f' % (step, h.posterior_score))
#     posProbs.append(h.posterior_score)
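
# posProbs now holds the full posterior-score trace. A minimal plotting sketch
# for eyeballing mixing (an assumption: matplotlib is available; it is not
# imported in the original file):
import matplotlib.pyplot as plt
plt.plot(posProbs)
plt.xlabel('MH step')
plt.ylabel('posterior score')
plt.show()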