Example #1
    def canIrecurse(self, data, trueset):
        d = [(datum.word, datum.X, datum.Y) for datum in data]

        hyps = [self.value[w] for w in self.all_words()]
        try:
            grammar = hyps[0].grammar
        except (AttributeError, IndexError):
            return True  # Because if it doesn't have a grammar, it's a force function
        counts, inx, _ = create_counts(grammar, hyps)
        relinx = [(k[2], inx[k]) for k in inx.keys() if k[1] == 'recurse_']
        if len(relinx) == 0:
            return True
        counts = np.sum(counts['SET'], axis=0)

        F1s = []
        for wi, w in enumerate(self.all_words()):
            wd = [dp for dp in d if dp[0] == w]  # Word Data
            pw = [dp for dp in trueset if dp[0] == w]  # Proposed Word Data
            pId = [dp for dp in wd if dp in pw]  # Proposed Word Data Observed
            precision = float(len(set(pId))) / float(len(pw) + 1e-6)
            recall = float(len(pId)) / float(len(wd) + 1e-6)
            f1 = (2. * precision * recall) / (precision + recall + 1e-6)
            i = [ri[1] for ri in relinx if ri[0] == q(w)]
            F1s.append((counts[i], w, f1, precision, recall))
            if counts[i] >= 1 and f1 <= self.alpha * 2. / 3.:
                return False

        return True
Example #2
    def canIrecurse(self, data, trueset):
        d = [(datum.word, datum.X, datum.Y) for datum in data]

        hyps = [self.value[w] for w in self.all_words()]
        try:
            grammar = hyps[0].grammar
        except (AttributeError, IndexError):
            return True # Because if it doesn't have a grammar, it's a force function
        counts, inx, _ = create_counts(grammar, hyps)
        counts = np.sum(counts['SET'], axis=0)
        relinx = [(k[2], inx[k]) for k in inx.keys() if k[1] == 'recurse_']
        if len(relinx) == 0:
            return True

        F1s = []
        for wi, w in enumerate(self.all_words()):
            wd = [dp for dp in d if dp[0] == w] # Word Data
            pw = [dp for dp in trueset if dp[0] == w] # Proposed Word Data
            pId = [dp for dp in wd if dp in pw] # Proposed Word Data Observed
            precision = float(len(set(pId))) / float(len(pw) + 1e-6)
            recall = float(len(pId)) / float(len(wd) + 1e-6)
            f1 = (2.*precision*recall) / (precision + recall + 1e-6)
            i = [ri[1] for ri in relinx if ri[0] == q(w)]
            F1s.append((counts[i], w, f1, precision, recall))
            if counts[i] >= 1 and f1 <= self.alpha * 2. / 3.:
                return False

        return True
Example #3
def AnBnCnGrammar():
    register_primitive(flatten2str)

    grammar = Grammar()
    grammar.add_rule('START', 'flatten2str', ['LIST', 'sep=\"\"'], 1.0)
    grammar.add_rule('LIST', 'if_', ['BOOL', 'LIST', 'LIST'], 0.09)
    grammar.add_rule('BOOL', 'empty_', ['LIST'], 0.56)
    grammar.add_rule('BOOL', 'flip_', [''], 0.43)
    grammar.add_rule('LIST', 'cons_', ['ATOM', 'LIST'], 0.203)
    grammar.add_rule('LIST', 'cdr_', ['LIST'], 0.15)
    grammar.add_rule('LIST', 'car_', ['LIST'], 0.15)
    grammar.add_rule('LIST', '\'\'', None, 0.23)
    grammar.add_rule('ATOM', q('a'), None, .33)
    grammar.add_rule('ATOM', q('b'), None, .33)
    grammar.add_rule('ATOM', q('c'), None, .33)

    return grammar
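
A minimal driver sketch, not part of the original example: it assumes LOTlib's Grammar.generate(), which samples a random derivation tree from the START symbol, so the PCFG above can be smoke-tested; the loop count of 5 is arbitrary.

if __name__ == "__main__":
    grammar = AnBnCnGrammar()
    for _ in xrange(5):
        t = grammar.generate()  # sample one derivation tree from 'START'
        print t                 # the tree's string rendering, e.g. a flatten2str(...) expression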
Example #4
def DyckGrammar():
    register_primitive(flatten2str)

    TERMINAL_WEIGHT = 2.
    grammar = Grammar()
    grammar.add_rule('START', 'flatten2str', ['LIST', 'sep=\"\"'], 1.0)
    grammar.add_rule('BOOL', 'empty_', ['LIST'], 1.)
    grammar.add_rule('BOOL', 'flip_', [''], 1.0)
    grammar.add_rule('LIST', 'if_', ['BOOL', 'LIST', 'LIST'], 1.)
    grammar.add_rule('LIST', 'cons_', ['ATOM', 'LIST'], 1.)
    grammar.add_rule('LIST', 'cons_', ['LIST', 'LIST'], 1.)
    grammar.add_rule('LIST', 'cdr_', ['LIST'], 1.)
    grammar.add_rule('LIST', 'car_', ['LIST'], 1.)
    grammar.add_rule('LIST', 'recurse_', [], 1.)
    grammar.add_rule('LIST', '[]', None, TERMINAL_WEIGHT)
    grammar.add_rule('ATOM', q('('), None, TERMINAL_WEIGHT)
    grammar.add_rule('ATOM', q(')'), None, TERMINAL_WEIGHT)

    return grammar
Example #5
def make_hypothesis(s, **kwargs):
    """
    NOTE: the grammar only has the atom 'a'; you need to add other atoms yourself
    """
    grammar = eng_grammar if s == 'SimpleEnglish' else a_grammar

    if 'terminals' in kwargs:
        terminals = kwargs.pop('terminals')
        if terminals is not None:
            for e in terminals:
                grammar.add_rule('ATOM', q(e), None, 2)

    return SimpleEnglishHypothesis(grammar=grammar, **kwargs)
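
A hedged usage sketch for make_hypothesis: the word 'AnBn' and the terminals 'b' and 'c' below are illustrative assumptions, not from the original. Any s other than 'SimpleEnglish' selects a_grammar, the terminals are added as ATOM rules, and the remaining kwargs pass through to SimpleEnglishHypothesis.

h = make_hypothesis('AnBn', terminals=['b', 'c'])  # adds q('b') and q('c') to a_grammar
print h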
Example #6
def run_one(iteration, probs=None):

    m = MixtureProposal([RegenerationProposal(grammar), InsertDeleteProposal(grammar), InverseInlineProposal(grammar)], probs=probs)

    # define a wrapper to set this proposal
    def wrapped_make_h0():
        h0 = make_h0()
        h0.set_proposal_function(m)
        return h0

    sampler = MultipleChainMCMC(wrapped_make_h0, data, steps=options.SAMPLES, nchains=options.CHAINS)

    # Run evaluate on it, printing to the right locations
    evaluate_sampler(sampler, prefix="\t".join(map(str, [options.MODEL, iteration, q(str(probs))])), out_hypotheses=out_hypotheses, out_aggregate=out_aggregate)
Example #7
def run(options, ndata):
    """
    This runs on the DATA_RANGE amounts of data and returns all hypotheses in the top count.
    """
    if LOTlib.SIG_INTERRUPTED:
        return 0, set()

    language = eval(options.LANG+"()")
    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE
    #print data

    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule('ATOM', q(t), None, 2)

    h0 = IncrementalLexiconHypothesis(grammar=grammar)

    tn = TopN(N=options.TOP_COUNT)

    for outer in xrange(options.N): # how many do we add?
        # add to the grammar
        grammar.add_rule('SELFF', '%s' % (outer), None, 1.0)

        # Add one more to the number of words here
        h0.set_word(outer, h0.make_hypothesis(grammar=grammar))
        h0.N = outer+1
        assert len(h0.value.keys())==h0.N==outer+1

        # now run mcmc
        for h in break_ctrlc(MHSampler(h0, data, steps=options.STEPS)):
            tn.add(h)

            # print h.posterior_score, h
            # print getattr(h, 'll_counts', None)

        # and start from where we ended
        h0 = deepcopy(h) # must deepcopy

    return ndata, tn
Example #8
def run(options, ndata):
    """
    This runs on the DATA_RANGE amounts of data and returns all hypotheses in the top count.
    """
    if LOTlib.SIG_INTERRUPTED:
        return 0, set()

    language = eval(options.LANG+"()")
    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE
    #print data

    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule('ATOM', q(t), None, 2)

    h0 = IncrementalLexiconHypothesis(grammar=grammar)

    tn = TopN(N=options.TOP_COUNT)

    for outer in xrange(options.N): # how many do we add?
        # add to the grammar
        grammar.add_rule('SELFF', '%s' % (outer), None, 1.0)

        # Add one more to the number of words here
        h0.set_word(outer, h0.make_hypothesis(grammar=grammar))
        h0.N = outer+1
        assert len(h0.value.keys())==h0.N==outer+1

        # now run mcmc
        for h in break_ctrlc(MHSampler(h0, data, steps=options.STEPS)):
            tn.add(h)

            print h.posterior_score, h
            print getattr(h, 'll_counts', None)

        # and start from where we ended
        h0 = deepcopy(h) # must deepcopy

    return ndata, tn
Example #9
def run(options, ndata):
    """
    This runs on the DATA_RANGE amounts of data and returns all hypotheses in the top count.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    language = eval(options.LANG + "()")
    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE
    # print data

    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule("ATOM", q(t), None, 2)

    h0 = AugustHypothesis(grammar=grammar, display="lambda recurse_ :%s")
    print "# Starting on ", h0

    tn = TopN(N=options.TOP_COUNT)

    # print h0.compute_posterior(data)
    # for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
    # # for h in MHSampler(h0, data, steps=options.STEPS, trace=True):
    #     print h.posterior_score, h
    #     print getattr(h, 'll_counts', None)

    with open(
        prefix + "hypotheses_" + options.LANG + "_" + str(rank) + "_" + str(ndata) + "_" + suffix + ".txt", "a"
    ) as ofile:

        for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
            tn.add(h)
            # print h.posterior_score, getattr(h, 'll_counts', None), h
            if i % options.SKIP == 0 and h.posterior_score > -Infinity:
                print >> ofile, i, ndata, h.posterior_score, h.prior, h.likelihood, h.likelihood / ndata
                print >> ofile, getattr(h, "ll_counts", None)
                print >> ofile, h, "\0"  # must add \0 when not Lexicon

    return tn
Example #10
def run():
    """A version that cares more about recent data, showing how to use
    Hypotheses.DecayedLikelihoodHypothesis.

    """
    G = grammar
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Create an initial hypothesis
    # This is where we set a number of relevant variables -- whether to use RR, alpha, etc.
    h0 = MyHypothesis(G, ll_decay=1.0, rrAlpha=1.0, args=['x'])

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Run the MH

    # Run the vanilla sampler. Without steps, it will run infinitely.
    # This prints the posterior (posterior_score), prior, likelihood, and hypothesis.
    for h in break_ctrlc(MHSampler(h0, data, 10000, skip=100)):
        print h.posterior_score, h.prior, h.likelihood, q(h)
Example #11
def run(options, ndata):
    """
    This runs on the DATA_RANGE amounts of data and returns all hypotheses in the top count.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    language = eval(options.LANG+"()")
    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE
    # print data

    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule('ATOM', q(t), None, 2)

    h0 = AugustHypothesis(grammar=grammar, display="lambda recurse_ :%s")
    print "# Starting on ", h0

    tn = TopN(N=options.TOP_COUNT)

    # print h0.compute_posterior(data)
    # for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
    # # for h in MHSampler(h0, data, steps=options.STEPS, trace=True):
    #     print h.posterior_score, h
    #     print getattr(h, 'll_counts', None)

    with open(prefix+'hypotheses_'+options.LANG+'_'+str(rank)+'_'+str(ndata)+'_'+suffix+".txt", 'a') as ofile:

        for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
            tn.add(h)
            # print h.posterior_score, getattr(h, 'll_counts', None), h
            if i%options.SKIP == 0 and h.posterior_score > -Infinity:
                print >>ofile, i, ndata, h.posterior_score, h.prior, h.likelihood, h.likelihood/ndata
                print >>ofile, getattr(h,'ll_counts', None)
                print >>ofile, h, '\0' # must add \0 when not Lexicon


    return tn
Example #12
def run_mh():
    """Run the MH."""
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # somewhat weirdly, we'll make an upper node above "START" for the two concepts
    # and require it to check if concept (an argument below) is 'A'
    grammar.add_rule('TWO_CONCEPT_START', 'if_', ['(concept==\'A\')', 'START', 'START'], 1.0)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Create an initial hypothesis
    # This is where we set a number of relevant variables -- whether to use RR, alpha, etc.
    # Here we give args as "concept" (used in TWO_CONCEPT_START above) and "x"
    h0 = RationalRulesLOTHypothesis(grammar=grammar, rrAlpha=1.0, ALPHA=0.9, start='TWO_CONCEPT_START',
                                    args=['concept', 'x'])

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Run the vanilla sampler. Without steps, it will run infinitely.
    # This prints the posterior (posterior_score), prior, likelihood, and hypothesis.
    for h in break_ctrlc(MHSampler(h0, data, 10000, skip=100)):
        print h.posterior_score, h.prior, h.likelihood, q(h)
Example #13
def run(options, ndata):
    """
    This runs on the DATA_RANGE amounts of data and returns all hypotheses in the top count.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    language = eval(options.LANG + "()")
    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE

    print data
    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule('ATOM', q(t), None, 2)

    h0 = AugustHypothesis(grammar=grammar, display="lambda recurse_ :%s")

    tn = TopN(N=options.TOP_COUNT)

    for i, h in enumerate(break_ctrlc(MHSampler(h0, data,
                                                steps=options.STEPS))):
        print h.posterior_score, h
        print getattr(h, 'll_counts', None)

    # with open(prefix+'hypotheses_'+options.LANG+'_'+str(rank)+'_'+str(ndata)+'_'+suffix+".txt", 'a') as ofile:
    #
    #     for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
    #         tn.add(h)
    #         # print h.posterior_score, getattr(h, 'll_counts', None), h
    #         if i%options.SKIP == 0:
    #             print >>ofile, "\n"
    #             print >>ofile, i, ndata, h.posterior_score, h.prior, h.likelihood, h.likelihood/ndata
    #             print >>ofile, getattr(h,'ll_counts', None),
    #             print >>ofile, h # ends in \0 so we can sort with sort -g -z

    return tn
Example #14
def run(options, ndata):
    """
    This runs on the DATA_RANGE amounts of data and returns all hypotheses in the top count.
    """
    if LOTlib.SIG_INTERRUPTED:
        return set()

    language = eval(options.LANG + "()")
    data = language.sample_data(LARGE_SAMPLE)
    assert len(data) == 1

    # renormalize the counts
    for k in data[0].output.keys():
        data[0].output[k] = float(data[0].output[k] * ndata) / LARGE_SAMPLE

    print data
    # Now add the rules to the grammar
    grammar = deepcopy(base_grammar)
    for t in language.terminals():  # add in the specifics
        grammar.add_rule("ATOM", q(t), None, 2)

    h0 = AugustHypothesis(grammar=grammar, display="lambda recurse_ :%s")

    tn = TopN(N=options.TOP_COUNT)

    for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
        print h.posterior_score, h
        print getattr(h, "ll_counts", None)

    # with open(prefix+'hypotheses_'+options.LANG+'_'+str(rank)+'_'+str(ndata)+'_'+suffix+".txt", 'a') as ofile:
    #
    #     for i, h in enumerate(break_ctrlc(MHSampler(h0, data, steps=options.STEPS))):
    #         tn.add(h)
    #         # print h.posterior_score, getattr(h, 'll_counts', None), h
    #         if i%options.SKIP == 0:
    #             print >>ofile, "\n"
    #             print >>ofile, i, ndata, h.posterior_score, h.prior, h.likelihood, h.likelihood/ndata
    #             print >>ofile, getattr(h,'ll_counts', None),
    #             print >>ofile, h # ends in \0 so we can sort with sort -g -z

    return tn
Example #15
# flatten2str lives at the top; it takes a cons/cdr/car structure and projects it to a string
grammar.add_rule('START', 'flatten2str', ['EXPR'], 1.0)

grammar.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1.)
grammar.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 1.)
grammar.add_rule('BOOL', 'not_', ['BOOL'], 1.)

grammar.add_rule('EXPR', 'if_', ['BOOL', 'EXPR', 'EXPR'], 1.)
grammar.add_rule('BOOL', 'equal_', ['EXPR', 'EXPR'], 1.)

grammar.add_rule('BOOL', 'flip_', [''], TERMINAL_WEIGHT)

# List-building operators
grammar.add_rule('EXPR', 'cons_', ['EXPR', 'EXPR'], 1.)
grammar.add_rule('EXPR', 'cdr_', ['EXPR'], 1.)
grammar.add_rule('EXPR', 'car_', ['EXPR'], 1.)

grammar.add_rule('EXPR', '[]', None, TERMINAL_WEIGHT)
grammar.add_rule('EXPR', q('D'), None, TERMINAL_WEIGHT)
grammar.add_rule('EXPR', q('A'), None, TERMINAL_WEIGHT)
grammar.add_rule('EXPR', q('N'), None, TERMINAL_WEIGHT)
grammar.add_rule('EXPR', q('V'), None, TERMINAL_WEIGHT)
grammar.add_rule('EXPR', q('who'), None, TERMINAL_WEIGHT)


## Allow lambda abstraction
grammar.add_rule('EXPR', 'apply_', ['LAMBDAARG', 'LAMBDATHUNK'], 1)
grammar.add_rule('LAMBDAARG',   'lambda', ['EXPR'], 1., bv_type='EXPR', bv_args=[])
grammar.add_rule('LAMBDATHUNK', 'lambda', ['EXPR'], 1., bv_type=None, bv_args=None)  # A thunk

Example #16
# (but the actual RR prior does not care about these probabilities)

grammar = Grammar()

grammar.add_rule('START', '', ['WORD'], 1.0)

grammar.add_rule('BOOL', 'and_',    ['BOOL', 'BOOL'], 1./3.)
grammar.add_rule('BOOL', 'or_',     ['BOOL', 'BOOL'], 1./3.)
grammar.add_rule('BOOL', 'not_',    ['BOOL'], 1./3.)

grammar.add_rule('BOOL', 'True',    None, 1.0/2.)
grammar.add_rule('BOOL', 'False',   None, 1.0/2.)

# note that this can take basically any types for return values
grammar.add_rule('WORD', 'if_',    ['BOOL', 'WORD', 'WORD'], 0.5)
grammar.add_rule('WORD', q('undef'), None, 0.5)
# grammar.add_rule('WORD', 'if_',    ['BOOL', 'WORD', q('undef')], 0.5)
# grammar.add_rule('WORD', 'ifU_',    ['BOOL', 'WORD'], 0.5)  # if returning undef if condition not met

grammar.add_rule('BOOL', 'cardinality1_',    ['SET'], 1.0)
grammar.add_rule('BOOL', 'cardinality2_',    ['SET'], 1.0)
grammar.add_rule('BOOL', 'cardinality3_',    ['SET'], 1.0)

grammar.add_rule('BOOL', 'equal_',    ['WORD', 'WORD'], 1.0)

grammar.add_rule('SET', 'union_',     ['SET', 'SET'], 1./3.)
grammar.add_rule('SET', 'intersection_',     ['SET', 'SET'], 1./3.)
grammar.add_rule('SET', 'setdifference_',     ['SET', 'SET'], 1./3.)
grammar.add_rule('SET', 'select_',     ['SET'], 1.0)

grammar.add_rule('SET', 'x',     None, 4.0)
Example #17
FEATURE_WEIGHT = 2.0  # Relative weight of expanding to a terminal feature

# Set up the grammar
# Here, we create our own instead of using DefaultGrammars.Nand because
# we don't want a BOOL/PREDICATE distinction
grammar = Grammar()

grammar.add_rule("START", "", ["BOOL"], 1.0)

grammar.add_rule("BOOL", "nand_", ["BOOL", "BOOL"], 1.0 / 3.0)
grammar.add_rule("BOOL", "nand_", ["True", "BOOL"], 1.0 / 3.0)
grammar.add_rule("BOOL", "nand_", ["False", "BOOL"], 1.0 / 3.0)

# And finally, add the primitives
for s in SHAPES:
    grammar.add_rule("BOOL", "is_shape_", ["x", q(s)], FEATURE_WEIGHT)

for c in COLORS:
    grammar.add_rule("BOOL", "is_color_", ["x", q(c)], FEATURE_WEIGHT)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData, make_all_objects
from LOTlib.Miscellaneous import sample_one

all_objects = make_all_objects(shape=SHAPES, color=COLORS)

# Generator for data
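
The excerpt cuts off at the data generator. A plausible sketch in the same style, assuming the FunctionData/sample_one imports above and that make_all_objects yields objects with .color/.shape attributes; the target concept (color == 'blue'), alpha, and n are illustrative.

def make_data(n=1, alpha=0.95):
    data = []
    for _ in xrange(n):
        obj = sample_one(all_objects)  # draw one object uniformly at random
        data.append(FunctionData(input=[obj], output=(obj.color == 'blue'), alpha=alpha))
    return data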
Example #18
        self.penalty = penalty

        self.seen = Counter()

    def internal_sample(self, h):
        """
        Keep track of how many samples we've drawn for h.
        """
        self.seen[h] += 1

    def compute_posterior(self, h, data):
        """
        Wrap the posterior with a penalty for how often we've seen h; the penalty is applied to the prior.
        """
        mypenalty = self.seen[h] * self.penalty
        np, nl = MHSampler.compute_posterior(self, h, data)
        return np + mypenalty, nl


if __name__ == "__main__":

    from LOTlib.Examples.Number.Shared import generate_data, NumberExpression, grammar, get_knower_pattern
    from LOTlib.Miscellaneous import q

    data = generate_data(500)
    h0 = NumberExpression(grammar)
    for h in TabooMCMC(h0, data, steps=10000):
        print q(get_knower_pattern(h)), h.posterior_score, h.prior, h.likelihood, q(h)
Example #19
        MHSampler.__init__(self, h0, data, **kwargs)
        self.penalty = penalty
        self.seen = Counter()

    def next(self):
        v = MHSampler.next(self)
        self.seen[v] += 1
        return v

    def compute_posterior(self, h, data, **kwargs):
        """
        Compute prior & likelihood for `h`, penalizing prior by how many samples have been generated so far.

        """
        return self.seen[h] * self.penalty + h.compute_posterior(data, **kwargs)


if __name__ == "__main__":

    from LOTlib import break_ctrlc
    from LOTlib.Examples.Number.Model import *
    from LOTlib.Miscellaneous import q

    data = make_data(500)
    h0 = NumberExpression(grammar)

    tmc = TabooMCMC(h0, data, steps=10000)

    for h in break_ctrlc(tmc):
        print tmc.seen[h], h.posterior_score, h.prior, h.likelihood, q(h)
Example #20
grammar = Grammar()

grammar.add_rule('START', '', ['WORD'], 1.0)

grammar.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1. / 3.)
grammar.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 1. / 3.)
grammar.add_rule('BOOL', 'not_', ['BOOL'], 1. / 3.)

grammar.add_rule('BOOL', 'True', None, 1.0 / 2.)
grammar.add_rule('BOOL', 'False', None, 1.0 / 2.)

# note that this can take basically any types for return values
grammar.add_rule('WORD', '(%s if %s else %s)', ['WORD', 'BOOL', 'WORD'], 0.5)

grammar.add_rule('WORD', q('undef'), None, 0.5)
# grammar.add_rule('WORD', 'if_',    ['BOOL', 'WORD', q('undef')], 0.5)
# grammar.add_rule('WORD', 'ifU_',    ['BOOL', 'WORD'], 0.5)  # if returning undef if condition not met

grammar.add_rule('BOOL', 'cardinality1_', ['SET'], 1.0)
grammar.add_rule('BOOL', 'cardinality2_', ['SET'], 1.0)
grammar.add_rule('BOOL', 'cardinality3_', ['SET'], 1.0)

grammar.add_rule('BOOL', 'equal_', ['WORD', 'WORD'], 1.0)

grammar.add_rule('SET', 'union_', ['SET', 'SET'], 1. / 3.)
grammar.add_rule('SET', 'intersection_', ['SET', 'SET'], 1. / 3.)
grammar.add_rule('SET', 'setdifference_', ['SET', 'SET'], 1. / 3.)
grammar.add_rule('SET', 'select_', ['SET'], 1.0)

grammar.add_rule('SET', 'x', None, 4.0)
Example #21
# -*- coding: utf-8 -*-
"""
A quick script to load some large data and re-evaluate it, generating a file readable by plot_learning_curve.R
"""

import pickle
from LOTlib.Miscellaneous import q
from LOTlib.Examples.Number.Model import *

LARGE_DATA_SIZE = 1000

if __name__ == "__main__":

    #now evaluate on different amounts of data too:
    huge_data = generate_data(LARGE_DATA_SIZE)
    print "# Generated data!"

    allfs = pickle.load(open("mpi-run.pkl")) # for now, use data from the run on February 10
    print "# Loaded!"

    # save this with a huge data set -- eval with average ll
    H = allfs.get_all()

    [h.compute_posterior(huge_data) for h in H]

    # show the *average* ll for each hypothesis
    for h in H:
        if h.prior > float("-inf"):
            print h.prior, h.likelihood/float(LARGE_DATA_SIZE), q(get_knower_pattern(h)),  q(h)
Example #22
        MHSampler.__init__(self, h0, data, **kwargs)
        self.penalty = penalty

        self.seen = Counter()

    def internal_sample(self, h):
        """
        Keep track of how many samples we've drawn for h.
        """
        self.seen[h] += 1

    def compute_posterior(self, h, data):
        """
        Wrap the posterior with a penalty for how often we've seen h; the penalty is applied to the prior.
        """
        mypenalty = self.seen[h] * self.penalty
        np, nl = MHSampler.compute_posterior(self, h, data)
        return np + mypenalty, nl


if __name__ == "__main__":

    from LOTlib.Examples.Number.Shared import generate_data, NumberExpression, grammar, get_knower_pattern
    from LOTlib.Miscellaneous import q

    data = generate_data(500)
    h0 = NumberExpression(grammar)
    for h in TabooMCMC(h0, data, steps=10000):
        print q(get_knower_pattern(h)), h.posterior_score, h.prior, h.likelihood, q(h)
Example #23
def makeBiasedGrammar(objects, nterms=['Tree', 'Set', 'Gender', 'Generation', 'Ancestry', 'Paternity', 'English'],
                      terms=['X', 'objects', 'all'], recursive=False, words=None, compositional=True):
    """
    Define a grammar for tree relations
    """

    grammar = Grammar()

    grammar.add_rule('START', '', ['SET'], 1.0)

    if 'Tree' in nterms:
        # TREE
        grammar.add_rule('SET', 'parents_of_', ['SET', 'C'], 1.0)
        grammar.add_rule('SET', 'children_of_', ['SET', 'C'], 2.6118861522)
        grammar.add_rule('SET', 'spouses_of_', ['SET', 'C'], 46.1592503413)

    if 'Set' in nterms:
        # SET THEORETIC
        grammar.add_rule('SET', 'union_', ['SET', 'SET'], 82.6253980731)
        grammar.add_rule('SET', 'complement_', ['SET', 'C'], 4.134794019)
        grammar.add_rule('SET', 'intersection_', ['SET', 'SET'], 13.6030444971)
        grammar.add_rule('SET', 'setdifference_', ['SET', 'SET'], 12.1666763444)

    if 'Gender' in nterms:
        # GENDER
        grammar.add_rule('SET', 'female_', ['SET'], 209.5667590174)
        grammar.add_rule('SET', 'male_', ['SET'], 266.749332462)

    if 'Generation' in nterms:
        # GENERATION
        grammar.add_rule('SET', 'generation0_', ['SET', 'C'], 4.9008668098)
        grammar.add_rule('SET', 'generation1_', ['SET', 'C'], 1.3398224552)
        grammar.add_rule('SET', 'generation2_', ['SET', 'C'], 1.165400777)

    if 'Ancestry' in nterms:
        # ANCESTRY
        grammar.add_rule('SET', 'ancestors', ['SET', 'C'], 8.0872979353)
        grammar.add_rule('SET', 'descendants', ['SET', 'C'], 3.1124377558)

    if 'Paternity' in nterms:
        # PATERNITY
        grammar.add_rule('SET', 'maternal_', ['SET', 'C'], 2.2192339232)
        grammar.add_rule('SET', 'paternal_', ['SET', 'C'], 1.3887916971)

    if 'English' in nterms:
        if compositional:
            lhs = 'SET'
        else:
            lhs = 'O'

        # ENGLISH
        grammar.add_rule('SET', 'brothers_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'sisters_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'moms_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'dads_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'children_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'uncles_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'aunts_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'grandpas_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'grandmas_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'cousins_', [lhs, 'C'], 1.0)

    if recursive and words is not None:
        for w in words:
            grammar.add_rule('SET', 'recurse_', [q(w), 'C', 'SET'], 1.0 / len(words))

    if 'objects' in terms:
        if compositional:
            for o in objects:
                grammar.add_rule('SET', 'set', ["[\'%s\']" % o], 123.5304511982 / len(objects))
        else:
            for o in objects:
                grammar.add_rule('O', 'set', ["[\'%s\']" % o], 123.5304511982 / len(objects))

    if 'all' in terms:
        grammar.add_rule('SET', 'all_', ['C'], 3.8903782136)

    if 'X' in terms:
        if compositional:
            grammar.add_rule('SET', 'X', None, 69.8908794494)  # Had to give high prob to make pcfg well-defined
        else:
            grammar.add_rule('O', 'X', None, 69.8908794494)  # Had to give high prob to make pcfg well-defined

    return grammar
Example #24
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DefaultGrammars import DNF
from LOTlib.Miscellaneous import q

# DNF includes the logical connectives by default, so we need to add predicates to it.
grammar = DNF

# Two predicates for checking x's color and shape
# Note: per style, functions in the LOT end in _
grammar.add_rule('PREDICATE', 'is_color_', ['x', 'COLOR'], 1.0)
grammar.add_rule('PREDICATE', 'is_shape_', ['x', 'SHAPE'], 1.0)

# Some colors/shapes each (for this simple demo)
# These are written in quotes so they can be evaled
grammar.add_rule('COLOR', q('red'), None, 1.0)
grammar.add_rule('COLOR', q('blue'), None, 1.0)
grammar.add_rule('COLOR', q('green'), None, 1.0)
grammar.add_rule('COLOR', q('mauve'), None, 1.0)

grammar.add_rule('SHAPE', q('square'), None, 1.0)
grammar.add_rule('SHAPE', q('circle'), None, 1.0)
grammar.add_rule('SHAPE', q('triangle'), None, 1.0)
grammar.add_rule('SHAPE', q('diamond'), None, 1.0)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Hypothesis
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.Hypotheses.RationalRulesLOTHypothesis import RationalRulesLOTHypothesis
Example #25
grammar.add_rule('START', '', ['BOOL'],  0.7)
grammar.add_rule('START', 'True',  None, 0.2)
grammar.add_rule('START', 'False', None, 0.1)

grammar.add_rule('BOOL', 'and_',     ['BOOL', 'BOOL'], 0.1)
grammar.add_rule('BOOL', 'or_',      ['BOOL', 'BOOL'], 0.05)
grammar.add_rule('BOOL', 'not_',     ['BOOL'],         0.025)
grammar.add_rule('BOOL', 'iff_',     ['BOOL', 'BOOL'], 0.0249)
grammar.add_rule('BOOL', 'implies_', ['BOOL', 'BOOL'], 0.0001) # if we sample hypotheses (below), we will have high uncertainty on this
grammar.add_rule('BOOL', '',         ['FEATURE'],      0.8)

grammar.add_rule('FEATURE', 'is_shape_', ['x', 'SHAPE'], 0.3)
grammar.add_rule('FEATURE', 'is_color_', ['x', 'COLOR'], 0.7)

for i, s in enumerate(SHAPES):
    grammar.add_rule('SHAPE', '%s'%q(s), None, 2.0 * (i+1))

for i, c in enumerate(COLORS):
    grammar.add_rule('COLOR', '%s'%q(c), None, 1.0/len(COLORS))


# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Hypothesis
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Hypotheses.Likelihoods.BinaryLikelihood import BinaryLikelihood

class MyHypothesis(BinaryLikelihood, LOTHypothesis):
    def __init__(self, grammar=grammar, **kwargs):
        LOTHypothesis.__init__(self, grammar=grammar, display="lambda x : %s", maxnodes=150, **kwargs)
Example #26
def run_one(iteration, model, model2data, probs):
    if LOTlib.SIG_INTERRUPTED: # do this so we don't create (big) hypotheses
        return

    # Take model and load the function to create hypotheses
    # Data is passed in to be constant across runs
    if re.search(r":", model):
        m, d = re.split(r":", model)
        make_hypothesis, _ = load_example(m)
    else:
        make_hypothesis, _ = load_example(model)

    htmp = make_hypothesis() # just use this to get the grammar

    # Make a new class to wrap our mixture in
    class WrappedClass(MixtureProposer, type(htmp)):
        pass

    # define a wrapper to set this proposal
    def wrapped_make_hypothesis(**kwargs):
        h = WrappedClass(**kwargs)
        print ">>", htmp, model, h, kwargs
        h.set_proposal_probabilities(probs)
        return h

    sampler = MultipleChainMCMC(wrapped_make_hypothesis, model2data[model], steps=options.SAMPLES, nchains=options.CHAINS)

    with open(options.OUT+"/aggregate.%s" % get_rank(), 'a') as out_aggregate:
        evaluate_sampler(sampler, trace=False, prefix="\t".join(map(str, [model, iteration, q(str(probs)) ])),
                         out_aggregate=out_aggregate, print_every=options.PRINTEVERY)
Example #27
grammar = Grammar()

grammar.add_rule("START", "", ["WORD"], 1.0)

grammar.add_rule("BOOL", "and_", ["BOOL", "BOOL"], 1.0 / 3.0)
grammar.add_rule("BOOL", "or_", ["BOOL", "BOOL"], 1.0 / 3.0)
grammar.add_rule("BOOL", "not_", ["BOOL"], 1.0 / 3.0)

grammar.add_rule("BOOL", "True", None, 1.0 / 2.0)
grammar.add_rule("BOOL", "False", None, 1.0 / 2.0)

# note that this can take basically any types for return values
grammar.add_rule("WORD", "(%s if %s else %s)", ["WORD", "BOOL", "WORD"], 0.5)

grammar.add_rule("WORD", q("undef"), None, 0.5)
# grammar.add_rule('WORD', 'if_',    ['BOOL', 'WORD', q('undef')], 0.5)
# grammar.add_rule('WORD', 'ifU_',    ['BOOL', 'WORD'], 0.5)  # if returning undef if condition not met

grammar.add_rule("BOOL", "cardinality1_", ["SET"], 1.0)
grammar.add_rule("BOOL", "cardinality2_", ["SET"], 1.0)
grammar.add_rule("BOOL", "cardinality3_", ["SET"], 1.0)

grammar.add_rule("BOOL", "equal_", ["WORD", "WORD"], 1.0)

grammar.add_rule("SET", "union_", ["SET", "SET"], 1.0 / 3.0)
grammar.add_rule("SET", "intersection_", ["SET", "SET"], 1.0 / 3.0)
grammar.add_rule("SET", "setdifference_", ["SET", "SET"], 1.0 / 3.0)
grammar.add_rule("SET", "select_", ["SET"], 1.0)

grammar.add_rule("SET", "x", None, 4.0)
Example #28
if __name__ == "__main__":
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # Main running

    if is_master_process():
        display_option_summary(options)
        huge_data = generate_data(options.LARGE_DATA_SIZE)

    # choose the appropriate map function
    argarray = map(lambda x: [x], options.DATA_AMOUNTS * options.CHAINS)

    seen = set()
    for fs in MPI_unorderedmap(run, numpy.random.permutation(argarray)):
        for h in fs.get_all():
            if h not in seen:
                seen.add(h)
                h.compute_posterior(huge_data)

                if h.prior > float("-inf"):
                    print h.prior, \
                        h.likelihood / float(options.LARGE_DATA_SIZE), \
                        q(get_knower_pattern(h)), \
                        qq(h)

            sys.stdout.flush()

    import pickle
    with open(options.OUT_PATH, 'w') as f:
        pickle.dump(seen, f)
Example #29
# (but the actual RR prior does not care about these probabilities)

grammar = Grammar()

grammar.add_rule('START', '', ['WORD'], 1.0)

grammar.add_rule('BOOL', 'and_',    ['BOOL', 'BOOL'], 1./3.)
grammar.add_rule('BOOL', 'or_',     ['BOOL', 'BOOL'], 1./3.)
grammar.add_rule('BOOL', 'not_',    ['BOOL'], 1./3.)

grammar.add_rule('BOOL', 'True',    None, 1.0/2.)
grammar.add_rule('BOOL', 'False',   None, 1.0/2.)

# note that this can take basically any types for return values
grammar.add_rule('WORD', 'if_',    ['BOOL', 'WORD', 'WORD'], 0.5)
grammar.add_rule('WORD', q('undef'), None, 0.5)
# grammar.add_rule('WORD', 'if_',    ['BOOL', 'WORD', q('undef')], 0.5)
# grammar.add_rule('WORD', 'ifU_',    ['BOOL', 'WORD'], 0.5)  # if returning undef if condition not met

grammar.add_rule('BOOL', 'cardinality1_',    ['SET'], 1.0)
grammar.add_rule('BOOL', 'cardinality2_',    ['SET'], 1.0)
grammar.add_rule('BOOL', 'cardinality3_',    ['SET'], 1.0)

grammar.add_rule('BOOL', 'equal_',    ['WORD', 'WORD'], 1.0)

grammar.add_rule('SET', 'union_',     ['SET', 'SET'], 1./3.)
grammar.add_rule('SET', 'intersection_',     ['SET', 'SET'], 1./3.)
grammar.add_rule('SET', 'setdifference_',     ['SET', 'SET'], 1./3.)
grammar.add_rule('SET', 'select_',     ['SET'], 1.0)

grammar.add_rule('SET', 'x',     None, 4.0)
Example #30
def makeGrammar(objects,  nterms=['Tree', 'Set', 'Gender', 'Generation', 'Ancestry', 'Paternity', 'English'],
                terms=['X', 'objects', 'all'], recursive=False, words=None, compositional=True, abstractP=10.0):
    """
    Define a grammar for tree relations
    """

    grammar = Grammar()

    grammar.add_rule('START', '', ['SET'], 1.0)

    if 'Tree' in nterms:
        # TREE
        grammar.add_rule('SET', 'parents_of_', ['SET', 'C'], 1.0)
        grammar.add_rule('SET', 'children_of_', ['SET', 'C'], 1.0)
        grammar.add_rule('SET', 'spouses_of_', ['SET', 'C'], 1.0)

    if 'Set' in nterms:
        # SET THEORETIC
        grammar.add_rule('SET', 'union_', ['SET', 'SET'], 1.0)
        grammar.add_rule('SET', 'complement_', ['SET', 'C'], 1.0)
        grammar.add_rule('SET', 'intersection_', ['SET', 'SET'], 1.0)
        grammar.add_rule('SET', 'setdifference_', ['SET', 'SET'], 1.0)

    if 'Gender' in nterms:
        # GENDER
        grammar.add_rule('SET', 'female_', ['SET'], 1.0 / 2)
        grammar.add_rule('SET', 'male_', ['SET'], 1.0 / 2)
        grammar.add_rule('SET', 'samegender_', ['SET', 'C'], 1.0)

    if 'Generation' in nterms:
        # GENERATION
        grammar.add_rule('SET', 'generation0_', ['SET', 'C'], 1.0/3)
        grammar.add_rule('SET', 'generation1_', ['SET', 'C'], 1.0/3)
        grammar.add_rule('SET', 'generation2_', ['SET', 'C'], 1.0/3)

    if 'Ancestry' in nterms:
        # ANCESTRY
        grammar.add_rule('SET', 'ancestors', ['SET', 'C'], 1.0)
        grammar.add_rule('SET', 'descendants', ['SET', 'C'], 1.0)

    if 'Paternity' in nterms:
        # PATERNITY
        grammar.add_rule('SET', 'maternal_', ['SET', 'C'], 1.0)
        grammar.add_rule('SET', 'paternal_', ['SET', 'C'], 1.0)

    if 'English' in nterms:
        if compositional:
            lhs = 'SET'
        else:
            lhs = 'O'

        # ENGLISH
        grammar.add_rule('SET', 'brothers_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'sisters_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'moms_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'dads_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'children_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'uncles_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'aunts_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'grandpas_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'grandmas_', [lhs, 'C'], 1.0)
        grammar.add_rule('SET', 'cousins_', [lhs, 'C'], 1.0)

    if recursive and words is not None:
        for w in words:
            grammar.add_rule('SET', 'recurse_', [q(w), 'C', 'SET'], 1.0)

    if 'objects' in terms:
        if compositional:
            for o in objects:
                grammar.add_rule('SET', 'set', ["[\'%s\']" % o], abstractP/len(objects))
        else:
            for o in objects:
                grammar.add_rule('O', 'set', ["[\'%s\']" % o], abstractP/len(objects))

    if 'all' in terms:
        grammar.add_rule('SET', 'all_', ['C'], 1.0)

    if 'X' in terms:
        if compositional:
            grammar.add_rule('SET', 'X', None, 10.0) # Had to give high prob to make pcfg well-defined
        else:
            grammar.add_rule('O', 'X', None, 10.0) # Had to give high prob to make pcfg well-defined

    return grammar
Example #31
grammar.add_rule('START', '', ['BOOL'], 1.)

grammar.add_rule('BOOL', '(%s == %s)', ['NUMBER', 'NUMBER'], 1.)
grammar.add_rule('BOOL', '(not %s)', ['BOOL'], 1.)

grammar.add_rule('BOOL', '(%s and %s)', ['BOOL', 'BOOL'], 1.)
grammar.add_rule('BOOL', '(%s or %s)',  ['BOOL', 'BOOL'], 1.)  # use the short_circuit form

grammar.add_rule('NUMBER', 'x', None, 1.)
grammar.add_rule('NUMBER', '1', None, 1.)
grammar.add_rule('NUMBER', '0', None, 1.)
grammar.add_rule('NUMBER', 'plus_', ['NUMBER', 'NUMBER'], 1.)
grammar.add_rule('NUMBER', 'minus_', ['NUMBER', 'NUMBER'], 1.)

for w in WORDS:
    grammar.add_rule('BOOL', 'lexicon', [q(w), 'NUMBER'], 1.)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.DataAndObjects import FunctionData

def make_data(n=1, alpha=0.99):
    data = []
    for x in xrange(1, 10):
        data.append( FunctionData(input=['even', x], output=(x % 2 == 0), alpha=alpha) )
        data.append( FunctionData(input=['odd',  x], output=(x % 2 == 1), alpha=alpha) )
    return data*n

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Example #32
# -*- coding: utf-8 -*-
"""
A quick script to load some large data and re-evaluate it, generating a file readable by plot_learning_curve.R
"""

import pickle
from LOTlib.Miscellaneous import q
from LOTlib.Examples.Number.Model import *

LARGE_DATA_SIZE = 1000

if __name__ == "__main__":

    #now evaluate on different amounts of data too:
    huge_data = make_data(LARGE_DATA_SIZE)
    print "# Generated data!"

    allfs = pickle.load(open("mpi-run.pkl")) # for now, use data from the run on February 10
    print "# Loaded!"

    # save this with a huge data set -- eval with average ll
    H = allfs.get_all()

    [h.compute_posterior(huge_data) for h in H]

    # show the *average* ll for each hypothesis
    for h in H:
        if h.prior > float("-inf"):
            print h.prior, h.likelihood/float(LARGE_DATA_SIZE), q(h.get_knower_pattern()),  q(h)
Example #33
        self.ll_decay = ll_decay # needed here

def make_hypothesis(**kwargs):
    return MyHypothesis(grammar=grammar, rrAlpha=1.0, **kwargs)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Main
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

if __name__ == "__main__":

    from LOTlib import break_ctrlc
    from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
    from LOTlib.Miscellaneous import q

    # Create an initial hypothesis
    # This is where we set a number of relevant variables -- whether to use RR, alpha, etc.
    h0 = MyHypothesis(grammar, ll_decay=1.0, rrAlpha=1.0, args=['x'])

    data = make_data()

    # Run the vanilla sampler. Without steps, it will run infinitely.
    # This prints the posterior (posterior_score), prior, likelihood, and hypothesis.
    for h in break_ctrlc(MHSampler(h0, data, 10000, skip=100, shortcut_likelihood=False)):
        print h.posterior_score, h.prior, h.likelihood, q(h)

    # This setup requires the *later* data to be upweighted, meaning that hypotheses that get
    # later data wrong should be given lower likelihood. But also with the decay, the overall
    # magnitude of the likelihood decreases.

Example #34
grammar.add_rule('START', 'False', None, 0.1)

grammar.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 0.1)
grammar.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 0.05)
grammar.add_rule('BOOL', 'not_', ['BOOL'], 0.025)
grammar.add_rule('BOOL', 'iff_', ['BOOL', 'BOOL'], 0.0249)
grammar.add_rule(
    'BOOL', 'implies_', ['BOOL', 'BOOL'], 0.0001
)  # if we sample hypotheses (below), we will have high uncertainty on this
grammar.add_rule('BOOL', '', ['FEATURE'], 0.8)

grammar.add_rule('FEATURE', 'is_shape_', ['x', 'SHAPE'], 0.3)
grammar.add_rule('FEATURE', 'is_color_', ['x', 'COLOR'], 0.7)

for i, s in enumerate(SHAPES):
    grammar.add_rule('SHAPE', '%s' % q(s), None, 2.0 * (i + 1))

for i, c in enumerate(COLORS):
    grammar.add_rule('COLOR', '%s' % q(c), None, 1.0 / len(COLORS))

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Hypothesis
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis
from LOTlib.Hypotheses.Likelihoods.BinaryLikelihood import BinaryLikelihood


class MyHypothesis(BinaryLikelihood, LOTHypothesis):
    def __init__(self, grammar=grammar, **kwargs):
        LOTHypothesis.__init__(self, grammar=grammar, display="lambda x : %s", maxnodes=150, **kwargs)
Example #35
from LOTlib.Grammar import Grammar
from LOTlib.Miscellaneous import q

base_grammar = Grammar()
base_grammar.add_rule('START', 'flatten2str', ['LIST', 'sep=\"\"'], 1.0)
base_grammar.add_rule('LIST', 'if_', ['BOOL', 'LIST', 'LIST'], 1.)
base_grammar.add_rule('LIST', 'cons_', ['ATOM', 'LIST'], 1.)
base_grammar.add_rule('LIST', 'cons_', ['LIST', 'LIST'], 1.)
base_grammar.add_rule('LIST', 'cdr_', ['LIST'], 1.)
base_grammar.add_rule('LIST', 'car_', ['LIST'], 1.)
base_grammar.add_rule('LIST', '\'\'', None, 2)
# base_grammar.add_rule('LIST', 'recurse_', [], 1.)

base_grammar.add_rule('BOOL', 'empty_', ['LIST'], 1.)
base_grammar.add_rule('BOOL', 'flip_', [''], 1.)

from copy import deepcopy

a_grammar = deepcopy(base_grammar)
for x in 'a':
    a_grammar.add_rule('ATOM', q(x), None, 2)

eng_grammar = deepcopy(base_grammar)
for x in 'davtn':
    eng_grammar.add_rule('ATOM', q(x), None, 2)

Example #36
def makeBiasedGrammar(objects,
                      bias,
                      nterms=[
                          'Tree', 'Set', 'Gender', 'Generation', 'Ancestry',
                          'Paternity', 'English'
                      ],
                      terms=['X', 'objects', 'all'],
                      recursive=False,
                      words=None,
                      compositional=True):
    """
    Define a weighted PCFG for tree relations
        objects: a python list of strings for each person in the context
        bias: a python dictionary, bias[primitive] = weight (float)
        nterms: a python list of primitive families
        terms: a python list of terminals
        recursive: BOOL for should grammar be recursive?
        words: a python list of words to recurse
        compositional: BOOL for if english primitives can be composed

    returns
        a LOTlib Grammar object
    """

    grammar = Grammar()

    grammar.add_rule('START', '', ['SET'], 1.0)

    if 'Tree' in nterms:
        grammar.add_rule('SET', 'parents_of_', ['SET', 'C'],
                         bias['parents_of_'])
        grammar.add_rule('SET', 'children_of_', ['SET', 'C'],
                         bias['children_of_'])
        grammar.add_rule('SET', 'spouses_of_', ['SET', 'C'],
                         bias['spouses_of_'])

    if 'Set' in nterms:
        grammar.add_rule('SET', 'union_', ['SET', 'SET'], bias['union_'])
        grammar.add_rule('SET', 'complement_', ['SET', 'C'],
                         bias['complement_'])
        grammar.add_rule('SET', 'intersection_', ['SET', 'SET'],
                         bias['intersection_'])
        grammar.add_rule('SET', 'setdifference_', ['SET', 'SET'],
                         bias['setdifference_'])

    if 'Gender' in nterms:
        grammar.add_rule('SET', 'female_', ['SET'], bias['female_'])
        grammar.add_rule('SET', 'male_', ['SET'], bias['male_'])

    if 'Generation' in nterms:
        grammar.add_rule('SET', 'generation0_', ['SET', 'C'],
                         bias['generation0_'])
        grammar.add_rule('SET', 'generation1_', ['SET', 'C'],
                         bias['generation1_'])
        grammar.add_rule('SET', 'generation2_', ['SET', 'C'],
                         bias['generation2_'])

    if 'Ancestry' in nterms:
        grammar.add_rule('SET', 'ancestors', ['SET', 'C'], bias['ancestors'])
        grammar.add_rule('SET', 'descendants', ['SET', 'C'],
                         bias['descendants'])

    if 'Paternity' in nterms:
        grammar.add_rule('SET', 'maternal_', ['SET', 'C'], bias['maternal_'])
        grammar.add_rule('SET', 'paternal_', ['SET', 'C'], bias['paternal_'])

    if 'English' in nterms:
        if compositional:
            lhs = 'SET'
        else:
            lhs = 'O'

        grammar.add_rule('SET', 'brothers_', [lhs, 'C'], bias['brothers_'])
        grammar.add_rule('SET', 'sisters_', [lhs, 'C'], bias['sisters_'])
        grammar.add_rule('SET', 'moms_', [lhs, 'C'], bias['moms_'])
        grammar.add_rule('SET', 'dads_', [lhs, 'C'], bias['dads_'])
        grammar.add_rule('SET', 'childz_', [lhs, 'C'], bias['children_'])
        grammar.add_rule('SET', 'uncles_', [lhs, 'C'], bias['uncles_'])
        grammar.add_rule('SET', 'aunts_', [lhs, 'C'], bias['aunts_'])
        grammar.add_rule('SET', 'grandpas_', [lhs, 'C'], bias['grandpas_'])
        grammar.add_rule('SET', 'grandmas_', [lhs, 'C'], bias['grandmas_'])
        grammar.add_rule('SET', 'cousins_', [lhs, 'C'], bias['cousins_'])

    if recursive and words is not None:
        for w in words:
            grammar.add_rule('SET', 'recurse_', [q(w), 'C', 'SET'],
                             bias['recurse_' + w])

    if 'objects' in terms:
        if compositional:
            for o in objects:
                grammar.add_rule('SET', 'set', ["[\'%s\']" % o],
                                 bias['terminal_' + o])
        else:
            for o in objects:
                grammar.add_rule('O', 'set', ["[\'%s\']" % o],
                                 bias['terminal_' + o])

    if 'all' in terms:
        grammar.add_rule('SET', 'all_', ['C'], bias['all_'])

    if 'X' in terms:
        if compositional:
            grammar.add_rule(
                'SET', 'X', None, bias['terminal_X']
            )  # Had to give high prob to make pcfg well-defined
        else:
            grammar.add_rule(
                'O', 'X', None, bias['terminal_X']
            )  # Had to give high prob to make pcfg well-defined

    return grammar
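
A usage sketch for makeBiasedGrammar, not from the original: with the default nterms/terms, the function reads exactly the bias keys below, so a uniform bias can be built like this; the objects list and the 1.0 weights are placeholders.

primitives = ['parents_of_', 'children_of_', 'spouses_of_', 'union_', 'complement_',
              'intersection_', 'setdifference_', 'female_', 'male_',
              'generation0_', 'generation1_', 'generation2_',
              'ancestors', 'descendants', 'maternal_', 'paternal_',
              'brothers_', 'sisters_', 'moms_', 'dads_', 'children_',
              'uncles_', 'aunts_', 'grandpas_', 'grandmas_', 'cousins_',
              'all_', 'terminal_X']
objects = ['amanda', 'anne']  # hypothetical context
bias = dict((p, 1.0) for p in primitives)
bias.update(('terminal_' + o, 1.0) for o in objects)  # one weight per object terminal
grammar = makeBiasedGrammar(objects, bias)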
Example #37
'''
from LOTlib.Inference.Proposals.InsertDeleteProposal import InsertDeleteProposal
h0 = NumberExpression(grammar, proposal_function=InsertDeleteProposal(grammar))
'''

# store hypotheses we've found
allhyp = TopN(N=1000)

# ========================================================================================================
# Run the standard RationalRules sampler

mh_sampler = MHSampler(h0, data, STEPS, skip=SKIP)

for h in lot_iter(mh_sampler):
    if TRACE:
        print q(get_knower_pattern(h)), h.posterior_score, h.compute_prior(), h.compute_likelihood(data), qq(h)

    # add h to our priority queue, with priority of its log probability, h.posterior_score
    allhyp.add(h)

# ========================================================================================================
#  now re-evaluate everything we found on new data
'''
huge_data = generate_data(LARGE_DATA_SIZE)

save this with a huge data set -- eval with average ll
H = allhyp.get_sorted()

compute the posterior for each hypothesis
[ h.compute_posterior(huge_data) for h in H]
Example #38
TARGET_CONCEPTS = [lambda x: and_(is_shape_(x, 'square'), is_color_(x, 'blue')),
                   lambda x: or_(is_shape_(x, 'triangle'), is_color_(x, 'green')),
                   lambda x: or_(is_shape_(x, 'square'), is_color_(x, 'red')),
                   lambda x: and_(not_(is_shape_(x, 'rectangle')), is_color_(x, 'red')),
                   lambda x: and_(not_(is_shape_(x, 'square')), not_(is_color_(x, 'blue'))),
                   lambda x: and_(is_shape_(x, 'rectangle'), is_color_(x, 'green')),
                   lambda x: or_(not_(is_shape_(x, 'triangle')), is_color_(x, 'red'))]

# ------------------------------------------------------------------
# Set up the grammar
# Here, we create our own instead of using DefaultGrammars.Nand because
# we don't want a BOOL/PREDICATE distinction
# ------------------------------------------------------------------
FEATURE_WEIGHT = 2.  # Relative weight of expanding to a terminal feature

grammar = Grammar()

grammar.add_rule('START', '', ['BOOL'], 1.0)

grammar.add_rule('BOOL', 'nand_', ['BOOL', 'BOOL'], 1.0/3.)
grammar.add_rule('BOOL', 'nand_', ['True', 'BOOL'], 1.0/3.)
grammar.add_rule('BOOL', 'nand_', ['False', 'BOOL'], 1.0/3.)

# And finally, add the primitives
for s in SHAPES:
    grammar.add_rule('BOOL', 'is_shape_', ['x', q(s)], FEATURE_WEIGHT)
for c in COLORS:
    grammar.add_rule('BOOL', 'is_color_', ['x', q(c)], FEATURE_WEIGHT)