class XY(FormalLanguage):
    """
    The XY language discussed in Pullum & Gazdar, originally from Chomsky 1963 pg 378-9.

    This is the set of all strings xy where x != y.
    """

    def __init__(self):
        # The grammar only generates the halves (non-empty strings over {a, b});
        # the x != y constraint is imposed in sample_string.
        self.grammar = Grammar(start='S')
        for expansion, children in (('a%s', ['S']), ('b%s', ['S']), ('a', None), ('b', None)):
            self.grammar.add_rule('S', expansion, children, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['a', 'b']

    def sample_string(self):
        """Sample x and y independently from the grammar, rejecting draws with x == y, and return x + y."""
        while True:
            left = str(self.grammar.generate())
            right = str(self.grammar.generate())
            if left == right:
                continue
            return left + right

    def all_strings(self):
        """Yield x + y for every ordered pair x != y taken from compute_all_strings(l, ...), for l = 1, 2, ..."""
        for length in itertools.count(1):
            for left in compute_all_strings(length, alphabet=self.terminals()):
                for right in compute_all_strings(length, alphabet=self.terminals()):
                    if left != right:
                        yield left + right
class AmBnCmDn(FormalLanguage):
    """
    Strings of the form a^m b^n c^m d^n. See Shieber 1985.
    """

    def __init__(self):
        # Context-free proxy for the a^m b^n prefix; the matching c/d tail is
        # appended in sample_string.
        proxy = Grammar(start='S')
        proxy.add_rule('S', '%s%s', ['A', 'B'], 1.0)
        proxy.add_rule('A', 'a%s', ['A'], 1.0)
        proxy.add_rule('A', 'a', None, 1.0)
        proxy.add_rule('B', 'b%s', ['B'], 1.0)
        proxy.add_rule('B', 'b', None, 1.0)
        self.grammar = proxy

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['a', 'b', 'c', 'd']

    def sample_string(self):
        """Sample a^m b^n from the grammar and append c^m d^n."""
        prefix = str(self.grammar.generate())
        num_a = prefix.count('a')
        num_b = prefix.count('b')
        return prefix + 'c' * num_a + 'd' * num_b

    def all_strings(self):
        """Yield a^n b^m c^n d^m for each pair from partitions(r, 2, 1), plus the swapped pair when n != m."""
        for total in itertools.count(1):
            # partitions does not return both orders, so the swapped pair is emitted by hand
            for n, m in partitions(total, 2, 1):
                yield 'a' * n + 'b' * m + 'c' * n + 'd' * m
                if n == m:
                    continue
                yield 'a' * m + 'b' * n + 'c' * m + 'd' * n
class Count(FormalLanguage):
    """
    The language ab abb abbb abbbb ... concatenated: ab, ababb, ababbabbb, etc.
    """

    def __init__(self):
        # This grammar is just a proxy: only the length of its output is used
        # (see sample_string).
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)

    def sample_string(self):
        """Sample a length from the proxy grammar and return 'a' + 'b'*k for k = 1..length, concatenated."""
        length = len(str(self.grammar.generate()))
        return ''.join('a' + 'b' * (k + 1) for k in range(length))

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['a', 'b']

    def all_strings(self):
        """Yield the strings in order of increasing block count; the first value yielded is the empty string."""
        current = ''
        for k in itertools.count(1):
            yield current
            # extend by the next block: one 'a' followed by k 'b's
            current = current + 'a' + 'b' * k
class AnBm(FormalLanguage):
    """
    A^n B^m, m>n, with n, m-n sampled from a geometric
    """

    def __init__(self):
        # Context-free proxy for a^n b^n; the surplus b's are added in sample_string.
        self.grammar = Grammar(start='S')
        for expansion, children in (('a%sb', ['S']), ('ab', None)):
            self.grammar.add_rule('S', expansion, children, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['a', 'b']

    def sample_string(self):
        """Sample a^n b^n from the grammar, then append at least one extra 'b'."""
        balanced = str(self.grammar.generate())
        # m - n starts at 1 and grows by one for each consecutive draw below 0.5
        surplus = 1
        while random() < 0.5:
            surplus += 1
        return balanced + 'b' * surplus

    def all_strings(self):
        """Yield a^lo b^hi for each unequal pair returned by partitions(r, 2, 1), r = 1, 2, ..."""
        for total in itertools.count(1):
            # partitions does not return both orders, so order each pair here
            for n, m in partitions(total, 2, 1):
                if n == m:
                    continue
                fewer, more = min(n, m), max(n, m)
                yield 'a' * fewer + 'b' * more
class ABn(FormalLanguage):
    """
    Strings (ab)^n for n >= 1, generated by S -> ab S | ab.
    """

    def __init__(self):
        grammar = Grammar(start='S')
        grammar.add_rule('S', 'ab%s', ['S'], 1.0)  # recursive case
        grammar.add_rule('S', 'ab', None, 1.0)     # base case
        self.grammar = grammar

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['a', 'b']
class Dyck(FormalLanguage):
    """
    Parenthesis strings generated by S -> (S) | ()S | '' (the empty string is included).
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        for expansion, children in (('(%s)', ['S']), ('()%s', ['S']), ('', None)):
            self.grammar.add_rule('S', expansion, children, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['(', ')']
class AnBnCn(FormalLanguage):
    """
    Strings a^n b^n c^n for n >= 1.

    The grammar only generates the context-free part a^n b^n; sample_string
    appends the matching run of c's.
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sb', ['S'], 1.0)
        self.grammar.add_rule('S', 'ab', None, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return list('abc')

    def sample_string(self):
        """Sample a^n b^n from the grammar and return it with c^n appended."""
        s = str(self.grammar.generate())  # a^n b^n, so len(s) == 2n
        # Floor division keeps the repeat count an int on both Python 2 and 3;
        # with '/', Python 3 produces a float and str * float raises TypeError.
        return s + 'c' * (len(s) // 2)
class HudsonKamNewport(FormalLanguage):
    """
    From Hudson Kam & Newport, simplifying out words to only POS.
    Goal is to investigate learning of probabilities on N+DET vs N (no det).
    Here, we do not include mass/count subcategories on nouns.
    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.436.7524&rep=rep1&type=pdf
    """

    def __init__(self):
        # V = transitive verb
        # v = intransitive verb
        # NOTE(review): only S rules are added here; no rule expanding 'v', 'V'
        # or 'NP' appears in this constructor -- confirm they are supplied elsewhere.
        self.grammar = Grammar(start='S')
        # Each sentence frame is added once bare and once prefixed with '!'
        for marker in ('', '!'):
            self.grammar.add_rule('S', marker + '%s%s', ['v', 'NP'], 1.0)
            self.grammar.add_rule('S', marker + '%s%s%s', ['V', 'NP', 'NP'], 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['!', 'v', 'V', 'n', 'd']

    def all_strings(self):
        """Yield str(tree) for every tree produced by the grammar's enumerate()."""
        for tree in self.grammar.enumerate():
            yield str(tree)
class An(FormalLanguage):
    """
    Strings a^n for n >= 1, generated by S -> a S | a.
    """

    def __init__(self):
        grammar = Grammar(start='S')
        grammar.add_rule('S', 'a%s', ['S'], 1.0)
        grammar.add_rule('S', 'a', None, 1.0)
        self.grammar = grammar

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['a']

    def all_strings(self):
        """Yield 'a', 'aa', 'aaa', ... without end."""
        count = 0
        while True:
            count += 1
            yield 'a' * count
class Dyck(FormalLanguage):
    """
    Parenthesis strings generated by S -> (S) | ()S | () (no empty string).
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        for expansion, children in (('(%s)', ['S']), ('()%s', ['S']), ('()', None)):
            self.grammar.add_rule('S', expansion, children, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters (closing parenthesis first)."""
        return [')', '(']

    def all_strings(self):
        """Yield everything dyck_at_depth(n) produces, for n = 1, 2, ..."""
        for depth in itertools.count(1):
            for string in dyck_at_depth(depth):
                yield string
class AB(FormalLanguage):
    """
    All non-empty strings over {a, b}, generated by S -> a S | b S | a | b.
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        for expansion, children in (('a%s', ['S']), ('b%s', ['S']), ('a', None), ('b', None)):
            self.grammar.add_rule('S', expansion, children, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['a', 'b']

    def all_strings(self):
        """Yield everything compute_all_strings(l, alphabet='ab') produces, for l = 1, 2, ..."""
        for length in itertools.count(1):
            for string in compute_all_strings(length, alphabet='ab'):
                yield string
class AnBmCmAn(FormalLanguage):
    """
    Strings a^n b^m c^m a^n (n, m >= 1), generated by
    S -> a S a | a T a and T -> b T c | bc.
    """

    def __init__(self):
        rules = (
            ('S', 'a%sa', ['S']),
            ('S', 'a%sa', ['T']),
            ('T', 'b%sc', ['T']),
            ('T', 'bc', None),
        )
        self.grammar = Grammar(start='S')
        for nonterminal, expansion, children in rules:
            self.grammar.add_rule(nonterminal, expansion, children, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        # NOTE(review): 'd' is listed although no rule or all_strings output
        # contains it -- confirm whether it is intentional.
        return ['a', 'b', 'c', 'd']

    def sample_string(self):
        """Return one string sampled directly from the grammar."""
        sampled = self.grammar.generate()
        return str(sampled)

    def all_strings(self):
        """Yield a^n b^m c^m a^n for each pair from partitions(r, 2, 1), plus the swapped pair when n != m."""
        for total in itertools.count(1):
            # partitions does not return both orders, so the swapped pair is emitted by hand
            for n, m in partitions(total, 2, 1):
                yield 'a' * n + 'b' * m + 'c' * m + 'a' * n
                if n == m:
                    continue
                yield 'a' * m + 'b' * n + 'c' * n + 'a' * m
class NewportAslin(FormalLanguage):
    """
    From Newport & Aslin 2004, Learning at a distance I.
    Only the 1-3 syllable word frames, Table 1, reduced to single characters:
        ba=b te=t gu=g do=d pi=p ra=r ke=k du=u lo=l ki=i
    middles:
        di=1 ku=2 to=3 pa=4
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # Five first/last-syllable frames, each wrapped around one middle syllable
        for frame in ('b%st', 'g%sd', 'p%sr', 'k%su', 'l%si'):
            self.grammar.add_rule('S', frame, ['MID'], 1.0)
        # The four middle syllables
        for middle in ('1', '2', '3', '4'):
            self.grammar.add_rule('MID', middle, None, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return list('btgdprkuli' + '1234')

    def all_strings(self):
        """Yield str(tree) for every tree produced by the grammar's enumerate()."""
        for tree in self.grammar.enumerate():
            yield str(tree)
class Gomez(FormalLanguage):
    """
    Gomez (2002) language 1b: frames a_d and b_e around one of the first X
    entries of OTHER_TERMINALS.
    """

    def __init__(self, X):
        assert X < len(OTHER_TERMINALS)
        self.X = X

        grammar = Grammar(start='S')
        for frame in ('a%sd', 'b%se'):
            grammar.add_rule('S', frame, ['X'], 1.0)
        # One middle-element rule per allowed terminal
        for middle in OTHER_TERMINALS[:self.X]:
            grammar.add_rule('X', '%s' % middle, None, 1.0)
        self.grammar = grammar

    def terminals(self):
        """Return the frame characters followed by the first X entries of OTHER_TERMINALS, as a list."""
        middles = OTHER_TERMINALS[:self.X]
        return list('abde' + middles)

    def all_strings(self):
        """Yield str(tree) for every tree produced by the grammar's enumerate()."""
        for tree in self.grammar.enumerate():
            yield str(tree)
class ABA(FormalLanguage):
    """
    Similar to Marcus ABB experiment, except we allow AAA (for simplicity)
    """

    def __init__(self):
        # NOTE: This grammar only produces the first two elements; the repeat of
        # the first element is added in sample_string.
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', '%s%s', ['T', 'T'], 1.0)
        for symbol in self.terminals():
            self.grammar.add_rule('T', symbol, None, 1.0)

    def sample_string(self):
        """Sample two elements from the grammar and append a copy of the first."""
        pair = str(self.grammar.generate())
        first = pair[0]
        return pair + first

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        # ga gi ta ti na ni la li
        return ['g', 'G', 't', 'T', 'n', 'N', 'l', 'L']

    def all_strings(self):
        """Yield t1 + t2 + t1 for every ordered pair of terminals (t1 == t2 included)."""
        for outer in self.terminals():
            for inner in self.terminals():
                yield outer + inner + outer
class XX(FormalLanguage):
    """
    An xx language (for discussion see Gazdar & Pullum 1982)
    """

    def __init__(self):
        # The grammar generates a single copy x from (a+b)+; sample_string doubles it.
        self.grammar = Grammar(start='S')
        for expansion, children in (('a%s', ['S']), ('b%s', ['S']), ('a', None), ('b', None)):
            self.grammar.add_rule('S', expansion, children, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['a', 'b']

    def sample_string(self):
        """Sample x from the grammar and return x followed by itself."""
        half = str(self.grammar.generate())
        return half * 2

    def all_strings(self):
        """Yield s + s for every s from compute_all_strings(l, alphabet='ab'), l = 1, 2, ..."""
        for length in itertools.count(1):
            for half in compute_all_strings(length, alphabet='ab'):
                yield half + half
class XXR(FormalLanguage):
    """
    (a,b)+ strings followed by their reverse. This can be generated by a CFG
    """

    def __init__(self):
        # The grammar generates the first half from {a,b}+; sample_string mirrors it.
        self.grammar = Grammar(start='S')
        for expansion, children in (('a%s', ['S']), ('b%s', ['S']), ('a', None), ('b', None)):
            self.grammar.add_rule('S', expansion, children, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['a', 'b']

    def sample_string(self):
        """Sample s from the grammar and return s followed by its reverse."""
        half = str(self.grammar.generate())
        return half + half[::-1]

    def all_strings(self):
        """Yield s + reversed(s) for every s from compute_all_strings(l, alphabet='ab'), l = 1, 2, ..."""
        for length in itertools.count(1):
            for half in compute_all_strings(length, alphabet='ab'):
                mirrored = half[::-1]
                yield half + mirrored
class Saffran(FormalLanguage):
    """
    From Saffran, Aslin, Newport studies.
    Strings consisting of tupiro golabu bidaku padoti,
    coded here with single characters: tpr glb Bdk PDT
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # A string is one or more words
        self.grammar.add_rule('S', '%s%s', ['T', 'S'], 1.0)
        self.grammar.add_rule('S', '%s', ['T'], 1.0)
        # The four words, each with weight 0.25
        for word in ('tpr', 'glb', 'Bdk', 'PDT'):
            self.grammar.add_rule('T', word, None, 0.25)

    def terminals(self):
        """Return the characters of the four words, in order, as a list."""
        return list('tpr' + 'glb' + 'Bdk' + 'PDT')

    def all_strings(self):
        """Yield str(tree) for every tree produced by the grammar's enumerate()."""
        for tree in self.grammar.enumerate():
            yield str(tree)
""" Define a grammar. """ from LOTlib.Grammar import Grammar grammar = Grammar() grammar.add_rule('START', '', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'star_', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'question_', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'plus_', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'or_', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('EXPR', 'str_append_', ['TERMINAL', 'EXPR'], 5.0) grammar.add_rule('EXPR', 'terminal_', ['TERMINAL'], 5.0) for v in 'abc.': grammar.add_rule('TERMINAL', v, None, 1.0)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Define the grammar # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TERMINAL_WEIGHT = 5.0 from LOTlib.Grammar import Grammar grammar = Grammar() grammar.add_rule('START', '', ['EXPR'], 1.0) grammar.add_rule('EXPR', '(%s + %s)', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('EXPR', '(%s * %s)', ['EXPR', 'EXPR'], 1.0) # grammar.add_rule('EXPR', '(%s / %s)', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('EXPR', '(%s - %s)', ['EXPR', 'EXPR'], 1.0) # grammar.add_rule('EXPR', 'plus_', ['EXPR', 'EXPR'], 1.0) # grammar.add_rule('EXPR', 'times_', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('EXPR', 'divide_', ['EXPR', 'EXPR'], 1.0) # grammar.add_rule('EXPR', 'subtract_', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('EXPR', 'exp_', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'log_', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'pow_', ['EXPR', 'EXPR'], 1.0) # including this gives lots of overflow grammar.add_rule('EXPR', 'sin_', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'cos_', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'tan_', ['EXPR'], 1.0) grammar.add_rule(
""" Set up the grammar. Note: This was updated on Dec 3 2012, after the language submission. We now include AND/OR/NOT, and S, and removed nonempty. """ from LOTlib.Grammar import Grammar grammar = Grammar() grammar.add_rule('START', 'presup_', ['BOOL', 'BOOL'], 1.0) grammar.add_rule('START', 'presup_', ['True', 'BOOL'], 1.0) grammar.add_rule('START', 'presup_', ['False', 'BOOL'], 1.0) grammar.add_rule('START', 'presup_', ['False', 'False'], 1.0) grammar.add_rule('START', 'presup_', ['True', 'True'], 1.0) grammar.add_rule('BOOL', 'not_', ['BOOL'], 1.0) #grammar.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0) #grammar.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 1.0) grammar.add_rule('BOOL', 'nonempty_', ['SET'], 1.0) # don't need this if we do logical operations grammar.add_rule('BOOL', 'empty_', ['SET'], 1.0) grammar.add_rule('BOOL', 'subset_', ['SET', 'SET'], 1.0) #grammar.add_rule('BOOL', 'exhaustive_', ['SET', 'context.S'], 1.0) grammar.add_rule('BOOL', 'cardinality1_', ['SET'], 1.0) # if cardinalities are included, don't include these! grammar.add_rule('BOOL', 'cardinality2_', ['SET'], 1.0) grammar.add_rule('BOOL', 'cardinality3_', ['SET'], 1.0)
from LOTlib.Eval import register_primitive
# NOTE(review): `LOTlib` (and `Grammar` below) are used without an import being
# visible in this fragment -- confirm they are imported earlier in the file.
register_primitive(LOTlib.Miscellaneous.flatten2str)

# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Grammar
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Weight given to every TERMINAL rule below
TERMINAL_WEIGHT = 15

grammar = Grammar()

# flatten2str lives at the top, and it takes a cons, cdr, car structure and projects it to a string
grammar.add_rule('START', 'flatten2str', ['EXPR'], 1.0)

grammar.add_rule('EXPR', 'sample_', ['SET'], 1.)
grammar.add_rule('EXPR', 'cons_', ['EXPR', 'EXPR'], 1.0 / 2.0)  # weight 0.5

# A SET is written as a double-quoted STRING of one or more TERMINALs
grammar.add_rule('SET', '"%s"', ['STRING'], 1.0)
grammar.add_rule('STRING', '%s%s', ['TERMINAL', 'STRING'], 1.0)
grammar.add_rule('STRING', '%s', ['TERMINAL'], 1.0)

grammar.add_rule('TERMINAL', 'g', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 'e', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 'k', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 's', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 'f', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 'n', None, TERMINAL_WEIGHT)
grammar.add_rule('TERMINAL', 'm', None, TERMINAL_WEIGHT)
class SimpleEnglish(FormalLanguage):
    """
    A simple English language with a few kinds of recursion all at once
    """

    def __init__(self):
        # (nonterminal, expansion, children, weight), in the order the rules are added
        rules = (
            ('S', '%s%s', ['NP', 'VP'], 2.0),
            ('NP', 'd%sn', ['AP'], 1.0),
            ('NP', 'dn', None, 1.0),
            ('NP', 'n', None, 1.0),
            ('AP', 'a%s', ['AP'], 1.0),
            ('AP', 'a', None, 2.0),
            ('VP', 'v', None, 1.0),
            ('VP', 'v%s', ['NP'], 1.0),
            ('VP', 'vt%s', ['S'], 1.0),
            ('S', 'i%sh%s', ['S', 'S'], 1.0),  # add if S then S grammar
        )
        self.grammar = Grammar(start='S')
        for nonterminal, expansion, children, weight in rules:
            self.grammar.add_rule(nonterminal, expansion, children, weight)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['d', 'n', 'a', 'v', 't', 'i', 'h']
class Reber(FormalLanguage):
    """
    From Reber, 1967
    """

    def __init__(self):
        # Each rule emits one letter and either moves to the named next
        # nonterminal or (next is None) ends the string.
        transitions = (
            ('S', 'T%s', 'S1'),
            ('S', 'V%s', 'S3'),
            ('S1', 'P%s', 'S1'),
            ('S1', 'T%s', 'S2'),
            ('S3', 'X%s', 'S3'),
            ('S3', 'V%s', 'S4'),
            ('S2', 'X%s', 'S3'),
            ('S2', 'S', None),
            ('S4', 'P%s', 'S2'),
            ('S4', 'S', None),
        )
        self.grammar = Grammar(start='S')
        for state, expansion, following in transitions:
            children = None if following is None else [following]
            self.grammar.add_rule(state, expansion, children, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['P', 'S', 'T', 'V', 'X']

    def all_strings(self):
        """Yield str(tree) for every tree produced by the grammar's enumerate()."""
        for tree in self.grammar.enumerate():
            yield str(tree)
class SimpleEnglish(FormalLanguage):
    """
    A simple English language with a few kinds of recursion all at once
    """

    def __init__(self):
        # (nonterminal, expansion, children, weight), in the order the rules are added.
        # Disabled variants kept for reference:
        #   ('NP', '%s%s', ['NP', 'PP'], 1.0)   # a little ambiguity
        #   ('VP', '%s%s', ['VP', 'PP'], 1.0)
        #   ('PP', 'p%s', ['NP'], 1.0)
        #   ('S', 'i%sh%s', ['S', 'S'], 1.0)    # if S then S -- seems hard, and unnatural to get so many
        rules = (
            ('S', '%s%s', ['NP', 'VP'], 4.0),
            ('NP', 'd%sn', ['AP'], 1.0),
            ('NP', 'dn', None, 1.0),
            ('NP', 'n', None, 2.0),
            ('AP', 'a%s', ['AP'], 1.0),
            ('AP', 'a', None, 3.0),
            ('VP', 'v', None, 2.0),         # intransitive
            ('VP', 'v%s', ['NP'], 1.0),     # transitive
            ('VP', 'vt%s', ['S'], 1.0),     # v that S
        )
        self.grammar = Grammar(start='S')
        for nonterminal, expansion, children, weight in rules:
            self.grammar.add_rule(nonterminal, expansion, children, weight)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['d', 'n', 'a', 'v', 't']

    def all_strings(self):
        """Yield str(tree) for every tree produced by the grammar's enumerate()."""
        for tree in self.grammar.enumerate():
            yield str(tree)
from LOTlib.DataAndObjects import FunctionData from LOTlib.Grammar import Grammar from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis from LOTlib.Inference.MetropolisHastings import mh_sample from math import log # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # A simple grammar for scheme, including lambda grammar = Grammar() # A very simple version of lambda calculus grammar.add_rule('START', '', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'apply_', ['FUNC', 'EXPR'], 1.0) grammar.add_rule('EXPR', 'x', None, 5.0) grammar.add_rule('FUNC', 'lambda', ['EXPR'], 1.0, bv_type='EXPR', bv_args=None) grammar.add_rule('EXPR', 'cons_', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('EXPR', 'cdr_', ['EXPR'], 1.0) grammar.add_rule('EXPR', 'car_', ['EXPR'], 1.0) grammar.add_rule('EXPR', '[]', None, 1.0) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # A class for scheme hypotheses that just computes the input/output pairs with the appropriate probability class SchemeFunction(LOTHypothesis): # Prior, proposals, __init__ are all inherited from LOTHypothesis def compute_single_likelihood(self, datum): """
WORDS = ['even', 'odd'] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Grammar # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Grammar import Grammar from LOTlib.Miscellaneous import q grammar = Grammar() grammar.add_rule('START', '', ['BOOL'], 1.) grammar.add_rule('BOOL', '(%s == %s)', ['NUMBER', 'NUMBER'], 1.) grammar.add_rule('BOOL', '(not %s)', ['BOOL'], 1.) grammar.add_rule('BOOL', '(%s and %s)', ['BOOL', 'BOOL'], 1.) grammar.add_rule('BOOL', '(%s or %s)', ['BOOL', 'BOOL'], 1.) # use the short_circuit form grammar.add_rule('NUMBER', 'x', None, 1.) grammar.add_rule('NUMBER', '1', None, 1.) grammar.add_rule('NUMBER', '0', None, 1.) grammar.add_rule('NUMBER', 'plus_', ['NUMBER', 'NUMBER'], 1.) grammar.add_rule('NUMBER', 'minus_', ['NUMBER', 'NUMBER'], 1.) for w in WORDS: grammar.add_rule('BOOL', 'lexicon', [q(w), 'NUMBER'], 1.) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Data # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def makeGrammar(objects, nterms=['Tree', 'Set', 'Gender', 'Generation', 'Ancestry', 'Paternity', 'English'],
                terms=['X', 'objects', 'all'], recursive=False, words=None, compositional=True,
                abstractP=10.0):
    """
    Build a uniform PCFG over tree (kinship) relations.

    objects: a python list of strings for each person in the context
    nterms: a python list of primitive families to include
    terms: a python list of terminals to include ('X', 'objects', 'all')
    recursive: BOOL; when True (and words is not None) add one 'recurse_' rule per word
    words: a python list of words to recurse on
    compositional: BOOL; when True the English primitives take a 'SET' first argument
        and the object / X terminals expand 'SET', otherwise both use 'O'
    abstractP: float; each object terminal gets weight abstractP / len(objects)
        (the X terminal always gets weight 10.0)

    returns a LOTlib Grammar object

    The default lists are only read with membership tests, never modified.
    """
    # (family, ((primitive, argument types), ...)) in the order the rules are added
    set_families = (
        ('Tree', (('parents_of_', ('SET', 'C')),
                  ('children_of_', ('SET', 'C')),
                  ('spouses_of_', ('SET', 'C')))),
        ('Set', (('union_', ('SET', 'SET')),
                 ('complement_', ('SET', 'C')),
                 ('intersection_', ('SET', 'SET')),
                 ('setdifference_', ('SET', 'SET')))),
        ('Gender', (('female_', ('SET',)),
                    ('male_', ('SET',)),
                    ('samegender_', ('SET', 'C')))),
        ('Generation', (('generation0_', ('SET', 'C')),
                        ('generation1_', ('SET', 'C')),
                        ('generation2_', ('SET', 'C')))),
        ('GenerationS', (('generation0_', ('SET', 'C')),
                         ('generation1s_', ('SET', 'C')),
                         ('generation2s_', ('SET', 'C')))),
        ('Ancestry', (('ancestors', ('SET', 'C')),
                      ('descendants', ('SET', 'C')))),
        ('Paternity', (('maternal_', ('SET', 'C')),
                       ('paternal_', ('SET', 'C')))),
        # 'frat_' and 'sor_' are disabled in this family
        ('Taboo', (('close_', ('SET', 'C')),)),
    )
    english_primitives = ('brothers_', 'sisters_', 'moms_', 'dads_', 'childz_',
                          'uncles_', 'aunts_', 'grandpas_', 'grandmas_', 'cousins_')

    # Nonterminal used for English-primitive arguments and for the object / X terminals
    leaf = 'SET' if compositional else 'O'

    grammar = Grammar()
    grammar.add_rule('START', '', ['SET'], 1.0)

    for family, primitives in set_families:
        if family not in nterms:
            continue
        for name, arg_types in primitives:
            grammar.add_rule('SET', name, list(arg_types), 1.0)

    if 'English' in nterms:
        for name in english_primitives:
            grammar.add_rule('SET', name, [leaf, 'C'], 1.0)

    if recursive and words is not None:
        for w in words:
            grammar.add_rule('SET', 'recurse_', [q(w), 'C', 'SET'], 1.0)

    if 'objects' in terms:
        for o in objects:
            grammar.add_rule(leaf, 'set', ["[\'%s\']" % o], abstractP / len(objects))

    if 'all' in terms:
        grammar.add_rule('SET', 'all_', ['C'], 1.0)

    if 'X' in terms:
        # Had to give high prob to make pcfg well-defined
        grammar.add_rule(leaf, 'X', None, 10.0)

    return grammar
""" This folder defines a bunch of "standard" grammars. In each, we do NOT specify the terminals, which typically expand the nonterminal PREDICATE->... """ from LOTlib.Grammar import Grammar DEFAULT_FEATURE_WEIGHT = 5.0 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SimpleBoolean_noTF = Grammar() SimpleBoolean_noTF.add_rule('START', '', ['BOOL'], 1.0) SimpleBoolean_noTF.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0) SimpleBoolean_noTF.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 1.0) SimpleBoolean_noTF.add_rule('BOOL', 'not_', ['BOOL'], 1.0) SimpleBoolean_noTF.add_rule('BOOL', '', ['PREDICATE'], DEFAULT_FEATURE_WEIGHT) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SimpleBoolean = Grammar() SimpleBoolean.add_rule('START', 'False', None, 1.0) SimpleBoolean.add_rule('START', 'True', None, 1.0) SimpleBoolean.add_rule('START', '', ['BOOL'], 1.0) SimpleBoolean.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0)
def makeCharGrammar(context):
    '''
    Defines a PCFG for characteristic hypotheses, specific to the context

    context: a Kinship context object; only context.features[0] is read here,
        and only for its length

    returns a LOTlib Grammar object
    '''
    char_grammar = Grammar()
    char_grammar.add_rule('START', '', ['CHAR'], 1.0)

    # Set operations over characteristic sets
    char_grammar.add_rule('CHAR', 'union_', ['CHAR', 'CHAR'], 1.0)
    char_grammar.add_rule('CHAR', 'complement_', ['CHAR', 'C'], 1.0)
    char_grammar.add_rule('CHAR', 'intersection_', ['CHAR', 'CHAR'], 1.0)
    char_grammar.add_rule('CHAR', 'setdifference_', ['CHAR', 'CHAR'], 1.0)

    # One KEY per entry of the first feature vector except one; the feature_
    # rule is weighted by that same count.
    num_keys = len(context.features[0]) - 1
    char_grammar.add_rule('CHAR', 'feature_', ['KEY', 'NUM', 'C'], float(num_keys))

    # range (not xrange) so this runs on Python 3 as well; KEY names are 1-based
    for f in range(num_keys):
        char_grammar.add_rule('KEY', str(f + 1), None, 1.0)

    char_grammar.add_rule('NUM', '1', None, 1.0)
    char_grammar.add_rule('NUM', '0', None, 1.0)

    return char_grammar
def makeBiasedGrammar(objects, bias, nterms=['Tree', 'Set', 'Gender', 'Generation', 'Ancestry', 'Paternity', 'English'],
                      terms=['X', 'objects', 'all'], recursive=False, words=None, compositional=True):
    """
    Build a weighted PCFG over tree (kinship) relations.

    objects: a python list of strings for each person in the context
    bias: a python dictionary, bias[primitive] = weight (float); also read with the keys
        'recurse_' + word, 'terminal_' + object, 'all_' and 'terminal_X'
    nterms: a python list of primitive families to include
    terms: a python list of terminals to include ('X', 'objects', 'all')
    recursive: BOOL; when True (and words is not None) add one 'recurse_' rule per word
    words: a python list of words to recurse on
    compositional: BOOL; when True the English primitives take a 'SET' first argument
        and the object / X terminals expand 'SET', otherwise both use 'O'

    returns a LOTlib Grammar object

    bias is only indexed for rules that are actually added, so keys for
    excluded families may be absent. The default lists are only read with
    membership tests, never modified.
    """
    # (family, ((primitive, argument types), ...)) in the order the rules are added;
    # each primitive's weight is bias[primitive]
    set_families = (
        ('Tree', (('parents_of_', ('SET', 'C')),
                  ('children_of_', ('SET', 'C')),
                  ('spouses_of_', ('SET', 'C')))),
        ('Set', (('union_', ('SET', 'SET')),
                 ('complement_', ('SET', 'C')),
                 ('intersection_', ('SET', 'SET')),
                 ('setdifference_', ('SET', 'SET')))),
        ('Gender', (('female_', ('SET',)),
                    ('male_', ('SET',)))),
        ('Generation', (('generation0_', ('SET', 'C')),
                        ('generation1_', ('SET', 'C')),
                        ('generation2_', ('SET', 'C')))),
        ('Ancestry', (('ancestors', ('SET', 'C')),
                      ('descendants', ('SET', 'C')))),
        ('Paternity', (('maternal_', ('SET', 'C')),
                       ('paternal_', ('SET', 'C')))),
    )
    # (primitive, key into bias).
    # NOTE(review): 'childz_' is the only primitive whose weight is read under a
    # different key ('children_') -- confirm this matches how bias is built.
    english_primitives = (
        ('brothers_', 'brothers_'),
        ('sisters_', 'sisters_'),
        ('moms_', 'moms_'),
        ('dads_', 'dads_'),
        ('childz_', 'children_'),
        ('uncles_', 'uncles_'),
        ('aunts_', 'aunts_'),
        ('grandpas_', 'grandpas_'),
        ('grandmas_', 'grandmas_'),
        ('cousins_', 'cousins_'),
    )

    # Nonterminal used for English-primitive arguments and for the object / X terminals
    leaf = 'SET' if compositional else 'O'

    grammar = Grammar()
    grammar.add_rule('START', '', ['SET'], 1.0)

    for family, primitives in set_families:
        if family not in nterms:
            continue
        for name, arg_types in primitives:
            grammar.add_rule('SET', name, list(arg_types), bias[name])

    if 'English' in nterms:
        for name, bias_key in english_primitives:
            grammar.add_rule('SET', name, [leaf, 'C'], bias[bias_key])

    if recursive and words is not None:
        for w in words:
            grammar.add_rule('SET', 'recurse_', [q(w), 'C', 'SET'], bias['recurse_' + w])

    if 'objects' in terms:
        for o in objects:
            grammar.add_rule(leaf, 'set', ["[\'%s\']" % o], bias['terminal_' + o])

    if 'all' in terms:
        grammar.add_rule('SET', 'all_', ['C'], bias['all_'])

    if 'X' in terms:
        # Had to give high prob to make pcfg well-defined
        grammar.add_rule(leaf, 'X', None, bias['terminal_X'])

    return grammar
class Milne(FormalLanguage):
    """
    From https://www.sciencedirect.com/science/article/pii/S0306452217304645#f0025
    """

    def __init__(self):
        # (nonterminal, expansion, next nonterminal or None), in the order the rules are added
        transitions = (
            ('S', '%s', 'A'),
            ('A', 'a%s', 'D'),
            ('A', 'a%s', 'C'),
            ('D', 'd%s', 'C'),
            ('C', 'c%s', 'G'),
            ('C', 'c%s', 'F'),
            ('G', 'g%s', 'F'),
            ('F', 'f', None),
            ('F', 'f%s', 'X'),
            # last two states are X,Y so they aren't D,C
            ('X', 'c', None),
            ('X', 'c%s', 'Y'),
            ('Y', 'g', None),
        )
        self.grammar = Grammar(start='S')
        for state, expansion, following in transitions:
            children = None if following is None else [following]
            self.grammar.add_rule(state, expansion, children, 1.0)

    def terminals(self):
        """Return the alphabet as a list of single characters."""
        return ['a', 'c', 'd', 'g', 'f']

    def all_strings(self):
        """Yield str(tree) for every tree produced by the grammar's enumerate()."""
        for tree in self.grammar.enumerate():
            yield str(tree)
from LOTlib.Grammar import Grammar
from LOTlib.BasicPrimitives import *
from LOTlib.Inference.MetropolisHastings import mh_sample
from LOTlib.FiniteBestSet import FiniteBestSet
from LOTlib.Miscellaneous import *
from LOTlib.DataAndObjects import *
from LOTlib.Hypotheses.GaussianLOTHypothesis import GaussianLOTHypothesis
#from SimpleMPI import MPI_map
from random import randint

## The grammar

G = Grammar()
G.add_rule('START', '', ['EXPR'], 1.0)

# Binary arithmetic primitives
G.add_rule('EXPR', 'plus_', ['EXPR', 'EXPR'], 1.0)
G.add_rule('EXPR', 'times_', ['EXPR', 'EXPR'], 1.0)
G.add_rule('EXPR', 'divide_', ['EXPR', 'EXPR'], 1.0)
G.add_rule('EXPR', 'subtract_', ['EXPR', 'EXPR'], 1.0)

# Exponential / logarithmic primitives
G.add_rule('EXPR', 'exp_', ['EXPR'], 1.0)
G.add_rule('EXPR', 'log_', ['EXPR'], 1.0)
G.add_rule('EXPR', 'pow_', ['EXPR', 'EXPR'], 1.0)  # including this gives lots of overflow

# Trigonometric primitives
G.add_rule('EXPR', 'sin_', ['EXPR'], 1.0)
G.add_rule('EXPR', 'cos_', ['EXPR'], 1.0)
G.add_rule('EXPR', 'tan_', ['EXPR'], 1.0)

# The variable x is a terminal (children None) with weight 5.0
G.add_rule('EXPR', 'x', None, 5.0)  # these terminals should have None for their function type; the literals
- 5^n - 6^n - 7^n - 8^n - 9^n - 10^n - range[1,100] Interval Rules: p = (lambda - 1) --------------------------------- - all range[n,m] subset of r[1,100], such that n <=m (5,050 rules like this!) """ mix_grammar = Grammar() mix_grammar.add_rule('START', '', ['INTERVAL'], 1.) mix_grammar.add_rule('START', '', ['MATH'], 1.) mix_grammar.add_rule('MATH', 'mapset_', ['FUNC', 'DOMAIN_RANGE'], 1.) mix_grammar.add_rule('DOMAIN_RANGE', 'range_set_', ['1', '100'], 1.) mix_grammar.add_rule('FUNC', 'lambda', ['EXPR'], 1., bv_type='X', bv_p=1.) # Math rules (30-40 of these) # --------------------------- # Odd numbers mix_grammar.add_rule('EXPR', 'plus_', ['ODD', str(1)], 1.) mix_grammar.add_rule('ODD', 'times_', ['X', str(2)], 1.) # Primes mix_grammar.add_rule('EXPR', 'isprime_', ['X'], 1.)
NCONSTANTS = 4 CONSTANT_NAMES = ["C%i" % i for i in xrange(NCONSTANTS)] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Define the grammar # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Grammar import Grammar grammar = Grammar() grammar.add_rule("START", "", ["EXPR"], 1.0) grammar.add_rule("EXPR", "plus_", ["EXPR", "EXPR"], 1.0) grammar.add_rule("EXPR", "times_", ["EXPR", "EXPR"], 1.0) grammar.add_rule("EXPR", "divide_", ["EXPR", "EXPR"], 1.0) grammar.add_rule("EXPR", "subtract_", ["EXPR", "EXPR"], 1.0) grammar.add_rule("EXPR", "exp_", ["EXPR"], 1.0) grammar.add_rule("EXPR", "log_", ["EXPR"], 1.0) grammar.add_rule("EXPR", "pow_", ["EXPR", "EXPR"], 1.0) # including this gives lots of overflow grammar.add_rule("EXPR", "sin_", ["EXPR"], 1.0) grammar.add_rule("EXPR", "cos_", ["EXPR"], 1.0) grammar.add_rule("EXPR", "tan_", ["EXPR"], 1.0) grammar.add_rule("EXPR", "x", None, 5.0) # these terminals should have None for their function type; the literals grammar.add_rule("EXPR", "1.0", None, 5.0) # Supplement the grammar for c in CONSTANT_NAMES:
word = target.sample_utterance(all_words, context) data.append( UtteranceData(utterance=word, context=context, possible_utterances=all_words) ) return data ############################################################ # Set up the grammar grammar = Grammar() """ Note: This was updated on Dec 3 2012, after the language submission. We now include AND/OR/NOT, and S, and removed nonempty """ grammar.add_rule('START', 'presup_', ['BOOL', 'BOOL'], 1.0) grammar.add_rule('START', 'presup_', ['True', 'BOOL'], 1.0) grammar.add_rule('START', 'presup_', ['False', 'BOOL'], 1.0) grammar.add_rule('START', 'presup_', ['False', 'False'], 1.0) grammar.add_rule('START', 'presup_', ['True', 'True'], 1.0) grammar.add_rule('BOOL', 'not_', ['BOOL'], 1.0) grammar.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0) grammar.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 1.0) #grammar.add_rule('BOOL', 'nonempty_', ['SET'], 1.0) # don't need this if we do logical operations grammar.add_rule('BOOL', 'empty_', ['SET'], 1.0) grammar.add_rule('BOOL', 'subset_', ['SET', 'SET'], 1.0) grammar.add_rule('BOOL', 'exhaustive_', ['SET', 'context.S'], 1.0)
from LOTlib.Grammar import Grammar
from LOTlib.Evaluation.Eval import evaluate_expression, register_primitive

"""
A simple demo for how to define FunctionNodes

All of these primitives are defined in the LOTlib.Primitives package, as well as many others.
In general, the PCFG generates FunctionNode trees via "generate" and then these are printed via str(...).
"""

grammar = Grammar()

# Every row is (nonterminal, name, children, weight); rows are registered top to bottom.
for _nt, _name, _to, _weight in (
    # START rewrites to EXPR; the empty name means no function call is involved.
    ('START', '', ['EXPR'], 1.0),
    # EXPR can rewrite as "1.0"; this expansion has probability proportional to 5.0.
    ('EXPR', '1.0', None, 5.0),
    # Some other simple terminals. They are given much higher probability in order
    # to keep the PCFG well-defined.
    ('EXPR', '0.0', None, 3.0),
    ('EXPR', 'TAU', None, 3.0),
    ('EXPR', 'E', None, 3.0),
    # To have a string terminal, it must be quoted:
    #   ('EXPR', '\'e\'', None, 3.0),
    # One way to use the variable "x" of the function. It gets named as the
    # argument in evaluate_expression below.
    ('EXPR', 'x', None, 10.0),
):
    grammar.add_rule(_nt, _name, _to, _weight)
from LOTlib.Miscellaneous import unique from LOTlib.Grammar import Grammar from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis G = Grammar() G.add_rule('START','',['String'],1.0) G.add_rule('String','One',['Number'],1.0) G.add_rule('String','Two',['Number','Number'],1.0) G.add_rule('String','Three',['Number','Number','Number'],1.0) G.add_rule('Number','1','i',1.0) G.add_rule('Number','2','ii',1.0) G.add_rule('Number','3','iii',1.0) for i in xrange(100): print G.generate()
# Grammar
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

import LOTlib.Miscellaneous
from LOTlib.Miscellaneous import q

# # # # # # # # # # # # # # # # # # # # # # # # # # # #

TERMINAL_WEIGHT = 2.

from LOTlib.Grammar import Grammar

## Here we use the _d primitives which manipulate an entire distribution of strings.
## This tends to be much faster.
grammar = Grammar()

# (nonterminal, name, children, weight), registered in this order.
for _nt, _name, _to, _weight in (
    ('START', '', ['EXPR'], 1.0),
    ('EXPR', 'if_d', ['BOOL', 'EXPR', 'EXPR'], 1.),
    ('BOOL', 'and_d', ['BOOL', 'BOOL'], 1.),
    ('BOOL', 'or_d', ['BOOL', 'BOOL'], 1.),
    ('BOOL', 'not_d', ['BOOL'], 1.),
    ('EXPR', 'cons_d', ['EXPR', 'EXPR'], 1.),
    ('EXPR', 'cdr_d', ['EXPR'], 1.),
    ('EXPR', 'car_d', ['EXPR'], 1.),
    ('BOOL', 'equal_d', ['EXPR', 'EXPR'], 1.),
    # Disabled: ('BOOL', 'empty_d', ['EXPR'], 1.),
    ('BOOL', 'flip_d(0.5)', None, TERMINAL_WEIGHT),
):
    grammar.add_rule(_nt, _name, _to, _weight)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Grammar
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.Miscellaneous import qq
from LOTlib.Grammar import Grammar

grammar = Grammar()

# The start symbol expands to a predicate abstraction.
grammar.add_rule('START', '', ['Pabstraction'], 1.0)

# lambdaUsePredicate is where you can use the predicate defined in lambdaDefinePredicate.
grammar.add_rule(
    'Pabstraction',
    'apply_',
    ['lambdaUsePredicate', 'lambdaDefinePredicate'],
    1.0,
)

# Both lambdaUsePredicate rules are lambdas carrying the same bound-variable settings;
# they differ only in the body nonterminal and the weight. The keyword lists are
# rebuilt on every pass so that each rule receives its own list object.
for _body, _weight in (('INNER-BOOL', 5.0), ('Pabstraction', 1.0)):
    grammar.add_rule(
        'lambdaUsePredicate', 'lambda', [_body], _weight,
        bv_type='INNER-BOOL', bv_args=['OBJECT'], bv_prefix='F',
    )
""" This version uses Flip.py """

from LOTlib.Grammar import Grammar

base_grammar = Grammar()

# (nonterminal, name, children, weight), registered in this order.
for _nt, _name, _to, _weight in (
    ('START', '', ['START2'], 1.0),
    # The extra START2 level is just to make insertions/deletions at the top easier.
    ('START2', '', ['LIST'], 1.0),
    ('LIST', '(%s if %s else %s)', ['LIST', 'BOOL', 'LIST'], 1.),
    # Original note: upweighted (3?) to help in search/proposals.
    ('LIST', 'strcons_', ['LIST', 'LIST'], 1.),
    ('LIST', 'strcdr_', ['LIST'], 1.),
    ('LIST', 'strcar_', ['LIST'], 1.),
    # Terminals
    ('LIST', '', ['ATOM'], 3.0),
    # The argument
    ('LIST', 'x', None, 3.0),
):
    base_grammar.add_rule(_nt, _name, _to, _weight)

# Disabled alternative: sequences of atoms as terminals.
# base_grammar.add_rule('LIST', '', ['ATOMSEQ'], 3.0)
# base_grammar.add_rule('ATOMSEQ', '%s+%s', ['ATOMSEQ', 'ATOM'], 1.0)
# base_grammar.add_rule('ATOMSEQ', '%s', ['ATOM'], 2.0)

# Disabled alternative: if we want to allow sampling from a set of "words".
# base_grammar.add_rule('LIST', 'C.uniform_sample([%s])', ['WORDLIST'], 1.)  # Uniform sample of a word list
# base_grammar.add_rule('WORDLIST', '%s', ['WORD'], 1.)

# flip within a context
""" An example of generating quantified logic with lambdas. See FOL.py for inference about first-order logic """ from LOTlib.Grammar import Grammar # Create a grammar: G = Grammar() G.add_rule('BOOL', 'x', None, 2.0) # X is a terminal, so arguments=None # Each of these is a function, requiring some arguments of some type G.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0) G.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 1.0) G.add_rule('BOOL', 'not_', ['BOOL'], 1.0) G.add_rule('BOOL', 'exists_', ['FUNCTION', 'SET'], 0.50) G.add_rule('BOOL', 'forall_', ['FUNCTION', 'SET'], 0.50) G.add_rule('SET', 'S', None, 1.0) # And allow us to create a new kind of function G.add_rule('FUNCTION', 'lambda', ['BOOL'], 1.0, bv_name='BOOL', bv_args=None) # bvtype means we introduce a bound variable below G.BV_WEIGHT = 2.0 # When we introduce bound variables, they have this (relative) probability for i in xrange(1000): x = G.generate('BOOL') print x.log_probability(), x
''' def compute_single_likelihood(self, datum): try: return log(datum.alpha * (self(*datum.input) == datum.output) + (1.0-datum.alpha) / 2.0) except RecursionDepthException as e: # we get this from recursing too deep -- catch and thus treat "ret" as None return -Infinity ''' # Define a grammar object # Defaultly this has a start symbol called 'START' but we want to call # it 'EXPR' grammar = Grammar(start='EXPR') # Define some operations grammar.add_rule('EXPR', '(%s + %s)', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('EXPR', '(%s * %s)', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('EXPR', '(float(%s) / float(%s))', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('EXPR', '(-%s)', ['EXPR'], 1.0) # And define some numbers. We'll give them a 1/n^2 probability for n in xrange(1, 10): grammar.add_rule('EXPR', str(n), None, 10.0 / n**2) data = [FunctionData(input=[6], output=12, alpha=0.95)] #h = MyHypothesis() #print h.compute_prior(), h.compute_likelihood(data), h # define a "starting hypothesis". This one is essentially copied by # all proposers, so the sampler doesn't need to know its type or anything.
# How many FunctionNodes are allowed in a hypothesis?
# If we make this, say, 20, things may slow down a lot.
MAX_NODES = 10

WORDS = [
    'one_', 'two_', 'three_', 'four_', 'five_',
    'six_', 'seven_', 'eight_', 'nine_', 'ten_',
]

########################################################################################################
## Define a PCFG

## The priors here are somewhat hierarchical by type in generation, tuned to be a little more efficient
## (but the actual RR prior does not care about these probabilities)

grammar = Grammar(start='WORD')

# (nonterminal, name, children, weight), registered in this order.
for _nt, _name, _to, _weight in (
    ('BOOL', 'and_', ['BOOL', 'BOOL'], 1. / 3.),
    ('BOOL', 'or_', ['BOOL', 'BOOL'], 1. / 3.),
    ('BOOL', 'not_', ['BOOL'], 1. / 3.),
    ('BOOL', 'True', None, 1.0 / 2.),
    ('BOOL', 'False', None, 1.0 / 2.),
    ## note that this can take basically any types for return values
    ('WORD', 'if_', ['BOOL', 'WORD', 'WORD'], 0.5),
    # ifU_: returns undef if the condition is not met
    ('WORD', 'ifU_', ['BOOL', 'WORD'], 0.5),
):
    grammar.add_rule(_nt, _name, _to, _weight)

# cardinality1_, cardinality2_, cardinality3_: one BOOL test on a SET per size.
for _size in (1, 2, 3):
    grammar.add_rule('BOOL', 'cardinality%d_' % _size, ['SET'], 1.0)
from LOTlib.Eval import register_primitive
register_primitive(LOTlib.Miscellaneous.flatten2str)

# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Grammar
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

TERMINAL_WEIGHT = 15

grammar = Grammar()

# Structural rules: (nonterminal, name, children, weight), registered in this order.
for _nt, _name, _to, _weight in (
    ('START', 'flatten2str', ['EXPR'], 1.0),
    ('EXPR', 'sample_', ['SET'], 1.),
    ('EXPR', 'cons_', ['EXPR', 'EXPR'], 1.0/2.0),
    ('SET', '"%s"', ['STRING'], 1.0),
    ('STRING', '%s%s', ['TERMINAL', 'STRING'], 1.0),
    ('STRING', '%s', ['TERMINAL'], 1.0),
):
    grammar.add_rule(_nt, _name, _to, _weight)

# One TERMINAL rule per character, each with weight TERMINAL_WEIGHT.
for _char in 'eIaAu':
    grammar.add_rule('TERMINAL', _char, None, TERMINAL_WEIGHT)
"""
An example of inference over first-order logical expressions.
Here, we take sets of objects and generate quantified descriptions
"""

from LOTlib.Miscellaneous import unique
from LOTlib.Grammar import Grammar
from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis

grammar = Grammar()

# (nonterminal, name, children, weight), registered in this order.
for _nt, _name, _to, _weight in (
    ('START', '', ['QUANT'], 1.0),
    # Very simple -- one allowed quantifier
    ('QUANT', 'exists_', ['FUNCTION', 'SET'], 1.00),
    ('QUANT', 'forall_', ['FUNCTION', 'SET'], 1.00),
    # The thing we are a function of
    ('SET', 'S', None, 1.0),
):
    grammar.add_rule(_nt, _name, _to, _weight)

# And allow us to create a new kind of function
grammar.add_rule('FUNCTION', 'lambda', ['BOOL'], 1.0, bv_type='OBJECT')

for _nt, _name, _to, _weight in (
    ('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0),
    ('BOOL', 'or_', ['BOOL', 'BOOL'], 1.0),
    ('BOOL', 'not_', ['BOOL'], 1.0),
    # Non-terminal arguments get passed as normal python arguments:
    #   is_color_(OBJECT, 'red') --> OBJECT.color == 'red'
    ('BOOL', 'is_color_', ['OBJECT', "'red'"], 5.00),
    ('BOOL', 'is_color_', ['OBJECT', "'blue'"], 5.00),
):
    grammar.add_rule(_nt, _name, _to, _weight)
""" Just create some combinators and reduce them. """ from LOTlib.Grammar import Grammar from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis from LOTlib.Miscellaneous import q from LOTlib.Evaluation.Primitives.Functional import cons_ # for evaling G = Grammar() G.add_rule('START', 'cons_', ['START', 'START'], 2.0) G.add_rule('START', 'I', None, 1.0) G.add_rule('START', 'S', None, 1.0) G.add_rule('START', 'K', None, 1.0) from LOTlib.Evaluation.CombinatoryLogic import combinator_reduce from LOTlib.Evaluation.EvaluationException import EvaluationException for _ in range(10000): t = G.generate() lst = t.liststring() print lst, "\t->\t", try: print combinator_reduce(lst) except EvaluationException as e:
""" Just create some combinators and reduce them. """

from LOTlib.Grammar import Grammar

grammar = Grammar()

# START either pairs two START expressions via cons_ (weight 2.0) ...
grammar.add_rule('START', 'cons_', ['START', 'START'], 2.0)

# ... or is one of the terminals I, S, K (weight 1.0 each).
for _combinator in 'ISK':
    grammar.add_rule('START', _combinator, None, 1.0)
class SimpleEnglish(FormalLanguage):
    """
    A simple English language with a few kinds of recursion all at once
    """

    def __init__(self, max_length=6):
        """Build the grammar (start symbol 'S'), then call FormalLanguage.__init__ with max_length."""
        self.grammar = Grammar(start='S')

        # (category, children); every rule uses the category label as its name and
        # has weight 1.0. Rules are registered in this order.
        expansions = (
            ('S', ['NP', 'VP']),
            ('NP', ['d', 'AP', 'n']),
            ('AP', ['a', 'AP']),
            ('AP', None),
            ('VP', ['v']),
            ('VP', ['v', 'NP']),
            ('VP', ['v', 't', 'S']),
        )
        for category, children in expansions:
            self.grammar.add_rule(category, category, children, 1.0)

        FormalLanguage.__init__(self, max_length)

    def all_strings(self, max_length):
        """
        Yield the joined leaves of each tree from self.grammar.enumerate(d=max_length),
        keeping only strings whose length is strictly less than max_length.
        """
        trees = self.grammar.enumerate(d=max_length)
        for text in (''.join(tree.all_leaves()) for tree in trees):
            if len(text) < max_length:
                yield text
fn = FunctionNode(fn, 'EXPR', 'apply_', [fn, copy(args[i])]) try: return lambda_reduce(fn) except RuntimeError: return None if __name__=="__main__": ## Make a simple grammar for lambda calculus from LOTlib.Grammar import Grammar G = Grammar() # Here, rules creating smaller lambdas are higher prob; created simpler lambdas are also higher prob G.add_rule('START', '', ['EXPR'], 1.0) G.add_rule('EXPR', 'lambda', ['EXPR'], 2.0, bv_type='EXPR', bv_args=None, bv_p=2.0) G.add_rule('EXPR', 'apply_', ['EXPR', 'EXPR'], 1.0) # And print some expressions and reduce for _ in xrange(1000): t = G.generate() try: print lambdastring(t) print lambdastring(lambda_reduce(t)) except EvaluationException as e: print "***", e, lambdastring(t) print "\n"
"""
This folder defines a bunch of "standard" grammars.
In each, we do NOT specify the terminals, which typically expand the nonterminal PREDICATE->...
"""

from LOTlib.Grammar import Grammar

DEFAULT_FEATURE_WEIGHT = 5.0

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Boolean connectives over PREDICATE, with no True/False rules.
SimpleBoolean_noTF = Grammar()
for _nt, _name, _to, _weight in (
    ('START', '', ['BOOL'], 1.0),
    ('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0),
    ('BOOL', 'or_', ['BOOL', 'BOOL'], 1.0),
    ('BOOL', 'not_', ['BOOL'], 1.0),
    ('BOOL', '', ['PREDICATE'], DEFAULT_FEATURE_WEIGHT),
):
    SimpleBoolean_noTF.add_rule(_nt, _name, _to, _weight)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# Same idea, but START may also be the constant False or True.
SimpleBoolean = Grammar()
for _nt, _name, _to, _weight in (
    ('START', 'False', None, DEFAULT_FEATURE_WEIGHT),
    ('START', 'True', None, DEFAULT_FEATURE_WEIGHT),
    ('START', '', ['BOOL'], 1.0),
    ('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0),
):
    SimpleBoolean.add_rule(_nt, _name, _to, _weight)
class BerwickPilato(FormalLanguage):
    """
    From Figure 3a of Berwick & Pilato 1987
    Ignores tense

    Every terminal is a single character standing for one word:

        J = Judy     g = gives    G = gave     d = does     D = did
        e = get      i = is       W = was      h = has      H = had
        N = given    v = giving   V = give     m = may      M = might
        j = have     b = being    B = been     E = be       o = bread
    """

    def __init__(self):
        """Build the grammar: each rule emits one terminal and optionally moves to a next state."""
        self.grammar = Grammar(start='S')

        # (state, emitted terminal, next state). A next state of None marks a rule
        # that ends the string. Rules are registered in this order, all with weight 1.0.
        transitions = (
            ('S', 'J', 'S1'),

            ('S1', 'g', 'S4'),
            ('S1', 'G', 'S4'),
            ('S1', 'd', 'S3'),
            ('S1', 'D', 'S3'),
            ('S1', 'i', 'S6'),
            # "was" is 'W' in the legend above and in terminals(); this rule used to
            # emit a lowercase 'w', which is not in the declared alphabet.
            ('S1', 'W', 'S6'),
            ('S1', 'h', 'S5'),
            ('S1', 'H', 'S5'),
            ('S1', 'm', 'S2'),
            ('S1', 'M', 'S2'),

            ('S2', 'j', 'S5'),
            ('S2', 'E', 'S6'),
            ('S2', 'V', 'S4'),

            ('S3', 'e', 'S7'),
            ('S3', 'V', 'S4'),

            # Registered once only: an identical second copy of this rule made
            # enumeration produce every sentence twice.
            ('S4', 'o', None),

            ('S5', 'N', 'S4'),
            ('S5', 'B', 'S6'),

            ('S6', 'b', 'S7'),
            ('S6', 'v', 'S4'),
            ('S6', 'N', 'S4'),

            ('S7', 'N', 'S4'),
        )
        for state, symbol, successor in transitions:
            if successor is None:
                self.grammar.add_rule(state, symbol, None, 1.0)
            else:
                self.grammar.add_rule(state, symbol + '%s', [successor], 1.0)

    def terminals(self):
        """Return the alphabet as a list of single-character strings."""
        return list('JgGdDeiWhHNvVmMjbBEo')

    def all_strings(self):
        """Yield str() of every tree produced by self.grammar.enumerate()."""
        for g in self.grammar.enumerate():
            yield str(g)
Here, we invent simple predicates whose value is determined by a set membership (BASE-SET), and express logical concepts over those predicates. Data is set up to be like magnetism, with positives (pi) and negatives (ni) that interact with each other but not within groups. This is simple because there's only two types of things, and you observe all interactions. See ComplexMagnetism.py for a more complex case... """ from LOTlib.Miscellaneous import qq from LOTlib.Grammar import Grammar from LOTlib.DataAndObjects import FunctionData from LOTlib.FunctionNode import cleanFunctionNodeString grammar = Grammar() grammar.add_rule('START', '', ['Pabstraction'], 1.0) # a predicate abstraction # lambdaUsePredicate is where you can use the predicate defined in lambdaDefinePredicate grammar.add_rule('Pabstraction', 'apply_', ['lambdaUsePredicate', 'lambdaDefinePredicate'], 1.0, ) grammar.add_rule('lambdaUsePredicate', 'lambda', ['INNER-BOOL'], 5.0, bv_type='INNER-BOOL', bv_args=['OBJECT'], bv_prefix='F') grammar.add_rule('lambdaUsePredicate', 'lambda', ['Pabstraction'], 1.0, bv_type='INNER-BOOL', bv_args=['OBJECT'], bv_prefix='F') # Define a predicate that will just check if something is in a BASE-SET grammar.add_rule('lambdaDefinePredicate', 'lambda', ['lambdaDefinePredicateINNER'], 1.0, bv_type='OBJECT', bv_args=None, bv_prefix='z') # the function on objects, that allows them to be put into classes (analogous to a logical model here) grammar.add_rule('lambdaDefinePredicateINNER', 'is_in_', ['OBJECT', 'BASE-SET'], 1.0) # Here's how things will look with applylambda: #grammar.add_rule('START', 'applylambda', ['INNER-BOOL'], 10.0, bv_type='INNER-BOOL', bv_args=['OBJECT'], bv_prefix='F') #grammar.add_rule('START', 'applylambda', ['START'], 1.0, bv_type='INNER-BOOL', bv_args=['OBJECT'], bv_prefix='F')
return reduce(mycons([[x, z], [y, z]], lst[4:]), maxsteps=maxsteps - 1) else: return mycons( first, [reduce(x, maxsteps=floor(maxsteps / len(lst))) for x in lst[1:]]) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # A grammar for simple CL expressions #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Grammar import Grammar grammar = Grammar(start="CLEXPR") # flattern2str lives at the top, and it takes a cons, cdr, car structure and projects it to a string grammar.add_rule('CLEXPR', '[%s, %s]', ['CLEXPR', 'CLEXPR'], 1.0) grammar.add_rule('CLEXPR', '"I"', None, 1.0) grammar.add_rule('CLEXPR', '"S"', None, 1.0) grammar.add_rule('CLEXPR', '"K"', None, 1.0) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Just look a little #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if __name__ == "__main__": import LOTlib while not LOTlib.SIG_INTERRUPTED: x = eval(str(grammar.generate())) print x
# Yuan's version: from LOTlib.Grammar import Grammar base_grammar = Grammar() base_grammar.add_rule('START', 'flatten2str', ['LIST', 'sep=\"\"'], 1.0) base_grammar.add_rule('LIST', 'if_', ['BOOL', 'LIST', 'LIST'], 1.) base_grammar.add_rule('LIST', 'cons_', ['ATOM', 'LIST'], 1./6.) base_grammar.add_rule('LIST', 'cons_', ['LIST', 'LIST'], 1./6.) base_grammar.add_rule('LIST', 'cdr_', ['LIST'], 1./3.) base_grammar.add_rule('LIST', 'car_', ['LIST'], 1./3.) base_grammar.add_rule('LIST', '', ['ATOM'], 3.0) base_grammar.add_rule('LIST', '\'\'', None, 1.0) # base_grammar.add_rule('LIST', 'recurse_', [], 1.) # This is added by factorizedDataHypothesis base_grammar.add_rule('BOOL', 'empty_', ['LIST'], 1.) base_grammar.add_rule('BOOL', 'flip_(p=%s)', ['PROB'], 1.) for i in xrange(1,10): base_grammar.add_rule('PROB', '0.%s' % i, None, 1.) base_grammar.add_rule('LIST', 'recurse_(%s)', ['SELFF'], 1.0) # can call myself if __name__ == "__main__": for _ in xrange(1000): print base_grammar.generate()
""" A finite English grammar that's used to test functions in FunctionNode """

from LOTlib.Grammar import Grammar

g = Grammar()

# (nonterminal, name, children, weight), registered in this order.
for _nt, _name, _to, _weight in (
    ('START', 'NP', ['NP', 'VP'], 1),
    ('NP', '', ['the boy'], 1),
    ('NP', '', ['the ball'], 1),
    ('VP', '', ['ate the dog'], 1),
    ('VP', '', ['ate the chicken'], 1),
):
    g.add_rule(_nt, _name, _to, _weight)


def log_probability(tree):
    """Placeholder: always returns 0, whatever tree is given."""
    # TODO: stub
    return 0


if __name__ == "__main__":
    # Print 100 generated samples.
    for i in xrange(100):
        print(g.generate())
from LOTlib.Eval import register_primitive
register_primitive(LOTlib.Miscellaneous.flatten2str)

# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Grammar
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

TERMINAL_WEIGHT = 15

grammar = Grammar()

# Structural rules: (nonterminal, name, children, weight), registered in this order.
for _nt, _name, _to, _weight in (
    # flatten2str lives at the top, and it takes a cons, cdr, car structure and
    # projects it to a string.
    ('START', 'flatten2str', ['EXPR'], 1.0),
    ('EXPR', 'sample_', ['SET'], 1.),
    ('EXPR', 'cons_', ['EXPR', 'EXPR'], 1.0/2.0),
    ('SET', '"%s"', ['STRING'], 1.0),
    ('STRING', '%s%s', ['TERMINAL', 'STRING'], 1.0),
    ('STRING', '%s', ['TERMINAL'], 1.0),
):
    grammar.add_rule(_nt, _name, _to, _weight)

# One TERMINAL rule per character, each with weight TERMINAL_WEIGHT.
for _char in 'geks':
    grammar.add_rule('TERMINAL', _char, None, TERMINAL_WEIGHT)
""" A small finite grammar without bound variables. """

from LOTlib.Grammar import Grammar
import math

g = Grammar()

# Phrase-structure rules: (nonterminal, name, children, weight), in this order.
for _nt, _name, _to, _weight in (
    ("START", "S", ["NP", "VP"], 1),
    ("NP", "NP", ["det", "N"], 1),
    ("VP", "VP", ["V", "NP"], 1),
):
    g.add_rule(_nt, _name, _to, _weight)

# Lexical rules: every word is a terminal with weight 1.
for _category, _word in (
    ("det", "the"),
    ("det", "a"),
    ("N", "cat"),
    ("N", "human"),
    ("V", "likes"),
    ("V", "kills"),
    ("V", "eats"),
):
    g.add_rule(_category, _word, None, 1)


def log_probability(tree):
    """Return the log probability shared by every tree of this grammar (tree itself is not inspected)."""
    # Two determiners, two nouns and three verbs, all equally weighted. The factors
    # are multiplied in the order det, N, V, det, N.
    p_det = 0.5
    p_noun = 0.5
    p_verb = 1.0/3
    return math.log(p_det*p_noun*p_verb*p_det*p_noun)


if __name__ == "__main__":
    # Print 100 generated samples.
    for i in xrange(100):
        print(g.generate())
from LOTlib.Grammar import Grammar from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis from LOTlib.DataAndObjects import FunctionData from LOTlib.Inference.MetropolisHastings import mh_sample from math import log # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # A simple grammar for scheme, including lambda G = Grammar() # A very simple version of lambda calculus G.add_rule('START', '', ['EXPR'], 1.0) G.add_rule('EXPR', 'apply_', ['FUNC', 'EXPR'], 1.0) G.add_rule('EXPR', 'x', None, 5.0) G.add_rule('FUNC', 'lambda', ['EXPR'], 1.0, bv_type='EXPR', bv_args=None) G.add_rule('EXPR', 'cons_', ['EXPR', 'EXPR'], 1.0) G.add_rule('EXPR', 'cdr_', ['EXPR'], 1.0) G.add_rule('EXPR', 'car_', ['EXPR'], 1.0) G.add_rule('EXPR', '[]', None, 1.0) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # A class for scheme hypotheses that just computes the input/output pairs with the appropriate probability class SchemeFunction(LOTHypothesis): # Prior, proposals, __init__ are all inherited from LOTHypothesis def compute_single_likelihood(self, datum, response):
# One ('', set-of-objects) pair for each n from 1 to 9.
all_possible_data = [('', set(sample_sets_of_objects(n, all_objects))) for n in xrange(1, 10)]

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Grammar
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

from LOTlib.Grammar import Grammar
from LOTlib.Miscellaneous import q

# The priors here are somewhat hierarchical by type in generation, tuned to be a little more efficient
# (but the actual RR prior does not care about these probabilities)

grammar = Grammar()

# (nonterminal, name, children, weight), registered in this order.
for _nt, _name, _to, _weight in (
    ('START', '', ['WORD'], 1.0),
    ('BOOL', 'and_', ['BOOL', 'BOOL'], 1. / 3.),
    ('BOOL', 'or_', ['BOOL', 'BOOL'], 1. / 3.),
    ('BOOL', 'not_', ['BOOL'], 1. / 3.),
    ('BOOL', 'True', None, 1.0 / 2.),
    ('BOOL', 'False', None, 1.0 / 2.),
    # note that this can take basically any types for return values
    ('WORD', '(%s if %s else %s)', ['WORD', 'BOOL', 'WORD'], 0.5),
    ('WORD', q('undef'), None, 0.5),
):
    grammar.add_rule(_nt, _name, _to, _weight)

# Disabled alternatives:
# grammar.add_rule('WORD', 'if_', ['BOOL', 'WORD', q('undef')], 0.5)
# grammar.add_rule('WORD', 'ifU_', ['BOOL', 'WORD'], 0.5)  # if returning undef if condition not met