class AnBmCnDm(FormalLanguage):
    """
    a^n b^m c^n d^m -- the c-run copies the a-count and the d-run copies the b-count.
    """

    def __init__(self):
        # Two independent geometric-ish samplers: one for the a-run, one for the b-run.
        self.grammarA = Grammar(start='A')
        self.grammarA.add_rule('A', 'a%s', ['A'], 2.0)
        self.grammarA.add_rule('A', 'a', None, 1.0)

        self.grammarB = Grammar(start='B')
        self.grammarB.add_rule('B', 'b%s', ['B'], 2.0)
        self.grammarB.add_rule('B', 'b', None, 1.0)

    def terminals(self):
        return list('abcd')

    def sample_string(self):
        a = str(self.grammarA.generate())
        b = str(self.grammarB.generate())
        return a + b + ('c' * len(a)) + ('d' * len(b))

    def all_strings(self):
        for r in itertools.count(1):
            for n, m in partitions(r, 2, 1):  # partition into two groups (NOTE: does not return both orders)
                # BUG FIX: the final run must be 'd'*m (it was 'a'*m), so that the
                # enumeration matches the a^n b^m c^n d^m strings sample_string produces.
                yield 'a' * n + 'b' * m + 'c' * n + 'd' * m
                if n != m:
                    yield 'a' * m + 'b' * n + 'c' * m + 'd' * n
class AnBm(FormalLanguage):
    """
    A^n B^m with m > n; n and the surplus m-n are each (roughly) geometrically distributed.
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sb', ['S'], 1.0)
        self.grammar.add_rule('S', 'ab', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):  # fix that this is not CF
        s = str(self.grammar.generate())  # a balanced a^n b^n core
        # pad with a geometric number (at least one) of extra b's so m > n
        extra = 1
        while random() < 0.5:
            extra += 1
        return s + 'b' * extra

    def all_strings(self):
        for r in itertools.count(1):
            for n, m in partitions(r, 2, 1):  # partition into two groups (NOTE: does not return both orders)
                lo, hi = min(n, m), max(n, m)
                if hi > lo:  # only strictly-unbalanced strings are in the language
                    yield 'a' * lo + 'b' * hi
class Count(FormalLanguage):
    """
    The language ab abb abbb ... : each successive 'a' is followed by one more 'b'.
    """

    def __init__(self):
        # The grammar is only a proxy for sampling a length; its string is discarded.
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.5)  # if we make this 2, then we end up with things that are waay too long
        self.grammar.add_rule('S', 'a', None, 1.0)

    def sample_string(self):
        n = len(str(self.grammar.generate()))
        return ''.join('a' + 'b' * (i + 1) for i in range(n))

    def terminals(self):
        return list('ab')

    def all_strings(self):
        for n in itertools.count(0):
            yield ''.join('a' + 'b' * (i + 1) for i in range(n))
class Count(FormalLanguage):
    """
    The language ab abb abbb ... (duplicate definition with a uniform proxy weight).
    """

    def __init__(self):
        # This grammar exists only so we can sample a geometric length in sample_string.
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)

    def sample_string(self):
        length = len(str(self.grammar.generate()))
        parts = []
        for i in range(length):
            parts.append('a' + 'b' * (i + 1))
        return ''.join(parts)

    def terminals(self):
        return list('ab')

    def all_strings(self):
        for n in itertools.count(0):
            parts = []
            for i in range(n):
                parts.append('a' + 'b' * (i + 1))
            yield ''.join(parts)
class XXI(FormalLanguage):
    """
    A string x followed by x "inverted": every a swapped with b.
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def _invert(self, x):
        # swap a <-> b via a temporary placeholder symbol
        v = re.sub(r"a", "t", x)
        v = re.sub(r"b", "a", v)
        return re.sub(r"t", "b", v)

    def sample_string(self):
        while True:
            x = str(self.grammar.generate())
            if len(x) == 0:
                continue  # reject empty draws
            return x + self._invert(x)

    def all_strings(self):
        for l in itertools.count(1):
            for x in compute_all_strings(l, alphabet=self.terminals()):
                yield x + self._invert(x)
class AmBnCmDn(FormalLanguage):
    """ See Shieber 1985 -- strings of the form a^m b^n c^m d^n. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', '%s%s', ['A', 'B'], 1.0)
        self.grammar.add_rule('A', 'a%s', ['A'], 1.0)
        self.grammar.add_rule('A', 'a', None, 1.0)
        self.grammar.add_rule('B', 'b%s', ['B'], 1.0)
        self.grammar.add_rule('B', 'b', None, 1.0)

    def terminals(self):
        return list('abcd')

    def sample_string(self):  # fix that this is not CF
        s = str(self.grammar.generate())  # a^m b^n
        m, n = s.count('a'), s.count('b')
        return s + 'c' * m + 'd' * n

    def all_strings(self):
        for r in itertools.count(1):
            for n, m in partitions(r, 2, 1):  # partition into two groups (NOTE: does not return both orders)
                yield 'a' * n + 'b' * m + 'c' * n + 'd' * m
                if n != m:
                    yield 'a' * m + 'b' * n + 'c' * m + 'd' * n
class ABAnBn(FormalLanguage):
    """ An AB language followed by AnBn """

    def __init__(self):
        self.grammar = Grammar(start='S')
        # arbitrary (a|b)* prefix ...
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 1.0)
        self.grammar.add_rule('S', '%s', ['Q'], 2.0)
        # ... terminated by a balanced a^n b^n block
        self.grammar.add_rule('Q', 'a%sb', ['Q'], 2.0)
        self.grammar.add_rule('Q', 'ab', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):
        # NOTE(review): this draws two independent samples and rejects until
        # x != y, exactly like the XY class below -- that looks copy-pasted.
        # Confirm it matches the "(a|b)* a^n b^n" language in the docstring.
        while True:
            x = str(self.grammar.generate())
            y = str(self.grammar.generate())
            if x != y:
                return x + y

    def all_strings(self):
        # enumeration intentionally unimplemented for this language
        assert False
class AmBnCmDn(FormalLanguage):
    """ See Shieber 1985. a^m b^n c^m d^n (duplicate definition). """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', '%s%s', ['A', 'B'], 1.0)
        self.grammar.add_rule('A', 'a%s', ['A'], 1.0)
        self.grammar.add_rule('A', 'a', None, 1.0)
        self.grammar.add_rule('B', 'b%s', ['B'], 1.0)
        self.grammar.add_rule('B', 'b', None, 1.0)

    def terminals(self):
        return list('abcd')

    def sample_string(self):  # fix that this is not CF
        prefix = str(self.grammar.generate())  # a^m b^n
        suffix = 'c' * prefix.count('a') + 'd' * prefix.count('b')
        return prefix + suffix

    def all_strings(self):
        for total in itertools.count(1):
            for n, m in partitions(total, 2, 1):  # partition into two groups (NOTE: does not return both orders)
                yield 'a' * n + 'b' * m + 'c' * n + 'd' * m
                if n != m:
                    yield 'a' * m + 'b' * n + 'c' * m + 'd' * n
class XY(FormalLanguage):
    """
    The XY language discussed in Pullum & Gazdar, originally from Chomsky 1963 pg 378-9
    This is the set of all strings xy where x!=y
    """

    def __init__(self):
        # a uniform-ish sampler over {a,b}+
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):
        # Rejection sampling: draw two strings until they differ.
        # NOTE(review): x and y may have DIFFERENT lengths here, while all_strings
        # below enumerates only |x| == |y| pairs -- confirm which version of the
        # language is intended.
        while True:
            x = str(self.grammar.generate())
            y = str(self.grammar.generate())
            if x != y:
                return x+y

    def all_strings(self):
        for l in itertools.count(1):
            for x in compute_all_strings(l, alphabet=self.terminals()):
                for y in compute_all_strings(l, alphabet=self.terminals()):
                    if x != y:
                        yield x+y
class AnBk(FormalLanguage):
    """ A^n B^k, k>n, with n, k-n sampled from a geometric """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sb', ['S'], 2.0)
        self.grammar.add_rule('S', 'ab', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):  # fix that this is not CF
        s = str(self.grammar.generate())  # balanced a^n b^n core
        # append at least one extra 'b'; continue with probability 2/3
        extra = 1
        while random() < (2. / 3.):
            extra += 1
        return s + 'b' * extra

    def all_strings(self):
        for r in itertools.count(1):
            for n, m in partitions(r, 2, 1):  # partition into two groups (NOTE: does not return both orders)
                lo, hi = min(n, m), max(n, m)
                if hi > lo:  # only strings with strictly more b's than a's
                    yield 'a' * lo + 'b' * hi
class XY(FormalLanguage):
    """
    An XY language discussed in Pullum & Gazdar, originally from Chomsky 1963 pg 378-9
    This is the set of all strings xy where x!=y. Note this is CF, contrary to Chomsky
    Here for simplicity we will just use {a,b} as the alphabet
    """

    def __init__(self):
        # a sampler over {a,b}+ favoring longer strings (recursion weight 2.0)
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):
        # Rejection sampling: draw two strings until they differ.
        # NOTE(review): x and y may have different lengths here, but all_strings
        # below enumerates only |x| == |y| pairs -- confirm which is intended.
        while True:
            x = str(self.grammar.generate())
            y = str(self.grammar.generate())
            if x != y:
                return x+y

    def all_strings(self):
        for l in itertools.count(1):
            for x in compute_all_strings(l, alphabet=self.terminals()):
                for y in compute_all_strings(l, alphabet=self.terminals()):
                    if x != y:
                        yield x+y
class AnBmAnBm(FormalLanguage):
    """ a^n b^m a^n b^m -- an a-run and a b-run, repeated once. """

    def __init__(self):
        self.grammarA = Grammar(start='A')
        self.grammarA.add_rule('A', 'a%s', ['A'], 2.0)
        self.grammarA.add_rule('A', 'a', None, 1.0)

        self.grammarB = Grammar(start='B')
        self.grammarB.add_rule('B', 'b%s', ['B'], 2.0)
        self.grammarB.add_rule('B', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):
        a = str(self.grammarA.generate())
        b = str(self.grammarB.generate())
        half = a + b
        return half + half

    def all_strings(self):
        raise NotImplementedError
class AnBnp1Cnp2(FormalLanguage):
    """ a^n b^(n+1) c^(n+2). """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)

    def terminals(self):
        return list('abc')

    def sample_string(self):  # fix that this is not CF
        prefix = str(self.grammar.generate())  # a^n
        n = len(prefix)
        return prefix + 'b' * (n + 1) + 'c' * (n + 2)
class AnBnCn(FormalLanguage):
    """ a^n b^n c^n -- the classic non-context-free language. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sb', ['S'], 1.0)
        self.grammar.add_rule('S', 'ab', None, 1.0)

    def terminals(self):
        return list('abc')

    def sample_string(self):  # fix that this is not CF
        s = str(self.grammar.generate())  # a^n b^n, so len(s) == 2n
        # FIX: floor division. Under Python 2 int semantics this is unchanged, but
        # plain '/' yields a float under Python 3, making 'c' * (len(s)/2) a TypeError.
        return s + 'c' * (len(s) // 2)
def generate(self, x='*USE_START*', d=0):
    """
    RealValueGrammar.generate may create gaussians or uniforms when given "*gaussian*" and
    "*uniform*" as the nonterminal type. Otherwise, this is identical to LOTlib.Grammar.

    x -- the nonterminal to expand ('*USE_START*' means self.start)
    d -- recursion depth, passed through to Grammar.generate
    """
    if x == '*USE_START*':
        x = self.start

    if x == '*gaussian*':
        # TODO: HIGHLY EXPERIMENTAL!!
        # Wow this is really terrible for mixing...
        v = np.random.normal()
        gp = normlogpdf(v, 0.0, 1.0)
        return FunctionNode(returntype=x, name=str(v), args=None, generation_probability=gp,
                            ruleid=0, resample_p=CONSTANT_RESAMPLE_P)  # TODO: FIX THE ruleid
    elif x == '*uniform*':
        v = np.random.rand()
        gp = 0.0  # uniform on [0,1) contributes zero log-density
        return FunctionNode(returntype=x, name=str(v), args=None, generation_probability=gp,
                            ruleid=0, resample_p=CONSTANT_RESAMPLE_P)  # TODO: FIX THE ruleid
    else:
        # BUG FIX: the parent-class result was computed but never returned, so any
        # ordinary nonterminal silently produced None.
        return Grammar.generate(self, x, d=d)
class AnBmCnm(FormalLanguage):
    """ a^n b^m c^(n*m) -- the c-run multiplies the two counts. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a%s', ['T'], 1.0)
        self.grammar.add_rule('T', 'b%s', ['T'], 2.0)
        self.grammar.add_rule('T', 'b', None, 1.0)

    def terminals(self):
        return list('abc')

    def sample_string(self):
        s = str(self.grammar.generate())  # a^n b^m
        num_c = s.count('a') * s.count('b')
        return s + 'c' * num_c
class WeW(FormalLanguage):
    """ A word w from {a,b}+ repeated len(w) times. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):
        word = str(self.grammar.generate())
        return ''.join([word] * len(word))
class ABnUBAn(FormalLanguage):
    """ (ab)^n union (ba)^n, each chosen with probability 1/2. """

    def __init__(self):
        # generate x^n, then rewrite each x as 'ab' or 'ba'
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'x%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'x', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):  # fix that this is not CF
        s = str(self.grammar.generate())
        pair = "ab" if random() < 0.5 else "ba"
        return re.sub(r"x", pair, s)
class AnUAnBn(FormalLanguage):
    """ a^n union a^n b^n, each chosen with probability 1/2. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):  # fix that this is not CF
        s = str(self.grammar.generate())  # a^n
        if random() < 0.5:
            return s
        return s + "b" * len(s)
class Unequal(FormalLanguage):
    """ Strings in {a,b}+ whose counts of a's and b's differ. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def sample_string(self):
        # rejection-sample until the two symbol counts differ
        while True:
            s = str(self.grammar.generate())
            if s.count("a") != s.count("b"):
                return s

    def terminals(self):
        return list('ab')
class ABnen(FormalLanguage):  # ((AB)^n)^n
    """ (ab)^(n^2): an (ab)^n block repeated n times. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'ab%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'ab', None, 2.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):
        s = str(self.grammar.generate())  # (ab)^n, so len(s) == 2n
        # FIX: floor division -- identical under Python 2 ints, but plain '/'
        # produces a float under Python 3 and s * float is a TypeError.
        return s * (len(s) // 2)

    def all_strings(self):
        raise NotImplementedError
class AnBnC2n(FormalLanguage):
    """ a^n b^n c^(2n). """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sb', ['S'], 2.0)
        self.grammar.add_rule('S', 'ab', None, 1.0)

    def terminals(self):
        return list('abc')

    def sample_string(self):  # fix that this is not CF
        s = str(self.grammar.generate())  # a^n b^n, length 2n
        return s + 'c' * len(s)

    def all_strings(self):
        for n in itertools.count(1):
            yield 'a' * n + 'b' * n + 'c' * (2 * n)
class AnBmCmAn(FormalLanguage):
    """ a^n b^m c^m a^n, sampled directly from the CFG. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sa', ['S'], 1.0)
        self.grammar.add_rule('S', 'a%sa', ['T'], 1.0)
        self.grammar.add_rule('T', 'b%sc', ['T'], 1.0)
        self.grammar.add_rule('T', 'bc', None, 1.0)

    def terminals(self):
        # NOTE(review): 'd' never appears in the grammar or in all_strings --
        # confirm the alphabet is really meant to include it.
        return list('abcd')

    def sample_string(self):
        return str(self.grammar.generate())

    def all_strings(self):
        for r in itertools.count(1):
            for n, m in partitions(r, 2, 1):  # partition into two groups (NOTE: does not return both orders)
                pairs = [(n, m)] if n == m else [(n, m), (m, n)]
                for i, j in pairs:
                    yield 'a' * i + 'b' * j + 'c' * j + 'a' * i
class XX(FormalLanguage):
    """ An xx language (for discussion see Gazdar & Pullum 1982) """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):  # fix that this is not CF
        half = str(self.grammar.generate())  # from (a+b)+
        return half + half  # the xx doubling

    def all_strings(self):
        for length in itertools.count(1):
            for half in compute_all_strings(length, alphabet=self.terminals()):
                yield half + half
class AnBmCmAn(FormalLanguage):
    """ a^n b^m c^m a^n (duplicate definition with heavier recursion weights). """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sa', ['S'], 2.0)
        self.grammar.add_rule('S', 'a%sa', ['T'], 1.0)
        self.grammar.add_rule('T', 'b%sc', ['T'], 2.0)
        self.grammar.add_rule('T', 'bc', None, 1.0)

    def terminals(self):
        # NOTE(review): 'd' is never generated by this grammar -- confirm it
        # belongs in the alphabet.
        return list('abcd')

    def sample_string(self):
        return str(self.grammar.generate())

    def _string_for(self, n, m):
        # build a^n b^m c^m a^n
        return 'a' * n + 'b' * m + 'c' * m + 'a' * n

    def all_strings(self):
        for r in itertools.count(1):
            for n, m in partitions(r, 2, 1):  # partition into two groups (NOTE: does not return both orders)
                yield self._string_for(n, m)
                if n != m:
                    yield self._string_for(m, n)
class XXX(FormalLanguage):
    """
    An xxx language -- indexed but not mildly context sensitive
    (see https://en.wikipedia.org/wiki/Indexed_language)
    """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):  # fix that this is not CF
        third = str(self.grammar.generate())  # from (a+b)+
        return third * 3  # the xxx tripling

    def all_strings(self):
        for length in itertools.count(1):
            for third in compute_all_strings(length, alphabet=self.terminals()):
                yield third * 3
class ABA(FormalLanguage):
    """ Similar to Marcus ABB experiment, except we allow AAA (for simplicity). """

    def __init__(self):
        # NOTE: the grammar alone does not capture the ABA pattern -- sample_string does
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', '%s%s', ['T', 'T'], 1.0)
        for t in self.terminals():
            self.grammar.add_rule('T', t, None, 1.0)

    def sample_string(self):  # fix that this is not CF
        s = str(self.grammar.generate())
        return s + s[0]  # repeat the first element to make the ABA frame

    def terminals(self):
        return list('gGtTnNlL')  # ga gi ta ti na ni la li

    def all_strings(self):
        for first in self.terminals():
            for second in self.terminals():
                yield first + second + first
class XXR(FormalLanguage):
    """ (a,b)+ strings followed by their reverse. This can be generated by a CFG. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):  # fix that this is not CF
        s = str(self.grammar.generate())  # from {a,b}+
        return s + s[::-1]

    def all_strings(self):
        for length in itertools.count(1):
            for s in compute_all_strings(length, alphabet='ab'):
                yield s + ''.join(reversed(s))
class XXR(FormalLanguage):
    """ Palindrome-style language: a {a,b}+ string followed by its mirror image. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):  # fix that this is not CF
        half = str(self.grammar.generate())  # from {a,b}+
        return half + half[::-1]

    def all_strings(self):
        for l in itertools.count(1):
            for half in compute_all_strings(l, alphabet='ab'):
                yield half + half[::-1]
class XX(FormalLanguage):
    """ An xx language (for discussion see Gazdar & Pullum 1982). Duplicate with uniform weights. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):  # fix that this is not CF
        half = str(self.grammar.generate())  # from (a+b)+
        return half * 2  # xx language

    def all_strings(self):
        for length in itertools.count(1):
            for half in compute_all_strings(length, alphabet='ab'):
                yield half * 2
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # A grammar for simple CL expressions # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Grammar import Grammar grammar = Grammar(start="CLEXPR") # flattern2str lives at the top, and it takes a cons, cdr, car structure and projects it to a string grammar.add_rule("CLEXPR", "[%s, %s]", ["CLEXPR", "CLEXPR"], 1.0) grammar.add_rule("CLEXPR", '"I"', None, 1.0) grammar.add_rule("CLEXPR", '"S"', None, 1.0) grammar.add_rule("CLEXPR", '"K"', None, 1.0) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Just look a little # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ if __name__ == "__main__": import LOTlib while not LOTlib.SIG_INTERRUPTED: x = eval(str(grammar.generate())) print x try: print reduce(x) except CombinatorReduceException: print "NON-HALT"
# Etc. # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Conditional: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # if_ gets printed specially (see LOTlib.FunctionNode.__str__). Here COND is a name that is made up # here for conditional expressions grammar.add_rule('EXPR', 'if_', ['COND', 'EXPR', 'EXPR'], 1.0) grammar.add_rule('COND', 'gt_', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('COND', 'eq_', ['EXPR', 'EXPR'], 1.0) # Note that because if_ prints specially, it is correctly handled (via short circuit evaluation) # so that we don't eval both branches unnecessarily for _ in xrange(1000): t = grammar.generate() # Default is to generate from 'START'; else use 'START=t' to generate from type t # Now x is a FunctionNode # We can compile it via LOTlib.Miscellaneous.evaluate_expression # This says that t is a *function* with arguments 'x' (allowed via the grammar above) # The alternative way to do this would be to put a lambda at the top of each tree f = evaluate_expression(t, args=['x']) print t # will call x.__str__ and display as a pythonesque string print map(f, range(0,10))
""" An example of generating quantified logic with lambdas. See FOL.py for inference about first-order logic """ from LOTlib.Grammar import Grammar # Create a grammar: G = Grammar() G.add_rule('BOOL', 'x', None, 2.0) # X is a terminal, so arguments=None # Each of these is a function, requiring some arguments of some type G.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0) G.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 1.0) G.add_rule('BOOL', 'not_', ['BOOL'], 1.0) G.add_rule('BOOL', 'exists_', ['FUNCTION', 'SET'], 0.50) G.add_rule('BOOL', 'forall_', ['FUNCTION', 'SET'], 0.50) G.add_rule('SET', 'S', None, 1.0) # And allow us to create a new kind of function G.add_rule('FUNCTION', 'lambda', ['BOOL'], 1.0, bv_name='BOOL', bv_args=None) # bvtype means we introduce a bound variable below G.BV_WEIGHT = 2.0 # When we introduce bound variables, they have this (relative) probability for i in xrange(1000): x = G.generate('BOOL') print x.log_probability(), x
""" A finite grammar used in the test-1.py file TODO: I don't know what this test file is supposed to test! """ from LOTlib.Grammar import Grammar g = Grammar() g.add_rule('START', '', ['NT1'], 1.0) g.add_rule('NT1', 'A', [], 1.00) g.add_rule('NT1', 'B', ['NT2'], 2.00) g.add_rule('NT1', 'C', ['NT3', 'NT3'], 3.70) g.add_rule('NT2', 'X', None, 1.0) g.add_rule('NT3', 'Y', None, 1.0) g.add_rule('NT3', 'Z', None, 1.25) def log_probability(tree): return 0 # TODO: stub if __name__ == "__main__": for i in xrange(100): print(g.generate())
""" An example of generating quantified logic with lambdas. See FOL.py for inference about first-order logic """ from LOTlib.Grammar import Grammar # Create a grammar: grammar = Grammar() grammar.add_rule('BOOL', 'x', None, 2.0) # X is a terminal, so arguments=None # Each of these is a function, requiring some arguments of some type grammar.add_rule('BOOL', 'and_', ['BOOL', 'BOOL'], 1.0) grammar.add_rule('BOOL', 'or_', ['BOOL', 'BOOL'], 1.0) grammar.add_rule('BOOL', 'not_', ['BOOL'], 1.0) grammar.add_rule('BOOL', 'exists_', ['FUNCTION', 'SET'], 0.50) grammar.add_rule('BOOL', 'forall_', ['FUNCTION', 'SET'], 0.50) grammar.add_rule('SET', 'S', None, 1.0) # And allow us to create a new kind of function grammar.add_rule('FUNCTION', 'lambda', ['BOOL'], 1.0, bv_type='BOOL', bv_args=None) # bvtype means we introduce a bound variable below grammar.BV_WEIGHT = 2.0 # When we introduce bound variables, they have this (relative) probability for i in xrange(1000): x = grammar.generate('BOOL') print x.log_probability(), x
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Proposals import * #iip = InverseInlineThunk(grammar, replacetype='BOOL') #for j in lot_iter(xrange(1000)): #print "-----------------------------------\n\n" #t = grammar.generate() #for i in lot_iter(xrange(10)): #print "\t", t ##t = iip.propose_tree(t) for i in xrange(10000): print grammar.generate() # Or we can make them as hypotheses (functions of S): #for i in xrange(100): #print LOTHypothesis(grammar, args=['S']) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Or real inference: ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #from LOTlib.DataAndObjects import FunctionData, Obj # for nicely managing data #from LOTlib.Inference.MetropolisHastings import mh_sample # for running MCMC ## Make up some data -- here just one set containing {red, red, green} colors
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Hypothesis # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis from LOTlib.Hypotheses.Likelihoods.BinaryLikelihood import BinaryLikelihood class MyHypothesis(BinaryLikelihood, LOTHypothesis): def __init__(self, grammar=grammar, **kwargs): LOTHypothesis.__init__(self, grammar=grammar, display="lambda x : %s", maxnodes=150, **kwargs) def make_hypothesis(**kwargs): return MyHypothesis(**kwargs) hset = set([make_hypothesis(value=grammar.generate()) for _ in xrange(10000)]) hypotheses = list(hset) # for h in hypotheses: # print h # hypotheses = [] # for t in grammar.enumerate(d=6): # hypotheses.append(make_hypothesis(value=t)) print "# Generated ", len(hypotheses), " hypotheses" # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Data # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.DataAndObjects import FunctionData, make_all_objects
# Take each partition, which doesn't have leaves, and generate leaves, setting # it to a random generation (fill in the leaves with random hypotheses) for p in partitions: print "# Initializing partition:", p print ">>", p for n in p.subnodes(): # set to not resample these setattr( n, 'p_propose', 0.0 ) ## NOTE: Hypothesis proposals must be sensitive to resample_p for this to work! # and fill in the missing leaves with a random generation for i, a in enumerate(n.args): if grammar.is_nonterminal(a): n.args[i] = grammar.generate(a) print "# Initialized %s partitions" % len(partitions) grammar.add_rule('STRING', '%s%s', ['TERMINAL', 'STRING'], 1.0) grammar.add_rule('STRING', '%s', ['TERMINAL'], 1.0) grammar.add_rule('TERMINAL', 'g', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'a', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'i', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'k', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 's', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'f', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'n', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'm', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'h', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'N', None, TERMINAL_WEIGHT)
#grammar.add_rule('FUNCTION', 'lambda', ['EXPR'], 1.0, bv_type='BOOL', bv_args=['EXPR']) # Etc. # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Conditional: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # if_ gets printed specially (see LOTlib.FunctionNode.__str__). Here COND is a name that is made up # here for conditional expressions grammar.add_rule('EXPR', 'if_', ['COND', 'EXPR', 'EXPR'], 1.0) grammar.add_rule('COND', 'gt_', ['EXPR', 'EXPR'], 1.0) grammar.add_rule('COND', 'eq_', ['EXPR', 'EXPR'], 1.0) # Note that because if_ prints specially, it is correctly handled (via short circuit evaluation) # so that we don't eval both branches unnecessarily for _ in xrange(1000): t = grammar.generate( ) # Default is to generate from 'START'; else use 'START=t' to generate from type t # Now x is a FunctionNode # We can compile it via LOTlib.Miscellaneous.evaluate_expression # This says that t is a *function* with arguments 'x' (allowed via the grammar above) # The alternative way to do this would be to put a lambda at the top of each tree f = evaluate_expression(t, args=['x']) print t # will call x.__str__ and display as a pythonesque string print map(f, range(0, 10))
""" A finite English grammar that's used to test functions in FunctionNode """ from LOTlib.Grammar import Grammar g = Grammar() g.add_rule('START','NP',['NP', 'VP'],1) g.add_rule('NP','',['the boy'],1) g.add_rule('NP','',['the ball'],1) g.add_rule('VP','',['ate the dog'],1) g.add_rule('VP','',['ate the chicken'],1) def log_probability(tree): return 0 # TODO: stub if __name__ == "__main__": for i in xrange(100): print(g.generate())
# NOTE(review): this `else:` continues a function whose `if` begins before this chunk.
else:
    return math.log(0.5) + log_probability_age(ls)


# probability of a given noun occurring
def log_probability_noun(ls):
    # two equiprobable nouns
    return math.log(1.0 / 2)


# probability of a given noun phrase occurring
def log_probability_nounphrase(ls):
    prob_det = 1.0  # there is only one determiner
    # determiner probability times the probability of the noun in slot 2
    return math.log(prob_det) + log_probability_noun(ls[2])


def log_probability_age(ls):
    # Log-probability of an age (INT) expansion.
    # compute the sum of all unnormalized probabilities for the rules INT --> x
    sum_prob_int = 12.0
    # check whether we have a bound variable or not
    # NOTE(review): assumes ls[2][1][0] names the bound variable 'a2' -- confirm tree layout
    if ls[2][1][0] == 'a2':
        prob_int = 10.0 / sum_prob_int
    else:
        prob_int = 1.0 / sum_prob_int
    return math.log(prob_int)


if __name__ == "__main__":
    # g is the grammar defined elsewhere in this file
    for i in xrange(100):
        tree = g.generate()
        print tree, tree.log_probability()
from LOTlib.Grammar import Grammar from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis from LOTlib.Miscellaneous import q from LOTlib.Evaluation.Primitives.Functional import cons_ # for evaling grammar = Grammar() grammar.add_rule("START", "cons_", ["START", "START"], 2.0) grammar.add_rule("START", "I", None, 1.0) grammar.add_rule("START", "S", None, 1.0) grammar.add_rule("START", "K", None, 1.0) if __name__ == "__main__": from LOTlib.Evaluation.CombinatoryLogic import combinator_reduce from LOTlib.Evaluation.EvaluationException import EvaluationException for _ in range(10000): t = grammar.generate() lst = t.liststring() print lst, "\t->\t", try: print combinator_reduce(lst) except EvaluationException as e: print "*Probable-NON-HALT*"
# NOTE(review): this `else:` continues a function whose `if` begins before this chunk.
else:
    return math.log(0.5) + log_probability_age(ls)


# probability of a given noun occurring
def log_probability_noun(ls):
    # two equiprobable nouns
    return math.log(1.0 / 2)


# probability of a given noun phrase occurring
def log_probability_nounphrase(ls):
    prob_det = 1.0  # there is only one determiner
    # determiner probability times the probability of the noun in slot 2
    return math.log(prob_det) + log_probability_noun(ls[2])


def log_probability_age(ls):
    # Log-probability of an age (INT) expansion.
    # compute the sum of all unnormalized probabilities for the rules INT --> x
    sum_prob_int = 12.0
    # check whether we have a bound variable or not
    # NOTE(review): assumes ls[2][1][0] names the bound variable "a2" -- confirm tree layout
    if ls[2][1][0] == "a2":
        prob_int = 10.0 / sum_prob_int
    else:
        prob_int = 1.0 / sum_prob_int
    return math.log(prob_int)


if __name__ == "__main__":
    # g is the grammar defined elsewhere in this file
    for i in xrange(100):
        tree = g.generate()
        print tree, tree.log_probability()
from LOTlib.Miscellaneous import unique from LOTlib.Grammar import Grammar from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis G = Grammar() G.add_rule('START','',['String'],1.0) G.add_rule('String','One',['Number'],1.0) G.add_rule('String','Two',['Number','Number'],1.0) G.add_rule('String','Three',['Number','Number','Number'],1.0) G.add_rule('Number','1','i',1.0) G.add_rule('Number','2','ii',1.0) G.add_rule('Number','3','iii',1.0) for i in xrange(100): print G.generate()
# Just generate from this grammar # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Proposals import * #iip = InverseInlineThunk(grammar, replacetype='BOOL') #for j in lot_iter(xrange(1000)): #print "-----------------------------------\n\n" #t = grammar.generate() #for i in lot_iter(xrange(10)): #print "\t", t ##t = iip.propose_tree(t) for i in xrange(10000): print grammar.generate() # Or we can make them as hypotheses (functions of S): #for i in xrange(100): #print LOTHypothesis(grammar, args=['S']) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Or real inference: ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #from LOTlib.DataAndObjects import FunctionData, Obj # for nicely managing data #from LOTlib.Inference.MetropolisHastings import mh_sample # for running MCMC ## Make up some data -- here just one set containing {red, red, green} colors #data = [ FunctionData(args=[ {Obj(color='red'), Obj(color='red'), Obj(color='green')} ], \ #output=True) ]
""" from LOTlib.Grammar import Grammar from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis from LOTlib.Miscellaneous import q from LOTlib.Evaluation.Primitives.Functional import cons_ # for evaling G = Grammar() G.add_rule('START', 'cons_', ['START', 'START'], 2.0) G.add_rule('START', 'I', None, 1.0) G.add_rule('START', 'S', None, 1.0) G.add_rule('START', 'K', None, 1.0) from LOTlib.Evaluation.CombinatoryLogic import combinator_reduce from LOTlib.Evaluation.EvaluationException import EvaluationException for _ in range(10000): t = G.generate() lst = t.liststring() print lst, "\t->\t", try: print combinator_reduce(lst) except EvaluationException as e: print "*Probable-NON-HALT*"
# # create a function # base_grammar.add_rule('ABSTRACTIONS', 'apply_', ['<<LIST,LIST>,LIST>', '<LIST,LIST>'], 1.) # base_grammar.add_rule('<<LIST,LIST>,LIST>', 'lambda', ['ABSTRACTIONS'], 1., bv_type='LIST', bv_args=['LIST'], bv_p=5.0, bv_prefix='F') base_grammar.add_rule('ABSTRACTIONS', '', ['LIST'], 3.0) # # from random import random # from LOTlib.Eval import primitive # from LOTlib.Primitives import * # # @primitive # def optional_(x, y): # if random() < 0.5: # return cons_(x,y) # else: # return y # # @primitive # def geometric_(x,y): # # geometric number of xes followed by y # if random() < 0.5: # return y # else: # return cons_(x, geometric_(x,y)) if __name__ == "__main__": for _ in xrange(1000): print base_grammar.generate()
# Yuan's version: from LOTlib.Grammar import Grammar base_grammar = Grammar() base_grammar.add_rule('START', 'flatten2str', ['LIST', 'sep=\"\"'], 1.0) base_grammar.add_rule('LIST', 'if_', ['BOOL', 'LIST', 'LIST'], 1.) base_grammar.add_rule('LIST', 'cons_', ['ATOM', 'LIST'], 1./6.) base_grammar.add_rule('LIST', 'cons_', ['LIST', 'LIST'], 1./6.) base_grammar.add_rule('LIST', 'cdr_', ['LIST'], 1./3.) base_grammar.add_rule('LIST', 'car_', ['LIST'], 1./3.) base_grammar.add_rule('LIST', '', ['ATOM'], 3.0) base_grammar.add_rule('LIST', '\'\'', None, 1.0) # base_grammar.add_rule('LIST', 'recurse_', [], 1.) # This is added by factorizedDataHypothesis base_grammar.add_rule('BOOL', 'empty_', ['LIST'], 1.) base_grammar.add_rule('BOOL', 'flip_(p=%s)', ['PROB'], 1.) for i in xrange(1,10): base_grammar.add_rule('PROB', '0.%s' % i, None, 1.) base_grammar.add_rule('LIST', 'recurse_(%s)', ['SELFF'], 1.0) # can call myself if __name__ == "__main__": for _ in xrange(1000): print base_grammar.generate()
t = deepcopy(t) # just make sure it's a copy (may not be necessary) partitions.append(t) # Take each partition, which doesn't have leaves, and generate leaves, setting # it to a random generation (fill in the leaves with random hypotheses) for p in partitions: print "# Initializing partition:", p print ">>", p for n in p.subnodes(): # set to not resample these setattr(n, 'p_propose', 0.0) ## NOTE: Hypothesis proposals must be sensitive to resample_p for this to work! # and fill in the missing leaves with a random generation for i, a in enumerate(n.args): if grammar.is_nonterminal(a): n.args[i] = grammar.generate(a) print "# Initialized %s partitions" % len(partitions) grammar.add_rule('STRING', '%s%s', ['TERMINAL', 'STRING'], 1.0) grammar.add_rule('STRING', '%s', ['TERMINAL'], 1.0) grammar.add_rule('TERMINAL', 'g', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'a', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'i', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'k', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 's', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'f', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'n', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'm', None, TERMINAL_WEIGHT) grammar.add_rule('TERMINAL', 'h', None, TERMINAL_WEIGHT)
    # NOTE(review): tail of a function whose definition precedes this chunk.
    return None

if __name__ == "__main__":
    ## Make a simple grammar for lambda calculus
    from LOTlib.Grammar import Grammar

    G = Grammar()
    # Here, rules creating smaller lambdas are higher prob; created simpler lambdas are also higher prob
    G.add_rule('START', '', ['EXPR'], 1.0)
    G.add_rule('EXPR', 'lambda', ['EXPR'], 2.0, bv_type='EXPR', bv_args=None, bv_p=2.0)
    G.add_rule('EXPR', 'apply_', ['EXPR', 'EXPR'], 1.0)

    # And print some expressions and reduce
    # NOTE(review): lambdastring, lambda_reduce, and EvaluationException are
    # imported/defined elsewhere in this file.
    for _ in xrange(1000):
        t = G.generate()
        try:
            print lambdastring(t)
            print lambdastring(lambda_reduce(t))
        except EvaluationException as e:
            print "***", e, lambdastring(t)
        print "\n"
        # NOTE(review): tail of a likelihood method whose definition precedes
        # this chunk.
        return ll / self.likelihood_temperature

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Main
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if __name__ == "__main__":
    from LOTlib.Inference.Samplers.StandardSample import standard_sample
    from LOTlib.Inference.Samplers.MetropolisHastings import MHSampler
    from LOTlib import break_ctrlc

    #standard_sample(make_hypothesis, make_data, show_skip=9, save_top=False)

    # NOTE(review): `partitions`, `grammar`, `MyHypothesis`, and FunctionData
    # come from earlier in this file, outside this chunk.
    for p in break_ctrlc(partitions):
        print "Starting on partition ", p

        # Now we have to go in and fill in the nodes that are nonterminals
        # We can do this with generate (must pull from github)
        v = grammar.generate(deepcopy(p))
        h0 = MyHypothesis(grammar, value=v)

        # One data point: observed string counts, each seen `size` times.
        size = 100
        data = [FunctionData(input=[],
                             output={'h e s': size, 'm e s': size, 'm e g': size, 'h e g': size, 'm e n': size, 'h e m': size,
                                     'm e k': size, 'k e s': size, 'h e k': size, 'k e N': size, 'k e g': size, 'h e n': size,
                                     'm e N': size, 'k e n': size, 'h e N': size, 'f e N': size, 'g e N': size, 'n e N': size,
                                     'n e s': size, 'f e n': size, 'g e n': size, 'g e m': size, 'f e m': size, 'g e k': size,
                                     'f e k': size, 'f e g': size, 'f e s': size, 'n e g': size, 'k e m': size, 'n e m': size,
                                     'g e s': size, 'n e k': size})]

        # Run MH from h0 and print each sampled hypothesis.
        for h in break_ctrlc(MHSampler(h0, data, steps=1000, skip=0)):
            # Show the partition and the hypothesis
            print h.posterior_score, p, h