class XY(FormalLanguage):
    """ An XY language discussed in Pullum & Gazdar, originally from Chomsky 1963 pg 378-9
    This is the set of all strings xy where x!=y. Note this is CF, contrary to Chomsky
    Here for simplicity we will just use {a,b} as the alphabet
    """

    def __init__(self):
        # (a|b)+ with a geometric-ish length distribution: recursion weight 2.0
        # vs. stop weight 1.0 on each terminal
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):
        # Rejection sampling: draw two halves independently, accept once they differ.
        # NOTE(review): the two halves may have different lengths here, while
        # all_strings below only enumerates equal-length pairs -- confirm which
        # definition of the xy language is intended.
        while True:
            x = str(self.grammar.generate())
            y = str(self.grammar.generate())
            if x != y:
                return x+y

    def all_strings(self):
        # Enumerate xy over all ordered pairs of distinct equal-length strings
        for l in itertools.count(1):
            for x in compute_all_strings(l, alphabet=self.terminals()):
                for y in compute_all_strings(l, alphabet=self.terminals()):
                    if x != y:
                        yield x+y
def __init__(self):
    # One or more nonsense-word tokens; the 2:1 weight on recursion favors longer strings
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', '%s%s', ['T', 'S'], 2.0)
    self.grammar.add_rule('S', '%s', ['T'], 1.0)
    # We are going to put a probability distribution on the words so that they can be
    # evaluated reasonably, otherwise its hard to score uniform
    self.grammar.add_rule('T', 'baa', None, 1.0)
    self.grammar.add_rule('T', 'dii', None, 1.0)
    self.grammar.add_rule('T', 'guuu', None, 1.0)
class AnBm(FormalLanguage):
    """ A^n B^m, m>n, with n, m-n sampled from a geometric """

    def __init__(self):
        # The grammar only generates a^n b^n; the extra b's are appended in sample_string
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sb', ['S'], 1.0)
        self.grammar.add_rule('S', 'ab', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self): # fix that this is not CF
        s = str(self.grammar.generate()) # from a^n b^n
        # m-n is geometric starting at 1, so m > n always holds
        mmn=1
        while random() < 0.5:
            mmn += 1
        s = s+'b'*mmn
        return s

    def all_strings(self):
        for r in itertools.count(1):
            for n,m in partitions(r, 2, 1): # partition into two groups (NOTE: does not return both orders)
                if m>n:
                    yield 'a'*n + 'b'*m
                if n>m:
                    yield 'a'*m + 'b'*n
def __init__(self):
    # a+ followed by b+, each run with its own geometric-ish length
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', '%s%s', ['A', 'B'], 1.0)
    self.grammar.add_rule('A', 'a%s', ['A'], 1.0)
    self.grammar.add_rule('A', 'a', None, 1.0)
    self.grammar.add_rule('B', 'b%s', ['B'], 1.0)
    self.grammar.add_rule('B', 'b', None, 1.0)
class XY(FormalLanguage):
    """ The XY language discussed in Pullum & Gazdar, originally from Chomsky 1963 pg 378-9
    This is the set of all strings xy where x!=y
    """

    def __init__(self):
        # (a|b)+ with equal weights on recursion and stopping
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):
        # Rejection sampling: two independent draws, accept when they differ.
        # NOTE(review): halves may differ in length, but all_strings only
        # enumerates equal-length pairs -- confirm the intended definition.
        while True:
            x = str(self.grammar.generate())
            y = str(self.grammar.generate())
            if x != y:
                return x+y

    def all_strings(self):
        # Enumerate xy over all ordered pairs of distinct equal-length strings
        for l in itertools.count(1):
            for x in compute_all_strings(l, alphabet=self.terminals()):
                for y in compute_all_strings(l, alphabet=self.terminals()):
                    if x != y:
                        yield x+y
def __init__(self):
    # Strings of the form T aaa T: two (a|b)+ fillers around a fixed 'aaa' center
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', '%saaa%s', ['T', 'T'], 2.0)
    self.grammar.add_rule('T', 'a%s', ['T'], 2.0)
    self.grammar.add_rule('T', 'b%s', ['T'], 2.0)
    self.grammar.add_rule('T', 'a', None, 1.0)
    self.grammar.add_rule('T', 'b', None, 1.0)
def __init__(self):
    # This grammar is just a proxy, it gets replaced in sample
    self.grammar = Grammar(start='S')
    # if we make this 2, then we end up with things that are waay too long
    self.grammar.add_rule('S', 'a%s', ['S'], 1.5)
    self.grammar.add_rule('S', 'a', None, 1.0)
class AnBk(FormalLanguage):
    """ A^n B^k, k>n, with n, k-n sampled from a geometric """

    def __init__(self):
        # The grammar only generates a^n b^n; extra b's are appended in sample_string
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sb', ['S'], 2.0)
        self.grammar.add_rule('S', 'ab', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self): # fix that this is not CF
        s = str(self.grammar.generate()) # from a^n b^n
        # k-n is geometric (continue prob 2/3) starting at 1, so k > n always
        mmn = 1
        while random() < (2. / 3.):
            mmn += 1
        s = s + 'b' * mmn
        return s

    def all_strings(self):
        for r in itertools.count(1):
            for n, m in partitions(r, 2, 1): # partition into two groups (NOTE: does not return both orders)
                if m > n:
                    yield 'a' * n + 'b' * m
                if n > m:
                    yield 'a' * m + 'b' * n
class Count(FormalLanguage):
    """ The language ababbabbbabbbb etc """

    def __init__(self):
        # This grammar is just a proxy, it gets replaced in sample
        self.grammar = Grammar(start='S')
        # if we make this 2, then we end up with things that are waay too long
        self.grammar.add_rule('S', 'a%s', ['S'], 1.5)
        self.grammar.add_rule('S', 'a', None, 1.0)

    def sample_string(self):
        # The proxy sample's length n determines the output: a b a bb a bbb ... a b^n
        proxy = str(self.grammar.generate())
        out = ''
        for i in range(len(proxy)):
            out = out + 'a' + 'b' * (i + 1)
        return out

    def terminals(self):
        return list('ab')

    def all_strings(self):
        # n=0 yields the empty string first
        for n in itertools.count(0):
            out = ''
            for i in range(n):
                out = out + 'a' + 'b' * (i + 1)
            yield out
def __init__(self):
    # a^n c+ b^n: matched a/b frame around one or more c's (this variant requires
    # at least one c, via the Cstar -> 'c' base case)
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', 'a%sb', ['S'], 1.0)
    self.grammar.add_rule('S', 'a%sb', ['Cstar'], 1.0)
    self.grammar.add_rule('Cstar', 'c', None, 1.0)
    self.grammar.add_rule('Cstar', 'c%s', ['Cstar'], 1.0)
class AnBmCnDm(FormalLanguage):
    """ a^n b^m c^n d^m -- crossed dependencies (the c-run copies n, the d-run copies m). """

    def __init__(self):
        # Two independent grammars choose n (a-run) and m (b-run); the matching
        # c/d runs are appended in sample_string.
        self.grammarA = Grammar(start='A')
        self.grammarA.add_rule('A', 'a%s', ['A'], 2.0)
        self.grammarA.add_rule('A', 'a', None, 1.0)

        self.grammarB = Grammar(start='B')
        self.grammarB.add_rule('B', 'b%s', ['B'], 2.0)
        self.grammarB.add_rule('B', 'b', None, 1.0)

    def terminals(self):
        return list('abcd')

    def sample_string(self):
        a = str(self.grammarA.generate())
        b = str(self.grammarB.generate())
        return a + b + ('c' * len(a)) + ('d' * len(b))

    def all_strings(self):
        for r in itertools.count(1):
            for n, m in partitions(r, 2, 1): # partition into two groups (NOTE: does not return both orders)
                # BUGFIX: the final run is d's, not a's. The previous code yielded
                # a^n b^m c^n a^m / a^m b^n c^m a^n, which sample_string never produces.
                yield 'a' * n + 'b' * m + 'c' * n + 'd' * m
                if n != m:
                    yield 'a' * m + 'b' * n + 'c' * m + 'd' * n
class Count(FormalLanguage):
    """ The language ababbabbbabbbb etc """

    def __init__(self):
        # This grammar is just a proxy, it gets replaced in sample
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)

    def sample_string(self):
        # The proxy sample's length n determines the output: a b a bb a bbb ... a b^n
        proxy = str(self.grammar.generate())
        out = ''
        for i in range(len(proxy)):
            out = out+'a' + 'b'*(i+1)
        return out

    def terminals(self):
        return list('ab')

    def all_strings(self):
        # n=0 yields the empty string first
        for n in itertools.count(0):
            out = ''
            for i in range(n):
                out = out + 'a' + 'b' * (i + 1)
            yield out
class XXI(FormalLanguage):
    """ A string x, then x "inverted": every a becomes b and every b becomes a. """

    def __init__(self):
        # (a|b)+ with recursion weight 2.0 vs. stop weight 1.0
        self.grammar = Grammar(start='S')
        for rhs, to, w in (('a%s', ['S'], 2.0),
                           ('b%s', ['S'], 2.0),
                           ('a', None, 1.0),
                           ('b', None, 1.0)):
            self.grammar.add_rule('S', rhs, to, w)

    def terminals(self):
        return ['a', 'b']

    def _invert(self, s):
        # Swap a <-> b characterwise (s contains only a's and b's)
        return ''.join('b' if ch == 'a' else 'a' for ch in s)

    def sample_string(self):
        while True:
            x = str(self.grammar.generate())
            if len(x) == 0:  # defensive: skip empty draws
                continue
            return x + self._invert(x)

    def all_strings(self):
        for length in itertools.count(1):
            for x in compute_all_strings(length, alphabet=self.terminals()):
                yield x + self._invert(x)
def __init__(self):
    # One or more words from {tpr, glb, Bdk, PDT}, each equally likely
    # (single-character codings of tupiro/golabu/bidaku/padoti)
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', '%s%s', ['T', 'S'], 1.0)
    self.grammar.add_rule('S', '%s', ['T'], 1.0)
    self.grammar.add_rule('T', 'tpr', None, 0.25)
    self.grammar.add_rule('T', 'glb', None, 0.25)
    self.grammar.add_rule('T', 'Bdk', None, 0.25)
    self.grammar.add_rule('T', 'PDT', None, 0.25)
def __init__(self):
    # Separate grammars for the a-run and the b-run so their lengths are independent
    self.grammarA = Grammar(start='A')
    self.grammarA.add_rule('A', 'a%s', ['A'], 2.0)
    self.grammarA.add_rule('A', 'a', None, 1.0)

    self.grammarB = Grammar(start='B')
    self.grammarB.add_rule('B', 'b%s', ['B'], 2.0)
    self.grammarB.add_rule('B', 'b', None, 1.0)
def __init__(self):
    # (a|b)* prefix followed by a^n b^n (the Q subgrammar)
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
    self.grammar.add_rule('S', 'b%s', ['S'], 1.0)
    self.grammar.add_rule('S', '%s', ['Q'], 2.0)
    self.grammar.add_rule('Q', 'a%sb', ['Q'], 2.0)
    self.grammar.add_rule('Q', 'ab', None, 1.0)
class An(FormalLanguage):
    """ The language a^n, n >= 1. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)

    def terminals(self):
        return list('a')
class ABn(FormalLanguage):
    """ The language (ab)^n, n >= 1. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'ab%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'ab', None, 1.0)

    def terminals(self):
        return list('ab')
def __init__(self, X):
    # X = how many distinct middle fillers to allow (drawn from OTHER_TERMINALS)
    assert X < len(OTHER_TERMINALS)
    self.X = X

    self.grammar = Grammar(start='S')
    # aXd / bXe frames: the first symbol predicts the last, across the filler
    self.grammar.add_rule('S', 'a%sd', ['X'], 1.0)
    self.grammar.add_rule('S', 'b%se', ['X'], 1.0)
    for x in OTHER_TERMINALS[:self.X]:
        self.grammar.add_rule('X', '%s' % x, None, 1.0)
class Dyck(FormalLanguage):
    """ Balanced parentheses (Dyck language); this variant can generate the empty string. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', '(%s)', ['S'], 1.0)  # nesting
        self.grammar.add_rule('S', '()%s', ['S'], 1.0)  # concatenation
        self.grammar.add_rule('S', '', None, 1.0)       # empty base case

    def terminals(self):
        return list('()')
class HudsonKamNewport(FormalLanguage):
    """ From Hudson Kam & Newport, simplifying out words to only POS
    Goal is to investigate learning of probabilities on N+DET vs N (no det)
    Here, we do not include mass/count subcategories on nouns
    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.436.7524&rep=rep1&type=pdf
    """

    def __init__(self):
        self.grammar = Grammar(start='S')

        """ V = transitive verb
            v = intransitive verb
        """
        self.grammar.add_rule('S', '%s%s', ['v', 'NP'], 1.0)
        self.grammar.add_rule('S', '%s%s%s', ['V', 'NP', 'NP'], 1.0)
        # NOTE(review): no expansion rules for 'NP' are added in this method, yet
        # 'n' and 'd' appear in terminals() -- presumably NP rules are defined
        # elsewhere (or added by the base class); confirm before relying on generate()
        self.grammar.add_rule('S', '!%s%s', ['v', 'NP'], 1.0)
        self.grammar.add_rule('S', '!%s%s%s', ['V', 'NP', 'NP'], 1.0)

    def terminals(self):
        return list('!vVnd')

    def all_strings(self):
        # Enumerate strings in the grammar's own enumeration order
        for g in self.grammar.enumerate():
            yield str(g)
class ABAnBn(FormalLanguage):
    """ An AB language followed by AnBn """

    def __init__(self):
        # (a|b)* prefix followed by a^n b^n (the Q subgrammar)
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 1.0)
        self.grammar.add_rule('S', '%s', ['Q'], 2.0)
        self.grammar.add_rule('Q', 'a%sb', ['Q'], 2.0)
        self.grammar.add_rule('Q', 'ab', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self):
        # BUGFIX: the previous body was copy-pasted from the XY language (it drew
        # two samples x, y and returned x+y when x != y). That does not match this
        # language's definition -- the grammar above already generates exactly
        # (a|b)* a^n b^n, so a sample is a single draw from it.
        return str(self.grammar.generate())

    def all_strings(self):
        # Enumeration is not implemented for this language
        assert False
def __init__(self):
    self.grammar = Grammar(start='S')

    """ V = transitive verb
        v = intransitive verb
    """
    self.grammar.add_rule('S', '%s%s', ['v', 'NP'], 1.0)
    self.grammar.add_rule('S', '%s%s%s', ['V', 'NP', 'NP'], 1.0)
    # NOTE(review): no expansion rules for 'NP' are added here -- presumably they
    # are defined elsewhere; confirm before relying on generate()
    self.grammar.add_rule('S', '!%s%s', ['v', 'NP'], 1.0)
    self.grammar.add_rule('S', '!%s%s%s', ['V', 'NP', 'NP'], 1.0)
class AnBnp1Cnp2(FormalLanguage):
    """ a^n b^(n+1) c^(n+2). """

    def __init__(self):
        # The grammar only chooses n (the a-run); b's and c's are appended in sample_string
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)

    def terminals(self):
        return list('abc')

    def sample_string(self): # fix that this is not CF
        s = str(self.grammar.generate())
        # len(s) = n; append b^(n+1) and c^(n+2)
        return s + 'b' * (len(s) + 1) + 'c' * (len(s) + 2)
def __init__(self):
    # Sentences are AP BP or AP BP CP; each phrase is a short fixed chunk,
    # with one embedded CP inside the 'a%sF' BP expansion
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', '%s%s', ['AP', 'BP'], 1.0)
    self.grammar.add_rule('S', '%s%s%s', ['AP', 'BP', 'CP'], 1.0)
    self.grammar.add_rule('AP', 'oA', None, 1.0)
    self.grammar.add_rule('AP', 'oAD', None, 1.0) # two terminals, A,D
    self.grammar.add_rule('BP', 'uE', None, 1.0)
    self.grammar.add_rule('BP', 'a%sF', ['CP'], 1.0)
    self.grammar.add_rule('CP', 'iC', None, 1.0)
    self.grammar.add_rule('CP', 'iCD', None, 1.0)
def __init__(self):
    # Five frames X_Y in which the first letter predicts the last (b..t, g..d,
    # p..r, k..u, l..i); the middle element is one of the digits 1-4
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', 'b%st', ['MID'], 1.0)
    self.grammar.add_rule('S', 'g%sd', ['MID'], 1.0)
    self.grammar.add_rule('S', 'p%sr', ['MID'], 1.0)
    self.grammar.add_rule('S', 'k%su', ['MID'], 1.0)
    self.grammar.add_rule('S', 'l%si', ['MID'], 1.0)
    self.grammar.add_rule('MID', '1', None, 1.0)
    self.grammar.add_rule('MID', '2', None, 1.0)
    self.grammar.add_rule('MID', '3', None, 1.0)
    self.grammar.add_rule('MID', '4', None, 1.0)
def __init__(self, max_length=6):
    # NOTE(review): unlike most grammars in this file, these rules name symbols in
    # a list on the RHS instead of using %s templates -- presumably a different
    # Grammar rule convention; confirm how 'd','n','a','v','t' are rendered as terminals
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', 'S', ['NP', 'VP'], 1.0)
    self.grammar.add_rule('NP', 'NP', ['d', 'AP', 'n'], 1.0)
    self.grammar.add_rule('AP', 'AP', ['a', 'AP'], 1.0)
    self.grammar.add_rule('AP', 'AP', None, 1.0)
    self.grammar.add_rule('VP', 'VP', ['v'], 1.0)
    self.grammar.add_rule('VP', 'VP', ['v', 'NP'], 1.0)
    self.grammar.add_rule('VP', 'VP', ['v', 't', 'S'], 1.0)

    # Delegate remaining setup (presumably storing max_length) to the base class
    FormalLanguage.__init__(self, max_length)
class AnBnCn(FormalLanguage):
    """ a^n b^n c^n -- the classic non-context-free language. """

    def __init__(self):
        # CF grammar for a^n b^n; the c^n tail is appended in sample_string
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sb', ['S'], 1.0)
        self.grammar.add_rule('S', 'ab', None, 1.0)

    def terminals(self):
        return list('abc')

    def sample_string(self): # fix that this is not CF
        s = str(self.grammar.generate())
        # ROBUSTNESS: use floor division -- len(s)/2 becomes a float under Python 3
        # (or `from __future__ import division`), making 'c' * (...) raise TypeError.
        # `//` is byte-identical in behavior on Python 2 here.
        return s + 'c' * (len(s) // 2)
def __init__(self):
    # English-like grammar over POS letters; apparently d=det, n=noun, a=adj,
    # v=verb (with 't' for "that" in 'vt%s') -- see the VP/NP expansions
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', '%s%s', ['NP', 'VP'], 2.0)
    self.grammar.add_rule('NP', 'd%sn', ['AP'], 1.0)
    self.grammar.add_rule('NP', 'dn', None, 1.0)
    self.grammar.add_rule('NP', 'n', None, 1.0)
    self.grammar.add_rule('AP', 'a%s', ['AP'], 1.0)
    self.grammar.add_rule('AP', 'a', None, 2.0)
    self.grammar.add_rule('VP', 'v', None, 1.0)
    self.grammar.add_rule('VP', 'v%s', ['NP'], 1.0)
    self.grammar.add_rule('VP', 'vt%s', ['S'], 1.0)
    self.grammar.add_rule('S', 'i%sh%s', ['S', 'S'], 1.0) # add if S then S grammar
class ABnABAn(FormalLanguage):
    """ (ab)^n (aba)^n, via the rule S -> ab S aba | ababa. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'ab%saba', ['S'], 2.0)
        self.grammar.add_rule('S', 'ababa', None, 1.0)

    def terminals(self):
        return ['a', 'b']

    def all_strings(self):
        # Each derivation of depth n yields 'ab' repeated n times, then 'aba' n times
        for reps in itertools.count(1):
            yield 'ab' * reps + 'aba' * reps
def __init__(self):
    self.grammar = Grammar(start='S')
    # We're going to put a probability distribution on this so that it can be evaluated
    # like everything else (otherwise its top 25 strings are not meaningful since its
    # all uniform!)
    self.grammar.add_rule('S', 'b%st', ['MID'], 1.0)
    self.grammar.add_rule('S', 'g%sd', ['MID'], 1.0)
    self.grammar.add_rule('S', 'p%sr', ['MID'], 1.0)
    self.grammar.add_rule('S', 'k%su', ['MID'], 1.0)
    self.grammar.add_rule('S', 'l%si', ['MID'], 1.0)
    self.grammar.add_rule('MID', '1', None, 1.0)
    self.grammar.add_rule('MID', '2', None, 1.0)
    self.grammar.add_rule('MID', '3', None, 1.0)
    self.grammar.add_rule('MID', '4', None, 1.0)
class Dyck(FormalLanguage):
    """ Balanced parentheses; this variant always generates at least one pair. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', '(%s)', ['S'], 1.0)  # nesting
        self.grammar.add_rule('S', '()%s', ['S'], 1.0)  # concatenation
        self.grammar.add_rule('S', '()', None, 1.0)     # non-empty base case

    def terminals(self):
        return list(')(')

    def all_strings(self):
        # dyck_at_depth presumably enumerates the balanced strings of a given size --
        # see its definition elsewhere in this module
        for n in itertools.count(1):
            for s in dyck_at_depth(n):
                yield s
class WeW(FormalLanguage):
    """ A word w from (a|b)+, repeated len(w) times. """

    def __init__(self):
        # (a|b)+ with recursion weight 2.0 vs. stop weight 1.0
        self.grammar = Grammar(start='S')
        for letter in 'ab':
            self.grammar.add_rule('S', letter + '%s', ['S'], 2.0)
        for letter in 'ab':
            self.grammar.add_rule('S', letter, None, 1.0)

    def terminals(self):
        return ['a', 'b']

    def sample_string(self):
        word = str(self.grammar.generate())
        return word * len(word)
class AnCBn(FormalLanguage):
    """ a^n c b^n: matched a/b runs around a single center marker. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sb', ['S'], 2.0)
        self.grammar.add_rule('S', 'acb', None, 1.0)

    def terminals(self):
        return ['a', 'c', 'b']

    def all_strings(self):
        for depth in itertools.count(1):
            yield 'a' * depth + 'c' + 'b' * depth
class AB(FormalLanguage):
    """ All strings over {a,b} (i.e. (a|b)+), length-biased by the 2:1 recursion weight. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def all_strings(self):
        # All strings of each length, in increasing length order
        for l in itertools.count(1):
            for s in compute_all_strings(l, alphabet='ab'):
                yield s
class An(FormalLanguage):
    """ The language a^n, n >= 1. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)

    def terminals(self):
        return ['a']

    def all_strings(self):
        for length in itertools.count(1):
            yield 'a' * length
class AB(FormalLanguage):
    """ All strings over {a,b} (i.e. (a|b)+), with equal weights on all rules. """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def all_strings(self):
        # All strings of each length, in increasing length order
        for l in itertools.count(1):
            for s in compute_all_strings(l, alphabet='ab'):
                yield s
class AnUAnBn(FormalLanguage):
    """ The union of a^n and a^n b^n; a fair coin chooses which at sampling time. """

    def __init__(self):
        # The grammar only chooses n (the a-run)
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 2.0)
        self.grammar.add_rule('S', 'a', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self): # fix that this is not CF
        s = str(self.grammar.generate())
        # With probability 1/2 return a^n alone, else a^n b^n
        if random() < 0.5:
            return s
        else:
            return s + "b" * len(s)
def __init__(self):
    # a^n c* b^n: matched a/b frame; the '' base case allows zero c's
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', 'a%sb', ['S'], 1.0)
    self.grammar.add_rule('S', 'a%sb', ['Cstar'], 1.0)
    self.grammar.add_rule('Cstar', '', None, 1.0)
    self.grammar.add_rule('Cstar', 'c%s', ['Cstar'], 1.0)
def __init__(self, X):
    # X = how many distinct middle fillers to allow (drawn from OTHER_TERMINALS)
    assert X < len(OTHER_TERMINALS)
    self.X=X

    self.grammar = Grammar(start='S')
    # aXd / bXe frames: the first symbol predicts the last, across the filler
    self.grammar.add_rule('S', 'a%sd', ['X'], 1.0)
    self.grammar.add_rule('S', 'b%se', ['X'], 1.0)
    for x in OTHER_TERMINALS[:self.X]:
        self.grammar.add_rule('X', '%s'%x, None, 1.0)
class AnBmCmAn(FormalLanguage):
    """ a^n b^m c^m a^n -- nested matched dependencies (context-free). """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%sa', ['S'], 1.0)
        self.grammar.add_rule('S', 'a%sa', ['T'], 1.0)
        self.grammar.add_rule('T', 'b%sc', ['T'], 1.0)
        self.grammar.add_rule('T', 'bc', None, 1.0)

    def terminals(self):
        # NOTE(review): 'd' never appears in any string this grammar generates --
        # possibly copied from AnBmCnDm; confirm whether the alphabet should be 'abc'
        return list('abcd')

    def sample_string(self):
        return str(self.grammar.generate())

    def all_strings(self):
        for r in itertools.count(1):
            for n,m in partitions(r, 2, 1): # partition into two groups (NOTE: does not return both orders)
                yield 'a'*n + 'b'*m + 'c'*m + 'a'*n
                if n != m:
                    yield 'a'*m + 'b'*n + 'c'*n + 'a'*m
class Gomez(FormalLanguage):
    """ Gomez (2002) language 1b """

    def __init__(self, X):
        # X = how many distinct middle fillers to allow (the set-size manipulation)
        assert X < len(OTHER_TERMINALS)
        self.X=X

        self.grammar = Grammar(start='S')
        # aXd / bXe frames: the first symbol predicts the last, across the filler
        self.grammar.add_rule('S', 'a%sd', ['X'], 1.0)
        self.grammar.add_rule('S', 'b%se', ['X'], 1.0)
        for x in OTHER_TERMINALS[:self.X]:
            self.grammar.add_rule('X', '%s'%x, None, 1.0)

    def terminals(self):
        # Frame letters plus however many fillers this instance uses
        return list('abde'+OTHER_TERMINALS[:self.X] )

    def all_strings(self):
        for g in self.grammar.enumerate():
            yield str(g)
def generate(self, x='*USE_START*', d=0):
    """
    RealValueGrammar.generate may create gaussians or uniforms when given "*gaussian*"
    and "*uniform*" as the nonterminal type. Otherwise, this is identical to
    LOTlib.Grammar.generate.
    """
    if x == '*USE_START*':
        x = self.start

    if x == '*gaussian*':
        # TODO: HIGHLY EXPERIMENTAL!!
        # Wow this is really terrible for mixing...
        v = np.random.normal()
        gp = normlogpdf(v, 0.0, 1.0)
        return FunctionNode(returntype=x, name=str(v), args=None, generation_probability=gp, ruleid=0, resample_p=CONSTANT_RESAMPLE_P) ##TODO: FIX THE ruleid
    elif x == '*uniform*':
        v = np.random.rand()
        gp = 0.0  # log-density of Uniform(0,1) is 0 everywhere on its support
        return FunctionNode(returntype=x, name=str(v), args=None, generation_probability=gp, ruleid=0, resample_p=CONSTANT_RESAMPLE_P) ##TODO: FIX THE ruleid
    else:
        # BUGFIX: the parent call's result was previously discarded (no `return`),
        # so generate() returned None for every ordinary nonterminal.
        return Grammar.generate(self, x, d=d)
def __init__(self):
    # English-like grammar over POS letters; apparently d=det, n=noun, a=adj,
    # v=verb (with 't' for "that" in 'vt%s') -- see the VP/NP expansions
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', '%s%s', ['NP', 'VP'], 2.0)
    self.grammar.add_rule('NP', 'd%sn', ['AP'], 1.0)
    self.grammar.add_rule('NP', 'dn', None, 1.0)
    self.grammar.add_rule('NP', 'n', None, 1.0)
    self.grammar.add_rule('AP', 'a%s', ['AP'], 1.0)
    self.grammar.add_rule('AP', 'a', None, 2.0)
    self.grammar.add_rule('VP', 'v', None, 1.0)
    self.grammar.add_rule('VP', 'v%s', ['NP'], 1.0)
    self.grammar.add_rule('VP', 'vt%s', ['S'], 1.0)
    self.grammar.add_rule('S', 'i%sh%s', ['S','S'], 1.0) # add if S then S grammar
class ABA(FormalLanguage):
    """ Similar to Marcus ABB experiment, except we allow AAA (for simplicity) """

    def __init__(self):
        self.grammar = Grammar(start='S') # NOTE: This grammar does not capture the rule -- we do that in sample!
        self.grammar.add_rule('S', '%s%s', ['T','T'], 1.0)

        for t in self.terminals():
            self.grammar.add_rule('T', t, None, 1.0)

    def sample_string(self): # fix that this is not CF
        s = str(self.grammar.generate())
        return s + s[0] # copy the first element

    def terminals(self):
        return list('gGtTnNlL') # ga gi ta ti na ni la li

    def all_strings(self):
        # All t1 t2 t1 triples, including t1 == t2 (the "AAA" case mentioned above)
        for t1 in self.terminals():
            for t2 in self.terminals():
                yield t1 + t2 + t1
class XX(FormalLanguage):
    """ An xx language (for discussion see Gazdar & Pullum 1982) """

    def __init__(self):
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', 'a%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'b%s', ['S'], 1.0)
        self.grammar.add_rule('S', 'a', None, 1.0)
        self.grammar.add_rule('S', 'b', None, 1.0)

    def terminals(self):
        return list('ab')

    def sample_string(self): # fix that this is not CF
        s = str(self.grammar.generate()) # from (a+b)+
        return s+s # xx language

    def all_strings(self):
        # Every string over {a,b}, doubled, in increasing length order
        for l in itertools.count(1):
            for s in compute_all_strings(l, alphabet='ab'):
                yield s + s
class XXR(FormalLanguage):
    """ (a,b)+ strings followed by their reverse. This can be generated by a CFG """

    def __init__(self):
        # (a|b)+ with equal weights on recursion and stopping
        self.grammar = Grammar(start='S')
        for letter in 'ab':
            self.grammar.add_rule('S', letter + '%s', ['S'], 1.0)
        for letter in 'ab':
            self.grammar.add_rule('S', letter, None, 1.0)

    def terminals(self):
        return ['a', 'b']

    def sample_string(self): # fix that this is not CF
        half = str(self.grammar.generate()) # from {a,b}+
        return half + half[::-1]

    def all_strings(self):
        for length in itertools.count(1):
            for half in compute_all_strings(length, alphabet='ab'):
                yield half + ''.join(reversed(half))
class Saffran(FormalLanguage):
    """ From Saffran, Aslin, Newport studies. Strings consisting of tupiro golabu bidaku padoti
    coded here with single characters: tpr glb Bdk PDT
    """

    def __init__(self):
        # One or more words, each drawn uniformly from the four word types
        self.grammar = Grammar(start='S')
        self.grammar.add_rule('S', '%s%s', ['T', 'S'], 1.0)
        self.grammar.add_rule('S', '%s', ['T'], 1.0)
        self.grammar.add_rule('T', 'tpr', None, 0.25)
        self.grammar.add_rule('T', 'glb', None, 0.25)
        self.grammar.add_rule('T', 'Bdk', None, 0.25)
        self.grammar.add_rule('T', 'PDT', None, 0.25)

    def terminals(self):
        return list('tprglbBdkPDT')

    def all_strings(self):
        # Enumerate strings in the grammar's own enumeration order
        for g in self.grammar.enumerate():
            yield str(g)
def __init__(self):
    # Hand-coded finite-state machine: each rule emits one symbol and moves to the
    # next state (S, S1..S4)
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', 'T%s', ['S1'], 1.0)
    self.grammar.add_rule('S', 'V%s', ['S3'], 1.0)
    self.grammar.add_rule('S1', 'P%s', ['S1'], 1.0)
    self.grammar.add_rule('S1', 'T%s', ['S2'], 1.0)
    self.grammar.add_rule('S3', 'X%s', ['S3'], 1.0)
    self.grammar.add_rule('S3', 'V%s', ['S4'], 1.0)
    self.grammar.add_rule('S2', 'X%s', ['S3'], 1.0)
    # NOTE(review): these two rules emit a literal 'S' character (not the start
    # symbol) -- presumably an explicit end marker; confirm against the intended FSM
    self.grammar.add_rule('S2', 'S', None, 1.0)
    self.grammar.add_rule('S4', 'P%s', ['S2'], 1.0)
    self.grammar.add_rule('S4', 'S', None, 1.0)
def __init__(self):
    # English-like grammar over POS letters (see rule comments below for the key)
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', '%s%s', ['NP', 'VP'], 4.0)
    self.grammar.add_rule('NP', 'd%sn', ['AP'], 1.0)
    self.grammar.add_rule('NP', 'dn', None, 1.0)
    self.grammar.add_rule('NP', 'n', None, 2.0)
    self.grammar.add_rule('AP', 'a%s', ['AP'], 1.0)
    self.grammar.add_rule('AP', 'a', None, 3.0)

    # PP-attachment ambiguity was tried and left disabled:
    #self.grammar.add_rule('NP', '%s%s', ['NP', 'PP'], 1.0) # a little ambiguity
    #self.grammar.add_rule('VP', '%s%s', ['VP', 'PP'], 1.0)
    #self.grammar.add_rule('PP', 'p%s', ['NP'], 1.0)

    self.grammar.add_rule('VP', 'v', None, 2.0) # intransitive
    self.grammar.add_rule('VP', 'v%s', ['NP'], 1.0) # transitive
    self.grammar.add_rule('VP', 'vt%s', ['S'], 1.0) # v that S
def __init__(self):
    # Hand-coded finite-state machine: each rule emits one letter and moves to the
    # next state (S, S1..S7); 'o' at S4 is the only terminating emission
    self.grammar = Grammar(start='S')
    self.grammar.add_rule('S', 'J%s', ['S1'], 1.0)
    self.grammar.add_rule('S1', 'g%s', ['S4'], 1.0)
    self.grammar.add_rule('S1', 'G%s', ['S4'], 1.0)
    self.grammar.add_rule('S1', 'd%s', ['S3'], 1.0)
    self.grammar.add_rule('S1', 'D%s', ['S3'], 1.0)
    self.grammar.add_rule('S1', 'i%s', ['S6'], 1.0)
    self.grammar.add_rule('S1', 'w%s', ['S6'], 1.0)
    self.grammar.add_rule('S1', 'h%s', ['S5'], 1.0)
    self.grammar.add_rule('S1', 'H%s', ['S5'], 1.0)
    self.grammar.add_rule('S1', 'm%s', ['S2'], 1.0)
    self.grammar.add_rule('S1', 'M%s', ['S2'], 1.0)
    self.grammar.add_rule('S2', 'j%s', ['S5'], 1.0)
    self.grammar.add_rule('S2', 'E%s', ['S6'], 1.0)
    self.grammar.add_rule('S2', 'V%s', ['S4'], 1.0)
    self.grammar.add_rule('S3', 'e%s', ['S7'], 1.0)
    self.grammar.add_rule('S3', 'V%s', ['S4'], 1.0)
    self.grammar.add_rule('S4', 'o', None, 1.0)
    # NOTE(review): exact duplicate of the previous rule (doubles the weight of
    # this emission) -- possibly one of the two was meant to differ; confirm
    self.grammar.add_rule('S4', 'o', None, 1.0)
    self.grammar.add_rule('S5', 'N%s', ['S4'], 1.0)
    self.grammar.add_rule('S5', 'B%s', ['S6'], 1.0)
    self.grammar.add_rule('S6', 'b%s', ['S7'], 1.0)
    self.grammar.add_rule('S6', 'v%s', ['S4'], 1.0)
    self.grammar.add_rule('S6', 'N%s', ['S4'], 1.0)
    self.grammar.add_rule('S7', 'N%s', ['S4'], 1.0)
from LOTlib.Grammar import Grammar from LOTlib.Hypotheses.LOTHypothesis import LOTHypothesis from LOTlib.DataAndObjects import FunctionData from LOTlib.Inference.MetropolisHastings import mh_sample from math import log # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # A simple grammar for scheme, including lambda G = Grammar() # A very simple version of lambda calculus G.add_rule('START', '', ['EXPR'], 1.0) G.add_rule('EXPR', 'apply_', ['FUNC', 'EXPR'], 1.0) G.add_rule('EXPR', 'x', None, 5.0) G.add_rule('FUNC', 'lambda', ['EXPR'], 1.0, bv_type='EXPR', bv_args=None) G.add_rule('EXPR', 'cons_', ['EXPR', 'EXPR'], 1.0) G.add_rule('EXPR', 'cdr_', ['EXPR'], 1.0) G.add_rule('EXPR', 'car_', ['EXPR'], 1.0) G.add_rule('EXPR', '[]', None, 1.0) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # A class for scheme hypotheses that just computes the input/output pairs with the appropriate probability class SchemeFunction(LOTHypothesis): # Prior, proposals, __init__ are all inherited from LOTHypothesis def compute_single_likelihood(self, datum, response):
NCONSTANTS = 4 CONSTANT_NAMES = ["C%i" % i for i in xrange(NCONSTANTS)] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Define the grammar # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ from LOTlib.Grammar import Grammar grammar = Grammar() grammar.add_rule("START", "", ["EXPR"], 1.0) grammar.add_rule("EXPR", "plus_", ["EXPR", "EXPR"], 1.0) grammar.add_rule("EXPR", "times_", ["EXPR", "EXPR"], 1.0) grammar.add_rule("EXPR", "divide_", ["EXPR", "EXPR"], 1.0) grammar.add_rule("EXPR", "subtract_", ["EXPR", "EXPR"], 1.0) grammar.add_rule("EXPR", "exp_", ["EXPR"], 1.0) grammar.add_rule("EXPR", "log_", ["EXPR"], 1.0) grammar.add_rule("EXPR", "pow_", ["EXPR", "EXPR"], 1.0) # including this gives lots of overflow grammar.add_rule("EXPR", "sin_", ["EXPR"], 1.0) grammar.add_rule("EXPR", "cos_", ["EXPR"], 1.0) grammar.add_rule("EXPR", "tan_", ["EXPR"], 1.0) grammar.add_rule("EXPR", "x", None, 5.0) # these terminals should have None for their function type; the literals grammar.add_rule("EXPR", "1.0", None, 5.0) # Supplement the grammar for c in CONSTANT_NAMES: