def demo():
    """
    Open a C{CFGEditor} window on a small sample grammar.

    The sample grammar is parsed from a textual description with
    C{cfg.parse_grammar}.  A callback that prints the grammar it is
    given is passed to the editor.  The function then runs the Tk main
    loop, so it does not return until that loop ends (the 'Quit' button
    destroys the top-level window).

    @rtype: C{None}
    """
    from en.parser.nltk_lite.parse import cfg

    symbol_names = 'S VP NP PP P N Name V Det'.split()
    (S, VP, NP, PP, P, N, Name, V, Det) = [cfg.Nonterminal(name)
                                           for name in symbol_names]

    # One production per line.
    grammar_text = """
    S -> NP VP
    PP -> P NP
    PP -> 'up' 'over' NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'with'
    """
    grammar = cfg.parse_grammar(grammar_text)

    def print_grammar(grammar):
        # Callback handed to the editor: echo the grammar it passes in.
        print(grammar)

    root = Tk()
    editor = CFGEditor(root, grammar, print_grammar)
    Label(root, text='\nTesting CFG Editor\n').pack()
    Button(root, text='Quit', command=root.destroy).pack()
    root.mainloop()
def demo2():
    """
    Open a C{CFGDemo} window for a small hand-built grammar and the
    sentence 'I saw a man in the park'.

    The grammar is assembled directly from C{cfg.Production} objects
    (syntactic productions first, then lexical ones) with C{S} as its
    start symbol.  The function runs the demo's main loop and returns
    when that loop ends.

    @rtype: C{None}
    """
    from en.parser.nltk_lite.parse import cfg

    symbol_names = 'S VP NP PP P N Name V Det'.split()
    (S, VP, NP, PP, P, N, Name, V, Det) = [cfg.Nonterminal(name)
                                           for name in symbol_names]

    syntactic = (
        cfg.Production(S, [NP, VP]),
        cfg.Production(NP, [Det, N]),
        cfg.Production(NP, [NP, PP]),
        cfg.Production(VP, [VP, PP]),
        cfg.Production(VP, [V, NP, PP]),
        cfg.Production(VP, [V, NP]),
        cfg.Production(PP, [P, NP]),
        cfg.Production(PP, []),
        cfg.Production(PP, ['up', 'over', NP]),
    )
    lexical = (
        cfg.Production(NP, ['I']),
        cfg.Production(Det, ['the']),
        cfg.Production(Det, ['a']),
        cfg.Production(N, ['man']),
        cfg.Production(V, ['saw']),
        cfg.Production(P, ['in']),
        cfg.Production(P, ['with']),
        cfg.Production(N, ['park']),
        cfg.Production(N, ['dog']),
        cfg.Production(N, ['statue']),
        cfg.Production(Det, ['my']),
    )
    # Concatenation keeps the productions in a single tuple, syntactic
    # productions first.
    productions = syntactic + lexical

    grammar = cfg.Grammar(S, productions)
    tokens = 'I saw a man in the park'.split()

    window = CFGDemo(grammar, tokens)
    window.mainloop()
def _child_names(tree):
    """
    Return the right-hand-side symbols for the children of C{tree}.

    Each child that is a C{Tree} is represented by a C{cfg.Nonterminal}
    built from its C{node}; any other child is included unchanged.

    @param tree: the tree (or other iterable) whose children are listed
    @rtype: C{list}
    """
    return [cfg.Nonterminal(kid.node) if isinstance(kid, Tree) else kid
            for kid in tree]
def _trace_stack(self, stack, remaining_text, marker=' '):
    """
    Print trace output displaying the given stack and text.

    The printed line consists of C{marker}, an opening bracket, the
    C{repr} of every stack element (subtrees are shown as the
    C{cfg.Nonterminal} for their node), a C{*} separating the stack
    from the remaining text, the remaining text joined by single
    spaces, and a closing bracket.

    @rtype: C{None}
    @param stack: The stack elements to display, bottom first.
    @param remaining_text: The strings that follow the C{*}.
    @param marker: A character that is printed to the left of the
        stack.  This is used with trace level 2 to print 'S' before
        shifted stacks and 'R' before reduced stacks.
    """
    # Collect the pieces and join once; this also avoids a local named
    # 'str' that would shadow the builtin.
    pieces = [' ' + marker + ' [ ']
    for elt in stack:
        if isinstance(elt, Tree):
            pieces.append(repr(cfg.Nonterminal(elt.node)) + ' ')
        else:
            pieces.append(repr(elt) + ' ')
    pieces.append('* ' + ' '.join(remaining_text) + ']')
    # repr() and a single-argument print(...) behave the same on
    # Python 2 and Python 3, unlike backticks and the print statement.
    print(''.join(pieces))
def grammar(terminals):
    """
    Build a probabilistic grammar over the given terminals.

    The start symbol C{S} expands to a starred, plussed, optional or
    parenthesised expression (probability 0.2 each), to a sequence
    C{S S} (probability 0.1), or to a single terminal.  The remaining
    0.1 of probability mass is shared equally among C{terminals}.

    @param terminals: the terminal symbols; must be non-empty and
        support C{len}
    @return: the result of C{pcfg.Grammar(S, rules)}
    """
    S, Star, Plus, Qmk, Paren = [cfg.Nonterminal(symbol)
                                 for symbol in 'S*+?(']

    rules = [
        pcfg.Production(S, [Star], prob=0.2),
        pcfg.Production(S, [Plus], prob=0.2),
        pcfg.Production(S, [Qmk], prob=0.2),
        pcfg.Production(S, [Paren], prob=0.2),
        pcfg.Production(S, [S, S], prob=0.1),
        pcfg.Production(Star, [S, '*'], prob=1),
        pcfg.Production(Plus, [S, '+'], prob=1),
        pcfg.Production(Qmk, [S, '?'], prob=1),
        pcfg.Production(Paren, ['(', S, ')'], prob=1),
    ]

    # The S rules above use 0.9 of the probability mass; divide the
    # remaining 0.1 evenly over the terminals.
    share = 0.1/len(terminals)
    rules += [pcfg.Production(S, [symbol], prob=share)
              for symbol in terminals]

    return pcfg.Grammar(S, rules)
def _trace_fringe(self, tree, treeloc=None): """ Print trace output displaying the fringe of C{tree}. The fringe of C{tree} consists of all of its leaves and all of its childless subtrees. @rtype: C{None} """ if treeloc == (): print "*", if isinstance(tree, Tree): if len(tree) == 0: print `cfg.Nonterminal(tree.node)`, for i in range(len(tree)): if treeloc is not None and i == treeloc[0]: self._trace_fringe(tree[i], treeloc[1:]) else: self._trace_fringe(tree[i]) else: print `tree`,
def productions(self):
    """
    Generate the productions that correspond to the non-terminal
    nodes of the tree.  For each subtree of the form (P: C1 C2 ... Cn)
    this produces a production of the form P -> C1 C2 ... Cn.

    The production for this node comes first, followed by the
    productions of each child subtree in order.

    @rtype: list of C{cfg.Production}s
    @raise TypeError: if this tree's node label is not a C{str}
    """
    if not isinstance(self.node, str):
        # Call-form raise works on both Python 2 and Python 3; the
        # "raise Type, message" statement form is Python-2-only.
        raise TypeError('Productions can only be generated from trees '
                        'having node labels that are strings')

    prods = [cfg.Production(cfg.Nonterminal(self.node), _child_names(self))]
    for child in self:
        if isinstance(child, Tree):
            prods.extend(child.productions())
    return prods
def demo():
    """
    Run the shift-reduce parser demo on a small grammar and a sample
    sentence.

    The grammar is assembled from C{cfg.Production} objects with C{S}
    as its start symbol; the sentence is split with
    C{tokenize.whitespace}.  The function runs the main loop of a
    C{ShiftReduceDemo} and returns when that loop ends.

    @rtype: C{None}
    """
    from en.parser.nltk_lite.parse import cfg

    symbol_names = 'S VP NP PP P N Name V Det'.split()
    (S, VP, NP, PP, P, N, Name, V, Det) = [cfg.Nonterminal(name)
                                           for name in symbol_names]

    syntactic = (
        cfg.Production(S, [NP, VP]),
        cfg.Production(NP, [Det, N]),
        cfg.Production(NP, [NP, PP]),
        cfg.Production(VP, [VP, PP]),
        cfg.Production(VP, [V, NP, PP]),
        cfg.Production(VP, [V, NP]),
        cfg.Production(PP, [P, NP]),
    )
    lexical = (
        cfg.Production(NP, ['I']),
        cfg.Production(Det, ['the']),
        cfg.Production(Det, ['a']),
        cfg.Production(N, ['man']),
        cfg.Production(V, ['saw']),
        cfg.Production(P, ['in']),
        cfg.Production(P, ['with']),
        cfg.Production(N, ['park']),
        cfg.Production(N, ['dog']),
        cfg.Production(N, ['statue']),
        cfg.Production(Det, ['my']),
    )
    # Concatenation keeps the productions in a single tuple, syntactic
    # productions first.
    productions = syntactic + lexical

    grammar = cfg.Grammar(S, productions)

    # tokenize the sentence
    sentence = 'my dog saw a man in the park with a statue'
    sent = list(tokenize.whitespace(sentence))

    ShiftReduceDemo(grammar, sent).mainloop()