def insert_tags(thechart, tokens):
    """
    Initialize a chart parser based on the results of a tagger.
    """
    # Relies on a module-level ``tagger`` object (not passed as a parameter).
    tagged_tokens = list(tagger.tag(tokens))
    for i in range(len(tagged_tokens)):
        word, tag = tagged_tokens[i]
        leafedge = chart.LeafEdge(word, i)
        thechart.insert(chart.TreeEdge((i, i + 1), cfg.Nonterminal(tag),
                                       [word], dot=1),
                        [leafedge])
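# Illustration (hypothetical tagger output): if ``tagger.tag`` maps the
# tokens ['the', 'dog'] to [('the', 'Det'), ('dog', 'N')], the loop above
# inserts the complete edges  Det -> the *  over span (0, 1)  and
# N -> dog *  over span (1, 2), each paired with its leaf edge.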
def insert_tags(thechart, tokens):
    for i in range(len(tokens)):
        word = tokens[i]
        results = kimmorules.recognize(word.lower())
        for surface, feat in results:
            # Strip an optional PREFIX('...') wrapper from the feature
            # string, then take everything up to the first '(' as the POS.
            match = re.match(r"PREFIX\('.*?'\)(.*?)\(.*", feat)
            if match:
                pos = match.groups()[0]
            else:
                pos = feat.split('(')[0]
            print surface, pos
            leafedge = chart.LeafEdge(word, i)
            thechart.insert(chart.TreeEdge((i, i + 1), cfg.Nonterminal(pos),
                                           [word], dot=1),
                            [leafedge])
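# Example of the feature-string handling above: a feature string of the
# form  PREFIX('un')ADJ(...)  matches the regexp and yields pos='ADJ',
# while a string such as  N(...)  falls through to the split and yields
# pos='N'.  (The example strings are hypothetical; only the parsing logic
# is taken from the code.)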
def _trace_stack(self, stack, remaining_text, marker=' '):
    """
    Print trace output displaying the given stack and text.

    @rtype: C{None}
    @param marker: A character that is printed to the left of the
        stack.  This is used with trace level 2 to print 'S'
        before shifted stacks and 'R' before reduced stacks.
    """
    str = ' ' + marker + ' [ '
    for elt in stack:
        if isinstance(elt, Tree):
            str += `cfg.Nonterminal(elt.node)` + ' '
        else:
            str += `elt` + ' '
    str += '* ' + string.join(remaining_text) + ']'
    print str
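# Shape of a trace line produced above (contents are hypothetical):
#
#   <marker> [ <stack elements; reduced subtrees shown as nonterminals> * <remaining tokens>]
#
# With trace level 2, 'S' marks a shifted stack and 'R' a reduced one.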
def grammar(terminals):
    (S, Expr, Star, Plus, Qmk, Paren) = [cfg.Nonterminal(s) for s in 'SE*+?(']
    rules = [cfg.WeightedProduction(Expr, [Star], prob=0.2),
             cfg.WeightedProduction(Expr, [Plus], prob=0.2),
             cfg.WeightedProduction(Expr, [Qmk], prob=0.2),
             cfg.WeightedProduction(Expr, [Paren], prob=0.2),

             cfg.WeightedProduction(S, [Expr], prob=0.5),
             cfg.WeightedProduction(S, [S, Expr], prob=0.5),

             cfg.WeightedProduction(Star, [Expr, '*'], prob=1),
             cfg.WeightedProduction(Plus, [Expr, '+'], prob=1),
             cfg.WeightedProduction(Qmk, [Expr, '?'], prob=1),
             cfg.WeightedProduction(Paren, ['(', S, ')'], prob=1)]

    prob_term = 0.2 / len(terminals)  # divide remaining prob. mass
    for terminal in terminals:
        rules.append(cfg.WeightedProduction(Expr, [terminal], prob=prob_term))

    return cfg.WeightedGrammar(S, rules)
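# Hedged usage sketch: build the regular-expression grammar over a small,
# hypothetical terminal alphabet.  The four operator productions for Expr
# carry probability 0.2 each, and the remaining 0.2 of Expr's mass is
# split evenly over the terminals, so Expr's productions sum to 1.
#
#     regex_grammar = grammar(['a', 'b', 'c'])
#     print regex_grammar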
def _trace_fringe(self, tree, treeloc=None):
    """
    Print trace output displaying the fringe of C{tree}.  The
    fringe of C{tree} consists of all of its leaves and all of
    its childless subtrees.

    @rtype: C{None}
    """
    if treeloc == ():
        print "*",
    if isinstance(tree, Tree):
        if len(tree) == 0:
            print `cfg.Nonterminal(tree.node)`,
        for i in range(len(tree)):
            if treeloc is not None and i == treeloc[0]:
                self._trace_fringe(tree[i], treeloc[1:])
            else:
                self._trace_fringe(tree[i])
    else:
        print `tree`,
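# Illustration (hypothetical tree): for Tree('S', [Tree('NP', ['I']),
# Tree('VP', ['saw'])]) with treeloc=(1,), the fringe prints as
#
#   'I' * 'saw'
#
# i.e. the leaves in order, with '*' marking the subtree at treeloc.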
def demo():
    """
    Create a shift reduce parser demo, using a simple grammar and text.
    """
    from nltk import cfg
    nonterminals = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [cfg.Nonterminal(s)
                                           for s in nonterminals.split()]

    productions = (
        # Syntactic Productions
        cfg.Production(S, [NP, VP]),
        cfg.Production(NP, [Det, N]),
        cfg.Production(NP, [NP, PP]),
        cfg.Production(VP, [VP, PP]),
        cfg.Production(VP, [V, NP, PP]),
        cfg.Production(VP, [V, NP]),
        cfg.Production(PP, [P, NP]),

        # Lexical Productions
        cfg.Production(NP, ['I']),
        cfg.Production(Det, ['the']),
        cfg.Production(Det, ['a']),
        cfg.Production(N, ['man']),
        cfg.Production(V, ['saw']),
        cfg.Production(P, ['in']),
        cfg.Production(P, ['with']),
        cfg.Production(N, ['park']),
        cfg.Production(N, ['dog']),
        cfg.Production(N, ['statue']),
        cfg.Production(Det, ['my']),
        )
    grammar = cfg.Grammar(S, productions)

    # tokenize the sentence
    sent = 'my dog saw a man in the park with a statue'.split()

    ShiftReduceDemo(grammar, sent).mainloop()
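# Usage sketch: as is usual for NLTK demo modules, the demo is presumably
# meant to be launched by running the file directly.
if __name__ == '__main__':
    demo()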
def _starter_edge(self, start_sym):
    """Return a 'starter edge' that expands to the start symbol."""
    root = cfg.Nonterminal('[INIT]')
    return TreeEdge((0, 0), root, (start_sym,))
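# Hedged illustration: with start_sym = S, the edge returned above behaves
# like the dotted production  [INIT] -> * S  over the empty span (0, 0);
# the dot defaults to position 0, so the parser must still find an S edge
# covering the input before this starter edge can be completed.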