Пример #1
0
 def insert_tags(thechart, tokens):
     """
     Seed a chart with the output of a tagger.

     The tokens are tagged with C{tagger.tag}.  For every (word, tag)
     pair at index i, a tree edge spanning (i, i+1) is inserted into
     C{thechart}; its left-hand side is a nonterminal named after the
     tag, its right-hand side is the word, and its dot is at 1.  A
     leaf edge for the word is passed along in a one-element list.
     """
     # Materialise the tagger output before touching the chart.
     for position, (word, tag) in enumerate(list(tagger.tag(tokens))):
         leaf = chart.LeafEdge(word, position)
         tag_edge = chart.TreeEdge((position, position + 1),
                                   cfg.Nonterminal(tag), [word], dot=1)
         thechart.insert(tag_edge, [leaf])
Пример #2
0
 def insert_tags(thechart, tokens):
     for i in range(len(tokens)):
         word = tokens[i]
         results = kimmorules.recognize(word.lower())
         for surface, feat in results:
             match = re.match(r"PREFIX\('.*?'\)(.*?)\(.*", feat)
             if match: pos = match.groups()[0]
             else: pos = feat.split('(')[0]
             print surface, pos
             leafedge = chart.LeafEdge(word, i)
             thechart.insert(
                 chart.TreeEdge((i, i + 1),
                                cfg.Nonterminal(pos), [word],
                                dot=1), [leafedge])
Пример #3
0
 def _trace_stack(self, stack, remaining_text, marker=' '):
     """
     Print one line of trace output showing the stack followed by
     the remaining text.

     Every stack element that is a C{Tree} is shown as the repr of a
     nonterminal built from its node; any other element is shown as
     its own repr.  A C{'*'} separates the stack from the remaining
     text, which is joined with single spaces.

     @rtype: C{None}
     @param stack: The stack elements to display, in order.
     @param remaining_text: The strings that follow the stack.
     @param marker: A character that is printed to the left of the
         stack.  This is used with trace level 2 to print 'S'
         before shifted stacks and 'R' before reduced stacks.
     """
     prefix = '  ' + marker + ' [ '
     shown = []
     for item in stack:
         if isinstance(item, Tree):
             # Trees are displayed by their node label only.
             item = cfg.Nonterminal(item.node)
         shown.append(repr(item) + ' ')
     suffix = '* ' + ' '.join(remaining_text) + ']'
     print(prefix + ''.join(shown) + suffix)
Пример #4
0
def grammar(terminals):
    """
    Build a weighted grammar for simple regular-expression syntax.

    The nonterminals are named C{S}, C{E}, C{*}, C{+}, C{?} and C{(}.
    An expression expands to each of the four operator forms with
    probability 0.2; the remaining 0.2 is split evenly among the
    given terminals.

    NOTE: an empty C{terminals} raises C{ZeroDivisionError}, because
    the remaining mass is divided by C{len(terminals)}.

    @param terminals: A sized collection of terminal symbols.
    @return: A C{cfg.WeightedGrammar} whose start symbol is C{S}.
    """
    S, Expr, Star, Plus, Qmk, Paren = map(cfg.Nonterminal, 'SE*+?(')
    make = cfg.WeightedProduction

    # Expr -> one of the four operator forms.
    rules = [make(Expr, [form], prob=0.2)
             for form in (Star, Plus, Qmk, Paren)]

    # S is a non-empty sequence of expressions.
    rules.append(make(S, [Expr], prob=0.5))
    rules.append(make(S, [S, Expr], prob=0.5))

    # Postfix operators, then parenthesised grouping.
    postfix = ((Star, '*'), (Plus, '+'), (Qmk, '?'))
    rules += [make(head, [Expr, symbol], prob=1)
              for head, symbol in postfix]
    rules.append(make(Paren, ['(', S, ')'], prob=1))

    # Divide the remaining probability mass among the terminals.
    share = 0.2 / len(terminals)
    rules.extend(make(Expr, [terminal], prob=share)
                 for terminal in terminals)

    return cfg.WeightedGrammar(S, rules)
Пример #5
0
    def _trace_fringe(self, tree, treeloc=None):
        """
        Print trace output displaying the fringe of C{tree}.  The
        fringe of C{tree} consists of all of its leaves and all of
        its childless subtrees.

        @rtype: C{None}
        """
        
        if treeloc == (): print "*",
        if isinstance(tree, Tree):
            if len(tree) == 0: print `cfg.Nonterminal(tree.node)`,
            for i in range(len(tree)):
                if treeloc is not None and i == treeloc[0]:
                    self._trace_fringe(tree[i], treeloc[1:])
                else:
                    self._trace_fringe(tree[i])
        else:
            print `tree`,
Пример #6
0
def demo():
    """
    Launch the shift-reduce parser demo on a small example grammar
    and a fixed example sentence.
    """

    from nltk import cfg

    (S, VP, NP, PP, P, N, Name, V, Det) = map(
        cfg.Nonterminal, 'S VP NP PP P N Name V Det'.split())

    # (left-hand side, right-hand side) pairs, in grammar order.
    rule_table = [
        # Syntactic rules
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),

        # Lexical rules
        (NP, ['I']),
        (Det, ['the']),
        (Det, ['a']),
        (N, ['man']),
        (V, ['saw']),
        (P, ['in']),
        (P, ['with']),
        (N, ['park']),
        (N, ['dog']),
        (N, ['statue']),
        (Det, ['my']),
    ]
    productions = tuple(cfg.Production(head, body)
                        for head, body in rule_table)

    grammar = cfg.Grammar(S, productions)

    # The example sentence, split on whitespace.
    words = 'my dog saw a man in the park with a statue'.split()

    ShiftReduceDemo(grammar, words).mainloop()
Пример #7
0
 def _starter_edge(self, start_sym):
     """
     Build the 'starter edge': a tree edge over the empty span
     (0, 0) whose left-hand side is the nonterminal C{[INIT]} and
     whose right-hand side is the single symbol C{start_sym}.
     """
     empty_span = (0, 0)
     init_symbol = cfg.Nonterminal('[INIT]')
     return TreeEdge(empty_span, init_symbol, (start_sym,))