def demo():
    """
    A demonstration showing how C{Grammar}s can be created and used.
    """
    from nltk import cfg

    # Create some nonterminals
    S, NP, VP, PP = cfg.nonterminals('S, NP, VP, PP')
    N, V, P, Det = cfg.nonterminals('N, V, P, Det')
    VP_slash_NP = VP / NP

    print 'Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP_slash_NP]
    print '    S.symbol() =>', repr(S.symbol())
    print

    print cfg.Production(S, [NP])

    # Create some Grammar Productions
    grammar = cfg.parse_grammar("""
      S -> NP VP
      PP -> P NP
      NP -> Det N
      NP -> NP PP
      VP -> V NP
      VP -> VP PP
      Det -> 'a'
      Det -> 'the'
      N -> 'dog'
      N -> 'cat'
      V -> 'chased'
      V -> 'sat'
      P -> 'on'
      P -> 'in'
    """)

    print 'A Grammar:', repr(grammar)
    print '    grammar.start()       =>', repr(grammar.start())
    print '    grammar.productions() =>',
    # Use str.replace(...) to line-wrap the output.
    print repr(grammar.productions()).replace(',', ',\n' + ' ' * 25)
    print
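# A minimal sketch of building a production by hand (assumes the same
# pre-1.0 nltk.cfg API as the demo above; the lhs()/rhs() accessors are
# the ones the other demos in this collection rely on):
def _production_sketch():
    from nltk import cfg
    S, NP, VP = cfg.nonterminals('S, NP, VP')
    prod = cfg.Production(S, [NP, VP])
    # Print the left- and right-hand sides of the production.
    print prod.lhs(), '->', prod.rhs()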
def parse(self, p_string):
    """
    Parses a string and stores the resulting hierarchy of "domains",
    "hierarchies" and "tables".

    For the sake of NLP, the string is parsed with the nltk
    context-free grammar library.

    A query is a "sentence" and can be a domain, a hierarchy or a
    table.  A domain is simply a word.  A hierarchy is expressed as
    "domain/domain".  A table is expressed as
    "table(sentence, sentence, sentence)".

    Internally the query is represented as an nltk.parse.tree.

    Process:
    1. The string is tokenized.
    2. A context-free grammar is built.
    3. The token stream is parsed.
    4. The parse is converted to a tree representation.
    """
    self.nltktree = None

    # Store the query string
    self.string = p_string

    # Tokenize the query string, allowing only strings, parentheses,
    # forward slashes and commas.
    re_all = r'table[(]|\,|[)]|[/]|\w+'
    data_tokens = tokenize.regexp(self.string, re_all)

    # Develop a context-free grammar
    # O = sentence, T = table, H = hierarchy, D = domain
    O, T, H, D = cfg.nonterminals('O, T, H, D')

    # Specify the grammar
    productions = [
        # A sentence can be either a table, a hierarchy or a domain
        cfg.Production(O, [D]),
        cfg.Production(O, [H]),
        cfg.Production(O, [T]),
        # A table must be the following sequence:
        # "table(", sentence, comma, sentence, comma, sentence, ")"
        cfg.Production(T, ['table(', O, ',', O, ',', O, ')']),
        # A hierarchy must be the following sequence:
        # domain, forward slash, domain
        cfg.Production(H, [D, '/', D]),
        # domain, forward slash, another sentence
        cfg.Production(H, [D, '/', O]),
        ]

    # Add domains to the CFG productions.
    # A domain is a token that consists entirely of word characters
    # (assumes ``import re`` at module level).
    re_domain = re.compile(r'^\w+$')
    # Try every token, and add a production for each one that matches.
    for tok in data_tokens:
        if re_domain.match(tok):
            productions.append(cfg.Production(D, [tok]))

    # Make a grammar out of our productions
    grammar = cfg.Grammar(O, productions)
    rd_parser = parse.RecursiveDescentParser(grammar)

    # tokenize.regexp() returns an iterator, which the loop above has
    # already exhausted, so the query string must be tokenized again.
    tokens = tokenize.regexp_tokenize(self.string, re_all)
    toklist = list(tokens)

    # Store the parse.  Only the first one is kept, as the grammar
    # should be completely unambiguous.
    try:
        self.parseList = rd_parser.get_parse_list(toklist)[0]
    except IndexError:
        print "Could not parse query."
        return

    # Flatten the parse into a bracketed string and rebuild it as the
    # nltk.parse.tree for this query.
    string = str(self.parseList)
    string2 = string.replace(":", "").replace("')'", "").replace(
        "table(", "").replace("','", "").replace("'", "").replace("/", "")
    self.nltktree = parse.tree.bracket_parse(string2)

    # Store the resulting nltk.parse.tree tree
    self.parseTree = QuerySentence(self.nltktree)
    self.xml = self.parseTree.toXML()
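# For reference, the token stream produced by the regular expression above
# can be previewed with the standard re module alone (a sketch; the query
# string is a made-up example):
def _tokenizer_sketch():
    import re
    re_all = r'table[(]|\,|[)]|[/]|\w+'
    # Non-matching characters (here, the spaces) are simply skipped.
    print re.findall(re_all, 'table(a/b, c, d)')
    # ['table(', 'a', '/', 'b', ',', 'c', ',', 'd', ')']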
def demo():
    """
    Create a shift-reduce parser demo, using a simple grammar and text.
    """
    from nltk import cfg

    nonterminals = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [cfg.Nonterminal(s)
                                           for s in nonterminals.split()]

    productions = (
        # Syntactic Productions
        cfg.Production(S, [NP, VP]),
        cfg.Production(NP, [Det, N]),
        cfg.Production(NP, [NP, PP]),
        cfg.Production(VP, [VP, PP]),
        cfg.Production(VP, [V, NP, PP]),
        cfg.Production(VP, [V, NP]),
        cfg.Production(PP, [P, NP]),

        # Lexical Productions
        cfg.Production(NP, ['I']),
        cfg.Production(Det, ['the']),
        cfg.Production(Det, ['a']),
        cfg.Production(N, ['man']),
        cfg.Production(V, ['saw']),
        cfg.Production(P, ['in']),
        cfg.Production(P, ['with']),
        cfg.Production(N, ['park']),
        cfg.Production(N, ['dog']),
        cfg.Production(N, ['statue']),
        cfg.Production(Det, ['my']),
        )

    grammar = cfg.Grammar(S, productions)

    # Tokenize the sentence.
    sent = 'my dog saw a man in the park with a statue'.split()

    ShiftReduceDemo(grammar, sent).mainloop()
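# A batch-mode sketch of the same grammar and sentence, assuming a
# parse.ShiftReduceParser class (the parser that ShiftReduceDemo wraps)
# with the get_parse_list convention used elsewhere in this collection;
# demo_batch is a hypothetical helper, not part of the original demo:
def demo_batch(grammar, sent):
    from nltk import parse
    sr_parser = parse.ShiftReduceParser(grammar)
    # Note: a shift-reduce parser can miss parses by reducing too early,
    # so an empty result does not prove the sentence is ungrammatical.
    for tree in sr_parser.get_parse_list(sent):
        print tree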
def demo():
    import time

    S = GrammarCategory.parse('S')
    VP = GrammarCategory.parse('VP')
    NP = GrammarCategory.parse('NP')
    PP = GrammarCategory.parse('PP')
    V = GrammarCategory.parse('V')
    N = GrammarCategory.parse('N')
    P = GrammarCategory.parse('P')
    Name = GrammarCategory.parse('Name')
    Det = GrammarCategory.parse('Det')
    DetSg = GrammarCategory.parse('Det[-pl]')
    DetPl = GrammarCategory.parse('Det[+pl]')
    NSg = GrammarCategory.parse('N[-pl]')
    NPl = GrammarCategory.parse('N[+pl]')

    # Define some grammatical productions.
    grammatical_productions = [
        cfg.Production(S, (NP, VP)),
        cfg.Production(PP, (P, NP)),
        cfg.Production(NP, (NP, PP)),
        cfg.Production(VP, (VP, PP)),
        cfg.Production(VP, (V, NP)),
        cfg.Production(VP, (V,)),
        cfg.Production(NP, (DetPl, NPl)),
        cfg.Production(NP, (DetSg, NSg)),
        ]

    # Define some lexical productions.
    lexical_productions = [
        cfg.Production(NP, ('John',)),
        cfg.Production(NP, ('I',)),
        cfg.Production(Det, ('the',)),
        cfg.Production(Det, ('my',)),
        cfg.Production(Det, ('a',)),
        cfg.Production(NSg, ('dog',)),
        cfg.Production(NSg, ('cookie',)),
        cfg.Production(V, ('ate',)),
        cfg.Production(V, ('saw',)),
        cfg.Production(P, ('with',)),
        cfg.Production(P, ('under',)),
        ]

    earley_grammar = cfg.Grammar(S, grammatical_productions)

    earley_lexicon = {}
    for prod in lexical_productions:
        earley_lexicon.setdefault(prod.rhs()[0].upper(), []).append(prod.lhs())

    def lexicon(word):
        return earley_lexicon.get(word.upper(), [])

    sent = 'I saw John with a dog with my cookie'
    print "Sentence:\n", sent
    from nltk import tokenize
    tokens = list(tokenize.whitespace(sent))

    t = time.time()
    cp = FeatureEarleyChartParse(earley_grammar, lexicon, trace=1)
    trees = cp.get_parse_list(tokens)
    print "Time: %s" % (time.time() - t)
    for tree in trees:
        print tree
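# The lexicon built above maps each upcased word to the list of categories
# it can bear.  A plain-Python sketch of the same setdefault() pattern
# ('NP' and 'Name' stand in for the GrammarCategory objects the real code
# stores):
def _lexicon_sketch():
    lex = {}
    # setdefault() inserts an empty list the first time a key is seen,
    # then keeps appending to that same list.
    lex.setdefault('JOHN', []).append('NP')
    lex.setdefault('JOHN', []).append('Name')
    print lex.get('John'.upper(), [])     # ['NP', 'Name']
    print lex.get('unknown'.upper(), [])  # [] for out-of-vocabulary words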
def demo():
    import time

    S = FeatStructNonterminal('S')
    VP = FeatStructNonterminal('VP')
    NP = FeatStructNonterminal('NP')
    PP = FeatStructNonterminal('PP')
    V = FeatStructNonterminal('V')
    N = FeatStructNonterminal('N')
    P = FeatStructNonterminal('P')
    Name = FeatStructNonterminal('Name')
    Det = FeatStructNonterminal('Det')
    DetSg = FeatStructNonterminal('Det[-pl]')
    DetPl = FeatStructNonterminal('Det[+pl]')
    NSg = FeatStructNonterminal('N[-pl]')
    NPl = FeatStructNonterminal('N[+pl]')

    # Define some grammatical productions.
    grammatical_productions = [
        cfg.Production(S, (NP, VP)),
        cfg.Production(PP, (P, NP)),
        cfg.Production(NP, (NP, PP)),
        cfg.Production(VP, (VP, PP)),
        cfg.Production(VP, (V, NP)),
        cfg.Production(VP, (V,)),
        cfg.Production(NP, (DetPl, NPl)),
        cfg.Production(NP, (DetSg, NSg)),
        ]

    # Define some lexical productions.
    lexical_productions = [
        cfg.Production(NP, ('John',)),
        cfg.Production(NP, ('I',)),
        cfg.Production(Det, ('the',)),
        cfg.Production(Det, ('my',)),
        cfg.Production(Det, ('a',)),
        cfg.Production(NSg, ('dog',)),
        cfg.Production(NSg, ('cookie',)),
        cfg.Production(V, ('ate',)),
        cfg.Production(V, ('saw',)),
        cfg.Production(P, ('with',)),
        cfg.Production(P, ('under',)),
        ]

    earley_lexicon = defaultdict(list)
    for prod in lexical_productions:
        earley_lexicon[prod.rhs()[0]].append(prod.lhs())
    #print "Lexicon:"
    #print earley_lexicon

    earley_grammar = cfg.Grammar(S, grammatical_productions, earley_lexicon)
    print earley_grammar

    sent = 'I saw John with a dog with my cookie'
    print "Sentence:\n", sent
    tokens = sent.split()

    t = time.time()
    cp = FeatureEarleyChartParser(earley_grammar, trace=1)
    trees = cp.nbest_parse(tokens)
    print "Time: %s" % (time.time() - t)
    for tree in trees:
        print tree
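# Unlike the previous demo, this lexicon is keyed by the literal word (no
# .upper() normalization), and defaultdict(list) replaces the setdefault()
# pattern.  A minimal sketch of the difference ('N[-pl]' stands in for the
# FeatStructNonterminal objects the real code stores):
def _defaultdict_sketch():
    from collections import defaultdict
    lex = defaultdict(list)
    lex['dog'].append('N[-pl]')  # a missing key is created as [] on access
    print lex['dog']             # ['N[-pl]']
    print lex['cat']             # [] -- note the lookup itself adds the key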