Example #1
0
File: cfg.py  Project: wrand/tweater
def demo2():
    """Launch the CFG demo window on a small toy English grammar."""
    from nltk import Nonterminal, Production, ContextFreeGrammar

    # One Nonterminal per whitespace-separated symbol name.
    symbols = 'S VP NP PP P N Name V Det'
    S, VP, NP, PP, P, N, Name, V, Det = [
        Nonterminal(sym) for sym in symbols.split()
    ]

    # Syntactic rules first, then lexical (terminal) rules.
    rules = (
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),
        Production(PP, ['up', 'over', NP]),

        Production(NP, ['I']),
        Production(Det, ['the']),
        Production(Det, ['a']),
        Production(N, ['man']),
        Production(V, ['saw']),
        Production(P, ['in']),
        Production(P, ['with']),
        Production(N, ['park']),
        Production(N, ['dog']),
        Production(N, ['statue']),
        Production(Det, ['my']),
    )
    grammar = ContextFreeGrammar(S, rules)

    sentence = 'I saw a man in the park'.split()
    demo_window = CFGDemo(grammar, sentence)
    demo_window.mainloop()
Example #2
0
def if_then_else_demo():
    """
    Demo a grammar with the recursive-descent parser app.

    NOTE(review): despite the function name, this is a standard LL(1)
    arithmetic-expression grammar (E -> T E1, T -> F T1, with PLUS and
    TIMES operators and parenthesized factors), not an if-then-else
    grammar.  The epsilon productions for E1 and T1 make it suitable
    for predictive parsing.
    """
    from nltk.grammar import Nonterminal, Production, ContextFreeGrammar
    nonterminals = 'E E1 PLUS T T1 TIMES F LPAREN RPAREN ID'
    (E, E1, PLUS, T, T1, TIMES, F, LPAREN, RPAREN,
     ID) = [Nonterminal(s) for s in nonterminals.split()]
    productions = (
        # Expressions: a term followed by zero or more "+ term" tails.
        Production(E, [T, E1]),
        Production(E1, [PLUS, T, E1]),
        Production(E1, []),
        # Terms: a factor followed by zero or more "* factor" tails.
        Production(T, [F, T1]),
        Production(T1, [TIMES, F, T1]),
        Production(T1, []),
        # Factors: parenthesized expression or an identifier.
        Production(F, [LPAREN, E, RPAREN]),
        Production(F, [ID]),
        # Lexical productions.
        Production(PLUS, ['+']),
        Production(TIMES, ['*']),
        Production(LPAREN, ['(']),
        Production(RPAREN, [')']),
        Production(ID, ['a']),
        Production(ID, ['b']),
        Production(ID, ['c']),
    )
    grammar = ContextFreeGrammar(E, productions)

    text = "a * b + c".split()
    RecursiveDescentApp(grammar, text).mainloop()
Example #3
0
 def add_new_vocab_rule(self, rule):
     """
     Append one vocabulary rule (an ``(lhs, rhs)`` pair) to the rule set,
     then rebuild ``self.cfg`` and ``self.parser`` from the full set.
     """
     lhs, rhs = rule[0], rule[1]
     self.rules.append(Production(NT(lhs), rhs))
     self.cfg = ContextFreeGrammar(NT("S"), self.rules)
     self.parser = EarleyChartParser(self.cfg, trace=0)
Example #4
0
    def set_grammar(self, grammar):
        """
        Assign a new grammar to the parser.

        Args:
            - grammar: production sequence wrapped into a
              :class:`ContextFreeGrammar` rooted at the nonterminal ``S``
        """
        start_symbol = Nonterminal('S')
        self._grammar = ContextFreeGrammar(start_symbol, grammar)
Example #5
0
    def parse_NP(self, sen):
        """
        Parses a partial sentence (that is, usually a noun phrase).

        Builds a temporary grammar rooted at NP from self.rules and
        returns the first (best) parse, or None when parsing fails.
        """
        try:
            cfg_temp = ContextFreeGrammar(NT("NP"), self.rules)
            parser_temp = EarleyChartParser(cfg_temp, trace=0)
            parse = parser_temp.nbest_parse(sen.strip().split(" "), trace=0)
        # Deliberately best-effort: report the traceback instead of
        # propagating.  Narrowed from a bare except so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        except Exception:
            print(traceback.format_exc())
        else:
            if parse:
                return parse[0]

        print("failure")
        return None
Example #6
0
def fail_demo():
    """
    Demo grammar that should not work with backtracking for all inputs
    """
    from nltk.grammar import Nonterminal, Production, ContextFreeGrammar

    S = Nonterminal('S')
    A = Nonterminal('A')
    # S expands to an odd or even run of A's; every A is the terminal 'a'.
    rules = (
        Production(S, [A, S, A]),
        Production(S, [A, A]),
        Production(A, ['a']),
    )
    grammar = ContextFreeGrammar(S, rules)

    tokens = "a a a a a a".split()
    # Alternative input for experimentation:
    #tokens = "a a a a".split()

    RecursiveDescentApp(grammar, tokens).mainloop()
Example #7
0
File: cfg.py  Project: wrand/tweater
    def __init__(self, parent, cfg=None, set_cfg_callback=None):
        # Build the grammar-editing window as a Toplevel child of `parent`.
        self._parent = parent
        # Use the supplied grammar, or start from an empty grammar rooted at S.
        if cfg is not None: self._cfg = cfg
        else: self._cfg = ContextFreeGrammar(Nonterminal('S'), [])
        # Optional callback invoked with the new grammar when it is applied.
        self._set_cfg_callback = set_cfg_callback

        # Truthy flag: highlight matching nonterminals while editing.
        self._highlight_matching_nonterminals = 1

        # Create the top-level window.
        self._top = Toplevel(parent)
        self._init_bindings()

        # Lay out start-symbol entry (top), production editor (middle,
        # the only frame that grows), and button row (bottom).
        self._init_startframe()
        self._startframe.pack(side='top', fill='x', expand=0)
        self._init_prodframe()
        self._prodframe.pack(side='top', fill='both', expand=1)
        self._init_buttons()
        self._buttonframe.pack(side='bottom', fill='x', expand=0)

        # Give keyboard focus to the production text widget
        # (presumably created by _init_prodframe — verify).
        self._textwidget.focus()
Example #8
0
def app():
    """
    Create a shift reduce parser app, using a simple grammar and
    text.
    """

    from nltk.grammar import Nonterminal, Production, ContextFreeGrammar

    # One Nonterminal per whitespace-separated symbol name.
    symbols = 'S VP NP PP P N Name V Det'
    S, VP, NP, PP, P, N, Name, V, Det = [
        Nonterminal(sym) for sym in symbols.split()
    ]

    # Syntactic rules first, then lexical (terminal) rules.
    rules = (
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),

        Production(NP, ['I']),
        Production(Det, ['the']),
        Production(Det, ['a']),
        Production(N, ['man']),
        Production(V, ['saw']),
        Production(P, ['in']),
        Production(P, ['with']),
        Production(N, ['park']),
        Production(N, ['dog']),
        Production(N, ['statue']),
        Production(Det, ['my']),
    )

    grammar = ContextFreeGrammar(S, rules)

    # tokenize the sentence
    sent = 'my dog saw a man in the park with a statue'.split()

    ShiftReduceApp(grammar, sent).mainloop()
Example #9
0
    def __init__(self, rules_file="rules.gr", vocab_file="vocabulary.gr"):
        """
        Reads in grammar rules (from rules_file) and vocab rules (from
        vocab_file) and creates self.cfg (a ContextFreeGrammar) and
        self.parser (a EarleyChartParser).
        """
        self.rules = []

        def read_rules(path, make_rhs):
            # Parse one grammar file: skip blank lines and '#' comments,
            # drop the two-character prefix (presumably a rule weight —
            # verify against the .gr format), then split "lhs<TAB>rhs".
            # `with` guarantees the file is closed even on a parse error
            # (the original leaked the handle on exceptions).
            with open(path, "r") as f:
                for raw in f:
                    if raw.strip() == "" or raw.strip().startswith("#"):
                        continue
                    lhs, _, rhs = raw[2:].partition("\t")
                    self.rules.append(
                        Production(NT(lhs.strip()), make_rhs(rhs.strip())))

        # Syntactic rules: the RHS is a sequence of nonterminals.
        read_rules(rules_file, lambda rhs: [NT(x) for x in rhs.split(" ")])
        # Vocabulary rules: the RHS is a sequence of lower-cased terminals.
        read_rules(vocab_file, lambda rhs: rhs.lower().split(" "))

        # create the grammar and parser
        self.cfg = ContextFreeGrammar(NT("S"), self.rules)
        self.parser = EarleyChartParser(self.cfg, trace=0)
Example #10
0
File: cfg.py  Project: wrand/tweater
 def _apply(self, *e):
     """Build a grammar from the edited productions and pass it to the callback."""
     prods = self._parse_productions()
     start_symbol = Nonterminal(self._start.get())
     new_cfg = ContextFreeGrammar(start_symbol, prods)
     if self._set_cfg_callback is not None:
         self._set_cfg_callback(new_cfg)
Example #11
0
    def parse(self, p_string):
        """
        Parses a string and stores the resulting hierarchy of "domains"
        "hierarchies" and "tables"

        For the sake of NLP I've parsed the string using the nltk
        context free grammar library.

        A query is a "sentence" and can either be a domain, hierarchy or a table.
        A domain is simply a word.
        A hierarchy is expressed as "domain/domain"
        A table is expressed as "table(sentence, sentence, sentence)"

        Internally the query is represented as a nltk.parse.tree

        Process:
          1. string is tokenized
          2. develop a context free grammar
          3. parse
          4. convert to a tree representation
        """
        self.nltktree = None

        # Store the query string
        self.string = p_string

        # Tokenize the query string, allowing only strings, parentheses,
        # forward slashes and commas.
        re_all = r'table[(]|\,|[)]|[/]|\w+'
        data_tokens = tokenize.regexp_tokenize(self.string, re_all)

        # Develop a context free grammar
        # S = sentence, T = table, H = hierarchy, D = domain
        O, T, H, D = nonterminals('O, T, H, D')

        # Specify the grammar.  Kept as a list so the per-token domain
        # rules below can be appended in O(1); the original re-built a
        # tuple per matching token, which is quadratic in token count.
        productions = [
            # A sentence can be either a table, hierarchy or domain
            Production(O, [D]),
            Production(O, [H]),
            Production(O, [T]),

            # A table must be the following sequence:
            # "table(", sentence, comma, sentence, comma, sentence, ")"
            Production(T, ['table(', O, ',', O, ',', O, ')']),

            # A hierarchy must be the following sequence:
            # domain, forward slash, domain
            Production(H, [D, '/', D]),
            # domain, forward slash, another operator
            Production(H, [D, '/', O]),
        ]

        # Add domains to the cfg productions
        # A domain is a token that is entirely word chars
        re_domain = compile(r'^\w+$')
        # Try every token and add if it matches the above regular expression
        for tok in data_tokens:
            if re_domain.match(tok):
                productions.append(Production(D, [tok]))

        # Make a grammar out of our productions
        grammar = ContextFreeGrammar(O, productions)
        rd_parser = parse.RecursiveDescentParser(grammar)

        # Tokens need to be redefined.
        # It disappears after first use, and I don't know why.
        # (regexp_tokenize presumably yields a one-shot iterator — verify.)
        tokens = tokenize.regexp_tokenize(self.string, re_all)
        toklist = list(tokens)

        # Store the parsing.
        # Only the first one, as the grammar should be completely nonambiguous.
        try:
            self.parseList = rd_parser.get_parse_list(toklist)[0]
        except IndexError:
            print("Could not parse query.")
            return

        # Set the nltk.parse.tree tree for this query to the global sentence
        string = str(self.parseList)
        string2 = string.replace(":", "").replace("')'", "").replace(
            "table(", "").replace("','", "").replace("'", "").replace("/", "")
        self.nltktree = parse.tree.bracket_parse(string2)

        # Store the resulting nltk.parse.tree tree
        self.parseTree = QuerySentence(self.nltktree)
        self.xml = self.parseTree.toXML()
Example #12
0
import nltk
from nltk.corpus import treebank
from nltk.grammar import ContextFreeGrammar, Nonterminal

# Collect the unique productions observed across all parsed treebank
# sentences, then build a grammar rooted at S from them.
tbank_productions = set(production for sent in treebank.parsed_sents()
                        for production in sent.productions())
tbank_grammar = ContextFreeGrammar(Nonterminal('S'), list(tbank_productions))

# print() call syntax works on Python 3; the original `print x`
# statement is a SyntaxError there.
print(tbank_grammar)