# Exemplo n.º 1
# 0
    def add_verb(self, form, root, past, present, ppart=None):
        """Register grammar rules for the inflected forms of one verb.

        ``form`` is a ``(pos, proc)`` pair: ``pos`` is a feature-dict
        category template (copied, never mutated) and ``proc`` is a
        callable that builds the semantic payload for a given root and
        tense tag.  ``ppart`` defaults to the simple past form when the
        past participle is not given separately.
        """
        pos, proc = form
        if ppart is None:
            ppart = past

        # Tensed and untensed variants of the category template.
        untensed = pos.copy()
        untensed["tense"] = False
        tensed = pos.copy()
        tensed["tense"] = True

        self.add_rule(cfg.Production(untensed.copy(), [root]),
                      proc(root, False))
        self.add_rule(cfg.Production(tensed.copy(), [past]),
                      proc(root, "past"))
        self.add_rule(cfg.Production(tensed.copy(), [present]),
                      proc(root, "present"))
        # The participle gets its own V_part category carrying the
        # original part-of-speech as its "form" feature.
        participle_cat = GrammarCategory(pos="V_part", form=pos["pos"])
        self.add_rule(cfg.Production(participle_cat, [ppart]),
                      proc(root, "past-participle"))
# Exemplo n.º 2
# 0
 def add_lexicon(self, preterminal, terminals):
     """Add one lexical production per word in *terminals*.

     A string *preterminal* is parsed into a category and promoted to a
     ``cfg.Nonterminal`` if needed.  Any cached parser is invalidated so
     the new entries take effect on the next parse.
     """
     self.parser = None
     if isinstance(preterminal, str):
         parsed = Category.parse(preterminal)
         if not isinstance(parsed, cfg.Nonterminal):
             parsed = cfg.Nonterminal(parsed)
         preterminal = parsed
     for word in terminals:
         production = cfg.Production(preterminal, [word])
         self.lexicon.append(production)
         self.productions.append(production)
    def parse_rule(self, text):
        """Parse a whitespace-separated rule string into a ``cfg.Production``.

        ``tokens[0]`` is the left-hand side, ``tokens[1]`` the arrow (it
        is skipped), and ``tokens[2:]`` the right-hand side categories.
        """
        tokens = text.split()

        # Re-glue feature lists that whitespace splitting broke apart: a
        # token ending in "," or ":" is merged with its successor.  The
        # index only advances when no merge happened, so a chain of
        # separators collapses into a single token.
        idx = 0
        while idx < len(tokens):
            if tokens[idx].endswith(",") or tokens[idx].endswith(":"):
                tokens[idx:idx + 2] = [" ".join(tokens[idx:idx + 2])]
            else:
                idx += 1

        lhs = GrammarCategory.parse(tokens[0])
        rhs = map(lambda piece: GrammarCategory.parse(piece), tokens[2:])
        return cfg.Production(lhs, rhs)
# Exemplo n.º 4
# 0
    def parse_rule(self, text):
        """Parse a rule string into a ``cfg.Production``, honouring a
        single-quoted terminal on the right-hand side.

        When the rule has more than three tokens and contains a quote,
        everything between the first pair of single quotes becomes one
        terminal token (spaces inside the quotes are preserved).
        """
        tokens = text.split()
        if (len(tokens) > 3 and "'" in text):
            # Keep the LHS and arrow, replace the rest with the quoted text.
            quoted = text.split("'")
            tokens = text.split()[:2] + [quoted[1]]

        # Re-glue feature lists broken by whitespace splitting: a token
        # ending in "," or ":" is merged with its successor; the index
        # advances only when no merge happened.
        idx = 0
        while idx < len(tokens):
            if tokens[idx].endswith(",") or tokens[idx].endswith(":"):
                tokens[idx:idx + 2] = [" ".join(tokens[idx:idx + 2])]
            else:
                idx += 1

        return cfg.Production(
            GrammarCategory.parse(tokens[0]),
            map(lambda piece: GrammarCategory.parse(piece), tokens[2:]))
# Exemplo n.º 5
# 0
def demo():
    import sys, time

    S = GrammarCategory.parse('S')
    VP = GrammarCategory.parse('VP')
    NP = GrammarCategory.parse('NP')
    PP = GrammarCategory.parse('PP')
    V = GrammarCategory.parse('V')
    N = GrammarCategory.parse('N')
    P = GrammarCategory.parse('P')
    Name = GrammarCategory.parse('Name')
    Det = GrammarCategory.parse('Det')
    DetSg = GrammarCategory.parse('Det[-pl]')
    DetPl = GrammarCategory.parse('Det[+pl]')
    NSg = GrammarCategory.parse('N[-pl]')
    NPl = GrammarCategory.parse('N[+pl]')

    # Define some grammatical productions.
    grammatical_productions = [
        cfg.Production(S, (NP, VP)),
        cfg.Production(PP, (P, NP)),
        cfg.Production(NP, (NP, PP)),
        cfg.Production(VP, (VP, PP)),
        cfg.Production(VP, (V, NP)),
        cfg.Production(VP, (V, )),
        cfg.Production(NP, (DetPl, NPl)),
        cfg.Production(NP, (DetSg, NSg))
    ]

    # Define some lexical productions.
    lexical_productions = [
        cfg.Production(NP, ('John', )),
        cfg.Production(NP, ('I', )),
        cfg.Production(Det, ('the', )),
        cfg.Production(Det, ('my', )),
        cfg.Production(Det, ('a', )),
        cfg.Production(NSg, ('dog', )),
        cfg.Production(NSg, ('cookie', )),
        cfg.Production(V, ('ate', )),
        cfg.Production(V, ('saw', )),
        cfg.Production(P, ('with', )),
        cfg.Production(P, ('under', )),
    ]

    earley_grammar = cfg.Grammar(S, grammatical_productions)
    earley_lexicon = {}
    for prod in lexical_productions:
        earley_lexicon.setdefault(prod.rhs()[0].upper(), []).append(prod.lhs())

    def lexicon(word):
        return earley_lexicon.get(word.upper(), [])

    sent = 'I saw John with a dog with my cookie'
    print "Sentence:\n", sent
    from nltk import tokenize
    tokens = list(tokenize.whitespace(sent))
    t = time.time()
    cp = FeatureEarleyChartParse(earley_grammar, lexicon, trace=1)
    trees = cp.get_parse_list(tokens)
    print "Time: %s" % (time.time() - t)
    for tree in trees:
        print tree