def demo_legacy_grammar():
    """
    Check that batch_interpret() is compatible with legacy grammars that use
    a lowercase 'sem' feature.

    A one-production feature grammar is built inline, printed, and then
    handed to batch_interpret() together with the single input 'hello' and
    ``semkey='sem'``.  For every item batch_interpret() returns, the first
    element is unpacked as a (syntax, semantics) pair and the semantics is
    printed.
    """
    # The fcfg text format is line-oriented: the '% start' directive and
    # each production must sit on their own line.
    g = nltk.parse_fcfg("""
    % start S
    S[sem=<hello>] -> 'hello'
    """)
    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the Python 3 print function.
    print("Reading grammar: %s" % g)
    print("*" * 20)
    for reading in batch_interpret(['hello'], g, semkey='sem'):
        syn, sem = reading[0]
        print("")
        # Two spaces on purpose: the original `print "output: ", sem`
        # inserted a separator space after the trailing space of the label.
        print("output:  %s" % (sem,))
# Feature-based grammar for a small English fragment.  It uses the INV and
# AUX features for subject-auxiliary inversion, SUBCAT for verb
# subcategorisation, and slash categories (X/?x, NP/NP) for gaps.
# The fcfg text format is line-oriented, so every directive, comment and
# production is kept on its own line; note that `NP/NP ->` is an empty
# production and `Comp -> 'that'` is a separate rule.
grammar = nltk.parse_fcfg("""
% start S
# ###################
# Grammar Productions
# ###################
S[-INV] -> NP VP
S[-INV]/?x -> NP VP/?x
S[-INV] -> NP S/NP
S[-INV] -> Adv[+NEG] S[+INV]
S[+INV] -> V[+AUX] NP VP
S[+INV]/?x -> V[+AUX] NP VP/?x
SBar -> Comp S[-INV]
SBar/?x -> Comp S[-INV]/?x
VP -> V[SUBCAT=intrans, -AUX]
VP -> V[SUBCAT=trans, -AUX] NP
VP/?x -> V[SUBCAT=trans, -AUX] NP/?x
VP -> V[SUBCAT=clause, -AUX] SBar
VP/?x -> V[SUBCAT=clause, -AUX] SBar/?x
VP -> V[+AUX] VP
VP/?x -> V[+AUX] VP/?x
# ###################
# Lexical Productions
# ###################
V[SUBCAT=intrans, -AUX] -> 'walk' | 'sing'
V[SUBCAT=trans, -AUX] -> 'see' | 'like'
V[SUBCAT=clause, -AUX] -> 'say' | 'claim'
V[+AUX] -> 'do' | 'can'
NP[-WH] -> 'you' | 'cats' | 'Sam'
NP[+WH] -> 'who' | 'what'
Adv[+NEG] -> 'rarely' | 'never'
NP/NP ->
Comp -> 'that'
""")
# Feature-based grammar for German noun phrases: determiner, optional
# adjective and noun must agree in CASE, GND (m/w/n) and NUM; DET
# distinguishes the 'bes' and 'unb' determiner series and is shared with
# the adjective.  Plural NPs may also occur without a determiner.
# The fcfg text format is line-oriented, so each production is kept whole
# on a single line.
# NOTE(review): `Adj[NUM=pl, DET=bes] -> 'alten'` is listed twice (fifth and
# last Adj rule).  The duplicate is preserved unchanged; confirm whether the
# last one was meant for a different DET value.
# NOTE(review): the `CASE={gen,dat}` / `CASE={nom,akk}` set notation is kept
# verbatim; verify that the feature-structure parser in use accepts it.
grammar = nltk.parse_fcfg("""
S -> NP[CASE=?c, GND=?g, NUM=?n, DET=?d]
NP[CASE=?c, GND=?g, NUM=?n, DET=?d] -> Det[CASE=?c, GND=?g, NUM=?n, DET=?d] N[CASE=?c, GND=?g, NUM=?n]
NP[CASE=?c, GND=?g, NUM=?n, DET=?d] -> Det[CASE=?c, GND=?g, NUM=?n, DET=?d] Adj[CASE=?c, GND=?g, NUM=?n, DET=?d] N[CASE=?c, GND=?g, NUM=?n]
NP[CASE=?c, GND=?g, NUM=pl, DET=?d] -> N[CASE=?c, GND=?g, NUM=pl]
NP[CASE=?c, GND=?g, NUM=pl, DET=?d] -> Adj[CASE=?c, GND=?g, NUM=pl, DET=?d] N[CASE=?c, GND=?g, NUM=pl]

Det[CASE=nom, GND=m, NUM=sg, DET=bes] -> 'der'
Det[CASE=gen, GND=m, NUM=sg, DET=bes] -> 'des'
Det[CASE=dat, GND=m, NUM=sg, DET=bes] -> 'dem'
Det[CASE=akk, GND=m, NUM=sg, DET=bes] -> 'den'
Det[CASE=nom, GND=m, NUM=sg, DET=unb] -> 'ein'
Det[CASE=gen, GND=m, NUM=sg, DET=unb] -> 'eines'
Det[CASE=dat, GND=m, NUM=sg, DET=unb] -> 'einem'
Det[CASE=akk, GND=m, NUM=sg, DET=unb] -> 'einen'
Det[CASE=nom, GND=w, NUM=sg, DET=bes] -> 'die'
Det[CASE=gen, GND=w, NUM=sg, DET=bes] -> 'der'
Det[CASE=dat, GND=w, NUM=sg, DET=bes] -> 'der'
Det[CASE=akk, GND=w, NUM=sg, DET=bes] -> 'die'
Det[CASE=nom, GND=w, NUM=sg, DET=unb] -> 'eine'
Det[CASE=gen, GND=w, NUM=sg, DET=unb] -> 'einer'
Det[CASE=dat, GND=w, NUM=sg, DET=unb] -> 'einer'
Det[CASE=akk, GND=w, NUM=sg, DET=unb] -> 'eine'
Det[CASE=nom, GND=n, NUM=sg, DET=bes] -> 'das'
Det[CASE=gen, GND=n, NUM=sg, DET=bes] -> 'des'
Det[CASE=dat, GND=n, NUM=sg, DET=bes] -> 'dem'
Det[CASE=akk, GND=n, NUM=sg, DET=bes] -> 'das'
Det[CASE=nom, GND=n, NUM=sg, DET=unb] -> 'ein'
Det[CASE=gen, GND=n, NUM=sg, DET=unb] -> 'eines'
Det[CASE=dat, GND=n, NUM=sg, DET=unb] -> 'einem'
Det[CASE=akk, GND=n, NUM=sg, DET=unb] -> 'ein'
Det[CASE=nom, NUM=pl] -> 'die'
Det[CASE=gen, NUM=pl] -> 'der'
Det[CASE=dat, NUM=pl] -> 'den'
Det[CASE=akk, NUM=pl] -> 'die'

N[CASE=nom, GND=m, NUM=sg] -> 'Baum'
N[CASE=gen, GND=m, NUM=sg] -> 'Baumes'
N[CASE=dat, GND=m, NUM=sg] -> 'Baum' | 'Baume'
N[CASE=akk, GND=m, NUM=sg] -> 'Baum'
N[CASE=nom, GND=m, NUM=pl] -> 'Bäume'
N[CASE=gen, GND=m, NUM=pl] -> 'Bäume'
N[CASE=dat, GND=m, NUM=pl] -> 'Bäumen'
N[CASE=akk, GND=m, NUM=pl] -> 'Bäume'
N[GND=w, NUM=sg] -> 'Kuh'
N[CASE=nom, GND=w, NUM=pl] -> 'Kühe'
N[CASE=gen, GND=w, NUM=pl] -> 'Kühe'
N[CASE=dat, GND=w, NUM=pl] -> 'Kühen'
N[CASE=akk, GND=w, NUM=pl] -> 'Kühe'
N[CASE=nom, GND=n, NUM=sg] -> 'Buch'
N[CASE=gen, GND=n, NUM=sg] -> 'Buches'
N[CASE=dat, GND=n, NUM=sg] -> 'Buch' | 'Buche'
N[CASE=akk, GND=n, NUM=sg] -> 'Buch'
N[CASE=nom, GND=n, NUM=pl] -> 'Bücher'
N[CASE=gen, GND=n, NUM=pl] -> 'Bücher'
N[CASE=dat, GND=n, NUM=pl] -> 'Büchern'
N[CASE=akk, GND=n, NUM=pl] -> 'Bücher'

Adj[CASE=nom, NUM=sg, DET=bes] -> 'alte'
Adj[CASE={gen,dat}, NUM=sg, DET=bes] -> 'alten'
Adj[CASE=akk, GND=m, NUM=sg, DET=bes] -> 'alten'
Adj[CASE=akk, GND=w, NUM=sg, DET=bes] -> 'alten'
Adj[NUM=pl, DET=bes] -> 'alten'
Adj[CASE=nom, GND=m, NUM=sg, DET=unb] -> 'alter'
Adj[CASE=gen, GND=m, NUM=sg, DET=unb] -> 'alten'
Adj[CASE={nom,akk}, GND=w, NUM=sg, DET=unb] -> 'alte'
Adj[CASE=gen, GND=w, NUM=sg, DET=unb] -> 'alten'
Adj[CASE={nom,akk}, GND=n, NUM=sg, DET=unb] -> 'altes'
Adj[CASE=gen, GND=n, NUM=sg, DET=unb] -> 'alten'
Adj[NUM=pl, DET=bes] -> 'alten'
""")
# Feature-based grammar for German singular noun phrases: determiner and
# noun share CASE and a nested AGR structure (GND, NUM).
# The fcfg text format is line-oriented, so the '% start' directive, each
# '#' comment and each production is kept on its own line.
# NOTE(review): 'Kindes' is tagged GND=masc while 'Kind' is GND=neu;
# preserved as written, but confirm the intended gender.
grammar = nltk.parse_fcfg("""
% start S
# Grammar Productions
S -> NP[CASE=?c, AGR=?a]
NP[CASE=?c, AGR=?a] -> Det[CASE=?c, AGR=?a] N[CASE=?c, AGR=?a]
# Lexical Productions
# masc
Det[CASE=nom, AGR=[GND=masc,NUM=sg]] -> 'der' | 'ein'
Det[CASE=gen, AGR=[GND=masc,NUM=sg]] -> 'des' | 'eines'
Det[CASE=dat, AGR=[GND=masc,NUM=sg]] -> 'dem' | 'einem'
Det[CASE=acc, AGR=[GND=masc,NUM=sg]] -> 'den' | 'einen'
# fem
Det[CASE=nom, AGR=[GND=fem,NUM=sg]] -> 'die' | 'eine'
Det[CASE=gen, AGR=[GND=fem,NUM=sg]] -> 'der' | 'einer'
Det[CASE=dat, AGR=[GND=fem,NUM=sg]] -> 'der' | 'einer'
Det[CASE=acc, AGR=[GND=fem,NUM=sg]] -> 'die' | 'eine'
# neu
Det[CASE=nom, AGR=[GND=neu,NUM=sg]] -> 'das' | 'ein'
Det[CASE=gen, AGR=[GND=neu,NUM=sg]] -> 'des' | 'eines'
Det[CASE=dat, AGR=[GND=neu,NUM=sg]] -> 'dem' | 'einem'
Det[CASE=acc, AGR=[GND=neu,NUM=sg]] -> 'das' | 'ein'
# Nouns
N[CASE=gen, AGR=[GND=masc,NUM=sg]] -> 'Hundes'
N[CASE=nom, AGR=[GND=masc,NUM=sg]] -> 'Hund'
N[AGR=[GND=fem,NUM=sg]] -> 'Katze'
N[CASE=gen, AGR=[GND=masc,NUM=sg]] -> 'Kindes'
N[AGR=[GND=neu,NUM=sg]] -> 'Kind'
""")
# Feature-based grammar for simple German sentences with subject-verb
# agreement (AGR) and case government (OBJCASE).  The rules marked
# "inserted" add a genitive NP attribute and a verb class DV that takes an
# accusative and a genitive NP.
# The fcfg text format is line-oriented, so the '% start' directive, each
# '#' comment and each production is kept on its own line.
# The German remark inside the grammar text reads: "not complete - only the
# cases relevant to the exercise".
# NOTE(review): 'Buergermeisters' carries no CASE feature although the other
# inserted genitive form ('Diebstahls') is CASE=gen; preserved as written.
grammar = nltk.parse_fcfg("""
% start S
# Grammar Productions
S -> NP[CASE=nom, AGR=?a] VP[AGR=?a]
NP[CASE=?c, AGR=?a] -> PRO[CASE=?c, AGR=?a]
NP[CASE=?c, AGR=?a] -> Det[CASE=?c, AGR=?a] N[CASE=?c, AGR=?a]
# inserted:
NP[CASE=gen, AGR=?a] -> Det[CASE=gen, AGR=?a] N[CASE=gen, AGR=?a] NP[CASE=gen]
VP[AGR=?a] -> IV[AGR=?a]
VP[AGR=?a] -> TV[OBJCASE=?c, AGR=?a] NP[CASE=?c]
# inserted:
VP[AGR=?a] -> DV[AGR=?a] NP[CASE=acc] NP[CASE=gen]
# Lexical Productions
# Singular determiners
# masc
Det[CASE=nom, AGR=[GND=masc,PER=3,NUM=sg]] -> 'der'
Det[CASE=gen, AGR=[GND=masc,PER=3,NUM=sg]] -> 'des'
Det[CASE=dat, AGR=[GND=masc,PER=3,NUM=sg]] -> 'dem'
Det[CASE=acc, AGR=[GND=masc,PER=3,NUM=sg]] -> 'den'
# fem
Det[CASE=nom, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die'
Det[CASE=gen, AGR=[GND=fem,PER=3,NUM=sg]] -> 'der'
Det[CASE=dat, AGR=[GND=fem,PER=3,NUM=sg]] -> 'der'
Det[CASE=acc, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die'
# Plural determiners
Det[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'die'
Det[CASE=gen, AGR=[PER=3,NUM=pl]] -> 'der'
Det[CASE=dat, AGR=[PER=3,NUM=pl]] -> 'den'
Det[CASE=acc, AGR=[PER=3,NUM=pl]] -> 'die'
# Nouns
N[AGR=[GND=masc,PER=3,NUM=sg]] -> 'Hund'
N[CASE=nom, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
N[CASE=dat, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunden'
N[CASE=acc, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
N[AGR=[GND=fem,PER=3,NUM=sg]] -> 'Katze'
N[AGR=[GND=fem,PER=3,NUM=pl]] -> 'Katzen'
# inserted (nicht vollstaendig - nur fuer Aufgabe relevante Faelle):
N[CASE=gen, AGR=[GND=masc,PER=3,NUM=sg]] -> 'Diebstahls'
N[AGR=[GND=fem,PER=3,NUM=sg]] -> 'Frau'
N[AGR=[GND=neut,PER=3,NUM=pl]] -> 'Gelder'
N[AGR=[GND=masc,PER=3,NUM=sg]] -> 'Buergermeisters'
# Pronouns
PRO[CASE=nom, AGR=[PER=1,NUM=sg]] -> 'ich'
PRO[CASE=acc, AGR=[PER=1,NUM=sg]] -> 'mich'
PRO[CASE=dat, AGR=[PER=1,NUM=sg]] -> 'mir'
PRO[CASE=nom, AGR=[PER=2,NUM=sg]] -> 'du'
PRO[CASE=nom, AGR=[PER=3,NUM=sg]] -> 'er' | 'sie' | 'es'
PRO[CASE=nom, AGR=[PER=1,NUM=pl]] -> 'wir'
PRO[CASE=acc, AGR=[PER=1,NUM=pl]] -> 'uns'
PRO[CASE=dat, AGR=[PER=1,NUM=pl]] -> 'uns'
PRO[CASE=nom, AGR=[PER=2,NUM=pl]] -> 'ihr'
PRO[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'sie'
# Verbs
IV[AGR=[NUM=sg,PER=1]] -> 'komme'
IV[AGR=[NUM=sg,PER=2]] -> 'kommst'
IV[AGR=[NUM=sg,PER=3]] -> 'kommt'
IV[AGR=[NUM=pl, PER=1]] -> 'kommen'
IV[AGR=[NUM=pl, PER=2]] -> 'kommt'
IV[AGR=[NUM=pl, PER=3]] -> 'kommen'
TV[OBJCASE=acc, AGR=[NUM=sg,PER=1]] -> 'sehe' | 'mag'
TV[OBJCASE=acc, AGR=[NUM=sg,PER=2]] -> 'siehst' | 'magst'
TV[OBJCASE=acc, AGR=[NUM=sg,PER=3]] -> 'sieht' | 'mag'
TV[OBJCASE=dat, AGR=[NUM=sg,PER=1]] -> 'folge' | 'helfe'
TV[OBJCASE=dat, AGR=[NUM=sg,PER=2]] -> 'folgst' | 'hilfst'
TV[OBJCASE=dat, AGR=[NUM=sg,PER=3]] -> 'folgt' | 'hilft'
TV[OBJCASE=acc, AGR=[NUM=pl,PER=1]] -> 'sehen' | 'moegen'
TV[OBJCASE=acc, AGR=[NUM=pl,PER=2]] -> 'sieht' | 'moegt'
TV[OBJCASE=acc, AGR=[NUM=pl,PER=3]] -> 'sehen' | 'moegen'
TV[OBJCASE=dat, AGR=[NUM=pl,PER=1]] -> 'folgen' | 'helfen'
TV[OBJCASE=dat, AGR=[NUM=pl,PER=2]] -> 'folgt' | 'helft'
TV[OBJCASE=dat, AGR=[NUM=pl,PER=3]] -> 'folgen' | 'helfen'
# inserted:
DV[AGR=[NUM=sg,PER=1]] -> 'bezichtige'
DV[AGR=[NUM=sg,PER=2]] -> 'bezichtigst'
DV[AGR=[NUM=sg,PER=3]] -> 'bezichtigt'
DV[AGR=[NUM=pl,PER=1]] -> 'bezichtigen'
DV[AGR=[NUM=pl,PER=2]] -> 'bezichtigt'
DV[AGR=[NUM=pl,PER=3]] -> 'bezichtigen'
""")
# Feature-based grammar for German singular noun phrases: determiner and
# noun must agree in CASE, GND (m/w/n) and NUM.
# The fcfg text format is line-oriented, so each production is kept on its
# own line.
grammar = nltk.parse_fcfg("""
S -> NP[CASE=?c, GND=?g, NUM=?n]
NP[CASE=?c, GND=?g, NUM=?n] -> Det[CASE=?c, GND=?g, NUM=?n] N[CASE=?c, GND=?g, NUM=?n]

Det[CASE=nom, GND=m, NUM=sg] -> 'der' | 'ein'
Det[CASE=gen, GND=m, NUM=sg] -> 'des' | 'eines'
Det[CASE=dat, GND=m, NUM=sg] -> 'dem' | 'einem'
Det[CASE=akk, GND=m, NUM=sg] -> 'den' | 'einen'
Det[CASE=nom, GND=w, NUM=sg] -> 'die' | 'eine'
Det[CASE=gen, GND=w, NUM=sg] -> 'der' | 'einer'
Det[CASE=dat, GND=w, NUM=sg] -> 'der' | 'einer'
Det[CASE=akk, GND=w, NUM=sg] -> 'die' | 'eine'
Det[CASE=nom, GND=n, NUM=sg] -> 'das' | 'ein'
Det[CASE=gen, GND=n, NUM=sg] -> 'des' | 'eines'
Det[CASE=dat, GND=n, NUM=sg] -> 'dem' | 'einem'
Det[CASE=akk, GND=n, NUM=sg] -> 'das' | 'ein'

N[CASE=nom, GND=m, NUM=sg] -> 'Baum'
N[CASE=gen, GND=m, NUM=sg] -> 'Baumes'
N[CASE=dat, GND=m, NUM=sg] -> 'Baum' | 'Baume'
N[CASE=akk, GND=m, NUM=sg] -> 'Baum'
N[GND=w, NUM=sg] -> 'Kuh'
N[CASE=nom, GND=n, NUM=sg] -> 'Buch'
N[CASE=gen, GND=n, NUM=sg] -> 'Buches'
N[CASE=dat, GND=n, NUM=sg] -> 'Buch' | 'Buche'
N[CASE=akk, GND=n, NUM=sg] -> 'Buch'
""")
# Feature-based grammar for simple German sentences with subject-verb
# agreement (AGR) and case government (OBJCASE).  The rules marked
# "inserted" add an adverb-initial sentence pattern (ADV VP2 NP) and the
# VP2 category it uses.
# The fcfg text format is line-oriented, so the '% start' directive, each
# '#' comment and each production is kept on its own line.
# NOTE(review): the second VP2 rule binds OBJCASE=?c on the verb but fixes
# the following NP to CASE=nom, so ?c is unused there; preserved as written,
# confirm this is intended.
grammar = nltk.parse_fcfg("""
% start S
# Grammar Productions
S -> NP[CASE=nom, AGR=?a] VP[AGR=?a]
# inserted:
S -> ADV VP2[AGR=?a] NP[CASE=nom, AGR=?a]
NP[CASE=?c, AGR=?a] -> PRO[CASE=?c, AGR=?a]
NP[CASE=?c, AGR=?a] -> Det[CASE=?c, AGR=?a] N[CASE=?c, AGR=?a]
VP[AGR=?a] -> IV[AGR=?a]
VP[AGR=?a] -> TV[OBJCASE=?c, AGR=?a] NP[CASE=?c]
# inserted (next two):
VP2[AGR=?a] -> IV[AGR=?a]
VP2[AGR=?a] -> TV[OBJCASE=?c, AGR=?a] NP[CASE=nom]
# Lexical Productions
# Singular determiners
# masc
Det[CASE=nom, AGR=[GND=masc,PER=3,NUM=sg]] -> 'der'
Det[CASE=dat, AGR=[GND=masc,PER=3,NUM=sg]] -> 'dem'
Det[CASE=acc, AGR=[GND=masc,PER=3,NUM=sg]] -> 'den'
# fem
Det[CASE=nom, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die'
Det[CASE=dat, AGR=[GND=fem,PER=3,NUM=sg]] -> 'der'
Det[CASE=acc, AGR=[GND=fem,PER=3,NUM=sg]] -> 'die'
# Plural determiners
Det[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'die'
Det[CASE=dat, AGR=[PER=3,NUM=pl]] -> 'den'
Det[CASE=acc, AGR=[PER=3,NUM=pl]] -> 'die'
# Nouns
N[AGR=[GND=masc,PER=3,NUM=sg]] -> 'Hund'
N[CASE=nom, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
N[CASE=dat, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunden'
N[CASE=acc, AGR=[GND=masc,PER=3,NUM=pl]] -> 'Hunde'
N[AGR=[GND=fem,PER=3,NUM=sg]] -> 'Katze'
N[AGR=[GND=fem,PER=3,NUM=pl]] -> 'Katzen'
# Pronouns
PRO[CASE=nom, AGR=[PER=1,NUM=sg]] -> 'ich'
PRO[CASE=acc, AGR=[PER=1,NUM=sg]] -> 'mich'
PRO[CASE=dat, AGR=[PER=1,NUM=sg]] -> 'mir'
PRO[CASE=nom, AGR=[PER=2,NUM=sg]] -> 'du'
PRO[CASE=nom, AGR=[PER=3,NUM=sg]] -> 'er' | 'sie' | 'es'
PRO[CASE=nom, AGR=[PER=1,NUM=pl]] -> 'wir'
PRO[CASE=acc, AGR=[PER=1,NUM=pl]] -> 'uns'
PRO[CASE=dat, AGR=[PER=1,NUM=pl]] -> 'uns'
PRO[CASE=nom, AGR=[PER=2,NUM=pl]] -> 'ihr'
PRO[CASE=nom, AGR=[PER=3,NUM=pl]] -> 'sie'
# Verbs
IV[AGR=[NUM=sg,PER=1]] -> 'komme'
IV[AGR=[NUM=sg,PER=2]] -> 'kommst'
IV[AGR=[NUM=sg,PER=3]] -> 'kommt'
IV[AGR=[NUM=pl, PER=1]] -> 'kommen'
IV[AGR=[NUM=pl, PER=2]] -> 'kommt'
IV[AGR=[NUM=pl, PER=3]] -> 'kommen'
TV[OBJCASE=acc, AGR=[NUM=sg,PER=1]] -> 'sehe' | 'mag'
TV[OBJCASE=acc, AGR=[NUM=sg,PER=2]] -> 'siehst' | 'magst'
TV[OBJCASE=acc, AGR=[NUM=sg,PER=3]] -> 'sieht' | 'mag'
TV[OBJCASE=dat, AGR=[NUM=sg,PER=1]] -> 'folge' | 'helfe'
TV[OBJCASE=dat, AGR=[NUM=sg,PER=2]] -> 'folgst' | 'hilfst'
TV[OBJCASE=dat, AGR=[NUM=sg,PER=3]] -> 'folgt' | 'hilft'
TV[OBJCASE=acc, AGR=[NUM=pl,PER=1]] -> 'sehen' | 'moegen'
TV[OBJCASE=acc, AGR=[NUM=pl,PER=2]] -> 'sieht' | 'moegt'
TV[OBJCASE=acc, AGR=[NUM=pl,PER=3]] -> 'sehen' | 'moegen'
TV[OBJCASE=dat, AGR=[NUM=pl,PER=1]] -> 'folgen' | 'helfen'
TV[OBJCASE=dat, AGR=[NUM=pl,PER=2]] -> 'folgt' | 'helft'
TV[OBJCASE=dat, AGR=[NUM=pl,PER=3]] -> 'folgen' | 'helfen'
# inserted: Adverbs
ADV -> 'heute'
""")
def load_grammar(self):
    """
    Load the feature grammar for this object and build a parser for it.

    The grammar is read from the file named ``self.prefix + '-grammar.txt'``.
    On return, ``self.grammar`` holds the result of parse_fcfg() and
    ``self.parser`` holds a FeatureChartParser constructed from it.

    Any exception raised while opening or parsing the file propagates to
    the caller; the file is closed in either case.
    """
    fn = self.prefix + '-grammar.txt'
    # Close the file explicitly instead of leaving the handle to the
    # garbage collector.  try/finally is used rather than `with` so the
    # code does not depend on the interpreter version.
    infile = open(fn)
    try:
        self.grammar = parse_fcfg(infile)
    finally:
        infile.close()
    self.parser = FeatureChartParser(self.grammar)
def starter_grammar():
    """
    Parse and return the feature grammar stored in ``starter_grammar.txt``.

    The file is looked up inside the module-level ``DataDir`` directory.

    Raises:
        Exception: if ``DataDir`` is None (message "Not defined: CL").
    """
    if DataDir is None:
        # Call form of raise: same exception and message as the original
        # `raise Exception, "..."`, but valid on every Python version.
        raise Exception("Not defined: CL")
    fn = os.path.join(DataDir, "starter_grammar.txt")
    # Close the file explicitly once parsing has finished (or failed)
    # instead of leaking the handle.
    infile = open(fn)
    try:
        return nltk.parse_fcfg(infile)
    finally:
        infile.close()