# NOTE(review): flattened and truncated chunk reformatted for readability;
# code tokens unchanged. It begins mid-method — the lines directly below are
# the tail of what appears to be the lexicon's __getitem__ (its `if` header
# is outside this view) — TODO confirm against the full file.
            # Look up categories for the first token; default to "no categories".
            return self.static_dics.get(tokens[0], [])
        else:
            return []

    def __setitem__(self, tok, cats):
        # Store the category list *cats* for a single token *tok*.
        self.static_dics[tok] = cats

    def has_key(self, tok):
        # Legacy dict-style membership test (Python 2 API name, kept as-is).
        return (tok in self.static_dics)

    def get(self, toklist, defval):
        # NOTE(review): `defval` is accepted but never used — the lookup's own
        # default is returned instead. Preserved as-is; confirm intent.
        ret = self.__getitem__(toklist)
        return ret


# Module-level parser configuration.
parser = CCGParser()
parser.combinators = [LApp, RApp, LB, RB, Conj, RT("NP[sbj]"), LBx]
parser.terminators = ["ROOT", "S", "S[wq]", "S[q]", "S[imp]"]
parser.lexicon = Lexicon()
parser.concatenator = ""  # tokens joined with no separator — presumably a no-space script; confirm


def tokenize(s):
    # Split *s* on whitespace; a trailing "." is split off as its own token.
    # An empty input is returned unchanged.
    if len(s) == 0:
        return s
    elif s[-1] == ".":
        tokens = s[:-1].split()
        tokens.append(s[-1])
        return tokens
    else:
        return s.split()


if __name__ == "__main__":
    def __repr__(s):
        # NOTE(review): chunk is truncated here — the body of __repr__ and the
        # rest of the script guard lie outside this view.
# NOTE(review): flattened chunk reformatted for readability; code tokens
# unchanged. The methods below appear to belong to the lexicon class whose
# `class` header is outside this view — indentation restored accordingly.
    def __setitem__(self, tok, cats):
        # Store the category list *cats* for a single token *tok*.
        self.static_dics[tok] = cats

    def has_key(self, tok):
        # Legacy dict-style membership test (Python 2 API name, kept as-is).
        return (tok in self.static_dics)

    def get(self, toklist, defval):
        # NOTE(review): `defval` is accepted but never used — the lookup's own
        # default is returned instead. Preserved as-is; confirm intent.
        ret = self.__getitem__(toklist)
        return ret


# Module-level parser configuration.
parser = CCGParser()
parser.combinators = [LApp, RApp, LB, RB, Conj, RT("NP[sbj]"), LBx]
parser.terminators = ["ROOT", "S", "S[wq]", "S[q]", "S[imp]"]
parser.lexicon = Lexicon()
parser.concatenator = ""  # tokens joined with no separator — presumably a no-space script; confirm


def tokenize(s):
    # Split *s* on whitespace; a trailing "." is split off as its own token.
    # An empty input is returned unchanged.
    if len(s) == 0:
        return s
    elif s[-1] == ".":
        tokens = s[:-1].split()
        tokens.append(s[-1])
        return tokens
    else:
        return s.split()


if __name__ == "__main__":
    # NOTE(review): chunk ends here — the script body under this guard is
    # outside this view.
# NOTE(review): flattened chunk reformatted for readability; code tokens
# unchanged. It begins mid-function — the statements directly below are the
# tail of Rel(lt, rt), whose `def` header is outside this view.
    # Left category must be exactly NP.
    if lt != Symbol("NP"):
        return None
    # Right category must match S[pss]\NP ("pss" presumably passive — TODO confirm).
    if rt == [BwdApp, Symbol("S[pss]"), Symbol("NP")]:
        return lt
    return None


# Module-level parser configuration (English: space-separated tokens).
parser = CCGParser()
parser.combinators = [LApp, RApp, LB, RB, LT("NP"), LT("S\\NP"), RT("NP"), Conj, SkipComma, Rel]
parser.terminators = ["ROOT", "S", "S[q]", "S[wq]", "S[imp]"]
parser.lexicon = default_lexicon()
parser.concatenator = " "


def run(text, type=0):
    # Parse *text* sentence by sentence and print the first parse of each:
    # type == 0 prints token/category pairs of the leaves, otherwise the tree
    # via t.show(). `type` shadows the builtin (kept as-is).
    for tokens in tokenize(text):
        print(u"test run : tokens={0}".format(str(tokens)))
        for t in parser.parse(tokens):
            if type == 0:
                for r in t.leaves():
                    print(u"{0}\t{1}".format(r.token, r.catname))
                # NOTE(review): break placement inferred from flattened source —
                # assumed to stop after the first parse, not the first leaf; confirm.
                break
            else:
                print(t.show())
                break
        # NOTE(review): indentation of this blank-line print is inferred from
        # the flattened source — assumed to separate sentences; confirm.
        print("")
#-- special rule for English def Rel(lt,rt): if lt!=Symbol("NP"): return None if rt==[BwdApp , Symbol("S[pss]") , Symbol("NP")]: return lt return None parser = CCGParser() parser.combinators = [LApp,RApp,LB,RB,LT("NP"),LT("S\\NP"),RT("NP"),Conj,SkipComma,Rel] parser.terminators = ["ROOT","S","S[q]","S[wq]","S[imp]"] parser.lexicon = default_lexicon() parser.concatenator = " " def run(text,type=0): for tokens in tokenize(text): print(u"test run : tokens={0}".format(str(tokens))) for t in parser.parse(tokens): if type==0: for r in t.leaves(): print(u"{0}\t{1}".format(r.token , r.catname)) break else: print( t.show() ) break