Пример #1
0
        else:
           return []
    def __setitem__(self, tok, cats):
        """Store ``cats`` under the key ``tok`` in ``self.static_dics``.

        Enables ``lexicon[tok] = cats`` item-assignment syntax on the instance.
        """
        self.static_dics[tok] = cats
    def has_key(self, tok):
        """Return the result of the membership test ``tok in self.static_dics``."""
        return tok in self.static_dics
    def get(self, toklist, defval):
        """Look up ``toklist`` via ``__getitem__`` and return that result as-is.

        NOTE(review): ``defval`` is accepted but never consulted, so this is
        not a ``dict.get``-style fallback; the caller always receives whatever
        ``__getitem__`` produces. Confirm whether callers rely on that.
        """
        return self.__getitem__(toklist)


# Module-level parser instance shared by the code below.
parser = CCGParser()
# Rules handed to the parser through its ``combinators`` attribute.
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    Conj,
    RT("NP[sbj]"),
    LBx,
]
# Category names assigned to ``terminators`` -- presumably the categories that
# count as a complete parse; confirm against CCGParser.
parser.terminators = [
    "ROOT",
    "S",
    "S[wq]",
    "S[q]",
    "S[imp]",
]
parser.lexicon = Lexicon()
# Empty string: presumably tokens are joined without a separator -- TODO confirm.
parser.concatenator = ""

def tokenize(s):
    """Split a sentence into whitespace-separated tokens.

    A period that is the very last character of ``s`` is split off as its own
    final token, e.g. ``"I run."`` -> ``["I", "run", "."]``. Periods elsewhere
    (including one followed by trailing whitespace) stay attached to their
    word.

    Args:
        s: The sentence text.

    Returns:
        A list of token strings. Empty (or otherwise falsy) input yields an
        empty list, so callers always receive a list.
    """
    if not s:
        # Previously the empty input string itself was returned here, making
        # the return type depend on the input; always hand back a list.
        return []
    if s[-1] == ".":
        # Reuse s[-1] rather than a literal so the token keeps the same string
        # type as the input.
        return s[:-1].split() + [s[-1]]
    return s.split()


if __name__=="__main__":
   def __repr__(s):
       if sys.stdout.encoding=='UTF-8':
Пример #2
0
        return None
    if rt == [BwdApp, Symbol("S[pss]"), Symbol("NP")]:
        return lt
    return None


# Module-level parser instance used by the functions below.
parser = CCGParser()
# Rules handed to the parser through its ``combinators`` attribute.
parser.combinators = [
    LApp,
    RApp,
    LB,
    RB,
    LT("NP"),
    LT("S\\NP"),
    RT("NP"),
    Conj,
    SkipComma,
    Rel,
]
# Category names assigned to ``terminators`` -- presumably the categories that
# count as a complete parse; confirm against CCGParser.
parser.terminators = [
    "ROOT",
    "S",
    "S[q]",
    "S[wq]",
    "S[imp]",
]
parser.lexicon = default_lexicon()
# Single space: presumably the separator used when joining tokens -- TODO confirm.
parser.concatenator = " "


def run(text, type=0):
    """Parse ``text`` and print the first parse of each token sequence.

    Every item produced by ``tokenize(text)`` is echoed, passed to
    ``parser.parse``, and only the first parse it produces is printed. A blank
    line follows each token sequence, whether or not a parse was found.

    Args:
        text: Input handed to ``tokenize``.
        type: ``0`` prints one ``token<TAB>catname`` line per leaf of the
            parse; any other value prints the result of the parse's
            ``show()`` method instead.
    """
    for token_seq in tokenize(text):
        print(u"test run : tokens={0}".format(str(token_seq)))
        for tree in parser.parse(token_seq):
            if type == 0:
                for leaf in tree.leaves():
                    print(u"{0}\t{1}".format(leaf.token, leaf.catname))
            else:
                print(tree.show())
            # Only the first parse is reported; later ones are never requested.
            break
        print("")