def extract_verb_case(dg):
    """Log every node of ``dg`` and, for each yogen, its case dependents.

    Walks ``dg.nodelist`` in order and writes each node's string form to
    ``util.log``.  When ``is_yogen`` accepts a node, its normalized form is
    logged, and every node referenced by that node's ``'deps'`` indices is
    checked with ``is_case``; matching dependents are logged as well.

    Nothing is collected or returned: the function only produces log output.
    """
    for current in dg.nodelist:
        util.log( jpdep.node2str(current) )

        if not is_yogen(current):
            continue
        util.log( " <-- detected yogen: %s" % normalize_yogen(current) )

        # Resolve all dependent indices up front, then keep only the cases.
        dependents = [dg.nodelist[idx] for idx in current['deps']]
        for dependent in dependents:
            if is_case(dependent):
                util.log( " <-- detected case: %s" % jpdep.node2str(dependent) )

    # NOTE(review): the source this was restored from had lost its
    # indentation; this separator is assumed to be emitted once per graph,
    # after all nodes -- confirm against the original layout.
    util.log("")
# Surface forms (katakana) of the case particles accepted by is_case().
_CASE_PARTICLE_SURFACES = (
    u"ガ", u"ヲ", u"ニ", u"ト", u"デ", u"カラ", u"ヨリ", u"ヘ", u"マデ",
)


def is_case(node):
    """Return True when ``node`` looks like a case element.

    ``node['tag']`` is indexed with ``node['bhead']`` and ``node['bform']``
    to obtain two tag sequences (presumably the head and the functional
    morpheme of the phrase -- confirm against the producer of ``node``).

    A node is accepted when either:

    * the ``bform`` tag starts with 助詞 / 格助詞 and its last element is one
      of the surfaces in ``_CASE_PARTICLE_SURFACES``; or
    * the ``bhead`` tag starts with 名詞 and the first two elements of the
      ``bform`` tag are exactly ``[名詞, 接尾]``.  This branch also prints a
      diagnostic line built with ``jpdep.node2str``.

    Any other node yields False.  Missing keys or too-short tags raise
    ``KeyError`` / ``IndexError``.
    """
    head_tag = node['tag'][node['bhead']]
    form_tag = node['tag'][node['bform']]
    surface = form_tag[-1]

    if (form_tag[0] == u"助詞" and form_tag[1] == u"格助詞"
            and surface in _CASE_PARTICLE_SURFACES):
        return True

    if head_tag[0] == u"名詞" and form_tag[0:2] == [u"名詞", u"接尾"]:
        # Single-argument parenthesised print: same output under Python 2's
        # print statement, and valid syntax under Python 3.
        print("=== detected noun+suffix: %s" % jpdep.node2str(node))
        return True

    return False