def extract_cp(phs):
    """Extract noun-particle-predicate triples ("case patterns") from parsed phrases.

    Args:
        phs: OrderedDict mapping phrase id -> mynlp.PhraseClass, as produced
            by _knp(). May be None or empty.

    Returns:
        list of dicts with keys "noun", "particle", "predicate", "category"
        (plus the legacy misspelled key "cateogory", kept for callers that
        still read it).
    """
    if not phs:
        return []
    cps = []
    for pi, ph in phs.items():
        # BUG FIX: the old guard `if not ph.parent_id` also skipped
        # parent_id == 0, which is a valid phrase index; -1 marks the root.
        if ph.parent_id is None or ph.parent_id < 0:
            continue
        # Parent lookup is loop-invariant, so resolve it once per phrase.
        parent = phs[ph.parent_id]
        parent_self_w = _extract_self_sufficient_word(parent.words)
        if not parent_self_w:
            continue
        previous_w = mynlp.WordClass()
        for word in ph.words:
            # Emit a triple when a case-marking particle (格助詞) follows a
            # noun (名詞) that is not a formal noun (形式名詞).
            if (previous_w.pos_detail != "形式名詞"
                    and word.pos_detail == "格助詞"
                    and previous_w.pos == "名詞"):
                cp = {
                    "noun": previous_w.base,
                    "particle": word.particle,
                    "predicate": parent_self_w.base,
                    "category": word.category,
                }
                # Legacy misspelled key, kept for backward compatibility.
                cp["cateogory"] = word.category
                cps.append(cp)
            # BUG FIX: previous_w was never advanced, so the noun+particle
            # condition above could never fire and the result was always [].
            previous_w = word
    return cps
def _knp(sentence):
    """Parse a Japanese sentence with KNP and pack the result into mynlp classes.

    Args:
        sentence: input sentence string.

    Returns:
        OrderedDict mapping bunsetsu (phrase) id -> mynlp.PhraseClass, or
        None when the input is empty or KNP fails to parse it.
    """
    if sentence == "":
        return None
    knp = pyknp.KNP()
    try:
        result = knp.parse(sentence)
    except Exception:  # BUG FIX: was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
        return None
    # Store the parse information in our own classes.
    phrases = OrderedDict()  # dictionary of PhraseClass, keyed by bunsetsu id
    for bnst in result.bnst_list():
        ph = mynlp.PhraseClass(parent_id=bnst.parent_id, dpndtype=bnst.dpndtype)
        # Store the word info for every morpheme contained in this bunsetsu.
        for mrph in bnst.mrph_list():  # mrph_list: morphemes within the bunsetsu
            word = mynlp.WordClass(surface=mrph.midasi, base=mrph.genkei, yomi=mrph.yomi)
            # Detailed part-of-speech information; spec() fields are
            # space-separated:  # or .new_spec()
            # surface reading lemma POS POS-id POS-detail POS-detail-id
            # conj-type conj-type-id conj-form conj-form-id semantic-info
            pos_info = mrph.spec().split(" ")
            word.pos = pos_info[3]         # part of speech (品詞)
            word.pos_detail = pos_info[5]  # POS subcategory (品詞細分類)
            # Semantic info: representative notation, kanji reading, category, etc.
            imis = mrph.imis.split()
            for imi in imis:
                if "代表表記" in imi:  # representative notation
                    word.descriptions = imi.split(":", 1)[-1]
                elif "カテゴリ" in imi:  # category
                    word.category = imi.split(":", 1)[-1]
                elif "ドメイン" in imi:  # domain
                    word.domain = imi.split(":", 1)[-1]
                elif ("人名:" in imi) or ("地名:" in imi):  # proper noun (person/place name)
                    word.proper_noun = imi.split(":", 1)[-1]
                else:
                    word.another = word.another + imi + " "
            ph.words.append(word)
        phrases[bnst.bnst_id] = ph
    # Record the reverse dependency links (children) for each phrase.
    for ph_i, ph in phrases.items():
        if ph.parent_id != -1:
            phrases[ph.parent_id].children.append(ph_i)
    return phrases
def _embed_to_class(bnsts):
    """Rebuild PhraseClass/WordClass objects from their plain-dict form.

    Args:
        bnsts: sequence of phrase dicts, each with "parent_id", "parent",
            "children", "dpndtype" and a "words" list of word dicts.

    Returns:
        OrderedDict mapping sequence index -> mynlp.PhraseClass, or None
        for empty/None input.
    """
    # BUG FIX (minor): the guard now runs before allocating the dict, so
    # the early-return path does no wasted work.
    if not bnsts:
        return None
    phs = OrderedDict()
    for bnst_i, bnst in enumerate(bnsts):
        ph = mynlp.PhraseClass(
            parent_id=bnst["parent_id"],
            parent=bnst["parent"],
            children=bnst["children"],
            dpndtype=bnst["dpndtype"],
        )
        for mrph in bnst["words"]:
            word = mynlp.WordClass(
                mrph["surface"], mrph["base"], mrph["yomi"],
                mrph["pos"], mrph["pos_detail"], mrph["descriptions"],
                mrph["category"], mrph["domain"], mrph["another"],
                mrph["proper_noun"],
            )
            ph.words.append(word)
        phs[bnst_i] = ph
    return phs
def _cross_word(w1, w2):
    """Merge two adjacent words into one compound word.

    Surface/base/yomi strings are concatenated; part-of-speech info is
    taken from the second word; metadata fields prefer w2 and fall back
    to w1.

    Args:
        w1: mynlp.WordClass, the preceding word (not modified).
        w2: mynlp.WordClass, the following word (not modified).

    Returns:
        A new mynlp.WordClass representing the merged word.
    """
    nw = mynlp.WordClass()
    nw.base = w1.surface + w2.base
    nw.surface = w1.surface + w2.surface
    nw.yomi = w1.yomi + w2.yomi
    nw.pos = w2.pos
    nw.pos_detail = w2.pos_detail
    # BUG FIX: copy instead of aliasing — the old code extended
    # w1.original_words in place, silently mutating the caller's word.
    originals = list(w1.original_words)
    if w2.original_words:
        originals.extend(w2.original_words)
    else:
        originals.append(w2.base)
    nw.original_words = originals
    # Join the representative notations when both are present.
    if w1.descriptions and w2.descriptions:
        nw.descriptions = w1.descriptions + "," + w2.descriptions
    elif w1.descriptions:
        nw.descriptions = w1.descriptions
    elif w2.descriptions:
        nw.descriptions = w2.descriptions
    # BUG FIXES for the remaining metadata merges:
    # * the old code assigned the result of list.extend(), which is always
    #   None, so even the "success" path stored nothing;
    # * `category` was written to a misspelled attribute (`cateegory`)
    #   in both branches, so the merged word never carried a category;
    # * the `another` merge block was duplicated, and the first copy's
    #   fallback wrote w2.domain into nw.domain by copy-paste mistake;
    # * bare `except:` clauses masked all of the above.
    # Merged behavior: prefer w2's value; additionally fall back to w1's
    # so existing data is not silently dropped.
    nw.category = w2.category if w2.category else w1.category
    nw.domain = w2.domain if w2.domain else w1.domain
    nw.another = w2.another if w2.another else w1.another
    nw.proper_noun = w2.proper_noun if w2.proper_noun else w1.proper_noun
    return nw