def word_count() -> Counter:
    """Count how often each surface form occurs in the parsed text.

    Calls ``mecab(file)`` and tallies the ``'surface'`` entry of every
    token on every line it yields.

    Returns:
        A ``Counter`` mapping each surface string to its number of
        occurrences (not a plain list, despite the earlier annotation).
    """
    lines = mecab(file)
    # Feed the surfaces straight into Counter; no intermediate list needed.
    return Counter(word['surface'] for line in lines for word in line)
def get_v_sur():
    """Collect the surface forms of verbs from the first five parsed lines.

    Returns:
        A list with the ``'surface'`` value of every token whose ``'pos'``
        is '動詞', in the order the tokens are encountered.
    """
    verb_surfaces = []
    for line_no, line in enumerate(mecab(file), start=1):
        verb_surfaces.extend(
            token['surface'] for token in line if token['pos'] == '動詞'
        )
        # NOTE(review): the limit counts lines, not verbs — confirm this is
        # the intended cut-off.
        if line_no == 5:
            break
    return verb_surfaces
def get_v_base():
    """Collect the base forms of verbs from the first five parsed lines.

    Returns:
        A list with the ``'base'`` value of every token whose ``'pos'``
        is '動詞', in the order the tokens are encountered.
    """
    verb_bases = []
    for line_no, line in enumerate(mecab(file), start=1):
        verb_bases.extend(
            token['base'] for token in line if token['pos'] == '動詞'
        )
        # NOTE(review): the limit counts lines, not verbs — confirm this is
        # the intended cut-off.
        if line_no == 5:
            break
    return verb_bases
def get_greedy_n():
    """Extract maximal runs of consecutive nouns from the first ten lines.

    Tokens whose ``'pos'`` is '名詞' are accumulated; when the run is
    interrupted (by a non-noun token or by the end of the line) and it
    contains more than one noun, the concatenated surfaces are recorded.

    Returns:
        A list of concatenated noun-run strings, in order of appearance.
    """
    noun_phrases = []
    for line_no, line in enumerate(mecab(file), start=1):
        # Start every line with an empty run so that nouns are never glued
        # together across a line boundary.
        run = []
        for token in line:
            if token['pos'] == '名詞':
                run.append(token['surface'])
                continue
            if len(run) > 1:
                noun_phrases.append("".join(run))
            run = []
        # Flush a run that reaches the end of the line; previously it was
        # either merged into the next line or silently dropped.
        if len(run) > 1:
            noun_phrases.append("".join(run))
        # NOTE(review): the limit counts lines, not phrases — confirm this
        # is the intended cut-off.
        if line_no == 10:
            break
    return noun_phrases
def co_occ(target) -> list:
    """List the words that share a sentence with ``target``.

    Tokens from ``mecab(file)`` are grouped into sentences terminated by
    the surface "。". For every sentence containing a token whose surface
    equals ``target``, all other surfaces of that sentence are appended to
    the result.

    Args:
        target: Surface string to look for. (Previously ignored in favour
            of a hard-coded '猫'.)

    Returns:
        A flat list of co-occurring surfaces, duplicates included.
    """
    co_list = []
    sentence_words = []
    found_target = False
    for line in mecab(file):
        for word in line:
            surface = word['surface']
            if surface == target:
                found_target = True
            else:
                # Every non-target surface is kept, whatever its part of
                # speech — this includes the closing "。" itself.
                sentence_words.append(surface)
            if surface == "。":
                if found_target:
                    co_list.extend(sentence_words)
                # Sentence finished: reset state for the next one. Text
                # without a "。" keeps accumulating across lines.
                sentence_words = []
                found_target = False
    return co_list