def word_count() -> Counter:
    """Count the frequency of every surface form in the analysed corpus.

    Returns:
        collections.Counter mapping surface form -> occurrence count.
        (The original annotation said ``list`` but a Counter was returned;
        the annotation is corrected here — Counter is dict-compatible.)
    """
    lines = data_mapping(analysis_file)
    # Flatten all tokens across all sentences and tally in a single pass.
    # The dead local `longest_N_list` from the original is removed.
    return Counter(word['surface'] for line in lines for word in line)
def get_v_sur():
    """Return the surface forms of the first five verbs in the corpus.

    Bug fix: the original ``if t == 5: break`` only exited the inner
    (per-sentence) loop; on the next sentence the counter passed 5 and the
    equality test never fired again, so every later verb was appended.
    We now stop globally once five verbs have been collected.

    Returns:
        list[str]: up to five verb surface forms, in corpus order.
    """
    lines = data_mapping(analysis_file)
    v_sur = []
    for line in lines:
        for word in line:
            if word['pos'] == '動詞':  # part-of-speech tag: verb
                v_sur.append(word['surface'])
                if len(v_sur) == 5:
                    return v_sur
    # Fewer than five verbs in the whole corpus — return what we found.
    return v_sur
def get_v_base():
    """Return the base (dictionary) forms of the first five verbs.

    Bug fix: the original ``if t == 5: break`` only exited the inner
    (per-sentence) loop, so after the first sentence the counter passed 5
    and collection continued indefinitely.  We now stop globally at five.

    Returns:
        list[str]: up to five verb base forms, in corpus order.
    """
    lines = data_mapping(analysis_file)
    v_base = []
    for line in lines:
        for word in line:
            if word['pos'] == '動詞':  # part-of-speech tag: verb
                v_base.append(word['base'])
                if len(v_base) == 5:
                    return v_base
    # Fewer than five verbs in the whole corpus — return what we found.
    return v_base
def get_n_phrase():
    """Return the first ten noun phrases of the pattern '<noun>の<noun>'.

    A phrase is three consecutive tokens: a noun, the particle 'の', and
    another noun; the three surfaces are concatenated.

    Bug fix: the original ``if t == 10: break`` only exited the inner loop,
    so later sentences kept appending past the intended limit of ten.

    Returns:
        list[str]: up to ten concatenated noun phrases, in corpus order.
    """
    lines = data_mapping(analysis_file)
    n_phrase = []
    for line in lines:
        # Scan interior positions only, so both neighbours always exist.
        for i in range(1, len(line) - 1):
            if (line[i]['surface'] == 'の'
                    and line[i - 1]['pos'] == '名詞'      # noun
                    and line[i + 1]['pos'] == '名詞'):    # noun
                n_phrase.append(
                    line[i - 1]['surface']
                    + line[i]['surface']
                    + line[i + 1]['surface']
                )
                if len(n_phrase) == 10:
                    return n_phrase
    # Fewer than ten matches in the whole corpus — return what we found.
    return n_phrase
def co_occ(target) -> list:
    """Collect surfaces co-occurring with *target* in the same sentence.

    Sentences are delimited by the full stop '。'.  For every sentence that
    contains *target*, all other token surfaces are collected — including
    punctuation (the '。' delimiter itself ends up in the result, matching
    the original behaviour).  A trailing sentence with no '。' is discarded,
    as before.

    Bug fix: the ``target`` parameter was accepted but ignored — the match
    was hard-coded to '猫'.  The parameter is now actually used; callers
    passing '猫' observe identical behaviour.

    Args:
        target: surface form whose co-occurring words are wanted.

    Returns:
        list[str]: co-occurring surfaces, in corpus order.
    """
    lines = data_mapping(analysis_file)
    co_list = []
    sentence_words = []   # surfaces of the current sentence (minus target)
    found_target = False  # has target appeared in the current sentence?
    for line in lines:
        for word in line:
            surface = word['surface']
            if surface == target:
                found_target = True
            else:
                sentence_words.append(surface)
            if surface == '。':
                # Sentence boundary: keep the buffer only if target appeared.
                if found_target:
                    co_list.extend(sentence_words)
                sentence_words = []
                found_target = False
    return co_list