Example #1
from collections import Counter

# data_mapping(analysis_file) is defined elsewhere; it is expected to return
# the analysed text as a list of sentences, each a list of morpheme dicts.
def word_count() -> Counter:
    """Count how often each surface form occurs in the analysed text."""
    lines = data_mapping(analysis_file)
    word_list = []
    for line in lines:
        for word in line:
            word_list.append(word['surface'])
    return Counter(word_list)
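Every example relies on data_mapping(analysis_file), which is not shown in this listing. From the way it is used, it has to return the analysed text as a list of sentences, each sentence being a list of morpheme dictionaries with at least the keys 'surface', 'base' and 'pos'. The loader below is only a minimal sketch under the assumption that analysis_file is MeCab output in its default format (surface, a tab, then comma-separated features, with an EOS line after every sentence); the real file format may differ.

def data_mapping(path):
    """Hypothetical loader for a MeCab result file (assumed default format)."""
    sentences, sentence = [], []
    with open(path, encoding='utf-8') as f:
        for raw in f:
            raw = raw.rstrip('\n')
            if raw == 'EOS':          # MeCab marks the end of a sentence with EOS
                if sentence:
                    sentences.append(sentence)
                    sentence = []
                continue
            if not raw:
                continue
            surface, features = raw.split('\t', 1)
            fields = features.split(',')
            sentence.append({
                'surface': surface,
                'pos': fields[0],                                   # part of speech
                'base': fields[6] if len(fields) > 6 else surface,  # dictionary form
            })
    return sentences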
Example #2
def get_v_sur():
    """Collect the surface forms of all verbs in the first five sentences."""
    lines = data_mapping(analysis_file)
    V_sur = []
    for line_no, line in enumerate(lines, start=1):
        for word in line:
            if word['pos'] == '動詞':  # verb
                V_sur.append(word['surface'])
        if line_no == 5:  # only the first five sentences are needed
            break
    return V_sur
Example #3
def get_v_base():
    """Collect the base (dictionary) forms of all verbs in the first five sentences."""
    lines = data_mapping(analysis_file)
    V_base = []
    for line_no, line in enumerate(lines, start=1):
        for word in line:
            if word['pos'] == '動詞':  # verb
                V_base.append(word['base'])
        if line_no == 5:  # only the first five sentences are needed
            break
    return V_base
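Since get_v_sur() and get_v_base() walk the same five sentences and apply the same part-of-speech test, only the collected field differs; a quick, purely illustrative sanity check is that both return lists of equal length.

# Illustrative check: one surface form per base form and vice versa.
assert len(get_v_sur()) == len(get_v_base())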
Example #4
def get_n_phrase():
    """Collect noun phrases of the form 'A の B' (noun + の + noun) from the first ten sentences."""
    lines = data_mapping(analysis_file)
    N_phrase = []
    for line_no, line in enumerate(lines, start=1):
        # Check every word against its left and right neighbours.
        for i in range(1, len(line) - 1):
            if line[i]['surface'] == 'の' \
                    and line[i - 1]['pos'] == '名詞' \
                    and line[i + 1]['pos'] == '名詞':
                phrase = line[i - 1]['surface'] + line[i]['surface'] + line[i + 1]['surface']
                N_phrase.append(phrase)
        if line_no == 10:  # only the first ten sentences are needed
            break
    return N_phrase
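To see what the neighbour check in get_n_phrase() actually matches, here is a hand-built, purely hypothetical one-sentence input; none of these morphemes come from the real analysis file.

# Hypothetical single sentence: noun + 'の' + noun.
sample_line = [
    {'surface': '彼', 'pos': '名詞', 'base': '彼'},
    {'surface': 'の', 'pos': '助詞', 'base': 'の'},
    {'surface': '掌', 'pos': '名詞', 'base': '掌'},
]
# At i == 1 all three conditions hold, so the phrase '彼の掌' would be collected.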
Example #5
def co_occ(target) -> list:
    """Collect the words that share a sentence with `target`.

    Sentence boundaries are detected by the full stop '。'; the target
    itself and the sentence-final '。' are not included in the result.
    """
    lines = data_mapping(analysis_file)
    co_list = []
    words = []              # words collected for the current sentence
    found_target = False
    for line in lines:
        for word in line:
            surface = word['surface']
            if surface == target:
                found_target = True
            elif surface != '。':
                words.append(surface)
            if surface == '。':   # end of the current sentence
                if found_target:
                    co_list.extend(words)
                words = []
                found_target = False
    return co_list
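The list returned by co_occ() can be combined with the same Counter approach as in Example #1 to rank the words that most often appear in the same sentence as the target; the call below is only a usage sketch.

from collections import Counter

# Hypothetical usage: the ten words most frequently co-occurring with '猫'.
print(Counter(co_occ('猫')).most_common(10))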