Example No. 1
from collections import Counter

from konlpy.tag import Hannanum
from nltk import Text
from nltk.tokenize import regexp_tokenize


def word_analysis(text):
    # Extract English tokens that appear immediately before ",https" in the raw text.
    en = regexp_tokenize(text, "([A-Za-z]+),https")
    en_tokens = list(Text(en))

    # remove_special_characters is a project-specific helper (not shown here);
    # it is assumed to strip punctuation and symbols before tagging.
    cleaned = remove_special_characters(text)

    # Extract Korean nouns with the Hannanum tagger and merge in the English tokens.
    nouns = Hannanum().nouns(cleaned)
    nouns.extend(en_tokens)
    print(nouns)

    # Count occurrences and keep the 30 most frequent terms.
    counts = Counter(nouns)
    noun_list = counts.most_common(30)
    print(counts)

    # Return up to 10 of the most frequent non-empty terms as (word, frequency) pairs.
    top_terms = []
    for word, freq in noun_list:
        if len(word) >= 1:
            top_terms.append((word, freq))
            if len(top_terms) >= 10:
                break

    return top_terms
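
A minimal usage sketch, assuming NLTK and KoNLPy are installed and that remove_special_characters is defined elsewhere in the project; the sample string below is hypothetical and only illustrates the expected input shape (mixed Korean text with an English token followed by ",https").

# Usage sketch: sample text and expected call pattern are assumptions, not from the source.
sample = "자연어 처리 예제 텍스트입니다 example,https://example.com"
for word, freq in word_analysis(sample):
    print(word, freq)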