예제 #1
0
def show_word_pie():
    """Summarise the word clusters stored in ``word_df.csv`` and plot them.

    Loads the word table from ``results_path``, then for every cluster
    label ``0 .. n_clusters - 1`` collects the number of rows carrying that
    label and the first five entries of the ``word`` column for it.  The
    cluster count, sizes and sample words are handed to
    ``drawing.draw_clustering_analysis_pie``.
    """
    csv_path = os.path.join(results_path, 'word_df.csv')
    words = news_pandas.load_news(csv_path)

    # NOTE(review): eval() executes whatever text is stored in the CSV cell.
    # If the file can come from an untrusted source, consider switching to
    # ast.literal_eval after confirming the stored format allows it.
    words['wordvec'] = words['wordvec'].map(eval)

    cluster_total = counter.get_num_of_value_no_repeat(words['word_label'])

    cluster_sizes = []
    cluster_samples = []
    for label in range(cluster_total):
        # Filter once per label and derive both the size and the sample words.
        members = words[words['word_label'] == label]
        cluster_sizes.append(members.shape[0])
        cluster_samples.append(members['word'][:5].tolist())

    drawing.draw_clustering_analysis_pie(cluster_total, cluster_sizes, cluster_samples)
예제 #2
0
def show_hot_pie():
    """Summarise the ranked news groups in ``news_non_outliers.csv`` and plot them.

    Loads the table from ``results_path``; if the file does not exist, an
    info dialog is shown and the function returns without plotting.
    Otherwise, for every rank ``1 .. rank_num`` it collects the number of
    rows with that rank and the result of ``counter.get_most_common_words``
    (``top_n=5``) over that rank's ``content_cut`` values, and passes the
    rank count, sizes and word lists to
    ``drawing.draw_clustering_analysis_pie``.
    """
    csv_path = os.path.join(results_path, 'news_non_outliers.csv')
    try:
        news = news_pandas.load_news(csv_path)
        # NOTE(review): eval() executes whatever text is stored in the CSV
        # cell. If the file can come from an untrusted source, consider
        # ast.literal_eval after confirming the stored format allows it.
        news['content_cut'] = news['content_cut'].map(eval)
    except FileNotFoundError:
        # The dialog text asks the user to cluster the news content first.
        messagebox.showinfo('Message', '请先对新闻内容文本进行聚类!')
        return

    rank_num = counter.get_num_of_value_no_repeat(news['rank'])

    group_sizes = []
    group_keywords = []
    for rank in range(1, rank_num + 1):
        # Ranks are 1-based here; filter once and reuse the subset.
        ranked = news[news['rank'] == rank]
        group_sizes.append(ranked.shape[0])
        group_keywords.append(
            counter.get_most_common_words(ranked['content_cut'], top_n=5)
        )

    # An alternative labelling (one key sentence per rank, built from the
    # joined 'title_' column via modeling.get_key_sentences) is disabled.
    drawing.draw_clustering_analysis_pie(rank_num, group_sizes, group_keywords)