def show_hot_barh():
    """Draw a horizontal bar chart of the number of news items per hot-spot rank.

    Loads ``news_non_outliers.csv`` from ``results_path``. If that file does
    not exist, an info dialog asking the user to cluster the news content
    first is shown and the function returns without drawing anything.

    Each bar is labelled with the string form of
    ``counter.get_most_common_words(..., top_n=10)`` for that rank's
    ``content_cut`` values, followed by the rank number.

    Returns:
        None. The chart is produced by ``drawing.draw_clustering_analysis_barh``.
    """
    # Only the load can raise FileNotFoundError, so keep the try body minimal.
    try:
        df_non_outliers = news_pandas.load_news(
            os.path.join(results_path, 'news_non_outliers.csv'))
    except FileNotFoundError:
        messagebox.showinfo('Message', '请先对新闻内容文本进行聚类!')
        return

    # SECURITY NOTE(review): eval() executes arbitrary expressions read from
    # the CSV file. This is only acceptable while the file is produced by this
    # application itself; consider ast.literal_eval if the stored values are
    # plain Python literals -- TODO confirm the stored format.
    df_non_outliers['content_cut'] = df_non_outliers['content_cut'].map(eval)

    rank_num = counter.get_num_of_value_no_repeat(df_non_outliers['rank'])

    value = []
    yticks = []
    # Ranks are iterated as 1..rank_num (assumes rank values are contiguous
    # and start at 1 -- verify against the code that writes the CSV).
    for rank in range(1, rank_num + 1):
        # Filter once per rank and reuse the subset for both count and label.
        rank_rows = df_non_outliers[df_non_outliers['rank'] == rank]
        value.append(rank_rows.shape[0])
        top_words = counter.get_most_common_words(
            rank_rows['content_cut'], top_n=10)
        yticks.append(str(top_words) + str(rank))

    # The title names a bar chart ("条形图"), matching what is actually drawn.
    drawing.draw_clustering_analysis_barh(
        rank_num, value, yticks, title='热点新闻分布条形图')
def show_word_barh():
    """Draw a horizontal bar chart of the number of words per word cluster.

    Loads ``word_df.csv`` from ``results_path``. If that file does not exist,
    an info dialog is shown and the function returns without drawing anything.

    Cluster labels are iterated as ``0..n_clusters - 1``. Each bar is labelled
    with the string form of the first ten entries of that cluster's ``word``
    column, followed by the 1-based cluster number.

    Returns:
        None. The chart is produced by ``drawing.draw_clustering_analysis_barh``.
    """
    try:
        word_df = news_pandas.load_news(
            os.path.join(results_path, 'word_df.csv'))
    except FileNotFoundError:
        # Report a missing results file with a dialog instead of letting the
        # exception propagate out of the GUI callback.
        messagebox.showinfo('Message', '请先对词汇进行聚类!')
        return

    # The 'wordvec' column is not read by this chart, so it is deliberately
    # left unparsed (avoids running eval() over file contents for no benefit).
    n_clusters = counter.get_num_of_value_no_repeat(word_df['word_label'])

    word_label_value = []
    word_label_yticks = []
    for label in range(n_clusters):
        # Filter once per cluster and reuse the subset for count and label.
        cluster_rows = word_df[word_df['word_label'] == label]
        word_label_value.append(cluster_rows.shape[0])
        sample_words = cluster_rows['word'][:10].tolist()
        word_label_yticks.append(str(sample_words) + str(label + 1))

    drawing.draw_clustering_analysis_barh(
        n_clusters, word_label_value, word_label_yticks, title='词汇聚类条形图')