def createWordCloudForTopic(self, topicNumber):
    """Render a word cloud for one topic and return it as base64 PNG text.

    Args:
        topicNumber: Index of the topic to draw. Must satisfy
            ``0 <= topicNumber < self.numberOfTopics``.

    Returns:
        A ``(success, image)`` tuple. ``(False, '')`` when ``topicNumber`` is
        out of range; otherwise ``(True, text)`` where ``text`` is the
        base64 encoding of the PNG, decoded to ``str`` as UTF-8.
    """
    print('createWordCloudForTopic: {}'.format(topicNumber))
    if not 0 <= topicNumber < self.numberOfTopics:
        return False, ''

    # show_topic() yields pairs that dict() can consume
    # (presumably (word, weight) -- confirm against the model class in use).
    word_frequencies = self.ldamodel.show_topic(topicNumber, number_of_topic_words)

    # Generate a word cloud image
    wordcloud = WordCloud(width=600, height=600).fit_words(dict(word_frequencies))
    wordcloud.background_color = 'white'

    # Save the image to a temp folder to be sent by Flask
    # NOTE(review): the file name is fixed, so overlapping requests would
    # overwrite each other's image -- confirm this is only called serially.
    filePath = os.path.join('./state/Temp', 'wordcloud.png')
    fig = plt.figure(figsize=(6, 6))
    try:
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis("off")
        plt.savefig(filePath)
    finally:
        # Always release the figure; pyplot keeps every open figure alive
        # until it is closed, even if rendering or saving failed.
        plt.close(fig)

    # Return file as a base64 encoded string
    with open(filePath, 'rb') as f:
        image_read = f.read()
    # base64.encodestring() was removed in Python 3.9; encodebytes() is its
    # drop-in replacement and emits the same newline-wrapped output.
    return True, base64.encodebytes(image_read).decode("utf-8")
def show_cloud(words):
    """Draw a word cloud from ``words`` and display it with matplotlib.

    Args:
        words: Iterable of two-item ``(word, weight)`` entries. When a word
            occurs more than once, the last weight wins.

    The cloud is 800x800 on a white background with horizontal-only text,
    recolored through ``grey_color_func`` with a fixed random state, and
    shown via ``plt.show()``. Nothing is returned.
    """
    frequencies = {term: weight for term, weight in words}

    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    cloud = WordCloud(
        prefer_horizontal=1,
        width=800,
        height=800,
        background_color='white',
    )
    cloud.generate_from_frequencies(frequencies=frequencies)

    plt.figure(figsize=(8, 8), facecolor=None)
    recolored = cloud.recolor(color_func=grey_color_func, random_state=3)
    plt.imshow(recolored, interpolation="bilinear")
    plt.axis("off")
    plt.show()
font_size, position, orientation, random_state=None, **kwargs): return "hsl(0, 0%%, %d%%)" % (80 + font_size / 3) mask = np.array(Image.open(mask_path)) image_color = ImageColorGenerator(mask) text = open(data_path).read() #word cloud settings wc = WordCloud(font_path=content_font_path) wc.max_words = 500 wc.background_color = "#FFFFFF" wc.min_font_size = 7 wc.scale = 5 wc.prefer_horizontal = 1 wc.mode = 'RGBA' wc.mask = mask #wc creation wc = wc.generate(text) default_colors = wc.to_array() wc.recolor(color_func=image_color, random_state=3) image = wc.to_image() #add border image_size = image.size
def output():
    """Build every plot for one article and render ``output.html``.

    Query arguments read from ``request.args``:
        ID: Article URL. When missing or shorter than five characters a
            default New York Times article URL is used.
        Nclusters: Number of comment clusters. Falls back to 3 when missing,
            non-numeric, or smaller than 1.

    Returns:
        The result of ``render_template("output.html", ...)``. Each image is
        passed as a URL-quoted base64 PNG payload.

    NOTE: this block uses Python 2 APIs (``StringIO.StringIO``, the
    ``'base64'`` string codec, ``urllib.quote``).
    """
    # Load the dictionary.
    # NOTE(review): pickle executes arbitrary code on load; this file must
    # only ever be produced by this application.
    with open(data_folder_path + 'updated_database_dict.p', 'rb') as handle:
        database_dict = pickle.load(handle)

    # A missing 'ID' argument yields None; treat it like an empty string so
    # the length check below selects the default article instead of raising.
    url_string = request.args.get('ID') or ''
    Nclusters = _parse_cluster_count(request.args.get('Nclusters'))

    if len(url_string) < 5:
        url_string = 'http://www.nytimes.com/2015/09/20/opinion/sunday/a-toxic-work-world.html'

    if url_string not in database_dict:
        database_dict = update_database(url_string, database_dict)
    article = database_dict[url_string]

    headline = article['title']
    abstract = article['abstract']
    article_summary = dict(headline=headline, abstract=abstract)

    # Word cloud for the article: a lower 'rank' gets a larger weight.
    keyword_entries = article['keyword_dict']
    Nkeywords = len(keyword_entries)
    word_freq_list = [(entry['value'], Nkeywords - float(entry['rank']))
                      for entry in keyword_entries]
    title_png = _render_wordcloud_png(word_freq_list)

    # Representative comment per cluster, plus the sentiment column.
    rep_comments = get_representative_comments(article['comments_df'], Nclusters)
    senti_pos = article['comments_df']['senti_pos']

    # Pie chart of cluster sizes, largest first.
    fig, ax = _new_figure()
    color_repo = ['#4D4D4D', '#5DA5DA', '#FAA43A', '#60BD68', '#F17CB0',
                  '#B2912F', '#B276B2', '#DECF3F', '#F15854']
    sizes = [rep_comments[i]['count'] for i in range(Nclusters)]
    # Cycle through the palette so more than len(color_repo) clusters
    # cannot raise IndexError.
    colors = [color_repo[i % len(color_repo)] for i in range(Nclusters)]
    sorted_sizes_args = np.argsort(sizes)[::-1]
    labels = ['Cluster ' + str(i + 1) for i in range(Nclusters)]
    sorted_sizes = sorted(sizes, reverse=True)
    ax.pie(sorted_sizes, labels=labels, colors=colors, autopct='%1.1f%%',
           shadow=True, startangle=90)
    ax.set_axis_off()
    pie_png = _figure_to_quoted_png(fig)

    # Sentiment histogram.
    fig, ax = _new_figure()
    ax.hist(senti_pos, bins=20)
    ax.set_xlim(0, 1)
    ax.set_xlabel('Sentiment Scale')
    ax.set_ylabel('Number of Comments')
    ax.set_xticks(np.linspace(0, 1, 5))
    senti_labels = [item.get_text() for item in ax.get_xticklabels()]
    senti_labels[0] = 'Neg'
    senti_labels[2] = '0'
    senti_labels[-1] = 'Pos'
    ax.set_xticklabels(senti_labels)
    sentiment_png = _figure_to_quoted_png(fig)

    # Word cloud per representative comment; key: cluster label, val: image payload.
    word_cloud_comments = {}
    for lab in range(Nclusters):
        comment_keywords = get_keywords(rep_comments[lab]['comment'])
        # Earlier keywords get larger weights.
        keyword_wordfreq_list = [(word, len(comment_keywords) - i)
                                 for i, word in enumerate(comment_keywords)]
        word_cloud_comments[lab] = _render_wordcloud_png(keyword_wordfreq_list)

    # One HTML block per cluster, ordered from the largest cluster down so
    # "Cluster k" here matches the k-th label of the pie chart.
    comment_div_str = ''.join(
        _comment_block_html(position + 1,
                            word_cloud_comments[cluster],
                            rep_comments[cluster]['comment'])
        for position, cluster in enumerate(sorted_sizes_args)
    )

    return render_template("output.html",
                           title_png=title_png,
                           article_summary=article_summary,
                           RepComment=rep_comments,
                           comment_coulds=word_cloud_comments,
                           pie_png=pie_png,
                           img_data=sentiment_png,
                           Nclusters=Nclusters,
                           comment_div_str=comment_div_str)


def _parse_cluster_count(raw_value, default=3):
    """Return ``raw_value`` as a positive int, or ``default`` if it is unusable."""
    try:
        count = int(raw_value)
    except (TypeError, ValueError):
        return default
    return count if count >= 1 else default


def _new_figure():
    """Create a Figure with facecolor 'None' and one subplot; return ``(fig, ax)``."""
    fig = Figure()
    fig.set_facecolor('None')
    return fig, fig.add_subplot(111)


def _figure_to_quoted_png(fig):
    """Render ``fig`` to PNG and return it base64-encoded and URL-quoted."""
    canvas = FigureCanvas(fig)
    png_buffer = StringIO.StringIO()
    canvas.print_png(png_buffer)
    encoded = png_buffer.getvalue().encode('base64')
    # Drop the trailing newline(s) the codec appends before quoting.
    return urllib.quote(encoded.rstrip('\n'))


def _render_wordcloud_png(frequencies):
    """Draw a white-background word cloud for ``frequencies``; return its quoted PNG."""
    cloud = WordCloud().generate_from_frequencies(frequencies)
    cloud.background_color = 'white'
    cloud.recolor(color_func=custom_color_func)
    fig, ax = _new_figure()
    ax.imshow(cloud)
    ax.set_axis_off()
    return _figure_to_quoted_png(fig)


def _comment_block_html(cluster_number, image_payload, comment_text):
    """Return the HTML row showing one cluster's word cloud beside its comment."""
    # NOTE(review): comment_text is inserted without HTML escaping. If the
    # stored comments can contain untrusted markup, sanitize it here (it is
    # left untouched because the text may carry intended markup -- confirm).
    # Plain concatenation is used so unicode comment text still works on
    # Python 2.
    return ('<div class="row"> <div class="col-md-12" > <h4> Cluster '
            + str(cluster_number)
            + '</h4> <div class="col-md-5" > <img src="data:image/png;base64,'
            + image_payload
            + '" width="400" height="300"/> </div>'
            + '<div class="col-md-7" > <p class="lead"> '
            + comment_text
            + '</p> </div>'
            + '</div> </div> ')