def draw_cloud(dataframe, column):
    """Render a word-cloud image from the text entries of one dataframe column.

    Parameters
    ----------
    dataframe : mapping/DataFrame-like
        Indexed by *column*; the values are expected to be strings
        (presumably pre-processed titles — confirm with caller).
    column : hashable
        Key of the column whose values are joined into the cloud's corpus.

    Returns
    -------
    A PIL image produced by ``WordCloud.to_image()``.
    """
    # Fold every cell of the column into one comma-separated corpus string.
    corpus = ','.join(list(dataframe[column]))

    # Configure the cloud: white canvas, generous word budget, steel-blue outline.
    cloud = WordCloud(
        background_color="white",
        max_words=5000,
        contour_width=6,
        contour_color='steelblue',
    )
    cloud.generate(corpus)
    return cloud.to_image()
def create_word_cloud(x="result.png"):
    """Generate a Persian word cloud from the file ``word_repeat_word_cloud``.

    Reads the whole file, builds a ``PersianWordCloud`` with a set of extra
    Persian stop words (common verb forms), shows the resulting image and
    saves it to *x*.

    Parameters
    ----------
    x : str
        Output image path (default ``"result.png"``).
    """
    # BUG FIX: the original rebound `stopwords` with a separate
    # add_stop_words([...]) call per word; assuming add_stop_words returns
    # the default stop-word set plus only the words given (as in the
    # persian-wordcloud API — TODO confirm), every rebinding discarded the
    # previously added word and only the final one took effect. Passing the
    # whole list in a single call keeps them all. (The duplicate 'کند'
    # entry from the original is also dropped.)
    extra_stop_words = [
        'نیست', 'هست', 'میکنیم', 'کردند', 'کنید', 'میکنند', 'کردم',
        'کردیم', 'داریم', 'کرده', 'کرد', 'میکند', 'میکنم', 'هستیم',
        'کردید', 'کنیم', 'کنند', 'باشیم', 'کند', 'میشود', 'میشویم',
        'میشوید', 'اینها',
    ]
    stopwords = add_stop_words(extra_stop_words)

    # FIX: use a context manager so the file is closed even if reading or
    # cloud generation raises (the original only closed it on success).
    with open("word_repeat_word_cloud", encoding="utf8") as f:
        text = f.read()

    # Generate the word-cloud image (Persian-only tokens, black background).
    wordcloud = PersianWordCloud(
        only_persian=True,
        max_words=300,
        margin=0,
        width=1000,
        height=1000,
        min_font_size=1,
        collocations=False,
        max_font_size=500,
        stopwords=stopwords,
        background_color="black",
    ).generate(text)

    # Display the generated image, then persist it to disk.
    image = wordcloud.to_image()
    image.show()
    image.save(x)
def genwordcloud(fulltext):
    """Generate a small word-cloud PNG from *fulltext*.

    Parameters
    ----------
    fulltext : str
        The corpus to visualise.

    Returns
    -------
    bytes or None
        PNG-encoded image bytes on success; ``None`` if generation failed
        (the error is logged to stderr — best-effort behaviour is intentional).
    """
    try:
        wordcloud = WordCloud(
            max_font_size=60,
            max_words=30,
            background_color="white",
            collocations=False,
        ).generate(fulltext)
        # Encode the PIL image as PNG into an in-memory buffer.
        image = wordcloud.to_image()
        output = io.BytesIO()
        image.save(output, format="PNG")
        print(hostname, now(), '/wordcloud: Generated wordcloud image.', file=sys.stderr)
        return output.getvalue()
    # FIX: narrowed from a bare `except:` — the bare form also swallowed
    # KeyboardInterrupt and SystemExit, making the process hard to stop.
    except Exception:
        print(hostname, now(), '/wordcloud: Error generating wordcloud image.', file=sys.stderr)
        return None
# Import the wordcloud library and matplotlib for display.
import wordcloud
import matplotlib.pyplot as plt

# Join the different processed titles together into one corpus string.
long_string = " ".join(papers['title_processed'])

# Create a WordCloud object.
# FIX: the original did `wordcloud = wordcloud.WordCloud()`, shadowing the
# module just imported with the instance — any later use of the module name
# would break. Bind the instance to a distinct name instead.
cloud = wordcloud.WordCloud()

# Generate the word cloud from the corpus.
cloud.generate(long_string)

# Visualize the word cloud.
cloud.to_image()
plt.figure()
plt.imshow(cloud, interpolation="bilinear")
plt.show()

# ## 6. Prepare the text for LDA analysis
# The main text analysis method that we will use is latent Dirichlet
# allocation (LDA). LDA is able to perform topic detection on large document
# sets, determining what the main 'topics' are in a large unlabeled set of
# texts. A 'topic' is a collection of words that tend to co-occur often.
# The hypothesis is that LDA might be able to clarify what the different
# topics in the research titles are. These topics can then be used as a
# starting point for further analysis.
#
# LDA does not work directly on text data. First, it is necessary to convert
# the documents into a simple vector representation. This representation will
# then be used by LDA to determine the topics. Each entry of a 'document
# vector' will correspond with the number of times a word occurred in the
# document.