def wordcloud(datafile):
    """Render one masked word-cloud image per category found in ``datafile``.

    ``datafile`` is walked with ``iterrows()`` and indexed by the
    ``"Category"`` and ``"Content"`` columns.  All content belonging to one
    category is merged and drawn to ``../wordcloud/wordcloud_<category>.jpg``.

    Returns:
        None
    """
    # Work on a private copy: adding to the module-level STOPWORDS set would
    # leak these words into every other WordCloud built in the process.
    stop_words = set(STOPWORDS)
    stop_words.update(CountVectorizer(stop_words='english').get_stop_words())
    stop_words.add("said")

    pony_mask = np.array(Image.open("../pinkyB.jpg"))
    wc = WordCloud(background_color="black", max_words=2000, mask=pony_mask,
                   stopwords=stop_words)

    # Collect the pieces per category and join once at the end (linear
    # instead of the quadratic repeated string +=).
    pieces = {category: [] for category in set(datafile["Category"])}
    for _, row in datafile.iterrows():
        pieces[row["Category"]].append(str(row["Content"]))

    # .items() exists on Python 2 and 3; .iteritems() is Python 2 only.
    for category, parts in pieces.items():
        # Join with a space so the last word of one article does not fuse
        # with the first word of the next.
        wc.generate(" ".join(parts))
        wc.to_image().save("../wordcloud/wordcloud_" + category + ".jpg")
    return
def main():
    """Build a word cloud from every file found under ``data2`` and show it."""
    reader = WordReader()
    combined = ''
    for folder, _subdirs, names in os.walk('data2'):
        for name in names:
            combined += reader.word_reader(os.path.join(folder, name))

    sky_mask = np.array(Image.open("./sky.png"))
    cloud = WordCloud(
        background_color="white",  # background colour
        max_words=1000,            # maximum number of words drawn
        mask=sky_mask,             # mask image
        max_font_size=150,         # largest font size
        random_state=42,
    )
    cloud.generate(combined)

    plt.figure()
    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
def run_yt():
    """Produce the three YouTube figures.

    Writes ``popularWordsYT.png`` (logo-coloured word cloud),
    ``wordUseCompYT.png`` (dual bar graph of the ten most frequent words) and
    ``barGraphYT.png`` (bar graph of the scaled theta values).
    """
    yt = ds.acquire_youtube()
    mean = ds.mean(yt[0])
    deviation = ds.standard_deviation(yt[0])

    ytimg = imread("ytlogo.png")
    wc = WordCloud(mask=ytimg)
    image_colors = ImageColorGenerator(ytimg)
    wc.generate(word_list_yt(mean, deviation))
    plt.imshow(wc.recolor(color_func=image_colors))
    plt.axis("off")
    plt.savefig('popularWordsYT.png', bbox_inches='tight', dpi=200)

    # Ten most frequent words, scaled to occurrences per 1000 videos.
    words, vidcount = ds.word_count_yt('title', mean, deviation)
    labels = sorted(words, key=words.get, reverse=True)[:10]
    data1 = [1000 * words[w] / vidcount for w in labels]

    # NOTE(review): the comparison series is taken from the Dailymotion
    # counts even though this is the YouTube report - confirm this is
    # intended and not a copy/paste from run_dm().
    words, vidcount = ds.word_count_dailymotion(0, 0)
    # A word from the first data set may be absent from the second one;
    # count it as 0 instead of raising KeyError.
    data2 = [1000 * words.get(w, 0) / vidcount for w in labels]
    create_dualbargraph(data1, data2, labels, 'wordUseCompYT.png')

    Theta = da.yt_thetas()
    # Scale in place; range()/print() work on both Python 2 and 3.
    for x in range(len(Theta)):
        Theta[x] = Theta[x] / 10000
    print(Theta)
    create_bargraph(Theta, ('duration', 'date created', 'y-intercept'), 'barGraphYT.png')
def txt2pic(txt_file, out_png, font_path, mask_file):
    """Segment a text file with jieba and draw it as a masked word cloud.

    Args:
        txt_file: path of the text file to read.
        out_png: path the rendered cloud is written to.
        font_path: font file handed to ``WordCloud``.
        mask_file: image used both as the mask and as the colour source.

    The cloud is displayed with matplotlib and then saved to ``out_png``.
    """
    # Read through a context manager so the handle is closed even on error.
    with open(path.abspath(txt_file)) as text_handle:
        text = text_handle.read()

    # jieba yields tokens; WordCloud expects whitespace separated words.
    new_textlist = ' '.join(jieba.cut(text))

    pic = imread(path.abspath(mask_file))
    pic_color = ImageColorGenerator(pic)  # colour function derived from the image

    wc = WordCloud(background_color='white',
                   mask=pic,
                   width=750,
                   height=750,
                   max_font_size=80,
                   random_state=30,
                   font_path=font_path,
                   max_words=500,
                   min_font_size=2,
                   color_func=pic_color)
    wc.generate(new_textlist)

    plt.figure()
    plt.imshow(wc)
    plt.axis("off")
    plt.show()
    wc.to_file(out_png)
def run_dm():
    """Produce the three Dailymotion figures.

    Writes ``popularWordsDM.png``, ``wordUseCompDM.png`` and
    ``barGraphDM.png``.
    """
    dm = ds.acquire_dailymotion()
    logo = imread("dmlogo.png")

    # Word cloud coloured after the logo image.
    cloud = WordCloud(mask=logo)
    logo_colors = ImageColorGenerator(logo)
    cloud.generate(word_list_dailymotion(ds.mean(dm[0]), ds.standard_deviation(dm[0])))
    plt.imshow(cloud.recolor(color_func=logo_colors))
    plt.axis("off")
    plt.savefig('popularWordsDM.png', bbox_inches = 'tight', dpi = 200)

    # Ten most frequent words, as occurrences per 1000 videos.
    words, vidcount = ds.word_count_dailymotion(ds.mean(dm[0]), ds.standard_deviation(dm[0]))
    labels = sorted(words, key=words.get, reverse=True)[:10]
    data1 = [1000 * words[w]/vidcount for w in labels]

    # Same words measured on the (0, 0) data set.
    words, vidcount = ds.word_count_dailymotion( 0, 0 )
    data2 = [1000 * words[w]/vidcount for w in labels]

    create_dualbargraph(data1, data2, labels, 'wordUseCompDM.png')
    create_bargraph(da.dm_thetas(),
                    ('fans','duration','date created', 'y-intercept'),
                    'barGraphDM.png')
def cloudplot(person):
    """Return a PNG word cloud of the e-mail text selected for ``person``.

    ``person`` arrives with ``+`` in place of spaces.  The text is fetched
    with ``GetTextRange``, cleaned, rendered with the logo image as mask and
    colour source, and sent back through ``send_file``.
    """
    person = re.sub(r'\+', ' ', person)
    text = GetTextRange(Emails, person)
    text = rmBoring(rmNonAlpha(text)).decode('ascii', 'ignore')

    plt.clf()
    d = path.dirname(path.abspath(__file__))
    hilcolor = np.array(Image.open(path.join(d, "static/img/hillarylogo.jpg")))

    # set.add() returns None, so ``stopwords=STOPWORDS.add("said")`` passed
    # None and mutated the shared set.  Build an explicit private copy.
    stopwords = set(STOPWORDS)
    stopwords.add("said")

    wc = WordCloud(background_color="white", max_words=150, mask=hilcolor,
                   stopwords=stopwords, max_font_size=80, random_state=42,
                   relative_scaling=0.5)
    wc.generate(text)

    image_colors = ImageColorGenerator(hilcolor)
    plt.imshow(wc.recolor(color_func=image_colors))
    plt.axis("off")

    # Render the current figure into an in-memory buffer for the response.
    fig = plt.gcf()
    img = StringIO.StringIO()
    fig.savefig(img)
    img.seek(0)
    return send_file(img, mimetype='image/png')
def generateWordCloud(node, contribs, wordsToShow=None, normalize=True, normMin=0, normMax=1):
    """Draw the contributions of ``node`` as a circular word cloud.

    The cloud is written to ``node.png`` next to this module and then shown
    with matplotlib.
    """
    node_contrib = contribs[node]
    if normalize:
        node_contrib = normalizeContrib(node_contrib, normMin, normMax)

    cloud_text = generateText(node_contrib, wordsToShow)

    # Mask image lives beside this source file.
    here = path.dirname(__file__)
    mask = imread(path.join(here, "black_circle_mask_whitebg.png"))

    cloud = WordCloud(background_color="white", max_words=2000, mask=mask)
    cloud.generate(cloud_text)
    cloud.to_file(path.join(here, "node.png"))

    # Development switch: flip to True to colour words by sign.
    useColorFunc = False
    shown = cloud.recolor(color_func=pos_neg_color_func) if useColorFunc else cloud
    plt.imshow(shown)
    plt.axis("off")
    plt.show()
def get_word_cloud(content, file_name, dict = BASE_DICT,
                   folder_path=BASE_PATH+os.sep+"WordCloud",
                   font_path=BASE_PATH+os.sep+"WordCloud"+os.sep+"幼圆.ttf",
                   width=400, height=200, margin=5, ranks_only=False,
                   prefer_horizontal=0.9, mask=None, scale=1, color_func=None,
                   max_words=200, stopwords=None, random_state=None,
                   background_color='white', max_font_size=None):
    """Render ``content`` as a word cloud and save it under ``folder_path``.

    Args:
        content: text passed to ``WordCloud.generate`` as-is.
        file_name: file name (relative to ``folder_path``) of the output image.
        dict: currently unused; the jieba user-dictionary step is disabled.
        folder_path: directory the image is written to.
        Remaining keyword arguments are forwarded unchanged to ``WordCloud``.

    Returns:
        ``file_name`` (not the full path).
    """
    # color_func used to be accepted but never forwarded, so a caller's
    # colour function was silently dropped.  None is WordCloud's own default,
    # so forwarding it changes nothing for callers that do not set it.
    wc = WordCloud(font_path=font_path, width=width, height=height,
                   margin=margin, ranks_only=ranks_only,
                   prefer_horizontal=prefer_horizontal, mask=mask, scale=scale,
                   max_words=max_words, stopwords=stopwords,
                   random_state=random_state,
                   background_color=background_color,
                   max_font_size=max_font_size, color_func=color_func)

    file_with_path = "{BASE_PATH}{sep}{file}".format(
        BASE_PATH=folder_path, sep=os.sep, file=file_name)
    wc.generate(content)
    print(file_with_path)
    wc.to_file(file_with_path)
    return file_name
def wordcloudplot_focus(self, yizhongzhazha=None, backimage=None):
    """Save one word cloud per column of ``self._relationship``.

    Parameters
    ----------
    yizhongzhazha : object or None
        The loaded message table; only checked for presence here.
    backimage : str or None
        Path of an image used as the cloud mask; no mask when None.

    Returns
    -------
    None.  Images are written to ``./wordcloud/<column>2.png``.  The method
    prints a hint and returns early when ``yizhongzhazha`` or
    ``self._contacts_topN`` is None.
    """
    if yizhongzhazha is None:
        print("Need load message table first.")
        return
    if self._contacts_topN is None:
        print("need to run relationship() first.")
        return

    custompic = imread(backimage) if backimage is not None else None

    if not os.path.exists('./wordcloud'):
        os.makedirs('./wordcloud')

    wordcloud = WordCloud(background_color="white", mask=custompic,
                          max_words=2000, scale=3)
    for k in range(len(self._contacts_topN)):
        column = self._relationship.iloc[:, k]
        # Repeat every index label as many times as its count.  Pair labels
        # with .values (purely positional) instead of ``column[i]``, whose
        # integer-position fallback on a labelled Series is deprecated.
        text = ''.join((word + ' ') * count
                       for word, count in zip(column.index.values, column.values))
        wordcloud.generate(text)
        wordcloud.to_file("./wordcloud/" + self._relationship.columns[k] + '2.png')
def genwordcloud(texts,mask=None,font_path=None,background_color='white'):
    '''Generate a word cloud image.

    parameter
    ----------
    texts: text handed to ``WordCloud.generate``.
    mask: anything ``PIL.Image.open`` accepts (e.g. a file path).  When
          omitted, a 900x1200 RGBA ellipse mask is generated (white outside
          the ellipse, alpha fully opaque).
    font_path: font to use; the Android default DroidSansFallback.ttf is
               suggested.
    background_color: background colour of the cloud.

    return
    -------
    img: image object that can be saved directly, e.g. img.save('test.png');
         None when the wordcloud package is not installed.
    '''
    from PIL import Image
    try:
        from wordcloud import WordCloud
    except ImportError:
        # Only a missing package is expected here; anything else should surface.
        print('wordcloud need install wordcloud package.')
        return None

    if mask is None:
        # Vectorised ellipse test - same formula as a per-pixel loop, without
        # a million Python-level iterations.
        rows, cols = np.ogrid[:900, :1200]
        outside = ((rows - 449.5) ** 2 / 430.0 ** 2
                   + (cols - 599.5) ** 2 / 580.0 ** 2) > 1
        tmp = np.where(outside, 255, 0).astype(np.uint8)
        mask = np.zeros((900, 1200, 4), dtype=np.uint8)
        mask[:, :, 0] = tmp
        mask[:, :, 1] = tmp
        mask[:, :, 2] = tmp
        mask[:, :, 3] = 255
    else:
        mask = np.array(Image.open(mask))

    wordcloud = WordCloud(background_color=background_color,
                          font_path=font_path, mask=mask)
    wordcloud.generate(texts)
    return wordcloud.to_image()
def word_cloud(posts):
    """Return a URL-quoted, base64-encoded PNG word cloud of the posts.

    The ``content`` field of every post is joined with spaces, rendered on
    the current matplotlib figure and serialised into an in-memory buffer.
    """
    # (A CoreNLP lemmatisation pass used to sit here; it is disabled.)
    combined = u' '.join(post['content'] for post in posts)

    cloud = WordCloud(background_color="white", width=1200, height=900, margin=0)
    cloud.generate(combined)

    figure = plt.gcf()
    plt.imshow(cloud)
    plt.axis("off")

    buffer = StringIO.StringIO()
    figure.savefig(buffer, format='png', bbox_inches='tight')
    buffer.seek(0)  # rewind before the bytes are read back
    plt.close()

    encoded = base64.b64encode(buffer.buf)
    return urllib.quote(encoded)
def main(save_files = False, db_filename = '../output/database.sqlite'):
    """Show (and optionally save) a masked word cloud of all paper titles.

    Args:
        save_files: when true, also write ``../files/title_cloud.png``.
        db_filename: SQLite database containing a ``Papers`` table; column 1
            of each row is used as the title.
    """
    conn = sqlite3.connect(db_filename)
    try:
        c = conn.cursor()
        # Retrieve papers
        c.execute('''SELECT * FROM Papers''')
        paper_content = c.fetchall()
    finally:
        # Release the database even when the query fails.
        conn.close()

    # Join with a space: plain concatenation glued the last word of one
    # title to the first word of the next.
    titles = ' '.join(pc[1] for pc in paper_content)

    # A Marvin Minsky mask
    mask = np.array(Image.open("../files/minsky_mask.png"))
    wc = WordCloud(background_color="white", max_words=2000, mask=mask,
                   stopwords=STOPWORDS.copy())
    wc.generate(titles)

    if save_files:
        wc.to_file("../files/title_cloud.png")

    plt.imshow(wc)
    plt.axis("off")
    plt.show()
def make_cloud(words, image, size=10, filename='figures/cloud.png', max_words=200, horizontal=0.8):
    """Save a logo-shaped, logo-coloured word cloud of ``words`` to ``filename``.

    ``image`` is read once and serves as both mask and colour source; the
    figure is ``size`` x ``size`` inches.
    """
    # Drop the retweet / HTML-entity leftovers before layout.
    noise_tokens = ['RT', 'amp', 'lt']
    kept = []
    for token in words.split():
        if token not in noise_tokens:
            kept.append(token)
    cleaned_text = ' '.join(kept)

    # Same tokens again as stopwords, on a private copy of the defaults.
    stopwords = STOPWORDS.copy()
    for token in ("RT", 'amp', 'lt'):
        stopwords.add(token)

    logo = imread(image)
    logo_colors = ImageColorGenerator(logo)

    cloud = WordCloud(stopwords=stopwords,
                      mask=logo,
                      color_func=logo_colors,
                      scale=0.8,
                      max_words=max_words,
                      background_color='white',
                      random_state=42,
                      prefer_horizontal=horizontal)
    cloud.generate(cleaned_text)

    plt.figure(figsize=(size, size))
    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig(filename)
def generate_cloud():
    """Render ``messages.txt`` as a masked word cloud saved to ``masked.jpg``.

    All three files (text, ``mask.png`` and the output) live in the directory
    of this module.
    """
    d = path.dirname(__file__)
    # The file content was read into an unused name while generate() was fed
    # an undefined ``text``; read it into the variable that is actually used,
    # and close the handle.
    with open(path.join(d, 'messages.txt')) as messages:
        text = messages.read()

    group_mask = misc.imread(path.join(d, "mask.png"), flatten=True)
    wc = WordCloud(background_color="white", max_words=2000, mask=group_mask)
    wc.generate(text)
    wc.to_file(path.join(d, "masked.jpg"))
def test_coloring_black_works():
    """Generating with an all-black colour image must not fail."""
    black_image = np.zeros((50, 50, 3))
    black_colors = ImageColorGenerator(black_image)
    cloud = WordCloud(
        width=50,
        height=50,
        random_state=42,
        color_func=black_colors,
        min_font_size=1,
    )
    cloud.generate(THIS)
def test_repeat():
    """``repeat=True`` fills the layout up to ``max_words`` by reusing words."""
    short_text = "Some short text"

    # Without repeat: exactly one layout entry per word.
    plain = WordCloud(stopwords=[]).generate(short_text)
    assert_equal(len(plain.layout_), 3)

    repeated = WordCloud(max_words=50, stopwords=[], repeat=True).generate(short_text)
    # multiple of word count larger than max_words
    assert_equal(len(repeated.layout_), 51)
    # relative scaling doesn't work well with repeat
    assert_equal(repeated.relative_scaling, 0)
    # all frequencies are 1
    assert_equal(len(repeated.words_), 3)
    assert_array_equal(list(repeated.words_.values()), 1)
    layout_freqs = [entry[0][1] for entry in repeated.layout_]
    assert_array_equal(layout_freqs, 1)

    repetition_text = "Some short text with text"
    weighted = WordCloud(max_words=52, stopwords=[], repeat=True)
    weighted.generate(repetition_text)
    assert_equal(len(weighted.words_), 4)
    # normalized frequencies
    assert_equal(weighted.words_['text'], 1)
    assert_equal(weighted.words_['with'], .5)
    assert_equal(len(weighted.layout_), weighted.max_words)
    layout_freqs = [entry[0][1] for entry in weighted.layout_]
    # check that frequencies are sorted
    assert_true(np.all(np.diff(layout_freqs) <= 0))
def make_word_cloud(text, save_path, background_color='black'):
    """Render ``text`` as a dark-grey word cloud saved to ``app/<save_path>``.

    Args:
        text: either a ready-made string, or an iterable of
            ``(word, count)`` pairs; each word is then repeated ``count``
            times.
        save_path: output path, relative to ``app/``.
        background_color: background colour of the image.
    """
    from wordcloud import WordCloud

    def col_fun(word, *args, **kw):
        # Every word gets the same dark grey.
        return '#333'

    if isinstance(text, str):
        big_string = text
    else:
        # Single join instead of repeated string += in the loop.
        big_string = ''.join((item[0] + ' ') * item[1] for item in text)

    wc = WordCloud(background_color=background_color,
                   color_func=col_fun,
                   max_words=10000,
                   height=200,
                   width=700,
                   font_path='app/static/fonts/NanumScript.ttc')
    # Generate once; the layout was previously computed twice in a row and
    # the first result thrown away.
    wc.generate(big_string)
    wc.to_file('app/%s' % save_path)
def create_word_cloud(ballots, chart_directory, image_name, mask_file, stop_words, word_counts=None):
    """
    Generates a word cloud from given ballots.

    Args:
        ballots: iterable of objects exposing a ``feedback`` string.
        chart_directory: directory the images are written to.
        image_name: base name; each file is ``<image_name>-<word_count>.png``.
        mask_file: optional image used as mask and colour source.
        stop_words: extra stop words, added to the library defaults.
        word_counts: ``max_words`` values to render, one image each;
            defaults to ``[25, 50, 100, 1000]``.
    """
    if word_counts is None:
        word_counts = [25, 50, 100, 1000]

    # Space-separate the feedback so adjacent comments do not fuse into one
    # artificial word at the boundary.
    text = ' '.join(ballot.feedback for ballot in ballots)

    # Build a new set: ``|=`` on the STOPWORDS name would permanently add the
    # caller's words to the shared library default.
    all_stop_words = STOPWORDS | set(stop_words)

    # Everything except max_words is identical for each image, so the mask is
    # read and the colour function built once.
    common = dict(background_color="white", stopwords=all_stop_words,
                  max_font_size=80, random_state=42)
    if mask_file:
        color_mask = imread(mask_file)
        common.update(mask=color_mask,
                      color_func=ImageColorGenerator(color_mask))

    for word_count in word_counts:
        wc = WordCloud(max_words=word_count, **common)
        wc.generate(text)
        plt.imshow(wc)
        plt.axis("off")
        image_name_with_count = '{0}-{1}.png'.format(image_name, str(word_count))
        logger.info('...creating word cloud {0}'.format(image_name_with_count))
        save_location = os.path.join(chart_directory, image_name_with_count)
        plt.savefig(save_location)
        plt.close()
def test_random_state():
    """Two clouds seeded identically must come out identical."""
    first, second = WordCloud(random_state=0), WordCloud(random_state=0)
    first.generate(THIS)
    second.generate(THIS)
    assert_array_equal(first, second)
def generateWordCloud(node, contribs, maskImg=None, wordsToShow=100, normalize=True, normMin=0, normMax=1):
    """Build a grey-scale word cloud from the strongest contributions of ``node``.

    ``contribs[node]`` is expected to be a sequence of ``(word, value)``
    pairs sorted from highest to lowest value.  Up to ``wordsToShow`` entries
    are taken from whichever end has the larger magnitude; if that is the
    negative end, the signs are flipped so the selected values are positive.
    The selected values are optionally normalised, recorded in the
    module-level ``wordColorMap`` and drawn.

    Returns:
        The generated and recoloured ``WordCloud``.
    """
    contrib = contribs[node]

    # The list is sorted high -> low, so the two extremes sit at the ends.
    maxVal = abs(contrib[0][1])
    minVal = abs(contrib[-1][1])
    count = min(len(contrib), wordsToShow)

    if maxVal > minVal:
        # Most significant values are at the front.
        newContrib = [contrib[i] for i in range(count)]
    else:
        # Most significant values are at the back: walk from the last entry
        # towards the front.  (This used to index count-1 .. 0, i.e. the
        # *front* reversed, whenever wordsToShow < len(contrib).)
        last = len(contrib) - 1
        newContrib = [(contrib[i][0], -1 * contrib[i][1])
                      for i in range(last, last - count, -1)]

    # NOTE(review): with normalize=False the unselected, unflipped list is
    # used below - confirm whether ``newContrib`` was intended there.
    if normalize:
        contrib = normalizeContrib(newContrib, normMin, normMax)

    # Map each value to a grey level; larger value -> darker word.
    for c in contrib:
        word, val = c
        wordColorMap[word] = int(round(200 * (1 - val)))

    text = generateText(contrib, min(len(contrib), wordsToShow))

    wc = WordCloud(background_color="white", max_words=2000, mask=maskImg)
    wc.generate(text)
    wc.recolor(color_func=gray_color_func)
    return wc
def word_cloud(text):
    """Generate a default-styled word cloud from ``text`` and save it as ``test.png``."""
    cloud = WordCloud()
    cloud.generate(text)
    output_name = 'test.png'
    cloud.to_file(output_name)
def make_cloud(self, text):
    """Draw ``text`` with a randomly chosen font and save it to ``self.out``.

    Also stores a fresh random value (0-255) in ``self.start``.  Returns
    ``self.out``.
    """
    self.start = random.randint(0, 255)

    font_dir = 'bot/fonts/'
    chosen_font = font_dir + random.choice(os.listdir(font_dir))

    canvas = WordCloud(
        font_path=chosen_font,
        background_color='black',
        width=1280,
        height=720,
        scale=1,
        color_func=self.light_colour_func,
    )
    canvas.generate(text)
    canvas.to_file(self.out)
    return self.out
def generate_wordcloud(self, filename, bg_color='white', color_func=monochrome_color_func):
    """Join ``self.text`` with spaces and save a 1280x1024 word cloud to ``filename``."""
    cloud = WordCloud(
        width=1280,
        height=1024,
        stopwords=STOPWORDS,
        background_color=bg_color,
        color_func=color_func,
        max_words=100,
    )
    joined = ' '.join(self.text)
    cloud.generate(joined)
    cloud.to_file(filename)
def test_writing_to_file():
    """A cloud written with ``to_file`` reloads with the configured size."""
    wc = WordCloud()
    wc.generate(THIS)

    # The context manager closes (and thereby removes) the temporary file
    # even when an assertion fails; the handle used to be left open.
    with NamedTemporaryFile(suffix=".png") as f:
        filename = f.name
        wc.to_file(filename)
        loaded_image = Image.open(filename)
        assert_equal(loaded_image.size, (wc.width, wc.height))
def test_writing_to_file(tmpdir):
    """A cloud written with ``to_file`` reloads with the configured size."""
    cloud = WordCloud()
    cloud.generate(THIS)

    target = str(tmpdir.join("word_cloud.png"))
    cloud.to_file(target)

    expected_size = (cloud.width, cloud.height)
    assert Image.open(target).size == expected_size
def test_empty_text():
    """Text with nothing left to draw makes ``generate`` raise ValueError."""
    # Text that is empty from the start.
    no_filter = WordCloud(stopwords=[])
    with pytest.raises(ValueError):
        no_filter.generate('')

    # Text that only becomes empty once the stopwords are removed.
    filters_everything = WordCloud(stopwords=['a', 'b'])
    with pytest.raises(ValueError):
        filters_everything.generate('a b a')
def make_wordcloud(self):
    """Flatten ``self.tags`` and save the cloud to static/images/wordcloud.png."""
    print('Creating wordcloud')

    # self.tags is a sequence of sequences; flatten one level.
    every_tag = []
    for tag_group in self.tags:
        every_tag.extend(tag_group)

    cloud = WordCloud(width=1920, height=1080, relative_scaling=.5)
    cloud.generate(' '.join(every_tag))

    destination = os.path.join('static', 'images', 'wordcloud.png')
    cloud.to_file(destination)
def create_wordcloud(posts):
    """Show and save a masked word cloud built from every post's ``message``.

    The cloud is displayed with matplotlib and written to
    ``aces_wordcloud.png``.
    """
    wordcloud_str = ' '.join(post['message'] for post in posts)  # join all posts together
    aces_mask = imread("aces.png")

    # "will" is uninteresting and took up a large chunk of the cloud.
    # set.add() returns None, so it must not be called inline in the
    # argument list; extend a private copy instead of the shared default.
    stopwords = set(STOPWORDS)
    stopwords.add("will")

    wc = WordCloud(background_color="BLACK", mask=aces_mask, stopwords=stopwords)
    wc.generate(wordcloud_str)

    plt.axis("off")
    plt.imshow(wc)
    plt.show()
    wc.to_file("aces_wordcloud.png")
def test_collocations():
    """Bigrams appear in ``words_`` only when ``collocations`` is enabled."""
    unigrams_only = WordCloud(collocations=False, stopwords=[])
    unigrams_only.generate(THIS)

    with_bigrams = WordCloud(collocations=True, stopwords=[])
    with_bigrams.generate(THIS)

    assert_in("is better", with_bigrams.words_)
    assert_not_in("is better", unigrams_only.words_)
    assert_not_in("way may", with_bigrams.words_)
return ' '.join(mywordlist) wc = WordCloud( font_path=font_path, background_color="white", max_words=2000, mask=back_coloring, max_font_size=100, random_state=42, width=1000, height=860, margin=2, ) wc.generate(jieba_processing_txt(text)) # create coloring from image image_colors_default = ImageColorGenerator(back_coloring) plt.figure() # recolor wordcloud and show plt.imshow(wc, interpolation="bilinear") plt.axis("off") plt.show() # save wordcloud wc.to_file(path.join(d, imgname1)) # create coloring from image image_colors_byImg = ImageColorGenerator(back_coloring)
else: itchat.send("啦啦啦", msg["FromUserName"]) @itchat.msg_register([PICTURE]) def pic_reply(msg): itchat.send("666", msg["FromUserName"]) itchat.auto_login() itchat.run() # 8、用wordcloud做海报 from wordcloud import WordCloud import chardet import matplotlib.pyplot as plt with open("txt.txt", 'r', encoding="utf-8") as file: text = file.read() wc1 = WordCloud(background_color="pink", width=1000, height=860, font_path="C:\\Windows\\Fonts\\STFANGSO.ttf", margin=2) wc2 = wc1.generate(text) plt.imshow(wc2) plt.axis("off") plt.show() wc2.to_file('hzw.jpg')
wordlist_jieba = jieba.cut_for_search(mytext,HMM=True) # 在每个词之间添加空格 wl_space_split = " ".join(wordlist_jieba) # 设置云词参数 wc = WordCloud( font_path=r'C:\Windows\Fonts\SIMYOU.TTF', width=800, height=600, margin=10, max_font_size=100, background_color='white', min_font_size=10, max_words=500, ) # 生成分词图 wc.generate(wl_space_split) # 将分词图保存 wc.to_file("pngs/"+str(av)+".png") f.close() print(str(av)+"ok") mytext = tt.read() # 删除特殊符号 mytext = re.sub(r"[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。?、~@#¥%……&*()]+", "", mytext) # 进行分词,这里选用的jieba.cut_for_search(互联网分词方法) wordlist_jieba = jieba.cut_for_search(mytext, HMM=True) # 在每个词之间添加空格 wl_space_split = " ".join(wordlist_jieba) # 设置云词参数 wc = WordCloud( font_path=r'C:\Windows\Fonts\SIMYOU.TTF', width=800,
def main():
    """Run the whole pipeline: collect text, draw the cloud, save and show it.

    Reads its settings from ``general_config``.  Writes ``<user_id>.png`` and
    ``cleaned_<user_id>.txt`` into the configured output folder and finally
    opens the image.
    """
    # Where does the text come from (twitter / telegram / plain text)?
    # This is the only step that talks to the user directly.
    context = determine_context()

    # Mask image (png or jpg), processed if necessary, as a numpy array.
    mask = load_mask()

    # Stop words from the stop-word list.
    stop_words = load_stop_words()

    # Raw text plus the id that names the output files.
    text, user_id = get_text(context)

    # Clean the text and drop stop words where necessary.
    text = clean_text(text=text, context=context, stop_words=stop_words)

    # Print a few statistics as a sanity check.
    print_stats(text)

    cloud_options = dict(
        mask=mask,
        background_color=general_config["BG_COLOR"],
        font_path=general_config["FONT"],
        include_numbers=False,
        stopwords=stop_words,
        max_words=general_config["MAX_WORDS"],
        contour_width=general_config["LINE_WIDTH"],
        contour_color=general_config["LINE_COLOR"],
        max_font_size=general_config["MAX_FONT"],
        min_font_size=general_config["MIN_FONT"],
        relative_scaling=0.2,
    )
    wc = WordCloud(**cloud_options)
    wc.generate(text)

    # Main image.
    result_image = wc.to_image()

    # Optionally recolour the words after the mask image.
    if general_config["COLORFUL_IMAGE"]:
        image_colors = ImageColorGenerator(mask)
        result_image = wc.recolor(color_func=image_colors).to_image()

    # Save the image and the cleaned text; both file names are derived from
    # user_id.
    OUT_FOLDER = general_config['OUT_FOLDER']
    make_dir(OUT_FOLDER)
    image_path = f"{OUT_FOLDER}{user_id}.png"
    print("saving output image to" + image_path)
    result_image.save(image_path)

    with open(f"{OUT_FOLDER}cleaned_{user_id}.txt", "w") as cleaned_result_file:
        cleaned_result_file.write(text)

    result_image.show()
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
from scipy.misc import imread
import random

# Each line of the skills file looks like "'SQL' 100": a quoted word followed
# by how many times it should be repeated.
words = []
with open('data/skills.txt', 'r', encoding='utf-8') as f:
    for line in f:
        items = line.split()
        skill = items[0].replace("'", '')
        words.extend([skill] * int(items[1]))

random.shuffle(words)
text = ' '.join(words)

# Background mask; the same image also provides the word colours.
color_mask = imread('src/bigdata.jpg')
wc = WordCloud(
    font_path="msyh.ttc",
    background_color="white",
    max_words=2000,
    mask=color_mask,
    max_font_size=500,
    random_state=10,
)
image_colors = ImageColorGenerator(color_mask)
my_wordcloud = wc.generate(text)

plt.imshow(my_wordcloud.recolor(color_func=image_colors))
plt.axis('off')
plt.show()
def wc(data, bgcolor, title):
    """Draw the space-joined ``data`` as a word cloud on a new figure.

    ``title`` is accepted but not used.
    """
    plt.figure()
    joined = ' '.join(data)
    cloud = WordCloud(
        background_color=bgcolor,
        max_words=1000,
        max_font_size=50,
    )
    cloud.generate(joined)
    plt.imshow(cloud)
    plt.axis('off')
# Fit an LDA model on the conversations and print the topics it found.
lda = LDA(n_components=number_topics)
lda.fit(listofConversations)
print("Topics found via LDA:")
print_topics(lda, count_vectorizer, number_words)

# In[ ]:

from wordcloud import WordCloud

# Word cloud over the comma-joined ``withoutStop`` sequence.
wordcloud = WordCloud(background_color="white", max_words=5000, contour_width=3, contour_color='steelblue')
text = ','.join(withoutStop)
wordcloud.generate(text)
# Notebook display: the image is the value of the cell's last expression.
wordcloud.to_image()

# In[ ]:

# NOTE(review): this rebinds the name ``plt`` to the object returned by
# .plot(); if ``plt`` is matplotlib.pyplot elsewhere in the notebook, later
# cells lose access to it - confirm.
plt = bof[:10].plot(kind="barh", title="10 Most Common Words")

# In[ ]:
# read the mask / color image taken from # http://jirkavinse.deviantart.com/art/quot-Real-Life-quot-Alice-282261010 alice_coloring = np.array(Image.open(path.join(d, "hasak.png"))) stopwords = set(STOPWORDS) stopwords.add("said") #背景颜色,显示最多词数,设置背景图片,字体最大值等 wc = WordCloud(background_color="white", max_words=1000, mask=alice_coloring, stopwords=stopwords, max_font_size=500, random_state=42) # generate word cloud #生成词云 wc.generate(text) # create coloring from image image_colors = ImageColorGenerator(alice_coloring) # show plt.imshow(wc, interpolation="bilinear") plt.axis("off") #绘制词云图 plt.figure() # recolor wordcloud and show # we could also give color_func=image_colors directly in the constructor plt.imshow(wc.recolor(color_func=image_colors), interpolation="bilinear") plt.axis("off") plt.figure() plt.imshow(alice_coloring, cmap=plt.cm.gray, interpolation="bilinear")
from wordcloud import WordCloud, ImageColorGenerator  # word cloud library

# 1. Read the text file.  The context manager closes the handle as soon as
#    the content is read; it used to stay open for the whole script.
with open(r'./src/file.txt', "r") as text_file:
    text = text_file.read()

# 2. jieba segmentation: cut_all=True is full mode, False (the default) is
#    accurate mode.
cut_text = jieba.cut(text, cut_all=False)
# The tokens must be joined with some separator, otherwise no cloud can be
# drawn from them.
result = "/".join(cut_text)

# 3. Load the custom background image.
image = Image.open(r'./src/beijing.jpeg')
graph = np.array(image)

# 4. Build the word cloud.  With a mask, the output size follows the pixel
#    size of the mask image.
wc = WordCloud(font_path=r"./hanti.ttf",
               background_color='white',
               max_font_size=50,
               mask=graph)
wc.generate(result)

# 5. Colour the words after the background image.
image_color = ImageColorGenerator(graph)
wc.recolor(color_func=image_color)
# Saved at the mask's resolution, which is sharper than the on-screen
# rendering below.
wc.to_file(r"./img/pear_heart.png")

# 6. Display the picture.
plt.figure("词云图")  # name of the figure window
plt.imshow(wc)        # show the cloud as an image
plt.axis("off")       # hide the axes
plt.show()
print(train[train.identity_hate == 1].iloc[4, 1]) # !ls ../input/imagesforkernal/ stopword = set(STOPWORDS) # clean comments clean_mask = np.array(Image.open("../input/imagesforkernal/safe-zone.png")) clean_mask = clean_mask[:, :, 1] # wordcloud for clean comments subset = train[train.clean == True] text = subset.comment_text.values wc = WordCloud(background_color="black", max_words=2000, mask=clean_mask, stopwords=stopword) wc.generate(" ".join(text)) plt.figure(figsize=(20, 10)) plt.axis("off") plt.title("Words frequented in Clean Comments", fontsize=20) plt.imshow(wc.recolor(colormap='viridis', random_state=17), alpha=0.98) # plt.show() toxic_mask = np.array(Image.open("../input/imagesforkernal/toxic-sign.png")) toxic_mask = toxic_mask[:, :, 1] # wordcloud for clean comments subset = train[train.toxic == 1] text = subset.comment_text.values wc = WordCloud(background_color="black", max_words=4000, mask=toxic_mask, stopwords=stopword)
def plot_wordcloud(text: List[str]) -> None:
    """Show a word cloud of the given tweets with stop words filtered out.

    English stop words, a few Twitter artefacts, the digits 0-9, a built-in
    Hinglish list and the entries of ``../data/stop_hinglish.txt`` are all
    excluded.
    """
    # nltk.download('stopwords')
    stop = set(stopwords.words("english"))
    for artefact in ("https", "mention", "retweet", "hashtag", "co", "rt", "tco"):
        stop.add(artefact)
    for digit in range(10):
        stop.add(str(digit))

    hindi_stopwords = [
        "ye", "tu", "k", "ki", "se", "bhi", "kya", "mai", "bhi", "kuch",
        "mein", "aur", "ab", "toh", "ho", "kyu", "nahi", "ko", "jo", "woh",
        "tum", "meri", "teri", "apna", "apni", "yeh", "h", "hai", "hain",
        "pe", "tha", "hai",
    ]
    with open("../data/stop_hinglish.txt") as f:
        for entry in f.readlines():
            hindi_stopwords.append(entry.strip("\n"))
    stop = stop.union(set(hindi_stopwords))

    # One list of kept, lower-cased words per tweet.
    corpus = []
    for tweet in text:
        kept = []
        for raw in tweet.split():
            lowered = raw.lower()
            if lowered in stop or lowered in string.punctuation:
                continue
            kept.append(lowered)
        corpus.append(kept)

    cloud = WordCloud(
        background_color="white",
        stopwords=set(stop),
        max_words=100,
        max_font_size=30,
        scale=3,
        random_state=1,
    )
    # The cloud is fed the printed form of the nested list.
    cloud = cloud.generate(str(corpus))

    fig = plt.figure(1, figsize=(15, 13))
    plt.axis("off")
    plt.imshow(cloud)
    plt.show()
print(Sheershak) #We pull up the wikipedia page as per the inputs provided by the user Prushtha = wikipedia.page(Sheershak) #Now we extract the contents of the page Soochana = Prushtha.content #We create a set of Stop words here so that these can be supplied as an inputs to generate a word cloud anavanchit_shabdh = set(Anavanchit_Shabdh) #Here we generate the word cloud based upon the image we have selected and we have set the limit to 100 words shabdh_megh = Shabdh_Megh(background_color="white", max_words=100, mask=RangHeen_Chitra, stopwords=anavanchit_shabdh, contour_color='red') #Finally we generate the Word Cloud shabdh_megh.generate(Soochana) # Here the text is filled into the word cloud #We generate an instance of word cloud we created and display via matplotlib (generally used to show graphs) Chitra_Pradarshan.imshow(shabdh_megh, interpolation='bilinear') Chitra_Pradarshan.axis( "off") # By this we ensure that Axis Bars are not displated Chitra_Pradarshan.show() #we savee the created file onto a picture file shabdh_megh.to_file("Temp_Picture.png")
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Read the source text; the context manager closes the handle afterwards.
with open("E:\\文学\\天局.txt", 'r') as fh:
    text = fh.read()

# Segment with jieba and join with spaces so WordCloud can tokenise it.
fenci = " ".join(jieba.cut(text))

backgroud_Image = np.array(
    Image.open("F:\\Python\\procedure\\词云\\materials\\alice_color.png"))

# Stop words: library defaults plus one custom entry.
stopwords = set(STOPWORDS)
stopwords.add("仿佛")

wc = WordCloud(
    background_color='white',  # background colour
    mask=backgroud_Image,      # mask image
    # Pass the customised set; passing STOPWORDS here silently dropped the
    # extra word added above.
    stopwords=stopwords,
    font_path='C:\\Windows\\Fonts\\simkai.ttf',  # a CJK font is required to render Chinese
)
wc.generate(fenci)

# Colour the words after the mask image.
image_colors = ImageColorGenerator(backgroud_Image)
wc.recolor(color_func=image_colors)

plt.imshow(wc)
plt.axis('off')
plt.show()
# Segment a sentence into words
def seg_sentence(sentence):
    """Return the jieba tokens of ``sentence``, each followed by a space.

    Tokens found in ``stopwordslist()`` and tab tokens are dropped.
    """
    tokens = jieba.cut(sentence.strip())
    stopwords = stopwordslist()
    kept = [token + " " for token in tokens
            if token not in stopwords and token != '\t']
    return ''.join(kept)


if __name__ == '__main__':
    df = pd.read_csv("csv/xiebuyazheng.csv", header=None, encoding="utf-8")

    # Segment every row of the first column and concatenate the results.
    cut_text = ''.join(seg_sentence(row) for row in df[0].values)

    color_mask = imread("timg.jpeg")
    cloud = WordCloud(
        # Keep the font next to the script; a font must be set.
        font_path='simhei.ttf',
        background_color='white',
        # mask=color_mask,
        max_words=2000,
        max_font_size=100,
    )
    word_cloud = cloud.generate(cut_text)

    plt.imshow(word_cloud)
    plt.axis('off')
    plt.show()
###开始绘制 import matplotlib as mpl import matplotlib.pyplot as plt #mpl.rcParams['font.sans-serif'] = ['SimHei'] #mpl.rcParams['font.family']='sans-serif' mpl.rcParams['font.size'] = 10 from wordcloud import WordCloud, STOPWORDS backgroud_Image = plt.imread(path + '词云\\词云1.jpg') wc = WordCloud(font_path='simhei.ttf', stopwords=STOPWORDS, background_color='white', max_words=1000, mask=backgroud_Image) wc.generate(' '.join(ys_cut)) plt.imshow(wc) plt.axis("off") plt.savefig(path + '词云\\ys.png', dpi=1000) plt.show() backgroud_Image = plt.imread(path + '词云\\词云5.jpg') wc = WordCloud(font_path='simhei.ttf', stopwords=STOPWORDS, background_color='white', max_words=1000, mask=backgroud_Image) wc.generate(' '.join(ls_cut)) plt.imshow(wc) plt.axis("off") plt.savefig(path + '词云\\ls.png', dpi=1000)
# Random suffix that keeps output file names apart.
number = str(random.uniform(1, 9999))

# Pick one of the template images at random as mask and colour source.
templates = os.listdir("./Template/")
alien_mask = np.array(PIL.Image.open('./Template/' + choice(templates)))

wc = WordCloud(font_path=path,
               background_color='white',
               margin=5,
               width=800,
               height=800,
               mask=alien_mask,
               max_words=2000,
               max_font_size=60,
               random_state=42)
wc = wc.generate(comment)
image_colors = ImageColorGenerator(alien_mask)

plt.figure()
# Recolour the words using the colours of the background picture.
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis("off")

# Save the picture and report exactly the path that was written.  The
# message used to say "out-image-out<number>.jpg", a file that is never
# created.
out_file = './OutFiles/out-image-' + number + '.jpg'
wc.to_file(out_file)
plt.close()
# Call form of print works on both Python 2 and Python 3.
print(out_file)
from wordcloud import WordCloud # Biblioteca para trabalhar com nuvem de palavras # pip install wordcloud # Verificando a lista das stopwords: stopwords.words('portuguese') stops = stopwords.words('english') # Mapa Cores = cores que iremos preencher nossa lista de palavras mapa_cores = ListedColormap(['orange', 'green', 'red', 'magenta']) # Criando a nuvem de palavras: nuvem = WordCloud(background_color='white', colormap=mapa_cores, stopwords=stops, max_words=100) # Gerando a nuvem: nuvem.generate(todo_texto) plt.imshow(nuvem) # Matriz de Termos Frequentes: # Primeiro vamos remover as stopwords palavras_semstop = [p for p in palavras if p not in stops] len(palavras_semstop) # Removendo pontuação: import string palavras_sem_pontuacao = [p for p in palavras_semstop if p not in string.punctuation] frequencia = nltk.FreqDist(palavras_sem_pontuacao)
# Built-in stop words: punctuation, symbols, digits and common Chinese
# function words that are removed from the segmentation result.
# Stored as a frozenset so it is built once at import time and membership
# tests are O(1); duplicate literals below are harmless in a set.
_BUILTIN_STOPWORDS = frozenset([
    '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '--', '.', '..', '...', '......',
    '...................', './', '.一', '.数', '.日', '/', '//', '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', ':', '://', '::', ';', '<', '=', '>', '>>', '?', '@', 'A', 'Lex', '[', '\\', ']', '^', '_',
    '`', 'exp', 'sub', 'sup', '|', '}', '~', '~~~~', '·', '×', '×××', 'Δ', 'Ψ', 'γ', 'μ', 'φ', 'φ.', 'В',
    '—', '——', '———', '‘', '’', '’‘', '“', '”', '”,', '…', '……', '…………………………………………………③', '′∈', '′|',
    '℃', 'Ⅲ', '↑', '→', '∈[', '∪φ∈', '≈', '①', '②', '②c', '③', '③]', '④', '⑤', '⑥', '⑦', '⑧', '⑨', '⑩',
    '──', '■', '▲', '、', '。', '〈', '〉', '《', '》', '》),', '」', '『', '』', '【', '】', '〔', '〕', '〕〔',
    '㈧', '一', '一.', '一一', '一下', '一个', '一些', '一何', '一切', '一则', '一则通过', '一天', '一定',
    '一方面', '一旦', '一时', '一来', '一样', '一次', '一片', '一番', '一直', '一致', '一般', '一起', '一转眼',
    '一边', '一面', '七', '万一', '三', '三天两头', '三番两次', '三番五次', '上', '上下', '上升', '上去',
    '上来', '上述', '上面', '下', '下列', '下去', '下来', '下面', '不', '不一', '不下', '不久', '不了',
    '不亦乐乎', '不仅', '不仅...而且', '不仅仅', '不仅仅是', '不会', '不但', '不但...而且', '不光', '不免',
    '不再', '不力', '不单', '不变', '不只', '不可', '不可开交', '不可抗拒', '不同', '不外', '不外乎', '不够',
    '不大', '不如', '不妨', '不定', '不对', '不少', '不尽', '不尽然', '不巧', '不已', '不常', '不得',
    '不得不', '不得了', '不得已', '不必', '不怎么', '不怕', '不惟', '不成', '不拘', '不择手段', '不敢',
    '不料', '不断', '不日', '不时', '不是', '不曾', '不止', '不止一次', '不比', '不消', '不满', '不然',
    '不然的话', '不特', '不独', '不由得', '不知不觉', '不管', '不管怎样', '不经意', '不胜', '不能', '不能不',
    '不至于', '不若', '不要', '不论', '不起', '不足', '不过', '不迭', '不问', '不限', '与', '与其', '与其说',
    '与否', '与此同时', '专门', '且', '且不说', '且说', '两者', '严格', '严重', '个', '个人', '个别', '中小',
    '中间', '丰富', '串行', '临', '临到', '为', '为主', '为了', '为什么', '为什麽', '为何', '为止', '为此',
    '为着', '主张', '主要', '举凡', '举行', '乃', '乃至', '乃至于', '么', '之', '之一', '之前', '之后',
    '之後', '之所以', '之类', '乌乎', '乎', '乒', '乘', '乘势', '乘机', '乘胜', '乘虚', '乘隙', '九', '也',
    '也好', '也就是说', '也是', '也罢', '了', '了解', '争取', '二', '二来', '二话不说', '二话没说', '于',
    '于是', '于是乎', '云云', '云尔', '互', '互相', '五', '些', '交口', '亦', '产生', '亲口', '亲手', '亲眼',
    '亲自', '亲身', '人', '人人', '人们', '人家', '人民', '什么', '什么样', '什麽', '仅', '仅仅', '今',
    '今后', '今天', '今年', '今後', '介于', '仍', '仍旧', '仍然', '从', '从不', '从严', '从中', '从事',
    '从今以后', '从优', '从古到今', '从古至今', '从头', '从宽', '从小', '从新', '从无到有', '从早到晚',
    '从未', '从来', '从此', '从此以后', '从而', '从轻', '从速', '从重', '他', '他人', '他们', '他是', '他的',
    '代替', '以', '以上', '以下', '以为', '以便', '以免', '以前', '以及', '以后', '以外', '以後', '以故',
    '以期', '以来', '以至', '以至于', '以致', '们', '任', '任何', '任凭', '任务', '企图', '伙同', '会',
    '伟大', '传', '传说', '传闻', '似乎', '似的', '但', '但凡', '但愿', '但是', '何', '何乐而不为', '何以',
    '何况', '何处', '何妨', '何尝', '何必', '何时', '何止', '何苦', '何须', '余外', '作为', '你', '你们',
    '你是', '你的', '使', '使得', '使用', '例如', '依', '依据', '依照', '依靠', '便', '便于', '促进', '保持',
    '保管', '保险', '俺', '俺们', '倍加', '倍感', '倒不如', '倒不如说', '倒是', '倘', '倘使', '倘或', '倘然',
    '倘若', '借', '借以', '借此', '假使', '假如', '假若', '偏偏', '做到', '偶尔', '偶而', '傥然', '像', '儿',
    '允许', '元/吨', '充其极', '充其量', '充分', '先不先', '先后', '先後', '先生', '光', '光是', '全体',
    '全力', '全年', '全然', '全身心', '全部', '全都', '全面', '八', '八成', '公然', '六', '兮', '共', '共同',
    '共总', '关于', '其', '其一', '其中', '其二', '其他', '其余', '其后', '其它', '其实', '其次', '具体',
    '具体地说', '具体来说', '具体说来', '具有', '兼之', '内', '再', '再其次', '再则', '再有', '再次', '再者',
    '再者说', '再说', '冒', '冲', '决不', '决定', '决非', '况且', '准备', '凑巧', '凝神', '几', '几乎',
    '几度', '几时', '几番', '几经', '凡', '凡是', '凭', '凭借', '出', '出于', '出去', '出来', '出现', '分别',
    '分头', '分期', '分期分批', '切', '切不可', '切切', '切勿', '切莫', '则', '则甚', '刚', '刚好', '刚巧',
    '刚才', '初', '别', '别人', '别处', '别是', '别的', '别管', '别说', '到', '到了儿', '到处', '到头',
    '到头来', '到底', '到目前为止', '前后', '前此', '前者', '前进', '前面', '加上', '加之', '加以', '加入',
    '加强', '动不动', '动辄', '勃然', '匆匆', '十分', '千', '千万', '千万千万', '半', '单', '单单', '单纯',
    '即', '即令', '即使', '即便', '即刻', '即如', '即将', '即或', '即是说', '即若', '却', '却不', '历',
    '原来', '去', '又', '又及', '及', '及其', '及时', '及至', '双方', '反之', '反之亦然',
    '反之则', '反倒', '反倒是', '反应', '反手', '反映', '反而', '反过来', '反过来说', '取得', '取道', '受到',
    '变成', '古来', '另', '另一个', '另一方面', '另外', '另悉', '另方面', '另行', '只', '只当', '只怕',
    '只是', '只有', '只消', '只要', '只限', '叫', '叫做', '召开', '叮咚', '叮当', '可', '可以', '可好',
    '可是', '可能', '可见', '各', '各个', '各人', '各位', '各地', '各式', '各种', '各级', '各自', '合理',
    '同', '同一', '同时', '同样', '后', '后来', '后者', '后面', '向', '向使', '向着', '吓', '吗', '否则',
    '吧', '吧哒', '吱', '呀', '呃', '呆呆地', '呐', '呕', '呗', '呜', '呜呼', '呢', '周围', '呵', '呵呵',
    '呸', '呼哧', '呼啦', '咋', '和', '咚', '咦', '咧', '咱', '咱们', '咳', '哇', '哈', '哈哈', '哉', '哎',
    '哎呀', '哎哟', '哗', '哗啦', '哟', '哦', '哩', '哪', '哪个', '哪些', '哪儿', '哪天', '哪年', '哪怕',
    '哪样', '哪边', '哪里', '哼', '哼唷', '唉', '唯有', '啊', '啊呀', '啊哈', '啊哟', '啐', '啥', '啦',
    '啪达', '啷当', '喀', '喂', '喏', '喔唷', '喽', '嗡', '嗡嗡', '嗬', '嗯', '嗳', '嘎', '嘎嘎', '嘎登',
    '嘘', '嘛', '嘻', '嘿', '嘿嘿', '四', '因', '因为', '因了', '因此', '因着', '因而', '固', '固然', '在',
    '在下', '在于', '地', '均', '坚决', '坚持', '基于', '基本', '基本上', '处在', '处处', '处理', '复杂',
    '多', '多么', '多亏', '多多', '多多少少', '多多益善', '多少', '多年前', '多年来', '多数', '多次',
    '够瞧的', '大', '大不了', '大举', '大事', '大体', '大体上', '大凡', '大力', '大多', '大多数', '大大',
    '大家', '大张旗鼓', '大批',
    '大抵', '大概', '大略', '大约', '大致', '大都', '大量', '大面儿上', '失去', '奇', '奈', '奋勇', '她',
    '她们', '她是', '她的', '好', '好在', '好的', '好象', '如', '如上', '如上所述', '如下', '如今', '如何',
    '如其', '如前所述', '如同', '如常', '如是', '如期', '如果', '如次', '如此', '如此等等', '如若', '始而',
    '姑且', '存在', '存心', '孰料', '孰知', '宁', '宁可', '宁愿', '宁肯', '它', '它们', '它们的', '它是',
    '它的', '安全', '完全', '完成', '定', '实现', '实际', '宣布', '容易', '密切', '对', '对于', '对应',
    '对待', '对方', '对比', '将', '将才', '将要', '将近', '小', '少数', '尔', '尔后', '尔尔', '尔等', '尚且',
    '尤其', '就', '就地', '就是', '就是了', '就是说', '就此', '就算', '就要', '尽', '尽可能', '尽如人意',
    '尽心尽力', '尽心竭力', '尽快', '尽早', '尽然', '尽管', '尽管如此', '尽量', '局外', '居然', '届时',
    '属于', '屡', '屡屡', '屡次', '屡次三番', '岂', '岂但', '岂止', '岂非', '川流不息', '左右', '巨大',
    '巩固', '差一点', '差不多', '己', '已', '已矣', '已经', '巴', '巴巴', '带', '帮助', '常', '常常',
    '常言说', '常言说得好', '常言道', '平素', '年复一年', '并', '并不', '并不是', '并且', '并排', '并无',
    '并没', '并没有', '并肩', '并非', '广大', '广泛', '应当', '应用', '应该',
    '庶乎', '庶几', '开外', '开始', '开展', '引起', '弗', '弹指之间', '强烈', '强调', '归', '归根到底',
    '归根结底', '归齐', '当', '当下', '当中', '当儿', '当前', '当即', '当口儿', '当地', '当场', '当头',
    '当庭', '当时', '当然', '当真', '当着', '形成', '彻夜', '彻底', '彼', '彼时', '彼此', '往', '往往', '待',
    '待到', '很', '很多', '很少', '後来', '後面', '得', '得了', '得出', '得到', '得天独厚', '得起', '心里',
    '必', '必定', '必将', '必然', '必要', '必须', '快', '快要', '忽地', '忽然', '怎', '怎么', '怎么办',
    '怎么样', '怎奈', '怎样', '怎麽', '怕', '急匆匆', '怪', '怪不得', '总之', '总是', '总的来看',
    '总的来说', '总的说来', '总结', '总而言之', '恍然', '恐怕', '恰似', '恰好', '恰如', '恰巧', '恰恰',
    '恰恰相反', '恰逢', '您', '您们', '您是', '惟其', '惯常', '意思', '愤然', '愿意', '慢说', '成为', '成年',
    '成年累月', '成心', '我', '我们', '我是', '我的', '或', '或则', '或多或少', '或是', '或曰', '或者',
    '或许', '战斗', '截然', '截至', '所', '所以', '所在', '所幸', '所有', '所谓', '才', '才能', '扑通', '打',
    '打从', '打开天窗说亮话', '扩大', '把', '抑或', '抽冷子', '拦腰', '拿', '按', '按时', '按期', '按照',
    '按理', '按说', '挨个', '挨家挨户', '挨次', '挨着', '挨门挨户', '挨门逐户', '换句话说', '换言之', '据',
    '据实', '据悉', '据我所知', '据此', '据称', '据说', '掌握', '接下来', '接着', '接著', '接连不断',
    '放量', '故', '故意', '故此', '故而', '敞开儿', '敢', '敢于', '敢情', '数/', '整个', '断然', '方',
    '方便', '方才', '方能', '方面', '旁人', '无', '无宁', '无法', '无论', '既', '既...又', '既往', '既是',
    '既然', '日复一日', '日渐', '日益', '日臻', '日见', '时候', '昂然', '明显', '明确', '是', '是不是',
    '是以', '是否', '是的', '显然', '显著', '普通', '普遍', '暗中', '暗地里', '暗自', '更', '更为', '更加',
    '更进一步', '曾', '曾经', '替', '替代', '最', '最后', '最大', '最好', '最後', '最近', '最高', '有',
    '有些', '有关', '有利', '有力', '有及', '有所', '有效', '有时', '有点', '有的', '有的是', '有着', '有著',
    '望', '朝', '朝着', '末##末', '本', '本人', '本地', '本着', '本身', '权时', '来', '来不及', '来得及',
    '来看', '来着', '来自', '来讲', '来说', '极', '极为', '极了', '极其', '极力', '极大', '极度', '极端',
    '构成', '果然', '果真', '某', '某个', '某些', '某某', '根据', '根本', '格外', '梆', '概', '次第', '欢迎',
    '欤', '正值', '正在', '正如', '正巧', '正常', '正是', '此', '此中', '此后', '此地', '此处', '此外',
    '此时', '此次', '此间', '殆', '毋宁', '每', '每个', '每天', '每年', '每当', '每时每刻', '每每', '每逢',
    '比', '比及', '比如', '比如说', '比方', '比照', '比起', '比较', '毕竟', '毫不', '毫无', '毫无例外',
    '毫无保留地', '汝', '沙沙', '没', '没奈何', '没有', '沿', '沿着', '注意', '活', '深入', '清楚', '满',
    '满足', '漫说', '焉',
    '然', '然则', '然后', '然後', '然而', '照', '照着', '牢牢', '特别是', '特殊', '特点', '犹且', '犹自',
    '独', '独自', '猛然', '猛然间', '率尔', '率然', '现代', '现在', '理应', '理当', '理该', '瑟瑟', '甚且',
    '甚么', '甚或', '甚而', '甚至', '甚至于', '用', '用来', '甫', '甭', '由', '由于', '由是', '由此',
    '由此可见', '略', '略为', '略加', '略微', '白', '白白', '的', '的确', '的话', '皆可', '目前', '直到',
    '直接', '相似', '相信', '相反', '相同', '相对', '相对而言', '相应', '相当', '相等', '省得', '看',
    '看上去', '看出', '看到', '看来', '看样子', '看看', '看见', '看起来', '真是', '真正', '眨眼', '着',
    '着呢', '矣', '矣乎', '矣哉', '知道', '砰', '确定', '碰巧', '社会主义', '离', '种', '积极', '移动',
    '究竟', '穷年累月', '突出', '突然', '窃', '立', '立刻', '立即', '立地', '立时', '立马', '竟', '竟然',
    '竟而', '第', '第二', '等', '等到', '等等', '策略地', '简直', '简而言之', '简言之', '管', '类如', '粗',
    '精光', '紧接着', '累年', '累次', '纯', '纯粹', '纵', '纵令', '纵使', '纵然', '练习', '组成', '经',
    '经常', '经过', '结合', '结果', '给', '绝', '绝不', '绝对', '绝非', '绝顶', '继之', '继后', '继续',
    '继而', '维持', '综上所述', '缕缕', '罢了', '老', '老大', '老是', '老老实实', '考虑', '者', '而', '而且',
    '而况', '而又', '而后', '而外', '而已', '而是', '而言', '而论', '联系', '联袂', '背地里', '背靠背', '能',
    '能否', '能够', '腾', '自', '自个儿', '自从', '自各儿', '自后', '自家', '自己', '自打', '自身', '臭',
    '至', '至于', '至今', '至若', '致', '般的', '良好', '若', '若夫', '若是', '若果', '若非', '范围', '莫',
    '莫不', '莫不然', '莫如', '莫若', '莫非', '获得', '藉以', '虽', '虽则', '虽然', '虽说', '蛮', '行为',
    '行动', '表明', '表示', '被', '要', '要不', '要不是', '要不然', '要么', '要是', '要求', '见', '规定',
    '觉得', '譬喻', '譬如', '认为', '认真', '认识', '让', '许多', '论', '论说', '设使', '设或', '设若',
    '诚如', '诚然', '话说', '该', '该当', '说明', '说来', '说说', '请勿', '诸', '诸位', '诸如', '谁', '谁人',
    '谁料', '谁知', '谨', '豁然', '贼死', '赖以', '赶', '赶快', '赶早不赶晚', '起', '起先', '起初', '起头',
    '起来', '起见', '起首', '趁', '趁便', '趁势', '趁早', '趁机', '趁热', '趁着', '越是', '距', '跟', '路经',
    '转动', '转变', '转贴', '轰然', '较', '较为', '较之', '较比', '边', '达到', '达旦', '迄', '迅速', '过',
    '过于', '过去', '过来', '运用', '近', '近几年来', '近年来', '近来', '还', '还是', '还有', '还要', '这',
    '这一来', '这个', '这么', '这么些', '这么样', '这么点儿', '这些', '这会儿', '这儿', '这就是说', '这时',
    '这样', '这次', '这点', '这种', '这般', '这边', '这里', '这麽', '进入', '进去', '进来', '进步', '进而',
    '进行', '连', '连同', '连声', '连日', '连日来', '连袂', '连连', '迟早', '迫于', '适应',
    '适当', '适用', '逐步', '逐渐', '通常', '通过', '造成', '逢', '遇到', '遭到', '遵循', '遵照', '避免',
    '那', '那个', '那么', '那么些', '那么样', '那些', '那会儿', '那儿', '那时', '那末', '那样', '那般',
    '那边', '那里', '那麽', '部分', '都', '鄙人', '采取', '里面', '重大', '重新', '重要', '鉴于', '针对',
    '长期以来', '长此下去', '长线', '长话短说', '问题', '间或', '防止', '阿', '附近', '陈年', '限制', '陡然',
    '除', '除了', '除却', '除去', '除外', '除开', '除此', '除此之外', '除此以外', '除此而外', '除非', '随',
    '随后', '随时', '随着', '随著', '隔夜', '隔日', '难得', '难怪', '难说', '难道', '难道说', '集中', '零',
    '需要', '非但', '非常', '非徒', '非得', '非特', '非独', '靠', '顶多', '顷', '顷刻', '顷刻之间', '顷刻间',
    '顺', '顺着', '顿时', '颇', '风雨无阻', '饱', '首先', '马上', '高低', '高兴', '默然', '默默地', '齐',
    # The entry written here as a bare ''' in the original opened an
    # unterminated triple-quoted string; it is spelled as an apostrophe.
    '︿', '!', '#', '$', '%', '&', "'", '(', ')', ')÷(1-', ')、', '*', '+', '+ξ', '++', ',', ',也', '-',
    '-β', '--', '-[*]-', '.', '/', '0', '0:2', '1', '1.', '12%', '2', '2.3%', '3', '4', '5', '5:0', '6',
    '7', '8', '9', ':', ';', '<', '<±', '<Δ', '<λ', '<φ', '<<', '=', '=″', '=☆', '=(', '=-', '=[', '={',
    '>', '>λ', '?', '@', 'A', 'LI', 'R.L.', 'ZXFITL', '[', '[①①]', '[①②]', '[①③]', '[①④]', '[①⑤]',
    '[①⑥]', '[①⑦]', '[①⑧]', '[①⑨]', '[①A]', '[①B]', '[①C]', '[①D]', '[①E]', '[①]', '[①a]', '[①c]',
    '[①d]', '[①e]', '[①f]', '[①g]', '[①h]', '[①i]', '[①o]', '[②', '[②①]', '[②②]', '[②③]', '[②④',
    '[②⑤]', '[②⑥]', '[②⑦]', '[②⑧]', '[②⑩]', '[②B]', '[②G]', '[②]', '[②a]', '[②b]', '[②c]', '[②d]',
    '[②e]', '[②f]', '[②g]', '[②h]', '[②i]', '[②j]', '[③①]', '[③⑩]', '[③F]', '[③]', '[③a]', '[③b]',
    '[③c]', '[③d]', '[③e]', '[③g]', '[③h]', '[④]', '[④a]', '[④b]', '[④c]', '[④d]', '[④e]', '[⑤]',
    '[⑤]]', '[⑤a]', '[⑤b]', '[⑤d]', '[⑤e]', '[⑤f]', '[⑥]', '[⑦]', '[⑧]', '[⑨]', '[⑩]', '[*]', '[-',
    '[]', ']', ']∧′=[', '][', '_', 'a]', 'b]', 'c]', 'e]', 'f]', 'ng昉', '{', '{-', '|', '}', '}>', '~',
    '~±', '~+', '¥', '-------------------',
])


def make_wordcloud(strs, stopwords=None):
    """Segment *strs* with jieba, count the words and render a word cloud.

    Args:
        strs: Text to segment.
        stopwords: Optional iterable of extra words to drop, applied on top
            of the built-in stop-word set. ``None`` or an empty iterable
            means "built-in set only".

    Returns:
        A 4-tuple ``(True, wordNum, wordFrequency, img)`` where
        ``wordFrequency`` is the ``Counter.most_common()`` list of
        ``(word, count)`` pairs, ``wordNum`` is the number of distinct words,
        and ``img`` is the rendered cloud as a base64-encoded JPEG string.

    NOTE(review): when every token is filtered out, the empty text is still
    passed to ``WordCloud.generate``; presumably that raises -- confirm how
    callers expect the "no words" case to be reported.
    """
    import base64
    from io import BytesIO
    from jieba import cut
    from wordcloud import WordCloud
    from wordcloud.wordcloud import np
    from wordcloud.wordcloud import Image
    from collections import Counter

    # The original test was inverted (`if not stopwords`), so caller-supplied
    # stop words were never applied.
    sw = _BUILTIN_STOPWORDS
    if stopwords:
        sw = sw.union(stopwords)

    # Keep only non-blank tokens that are not stop words.
    cutResult = []
    for word in cut(strs):
        token = word.strip()
        if token and token not in sw:
            cutResult.append(token)

    # Word frequencies, most common first
    wordFrequency = Counter(cutResult).most_common()
    # Number of distinct words
    wordNum = len(wordFrequency)

    # Render the cloud inside the shape given by the default mask image
    imgMask = np.array(Image.open('myapps/static/wordcloud/default.jpg'))
    wc = WordCloud(
        font_path='static/msyh.ttc',
        background_color='white',
        mask=imgMask)
    img = wc.generate(' '.join(cutResult)).to_image()

    # Encode the rendered image as a base64 JPEG string
    imgIo = BytesIO()
    img.save(imgIo, format='JPEG')
    img = base64.b64encode(imgIo.getvalue()).decode('utf8')
    return (True, wordNum, wordFrequency, img)
signatures += ' '.join(jieba.analyse.extract_tags(signature)) # 关键字提取 signatures += ' ' # Image.open,读取指定图片。 im = Image.open('photo.jpg') # 可替换你喜欢的图⽚,在当前文件夹下(相对路径) # np.array,将读入的im转换成背景图数据。 mask = np.array(im) # WordCloud函数,建立词云对象 # mask参数用于设置词云形状,默认的是矩形,可以读入自己选定的图片。margin:画布偏移,默认2像素. word_cloud = WordCloud(font_path='simhei.ttf', background_color='white', max_words=1200, mask=mask, margin=15) # generate,向word_could这个WordCloud对象中加载signatures(文本内容),对全部文本进行自动分词(但是对中文支持不好) word_cloud.generate(signatures) # ImageColorGenerator函数通过mask参数生成词云颜色值 image_colors = ImageColorGenerator(mask) # 用recolor方法重置词云颜色为(color_func=image_colors) word_clour = word_cloud.recolor(color_func=image_colors) # figure函数中,figsize表示输出的绘图对象的宽和高、dpi表示指定绘图对象的分辨率,即每英寸多少个像素,缺省值为80。 plt.figure(figsize=(12, 12), dpi=100) # imshow函数用于对按照样本图片重置颜色的图像进行处理,并显示其格式,但是不能显示。 plt.imshow(word_clour) # 不显示坐标尺寸 plt.axis('off') # 显示词云图 plt.show() # 输出到文件 word_cloud.to_file('signatures.png')
background_color:背景颜色 mask:背景图片 stopwords max_font_size:字体最大大小 '''
# NOTE(review): the line above is the tail of a triple-quoted string opened
# before this excerpt; it is left untouched.
# NOTE(review): `width`, `height`, `diao_img` and `lstr` are defined outside
# this excerpt.
wc = WordCloud('./font/Arial.ttf', width=width, height=height, background_color="white", mask=diao_img, font_step=3, max_font_size=30, random_state=False, prefer_horizontal=0.9)
wc.generate(lstr)
# Extract the colours of the background (mask) image
img_cl = ImageColorGenerator(diao_img)
# Show the cloud with its default colours
plt.imshow(wc)
plt.axis("off")
# Draw a second figure with the cloud recoloured from the mask image
plt.figure()
plt.imshow(wc.recolor(color_func=img_cl))
plt.axis("off")
if word.lower() not in stopwords: word_list.append(word.lower()) # Eliminate non alpha elements text_list = [word.lower() for word in txt_words if word.isalpha()] # calculating and printing the top 10 words top_10_words = Counter(text_list).most_common(10) print('\nThe following are the top 10 words (Count):') for pair in top_10_words: print(' -', pair[0], '(' + str(pair[1]) + ')') ##### WORDCLOUD ##### # transforming the list into a string for displaying text_str = ' '.join(text_list) # defining the wordcloud parameters wc = WordCloud(background_color='white', max_words=2000) # generating word cloud wc.generate(text_str) # storing to file wc.to_file('txt.png') # showing the cloud plt.imshow(wc) plt.axis('off') plt.show()
fig, ax1 = plt.subplots() ax1.bar(repeat.keys(), repeat.values()) fig.autofmt_xdate() plt.savefig('graph.png') plt.show() ''' custom_mask = np.array(Image.open('static/img/twitter_mask.jpg')) wordcloud = WordCloud(background_color='white', contour_width=3, contour_color='Black', max_font_size=300, min_font_size=25) wordcloud.generate(only_emotion) ''' # plot the WordCloud image plt.figure(figsize=(8, 8), facecolor=None) plt.imshow(wordcloud, interpolation="bilinear") plt.axis("off") plt.tight_layout(pad=0) plt.margins(x=0, y=0) plt.savefig('graph1.png') plt.show() ''' ''' # shaping my word cloud twitter_mask = np.array(Image.open("static/img/twitter_mask.png")) print(twitter_mask) # Values of 255 are pure white, whereas values of 1 are black. twitter_mask = twitter_mask.reshape((twitter_mask.shape[0], -1), order='F') #3d into 2d
novel_as_string = ' '.join(word_list) # In[89]: icon = Image.open(WHALE_FILE) image_mask = Image.new(mode='RGB', size=icon.size, color=(255, 255, 255)) image_mask.paste(icon, box=icon) rgb_array = np.array(image_mask) # converts the image object to an array word_cloud = WordCloud(mask=rgb_array, background_color='white', max_words=400, colormap='ocean') word_cloud.generate(novel_as_string) plt.figure(figsize=[16, 8]) plt.imshow(word_cloud, interpolation='bilinear') plt.axis('off') plt.show() # In[90]: rgb_array.shape # In[91]: rgb_array[1023, 2047] # In[92]:
# Read the whole novel; the path is relative to the 'python' root folder
# (per the original author's note).
text = ""
with open("section10/res/이상한나라의앨리스.txt", "r", encoding="utf-8") as f:
    text = f.read()
print(text)

# Words to exclude: the library defaults plus "Alice" and "said".
ignore = set(STOPWORDS)
ignore.update(("Alice", "said"))

# Create the WordCloud object.
wc = WordCloud(
    width=1200,
    height=800,
    scale=2.0,
    stopwords=ignore,    # excluded words
    max_font_size=150,   # largest font size
    max_words=100,       # maximum number of words shown
)

# Build the cloud and print the word weights it computed.
gen = wc.generate(text)
print(gen.words_)

# Draw the cloud on a figure, write it to disk, then close the figure.
pyplot.figure()
pyplot.imshow(gen, interpolation="bilinear")
wc.to_file("simple2.png")
pyplot.close()
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from matplotlib.image import imread

# Read the whole source text.
with open("cache/befull523.txt", 'r', encoding='utf-8') as f:
    data = f.read()
# print(data)
mask_img = imread('common_library/wordcloud/asset/tree.jpg')  # mask image; needs a white background
# Word cloud configuration
# NOTE(review): background_color=None is combined with the default colour
# mode and a JPEG output file below -- confirm the resulting background is
# what is intended.
wc_config = WordCloud(
    font_path='simhei.ttf',
    width=800,
    height=600,
    background_color=None,
    mask=mask_img,  # shape of the word cloud
)
# Generate the word cloud
word_cloud = wc_config.generate(text=data)
# Save the word cloud
word_cloud.to_file("cache/befull523.jpg")
# Show the word cloud
plt.imshow(word_cloud, interpolation='bilinear')
plt.axis('off')
plt.show()
# Import Matplotlib's plotting library
import matplotlib.pyplot as plt
# Import NumPy: np.array is used below to turn the mask image into an array
# (the original snippet used `np` without importing it).
import numpy as np
# Import the word cloud generator and its default stop words
from wordcloud import WordCloud, STOPWORDS
# Import the image library
from PIL import Image

# Each assignment below demonstrates ONE constructor option and replaces the
# previous generator, so only the last one (the mask) is in effect when the
# cloud is generated.

# Create a word cloud generator with default settings
wordCloud = WordCloud()
# Create a generator with a custom background colour
wordCloud = WordCloud(background_color='white')
# Create a generator with a maximum number of words
wordCloud = WordCloud(max_words=2000)
# Create a generator that drops stop words (words without meaning)
wordCloud = WordCloud(stopwords=set(STOPWORDS))
# Create a generator that uses an image as the mask of the cloud
wordCloud = WordCloud(mask=np.array(Image.open('dirección')))

# Generate the word cloud for the given text
wordCloud.generate('texto')

# Plot the word cloud
plt.imshow(wordCloud, interpolation='bilinear')
plt.axis('off')
def generate_mask_word_cloud(words, mask, tv_show):
    """Render *words* as a mask-shaped word cloud and save it as a PNG.

    Args:
        words: Text passed to ``WordCloud.generate``.
        mask: Path (or file object) of the image whose RGB pixels shape the
            cloud.
        tv_show: Name used both as the output folder and as the file-name
            prefix: ``./<tv_show>/<tv_show>-mask-word-cloud.png``.
    """
    mask_image = Image.open(mask, 'r').convert('RGB')
    cloud = WordCloud(
        background_color="white",
        width=400,
        height=400,
        mask=np.array(mask_image),
    )
    cloud.generate(words)

    output_path = "./" + tv_show + "/" + tv_show + '-mask-word-cloud.png'
    cloud.to_file(output_path)
# NOTE(review): this first statement presumably runs inside a loop over the
# lines of a JSON-lines file that starts above this excerpt -- confirm.
tips.append(json.loads(line))

df_tips = pd.DataFrame(tips)
# Select the 'Ramen Isshin' businesses matching `criteria` (defined above this
# excerpt), then keep only the tips written for them.
df_ri = df_business_restaurant.loc[(df_business_restaurant['name']=='Ramen Isshin') & criteria]
df_ri_tips = df_tips.loc[df_tips['business_id'].isin(df_ri.business_id)]

# Load the stop words once: the original called stopwords.words('english')
# for every single token and searched the returned list linearly.
english_stopwords = set(stopwords.words('english'))


def text_prep(text):
    """Lower-case *text*, strip everything but a-z and whitespace, and drop
    English stop words; returns the remaining words joined by single spaces.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    text = re.sub(r'[^a-z\s]', '', text.lower())
    return ' '.join(w for w in text.split() if w not in english_stopwords)


# df_ri_tips is a slice of df_tips; silence pandas' chained-assignment
# warning for the column assignment below.
pd.set_option('mode.chained_assignment', None)
# apply function
df_ri_tips['text_cl'] = df_ri_tips['text'].apply(text_prep)

# create a word cloud
wc = WordCloud(width=1600, height=800, random_state=42, max_words=1000000)
# Join the cleaned tips themselves. str(Series) is the printable repr: it
# contains index labels and the "Name: ..., dtype: ..." footer, and is
# truncated for long columns, all of which ended up in the cloud.
wc.generate(' '.join(df_ri_tips['text_cl']))

plt.figure(figsize=(15, 10), facecolor='black')
plt.title('Tips of Ramen Isshin', fontsize=40, color='white')
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.tight_layout(pad=10)
# Load the regular expression library
import re

df = pd.read_csv("megadata_csv.csv")
print(df)
df.head()

# Remove punctuation
# Raw string: '\.' in a plain literal is an invalid escape sequence, which
# newer Python versions warn about at compile time.
df['title'] = df['title'].map(lambda x: re.sub(r'[,\.!?]', '', x))
# Convert the titles to lowercase
df['title'] = df['title'].map(lambda x: x.lower())
df.to_csv("clean.csv")

# Join the different processed titles together.
long_string = ','.join(list(df['title'].values))
# Create a WordCloud object
wordcloud = WordCloud(background_color="white", max_words=5000, contour_width=3, contour_color='steelblue')
# Generate a word cloud
wordcloud.generate(long_string)
# Visualize the word cloud (the returned image is only displayed when this is
# the last expression of a notebook cell)
wordcloud.to_image()
# draw word clouds for all sentiments world_clouds_exp = st.beta_expander('Word Clouds By Sentiment') if len(wdf): with world_clouds_exp: word_sentiment = st.selectbox('Sentiment Type', ['positive', 'neutral', 'negative'], key='1') wcdf = wdf[wdf['sentiment']==word_sentiment].copy() words = ' '.join(wcdf['text']) wc_words = ' '.join([w for w in words.split() if not w.startswith('@') and w != 'RT']) if len(wc_words): st.markdown('#### Word cloud for %s sentiment' % (word_sentiment)) wc = WordCloud(stopwords=STOPWORDS, background_color='white', width=768, height=480) fig = plt.figure() plt.imshow(wc.generate(wc_words)) plt.xticks([]) plt.yticks([]) st.pyplot(fig) else: st.markdown("#### No Words to Plot.") st.write('\n\n\n') ## show ent types (bar) by sentiment ent_exp = st.beta_expander('Entities By Sentiment') if len(wdf): with ent_exp: ent_sentiment = st.selectbox('Sentiment Type', ['positive', 'neutral', 'negative'], key='3') etdf = wdf[wdf['sentiment']==ent_sentiment].copy()
def generatewordcloud(party, inputImageFileName, outputImageFileName):
    """Build a word cloud for one party, shaped and coloured by an image.

    Reads the module-level ``data`` frame and ``stopwordshearing`` set.

    Args:
        party: Value matched against ``data.Party`` to select the rows.
        inputImageFileName: Image used both as the cloud mask and as the
            source of the word colours; it is resized to 980x1080 first.
        outputImageFileName: File the finished cloud is written to.
    """
    speakerData = data[data.Party == party]

    # Lower-case every text and join in one pass; each text keeps its trailing
    # space exactly as the original per-row `+=` concatenation produced.
    allText = "".join(str(text).lower() + " " for text in speakerData['Text'])
    # Normalise hyphenated spellings so they count as one word.
    allText = allText.replace("e-mail", "email")
    allText = allText.replace("e- mail", "email")
    allText = allText.replace("op-ed", "oped")

    # Module-wide Pillow switch: tolerate truncated image files.
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    img = Image.open(inputImageFileName)
    # LANCZOS is the filter formerly also exposed as ANTIALIAS, a name that
    # newer Pillow releases no longer provide.
    img = img.resize((980, 1080), Image.LANCZOS)

    sl = STOPWORDS | stopwordshearing
    speakerArray = np.array(img)
    wc = WordCloud(background_color="white", max_words=1000, mask=speakerArray,
                   stopwords=sl, random_state=42)
    wc.generate(allText)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(wc.words_)

    # create coloring from image
    image_colors = ImageColorGenerator(speakerArray)
    wc.recolor(color_func=image_colors)
    wc.to_file(outputImageFileName)