def test_display_wordcloud_valid(self):
    """display_word_cloud() should accept a freshly generated cloud without error."""
    wc = WordCloud(background_color="white")
    wc.generate_from_text(
        "this is a word cloud text lmao lmao lmao lmao lo lo lo lol lol lol lol"
    )
    # Register the cloud on the object under test, then exercise display.
    self.wordcloud.word_clouds.append(wc)
    self.wordcloud.display_word_cloud()
def generate_wordcloud():
    """Display a word cloud built from the comments in maoyan.csv.

    Reads one comment per line (text before the first ':'), segments the
    Chinese text with jieba, and renders the cloud over 123.jpg.
    """
    comments = []
    with open('maoyan.csv', mode='r', encoding='utf-8') as f:
        rows = f.readlines()
        for row in rows:
            comment = row.split(':')[0]
            if comment != '':
                comments.append(comment)
    comment_after_split = jieba.cut(str(comments), cut_all=False)
    # Fix: join segments with spaces so WordCloud's tokenizer can split them;
    # the original ''.join glued every segment into one unbreakable string.
    words = ' '.join(comment_after_split)
    print(words)
    # Filler words excluded from the cloud.
    stopwords = STOPWORDS.copy()
    stopwords.add('电影')
    stopwords.add('一出')
    stopwords.add('好戏')
    stopwords.add('有点')
    bg_image = plt.imread('123.jpg')  # shape mask for the cloud
    wc = WordCloud(width=1024, height=768, background_color='white',
                   mask=bg_image, stopwords=stopwords, max_font_size=400,
                   random_state=50, font_path='STKAITI.TTF')
    wc.generate_from_text(words)
    wc.to_file('output/词云图.jpg')
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
def WorldCloud_pic(text_path, pic_path, font_path):
    """Generate a word cloud from a text file, shaped/coloured by a picture.

    Args:
        text_path: path of the UTF-8 text file to segment.
        pic_path: path of the background/mask image.
        font_path: path of a CJK-capable font (required for Chinese glyphs).
    """
    # Fix: read through a context manager so the file handle is always
    # closed (the original open(...).read() leaked it).
    with open(text_path, 'r', encoding='UTF-8') as f:
        text = f.read()
    word_list = jieba.cut(text, cut_all=False)
    wl_space_split = " ".join(word_list)
    print(wl_space_split)
    backgroud_Image = plt.imread(pic_path)
    print('加载图片成功!')
    stopwords = STOPWORDS.copy()  # start from wordcloud's builtin stopword set
    stopwords.add("哈哈")  # additional words to suppress; add more as needed
    wc = WordCloud(
        width=1024,
        height=768,
        background_color='white',  # canvas background colour
        mask=backgroud_Image,      # shape mask taken from the picture
        font_path=font_path,       # CJK font; without it Chinese renders as boxes
        max_words=600,             # cap on rendered words
        stopwords=stopwords,
        max_font_size=400,         # largest glyph size
        random_state=50,           # number of colour schemes to sample from
    )
    wc.generate_from_text(wl_space_split)
    # Recolour glyphs from the background image's palette.
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    d = path.dirname(__file__)  # directory of this module
    wc.to_file(path.join(d, "词云.jpg"))
    print('生成词云成功!')
def wordcloud():
    """Display a word cloud of the movie introductions stored in douban.db."""
    # Pull every introduction row from SQLite and concatenate into one string.
    con = sqlite3.connect('douban.db')
    cur = con.cursor()
    sql = "select instroduction from movie250"
    data = cur.execute(sql)
    text = ''
    for item in data:
        text += item[0]
    cur.close()
    con.close()
    # Segment with jieba and space-join so WordCloud can tokenize.
    cut = jieba.cut(text)
    string = ' '.join(cut)
    # Shape mask from the tree picture.
    img = Image.open(r'./static/assets/img/tree.jpg')
    img_array = np.array(img)
    wc = WordCloud(
        background_color='white',
        mask=img_array,
        font_path=
        '/home/yzx/PycharmProjects/douban_flask/templates/MSYH.TTF')
    wc.generate_from_text(string)
    # Render the cloud.
    fig = plt.figure(1)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
def main():
    """Build a word cloud from movie introductions and save it as word.png."""
    # Fetch all introductions from the database.
    connection = sqlite3.connect('movie.db')
    cursor = connection.cursor()
    rows = cursor.execute('select introduction from movie250')
    text = ''.join(row[0] for row in rows)
    # Space-separate jieba segments so WordCloud can tokenize them.
    string = ' '.join(jieba.cut(text))
    print(len(string))
    # Image used as the cloud's shape mask, converted to an array.
    mask = np.array(Image.open(r'.\static\assets\img\camel.jfif'))
    cloud = WordCloud(background_color='black', mask=mask, font_path='msyh.ttc')
    cloud.generate_from_text(string)
    # Render and save (no interactive display).
    figure = plt.figure(1)
    plt.imshow(cloud)
    plt.axis('off')  # hide the axes
    plt.savefig(r'.\static\assets\img\word.png', dpi=500)
def data_analysis():
    """Analyse the data inside text.txt.

    Due to data limitations only a word-cloud analysis is performed.
    :return: None
    """
    with open("text.txt", 'r', encoding="utf-8") as f:
        text = f.read()
    # Segment the file content (precise mode) and space-join for WordCloud.
    comment = jieba.cut(text, cut_all=False)
    comment = " ".join(comment)
    # Load the background/mask image.
    bg_img = plt.imread("bg.jpg")
    # Words excluded from the cloud.
    stopwords = set()
    stopwords.add("爱情公寓")
    stopwords.add("爱情")
    stopwords.add("公寓")
    stopwords.add("电影")
    # Build the WordCloud object.
    wc = WordCloud(width=1800, height=1000,
                   background_color='white',
                   font_path="C:/Windows/Fonts/STFANGSO.ttf",
                   mask=bg_img,
                   stopwords=stopwords,
                   max_font_size=400,
                   random_state=50)
    wc.generate_from_text(comment)
    plt.imshow(wc)
    plt.axis('off')  # hide the axes
    plt.show()
    wc.to_file("result.jpg")
def word_cloud(csv_file, stopwords_path, pic_path):
    """Create a word-cloud image from the 'content' column of a CSV file.

    Args:
        csv_file: CSV base name (without extension) in the current directory.
        stopwords_path: text file with one stopword per line.
        pic_path: background/mask picture for the cloud.
    """
    pic_name = csv_file + "_词云图.png"
    path = os.path.abspath(os.curdir)
    csv_file = path + "\\" + csv_file + ".csv"
    csv_file = csv_file.replace('\\', '\\\\')
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')
    content = []
    for i in d['content']:
        try:
            i = translate(i)
        except AttributeError:
            # Non-string cells (e.g. NaN) can't be translated — skip them.
            continue
        else:
            content.append(i)
    comment_after_split = jieba.cut(str(content), cut_all=False)
    wl_space_split = " ".join(comment_after_split)
    backgroud_Image = plt.imread(pic_path)
    stopwords = STOPWORDS.copy()
    # Fix: the 'with' block already closes the file; the explicit f.close()
    # inside it was redundant and has been removed.
    with open(stopwords_path, 'r', encoding='utf-8') as f:
        for i in f.readlines():
            stopwords.add(i.strip('\n'))
    wc = WordCloud(width=1024, height=768, background_color='white',
                   mask=backgroud_Image,
                   font_path="C:\simhei.ttf",
                   stopwords=stopwords, max_font_size=400,
                   random_state=50)
    wc.generate_from_text(wl_space_split)
    # Colour the glyphs from the background image.
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    wc.to_file(pic_name)
def title_word_cloud():
    """Render a word cloud of thread titles over the hupu background image."""
    data = pd.read_csv('hupu-pandas.csv', na_values=['NULL'])
    title_data = data['title']  # loaded for reference; cloud text comes from words.txt
    # Fix: str has no .decode() on Python 3 — decode at open() time instead.
    text = open('words.txt', encoding='utf-8').read()
    image = Image.open(r'C:\Users\lvbiaobiao\Desktop\hupu-1.jpg')
    graph = np.array(image)  # shape mask for the cloud
    cloud = WordCloud(
        font_path=
        r'E:\date\python\SourceHanSerifSC_EL-M\SourceHanSerifSC-Regular.otf',
        max_font_size=18,
        background_color="white",
        width=640,
        height=200,
        relative_scaling=0.5,
        mask=graph,
        max_words=100,
        colormap='nipy_spectral')
    cloud.generate_from_text(text)
    # image_color = ImageColorGenerator(graph)
    plt.imshow(cloud, interpolation='gaussian')
    # plt.imshow(cloud.recolor(color_func=image_color))
    plt.axis("off")
    plt.show()
    cloud.to_file('hupu.png')
def ShowImgHotWord(items, maxwords):
    """Show and save a word cloud of hot words, shaped by ntlk1/cloud.jpg.

    Args:
        items: iterable of word strings to render.
        maxwords: maximum number of words in the cloud.
    """
    npath = os.getcwd()
    imgfile = npath + '\\ntlk1\\cloud.jpg'  # Windows-style path to the mask image
    back_ground = imread(imgfile)
    wc = WordCloud(
        background_color="white",
        max_words=maxwords,
        mask=back_ground,
        max_font_size=100,
        random_state=42,
        font_path="C:/Windows/Fonts/STZHONGS.ttf",  # CJK font so Chinese renders
    )
    #print(items)
    wcword = ' '.join(items)
    wc.generate_from_text(wcword)
    # Recolour the words from the background picture's colours.
    image_colors = ImageColorGenerator(back_ground)
    wc.recolor(color_func=image_colors)
    # Display the image.
    plt.imshow(wc)
    # Hide the axes.
    plt.axis('off')
    plt.show()
    # Alternative rendering kept for reference:
    # plt.figure()
    # plt.imshow(wc.recolor(color_func=image_colors))
    # plt.axis('off')
    # Save the picture.
    wc.to_file('123.png')
def wc_english():
    """Generate an English word cloud from legend.txt and save it as 1900prol.png."""
    # Fix: the original tested the misspelled name "_file_" (never defined),
    # so this always fell back to os.getcwd(). __file__ lives in globals(),
    # not locals(), when present.
    d = path.dirname(__file__) if "__file__" in globals() else os.getcwd()
    # Source text.
    text = open(path.join(d, 'legend.txt')).read()
    # Background picture and its colour palette (img_colors is an alternative
    # colouring; the cloud below is recoloured with grey_color_func instead).
    background_Image = np.array(Image.open(path.join(d, 'legend.jpg')))
    img_colors = ImageColorGenerator(background_Image)
    # English stopword set; add custom words here.
    stopwords = set(STOPWORDS)
    stopwords.add('one')
    wc = WordCloud(
        margin=2,
        scale=2,
        max_words=200,
        min_font_size=4,
        stopwords=stopwords,
        random_state=42,
        max_font_size=150,
    )
    wc.generate_from_text(text)
    wc.recolor(color_func=grey_color_func)  # grey_color_func defined elsewhere in the file
    wc.to_file('1900prol.png')
    # Display the image.
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
def gene_word_cloud(_text):
    """Render *_text* as a word cloud and save it to
    '饥饿站台top10关键词textrank.png'.
    """
    options = dict(
        width=600,             # default canvas width
        height=200,            # default canvas height
        margin=2,              # padding around words
        ranks_only=None,
        prefer_horizontal=0.9,
        mask=None,             # supply an image array here to shape the cloud
        color_func=None,
        max_words=200,         # cap on rendered words
        stopwords=None,        # set a stopword collection to filter the text
        random_state=None,
        background_color='#ffffff',  # canvas colour: name or hex value
        font_step=1,
        font_path="/System/Library/fonts/PingFang.ttc",
        mode='RGB',
        regexp=None,
        collocations=True,
        normalize_plurals=True,
        contour_width=0,
        colormap='viridis',    # matplotlib colormap controlling the palette
        contour_color='Blues',
        repeat=False,
        scale=2,
        min_font_size=10,
        max_font_size=200,
    )
    cloud = WordCloud(**options)
    cloud.generate_from_text(_text)
    # Persist the rendered image.
    cloud.to_file('饥饿站台top10关键词textrank.png')
def content_distribution(self, contents):
    """Build a word cloud from *contents* and save it under self.path.

    Args:
        contents: text to visualise (stringified before segmentation).
    """
    jieba.load_userdict('userdict.txt')  # custom dictionary for segmentation
    content_cut = jieba.cut(str(contents), cut_all=False)
    words = ' '.join(content_cut)
    # Stopwords come from a local file rather than wordcloud's STOPWORDS.
    # stopwords = STOPWORDS.copy()
    # stopwords.add('电影')
    stopwords = set()
    with open('stopwords.txt', 'r', encoding='utf-8') as f:
        rows = f.readlines()
        for row in rows:
            stopwords.add(row.replace('\n', ''))
    # print(stopwords)
    # Load the background image.
    bg_image = plt.imread('bgimage.jpg')
    # Cloud parameters: canvas size, background colour, mask shape, font,
    # stopwords, and maximum glyph size.
    wc = WordCloud(width=1200, height=800, background_color='white',
                   mask=bg_image, font_path='STKAITI.ttf', stopwords=stopwords,
                   max_font_size=300, random_state=40)
    # Feed the segmented text into the cloud.
    wc.generate_from_text(words)
    plt.imshow(wc)
    # Hide the axes.
    plt.axis('off')
    plt.show()
    wc.to_file(self.path + r'/wordcloud.jpg')
def drawCloud(db):
    """Render product titles from the MongoDB 'products' collection as a
    word cloud and save it to computer.jpg.

    Args:
        db: an open MongoDB database handle exposing a `products` collection.
    """
    table = db.products
    data = pd.DataFrame(list(table.find()))
    data = data[['_id']]  # titles are stored in the _id field
    text = ''
    for line in data['_id']:
        # Strip punctuation/ASCII noise and boilerplate words before segmenting.
        r = '[a-zA-Z0-9’!"#$%&\'()*+,-./:;<=>?@,。?★、…【】《》?“”‘’![\\]^_`{|}~]+'
        line = re.sub(r, '', line.replace('笔记本电脑', '').replace('英寸', ''))
        text += ' '.join(jieba.cut(line, cut_all=False))
    # backgroud_Image = plt.imread('computer.jpeg')
    print('text>>', text)
    wc = WordCloud(
        background_color='white',
        # mask=backgroud_Image,
        # font_path='DroidSansMono.ttf',  # without a CJK font, glyphs render as boxes
        max_words=2000,
        stopwords=STOPWORDS,
        max_font_size=130,
        random_state=30
    )
    wc.generate_from_text(text)
    # img_colors = ImageColorGenerator(backgroud_Image)
    # wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')
    wc.to_file("computer.jpg")
    print("生成词云成功")
def create_word_cloud_img(self, data_frame):
    """Create and save a word-cloud image for the movie's comments.

    Args:
        data_frame: DataFrame with a 'comment' column.
    """
    # Segment the comment text.
    comment = jieba.cut(str(data_frame['comment']), cut_all=False)
    wl_space_split = " ".join(comment)
    # Custom stopword set provided by the owning object.
    stopwords = self.get_stop_words()
    wc = WordCloud(
        width=1024, height=768, background_color='white',
        # mask=backgroud_Image,
        font_path="C:\simhei.ttf", stopwords=stopwords, max_font_size=400,
        random_state=50)
    wc.generate_from_text(wl_space_split)
    plt.imshow(wc)
    plt.axis('off')  # hide the axes
    # plt.show()
    # The output file name depends on the comment platform:
    # 0 = douban, 1 = maoyan, anything else = both combined.
    file_path = ''
    if self.movie.platform == 0:
        file_path = './' + self.movie.movie_name + '/【豆瓣】《' + self.movie.movie_name + '》词云图.jpg'
    elif self.movie.platform == 1:
        file_path = './' + self.movie.movie_name + '/【猫眼】《' + self.movie.movie_name + '》词云图.jpg'
    else:
        file_path = './' + self.movie.movie_name + '/【豆瓣+猫眼】《' + self.movie.movie_name + '》词云图.jpg'
    wc.to_file(file_path)
    print('词云图创建完毕')
def create_wordcloud(df):
    """Generate the subway-station-name word cloud ("地铁名词云.jpg").

    Args:
        df: DataFrame with a 'station' column of station names.
    """
    # Segment every station name; each segmented name is followed by a space,
    # producing the same concatenated text as repeated '+='.
    fragments = []
    for name in df['station']:
        fragments.append(' '.join(jieba.cut(name, cut_all=False)))
        fragments.append(' ')
    text = ''.join(fragments)
    mask_img = plt.imread('rocket.jpg')
    cloud = WordCloud(
        background_color='white',
        mask=mask_img,
        font_path='C:\Windows\Fonts\华康俪金黑W8.TTF',
        max_words=1000,
        max_font_size=150,
        min_font_size=15,
        prefer_horizontal=1,
        random_state=50,
    )
    cloud.generate_from_text(text)
    # Colour glyphs from the background image's palette.
    cloud.recolor(color_func=ImageColorGenerator(mask_img))
    # Print the top-50 words by frequency for manual inspection.
    frequencies = WordCloud.process_text(cloud, text)
    ranked = sorted(frequencies.items(), key=lambda item: item[1], reverse=True)
    print(ranked[:50])
    plt.imshow(cloud)
    plt.axis('off')
    cloud.to_file("地铁名词云.jpg")
    print('生成词云成功!')
def make_worldcloud(file_path):
    """Build a word cloud from the text file at *file_path*; save it as h11.jpg.

    Args:
        file_path: path of a UTF-8 text file.
    """
    # Fix: read via a context manager so the file handle is always closed
    # (the original open(...).read() leaked the handle).
    with open(file_path, 'r', encoding='UTF-8') as f:
        text_from_file_with_apath = f.read()
    wordlist_after_jieba = jieba.cut(text_from_file_with_apath, cut_all=False)
    wl_space_split = " ".join(wordlist_after_jieba)
    print(wl_space_split)
    backgroud_Image = plt.imread('./douban.jpg')
    print('加载图片成功!')
    # Word-cloud styling.
    stopwords = STOPWORDS.copy()
    stopwords.add("哈哈")
    stopwords.add("还是")  # add more words to suppress as needed
    wc = WordCloud(
        width=1024,
        height=768,
        background_color='white',   # canvas background colour
        mask=backgroud_Image,       # shape mask from the picture
        font_path='E:\simsun.ttf',  # CJK font; required or Chinese renders as boxes
        max_words=300,              # cap on rendered words
        stopwords=stopwords,
        max_font_size=400,          # largest glyph size
        random_state=50,            # number of colour schemes
    )
    wc.generate_from_text(wl_space_split)
    # Recolour glyphs from the background image.
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    d = path.dirname(__file__)  # directory of this module
    wc.to_file(path.join(d, "h11.jpg"))
    print('生成词云成功!')
def show_fen_ci_beizhu():
    """Draw a word cloud of WeChat friends' remark names."""
    # jieba: Chinese word segmentation
    import jieba
    # matplotlib: 2-D rendering
    import matplotlib.pyplot as plt
    # wordcloud: word-cloud generation
    from wordcloud import WordCloud, STOPWORDS
    # Collect remark names (second CSV column) from the friends data file.
    remarkNames = []
    with open(friends_data, mode='r', encoding='utf-8') as f:
        rows = f.readlines()
        for row in rows:
            remarkName = row.split(',')[1]
            if remarkName != '':
                remarkNames.append(remarkName)
    # Segment (cut_all=False: precise mode; True would be full mode).
    split = jieba.cut(str(remarkNames), cut_all=False)
    words = ' '.join(split)  # space-join for WordCloud's tokenizer
    # Background/mask image.
    bg_image = plt.imread(current_dir+'/010-wechat-bg.jpg')
    # Cloud parameters: canvas size, background colour, mask shape, font,
    # and maximum glyph size.
    wc = WordCloud(width=1024, height=768, background_color='white', mask=bg_image,
                   font_path='STKAITI.TTF', max_font_size=400, random_state=50)
    # Feed the segmented text into the cloud.
    wc.generate_from_text(words)
    plt.imshow(wc)   # draw the image
    plt.axis('off')  # hide the axes
    # Save the result locally.
    wc.to_file(current_dir+'/备注名词云图.jpg')
def word_cloud(csv_file, stopwords_path, pic_path):
    """Create a word cloud from the 'job_desc' column of *csv_file*.

    Args:
        csv_file: path to the CSV file (extension included).
        stopwords_path: text file with one stopword per line.
        pic_path: background/mask picture for the cloud.
    """
    pic_name = csv_file[:-4] + "_词云图.png"  # swap '.csv' suffix for the image name
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')
    content = d['job_desc'].values
    comment_after_split = jieba.cut(str(content), cut_all=False)
    wl_space_split = " ".join(comment_after_split)
    background_image = plt.imread(pic_path)
    stopwords = STOPWORDS.copy()
    # Fix: removed the redundant f.close() — the 'with' block closes the file.
    with open(stopwords_path, 'r', encoding='utf-8') as f:
        for i in f.readlines():
            stopwords.add(i.strip('\n'))
    wc = WordCloud(width=1024, height=768, background_color='white',
                   mask=background_image, font_path="simhei.ttf",
                   stopwords=stopwords, max_font_size=400, random_state=50)
    wc.generate_from_text(wl_space_split)
    # Colour the glyphs from the background image.
    img_colors = ImageColorGenerator(background_image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    wc.to_file(pic_name)
def get_wordcloud() -> str:
    """Create a word-cloud image from the movie introductions.

    return : path (image path)
    """
    rows = select("select intro from movie250")
    intro_text = ' '.join(row[0] for row in rows)
    # Segment with jieba and space-join for WordCloud's tokenizer.
    segmented = ' '.join(jieba.cut(intro_text))
    # Open the mask picture with PIL and convert it to an array with numpy.
    # img = Image.open("../static/assets/img/tree.jpg")
    mask = np.array(Image.open("static/assets/img/tree.jpg"))
    cloud = WordCloud(
        background_color='white',
        mask=mask,            # image mask, as an array
        font_path='msyh.ttc'  # font location: C:\Windows\Fonts
    )
    cloud.generate_from_text(segmented)
    # Draw with matplotlib.pyplot and save.
    figure = plt.figure()
    plt.axis('off')
    plt.imshow(cloud)
    out_path = "static/assets/img/word_cloud.jpg"
    plt.savefig(out_path, dpi=300)
    return out_path
def signatures_cloud():
    """Render WeChat friends' signatures (column 5 of wechatfriends.txt) as a
    word cloud saved to '个性签名云图.jpg'."""
    signatures = []
    # Fix: removed the redundant f.close() — the 'with' block closes the file.
    with open('wechatfriends.txt', mode='r', encoding='utf-8') as f:
        rows = f.readlines()
        for i in rows:
            signature = i.split(',')[5]
            if signature != '':
                signatures.append(signature)
    split = jieba.cut(str(signatures), cut_all=False)
    words = ' '.join(split)
    # HTML/emoji artefacts and filler tokens excluded from the cloud.
    # Fix: dropped a duplicated stopwords.add('span') — adding the same
    # element to a set twice is a no-op.
    stopwords = STOPWORDS.copy()
    stopwords.add('span')
    stopwords.add('class')
    stopwords.add('emoji')
    stopwords.add('emoji1f334')
    stopwords.add('emoji1f388')
    stopwords.add('emoji1f33a')
    stopwords.add('emoji1f33c')
    stopwords.add('emoji1f633')
    bg_image = plt.imread('moon.jpeg')  # shape mask for the cloud
    wc = WordCloud(width=1000, height=1000, background_color='white',
                   mask=bg_image, font_path='simhei.ttf', stopwords=stopwords,
                   max_font_size=400, random_state=50)
    wc.generate_from_text(words)
    # plt.imshow(wc)
    plt.axis('off')
    wc.to_file('个性签名云图.jpg')
def genWordCloud(self):
    """Fetch comments, extract keywords, and render them as top10.png."""
    self.getComments()
    keywords = self.getKeywords()
    text_string = ','.join(keywords)
    # Optional shape mask built from self.bg when one is configured.
    background_Image = np.array(self.genBackground(self.bg)) if self.bg else None
    wc = WordCloud(
        width=600,
        height=200,
        margin=2,
        ranks_only=None,
        prefer_horizontal=0.9,
        mask=background_Image,
        color_func=None,
        max_words=200,
        stopwords=None,
        random_state=None,
        background_color='#ffffff',
        font_step=1,
        mode='RGB',
        regexp=None,
        collocations=True,
        normalize_plurals=True,
        contour_width=0,
        colormap='viridis',  # matplotlib colormap controlling the palette
        contour_color='Blues',
        repeat=False,
        scale=2,
        min_font_size=10,
        max_font_size=200)
    wc.generate_from_text(text_string)
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    wc.to_file('top10.png')
    plt.show()
def wordcloud():
    """Flask view: lazily generate the comment word cloud, then render the page."""
    # Skip regeneration when the image already exists on disk.
    if os.path.exists(r'.\static\assets\img\word.png'):
        print("当前目录下已存在词云图片....")
    else:
        conn = sqlite3.connect("comment.db")
        cursor = conn.cursor()
        # 'distinct' filters out duplicate comment rows.
        sql = "select distinct message from hotComment"
        data = cursor.execute(sql)
        text = ""
        for item in data:
            text = text + item[0]
        print(text)
        cursor.close()
        conn.close()
        # Segment the text and space-join for WordCloud.
        cut = jieba.cut(text)
        string = ' '.join(cut)
        print(len(string))
        img = Image.open(r'.\static\assets\img\wordbg.jpg')  # mask picture
        img_array = np.array(img)
        word_cloud = WordCloud(background_color='white', mask=img_array,
                               font_path="msyh.ttc")
        word_cloud.generate_from_text(string)
        # Draw the image and save it (no interactive display).
        fig = plt.figure(1)
        plt.imshow(word_cloud)
        plt.axis("off")
        # plt.show()
        plt.savefig(r'.\static\assets\img\word.png', dpi=500)
    return render_template("wordcloud.html")
def creat_wordcloud(df):
    """Build a word cloud of merchant titles and save it as 商家标题词云.jpg.

    Args:
        df: DataFrame with a 'title' column.
    """
    # Segment each title; every segmented title is followed by a space,
    # reproducing the same concatenated text as repeated '+='.
    fragments = []
    for title in df['title']:
        fragments.append(' '.join(jieba.cut(title, cut_all=False)))
        fragments.append(' ')
    text = ''.join(fragments)
    mask_img = plt.imread('data/image.jpg')
    cloud = WordCloud(background_color='white',
                      mask=mask_img,
                      font_path='msyh.ttc',
                      max_words=1000,
                      max_font_size=150,
                      min_font_size=15,
                      prefer_horizontal=1,
                      random_state=50)
    cloud.generate_from_text(text)
    # Colour glyphs from the background image's palette.
    cloud.recolor(color_func=ImageColorGenerator(mask_img))
    # Print the top-50 words by frequency for manual inspection.
    counts = WordCloud.process_text(cloud, text)
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    print(ranked[:50])
    plt.imshow(cloud)
    plt.axis('off')
    cloud.to_file('商家标题词云.jpg')
    print('生成词云成功')
def word():
    """Flask view: build the company-name word cloud, then render word.html."""
    # 1. Gather the text: every company name stored in 51job.db.
    connection = sqlite3.connect('51job.db')
    cursor = connection.cursor()
    rows = cursor.execute('select company_name from job')
    text = ''.join(row[0] for row in rows)
    cursor.close()
    connection.close()
    # 2. Segment with jieba and space-join for WordCloud.
    segmented = ' '.join(jieba.cut(text))
    # 3. Shape mask from the tree picture, as an array.
    mask = np.array(Image.open(r'./static/images/tree.jpeg'))
    cloud = WordCloud(background_color='white', mask=mask,
                      font_path='/Library/Fonts/Songti.ttc')
    cloud.generate_from_text(segmented)
    # 4. Draw the generated cloud.
    figure = plt.figure(1)
    plt.imshow(cloud)
    plt.axis('off')  # hide the axes
    plt.show()
    # Saving to a file is available via:
    # plt.savefig('./static/images/job.jpg')
    return render_template('word.html')
def make_worldcloud(wl_space_split):
    # Render the space-separated word string as a word cloud (Python 2 code:
    # note the `print u'...'` statements below).
    # wl_space_split: segmented text already joined with spaces.
    #
    # Earlier version read and segmented a file here:
    # text_from_file_with_apath = open(file_path, 'r').read()
    # wordlist_after_jieba = jieba.cut(text_from_file_with_apath, cut_all=False)
    # wl_space_split = " ".join(wordlist_after_jieba)
    # print wl_space_split
    backgroud_Image = plt.imread('E:/python/weibo_analysis/result/muban.jpg')
    print u'加载图片成功!'
    # Word-cloud styling.
    # stopwords = STOPWORDS.copy()
    # stopwords.add("真的")  # add more words to suppress as needed
    wc = WordCloud(
        width=1024,
        height=768,
        background_color='white',  # canvas background colour
        mask=backgroud_Image,      # shape mask from the picture
        font_path='E:/python/weibo_analysis/result/ziti.ttf',  # CJK font; required for Chinese glyphs
        max_words=200,             # cap on rendered words
        # stopwords=stopwords,     # stopword filtering (disabled)
        max_font_size=300,         # largest glyph size
        random_state=50,           # number of colour schemes
    )
    wc.generate_from_text(wl_space_split)  # load the text
    # Colour the glyphs from the background image.
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)   # show the cloud
    plt.axis('off')  # hide the axes
    plt.show()
    # Directory of this module; os.path.join combines the parts.
    d = path.dirname(__file__)
    wc.to_file(path.join(d, "pic.jpg"))
    print u'生成词云成功!'
def get_word_cloud(comments):
    """Render *comments* as a word cloud shaped by circle.png and save it
    to '词云图.jpg'.

    Args:
        comments: comment data (stringified before segmentation).
    """
    segmented = jieba.cut(str(comments), cut_all=False)
    words = ' '.join(segmented)
    # Words that would dominate the cloud without adding information.
    banned = STOPWORDS.copy()
    for word in ('哪吒', '电影', '我命', '不由'):
        banned.add(word)
    mask = plt.imread('circle.png')
    cloud = WordCloud(width=1024, height=768, background_color='white',
                      mask=mask, stopwords=banned, max_font_size=200,
                      random_state=50, font_path='STKAITI.TTF')
    cloud.generate_from_text(words)
    plt.imshow(cloud)
    plt.axis('off')
    plt.show()
    cloud.to_file('词云图.jpg')
def keyword_cloud(text): backgroud_Image = plt.imread(r'C:\Users\Administrator\Desktop\词云生成(wordcloud)\timg.jpg') #背景图片 print('加载图片成功!') '''''设置词云样式''' wc = WordCloud( background_color='white',# 设置背景颜色 (画布颜色) height=1200, #蒙版的长宽 width=2000, mask=backgroud_Image,# 设置背景图片 (图片蒙版) font_path='C:\Windows\Fonts\STZHONGS.TTF', # 选择字体 max_words=500, # 设置最大显示的字数 stopwords=STOPWORDS,# 设置停用词 max_font_size=150,# 设置字体最大值 #min_font_size=4,# 设置字体最小值 random_state=30,# 设置有多少种随机生成状态,即有多少种配色方案 scale = 1 #画布放大比例 ) wc.generate_from_text(text) print('开始加载文本') #改变字体颜色 img_colors = ImageColorGenerator(backgroud_Image) #字体颜色为背景图片的颜色 wc.recolor(color_func=img_colors) # 显示词云图 plt.imshow(wc) # 是否显示x轴、y轴下标 plt.axis('off') plt.show() #显示 wc.to_file(r'') #保存 print('生成词云成功!')
def wc_chinese():
    """Render a Chinese word cloud from langchao2.txt, coloured like circle.jpg.

    Relies on a module-level `d` (base directory) defined elsewhere in the file.
    """
    # NOTE(review): open(...).read() leaves the handle to the GC — confirm
    # whether a context manager is wanted here.
    text = open(path.join(d, 'langchao2.txt'), encoding='UTF-8-SIG').read()
    font_path = 'C:\Windows\Fonts\STXIHEI.TTF'
    background_Image = np.array(Image.open(path.join(d, "circle.jpg")))
    img_colors = ImageColorGenerator(background_Image)
    stopwords = set('')  # effectively empty: '' yields no characters
    wc = WordCloud(
        font_path=font_path,
        margin=2,
        mask=background_Image,
        scale=2,
        max_words=200,
        min_font_size=4,
        max_font_size=100,
        stopwords=stopwords,
        random_state=42,
        background_color='white',
    )
    wc.generate_from_text(text)
    # Inspect word frequencies; adjust stopwords based on this output.
    process_word = WordCloud.process_text(wc, text)
    sort = sorted(process_word.items(), key=lambda e: e[1], reverse=True)
    print(sort[:50])  # top 50 words by frequency
    wc.recolor(color_func=img_colors)
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout()
    plt.savefig('浪潮basic2.png', dpi=200)
    plt.show()
def data_analysis():
    """Word-cloud analysis of get.txt, saved to result.jpg."""
    with open("get.txt", 'r') as f:
        raw = f.read()
    # Segment (precise mode) and space-join for WordCloud's tokenizer.
    text = " ".join(jieba.cut(raw, cut_all=False))
    mask = plt.imread("bg.jpg")
    # Boilerplate words filtered out of the cloud.
    stopwords = set([
        "整合包", "mod", "整合", "MCBBS", "精彩", "作品", "世界", "生存",
        "附属", "冒险", "很棒", "光影", "内含音乐",
    ])
    cloud = WordCloud(width=1800, height=1000, background_color='white',
                      font_path="C:/Windows/Fonts/STFANGSO.ttf", mask=mask,
                      stopwords=stopwords, max_font_size=700, random_state=50)
    cloud.generate_from_text(text)
    # Colour glyphs from the background image.
    cloud.recolor(color_func=ImageColorGenerator(mask))
    plt.imshow(cloud)
    plt.axis('off')  # hide the axes
    plt.show()
    cloud.to_file("result.jpg")
def generate_wordcloud():
    """Display a word cloud of the movie comments in data/comments.txt."""
    comments = []
    with open('data/comments.txt', 'r', encoding='utf-8') as f:
        rows = f.readlines()
        try:
            # Comment text is the third comma-separated field of each row.
            for row in rows:
                comment = row.split(',')[2]
                if comment != '':
                    comments.append(comment)
        except Exception as e:
            # A malformed row aborts the loop; whatever was collected is used.
            print(e)
    comment_after_split = jieba.cut(str(comments), cut_all=False)
    words = ' '.join(comment_after_split)
    # High-frequency filler words excluded from the cloud.
    stopwords = STOPWORDS.copy()
    stopwords.add('电影')
    stopwords.add('一部')
    stopwords.add('一个')
    stopwords.add('没有')
    stopwords.add('什么')
    stopwords.add('有点')
    stopwords.add('感觉')
    stopwords.add('毒液')
    stopwords.add('就是')
    stopwords.add('觉得')
    bg_image = plt.imread('bg.jpg')  # shape mask for the cloud
    wc = WordCloud(background_color='lightblue', mask=bg_image,
                   font_path='STKAITI.TTF', stopwords=stopwords,
                   max_font_size=400, random_state=50)
    wc.generate_from_text(words)
    plt.imshow(wc)
    plt.axis('off')  # hide the axes
    plt.show()
def cloud_for_document(outfile=None, fulltext=None):
    """Create a wordcloud for the DOCUMENT and save the result in OUTFILE.

    Args:
        outfile: destination image path (required).
        fulltext: full document text to render (required).

    Raises:
        ValueError: if either argument is missing.
    """
    # Fix: validate with explicit exceptions instead of assert, which is
    # silently stripped when Python runs with -O.
    if outfile is None:
        raise ValueError("outfile is required")
    if fulltext is None:
        raise ValueError("fulltext is required")
    wc = WordCloud(width=1024, height=640)
    wc.generate_from_text(fulltext)
    plt.imshow(wc)
    plt.axis("off")
    plt.savefig(outfile, dpi=300)
def generate_wordcloud(text, bgcolor, width, height, max_words, mask):
    """Return a WordCloud generated from *text*.

    Args:
        text: source text for the cloud.
        bgcolor: background colour of the canvas.
        width: canvas width in pixels.
        height: canvas height in pixels.
        max_words: cap on rendered words.
        mask: optional mask spec; resolved through _load_mask() when given.
    """
    if mask is not None:
        mask = _load_mask(mask)
    wc = WordCloud(relative_scaling=.5, width=width,
                   height=height, background_color=bgcolor, mask=mask,
                   max_words=max_words)
    return wc.generate_from_text(text)
def wordcloud_image(text, **kwargs):
    """Build wordcloud from text.

    Keyword args are merged over the module-level default_settings; the
    special 'file_format' keyword (default 'PNG') selects the image encoding.

    Returns:
        (imgbuf, (width, height)): a BytesIO holding the encoded image,
        rewound to position 0, plus the image dimensions.
    """
    # Fix: dict.has_key() was removed in Python 3 — use the 'in' operator.
    format = 'PNG'
    if 'file_format' in kwargs:
        format = kwargs.pop('file_format')
    # Fix: copy the module-level defaults instead of mutating them in place,
    # so one call's overrides do not leak into later calls.
    args = dict(default_settings)
    args.update(kwargs)
    wc = WordCloud(**args)
    wc.generate_from_text(text)
    img = wc.to_image()
    del wc
    imgbuf = BytesIO()
    img.save(imgbuf, format)
    width, height = img.size
    imgbuf.seek(0)
    del img
    return imgbuf, (width, height)
# Build a word cloud from column 2 of music_message.csv, shaped by job.jpg.
df = pd.read_csv('music_message.csv', header=None)
text = ''
for line in df[2]:
    text += ' '.join(jieba.cut(line, cut_all=False))
backgroud_Image = plt.imread('job.jpg')
stopwords = set('')  # start empty; '' contributes no characters
# Filler words excluded from the cloud.
stopwords.update(['封面', 'none介绍', '介绍', '歌单', '歌曲', '我们', '自己', '没有',
                  '就是', '可以', '知道', '一起', '不是', '因为', '什么', '时候',
                  '还是', '如果', '不要', '那些', '那么', '那个', '所有', '一样',
                  '一直', '不会', '现在', '他们', '这样', '最后', '这个', '只是',
                  '有些', '其实', '开始', '曾经', '所以', '不能', '你们', '已经',
                  '后来', '一切', '一定', '这些', '一些', '只有', '还有'])
wc = WordCloud(
    background_color='white',
    mask=backgroud_Image,
    font_path='C:\Windows\Fonts\STZHONGS.TTF',
    max_words=2000,
    max_font_size=150,
    random_state=30,
    stopwords=stopwords
)
wc.generate_from_text(text)
# Inspect the highest-frequency words to spot noise worth filtering out.
process_word = WordCloud.process_text(wc, text)
sort = sorted(process_word.items(), key=lambda e:e[1], reverse=True)
print(sort[:50])
# Colour the glyphs from the background image.
img_colors = ImageColorGenerator(backgroud_Image)
wc.recolor(color_func=img_colors)
plt.imshow(wc)
plt.axis('off')
wc.to_file("活着.jpg")
print('生成词云成功!')
# Additional stopwords (continues a stopword set built earlier in the file).
stopwords.add('目前')
stopwords.add('但是')
stopwords.add('然后')
stopwords.add('这样')
stopwords.add('这种')
stopwords.add('曲唑')
stopwords.add('如果')
stopwords.add('戈舍')
# Load the background/mask image.
bg_image = plt.imread('/Users/youpeng/zhizhi/beastcancer/timg.jpg')
# Cloud parameters: canvas scale, background colour, mask shape, font,
# stopwords, and maximum glyph size.
wc = WordCloud(
    scale=4,
    background_color='white',
    mask=bg_image,
    font_path='/System/Library/Fonts/PingFang.ttc',
    stopwords=stopwords,
    max_font_size=400,
    random_state=50,
    collocations=False  # no bigram collocations, avoiding repeated word pairs
)
# Feed the segmented text (built earlier as `words`) into the cloud.
wc.generate_from_text(words)
plt.imshow(wc)
plt.axis('off')  # hide the axes
plt.show()
# Save the result locally.
wc.to_file('/Users/youpeng/zhizhi/beastcancer/endocrine.jpg')