def word_cloud(item_name, item_name_list, item_num_list, word_size_range):
    """Render a pentagon-shaped word cloud to ./analyse/<item_name>.html.

    :param item_name: output file name (without extension)
    :param item_name_list: words to display
    :param item_num_list: weight of each word (parallel to item_name_list)
    :param word_size_range: [min, max] font size range
    """
    wordcloud = WordCloud(width=1400, height=900)
    wordcloud.add("", item_name_list, item_num_list,
                  word_size_range=word_size_range, shape='pentagon')
    # FIX: render() fails when ./analyse does not exist; create it first
    # (same guard the sibling word_cloud helper with os.mkdir uses).
    out_dir = './analyse/'
    os.makedirs(out_dir, exist_ok=True)
    wordcloud.render(out_dir + item_name + '.html')
def drawWorldCloud(content, count):
    """Render *content*/*count* as a cardioid word cloud saved to a
    randomly numbered HTML file in the current directory."""
    file_id = random.randint(0, 9999)
    target = './{}.html'.format(file_id)
    chart = WordCloud('', width=1000, height=600, title_pos='center')
    chart.add(' ', content, count, shape='cardioid',
              background_color='white', max_words=200)
    chart.render(target)
def word_cloud(item_name, item_name_list, item_num_list, word_size_range):
    """Render a pentagon word cloud of item_name_list weighted by
    item_num_list into ./analyse/<item_name>.html.

    :param item_name: output file name (without extension)
    :param item_name_list: words of the cloud
    :param item_num_list: weight of each word
    :param word_size_range: [min, max] font size range
    """
    wordcloud = WordCloud(width=1400, height=900)
    wordcloud.add("", item_name_list, item_num_list,
                  word_size_range=word_size_range, shape='pentagon')
    # FIX: ensure the output directory exists — render() raises otherwise.
    os.makedirs('./analyse/', exist_ok=True)
    out_file_name = './analyse/' + item_name + '.html'
    wordcloud.render(out_file_name)
def word_cloud(buckets, path, top_keyword):
    """Render the leading aggregation buckets as a word cloud.

    :param buckets: ES-style buckets, each {'key': ..., 'doc_count': ...}
    :param path: output HTML path
    :param top_keyword: bucket index after which entries are ignored
    """
    wordcloud = WordCloud(width=1300, height=900)
    name, value = [], []
    for i, bucket in enumerate(buckets):
        if i > top_keyword:
            # FIX: once past the limit every later bucket was skipped one
            # by one with `continue`; breaking gives identical output
            # without scanning the rest of the list.
            break
        if bucket['key'] in white_key_word_list:
            continue
        name.append(bucket['key'])
        value.append(bucket['doc_count'])
    wordcloud.add("", name, value, word_size_range=[30, 120])
    wordcloud.render(path)
def worldCould_test(c, b):
    """Word cloud of summed (2-decimal) values of *b* grouped by *c*."""
    labels = pd.Series(unique(c)).values
    grouped_sums = b.groupby(c).sum().round(2).values
    weights = pd.Series(grouped_sums).values
    chart = WordCloud(width=1200, height=400)
    chart.add("2018年10月到2019年10月香飘飘各品项出库数据-云词分布",
              labels, weights, word_size_range=[20, 100])
    chart.show_config()
    chart.render("E:\\py_data_html\\bar_test015_4.html")
    print("生成完成")
def make_wordcould(self):
    """Build a word cloud HTML page from the word counts stored in
    self.collection (documents shaped {'_id': word, 'count': n})."""
    # FIX: stopwordslist() was called once per document inside the loop,
    # re-reading the stopword file every iteration; load it once.
    stopwords = stopwordslist('zhilianzhaopin/stopwords_two')
    key_list, count_list = [], []
    for doc in self.collection.find():
        if doc['_id'] in stopwords:
            continue
        key_list.append(doc['_id'])
        count_list.append(doc['count'])
    wd = WordCloud(width=1300, height=620)
    wd.add('', key_list, count_list, word_size_range=(20, 100))
    wd.render('templates/zhilianzhaopin/wordcloud.html')
def word_clout(title, name_list, num_list, word_size_range):
    """词云图 — pentagon word cloud rendered to ./analyse/<title>.html."""
    chart = WordCloud(width=1400, height=900)
    chart.add("", name_list, num_list,
              word_size_range=word_size_range, shape='pentagon')
    chart.render('./analyse/' + title + '.html')
def wordcloudtest(self):
    """Smoke-test: render self.wordcloud as a reply-count word cloud."""
    from pyecharts import WordCloud
    import random
    chart = WordCloud('回帖数词云图')
    words, weights = chart.cast(self.wordcloud)
    # Presets available: circle, cardioid, diamond, triangle-forward,
    # triangle, pentagon, star — the first (circle) is used.
    chart.add('', words, weights, shape='circle')
    chart.render('wordcloudtest.html')
    print('词云图测试成功')
def showInWordCount(seachname, result):
    """
    制作词云 — word cloud of the first 30 entries of *result*.
    :param seachname: prefix for the output file name
    :param result: mapping (or pairs convertible to dict) word -> weight
    :return:
    """
    counts = dict(result)
    top_words = list(counts.keys())[:30]
    top_weights = list(counts.values())[:30]
    chart = WordCloud(width=1300, height=620)
    chart.add("", top_words, top_weights, word_size_range=[20, 100])
    chart.render("{}wordcloud.html".format(seachname))
def draw_word_cloud(word_data):
    """Render words occurring at least 50 times as a diamond word cloud.

    Words with count < 50 are removed from *word_data* in place (the
    original behavior is preserved); the remainder are plotted.
    """
    from pyecharts import WordCloud
    word = []
    value = []
    # FIX: deleting from a dict while iterating its live items() view
    # raises RuntimeError in Python 3 — iterate over a snapshot instead.
    for k, v in list(word_data.items()):
        if v < 50:
            del word_data[k]
        else:
            word.append(k)
            value.append(v)
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", word, value, shape='diamond')
    wordcloud.render("./wordcloud.html")
def drawWorldCloud(name, rank, chatroomname):
    """Star-shaped word cloud of group members' signature keywords."""
    out_path = './result/{}_群成员签名词云图.html'.format(chatroomname)
    chart = WordCloud('{} 群成员签名词云图'.format(chatroomname),
                      width=1200, height=600, title_pos='center')
    chart.add(' ', name, rank, shape='star',
              background_color='white', max_words=200)
    chart.render(out_path)
def ciyuntu():
    """词云图 — word cloud of a fixed demo dataset of trending topics."""
    pairs = [
        ('SamSClub', 10000), ('Macys', 6181), ('AmySchumer', 4386),
        ('JurassicWorld', 4055), ('CharterCommunications', 2467),
        ('ChickFilA', 2244), ('PlanetFitness', 1898), ('PitchPerfect', 1484),
        ('Express', 1112), ('Home', 965), ('JohnnyDepp', 847),
        ('LenaDunham', 582), ('LewisHamilton', 555), ('KXAN', 550),
        ('MaryEllenMark', 462), ('FarrahAbraham', 366), ('RitaOra', 360),
        ('SerenaWilliams', 282), ('NCAAbaseballtournament', 273),
        ('PointBreak', 265),
    ]
    name = [p[0] for p in pairs]
    value = [p[1] for p in pairs]
    chart = WordCloud(width=1300, height=620)
    chart.add("", name, value, word_size_range=[20, 100])
    chart.render('./info/词云图.html')
def word_cloud(item_name, item_name_list, item_num_list, word_size_range):
    """Render a pentagon word cloud into ./analyse/<item_name>.html,
    creating the directory if needed.

    :param item_name: output file name (without extension)
    :param item_name_list: words of the cloud
    :param item_num_list: weight of each word
    :param word_size_range: [min, max] font size range
    """
    wordcloud = WordCloud(width=1400, height=900)
    wordcloud.add('', item_name_list, item_num_list,
                  word_size_range=word_size_range, shape='pentagon')
    analyse_path = './analyse/'
    # FIX: makedirs(exist_ok=True) avoids the exists()/mkdir() race of the
    # original and also handles missing intermediate directories.
    os.makedirs(analyse_path, exist_ok=True)
    out_file_name = analyse_path + item_name + '.html'
    wordcloud.render(out_file_name)
def word_cloud(item_name, item_name_list, item_num_list, word_size_range):
    """Render a circular word cloud from parallel key/value lists.

    :param item_name: output file name (without extension)
    :param item_name_list: words of the cloud
    :param item_num_list: weight of each word
    :param word_size_range: [min, max] font size range
    :return:
    """
    chart = WordCloud(width=1400, height=900)
    # Other available shape presets: 'cardioid', 'diamond',
    # 'triangle-forward', 'triangle', 'pentagon', 'star'.
    chart.add("", item_name_list, item_num_list,
              word_size_range=word_size_range, shape='circle')
    chart.render('/virtualhost/webapp/love/wechat/' + item_name + '.html')
def word_cloud(t):
    """Interleave the first 15 entries of two ranked lists into one word
    cloud.

    *t* is a pair of sequences; each element of either sequence is a
    (label, count) pair where count is a comma-grouped string like "1,234".
    """
    first, second = t[0], t[1]
    names, weights = [], []
    for idx in range(15):
        names.append(first[idx][0])
        names.append(second[idx][0])
        # Counts arrive as comma-grouped strings, e.g. "1,234".
        weights.append(int(first[idx][1].replace(',', '')))
        weights.append(int(second[idx][1].replace(',', '')))
    chart = WordCloud(width=1300, height=620)
    chart.add("", names, weights, word_size_range=[20, 100])
    chart.render(pathConfig.HTLMSREPATH + '词云.html')
def word_cloud(item_name, item_name_list, item_num_list, word_size_range):
    """Render a heart-shaped word cloud to ./analyse/<item_name>.html.

    :param item_name: output file name (without extension)
    :param item_name_list: words of the cloud
    :param item_num_list: weight of each word
    :param word_size_range: [min, max] font size range
    """
    wordcloud = WordCloud(width=1400, height=900)
    # Valid shape presets: circle (default), cardioid (the heart-shaped
    # polar curve), diamond, triangle-forward, triangle, pentagon, star.
    # FIX: 'heart' is not a recognized preset — the original's own comment
    # lists the valid values; 'cardioid' is the heart shape intended here.
    wordcloud.add("", item_name_list, item_num_list,
                  word_size_range=word_size_range, shape='cardioid')
    out_file_name = './analyse/' + item_name + '.html'
    wordcloud.render(out_file_name)
def word_count(text):
    """Segment *text* with jieba, count word frequencies, and render up to
    the top 50 words (punctuation excluded) as a star-shaped word cloud."""
    words_iter = jieba.cut(text)
    word_list = list(words_iter)
    df = pd.DataFrame(word_list, columns=['word'])
    result = df.groupby(['word']).size().sort_values(ascending=False)
    punctuation = ',.!?,。!?~`、/;;:‘’“”()()'
    words = []
    count = []
    # FIX: the original iterated range(0, 50) unconditionally and raised
    # IndexError when the text had fewer than 50 distinct words.
    for i in range(min(50, len(result))):
        if result.index[i] in punctuation:
            continue
        words.append(result.index[i])
        count.append(result[i])
    wordcloud = WordCloud('词频统计结果(top50)', height=600)
    wordcloud.add('', words, count, shape='star')
    wordcloud.render()
def pic(data, file):
    """Render a bar chart of the 30, and a word cloud of the 700, most
    common place names counted in *data* ([(name, count), ...] sorted
    descending); output files are named after *file*."""
    pyecharts.configure(jshost=None, echarts_template_dir=None,
                        force_js_embed=None, output_image=None,
                        global_theme='infographic')
    # FIX: rstrip('.txt') strips any trailing '.', 't' or 'x' *characters*,
    # not the suffix (e.g. 'text.txt' -> 'e'); remove the suffix explicitly.
    base = file[:-len('.txt')] if file.endswith('.txt') else file
    labels = [item[0] for item in data[:30]]
    counts = [item[1] for item in data[:30]]
    br = pyecharts.Bar(title=base + '最常见的地名', title_top=0,
                       width=1200, height=700)
    br.add('', labels, counts, label_pos='center', is_convert=True,
           xaxis_interval=0, yaxis_interval=0, is_yaxis_inverse=True)
    br.use_theme('infographic')
    br.render(path=base + '最常见的地名_条形图' + '.html')
    labels = [item[0] for item in data[:700]]
    counts = [item[1] for item in data[:700]]
    wordcloud = WordCloud(title=base + '最常见的地名' + '\n\n',
                          title_pos='center', width=1500, height=800)
    presets = ['circle', 'cardioid', 'diamond', 'triangle-forward',
               'triangle', 'pentagon', 'star']
    wordcloud.add('', labels, counts, shape=random.choice(presets),
                  word_gap=20, word_size_range=[10, 120], rotate_step=70)
    wordcloud.render(path=base + '最常见的地名_词云' + '.html')
def render():
    """Load word weights from data.json and render them as a circular
    word cloud (render.html in the working directory)."""
    with open('data.json', 'r') as f:
        data = json.load(f)
    name = list(data.keys())                  # 词条 (terms)
    value = [int(v) for v in data.values()]   # 权重 (weights)
    chart = WordCloud(width=1300, height=620)
    chart.add("Xia", name, value, shape='circle', word_size_range=[20, 100])
    chart.render()
def worldcloud():
    """Build a diamond word cloud from the 'title' column of jd.csv,
    keeping words that occur at least 100 times."""
    # Concatenate every title into one string.
    data_string = ""
    with open("jd.csv", "r") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            data_string += row['title']
    # Segment with jieba and count occurrences.
    seg_count = {}
    for seg in jieba.cut(data_string, cut_all=False):
        seg_count[seg] = seg_count.get(seg, 0) + 1
    # Noise tokens to exclude regardless of frequency.
    rubbish_list = [
        ' ', "(", ")", "【", "】", '+', '-', '~', '/', '、', ',', '。', '!'
    ]
    # FIX: the original filtered via copy-then-pop plus a bare
    # `except: pass`; a single comprehension keeps words with count >= 100
    # that are not noise, with identical results and no swallowed errors.
    filtered = {
        k: v for k, v in seg_count.items()
        if v >= 100 and k not in rubbish_list
    }
    word = list(filtered.keys())
    count = list(filtered.values())
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("词云图", word, count,
                  word_size_range=[20, 100], shape="diamond")
    wordcloud.render()
def analyze_signature():
    """Collect every friend's signature, segment the text via a remote
    word-split service, count word frequencies, and render them as a
    word cloud saved under data/."""
    # Gather the signature of every friend.
    data = []
    for user in friends:
        data.append(user.signature)
    # Join all signatures into one string for the segmentation request.
    data = ','.join(data)
    # A remote segmentation service is used instead of jieba/snownlp
    # because those libraries break (or bloat) a packaged .exe build.
    postData = {'data':data, 'type':'exportword', 'arg':'', 'beforeSend':'undefined'}
    response = post('http://life.chacuo.net/convertexportword',data=postData)
    # Strip the JSON envelope and escape artifacts from the raw response.
    data = response.text.replace('{"status":1,"info":"ok","data":["','').replace('\/','').replace('\\\\','')
    # Decode the \uXXXX escapes returned by the service.
    data = data.encode('utf-8').decode('unicode_escape')
    # Keep only the segment-list part before the separator line.
    data = data.split("=====================================")[0]
    # Remove punctuation / markup fragments that carry no meaning.
    stop_words = [',', ',', '.', '。', '!', '!', ':', ':', '\'', '‘', '’', '“', '”', '的', '了', '是', '=', '\r', '\n', '\r\n', '\t', '以下关键词', '[', ']', '{', '}', '(', ')', '(', ')', 'span', '<', '>', 'class', 'html', '?']
    for x in stop_words:
        data = data.replace(x, "")
    data = data.replace(' ','')
    # Split into individual words. NOTE(review): the original comments say
    # the service separates words with double spaces — the exact space
    # counts in the two literals above/below were lost when this file was
    # collapsed to one line; confirm against the service output.
    data = data.split(' ')
    # Word-frequency count.
    signature_dict = {}
    for word in data:
        if(word in signature_dict.keys()):
            signature_dict[word] += 1
        else:
            signature_dict[word] = 1
    # Draw the word cloud from the frequency table.
    name = [x for x in signature_dict.keys()]
    value = [x for x in signature_dict.values()]
    wordcloud = WordCloud('微信好友个性签名词云图')
    wordcloud.add("", name, value, word_size_range=[20, 100])
    wordcloud.render('data/好友个性签名词云.html')
def DrawWordCloud(self, title, data, width, height, word_size_range, savepath='./result'):
    """Render *data* ([(word, weight), ...]) as a diamond word cloud and
    save it to <savepath>/<title>.html.

    :param title: chart title and output file stem
    :param data: sequence of (word, weight) pairs
    :param width, height: chart size in pixels
    :param word_size_range: [min, max] font size range
    :param savepath: output directory, created if missing
    """
    # FIX: os.path.exists() returns a bool and is never None, so the
    # original `exists(savepath) is None` check was always False and the
    # output directory was never created.
    if not os.path.exists(savepath):
        os.mkdir(savepath)
    wordcloud = WordCloud(title, width=width, height=height)
    attrs = [item[0] for item in data]
    vals = [item[1] for item in data]
    wordcloud.add('', attrs, vals,
                  word_size_range=word_size_range, shape='diamond')
    wordcloud.render(os.path.join(savepath, '%s.html' % title))
def analysis(page_span):
    """For each column of the crawled dataset, plot its value distribution:
    the column at index 8 as a word cloud, every other column as a pie
    chart; each chart is rendered to <column>.html."""
    data = get_total_data(page_span)
    for param_column in list(data):
        param_count = data.groupby([param_column], as_index=True)[param_column].count()
        attr = list(param_count.index)
        value = list(param_count)
        if list(data).index(param_column) in [8]:
            chart = WordCloud(param_column, width=800, height=500,
                              title_pos='center')
            chart.add('', attr, value, shape="circle", is_label_show=True,
                      is_legend_show=False, is_area_show=True)
        else:
            chart = Pie(param_column, width=800, height=500,
                        title_pos='center')
            # FIX: the keyword was misspelled 'redius', which pyecharts
            # silently ignored; 'radius' actually applies [10, 30].
            chart.add('', attr, value, center=[50, 50], radius=[10, 30],
                      is_label_show=True, is_legend_show=False,
                      is_area_show=True)
        chart.render('%s.html' % param_column)
def plot_detail_word_cloud(file_name):
    """Segment a GBK-encoded text file with jieba, drop stopwords, and
    render the 200 most frequent words as a word cloud saved next to the
    input file."""
    stopwords_path = 'stopword.txt'
    with open(file_name, encoding='gbk') as file:
        text = file.read()
    content = text
    # Strip common punctuation and line breaks before segmentation.
    content = re.sub('[,,.。\r\n]', '', content)
    segment = jieba.lcut(content)
    words_df = pd.DataFrame({'segment': segment})
    # Stopword list: one word per line, tab-separated quoting disabled.
    stopwords = pd.read_csv(stopwords_path, index_col=False, quoting=3, sep="\t", names=['stopword'], encoding='gbk')
    words_df = words_df[~words_df.segment.isin(stopwords.stopword)]
    # Count occurrences per segment. NOTE(review): agg with a {name: func}
    # dict is removed in newer pandas — this targets an older pandas
    # release; confirm the pinned version before upgrading.
    words_stat = words_df.groupby(by=['segment'])['segment'].agg({"计数": np.size})
    words_stat = words_stat.reset_index().sort_values(by=["计数"], ascending=False)
    # Keep the 200 most frequent words.
    test = words_stat.head(200).values
    codes = [test[i][0] for i in range(0, len(test))]
    counts = [test[i][1] for i in range(0, len(test))]
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add(file_name.split('.')[0], codes, counts, word_size_range=[20, 100])
    wordcloud.render(file_name.split('.')[0] + "word_cloud.html")
def wordcloud():
    """词云图 — word cloud of comment texts for the film 无名之辈."""
    # Fetch the raw comment contents.
    wordcloud_sql = "select content from maoyan_wumingzhibei WHERE movie_name='无名之辈'"
    rows = DataBase().create(wordcloud_sql)
    # Strip punctuation/whitespace, then count identical cleaned comments.
    cleaned = [
        re.sub(r'[ /….,!。?\n]', '', "".join(row)) for row in rows
    ]
    freq = dict(Counter(cleaned))
    chart = WordCloud(width=1300, height=620)
    attr, value = chart.cast(freq)
    chart.add("", attr, value, word_size_range=[30, 90], is_more_utils=True)
    chart.render("词云图.html")
def popular_name(gener):
    """Render the 15 most popular 2010-2017 baby names for gender *gener*
    ('M' or 'F') as a diamond word cloud.

    FIX: the original hard-coded Gender == 'M' even though the gender is
    passed in, and computed an unused grand total.
    """
    years = list(range(2010, 2018))
    mask = (data['Year'].isin(years)) & (data['Gender'] == gener)
    top15 = data.loc[mask, :].groupby('Name').Count.sum().nlargest(15)
    name = list(top15.index)
    value = list(top15.values)
    wordcloud = WordCloud(width=800, height=450, background_color='#f2eada')  # feeeed
    wordcloud.add("", name, value, word_size_range=[20, 100], shape='diamond')
    return wordcloud.render('popolar name' + str(gener) + '.html')
def pic(data, file):
    """Render the 500 most frequent multi-character images in *data*
    ([(word, count), ...] sorted descending) as a word cloud with a
    randomly chosen shape; the output file is named after *file*."""
    # FIX: rstrip('.txt') strips any trailing '.', 't' or 'x' *characters*,
    # not the suffix (e.g. 'text.txt' -> 'e'); remove the suffix explicitly.
    base = file[:-len('.txt')] if file.endswith('.txt') else file
    top = data[:500]
    labels = [item[0] for item in top]
    counts = [item[1] for item in top]
    wordcloud = WordCloud(title='\n' + base + '多字意象分析',
                          title_pos='center', width=1300, height=620)
    presets = ['circle', 'cardioid', 'diamond', 'triangle-forward',
               'triangle', 'pentagon', 'star']
    wordcloud.add('', labels, counts, shape=random.choice(presets),
                  word_gap=20, word_size_range=[10, 120], rotate_step=45)
    wordcloud.render(path=base + '最高频的多字意象_词云' + '.html')
def get_pic(items, m, n):
    """Render the keywords ranked m..n (inclusive) of *items* as a diamond
    word cloud saved under the goods_review data directory."""
    from pyecharts import WordCloud
    text_rank = pd.DataFrame(items, columns=['words', 'value'])
    labels = text_rank['words'][m:n + 1]
    weights = text_rank['value'][m:n + 1]
    chart = WordCloud(width=1300, height=620)
    chart.add("", labels, weights, word_size_range=[20, 100],
              shape='diamond')
    stamp = datetime.datetime.now().strftime('%m%d%H%M')
    chart.render(r"E:\爬虫pycharm\data\goods_review\reword_cloud" + stamp + ".html")
def analysis4():
    """Extract the top-100 weighted keywords from the cleaned comment
    contents and render them as a word cloud."""
    data = clean_data()
    contents = list(data["content"].values)
    try:
        jieba.analyse.set_stop_words('stop_words.txt')
        # extract_tags yields (keyword, weight) pairs, e.g.
        # [('好看', 0.578...), ('特效', 0.296...), ...]
        tags = jieba.analyse.extract_tags(str(contents), topK=100,
                                          withWeight=True)
        name = [keyword for keyword, weight in tags]
        # Weights are floats in [0, 1); scale to integers for the chart.
        value = [int(weight * 10000) for keyword, weight in tags]
        wordcloud = WordCloud(width=1300, height=620)
        wordcloud.add("", name, value, word_size_range=[20, 100])
        wordcloud.render()
    except Exception as e:
        print(e)
def fabu_label_mon(conn, width, height, mon):
    """Word cloud of land-listing label counts for the month before *mon*.

    :param conn: open DB connection usable by pandas.read_sql
    :param width, height: chart size in pixels
    :param mon: year-month value like 201901 (str or int)
    :return: the rendered WordCloud object
    """
    year, month = str(mon)[:4], str(mon)[-2:]
    # FIX: the two concatenated SQL fragments lacked a separating space
    # ("...= '%s'and month..."), which MySQL's lexer happens to tolerate
    # but is fragile; an explicit space is added.
    sql_fabu_label = ("select name,count from kanban_fabu_label "
                      "where year(date_sub(create_date,interval 1 month)) = '%s' "
                      "and month(date_sub(create_date,interval 1 month))='%s'"
                      % (year, month))
    data_fabu_label = pd.read_sql(sql_fabu_label, conn)
    fabu_label_name = list(data_fabu_label.iloc[:, 0])
    fabu_label_count = list(data_fabu_label.iloc[:, 1])
    fabu_label_title = year + '年' + month + '月土流网发布土地标签词云图'
    fabu_label_wordcloud = WordCloud(fabu_label_title, width=width,
                                     height=height, title_pos='center')
    fabu_label_wordcloud.add("", fabu_label_name, fabu_label_count,
                             shape="circle")
    fabu_label_wordcloud.render()
    return fabu_label_wordcloud
def wordcloud(filepath):
    """Segment the text under *filepath*, count word frequencies, and
    render them as a circular word cloud next to the input.

    Tries to combine files under the directory first; falls back to
    reading *filepath* directly as a single file.
    """
    try:
        # Preferred path: merge files, then read the combined result.
        File.combfile(filepath)
        sentence = File.readfile(filepath + '\\combfile_Fin.txt')
    # FIX: the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; Exception keeps the fallback while letting those
    # propagate.
    except Exception:
        sentence = File.readfile(filepath)
        # filepath was a file in this branch; output goes to its directory.
        filepath = os.path.dirname(filepath)
    sentence = Chineseword.cutword(sentence)
    worddict = Chineseword.sentencecount(sentence)
    keylist, valuelist = worddict.keys(), worddict.values()
    outputFile = filepath + '\\osetwordcloud.html'
    cloud = WordCloud('wordcloud', width=1000, height=600)
    cloud.add(' ', keylist, valuelist, shape='circle')
    cloud.render(outputFile)
def test_wordcloud():
    """Render the demo dataset twice: once with rotation (wordcloud_0) and
    once as a diamond shape (wordcloud_1)."""
    name = ['Sam S Club', 'Macys', 'Amy Schumer', 'Jurassic World',
            'Charter Communications', 'Chick Fil A', 'Planet Fitness',
            'Pitch Perfect', 'Express', 'Home', 'Johnny Depp',
            'Lena Dunham', 'Lewis Hamilton', 'KXAN', 'Mary Ellen Mark',
            'Farrah Abraham', 'Rita Ora', 'Serena Williams',
            'NCAA baseball tournament', 'Point Break']
    value = [10000, 6181, 4386, 4055, 2467, 2244, 1898, 1484, 1112, 965,
             847, 582, 555, 550, 462, 366, 360, 282, 273, 265]
    # wordcloud_0: rotated words
    chart = WordCloud(width=1300, height=620)
    chart.add("", name, value, word_size_range=[30, 100], rotate_step=66)
    chart.show_config()
    chart.render()
    # wordcloud_1: diamond outline
    chart = WordCloud(width=1300, height=620)
    chart.add("", name, value, word_size_range=[30, 100], shape='diamond')
    chart.show_config()
    chart.render()
def pythonWordCloud(x, y, label):
    """Render x/y as a triangle-forward word cloud and open the result.

    NOTE(review): *label* is accepted but never used; kept for caller
    compatibility.
    """
    chart = WordCloud(width=1300, height=620)
    chart.add("", x, y, word_size_range=[20, 100], shape="triangle-forward")
    chart.render()
    # Relies on the shell associating .html with a browser (Windows cmd
    # opens the file with its registered application).
    os.system(r"render.html")
# A word cloud suits showing the relative frequency or importance of
# different keywords.
from pyecharts import WordCloud

words = ['python', 'jupyter', 'numpy', 'pandas', 'matplotlib', 'sklearn',
         'xgboost', 'lightGBM', 'simpy', 'keras', 'tensorflow',
         'hive', 'hadoop', 'spark']
counts = [100, 90, 65, 95, 50, 60, 70, 70, 20, 70, 80, 80, 60, 60]

cloud = WordCloud(title='数据算法常用工具', width=600, height=420)
cloud.add(name='utils', attr=words, value=counts,
          shape="circle", word_size_range=(10, 70))
cloud.render('result.词云图示范.html')
def draw_word_wc(name, count):
    """Render name/count pairs as a diamond word cloud (render.html)."""
    chart = WordCloud(width=1300, height=620)
    chart.add("", name, count, word_size_range=[20, 100], shape='diamond')
    chart.render()
#!/usr/bin/env python