def v_positions_history(self, end=yesterdaydash(), rendered=True):
    """
    river chart visualization of positions ratio history
    use text size to avoid legend overlap in some sense, e.g. legend_text_size=8
    """
    start = self.totcftable.iloc[0].date
    times = pd.date_range(start, end)
    tdata = []
    for date in times:
        sdata = sorted(
            [
                (
                    date,
                    fob.briefdailyreport(date).get("currentvalue", 0),
                    fob.name,
                )
                for fob in self.fundtradeobj
            ],
            key=lambda x: x[1],
            reverse=True,
        )
        tdata.extend(sdata)

    tr = ThemeRiver()
    tr.add(
        series_name=[foj.name for foj in self.fundtradeobj],
        data=tdata,
        label_opts=opts.LabelOpts(is_show=False),
        singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
    )
    if rendered:
        return tr.render_notebook()
    else:
        return tr
def theme_river(open_file) -> ThemeRiver:
    """
    Draw a theme-river chart with pyecharts from a file in the agreed format.
    :param open_file: path of the comma-separated input file
    :return: configured ThemeRiver chart
    """
    title_str = str(open_file).strip().split("/")[-1].split(".")[0]
    title_str = "主题河流图<" + title_str + ">"
    # open the file and read the data; the last field of each row is the theme name
    theme_list = []
    res_list = []
    with open(open_file, "r", encoding="utf-8") as file_in:
        for line in file_in.readlines():
            line = line.strip().split(",")
            res_list.append(line)
            if line[-1] not in theme_list:
                theme_list.append(line[-1])
    c = (
        ThemeRiver(
            init_opts=opts.InitOpts(
                width="1900px",
                height="900px",
                page_title=title_str,
                theme=ThemeType.SHINE,
            )
        )
        .add(
            theme_list,
            res_list,
            label_opts=opts.LabelOpts(is_show=False),
            singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
        )
        .set_global_opts(title_opts=opts.TitleOpts(title=title_str))
    )
    return c
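# Usage sketch for theme_river() above (an assumption, not part of the original code):
# the input file is expected to be comma separated, one "date,value,theme" record per
# line, matching the parsing logic in the function. File name and rows are made up.
sample_rows = ["2015/11/08,10,话题A", "2015/11/09,15,话题A",
               "2015/11/08,21,话题B", "2015/11/09,25,话题B"]
with open("topics.csv", "w", encoding="utf-8") as f:
    f.write("\n".join(sample_rows))
theme_river("topics.csv").render("topics_theme_river.html")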
def themeriver() -> ThemeRiver:
    a = [1, 10, 100, 110, 130, 130]  # simulated confirmed cases
    b = [0, 2, 20, 50, 100, 102]     # simulated recoveries
    c = [0, 1, 10, 15, 18, 18]       # simulated deaths
    x = [1, 2, 3, 4, 5, 6]           # simulated dates
    confirm = [list(z) for z in zip(x, a)]
    for i in confirm:
        i.append('确诊')
    heal = [list(z) for z in zip(x, b)]
    for i in heal:
        i.append('治愈')
    dead = [list(z) for z in zip(x, c)]
    for i in dead:
        i.append('死亡')
    data = confirm + heal + dead
    # print(data)
    chart = (
        ThemeRiver()
        .add(
            ["确诊", "治愈", "死亡"],  # legend names must match the category appended to each record
            data,
            singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
        )
        .set_global_opts(title_opts=opts.TitleOpts(title="ThemeRiver-基本示例"))
    )
    return chart
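# Usage sketch (assumption: run as a plain script; the output file name is made up):
chart = themeriver()
chart.render("themeriver_simulated_epidemic.html")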
def th_base():
    # `data` is expected to be a module-level list of [date, value, category] records
    # whose categories match the series names below.
    c = (
        ThemeRiver()
        .add(
            ["休息", "锻炼", "自学", "玩或看剧", "虚空时间", "思考或琐事"],
            data,
            singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
        )
        .set_global_opts(title_opts=opts.TitleOpts(title="ThemeRiver"))
    )
    return c
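# A hypothetical example of the module-level `data` th_base() reads -- one
# [date, hours, activity] record per activity per day (all values made up):
data = [
    ["2021/01/01", 8.0, "休息"], ["2021/01/01", 1.0, "锻炼"],
    ["2021/01/01", 3.0, "自学"], ["2021/01/01", 2.5, "玩或看剧"],
    ["2021/01/01", 1.5, "虚空时间"], ["2021/01/01", 1.0, "思考或琐事"],
    ["2021/01/02", 7.5, "休息"], ["2021/01/02", 0.5, "锻炼"],
    ["2021/01/02", 4.0, "自学"], ["2021/01/02", 2.0, "玩或看剧"],
    ["2021/01/02", 2.0, "虚空时间"], ["2021/01/02", 1.0, "思考或琐事"],
]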
def v_positions_history(self, end=yesterdaydash(), **vkwds):
    '''
    river chart visualization of positions ratio history
    use text size to avoid legend overlap in some sense, e.g. legend_text_size=8
    '''
    start = self.totcftable.iloc[0].date
    times = pd.date_range(start, end)
    tdata = []
    for date in times:
        sdata = sorted(
            [(date, fob.briefdailyreport(date).get('currentvalue', 0), fob.aim.name)
             for fob in self.fundtradeobj],
            key=lambda x: x[1],
            reverse=True,
        )
        tdata.extend(sdata)

    # pyecharts 0.5.x-style keyword arguments (is_datazoom_show, legend_top, ...)
    tr = ThemeRiver()
    tr.add(
        [foj.aim.name for foj in self.fundtradeobj],
        tdata,
        is_datazoom_show=True,
        is_label_show=False,
        legend_top="0%",
        legend_orient='horizontal',
        **vkwds
    )
    return tr
def dbscore(movie_name):
    # read the data
    with open(movie_name + '\\' + movie_name + 'dbscores.txt', 'r+', encoding='utf-8') as m:
        allthings = m.read().split(',')
    del allthings[-1]
    all_date = []
    points_dic = {}
    # bucket by rating
    for div in allthings:
        point, date = div.split()
        if date not in all_date:
            points_dic[date] = {'很差': 0, '较差': 0, '还行': 0, '推荐': 0, '力荐': 0}
            all_date.append(date)
        if point == '很差':
            points_dic[date]['很差'] += 1
        elif point == '较差':
            points_dic[date]['较差'] += 1
        elif point == '还行':
            points_dic[date]['还行'] += 1
        elif point == '推荐':
            points_dic[date]['推荐'] += 1
        else:
            points_dic[date]['力荐'] += 1
    # reshape the data
    data = []
    for month, grade in points_dic.items():
        for hzc, num in grade.items():
            data.append([month, num, hzc])  # [date, value, category]
    # draw the chart
    themeriver = (
        ThemeRiver(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS))
        .add(
            ['很差', '较差', '还行', '推荐', '力荐'],
            data,
            singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=movie_name + " 豆瓣评价分布图", subtitle=None),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            datazoom_opts=[opts.DataZoomOpts(is_show=True, range_start=0, range_end=100)],
        )
    )
    themeriver.render(movie_name + '\\' + movie_name + 'dbScore.html')
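# A minimal sketch of the record format dbscore() expects: comma-separated
# "<评价> <日期>" pairs with a trailing comma (hence the `del allthings[-1]`).
# The sample values below are assumptions, not taken from a real scrape.
sample = "力荐 2020-01,推荐 2020-01,还行 2020-02,很差 2020-02,"
records = sample.split(",")[:-1]   # drop the empty element after the trailing comma
point, date = records[0].split()   # -> ("力荐", "2020-01")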
def test_themeriver_base(fake_writer):
    data = [
        ["2015/11/08", 10, "DQ"],
        ["2015/11/20", 30, "TY"],
        ["2015/11/08", 21, "SS"],
        ["2015/11/14", 7, "QG"],
        ["2015/11/22", 4, "SY"],
        ["2015/11/20", 26, "DD"],
    ]
    c = ThemeRiver().add(
        ["DQ", "TY", "SS", "QG", "SY", "DD"],
        data,
        singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
    )
    c.render()
    _, content = fake_writer.call_args[0]
    eq_(c.theme, "white")
    eq_(c.renderer, "canvas")
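# A self-contained sketch of how the fake_writer argument is assumed to be injected;
# the patch target mirrors the pattern used in pyecharts' own test suite and is an
# assumption, not taken from this file.
from unittest.mock import patch

@patch("pyecharts.render.engine.write_utf8_html_file")
def _sketch_fake_writer(fake_writer):
    chart = ThemeRiver().add(
        ["DQ"], [["2015/11/08", 10, "DQ"]],
        singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
    )
    chart.render()
    filename, content = fake_writer.call_args[0]
    assert content  # the rendered HTML was captured instead of written to disk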
def draw_river_picture(series_name, data, to_file, svg_name):
    """
    Theme-river chart
    :param series_name: [str]
    :param data: [[date, value, series_name]]
    :param svg_name: output svg file
    :param to_file: rendered html file
    :return:
    """
    bar = (
        ThemeRiver(init_opts=opts.InitOpts(width="800px", height="600px", bg_color='white'))
        .add(
            series_name=series_name,
            data=data,
            singleaxis_opts=opts.SingleAxisOpts(
                pos_top="2%",
                pos_bottom="10%",
                pos_right="20%",
                type_="time",
                name_textstyle_opts=opts.TextStyleOpts(font_size=25),
                axislabel_opts=opts.LabelOpts(font_size=25),
            ),
            label_opts=opts.LabelOpts(is_show=False, position='bottom', distance='200px'),
        )
        .set_global_opts(
            tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="line"),
            toolbox_opts=opts.ToolboxOpts(
                feature=opts.ToolBoxFeatureOpts(
                    # data_zoom=opts.ToolBoxFeatureDataZoomOpts(is_show=False),
                    # brush=opts.ToolBoxFeatureBrushOpts(is_show=False),
                )
            ),
            legend_opts=opts.LegendOpts(
                type_="scroll",
                pos_left="82%",
                pos_top="18%",
                orient="vertical",
                backgroundColor='rgb(255, 255, 255)',
                item_width=40,
                item_height=20,
                textstyle_opts=opts.TextStyleOpts(font_size=25),
            ),
        )
    )
    make_snapshot(snapshot, bar.render(to_file), svg_name)  # render the chart to an svg snapshot
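# Usage sketch for draw_river_picture(); `snapshot` is assumed to come from
# snapshot_selenium (which needs a local webdriver), and the series/data are made up.
sample_series = ["系列A", "系列B"]
sample_data = [
    ["2021/01/01", 3, "系列A"], ["2021/01/02", 5, "系列A"],
    ["2021/01/01", 4, "系列B"], ["2021/01/02", 2, "系列B"],
]
draw_river_picture(sample_series, sample_data, to_file="river.html", svg_name="river.svg")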
def draw_river_picture_right_legend(series_name, data, to_file, svg_name, pos_right, width='800px'):
    """
    Theme-river chart (legend on the right)
    :param width:
    :param pos_right:
    :param series_name: [str]
    :param data: [[date, value, series_name]]
    :param svg_name: output svg file
    :param to_file: rendered html file
    :return:
    """
    bar = (
        ThemeRiver(init_opts=opts.InitOpts(width="800px", height="600px", bg_color='white'))
        .add(
            series_name=series_name,
            data=data,
            singleaxis_opts=opts.SingleAxisOpts(
                pos_top="50",
                pos_bottom="50",
                type_="time",
                name_textstyle_opts=opts.TextStyleOpts(font_size=20),
                axislabel_opts=opts.LabelOpts(font_size=20),
            ),
            label_opts=opts.LabelOpts(is_show=False),
        )
        .set_global_opts(
            tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="line"),
            toolbox_opts=opts.ToolboxOpts(
                feature=opts.ToolBoxFeatureOpts(
                    data_zoom=opts.ToolBoxFeatureDataZoomOpts(is_show=False),
                    # brush=opts.ToolBoxFeatureBrushOpts(is_show=False),
                )
            ),
            legend_opts=opts.LegendOpts(
                pos_right=pos_right,
                pos_top="2%",
                orient="vertical",
                item_width=30,
                item_height=14,
                textstyle_opts=opts.TextStyleOpts(font_size=18),
            ),
        )
    )
    grid = (
        Grid(init_opts=opts.InitOpts(width=width, height="600px", bg_color='rgb(255, 255, 255)'))
        .add(bar, grid_opts=opts.GridOpts(pos_right='10%'), is_control_axis_index=True)
    )
    make_snapshot(snapshot, grid.render(to_file), svg_name)  # render the chart to an svg snapshot
def list_2theme_river(res_list, opts_title_name) -> ThemeRiver:
    """
    Draw a theme-river chart with pyecharts from the given list data.
    :param res_list:
    :param opts_title_name:
    :return:
    """
    c = (
        ThemeRiver(
            init_opts=opts.InitOpts(
                width="1920px",
                height="1080px",
                page_title=opts_title_name,
                theme=ThemeType.SHINE,
            )
        )
        .add(
            ['A1课题', '重点滚动课题', 'A2课题', 'B类课题', '部软科学', '院软科学', '团队课题', '支撑类'],
            res_list,
            label_opts=opts.LabelOpts(is_show=False),
            singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
        )
        .set_global_opts(title_opts=opts.TitleOpts(title=opts_title_name))
    )
    return c
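# Usage sketch for list_2theme_river(); the rows are made-up [date, value, category]
# records whose categories must be among the series names hard-coded above.
sample_rows = [
    ["2020/01/01", 3, "A1课题"], ["2020/02/01", 5, "A1课题"],
    ["2020/01/01", 2, "B类课题"], ["2020/02/01", 4, "B类课题"],
]
list_2theme_river(sample_rows, "课题主题河流图示例").render("topic_theme_river.html")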
def bar_base() -> str:
    # Despite the name, this builds a ThemeRiver and returns its option payload as a
    # JSON string (dump_options_with_quotes), e.g. for a web front end to consume.
    data = [
        ["2015/11/08", 10, "DQ"], ["2015/11/09", 15, "DQ"], ["2015/11/10", 35, "DQ"],
        ["2015/11/14", 7, "DQ"], ["2015/11/15", 2, "DQ"],
    ]
    c = (
        ThemeRiver()
        .add(
            ["DQ", "TY", "SS", "QG", "SY", "DD"],
            data,
            singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
        )
        .set_global_opts(title_opts=opts.TitleOpts(title="ThemeRiver-基本示例"))
        .dump_options_with_quotes()
    )
    return c
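# A quick local sanity check of bar_base(): assuming the options contain no JsCode
# snippets, the dump_options_with_quotes payload should parse as plain JSON.
import json
options = json.loads(bar_base())
assert "series" in options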
def IMDBscore(movie_name):
    # parse the scraped data
    with open(movie_name + '\\' + movie_name + 'Dscores.txt', 'r+', encoding='utf-8') as m:
        allthings = m.read().split(',')
    del allthings[-1]
    all_date = []
    points_dic = {}
    # bucket by score
    for div in allthings:
        point, date = div.split()
        if date not in all_date:
            points_dic[date] = {'好评': 0, '中评': 0, '差评': 0}
            all_date.append(date)
        if int(point) <= 3:
            points_dic[date]['差评'] += 1
        elif int(point) >= 7:
            points_dic[date]['好评'] += 1
        else:
            points_dic[date]['中评'] += 1
    # reshape for plotting
    data = []
    for month, grade in points_dic.items():
        for hzc, num in grade.items():
            data.append([month, num, hzc])  # [date, value, category]
    # draw the chart
    themeriver = (
        ThemeRiver(init_opts=opts.InitOpts(theme=ThemeType.WESTEROS))
        .add(
            ['好评', '中评', '差评'],
            data,
            singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=movie_name + " IMDB评价分布主题河流图", subtitle=None),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            datazoom_opts=[opts.DataZoomOpts(is_show=True, range_start=0, range_end=100)],
        )
    )
    themeriver.render(movie_name + '\\' + movie_name + 'IMDBScore.html')
def themeriver_example() -> ThemeRiver:
    data = [
        ["2015/11/08", 10, "DQ"], ["2015/11/09", 15, "DQ"], ["2015/11/10", 35, "DQ"],
        ["2015/11/14", 7, "DQ"], ["2015/11/15", 2, "DQ"], ["2015/11/16", 17, "DQ"],
        ["2015/11/17", 33, "DQ"], ["2015/11/18", 40, "DQ"], ["2015/11/19", 32, "DQ"],
        ["2015/11/20", 26, "DQ"],
        ["2015/11/08", 35, "TY"], ["2015/11/09", 36, "TY"], ["2015/11/10", 37, "TY"],
        ["2015/11/11", 22, "TY"], ["2015/11/12", 24, "TY"], ["2015/11/13", 26, "TY"],
        ["2015/11/14", 34, "TY"], ["2015/11/15", 21, "TY"], ["2015/11/16", 18, "TY"],
        ["2015/11/17", 45, "TY"], ["2015/11/18", 32, "TY"], ["2015/11/19", 35, "TY"],
        ["2015/11/20", 30, "TY"],
        ["2015/11/08", 21, "SS"], ["2015/11/09", 25, "SS"], ["2015/11/10", 27, "SS"],
        ["2015/11/11", 23, "SS"], ["2015/11/12", 24, "SS"], ["2015/11/13", 21, "SS"],
        ["2015/11/14", 35, "SS"], ["2015/11/15", 39, "SS"], ["2015/11/16", 40, "SS"],
        ["2015/11/17", 36, "SS"], ["2015/11/18", 33, "SS"], ["2015/11/19", 43, "SS"],
        ["2015/11/20", 40, "SS"],
        ["2015/11/14", 7, "QG"], ["2015/11/15", 2, "QG"], ["2015/11/16", 17, "QG"],
        ["2015/11/17", 33, "QG"], ["2015/11/18", 40, "QG"], ["2015/11/19", 32, "QG"],
        ["2015/11/20", 26, "QG"], ["2015/11/21", 35, "QG"], ["2015/11/22", 40, "QG"],
        ["2015/11/23", 32, "QG"], ["2015/11/24", 26, "QG"], ["2015/11/25", 22, "QG"],
        ["2015/11/08", 10, "SY"], ["2015/11/09", 15, "SY"], ["2015/11/10", 35, "SY"],
        ["2015/11/11", 38, "SY"], ["2015/11/12", 22, "SY"], ["2015/11/13", 16, "SY"],
        ["2015/11/14", 7, "SY"], ["2015/11/15", 2, "SY"], ["2015/11/16", 17, "SY"],
        ["2015/11/17", 33, "SY"], ["2015/11/18", 40, "SY"], ["2015/11/19", 32, "SY"],
        ["2015/11/20", 26, "SY"], ["2015/11/21", 35, "SY"], ["2015/11/22", 4, "SY"],
        ["2015/11/23", 32, "SY"], ["2015/11/24", 26, "SY"], ["2015/11/25", 22, "SY"],
        ["2015/11/08", 10, "DD"], ["2015/11/09", 15, "DD"], ["2015/11/10", 35, "DD"],
        ["2015/11/11", 38, "DD"], ["2015/11/12", 22, "DD"], ["2015/11/13", 16, "DD"],
        ["2015/11/14", 7, "DD"], ["2015/11/15", 2, "DD"], ["2015/11/16", 17, "DD"],
        ["2015/11/17", 33, "DD"], ["2015/11/18", 4, "DD"], ["2015/11/19", 32, "DD"],
        ["2015/11/20", 26, "DD"],
    ]
    c = (
        ThemeRiver()
        .add(
            ["DQ", "TY", "SS", "QG", "SY", "DD"],
            data,
            singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
        )
        .set_global_opts(title_opts=opts.TitleOpts(title="ThemeRiver-基本示例"))
    )
    return c
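# Usage sketch: render the gallery-style example above to a standalone HTML file
# (the file name is an assumption).
themeriver_example().render("themeriver_example.html")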
["2015/11/20", 26, "DD"], ["2015/11/21", 35, "DD"], ["2015/11/22", 40, "DD"], ["2015/11/23", 32, "DD"], ["2015/11/24", 26, "DD"], ["2015/11/25", 22, "DD"], ["2015/11/26", 16, "DD"], ["2015/11/27", 22, "DD"], ["2015/11/28", 10, "DD"], ] c = (ThemeRiver(init_opts=opts.InitOpts(width="1600px", height="800px")).add( series_name=series_data, data=data, singleaxis_opts=opts.SingleAxisOpts( pos_top="50", pos_bottom="50", type_="time", )).set_global_opts(tooltip_opts=opts.TooltipOpts( trigger="axis", axis_pointer_type="line")).render("theme_river.html")) # Section4 词云图 from pyecharts.charts import WordCloud data = [ ("生活资源", "999"), ("供热管理", "888"), ("供气质量", "777"), ("生活用水管理", "688"), ("一次供水问题", "588"), ("交通运输", "516"),
def scoring_trend_analysis(self, flag):
    choose = flag
    with open(r'./用户影评相关数据/' + self.filmname + '用户影评相关信息.json', 'r', encoding='UTF-8') as f:
        t1 = json.load(f, strict=False)
    if choose == '1':
        self.textBrowser.append("开始生成" + self.filmname + "的评论推荐度与日期分析柱状图......")
        QApplication.processEvents()
    if choose == '2':
        self.textBrowser.append("开始生成" + self.filmname + "的评论推荐度与日期分析折线图......")
        QApplication.processEvents()
    if choose == '3':
        self.textBrowser.append("开始生成" + self.filmname + "的评论推荐度与日期分析河状图......")
        QApplication.processEvents()
    # pull out the rating data
    score, date, val, command_date_list = [], [], [], []
    result = {}
    for each in t1:
        command_date_list.append((each['用户推荐度'], each['用户评论时间']))
    # count how many of each rating there are per date
    for i in set(list(command_date_list)):
        result[i] = command_date_list.count(i)  # dict
    info = []
    # repack the counted data
    for key in result:
        score = key[0]
        date = key[1]
        val = result[key]
        info.append([score, date, val])
    info_new = DataFrame(info)  # convert to a DataFrame
    info_new.columns = ['score', 'date', 'votes']
    # sort the df by date, ascending
    info_new.sort_values('date', inplace=True)
    # fill in missing rows: each date should have all five rating levels, so check each
    # level in turn and insert a zero row into a new df when it is absent
    mark = 0
    creat_df = pd.DataFrame(columns=['score', 'date', 'votes'])  # empty DataFrame
    for i in list(info_new['date']):
        location = info_new[(info_new.date == i) & (info_new.score == "力荐")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["力荐", i, 0]
            mark += 1
        location = info_new[(info_new.date == i) & (info_new.score == "推荐")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["推荐", i, 0]
            mark += 1
        location = info_new[(info_new.date == i) & (info_new.score == "还行")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["还行", i, 0]
            mark += 1
        location = info_new[(info_new.date == i) & (info_new.score == "较差")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["较差", i, 0]
            mark += 1
        location = info_new[(info_new.date == i) & (info_new.score == "很差")].index.tolist()
        if location == []:
            creat_df.loc[mark] = ["很差", i, 0]
            mark += 1
    info_new = info_new.append(creat_df.drop_duplicates(), ignore_index=True)
    command_date_list = []
    # sort by date ascending so the earliest/latest dates are easy to find for later interpolation
    info_new.sort_values('date', inplace=True)
    for index, row in info_new.iterrows():
        command_date_list.append([row['date'], row['votes'], row['score']])
    attr, v1, v2, v3, v4, v5 = [], [], [], [], [], []
    attr = list(sorted(set(info_new['date'])))
    for i in attr:
        v1.append(int(info_new[(info_new['date'] == i) & (info_new['score'] == "力荐")]['votes']))
        v2.append(int(info_new[(info_new['date'] == i) & (info_new['score'] == "推荐")]['votes']))
        v3.append(int(info_new[(info_new['date'] == i) & (info_new['score'] == "还行")]['votes']))
        v4.append(int(info_new[(info_new['date'] == i) & (info_new['score'] == "较差")]['votes']))
        v5.append(int(info_new[(info_new['date'] == i) & (info_new['score'] == "很差")]['votes']))
    # bar chart
    if choose == '1':
        c = (
            Bar(init_opts=opts.InitOpts(width="665px", height="500px"))
            .add_xaxis(attr)
            .add_yaxis("力荐", v1, stack="stack1")
            .add_yaxis("推荐", v2, stack="stack1")
            .add_yaxis("还行", v3, stack="stack1")
            .add_yaxis("较差", v4, stack="stack1")
            .add_yaxis("很差", v5, stack="stack1")
            .reversal_axis()
            .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
            .set_global_opts(
                tooltip_opts=opts.TooltipOpts(is_show=True),
                toolbox_opts=opts.ToolboxOpts(is_show=True, pos_right="30%"),
                title_opts=opts.TitleOpts(title="用户评论推荐度柱状图"),
                datazoom_opts=opts.DataZoomOpts(type_="inside", range_start=0, range_end=100),
            )
            .render("./爬虫数据关联可视化/" + self.filmname + "影评可视化数据/bar_reversal_axis.html")
        )
        QApplication.processEvents()
        self.comment_columnar_pic = (
            Bar(init_opts=opts.InitOpts(width="665px", height="500px"))
            .add_xaxis(attr)
            .add_yaxis("力荐", v1, stack="stack1")
            .add_yaxis("推荐", v2, stack="stack1")
            .add_yaxis("还行", v3, stack="stack1")
            .add_yaxis("较差", v4, stack="stack1")
            .add_yaxis("很差", v5, stack="stack1")
            .reversal_axis()
            .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
            .set_global_opts(
                tooltip_opts=opts.TooltipOpts(is_show=True),
                toolbox_opts=opts.ToolboxOpts(is_show=True, pos_right="30%"),
                title_opts=opts.TitleOpts(title="用户评论推荐度柱状图"),
                datazoom_opts=opts.DataZoomOpts(type_="inside", range_start=0, range_end=100),
            )
        )
        self.saveflag = '4'
        self.textBrowser.append("开始生成" + self.filmname + "的评论推荐度与日期分析柱状图完成!")
        QApplication.processEvents()
        self.show_scoring_trend_analysis_columnar()
        QApplication.processEvents()

    # line chart
    if choose == '2':
        polyline = (
            Line(init_opts=opts.InitOpts(width="665px", height="500px"))
            .add_xaxis(attr)
            .add_yaxis("力荐", v1, stack="stack1")
            .add_yaxis("推荐", v2, stack="stack1")
            .add_yaxis("还行", v3, stack="stack1")
            .add_yaxis("较差", v4, stack="stack1")
            .add_yaxis("很差", v5, stack="stack1")
            .set_global_opts(
                tooltip_opts=opts.TooltipOpts(is_show=True),
                toolbox_opts=opts.ToolboxOpts(is_show=True, pos_right="30%"),
                title_opts=opts.TitleOpts(title="用户评论推荐度折线图"),
                datazoom_opts=opts.DataZoomOpts(type_="inside", range_start=0, range_end=100),
            )
            .render("./爬虫数据关联可视化/" + self.filmname + "影评可视化数据/line_markpoint.html")
        )
        QApplication.processEvents()
        self.comment_polyline_pic = (
            Line(init_opts=opts.InitOpts(width="665px", height="500px"))
            .add_xaxis(attr)
            .add_yaxis("力荐", v1, stack="stack1")
            .add_yaxis("推荐", v2, stack="stack1")
            .add_yaxis("还行", v3, stack="stack1")
            .add_yaxis("较差", v4, stack="stack1")
            .add_yaxis("很差", v5, stack="stack1")
            .set_global_opts(
                tooltip_opts=opts.TooltipOpts(is_show=True),
                toolbox_opts=opts.ToolboxOpts(is_show=True, pos_right="30%"),
                title_opts=opts.TitleOpts(title="用户评论推荐度折线图"),
                datazoom_opts=opts.DataZoomOpts(type_="inside", range_start=0, range_end=100),
            )
        )
        self.saveflag = '5'
        self.textBrowser.append(self.filmname + "的评论推荐度与日期分析折线图完成!")
        QApplication.processEvents()
        self.show_scoring_trend_analysis_polyline()
        QApplication.processEvents()

    # theme-river chart
    if choose == '3':
        river = (
            ThemeRiver(init_opts=opts.InitOpts(width="665px", height="500px"))
            .add(
                series_name=['力荐', '推荐', '还行', '较差', '很差'],
                data=command_date_list,
                singleaxis_opts=opts.SingleAxisOpts(pos_top="50", pos_bottom="50", type_="time"),
            )
            .set_global_opts(
                tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis", axis_pointer_type="line"),
                toolbox_opts=opts.ToolboxOpts(is_show=True, pos_right="30%"),
                title_opts=opts.TitleOpts(title="推荐度河流图"),
                datazoom_opts=opts.DataZoomOpts(type_="inside", range_start=0, range_end=100),
            )
            .render("./爬虫数据关联可视化/" + self.filmname + "影评可视化数据/theme_river.html")
        )
        QApplication.processEvents()
        self.comment_river_pic = (
            ThemeRiver(init_opts=opts.InitOpts(width="665px", height="500px"))
            .add(
                series_name=['力荐', '推荐', '还行', '较差', '很差'],
                data=command_date_list,
                singleaxis_opts=opts.SingleAxisOpts(pos_top="50", pos_bottom="50", type_="time"),
            )
            .set_global_opts(
                tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis", axis_pointer_type="line"),
                toolbox_opts=opts.ToolboxOpts(is_show=True, pos_right="30%"),
                title_opts=opts.TitleOpts(title="推荐度河流图"),
                datazoom_opts=opts.DataZoomOpts(type_="inside", range_start=0, range_end=100),
            )
        )
        self.saveflag = '6'
        self.textBrowser.append(self.filmname + "的评论推荐度与日期分析河状图完成!")
        QApplication.processEvents()
        self.show_scoring_trend_analysis_river()
        QApplication.processEvents()
        dim=7,
        name="等级",
        type_="category",
        data=["优", "良", "轻度污染", "中度污染", "重度污染", "严重污染"],
    ),
]

parallel = Parallel()
# add the axes and the data
parallel.add_schema(schema=schema).add("", data)
parallel.render_notebook()

# %% [markdown]
# ### Radar -- radar chart

radar = Radar()
radar.add_schema(schema=[
    opts.RadarIndicatorItem(name=_k, max_=200) for _k in list("ABCDFG")
])
radar.add("Expectation", [Faker.values()]).add("Reality", [Faker.values()])
radar.render_notebook()

# %% [markdown]
# ### ThemeRiver -- theme-river chart

themeriver = ThemeRiver()
with open("data/themeriver.json") as j:
    data = json.load(j)
cats = list(set([i[-1] for i in data]))
themeriver.add(cats, data, singleaxis_opts=opts.SingleAxisOpts(type_="time"))
themeriver.render_notebook()
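# The ThemeRiver cell above assumes data/themeriver.json holds a flat list of
# [date, value, category] records; for reference, a minimal stand-in file could be
# created like this (values are made up):
import json, os
os.makedirs("data", exist_ok=True)
with open("data/themeriver.json", "w") as j:
    json.dump([["2015/11/08", 10, "DQ"], ["2015/11/08", 21, "SS"],
               ["2015/11/09", 15, "DQ"], ["2015/11/09", 25, "SS"]], j)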
# Genre theme-river chart
data_river = []
for g in genres:
    df_river = pd.DataFrame(df_year['year'].unique(), columns=['year'], index=df_year['year'].unique())
    df_river['year'] = df_river['year'].apply(str)
    df_river['count'] = df_year[df_year[g] == 1].groupby(by=['year'])[g].sum()
    df_river['genre'] = np.full(len(df_river), g)
    df_river['count'] = df_river['count'].fillna(0)
    data_river.extend(df_river.values.tolist())

river = ThemeRiver(init_opts=opts.InitOpts(width="2000px", height="600px", theme=ThemeType.LIGHT))
river.add(
    series_name=genres,
    data=data_river,
    label_opts=opts.LabelOpts(font_size=10),
    singleaxis_opts=opts.SingleAxisOpts(
        pos_top="50",
        pos_bottom="50",
        type_="time",
    ),
)
river.set_global_opts(
    tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="line"),
    title_opts=opts.TitleOpts(title="1901-2020 曲風流變",
                              subtitle="1901-2020",
                              pos_bottom="85%",
["2015/11/09", 15, "DD"], ["2015/11/10", 35, "DD"], ["2015/11/11", 38, "DD"], ["2015/11/12", 22, "DD"], ["2015/11/13", 16, "DD"], ["2015/11/14", 7, "DD"], ["2015/11/15", 2, "DD"], ["2015/11/16", 17, "DD"], ["2015/11/17", 33, "DD"], ["2015/11/18", 4, "DD"], ["2015/11/19", 32, "DD"], ["2015/11/20", 26, "DD"], ["2015/11/21", 35, "DD"], ["2015/11/22", 40, "DD"], ["2015/11/23", 32, "DD"], ["2015/11/24", 26, "DD"], ["2015/11/25", 22, "DD"], ["2015/11/26", 16, "DD"], ["2015/11/27", 22, "DD"], ["2015/11/28", 10, "DD"], ] (ThemeRiver().add( series_name=x_data, data=y_data, singleaxis_opts=opts.SingleAxisOpts(pos_top="50", pos_bottom="50", type_="time"), ).set_global_opts(tooltip_opts=opts.TooltipOpts( trigger="axis", axis_pointer_type="line")).render("theme_river.html"))
def get_themeriver(data, lengend, num_frames):
    '''
    data = [
        ["2015/11/08", 10, "DQ"], ["2015/11/09", 15, "DQ"], ["2015/11/10", 35, "DQ"],
        ["2015/11/14", 7, "DQ"], ["2015/11/15", 2, "DQ"], ["2015/11/16", 17, "DQ"],
        ["2015/11/17", 33, "DQ"], ["2015/11/18", 40, "DQ"], ["2015/11/19", 32, "DQ"],
        ["2015/11/20", 26, "DQ"],
        ["2015/11/08", 35, "TY"], ["2015/11/09", 36, "TY"], ["2015/11/10", 37, "TY"],
        ["2015/11/11", 22, "TY"], ["2015/11/12", 24, "TY"], ["2015/11/13", 26, "TY"],
        ["2015/11/14", 34, "TY"], ["2015/11/15", 21, "TY"], ["2015/11/16", 18, "TY"],
        ["2015/11/17", 45, "TY"], ["2015/11/18", 32, "TY"], ["2015/11/19", 35, "TY"],
        ["2015/11/20", 30, "TY"],
        ["2015/11/08", 21, "SS"], ["2015/11/09", 25, "SS"], ["2015/11/10", 27, "SS"],
        ["2015/11/11", 23, "SS"], ["2015/11/12", 24, "SS"], ["2015/11/13", 21, "SS"],
        ["2015/11/14", 35, "SS"], ["2015/11/15", 39, "SS"], ["2015/11/16", 40, "SS"],
        ["2015/11/17", 36, "SS"], ["2015/11/18", 33, "SS"], ["2015/11/19", 43, "SS"],
        ["2015/11/20", 40, "SS"],
        ["2015/11/14", 7, "QG"], ["2015/11/15", 2, "QG"], ["2015/11/16", 17, "QG"],
        ["2015/11/17", 33, "QG"], ["2015/11/18", 40, "QG"], ["2015/11/19", 32, "QG"],
        ["2015/11/20", 26, "QG"], ["2015/11/21", 35, "QG"], ["2015/11/22", 40, "QG"],
        ["2015/11/23", 32, "QG"], ["2015/11/24", 26, "QG"], ["2015/11/25", 22, "QG"],
        ["2015/11/08", 10, "SY"], ["2015/11/09", 15, "SY"], ["2015/11/10", 35, "SY"],
        ["2015/11/11", 38, "SY"], ["2015/11/12", 22, "SY"], ["2015/11/13", 16, "SY"],
        ["2015/11/14", 7, "SY"], ["2015/11/15", 2, "SY"], ["2015/11/16", 17, "SY"],
        ["2015/11/17", 33, "SY"], ["2015/11/18", 40, "SY"], ["2015/11/19", 32, "SY"],
        ["2015/11/20", 26, "SY"], ["2015/11/21", 35, "SY"], ["2015/11/22", 4, "SY"],
        ["2015/11/23", 32, "SY"], ["2015/11/24", 26, "SY"], ["2015/11/25", 22, "SY"],
        ["2015/11/08", 10, "DD"], ["2015/11/09", 15, "DD"], ["2015/11/10", 35, "DD"],
        ["2015/11/11", 38, "DD"], ["2015/11/12", 22, "DD"], ["2015/11/13", 16, "DD"],
        ["2015/11/14", 7, "DD"], ["2015/11/15", 2, "DD"], ["2015/11/16", 17, "DD"],
        ["2015/11/17", 33, "DD"], ["2015/11/18", 4, "DD"], ["2015/11/19", 32, "DD"],
        ["2015/11/20", 26, "DD"],
    ]
    '''
    themeriver = (
        ThemeRiver()
        .add(
            # ["DQ", "TY", "SS", "QG", "SY", "DD"],
            lengend,
            data,
            # the single axis can be tuned to reposition the river inside the canvas
            singleaxis_opts=opts.SingleAxisOpts(pos_bottom="50%", min_=1, max_=num_frames),
            label_opts=opts.LabelOpts(is_show=False),
            tooltip_opts=opts.TooltipOpts(formatter="{a}"),  # does not work; reason unknown
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="事件链河流图", subtitle="图例按突发性程度降序排序"),
            # tooltip_opts=opts.TooltipOpts(is_show=True, trigger="item", formatter='{@[0]}'),  # formatter has no effect so far???
            legend_opts=opts.LegendOpts(type_='scroll', pos_left="5%", orient='vertical', pos_top="55%"),
            # datazoom_opts=opts.DataZoomOpts(),  # legend settings
        )
    )
    return themeriver
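# Usage sketch for get_themeriver(); the legend entries and frame values are made up.
# Frames are plain integers here because the single axis is configured with
# min_=1 / max_=num_frames rather than as a time axis.
sample_legend = ["事件A", "事件B"]
sample_data = [[1, 5, "事件A"], [2, 9, "事件A"], [3, 4, "事件A"],
               [1, 3, "事件B"], [2, 6, "事件B"], [3, 8, "事件B"]]
get_themeriver(sample_data, sample_legend, num_frames=3).render("event_chain_river.html")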
for idx in range(69):
    date = dates[idx][5:]
    for idy, term in enumerate(data[date][topic]):
        if term[0] not in BLACK_LIST:
            wc_data[idx][1].append(term)

with open("news_top_title.json", "r") as f:
    top_titles = json.load(f)[topic]
with open("news_emotion_count.json", "r") as f:
    news_ec = json.load(f)
emotions = ["负面", "中性", "正面"]
with open("cov_cnt.json", "r") as f:
    cc_data = json.load(f)

theme_river = ThemeRiver(init_opts=opts.InitOpts(width="1200px", height="600px"))
theme_river.add(
    series_name=tr_series[topic],
    data=tr_data[topic],
    label_opts=opts.LabelOpts(is_show=False),
    singleaxis_opts=opts.SingleAxisOpts(pos_top="50", pos_bottom="50", type_="time"),
)
theme_river.set_global_opts(
    tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="line"),
    legend_opts=opts.LegendOpts(pos_top="5%", is_show=True),
)

wc_tl = Timeline(init_opts=opts.InitOpts(width="1200px", height="600px"))
idx = 0
def showing_pics(movie_name):
    # build the report page
    page = Page(page_title=movie_name)

    # Douban account-age pie chart
    # read user data
    filename = movie_name + '\\' + movie_name + 'dby.txt'
    with open(filename, 'r+', encoding='utf-8') as f:
        year_1 = f.read()
    years = year_1.split()
    # bucket into bins
    time = {
        '0<x<=25': 0, '25<x<=50': 0, '50<x<=75': 0,
        '75<x<=100': 0, '100<x<=125': 0, 'x>125': 0
    }
    for x in years:
        if int(x) < 25:
            time['0<x<=25'] += 1
        elif int(x) < 50:
            time['25<x<=50'] += 1
        elif int(x) < 75:
            time['50<x<=75'] += 1
        elif int(x) < 100:
            time['75<x<=100'] += 1
        elif int(x) < 125:
            time['100<x<=125'] += 1
        else:
            time['x>125'] += 1
    # reshape the data
    month_list1 = time.items()
    # draw the chart
    dbmonth = (
        Pie(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))
        .add("", month_list1, radius=["40%", "75%"])
        .set_global_opts(
            title_opts=opts.TitleOpts(title=movie_name + " 豆瓣用户“豆龄”分布图", subtitle='单位:月'),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )

    # IMDB account-age pie chart
    # read user data
    filename = movie_name + '\\' + movie_name + 'Dy.txt'
    with open(filename, 'r+', encoding='utf-8') as f:
        year_1 = f.read()
    years = year_1.split()
    # bucket into bins
    time = {
        '0<x<=40': 0, '40<x<=80': 0, '80<x<=120': 0,
        '120<x<=160': 0, '160<x<=200': 0, 'x>200': 0
    }
    for x in years:
        if int(x) < 40:
            time['0<x<=40'] += 1
        elif int(x) < 80:
            time['40<x<=80'] += 1
        elif int(x) < 120:
            time['80<x<=120'] += 1
        elif int(x) < 160:
            time['120<x<=160'] += 1
        elif int(x) < 200:
            time['160<x<=200'] += 1
        else:
            time['x>200'] += 1
    # reshape
    month_list2 = time.items()
    # draw the chart
    Dmonth = (
        Pie(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))
        .add("", month_list2, radius=["40%", "75%"])
        .set_global_opts(
            title_opts=opts.TitleOpts(title=movie_name + " IMDB“D龄”分布图", subtitle='单位:月'),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )

    # English word cloud
    # read the data
    filename = movie_name + '\\' + movie_name + 'Dreview.txt'
    with open(filename, 'r+', encoding='utf-8') as f:
        rev_1 = f.read()
    # strip punctuation
    for i in rev_1:
        if i in string.punctuation:
            rev_1 = rev_1.replace(i, " ")
    # tokenize and count
    words = rev_1.split()
    words_dic = Counter(words)
    # drop some high-frequency words
    file_del = 'ban.txt'
    with open(file_del, 'r+', encoding='utf-8') as d:
        del_1 = d.read()
    Ban_list = del_1.split('\n')
    # reshape the data
    words_list = []
    for word, time in words_dic.items():
        if word.lower() in Ban_list or time < 35:
            continue
        else:
            a_word = (word.title(), time)
            words_list.append(a_word)
    # draw the chart
    Eng_wordcloud = (
        WordCloud(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))
        .add("", words_list, word_size_range=[20, 100], shape="diamond")
        .set_global_opts(
            title_opts=opts.TitleOpts(title=movie_name + " WordCloud", subtitle=None),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
        )
    )

    # Chinese word cloud
    # read the scraped text file
    filename = movie_name + '\\' + movie_name + 'dbreview.txt'
    with open(filename, 'r+', encoding='utf-8') as f:
        rev_2 = f.read()
    # Chinese word segmentation
    cut_douban = " ".join(jieba.cut(rev_2, cut_all=False))
    list_douban = cut_douban.split(' ')
    copy_list = []
    for word in list_douban:
        copy_list.append(word)
    # drop single-character tokens
    for word in copy_list:
        if len(word) == 1:
            list_douban.remove(word)
    stop = []
    review_dic = Counter(list_douban)
    # use a stop-word file to drop low-information words
    file_stop = '删除.txt'
    with open(file_stop, 'r+', encoding='utf-8') as s:
        stop_1 = s.read()
    stop = stop_1.split('\n')
    # reshape
    word_list = []
    for words, times in review_dic.items():
        if words in stop or times < 9:
            continue
        else:
            b_word = (words, times)
            word_list.append(b_word)
    # draw the Chinese word cloud
    Chn_wordcloud = (
        WordCloud(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))
        .add("", word_list, word_size_range=[20, 100], shape="diamond")
        .set_global_opts(
            title_opts=opts.TitleOpts(title=movie_name + " 中文词云图", subtitle=None),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
        )
    )

    # Douban rating theme-river chart
    # read the data
    with open(movie_name + '\\' + movie_name + 'dbscores.txt', 'r+', encoding='utf-8') as m:
        allthings = m.read().split(',')
    del allthings[-1]
    all_date = []
    points_dic = {}
    # bucket by rating
    for div in allthings:
        point, date = div.split()
        if date not in all_date:
            points_dic[date] = {'很差': 0, '较差': 0, '还行': 0, '推荐': 0, '力荐': 0}
            all_date.append(date)
        if point == '很差':
            points_dic[date]['很差'] += 1
        elif point == '较差':
            points_dic[date]['较差'] += 1
        elif point == '还行':
            points_dic[date]['还行'] += 1
        elif point == '推荐':
            points_dic[date]['推荐'] += 1
        else:
            points_dic[date]['力荐'] += 1
    # reshape the data
    data = []
    for month, grade in points_dic.items():
        for hzc, num in grade.items():
            data.append([month, num, hzc])  # [date, value, category]
    # draw the chart
    dbpoints = (
        ThemeRiver(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))
        .add(
            ['很差', '较差', '还行', '推荐', '力荐'],
            data,
            singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=movie_name + " 豆瓣评价分布图", subtitle=None),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            datazoom_opts=[opts.DataZoomOpts(is_show=True, range_start=0, range_end=100)],
        )
    )

    # IMDB rating theme-river chart
    # parse the scraped data
    with open(movie_name + '\\' + movie_name + 'Dscores.txt', 'r+', encoding='utf-8') as m:
        allthings = m.read().split(',')
    del allthings[-1]
    all_date = []
    points_dic = {}
    # bucket by score
    for div in allthings:
        point, date = div.split()
        if date not in all_date:
            points_dic[date] = {'好评': 0, '中评': 0, '差评': 0}
            all_date.append(date)
        if int(point) <= 3:
            points_dic[date]['差评'] += 1
        elif int(point) >= 7:
            points_dic[date]['好评'] += 1
        else:
            points_dic[date]['中评'] += 1
    # reshape for plotting
    data = []
    for month, grade in points_dic.items():
        for hzc, num in grade.items():
            data.append([month, num, hzc])  # [date, value, category]
    # draw the chart
    Dpoints = (
        ThemeRiver(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))
        .add(
            ['好评', '中评', '差评'],
            data,
            singleaxis_opts=opts.SingleAxisOpts(type_="time", pos_bottom="10%"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=movie_name + " IMDB评价分布主题河流图", subtitle=None),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            datazoom_opts=[opts.DataZoomOpts(is_show=True, range_start=0, range_end=100)],
        )
    )

    # viewing-location heat map
    # read city data
    with open(movie_name + '\\' + movie_name + 'dbplaces.txt', 'r+', encoding='utf-8-sig') as f:
        heat_1 = f.read()
    for i in heat_1:
        if i in string.punctuation:
            heat_1 = heat_1.replace(i, "")
    heat = heat_1.split()
    heat_dic = Counter(heat)
    # filter out cities missing from the Geo coordinate database (this txt keeps growing)
    with open('no city.txt', 'r+', encoding='utf-8-sig') as e:
        no_city = e.read()
    no_city_list = no_city.split(' ')
    # reshape the data
    heat_list = []
    for place, times in heat_dic.items():
        if place in no_city_list:
            continue
        c_word = (place, times)
        heat_list.append(c_word)
    # draw the chart; regenerate repeatedly to cope with made-up locations
    try:
        heat_map = (
            Geo(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))
            .add_schema(maptype="china")
            .add("", heat_list)
            .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
            .set_global_opts(
                visualmap_opts=opts.VisualMapOpts(),
                title_opts=opts.TitleOpts(title=movie_name + " 豆瓣观影热点图", subtitle=None),
                toolbox_opts=opts.ToolboxOpts(is_show=True),
            )
        )
    except:
        a = heat_list.pop(-1)

    # Chinese sentiment analysis chart
    with open(movie_name + '\\' + movie_name + "dbreview.txt", 'r', encoding='utf-8') as f:  # read the review file
        text = f.read()
    s = SnowNLP(text)
    chn_senti = []
    # run sentiment analysis and record the scores
    for sent in s.sentences:
        chn_senti.append(SnowNLP(sent).sentiments)
    times = {'0<=x<=0.25': 0, '0.25<x<=0.5': 0, '0.5<x<=0.75': 0, '0.75<x<=1': 0}
    for x in chn_senti:
        if x < 0 or x > 1:
            continue
        elif x <= 0.25:
            times['0<=x<=0.25'] += 1
        elif x <= 0.5:
            times['0.25<x<=0.5'] += 1
        elif x <= 0.75:
            times['0.5<x<=0.75'] += 1
        else:
            times['0.75<x<=1'] += 1
    scores_list = times.items()
    # draw the chart
    ch_motion = (
        Pie(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))
        .add("", scores_list, radius=["40%", "75%"])
        .set_global_opts(
            title_opts=opts.TitleOpts(title=movie_name + "豆瓣用户评论情感分析图", subtitle=None),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )

    # English sentiment analysis chart
    with open(movie_name + '\\' + movie_name + "Dreview.txt", 'r', encoding='utf-8') as f:  # read the review file
        text = f.read()
    blob = TextBlob(text)
    eng_senti = []
    # run sentiment analysis and record the scores
    for sent in blob.sentences:
        eng_senti.append(sent.sentiment.polarity)
    time = {'-1<=x<=-0.5': 0, '-0.5<x<=0': 0, '0<x<=0.5': 0, '0.5<x<=1': 0}
    for x in eng_senti:
        if x < -1 or x > 1:
            continue
        elif x <= -0.5:
            time['-1<=x<=-0.5'] += 1
        elif x <= 0:
            time['-0.5<x<=0'] += 1
        elif x <= 0.5:
            time['0<x<=0.5'] += 1
        else:
            time['0.5<x<=1'] += 1
    score_list = time.items()
    # draw the chart
    en_motion = (
        Pie(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))
        .add("", score_list, radius=["40%", "75%"])
        .set_global_opts(
            title_opts=opts.TitleOpts(title=movie_name + "IMDB用户评论情感分析图", subtitle=None),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            legend_opts=opts.LegendOpts(orient="vertical", pos_top="15%", pos_left="2%"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )

    # add the charts to the page
    page.add(dbmonth, Dmonth)
    page.add(Chn_wordcloud, Eng_wordcloud)
    page.add(dbpoints, Dpoints)
    page.add(ch_motion, en_motion)
    try:
        page.add(heat_map)
    except:
        pass
    page.render(movie_name + '\\' + movie_name + '数据图.html')