示例#1
0
def avg():
    """Render bar charts of the per-day average score and sentiment index.

    Reads ``maoyan2.csv`` from the working directory (malformed lines are
    skipped), groups the rows by the ``date`` column and writes two HTML
    charts into the ``picture`` directory:

    * ``评分走势图.html`` - daily mean of ``score``, doubled and rounded to
      two decimals;
    * ``评论情感指数走势图.html`` - daily mean of ``positive_prob``, rounded
      to two decimals.
    """
    import os

    try:
        # ``on_bad_lines`` replaced ``error_bad_lines`` in pandas 1.3 and the
        # old keyword was removed in pandas 2.0.
        df = pd.read_csv('maoyan2.csv', on_bad_lines='skip')
    except TypeError:
        # Older pandas does not know ``on_bad_lines``.
        df = pd.read_csv('maoyan2.csv', error_bad_lines=False)

    # The chart renderer does not create missing directories itself.
    os.makedirs('picture', exist_ok=True)

    # Daily average score. The mean is doubled (presumably to move from a
    # 5-point to a 10-point scale - confirm against the data source) and only
    # then rounded, so the rounding error is not doubled as well.
    date_score_avg = (df.groupby('date')['score'].mean() * 2).round(2)
    _render_daily_avg_bar('评分走势图', date_score_avg,
                          os.path.join('picture', '评分走势图.html'))

    # Daily average sentiment probability, kept to two decimals.
    date_positive_prob_avg = df.groupby('date')['positive_prob'].mean().round(2)
    _render_daily_avg_bar('评论情感指数走势图', date_positive_prob_avg,
                          os.path.join('picture', '评论情感指数走势图.html'))


def _render_daily_avg_bar(title, series, out_path):
    """Draw ``series`` (index = x labels, values = bar heights) to ``out_path``."""
    bar = Bar(title, '数据来源:不正经程序员-采集自猫眼',
              title_pos='center', width=1500, height=600)
    bar.add('', series.index, series.values,
            is_visualmap=False, visual_range=[0, 3500],
            visual_text_color='#fff', is_more_utils=True,
            is_label_show=True, xaxis_interval=0, xaxis_rotate=30,
            mark_line=["average"])
    overlap = Overlap()
    overlap.add(bar)
    overlap.show_config()
    overlap.render(out_path)
示例#2
0
def test_overlap_1():
    """Overlay two effect-scatter series and one plain scatter, then render.

    All three series share the same x values; each uses a constant y level.
    """
    x_values = list(range(10, 70, 10))
    point_count = len(x_values)
    level_30 = [30] * point_count
    level_50 = [50] * point_count
    level_10 = [10] * point_count

    # Charts are created and populated in the same order they are overlaid.
    ripple = EffectScatter("Scatter - EffectScatter 示例")
    ripple.add("es", x_values, level_30)

    plain = Scatter()
    plain.add("scatter", x_values, level_50)

    ripple_pins = EffectScatter()
    ripple_pins.add("es_1", x_values, level_10, symbol='pin', effect_scale=5)

    combined = Overlap()
    for chart in (ripple, plain, ripple_pins):
        combined.add(chart)
    combined.show_config()
    combined.render()
示例#3
0
def render():
    """Aggregate the Maoyan review CSV and build the report charts.

    The CSV is read line by line; the first line is treated as a header and
    only rows with exactly eight comma-separated fields are used. Doubled
    scores, city names and gender values are appended to the lists
    ``scores``, ``cities`` and ``genders``, which this function does not
    define itself and therefore must exist in an enclosing scope.

    Charts written as HTML: score distribution bar chart, audience geo map
    and top-20 city bar chart. A gender pie chart is built as well.
    """
    # Collect score, city and gender from every well-formed row.
    with open(r'C:\Users\think\Desktop\情感分析\doc\maoyan.csv',
              mode='r',
              encoding='utf_8_sig') as f:
        next(f, None)  # skip the header line
        for row in f:
            if row.count(',') != 7:
                continue
            elements = row.split(',')
            gender = elements[3]
            city = elements[4]
            score = elements[6]
            if score != '':
                try:
                    scaled_score = float(score) * 2
                except ValueError:
                    # Non-numeric score (e.g. a header line repeated in the
                    # middle of the file): ignore the whole row instead of
                    # aborting the report.
                    continue
                scores.append(scaled_score)
            if city != '':  # drop empty city names
                cities.append(city)
            if gender != '':
                genders.append(gender)

    # (score, count) pairs ordered by score, i.e. along the 0-10 axis.
    score_data = sorted(Counter(scores).items())
    gender_data = Counter(genders).most_common()
    print(gender_data)

    # Shared look for the geo chart.
    style = Style(title_color='#fff',
                  title_pos='center',
                  width=800,
                  height=600,
                  background_color='#404a59')

    # Bar chart: number of reviews per score.
    bar = Bar('《海上钢琴师》各评分数量',
              '数据来源:采集自猫眼',
              title_pos='center',
              width=900,
              height=600)
    attr, value = bar.cast(score_data)
    bar.add('',
            attr,
            value,
            is_visualmap=True,
            visual_range=[0, 3500],
            visual_text_color='#fff',
            is_more_utils=True,
            is_label_show=True)
    overlap = Overlap()
    overlap.add(bar)
    overlap.show_config()
    overlap.render(r'C:\Users\think\Desktop\情感分析\picture\评分数量-柱状图.html')

    # Reconcile the city names with the coordinate file before counting.
    # NOTE(review): handle() is defined elsewhere; presumably it edits
    # ``cities`` in place - confirm.
    handle(cities)
    city_counter = Counter(cities)
    data = city_counter.most_common()  # (city, count) tuples

    # Geo chart: where the audience comes from.
    geo = Geo('观众地理分布', '数据来源:采集自猫眼', **style.init_style)
    attr, value = geo.cast(data)
    geo.add('',
            attr,
            value,
            visual_range=[0, 600],
            maptype='china',
            visual_text_color='#fff',
            symbol_size=7,
            is_visualmap=True,
            is_piecewise=True,
            visual_split_number=10)
    geo.render(r'C:\Users\think\Desktop\情感分析\picture\观众地理分布-地理坐标图.html')

    # Bar chart: the 20 most frequent cities.
    data_top20 = city_counter.most_common(20)
    bar = Bar('观众来源排行TOP20',
              '数据来源:采集自猫眼',
              title_pos='center',
              width=1200,
              height=600)
    attr, value = bar.cast(data_top20)
    bar.add('',
            attr,
            value,
            is_visualmap=True,
            visual_range=[0, 3500],
            visual_text_color='#fff',
            is_more_utils=True,
            is_label_show=True)
    bar.render(r'C:\Users\think\Desktop\情感分析\picture\观众来源排行-柱状图.html')

    # Pie chart: audience gender split. Title centred, width 900.
    pie = Pie("观众性别分布图", "数据来源:采集自猫眼", title_pos='center', width=900)
    gender_codes, value = pie.cast(gender_data)
    print(value)
    # ``gender_data`` is ordered by frequency, so a fixed label list can pair
    # a label with the wrong count. Derive each label from its own code.
    # NOTE(review): assumes the CSV encodes gender as 0/1/2 in the order of
    # the previously hard-coded labels; unknown codes are shown unchanged.
    gender_labels = {'0': '其他', '1': '男', '2': '女'}
    attr = [gender_labels.get(str(code).strip(), code) for code in gender_codes]
    pie.add("", attr, value, is_label_show=True, is_more_utils=True)
    # NOTE(review): the pie chart is never rendered in the visible code -
    # confirm whether a pie.render(...) call is missing here.
示例#4
0
    bar.add('', attr, value, is_visualmap=False, visual_range=[0, 3500], visual_text_color='#fff', is_more_utils=True,
            is_label_show=True)
    bar.render('picture\观众来源排行-柱状图.html')

    # 根据评分数据生成柱状图
    bar = Bar('《海上钢琴师》各评分数量', '数据来源:不正经程序员-采集自猫眼',
              title_pos='center', width=900, height=600)
    attr, value = bar.cast(score_data)
    # line = Line()
    # line.add('', attr, value)
    bar.add('', attr, value, is_visualmap=False, visual_range=[0, 3500], visual_text_color='#fff', is_more_utils=True,
            is_label_show=True)
    overlap = Overlap()
    overlap.add(bar)
    # overlap.add(line)
    overlap.show_config()
    overlap.render(
        'picture\评分数量-柱状图.html')

    # 根据评分数据生成柱状图
    bar = Bar('评价人数走势图', '数据来源:不正经程序员-采集自猫眼',
              title_pos='center', width=1200, height=600)
    attr, value = bar.cast(date_data)
    # line = Line()
    # line.add('', attr, value)
    bar.add('', attr, value, is_visualmap=False, visual_range=[0, 3500], visual_text_color='#fff', is_more_utils=True,
            is_label_show=True)
    overlap = Overlap()
    overlap.add(bar)
    # overlap.add(line)
    overlap.show_config()
示例#5
0
def render():
    """Aggregate the review CSV and render every chart of the report.

    Reads ``shanghai.csv`` (first line treated as a header) and keeps only
    rows that have exactly eight comma-separated fields and a score accepted
    by ``is_float``. From those rows it:

    * appends doubled scores, genders and dates to the lists ``scores``,
      ``scores_luhan``, ``genders`` and ``dates``, which are not defined
      here and must exist in an enclosing scope;
    * extends the global strings ``positive_text``, ``negative_text``,
      ``luhan_positive_text`` and ``luhan_negative_text`` with the comments
      of rows whose doubled score is 10 (positive) or 1 (negative), then
      writes each string to a ``.txt`` file in the working directory;
    * writes a geo map, four bar charts and a gender pie chart as HTML
      files into the ``picture`` directory (created if missing).
    """
    import os

    global positive_text
    global negative_text
    global luhan_positive_text
    global luhan_negative_text

    cities = []
    # Comments are gathered in lists and joined once after the loop rather
    # than growing the global strings one concatenation at a time.
    positive_parts = []
    negative_parts = []
    luhan_positive_parts = []
    luhan_negative_parts = []

    with open('shanghai.csv', mode='r', encoding='utf-8') as f:
        next(f, None)  # skip the header line
        for row in f:
            if row.count(',') != 7:
                continue
            elements = row.split(',')
            date = elements[0]
            gender = elements[3]
            city = elements[4]
            score = elements[6]
            comment = elements[7]
            if not is_float(score):
                continue
            # Converted once per row. float() rejects an empty string, so a
            # separate emptiness check on the score is not needed below.
            score_10 = float(score) * 2
            is_top = score_10 == 10
            is_bottom = score_10 == 1
            if '鹿晗' in comment:
                scores_luhan.append(score_10)
                if is_top:
                    luhan_positive_parts.append(comment)
                elif is_bottom:
                    luhan_negative_parts.append(comment)
            if city != '':  # drop empty city names
                cities.append(city)
            scores.append(score_10)
            if is_top:
                positive_parts.append(comment)
            elif is_bottom:
                negative_parts.append(comment)
            if gender != '':
                genders.append(gender)
            if date != '':
                dates.append(date)

    positive_text += ''.join(positive_parts)
    negative_text += ''.join(negative_parts)
    luhan_positive_text += ''.join(luhan_positive_parts)
    luhan_negative_text += ''.join(luhan_negative_parts)

    for text_file, text in (("positive_text.txt", positive_text),
                            ("negative_text.txt", negative_text),
                            ("luhan_positive_text.txt", luhan_positive_text),
                            ("luhan_negative_text.txt", luhan_negative_text)):
        with open(text_file, "w", encoding='utf-8') as f:
            f.write(text)

    # Reconcile the city names with the coordinate file before counting.
    # NOTE(review): handle() is defined elsewhere; presumably it edits
    # ``cities`` in place - confirm.
    handle(cities)

    city_counter = Counter(cities)
    data = city_counter.most_common()  # (city, count) tuples
    # (score, count) pairs ordered by score, i.e. along the 0-10 axis.
    score_data = sorted(Counter(scores).most_common())
    score_data_luhan = sorted(Counter(scores_luhan).most_common())
    gender_data = Counter(genders).most_common()
    print(gender_data)
    # (date, count) pairs ordered by date string.
    date_data = sorted(Counter(dates).most_common())

    # Output paths are built with os.path.join: a literal such as
    # 'picture\x.html' relies on an invalid escape sequence and only names a
    # sub-directory on Windows.
    picture_dir = 'picture'
    os.makedirs(picture_dir, exist_ok=True)

    # Shared look for the geo chart.
    style = Style(title_color='#fff',
                  title_pos='center',
                  width=800,
                  height=600,
                  background_color='#404a59')

    # Geo chart: where the audience comes from.
    geo = Geo('观众地理分布', '数据来源:不正经程序员-采集自猫眼', **style.init_style)
    attr, value = geo.cast(data)
    geo.add('',
            attr,
            value,
            visual_range=[0, 600],
            visual_text_color='#fff',
            symbol_size=7,
            is_visualmap=True,
            is_piecewise=True,
            visual_split_number=10)
    geo.render(os.path.join(picture_dir, '观众地理分布-地理坐标图.html'))

    # Bar chart: the 20 most frequent cities (rendered directly).
    _render_count_bar('观众来源排行TOP20',
                      city_counter.most_common(20),
                      os.path.join(picture_dir, '观众来源排行-柱状图.html'),
                      width=1200,
                      via_overlap=False,
                      is_visualmap=False)

    # Bar chart: number of reviews per score.
    _render_count_bar('各段评分数量',
                      score_data,
                      os.path.join(picture_dir, '评分数量-柱状图.html'),
                      width=900,
                      is_visualmap=True)

    # Bar chart: scores of the reviews that mention 鹿晗.
    _render_count_bar('评论带有鹿晗的各段评分数量',
                      score_data_luhan,
                      os.path.join(picture_dir, '评论带有鹿晗的各段评分数量-柱状图.html'),
                      width=900,
                      is_visualmap=True)

    # Bar chart: number of reviews per date.
    _render_count_bar('评价人数走势图',
                      date_data,
                      os.path.join(picture_dir, '评价人数走势图.html'),
                      width=1200,
                      is_visualmap=True,
                      xaxis_rotate=30)

    from pyecharts import Pie
    # Pie chart: audience gender split. Title centred, width 900.
    pie = Pie("观众性别分布图", "数据来源:不正经程序员-采集自猫眼", title_pos='center', width=900)
    gender_codes, value = pie.cast(gender_data)
    # ``gender_data`` is ordered by frequency, so a fixed label list can pair
    # a label with the wrong count. Derive each label from its own code.
    # NOTE(review): assumes the CSV encodes gender as 0/1/2 in the order of
    # the previously hard-coded labels; unknown codes are shown unchanged.
    gender_labels = {'0': '其他', '1': '男', '2': '女'}
    labels = [gender_labels.get(str(code).strip(), code) for code in gender_codes]
    pie.add("", labels,
            value,
            visual_range=[0, 3500],
            is_legend_show=False,
            is_label_show=True)

    # Save the chart.
    pie.render(os.path.join(picture_dir, '观众性别分布图.html'))


def _render_count_bar(title, pairs, out_path, width, via_overlap=True, **add_options):
    """Render ``(label, count)`` pairs as a bar chart written to ``out_path``.

    ``add_options`` are forwarded to ``Bar.add``. With ``via_overlap`` the
    bar is wrapped in an ``Overlap`` (whose config is shown first);
    otherwise the bar renders itself.
    """
    bar = Bar(title,
              '数据来源:不正经程序员-采集自猫眼',
              title_pos='center',
              width=width,
              height=600)
    attr, value = bar.cast(pairs)
    bar.add('',
            attr,
            value,
            visual_range=[0, 3500],
            visual_text_color='#fff',
            is_more_utils=True,
            is_label_show=True,
            **add_options)
    if not via_overlap:
        bar.render(out_path)
        return
    overlap = Overlap()
    overlap.add(bar)
    overlap.show_config()
    overlap.render(out_path)