Exemplo n.º 1
0
 def boxplot_demo(self):
     """Render a two-category box plot to ``boxplot.html``.

     Reads ``self.data1`` and ``self.data2`` as the raw samples for the two
     categories on the x axis.
     """
     from pyecharts import Boxplot

     chart = Boxplot("箱型图", "一年的降水量与蒸发量")
     categories = ['降水量', '蒸发量']
     # prepare_data converts each raw sample list into the nested
     # [min, Q1, median (or Q2), Q3, max] form.
     summaries = chart.prepare_data([self.data1, self.data2])
     chart.add("天气统计", categories, summaries)
     chart.render('boxplot.html')
Exemplo n.º 2
0
def test_boxplot():
    """Exercise Boxplot with a single series, then with two series.

    The first chart plots five samples as one series. The second chart plots
    two series over two categories and asserts that both series names appear
    in the chart's HTML representation.
    """
    sample_1 = (850, 740, 900, 1070, 930, 850, 950, 980, 980, 880,
                1000, 980, 930, 650, 760, 810, 1000, 1000, 960, 960)
    sample_2 = (960, 940, 960, 940, 880, 800, 850, 880, 900, 840,
                830, 790, 810, 880, 880, 830, 800, 790, 760, 800)
    sample_3 = (880, 880, 880, 860, 720, 720, 620, 860, 970, 950,
                880, 910, 850, 870, 840, 840, 850, 840, 840, 840)
    sample_4 = (890, 810, 810, 820, 800, 770, 760, 740, 750, 760,
                910, 920, 890, 860, 880, 720, 840, 850, 850, 780)
    sample_5 = (890, 840, 780, 810, 760, 810, 790, 810, 820, 850,
                870, 870, 810, 740, 810, 940, 950, 800, 810, 870)

    # Single series covering all five samples.
    single = Boxplot("箱形图")
    all_labels = ['expr1', 'expr2', 'expr3', 'expr4', 'expr5']
    all_samples = [
        list(row)
        for row in (sample_1, sample_2, sample_3, sample_4, sample_5)
    ]
    single.add("boxplot", all_labels, single.prepare_data(all_samples))
    single.render()

    # Two series sharing the same two categories.
    grouped = Boxplot("箱形图")
    pair_labels = ['expr1', 'expr2']
    series = (
        ("category1", [list(sample_1), list(sample_2)]),
        ("category2", [list(sample_4), list(sample_5)]),
    )
    for series_name, rows in series:
        grouped.add(series_name, pair_labels, grouped.prepare_data(rows))
    for series_name, _ in series:
        assert series_name in grouped._repr_html_()
    grouped.render()
def boxplot_of_2_attr(df, value, attr1, attr2):
    """Render a box plot with one series per distinct value of ``attr1``.

    The plotted data comes from ``data_of_2_attr(df, value, attr1, attr2)``,
    whose result is read through its ``'name'`` and ``'data'`` entries; entry
    ``i`` of each is used for the ``i``-th distinct value of ``df[attr1]``.
    The chart is written to ``b2(<attr1>-<attr2>).html``.

    :param df: table indexed by column name (``df[attr1]`` must be iterable)
    :param value: forwarded unchanged to ``data_of_2_attr``
    :param attr1: column whose distinct values become the series
    :param attr2: second column, forwarded to ``data_of_2_attr`` and used in
        the output file name
    """
    boxplot = Boxplot("箱形图")
    data = data_of_2_attr(df, value, attr1, attr2)

    name = data['name']
    dt = data['data']

    # Compute the distinct values once instead of rebuilding the set (and a
    # list from it) on every loop iteration.
    levels = list(set(df[attr1]))
    for i, level in enumerate(levels):
        # NOTE(review): prepare_data is called as a bare name here, so it has
        # to be defined or imported elsewhere in the module - confirm.
        boxplot.add(attr1 + ":{}".format(level), name[i],
                    prepare_data(dt[i]), is_more_utils=True)
    boxplot.render('b2({}-{}).html'.format(attr1, attr2))
Exemplo n.º 4
0
def picture_box(file_path):
    """
        Draw a box plot of infant weights grouped by birth year and by the
        alive-at-report flag, and write it to ``box.html``.
    :param file_path: path of the CSV data file
    """
    records = pd.read_csv(file_path)
    wanted_columns = [
        'INFANT_ALIVE_AT_REPORT', 'BIRTH_YEAR', 'INFANT_WEIGHT_GRAMS'
    ]
    selected = records.loc[:, wanted_columns]
    # Negated mask: drop the rows whose weight equals 9999 (unwanted data).
    usable = selected[~selected['INFANT_WEIGHT_GRAMS'].isin([9999])]

    def weights_for(year, alive_flag):
        """Return the weight column for one birth year and one alive flag."""
        in_year = usable[usable['BIRTH_YEAR'] == year]
        matching = in_year[in_year['INFANT_ALIVE_AT_REPORT'] == alive_flag]
        return matching['INFANT_WEIGHT_GRAMS']

    # Split by year first, then by the alive ('Y') / not alive ('N') flag.
    alive_2014 = weights_for(2014, 'Y')
    dead_2014 = weights_for(2014, 'N')
    alive_2015 = weights_for(2015, 'Y')
    dead_2015 = weights_for(2015, 'N')

    chart = Boxplot('存活婴儿与死亡婴儿体重对比——箱型图(黄彩思)')
    # The built-in prepare_data computes the five numbers each box needs.
    alive_summary = chart.prepare_data([alive_2014, alive_2015])
    dead_summary = chart.prepare_data([dead_2014, dead_2015])
    # X axis: every distinct birth year left after filtering, ascending.
    years = usable.drop_duplicates(
        subset='BIRTH_YEAR')['BIRTH_YEAR'].sort_values()
    print('存活婴儿数据箱型图:', alive_summary, '\n', '死亡婴儿数据箱型图:', dead_summary)
    chart.add('存活婴儿', x_axis=years, y_axis=alive_summary)
    chart.add('死亡婴儿', x_axis=years, y_axis=dead_summary, legend_pos='right')

    chart.render('box.html')
Exemplo n.º 5
0
def unicorn_china3(data):
    """Render a box plot comparing valuations across five headquarter cities.

    :param data: table supporting ``.query`` with ``Headquarter`` and
        ``Valuation`` columns
    """
    x_axis = ['北京', '上海', '广州', '深圳', '杭州']
    # One raw list of valuations per city, in x-axis order.
    raw_valuations = [
        list(data.query("Headquarter == '{}'".format(city))['Valuation'].values)
        for city in x_axis
    ]

    chart = Boxplot('五大城市独角兽公司估值范围对比', title_pos='center')
    # A box-plot series needs [min, Q1, median, Q3, max] per category, so the
    # raw samples must go through prepare_data before being added.
    y_axis = chart.prepare_data(raw_valuations)
    chart.add('单位(亿人民币)',
              x_axis, y_axis,
              legend_pos='right'
              )

    # NOTE(review): the target has a .png extension; presumably this relies on
    # an image-export add-on for pyecharts being installed - confirm.
    chart.render('五大城市独角企业市值值对比.png')
Exemplo n.º 6
0
def test_boxplot_one_legend():
    """Render a Boxplot that holds a single series over five categories."""
    samples_by_label = (
        ('expr1', [850, 740, 900, 1070, 930, 850, 950, 980, 980, 880,
                   1000, 980, 930, 650, 760, 810, 1000, 1000, 960, 960]),
        ('expr2', [960, 940, 960, 940, 880, 800, 850, 880, 900, 840,
                   830, 790, 810, 880, 880, 830, 800, 790, 760, 800]),
        ('expr3', [880, 880, 880, 860, 720, 720, 620, 860, 970, 950,
                   880, 910, 850, 870, 840, 840, 850, 840, 840, 840]),
        ('expr4', [890, 810, 810, 820, 800, 770, 760, 740, 750, 760,
                   910, 920, 890, 860, 880, 720, 840, 850, 850, 780]),
        ('expr5', [890, 840, 780, 810, 760, 810, 790, 810, 820, 850,
                   870, 870, 810, 740, 810, 940, 950, 800, 810, 870]),
    )
    chart = Boxplot("箱形图")
    labels = [label for label, _ in samples_by_label]
    raw_rows = [row for _, row in samples_by_label]
    # prepare_data reduces each raw row to the values a box needs.
    chart.add("boxplot", labels, chart.prepare_data(raw_rows))
    chart.render()
Exemplo n.º 7
0
        continue
    else:
        dom22.append(i)

dom = df[['job_education', 'job_salary']]
data = [[], [], [], [], []]
dom1, dom2, dom3, dom4, dom5 = data
# Education label -> salary list. Any label not listed here goes to dom4;
# dom5 is never filled.
education_buckets = {'不限': dom1, '大专': dom2, '本科': dom3}
for education, salary_text in zip(dom['job_education'], dom['job_salary']):
    # Parse the two '-'-separated bounds (dropping 'k'/'K'), then store the
    # midpoint scaled by 1000.
    bounds = salary_text.split('-')
    low = float(bounds[0].replace('k', '').replace('K', ''))
    high = float(bounds[1].replace('k', '').replace('K', ''))
    education_buckets.get(education, dom4).append(((low + high) / 2) * 1000)

chart_options = dict(title_pos='center', title_top='18', width=800, height=400)
boxplot = Boxplot("拉勾网数据分析岗—学历薪水图(元/月)", **chart_options)
boxplot.use_theme("chalk")
x_axis = ['学历不限', '大专', '本科', '硕士']
y_axis = [dom1, dom2, dom3, dom4]
# Reduce every raw salary list to box-plot summary values before plotting.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis, is_label_show=True)
boxplot.render("拉勾网数据分析岗—学历薪水图.html")
Exemplo n.º 8
0
gauge.render('目标完成率.html')

# # # # # # Box plot
from pyecharts import Boxplot
Titanic = pd.read_csv('titanic_train.csv')
# Take the ages with missing values removed. dropna() returns a new Series;
# calling dropna(inplace=True) on a column selection is not guaranteed to
# change what Titanic['Age'] returns afterwards (under pandas Copy-on-Write it
# never does), which would leave NaNs in the plotted data.
age = Titanic['Age'].dropna()
boxplot = Boxplot('年龄箱线图')
x_axis = ['年龄']
# One category, so the raw data is a one-element list holding all ages.
y_axis = [age.values]
_yaxis = boxplot.prepare_data(y_axis)  # the raw data must be converted first
boxplot.add('箱线图', x_axis, _yaxis)
boxplot.render('boxplot.html')


# # #子图
from pyecharts import Line, Pie, Grid
# Line chart: two temperature series over the days of the week.
line = Line("折线图示例", width=1200)
attr = ['周一', '周二', '周三', '周四', '周五', '周六', '周日']
temperature_series = (
    ("最高气温", [11, 11, 15, 13, 12, 13, 10], {}),
    ("最低气温", [1, -2, 2, 5, 3, 2, 0], {"legend_pos": "20%"}),
)
for series_name, temperatures, extra_options in temperature_series:
    # Both series mark their max/min points and their average line.
    line.add(series_name, attr, temperatures,
             mark_point=["max", "min"], mark_line=["average"],
             **extra_options)

# Pie chart: ring with a vertical legend.
attr = ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"]
v1 = [11, 12, 13, 10, 10, 10]
pie = Pie("饼图示例", title_pos="45%")
pie.add(
    "",
    attr,
    v1,
    radius=[30, 55],
    legend_pos="65%",
    legend_orient='vertical',
)
Exemplo n.º 9
0
    if i in dom22:
        continue
    else:
        dom22.append(i)

dom = df[['job_education', 'job_salary']]
data = [[], [], [], [], []]
dom1, dom2, dom3, dom4, dom5 = data
# Education label -> salary list. Any label not listed here goes to dom4;
# dom5 is never filled.
education_buckets = {'学历不限': dom1, '大专': dom2, '本科': dom3}
for education, salary_text in zip(dom['job_education'], dom['job_salary']):
    # Parse the two '-'-separated bounds (dropping 'k'), then store the
    # midpoint scaled by 1000.
    bounds = salary_text.split('-')
    low = float(bounds[0].replace('k', ''))
    high = float(bounds[1].replace('k', ''))
    education_buckets.get(education, dom4).append(((low + high) / 2) * 1000)

chart_options = dict(title_pos='center', title_top='18', width=800, height=400)
boxplot = Boxplot("BOSS直聘数据分析岗—学历薪水图(元/月)", **chart_options)
x_axis = ['学历不限', '大专', '本科', '硕士']
y_axis = [dom1, dom2, dom3, dom4]
# Reduce every raw salary list to box-plot summary values before plotting.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("BOSS直聘数据分析岗—学历薪水图.html")
Exemplo n.º 10
0
dom = df[['job_experience', 'job_salary']]
data = [[], [], [], [], [], []]
dom1, dom2, dom3, dom4, dom5, dom6 = data
# Experience label -> salary list. Any label not listed here goes to dom6.
experience_buckets = {
    '经验不限': dom1,
    '应届生': dom2,
    '1年以内': dom3,
    '1-3年': dom4,
    '3-5年': dom5,
}
for experience, salary_text in zip(dom['job_experience'], dom['job_salary']):
    # Parse the two '-'-separated bounds (dropping 'k'), then store the
    # midpoint scaled by 1000.
    bounds = salary_text.split('-')
    low = float(bounds[0].replace('k', ''))
    high = float(bounds[1].replace('k', ''))
    experience_buckets.get(experience, dom6).append(((low + high) / 2) * 1000)

chart_options = dict(title_pos='center', title_top='18', width=800, height=400)
boxplot = Boxplot("BOSS直聘数据分析岗—工作经验薪水图(元/月)", **chart_options)
x_axis = ['经验不限', '应届生', '1年以内', '1-3年', '3-5年', '5-10年']
y_axis = [dom1, dom2, dom3, dom4, dom5, dom6]
# Reduce every raw salary list to box-plot summary values before plotting.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("BOSS直聘数据分析岗—工作经验薪水图.html")
Exemplo n.º 11
0
        dfv = pde.drop_outliers(df.loc[(df[x_attr] == i) &
                                       (df[y_attr] == j), :],
                                ['profits'])  # hack
        v.append(list(dfv['profits']))
    data.append(v)

from pyecharts import Boxplot
title = 'Profits of movie with different rating and score'
boxplot = Boxplot(title, title_pos='center')

# One series per entry of x_axis2; data[k] holds the raw samples for the
# k-th entry and is summarised with prepare_data before being added.
for series_index, series_name in enumerate(x_axis2):
    series_summary = boxplot.prepare_data(data[series_index])
    boxplot.add(series_name, x_axis, series_summary, legend_top='7%')

boxplot.render(title + '.html')

#############################################################################
################   smart heatmap  ###########################################

from pyecharts import HeatMap
from pyecharts import Page


def ref_list_maker(df, attr1, attr2):
    # df is a grouped table
    x_axis = list(set(df[attr1]))
    y_axis = list(set(df[attr2]))
    ref_list = []
    for i in x_axis:
        y_axis_2 = list(set(df.loc[df[attr1] == i, attr2]))
import pandas as pd
from pyecharts import Boxplot

df = pd.read_csv('D:/data/beijing_AQI_2018.csv')
dom = df[['Date', 'PM']]
data = [[], [], [], []]
dom1, dom2, dom3, dom4 = data
# Month text -> quarter list. Anything not listed here goes to dom4.
quarter_by_month = {
    **dict.fromkeys(('1', '2', '3'), dom1),
    **dict.fromkeys(('4', '5', '6'), dom2),
    **dict.fromkeys(('7', '8', '9'), dom3),
}
for date_text, reading in zip(dom['Date'], dom['PM']):
    # The second '/'-separated field of the date is used as the month.
    month = date_text.split('/')[1]
    quarter_by_month.get(month, dom4).append(reading)

chart_options = dict(title_pos='center', title_top='18', width=800, height=400)
boxplot = Boxplot("2018年北京季度PM2.5箱形图", **chart_options)
x_axis = ['第一季度', '第二季度', '第三季度', '第四季度']
y_axis = [dom1, dom2, dom3, dom4]
# Reduce every raw list to box-plot summary values before plotting.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render('2018年北京季度PM2.5箱形图.html')
Exemplo n.º 13
0
from pyecharts import Boxplot

# One raw score list per class; x holds the matching class labels.
x = ['1班', '2班', '3班', '4班']
y1 = [78, 98, 56, 78, 90.0, 45, 78, 20, 87, 86, 74, 89, 94]
y2 = [89, 82, 45, 67, 68, 78.0, 79, 98, 71, 56, 78, 81, 80]
y3 = [90, 80, 60, 89, 76, 73.0, 72, 92, 89, 87, 65, 66, 76]
y4 = [82, 72, 55, 100, 90.0, 78, 69, 67, 87, 66, 78, 71, 82]

box = Boxplot(title='考试成绩箱型图', width=600, height=420)

# Pre-process the data: compute the max, min, median and the upper and lower
# quartiles of every score list.
scores_per_class = [y1, y2, y3, y4]
y_prepared = box.prepare_data(scores_per_class)
box.add(name='', x_axis=x, y_axis=y_prepared)

box.render('result.箱型图示范.html')



# The violin plot is a more advanced variant of the box plot: it can also show
# the data's density-estimate curve, and it can be drawn with seaborn.
import seaborn as sns
# %matplotlib inline
# %config InlineBackend.figure_format = 'svg'

# Set the plotting style.
sns.set(style="white", context="notebook")
# Work around Chinese text rendering by choosing the sans-serif fonts.
sns.set_style({'font.sans-serif':['simhei', 'Arial']}) 

# Collect every class's scores (y1..y4 concatenated) into one 'score' column.
dfdata = pd.DataFrame()
dfdata['score'] = y1 + y2 + y3 + y4
from pyecharts import Boxplot

column_names = ["Date", "Quality_grade", "AQI", "AQI_rank", "PM"]
df = pd.read_csv('air_tianjin_2017.csv', header=None, names=column_names)

dom = df[['Date', 'PM']]
data = [[], [], [], []]
dom1, dom2, dom3, dom4 = data
# Month text -> quarter list. Anything not listed here goes to dom4.
quarter_by_month = {
    **dict.fromkeys(('01', '02', '03'), dom1),
    **dict.fromkeys(('04', '05', '06'), dom2),
    **dict.fromkeys(('07', '08', '09'), dom3),
}
for date_text, reading in zip(dom['Date'], dom['PM']):
    # The second '-'-separated field of the date is used as the month.
    month = date_text.split('-')[1]
    quarter_by_month.get(month, dom4).append(reading)

chart_options = dict(title_pos='center', title_top='18', width=1200, height=600)
boxplot = Boxplot("2017年天津季度PM2.5箱形图", **chart_options)
x_axis = ['第一季度', '第二季度', '第三季度', '第四季度']
y_axis = [dom1, dom2, dom3, dom4]
# Reduce every raw list to box-plot summary values before plotting.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("2017年天津季度PM2.5箱形图.html")
Exemplo n.º 15
0
import pandas as pd
from pyecharts import Boxplot

df = pd.read_csv('D:/data/beijing_AQI_2018.csv')
dom = df[['Date', 'AQI']]
data = [[], [], [], []]
dom1, dom2, dom3, dom4 = data
# Month text -> quarter list. Anything not listed here goes to dom4.
quarter_by_month = {
    **dict.fromkeys(('1', '2', '3'), dom1),
    **dict.fromkeys(('4', '5', '6'), dom2),
    **dict.fromkeys(('7', '8', '9'), dom3),
}
for date_text, reading in zip(dom['Date'], dom['AQI']):
    # Cut out the month: the second '/'-separated field of the date.
    month = date_text.split('/')[1]
    quarter_by_month.get(month, dom4).append(reading)

chart_options = dict(title_pos='center', title_top='18', width=800, height=400)
boxplot = Boxplot("2018年北京季度AQI走势箱形图", **chart_options)
x_axis = ['第一季度', '第二季度', '第三季度', '第四季度']
y_axis = [dom1, dom2, dom3, dom4]
# Reduce every raw list to box-plot summary values before plotting.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render('2018年北京季度AQI箱形图.html')
Exemplo n.º 16
0
    is_label_show=True,
    legend_orient="vertical",
    legend_pos="left",
)
pie.render("该公司人力资源总体情况图.html")


from pyecharts import Boxplot
# Rename the columns.
df.columns = [
    'satisfaction', 'evaluation', 'project', 'hours', 'years_work',
    'work_accident', 'left', 'promotion', 'department', 'salary',
]
# Draw one box plot per metric, comparing rows with left == 0 against rows
# with left == 1.
chart_specs = (
    ("对公司满意度与是否离职关系图", 'satisfaction', "对公司满意度与是否离职关系图.html"),
    ("最新评估与是否离职关系图", 'evaluation', "最新评估与是否离职关系图.html"),
)
for chart_title, metric, output_file in chart_specs:
    boxplot = Boxplot(chart_title, title_pos='center')
    x_axis = ['在职', '离职']
    y_axis = [df[df.left == left_flag][metric].values for left_flag in (0, 1)]
    boxplot.add("", x_axis, boxplot.prepare_data(y_axis))
    boxplot.render(output_file)

from pyecharts import Bar, Pie, Grid
# Group by project count and count the rows with left == 1 and all rows.
left_rows = df[df.left == 1]
project_left_1 = left_rows.groupby('project')['left'].count()
project_all = df.groupby('project')['left'].count()
# Share of left == 1 rows per project count, and its complement.
project_left1_rate = project_left_1 / project_all
project_left0_rate = 1 - project_left1_rate
Exemplo n.º 17
0
import pandas as pd
from pyecharts import Boxplot
data = pd.read_csv(r"C:\学习\python数据分析\数据\iris-data.csv")
# Labels: the first four column names of the file.
x = list(data.columns[0:4])
# Raw samples: one list per measurement column.
measurement_columns = (
    'sepal_length_cm',
    'sepal_width_cm',
    'petal_length_cm',
    'petal_width_cm',
)
y = [list(data[column]) for column in measurement_columns]
boxplot = Boxplot("箱线图")
# Reduce every raw list to box-plot summary values before plotting.
y_data = boxplot.prepare_data(y)
boxplot.add("", x, y_data)
boxplot.render()
Exemplo n.º 18
0
dom = df[['company_people', 'job_salary']]
data = [[], [], [], [], [], []]
dom1, dom2, dom3, dom4, dom5, dom6 = data
# Company-size label -> salary list. Any label not listed here goes to dom6.
size_buckets = {
    '0-20人': dom1,
    '20-99人': dom2,
    '100-499人': dom3,
    '500-999人': dom4,
    '1000-9999人': dom5,
}
for company_size, salary_text in zip(dom['company_people'], dom['job_salary']):
    # Parse the two '-'-separated bounds (dropping 'k'), then store the
    # midpoint scaled by 1000.
    bounds = salary_text.split('-')
    low = float(bounds[0].replace('k', ''))
    high = float(bounds[1].replace('k', ''))
    size_buckets.get(company_size, dom6).append(((low + high) / 2) * 1000)

chart_options = dict(title_pos='center', title_top='18', width=800, height=400)
boxplot = Boxplot("BOSS直聘数据分析岗—公司规模薪水图(元/月)", **chart_options)
x_axis = ['0-20人', '20-99人', '100-499人', '500-999人', '1000-9999人', '10000人以上']
y_axis = [dom1, dom2, dom3, dom4, dom5, dom6]
# Reduce every raw salary list to box-plot summary values before plotting.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("BOSS直聘数据分析岗—公司规模薪水图.html")
Exemplo n.º 19
0
from pyecharts import Boxplot

column_names = ["Date", "Quality_grade", "AQI", "AQI_rank", "PM"]
df = pd.read_csv('air_tianjin_2017.csv', header=None, names=column_names)

dom = df[['Date', 'AQI']]
data = [[], [], [], []]
dom1, dom2, dom3, dom4 = data
# Month text -> quarter list. Anything not listed here goes to dom4.
quarter_by_month = {
    **dict.fromkeys(('01', '02', '03'), dom1),
    **dict.fromkeys(('04', '05', '06'), dom2),
    **dict.fromkeys(('07', '08', '09'), dom3),
}
for date_text, reading in zip(dom['Date'], dom['AQI']):
    # The second '-'-separated field of the date is used as the month.
    month = date_text.split('-')[1]
    quarter_by_month.get(month, dom4).append(reading)

chart_options = dict(title_pos='center', title_top='18', width=800, height=400)
boxplot = Boxplot("2017年天津季度AQI箱形图", **chart_options)
x_axis = ['第一季度', '第二季度', '第三季度', '第四季度']
y_axis = [dom1, dom2, dom3, dom4]
# Reduce every raw list to box-plot summary values before plotting.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("2017年天津季度AQI箱形图.html")
Exemplo n.º 20
0
dom = df[['job_experience', 'job_salary']]
data = [[], [], [], [], [], [], []]
dom1, dom2, dom3, dom4, dom5, dom6, dom7 = data
# Experience label -> salary list. Any label not listed here goes to dom6;
# dom7 is never filled.
experience_buckets = {
    '不限': dom1,
    '应届毕业生': dom2,
    '1年以下': dom3,
    '1-3年': dom4,
    '3-5年': dom5,
}
for experience, salary_text in zip(dom['job_experience'], dom['job_salary']):
    # Parse the two '-'-separated bounds (dropping 'k'/'K'), then store the
    # midpoint scaled by 1000.
    bounds = salary_text.split('-')
    low = float(bounds[0].replace('k', '').replace('K', ''))
    high = float(bounds[1].replace('k', '').replace('K', ''))
    experience_buckets.get(experience, dom6).append(((low + high) / 2) * 1000)

chart_options = dict(title_pos='center', title_top='18', width=800, height=400)
boxplot = Boxplot("拉勾网数据分析岗—工作经验薪水图(元/月)", **chart_options)
x_axis = ['经验不限', '应届生', '1年以内', '1-3年', '3-5年', '5-10年']
y_axis = [dom1, dom2, dom3, dom4, dom5, dom6]
# Reduce every raw salary list to box-plot summary values before plotting.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("拉勾网数据分析岗—工作经验薪水图.html")
Exemplo n.º 21
0
    j = ((float(j.split('-')[0].replace('k', '').replace('K', '')) +
          float(j.split('-')[1].replace('k', '').replace('K', ''))) / 2) * 1000
    if i in ['天使轮']:
        dom1.append(j)
    elif i in ['A轮']:
        dom2.append(j)
    elif i in ['B轮']:
        dom3.append(j)
    elif i in ['C轮']:
        dom4.append(j)
    elif i in ['D轮及以上']:
        dom5.append(j)
    elif i in ['上市公司']:
        dom6.append(j)
    elif i in ['未融资']:
        dom7.append(j)
    else:
        dom8.append(j)

chart_options = dict(title_pos='center', title_top='18', width=1200, height=600)
boxplot = Boxplot("拉勾网数据分析岗—公司状态薪水图(元/月)", **chart_options)
boxplot.use_theme("chalk")
# One label per salary list, in the same order as dom1..dom8.
x_axis = ['天使轮', 'A轮', 'B轮', 'C轮', 'D轮及以上', '上市公司', '未融资', '不需要融资']
y_axis = [dom1, dom2, dom3, dom4, dom5, dom6, dom7, dom8]
# Reduce every raw salary list to box-plot summary values before plotting.
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis)
boxplot.render("拉勾网数据分析岗—公司状态薪水图.html")
Exemplo n.º 22
0
# 读取文件
column_names = ["quote", "score", "info", "title", "people"]
df = pd.read_csv('douban.csv', header=0, names=column_names)
(dom1, dom2) = ([], [])
# Clean the data: derive a country group and a year value from every 'info'
# entry, then store them as the new 'country' and 'year' columns.
chinese_regions = ['中国大陆', '台湾', '香港']
for info in df['info']:
    fields = info.split('/')
    country = fields[1].split(' ')[0].strip()
    dom1.append('中国' if country in chinese_regions else '外国')
    dom2.append(fields[0].replace('(中国大陆)', '').strip())
df['country'] = dom1
df['year'] = dom2
# Select the rows of each country group.
df1 = df.loc[df['country'] == '中国']
df2 = df.loc[df['country'] == '外国']
# Build the box plot.
chart_options = dict(title_pos='center', title_top='18', width=800, height=400)
boxplot = Boxplot("豆瓣电影TOP250-中外电影评分情况", **chart_options)
x_axis = ['中国', '外国']
y_axis = [df1['score'], df2['score']]
_yaxis = boxplot.prepare_data(y_axis)
boxplot.add("", x_axis, _yaxis, yaxis_min=8, yaxis_max=10)
boxplot.render("豆瓣电影TOP250中外评分情况.html")