def save_to_file(self, data, file_path):
    """Append one record to a CSV file, writing a header row first if needed.

    Args:
        data: Mapping of column name to value. Its keys define the CSV
            columns and their order.
        file_path: Path of the CSV file. Its parent directory is handed to
            ``create_or_get_directory`` before the file is opened.
    """
    create_or_get_directory(os.path.dirname(file_path))
    # A header is required when the file is new, or exists but is still empty.
    needs_header = (not os.path.exists(file_path)
                    or os.path.getsize(file_path) == 0)
    # newline='' lets the csv module control line endings itself; without it
    # every row is followed by a blank line on Windows.
    with open(file_path, 'a', newline='') as f:
        dict_writer = csv.DictWriter(f, data.keys())
        if needs_header:
            dict_writer.writeheader()
        dict_writer.writerow(data)
    logger.info(f'保存数据至:{file_path}')
def requirement_word_cloud(self):
    """Render a word cloud of keywords extracted from the job requirements.

    Joins every entry of ``self.frame['requirement']`` into one lower-cased
    text, extracts the 100 highest-weighted tags with jieba (using the stop
    words from ``configures/stop_words.txt``), renders them to
    ``requirement_word_cloud.html`` and registers the chart in
    ``self.middle_charts``.
    """
    jieba.analyse.set_stop_words('configures/stop_words.txt')
    data = []
    for content in self.frame['requirement']:
        try:
            word = "".join(content)
        except TypeError as error:
            # Entries that cannot be joined (e.g. a missing value) are
            # skipped. Previously the text of the preceding row was appended
            # again here, which inflated its keyword weights.
            print(f"获取招聘要求发生错误:{error}")
            continue
        data.append(word)
    tags = jieba.analyse.extract_tags("".join(data).lower(),
                                      topK=100,
                                      withWeight=True)
    # pyecharts receives [word, weight] pairs.
    requirement_pairs = [[wd, weight] for wd, weight in tags]
    html_path = os.path.join(
        create_or_get_directory(os.path.join('report', 'single')),
        'requirement_word_cloud.html')
    (WordCloud().add(series_name="招聘要求",
                     data_pair=requirement_pairs,
                     word_size_range=[16, 166]).set_global_opts(
                         tooltip_opts=opts.TooltipOpts(is_show=True),
                     ).render(html_path))
    script_string = self.get_javascript_string(html_path)
    self.set_report_javascript(script_string, 'requirement_word_cloud')
    self.middle_charts.append({
        'title': '招聘要求关键字',
        'name': 'requirement_word_cloud'
    })
def city_company_scale_chart(self):
    """Render a bar chart of company stages for the top 10 cities.

    Cross-tabulates ``self.frame.city`` against ``self.frame.company_scale``,
    keeps the ten cities with the largest row totals, renders one bar series
    per company stage to ``city_company_scale_chart.html`` and registers the
    chart in ``self.right_charts``.
    """
    city_company_scale = pd.crosstab(self.frame.city,
                                     self.frame.company_scale,
                                     margins=True)
    # The 'All' margin row holds the grand total and therefore always sorts
    # first. It must be removed BEFORE slicing, otherwise it occupies one of
    # the ten slots and only nine cities are left.
    city_company_scale = (city_company_scale.drop('All', axis=0).sort_values(
        by='All', ascending=False)[:10].drop('All', axis=1))
    html_path = os.path.join(
        create_or_get_directory(os.path.join('report', 'single')),
        'city_company_scale_chart.html')
    bar = Bar(init_opts=opts.InitOpts(width="60%"))
    bar.add_xaxis(list(city_company_scale.index))
    # One series per company stage (column), one value per city (row).
    for scale_name, counts in zip(city_company_scale.columns,
                                  city_company_scale.T.values):
        bar.add_yaxis(scale_name, [str(v) for v in counts])
    bar.set_global_opts(
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(rotate=-20, color='red')),
        yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(color='red')),
    )
    bar.render(html_path)
    script_string = self.get_javascript_string(html_path)
    self.set_report_javascript(script_string, 'city_company_scale_chart')
    self.right_charts.append({
        'title': '前10城市的企业阶段',
        'name': 'city_company_scale_chart'
    })
def company_field_chart(self):
    """Render a pie chart of how often each company field occurs.

    Each ``self.frame.company_field`` value may list several fields joined by
    a delimiter. The value is split into individual fields and the posting
    count of the combined value is credited to each of them. The chart is
    rendered to ``company_field_chart.html`` and registered in
    ``self.right_charts``.
    """
    kd = self.frame.company_field.value_counts()
    field_count = {}
    for field_key, field_value in dict(kd).items():
        # Delimiters: ASCII and full-width comma, ideographic comma, period,
        # pipe, tab, space and the CJK vertical bar. The previous pattern
        # only matched a pipe when a tab followed it, and listed the ASCII
        # comma twice (presumably one was meant to be the full-width form).
        for key in re.split(r'[,,、.|\t 丨]', field_key):
            if not key:
                # Adjacent delimiters yield empty tokens; they are not fields.
                continue
            field_count[key] = field_count.get(key, 0) + field_value
    html_path = os.path.join(
        create_or_get_directory(os.path.join('report', 'single')),
        'company_field_chart.html')
    (Pie().add("企业领域", [
        [k, str(v)] for k, v in field_count.items()
    ]).set_global_opts(legend_opts=opts.LegendOpts(
        is_show=False), ).set_series_opts(tooltip_opts=opts.TooltipOpts(
            trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)")).render(
                html_path))
    script_string = self.get_javascript_string(html_path)
    self.set_report_javascript(script_string, 'company_field_chart')
    self.right_charts.append({
        'title': '企业领域统计',
        'name': 'company_field_chart'
    })
def company_size_bar_chart(self):
    """Render a rose-type pie chart of company sizes.

    Counts the values of ``self.frame.company_size``, renders them to
    ``company_size_bar_chart.html`` and registers the chart in
    ``self.right_charts``.
    """
    kd = self.frame.company_size.value_counts()
    results = [[k, str(v)] for k, v in dict(kd).items()]
    html_path = os.path.join(
        create_or_get_directory(os.path.join('report', 'single')),
        'company_size_bar_chart.html')
    (Pie(init_opts=opts.InitOpts()).add(
        # The series name is shown in the tooltip through "{a}". It used to
        # read "工作经验" (work experience), copied from another chart.
        series_name="企业规模",
        data_pair=results,
        rosetype="radius",
        radius="55%",
        center=["50%", "50%"],
        label_opts=opts.LabelOpts(is_show=False, position="center"),
    ).set_global_opts(
        legend_opts=opts.LegendOpts(is_show=False), ).set_series_opts(
            tooltip_opts=opts.TooltipOpts(
                trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"),
            label_opts=opts.LabelOpts(color="rgba(255, 255, 255, 0.3)"),
        ).render(html_path))
    script_string = self.get_javascript_string(html_path)
    self.set_report_javascript(script_string, 'company_size_bar_chart')
    self.right_charts.append({
        'title': '企业规模统计',
        'name': 'company_size_bar_chart'
    })
def address_pie_chart(self):
    """Render a pie chart of the ten cities with the most job postings.

    Takes the first ten entries of the ``self.frame.city`` value counts,
    renders them to ``address_pie_chart.html`` and registers the chart in
    ``self.left_charts``.
    """
    chart_name = 'address_pie_chart'
    top_cities = self.frame.city.value_counts()[:10]
    # pyecharts receives [label, value] pairs; counts are passed as strings.
    results = []
    for city, count in dict(top_cities).items():
        results.append([city, str(count)])

    output_dir = create_or_get_directory(os.path.join('report', 'single'))
    html_path = os.path.join(output_dir, 'address_pie_chart.html')

    tooltip = opts.TooltipOpts(trigger="item",
                               formatter="{a} <br/>{b}: {c} ({d}%)")
    pie = Pie()
    pie.add("招聘数前10城市", results)
    pie.set_global_opts(legend_opts=opts.LegendOpts(is_show=False))
    pie.set_series_opts(tooltip_opts=tooltip)
    pie.render(html_path)

    self.set_report_javascript(self.get_javascript_string(html_path),
                               chart_name)
    self.left_charts.append({'title': '招聘数前10城市', 'name': chart_name})
def company_scale_bar_chart(self):
    """Render a pie chart of company development stages.

    Counts the values of ``self.frame.company_scale``, renders them with a
    fixed colour palette to ``company_scale_bar_chart.html`` and registers
    the chart in ``self.right_charts``.
    """
    chart_name = 'company_scale_bar_chart'
    stage_counts = self.frame.company_scale.value_counts()
    # pyecharts receives [label, value] pairs; counts are passed as strings.
    results = []
    for stage, count in dict(stage_counts).items():
        results.append([stage, str(count)])

    output_dir = create_or_get_directory(os.path.join('report', 'single'))
    html_path = os.path.join(output_dir, 'company_scale_bar_chart.html')

    palette = ["green", "yellow", "red", "pink", "orange", "purple"]
    tooltip = opts.TooltipOpts(trigger="item",
                               formatter="{a} <br/>{b}: {c} ({d}%)")
    pie = Pie()
    pie.add("企业发展阶段", results)
    pie.set_colors(palette)
    pie.set_global_opts(legend_opts=opts.LegendOpts(is_show=False))
    pie.set_series_opts(tooltip_opts=tooltip)
    pie.render(html_path)

    self.set_report_javascript(self.get_javascript_string(html_path),
                               chart_name)
    self.right_charts.append({'title': '企业发展阶段统计', 'name': chart_name})
def salary_pie_chart(self):
    """Render a bar chart of the salary distribution.

    Every distinct ``self.frame.salary`` text (for example ``"10k-20k"``) is
    reduced to the midpoint of its lower and upper bound, and its posting
    count is added to the bucket whose floor is the largest one not exceeding
    that midpoint. The chart is rendered to ``salary_pie_chart.html`` and
    registered in ``self.middle_charts``.

    Raises:
        ValueError: If the lower bound of a salary text cannot be parsed as a
            number.
    """
    # Lower bound of each bucket; the last bucket is open-ended.
    bucket_floors = [
        0, 5000, 10000, 15000, 20000, 30000, 40000, 50000, 60000, 70000,
        80000, 90000, 100000
    ]
    salary_xaxis = [str(floor) for floor in bucket_floors]
    salary_dict = dict.fromkeys(bucket_floors, 0)

    def to_amount(text):
        # "15k·13薪" -> 15000.0 ; "30k以上" -> 30000.0
        return float(
            text.split('·')[0].replace('以上',
                                       '').rstrip('k').rstrip('K')) * 1000

    kd = self.frame.salary.value_counts()
    for k, v in dict(kd).items():
        parts = k.split('-')
        min_salary = to_amount(parts[0])
        try:
            max_salary = to_amount(parts[1])
        except (IndexError, ValueError):
            # No upper bound given (or not numeric): use the lower bound.
            max_salary = min_salary
        salary = 0.5 * (min_salary + max_salary)
        # Plain numeric comparison. Testing a float with "in range(...)" is
        # a linear scan and never matches a non-integral midpoint, which
        # made the final ten-billion-element range effectively hang.
        bucket = 0
        for floor in bucket_floors:
            if salary >= floor:
                bucket = floor
        salary_dict[bucket] += v

    # bucket_floors is ascending, so the bars line up with salary_xaxis.
    salary_yaxis = [
        opts.BarItem(
            name=str(floor),
            value=int(salary_dict[floor]),
            itemstyle_opts=opts.ItemStyleOpts(color="#d48265"),
        ) for floor in bucket_floors
    ]
    html_path = os.path.join(
        create_or_get_directory(os.path.join('report', 'single')),
        'salary_pie_chart.html')
    (Bar(init_opts=opts.InitOpts(
        width="60%")).add_xaxis(salary_xaxis).add_yaxis(
            "", salary_yaxis, category_gap=-1).set_series_opts(
                label_opts=opts.LabelOpts(is_show=False)).set_global_opts(
                    yaxis_opts=opts.AxisOpts(
                        name="岗位数量",
                        axislabel_opts=opts.LabelOpts(color='red'),
                        name_textstyle_opts=opts.TextStyleOpts(
                            color='#d48265')),
                    xaxis_opts=opts.AxisOpts(
                        name="薪酬",
                        axislabel_opts=opts.LabelOpts(color='red'),
                        name_textstyle_opts=opts.TextStyleOpts(
                            color='#d48265')),
                    tooltip_opts=opts.TooltipOpts(
                        trigger="item",
                        # Maps the bucket floor (the bar name) to a
                        # human-readable range label in the tooltip.
                        formatter=JsCode("""function (params) {
                            var name='';
                            if (params.name < 5000){
                                name='小于5K'
                            } else if (params.name < 10000) {
                                name='5K-10K'
                            } else if (params.name < 15000) {
                                name='10K-15K'
                            } else if (params.name < 20000) {
                                name='15K-20K'
                            } else if (params.name < 30000) {
                                name='20K-30K'
                            } else if (params.name < 40000) {
                                name='30K-40K'
                            } else if (params.name < 50000) {
                                name='40K-50K'
                            } else if (params.name < 60000) {
                                name='50K-60K'
                            } else if (params.name < 70000) {
                                name='60K-70K'
                            } else if (params.name < 80000) {
                                name='70K-80K'
                            } else if (params.name < 90000) {
                                name='80K-90K'
                            } else if (params.name < 100000) {
                                name='90K-100K'
                            } else {
                                name='大于100K'
                            }
                            return name+':'+params.value;
                        }
                        """)),
                ).render(html_path))
    script_string = self.get_javascript_string(html_path)
    self.set_report_javascript(script_string, 'salary_pie_chart')
    self.middle_charts.append({
        'title': '薪资分布',
        'name': 'salary_pie_chart'
    })