示例#1
0
def crawl_lagou_jobs_count():
    """Crawl per-city Lagou job counts for the most frequent keywords.

    For every keyword returned by
    ``KeywordController.get_most_frequently_keywords(limit=800)`` the job
    count is requested once for the nationwide total and once per tracked
    city, and one row is stored through ``JobsCountController.add`` under
    the date returned by ``get_date_begin_by_timestamp(after_days=-1)``
    (presumably the previous day -- confirm against that helper).

    When a response does not have the expected
    ``content -> positionResult -> totalCount`` shape the failure is logged
    and the count for that city stays 0, so one bad response does not abort
    the crawl for all remaining keywords.

    After the crawl, the cache of ``JobController.get_jobs_statistics`` is
    cleared.
    """
    pre_date = get_date_begin_by_timestamp(after_days=-1)
    keywords = KeywordController.get_most_frequently_keywords(limit=800)
    logging.info('{} crawl_lagou_job_count 定时任务运行中! 关键词 {} 个'.format(
        pre_date, len(keywords)))
    # '全国' is the nationwide total; the rest are the tracked cities.
    cities = ('全国', '北京', '上海', '广州', '深圳', '杭州', '成都')
    for keyword in keywords:
        city_jobs_count = dict.fromkeys(cities, 0)
        for city in cities:
            response_json = request_jobs_count_json(city=city, keyword=keyword)
            try:
                city_jobs_count[city] = response_json['content'][
                    'positionResult']['totalCount']
            except (LookupError, TypeError):
                # Malformed or empty response: keep the default of 0 for
                # this city and carry on with the remaining requests.
                logging.error(
                    '获取 jobs count 信息失败, 关键词 id 为 {}, 城市为 {}'.format(
                        keyword.id, city),
                    exc_info=True)
        JobsCountController.add(date=pre_date,
                                keyword_id=keyword.id,
                                all_city=city_jobs_count['全国'],
                                beijing=city_jobs_count['北京'],
                                shanghai=city_jobs_count['上海'],
                                guangzhou=city_jobs_count['广州'],
                                shenzhen=city_jobs_count['深圳'],
                                hangzhou=city_jobs_count['杭州'],
                                chengdu=city_jobs_count['成都'])
    logging.info('crawl_lagou_job_count 任务完成!')
    # Statistics were computed from the old counts; drop them.
    JobController.get_jobs_statistics.cache_clear()
    logging.info('主动失效缓存成功')
示例#2
0
    def get(self):
        """Render the statistics page for the requested keyword.

        Reads the ``keyword`` request argument (default ``'python'``), looks
        the keyword up through ``KeywordController.get`` and renders
        ``keyword.html`` with the six statistics returned by
        ``JobController.get_jobs_statistics``. An unknown keyword produces a
        404 response.

        NOTE(review): assumes this handler is a Tornado ``RequestHandler``
        (``get_argument`` / ``render`` / ``send_error``) -- confirm.
        """
        keyword_name = self.get_argument('keyword', 'python')
        keyword = KeywordController.get(name=keyword_name)
        if not keyword:
            # send_error() sets the HTTP status and then delegates to
            # write_error(); calling write_error() directly would emit the
            # error page while leaving the response status unchanged.
            self.send_error(404)
            return

        (keyword_jobs_count, educations_request_counter,
         finance_stage_distribution, city_jobs_counter, salary_distribution,
         work_years_request_analyze) = JobController.get_jobs_statistics(
             keyword_id=keyword.id)

        self.render("keyword.html",
                    keyword=keyword_name,
                    keyword_jobs_count=keyword_jobs_count,
                    educations_request_counter=educations_request_counter,
                    finance_stage_distribution=finance_stage_distribution,
                    city_jobs_counter=city_jobs_counter,
                    salary_distribution=salary_distribution,
                    work_years_request_analyze=work_years_request_analyze)
示例#3
0
    def get(self):
        """Count the visit and render the statistics page for a keyword.

        Increments the visitor counter in Redis, reads the ``keyword``
        request argument (default ``'python'``), looks the keyword up
        through ``KeywordController.get`` and renders ``keyword.html`` with
        the six statistics returned by ``get_jobs_statistics``. An unknown
        keyword produces a 404 response; the visit is counted either way.

        NOTE(review): assumes this handler is a Tornado ``RequestHandler``
        (``get_argument`` / ``render`` / ``send_error``) -- confirm.
        """
        # Count the visit; per the original note, incr() is thread-safe.
        redis_instance.incr(constants.REDIS_VISITED_PEOPLES_COUNT_KEY)
        keyword_name = self.get_argument('keyword', 'python')
        keyword = KeywordController.get(name=keyword_name)
        if not keyword:
            # send_error() sets the HTTP status and then delegates to
            # write_error(); calling write_error() directly would emit the
            # error page while leaving the response status unchanged.
            self.send_error(404)
            return

        (keyword_jobs_count, educations_request_counter,
         finance_stage_distribution, city_jobs_counter, salary_distribution,
         work_years_request_analyze) = get_jobs_statistics(keyword.id)

        self.render("keyword.html",
                    keyword=keyword_name,
                    keyword_jobs_count=keyword_jobs_count,
                    educations_request_counter=educations_request_counter,
                    finance_stage_distribution=finance_stage_distribution,
                    city_jobs_counter=city_jobs_counter,
                    salary_distribution=salary_distribution,
                    work_years_request_analyze=work_years_request_analyze)
示例#4
0
def crawl_lagou_jobs_count():
    """Crawl per-city Lagou job counts for the most frequent keywords.

    For each keyword from
    ``KeywordController.get_most_frequently_keywords(limit=2000)`` the job
    count is requested for the nationwide total and for every tracked city,
    then stored as one row via ``JobsCountController.add``. A response that
    cannot be parsed is logged and leaves that city's count at 0. Once all
    keywords are processed the ``get_jobs_statistics`` cache is cleared and
    the value returned by ``cache_clear`` is logged.
    """
    # (city name sent to the API, keyword argument of JobsCountController.add)
    city_columns = (
        ('全国', 'all_city'),
        ('北京', 'beijing'),
        ('上海', 'shanghai'),
        ('广州', 'guangzhou'),
        ('深圳', 'shenzhen'),
        ('杭州', 'hangzhou'),
        ('成都', 'chengdu'),
    )

    pre_date = get_date_begin_by_timestamp(after_days=-1)
    keywords = KeywordController.get_most_frequently_keywords(limit=2000)
    start_message = '{} crawl_lagou_job_count 定时任务运行中! 关键词 {} 个'.format(
        pre_date, len(keywords))
    logging.info(start_message)

    for keyword in keywords:
        # Every column starts at 0 and keeps that value if parsing fails.
        counts = {column: 0 for _, column in city_columns}
        for city, column in city_columns:
            payload = request_jobs_count_json(city=city, keyword=keyword)
            try:
                position_result = payload['content']['positionResult']
                counts[column] = position_result['totalCount']
            except Exception:
                logging.getLogger(__name__).error(
                    '获取 jobs count 信息失败, 关键词为 {}'.format(keyword.name),
                    exc_info=True)
        JobsCountController.add(date=pre_date, keyword_id=keyword.id, **counts)

    logging.info('crawl_lagou_job_count 任务完成!')
    # Invalidate the cached statistics.
    remove_count = cache_clear(get_jobs_statistics)
    logging.info('主动失效缓存成功, 数量{}'.format(remove_count))
示例#5
0
def generate_job_data(job, company_id):
    """Store one job posting and link it to its keywords.

    Fetches the department, description and keywords for the posting via
    ``requests_job_detail_data``, converts the raw fields of ``job`` through
    the module's lookup tables, saves the posting with ``JobController.add``
    and adds one ``JobKeywordController`` row per keyword.

    Args:
        job: Mapping describing the posting. ``positionId``,
            ``positionName``, ``workYear``, ``salary``, ``education``,
            ``jobNature`` and ``createTime`` are read unconditionally;
            ``city`` and ``positionAdvantage`` are optional (defaulting to
            city id ``0`` and ``''`` respectively).
        company_id: Passed through to ``JobController.add``.

    Raises:
        KeyError: Presumably (assuming ``job`` and the lookup tables are
            dicts) when a required key is missing, or when the education or
            job-nature value has no entry in its table.
    """
    job_id = job['positionId']
    department, description, keywords = requests_job_detail_data(job_id)

    if 'city' in job:
        city_id = CityController.get_city_id_by_name(job['city'])
    else:
        city_id = 0
    title = job['positionName']

    work_year_text = filter_http_tag(job['workYear'])
    if work_year_text in WORK_YEARS_REQUEST_DICT:
        work_year = WORK_YEARS_REQUEST_DICT[work_year_text]
    else:
        # Unrecognised value: report it and fall back to the 'unknown' entry.
        logger.error(
            '{} not in WORK_YEARS_REQUEST_DICT'.format(work_year_text))
        work_year = WORK_YEARS_REQUEST_DICT['unknown']

    salary = job['salary']
    education = EDUCATION_REQUEST_DICT[job['education']]
    advantage = job['positionAdvantage'] if 'positionAdvantage' in job else ''
    job_nature = JOB_NATURE_DICT[job['jobNature']]
    created_at = job_date2timestamp(job['createTime'])

    JobController.add(id=job_id,
                      company_id=company_id,
                      title=title,
                      work_year=work_year,
                      city_id=city_id,
                      salary=salary,
                      education=education,
                      department=department,
                      description=description,
                      advantage=advantage,
                      job_nature=job_nature,
                      created_at=created_at)
    for keyword in keywords:
        keyword_id = KeywordController.get_keyword_id_by_name(keyword)
        JobKeywordController.add(job_id, keyword_id, city_id)