def handle_city_job(self, city):
    """Fetch every result page of Python job listings for ``city`` and store each job.

    The listing page is requested once to read the total page count, then
    each page is requested from the ``positionAjax.json`` endpoint and every
    job entry in the response is passed to ``lagou_mysql.insert_item``.

    Args:
        city: City name interpolated into the request URLs and forwarded to
            ``self.handle_request`` as ``info``.

    Returns:
        None. Returns early, without requesting any result page, when the
        total page count cannot be found in the first response.
    """
    list_url = "https://www.lagou.com/jobs/list_python?city=%s&cl=false&fromSearch=true&labelWords=&suginput=" % city
    first_response = self.handle_request(method="GET", url=list_url)
    total_page_search = re.compile(r'class="span\stotalNum">(\d+)</span>')
    try:
        total_page = total_page_search.search(first_response).group(1)
    except (AttributeError, TypeError):
        # No job postings for this city: the page counter is missing, so
        # search() returned None (AttributeError), or the first response is
        # not text at all (TypeError). Anything else must propagate.
        return
    print(city, total_page)

    # Loop invariants: the endpoint and the Referer depend only on the city.
    page_url = "https://www.lagou.com/jobs/positionAjax.json?city=%s&needAddtionalResult=false" % city
    # The Referer is the same URL as the listing page requested above; it is
    # stored encoded (bytes), as the original note requires.
    # NOTE(review): presumably needed for non-ASCII city names - confirm.
    referer = list_url.encode()

    for page_number in range(1, int(total_page) + 1):
        data = {"pn": page_number, "kd": "python"}
        self.header['Referer'] = referer
        response = self.handle_request(method="POST", url=page_url, data=data, info=city)
        lagou_data = json.loads(response)
        for job in lagou_data['content']['positionResult']['result']:
            lagou_mysql.insert_item(job)
def handle_city_job(self, city):
    """Fetch every result page of Python job listings for ``city`` and store each job.

    The listing page (default sort order, ``px=default``) is requested once
    to read the total page count, then each page is requested from the
    ``positionAjax.json`` endpoint and every job entry in the response is
    passed to ``lagou_mysql.insert_item``.

    Args:
        city: City name interpolated into the request URLs and forwarded to
            ``self.handle_request`` as ``info``.

    Returns:
        None. Returns early, without requesting any result page, when the
        total page count cannot be found in the first response.
    """
    first_request_url = "https://www.lagou.com/jobs/list_python?&px=default&city=%s" % city
    first_response = self.handle_request(method='GET', url=first_request_url)
    total_page_search = re.compile(r'class="span\stotalNum">(\d+)</span>')
    try:
        total_page = total_page_search.search(first_response).group(1)
    except (AttributeError, TypeError):
        # The page counter is missing, so search() returned None
        # (AttributeError), or the first response is not text at all
        # (TypeError). Anything else must propagate.
        return

    # Loop invariants: the endpoint and the Referer depend only on the city.
    page_url = "https://www.lagou.com/jobs/positionAjax.json?px=default&city=%s&needAddtionalResult=false" % city
    # The Referer is stored encoded (bytes) rather than as str.
    # NOTE(review): presumably needed for non-ASCII city names - confirm.
    referer = ("https://www.lagou.com/jobs/list_python?px=default&city=%s" % city).encode()

    for page_number in range(1, int(total_page) + 1):
        data = {"pn": page_number, "kd": "python"}
        self.header['Referer'] = referer
        response = self.handle_request(url=page_url, method='POST', data=data, info=city)
        lagou_data = json.loads(response)
        for job in lagou_data['content']['positionResult']['result']:
            lagou_mysql.insert_item(job)