def start_requests(self):
    """Generate the initial requests from rows already stored in the database.

    For every job returned by ``self.postgres.handler(sql.get_job())`` the
    fourth column (``job[3]``) is used as a table name. The rows fetched via
    ``sql.get_data(table)`` -- with ``enums.SourceType.boss.value`` passed as
    the single query parameter -- each produce one ``Request``.

    Yields:
        Request: targets the row's second column (the source URL), carries
        ``tb_name`` and ``tb_id`` (the row's first column) in ``meta``, and
        is handled by ``self.parse``.
    """
    for job_row in self.postgres.handler(sql.get_job()):
        table = job_row[3]
        records = self.postgres.fetch_all(
            sql.get_data(table),
            (enums.SourceType.boss.value,),
        )
        for record in records:
            row_id = record[0]
            url = record[1]
            yield Request(
                url,
                meta={'tb_name': table, 'tb_id': row_id},
                callback=self.parse,
            )
def start_requests(self):
    """Generate the initial requests from rows already stored in the database.

    For every job returned by ``self.postgres.handler(sql.get_job())`` the
    fourth column (``job[3]``) is used as a table name. The rows fetched via
    ``sql.get_data(table)`` -- with ``enums.SourceType.lagou.value`` passed as
    the single query parameter -- each produce one ``Request``.

    Side effect: ``self.headers['Cookie']`` is overwritten in place with a
    new ``JobSpider.random_cookie()`` value before each request is built.

    Yields:
        Request: targets the row's second column (the source URL), sent with
        ``self.headers``, carrying ``tb_name`` and ``tb_id`` (the row's first
        column) in ``meta``, and handled by ``self.parse``.
    """
    for job_row in self.postgres.handler(sql.get_job()):
        table = job_row[3]
        records = self.postgres.fetch_all(
            sql.get_data(table),
            (enums.SourceType.lagou.value,),
        )
        for record in records:
            row_id = record[0]
            url = record[1]
            # Refresh the cookie on the shared header dict for every request.
            self.headers['Cookie'] = JobSpider.random_cookie()
            yield Request(
                url,
                headers=self.headers,
                meta={'tb_name': table, 'tb_id': row_id},
                callback=self.parse,
            )
def __init__(self, **kwargs):
    """Set up the database handle, lookup lists, URL templates and headers.

    Args:
        **kwargs: forwarded unchanged to the parent class initializer.
    """
    super().__init__(**kwargs)

    # Database handle plus the two lists loaded once at construction time.
    db = app.postgres()
    self.postgres = db
    self.city_list = db.fetch_all(sql.get_city())
    self.job_list = db.fetch_all(sql.get_job())

    # ``str.format`` templates; each takes a single positional argument.
    self.start = (
        'https://www.lagou.com/jobs/positionAjax.json'
        '?px=default&needAddtionalResult=false&city={}'
    )
    self.referer = 'https://www.lagou.com/jobs/list_{}'
    self.source_url = 'https://www.lagou.com/jobs/{}.html'
    self.company_logo = 'https://www.lgstatic.com/thumbnail_120x120/{}'

    # Base request headers stored on the instance.
    browser_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/69.0.3497.100 Safari/537.36'
    )
    self.headers = {
        'Host': 'www.lagou.com',
        'User-Agent': browser_agent,
    }
def __init__(self, **kwargs):
    """Set up the database handle, lookup lists, URL templates and headers.

    Args:
        **kwargs: forwarded unchanged to the parent class initializer.
    """
    super().__init__(**kwargs)

    # Database handle plus the two lists loaded once at construction time.
    self.postgres = app.postgres()
    self.city_list = self.postgres.fetch_all(sql.get_city())
    self.job_list = self.postgres.fetch_all(sql.get_job())

    # ``str.format`` templates; each takes a single positional argument.
    self.start = (
        'https://www.lagou.com/jobs/positionAjax.json'
        '?px=default&needAddtionalResult=false&city={0}'
    )
    self.referer = 'https://www.lagou.com/jobs/list_{0}'
    self.source_url = 'https://www.lagou.com/jobs/{0}.html'

    # Base request headers stored on the instance.
    # The header name used to be 'Accept-Encoding:' (trailing colon), which
    # is not a valid HTTP field name; the colon has been removed.
    # NOTE(review): this now really advertises 'br' -- confirm the downloader
    # can decode Brotli responses, otherwise drop 'br' from the value.
    self.headers = {
        'Connection': 'keep-alive',
        'Accept-Encoding': 'gzip, deflate, br',
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/67.0.3396.79 Safari/537.36'
        ),
    }
def __init__(self, **kwargs):
    """Set up the database handle, lookup lists and the start URL template.

    Args:
        **kwargs: forwarded unchanged to the parent class initializer.
    """
    super().__init__(**kwargs)

    # Database handle plus the two lists loaded once at construction time.
    db = app.postgres()
    self.postgres = db
    self.city_list = db.fetch_all(sql.get_city())
    self.job_list = db.fetch_all(sql.get_job())

    # ``str.format`` template with two positional slots ({0} and {1}).
    self.start = 'https://www.zhipin.com/c{0}-p{1}'