Пример #1
0
    def start_requests(self):
        """Yield one request for every stored source URL of every job table.

        The fourth element of each job row is used as a table name. The rows
        fetched for that table (the query is parameterised with the ``boss``
        source-type value) provide the row id and the URL to request.
        """
        jobs = self.postgres.handler(sql.get_job())
        for job_row in jobs:
            tb_name = job_row[3]
            rows = self.postgres.fetch_all(
                sql.get_data(tb_name),
                (enums.SourceType.boss.value,),
            )
            for row in rows:
                tb_id = row[0]
                source_url = row[1]
                # Pass the originating table and row id along to the callback.
                yield Request(
                    source_url,
                    meta={'tb_name': tb_name, 'tb_id': tb_id},
                    callback=self.parse,
                )
Пример #2
0
    def start_requests(self):
        """Yield one request for every stored source URL of every job table.

        The fourth element of each job row is used as a table name. The rows
        fetched for that table (the query is parameterised with the ``lagou``
        source-type value) provide the row id and the URL to request. Every
        request is sent with ``self.headers`` after its ``Cookie`` entry has
        been replaced by a value from ``JobSpider.random_cookie()``.
        """
        jobs = self.postgres.handler(sql.get_job())
        for job_row in jobs:
            tb_name = job_row[3]
            rows = self.postgres.fetch_all(
                sql.get_data(tb_name),
                (enums.SourceType.lagou.value,),
            )
            for row in rows:
                tb_id = row[0]
                source_url = row[1]

                # The shared header dict is updated in place right before the
                # request is built, so each request gets its own cookie value.
                self.headers['Cookie'] = JobSpider.random_cookie()
                yield Request(
                    source_url,
                    headers=self.headers,
                    meta={'tb_name': tb_name, 'tb_id': tb_id},
                    callback=self.parse,
                )
Пример #3
0
 def __init__(self, **kwargs):
     """Initialise the spider's database handle, cached rows and URL templates.

     All keyword arguments are forwarded unchanged to the parent initialiser.
     """
     super().__init__(**kwargs)

     # One database handle, reused for the two lookups cached below.
     db = app.postgres()
     self.postgres = db
     self.city_list = db.fetch_all(sql.get_city())
     self.job_list = db.fetch_all(sql.get_job())

     # URL templates; each ``{}`` placeholder is filled in by code outside
     # this method.
     self.start = 'https://www.lagou.com/jobs/positionAjax.json?px=default&needAddtionalResult=false&city={}'
     self.referer = 'https://www.lagou.com/jobs/list_{}'
     self.source_url = 'https://www.lagou.com/jobs/{}.html'
     self.company_logo = 'https://www.lgstatic.com/thumbnail_120x120/{}'

     # Base headers mimicking a desktop Chrome browser.
     user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/69.0.3497.100 Safari/537.36')
     self.headers = {'Host': 'www.lagou.com', 'User-Agent': user_agent}
Пример #4
0
 def __init__(self, **kwargs):
     """Initialise the spider's database handle, cached rows and URL templates.

     All keyword arguments are forwarded unchanged to the parent initialiser.
     """
     super().__init__(**kwargs)
     self.postgres = app.postgres()
     self.city_list = self.postgres.fetch_all(sql.get_city())
     self.job_list = self.postgres.fetch_all(sql.get_job())

     # URL templates; the ``{0}`` placeholder is filled in by code outside
     # this method.
     self.start = 'https://www.lagou.com/jobs/positionAjax.json?px=default&needAddtionalResult=false&city={0}'
     self.referer = 'https://www.lagou.com/jobs/list_{0}'
     self.source_url = 'https://www.lagou.com/jobs/{0}.html'

     # Base headers mimicking a desktop Chrome browser.
     self.headers = {
         'Connection': 'keep-alive',
         # The key is the bare field name: a trailing ':' is not part of an
         # HTTP header name and would produce a malformed/unknown header.
         # NOTE(review): advertising 'br' lets the server reply with Brotli;
         # confirm the HTTP client in use can decode it.
         'Accept-Encoding': 'gzip, deflate, br',
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko'
                       ') Chrome/67.0.3396.79 Safari/537.36',
     }
Пример #5
0
 def __init__(self, **kwargs):
     """Initialise the spider's database handle, cached rows and start URL.

     All keyword arguments are forwarded unchanged to the parent initialiser.
     """
     super().__init__(**kwargs)

     # One database handle, reused for the two lookups cached below.
     db = app.postgres()
     self.postgres = db
     self.city_list = db.fetch_all(sql.get_city())
     self.job_list = db.fetch_all(sql.get_job())

     # Start URL template with two positional placeholders, filled in by
     # code outside this method.
     self.start = 'https://www.zhipin.com/c{0}-p{1}'