示例#1
0
    def _start_requests(self):
        """Yield the crawl's seed requests.

        Covers three job sites: 51job (one search URL per area code and
        salary bucket), zhaopin's Beijing company index, and liepin's
        company index. Fix: dropped the unused ``req = []`` — this is a
        generator and never returned a list.
        """
        logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))

        # 51job whole site
        for _url, name in self.start_urls.items():
            # %%2520 survives the %-interpolation as a literal %2520;
            # the {0:{1}2d} / {2} placeholders are filled by str.format below.
            url = 'http://search.51job.com/list/%s,000000,0000,00,9,{0:{1}2d},%%2520,2,{2}.html' % _url
            for i in range(1, 13):  # salary buckets 1..12
                start_url = url.format(i, '0', 1)
                # Keep the page-number slot open so the callback can build page N.
                formater = url.format(i, '0', '{}')
                yield self.request(start_url,
                    headers=self.default_header,
                    redis_flag=True,
                    meta={'formater': formater, 'money': i},
                    callback=self.job_in)

        # zhaopin whole site
        yield self.request('http://company.zhaopin.com/beijing/',
            headers=self.default_header,
            callback=self.zhilian_in)

        # liepin whole site
        yield self.request('https://www.liepin.com/company/',
            headers=self.default_header,
            callback=self.liepin_in)
示例#2
0
 def start_requests(self):
     """Build the initial request list for the jrj.com.cn crawl.

     Each recognized seed URL is routed to its own parse callback; the
     insurance-product search endpoint is a POST, the other two are GETs.
     Unrecognized URLs are skipped.
     """
     logger.info('Start Crawl Spider %s at rediskey %s' %
                 (self.name, self.redis_key))
     collected = []
     for seed in self.start_urls:
         if seed == 'http://insurance.jrj.com.cn/action/SearchIPJson.jspa':
             # Insurance-product search: POST with the page-1 payload.
             request_obj = self.request(
                 seed,
                 method='POST',
                 body=self.post_data(1),
                 redis_flag=REDISFLAG,
                 headers=self.default_header,
                 callback=self.jrj_insurance_in)
         elif seed == 'http://bank.jrj.com.cn/txtBank/banklist_1.html':
             request_obj = self.request(
                 seed,
                 redis_flag=REDISFLAG,
                 headers=self.default_header,
                 callback=self.jrj_bank_in)
         elif seed == 'http://insurance.jrj.com.cn/html/ic/list/ics-0.shtml':
             request_obj = self.request(
                 seed,
                 redis_flag=REDISFLAG,
                 headers=self.default_header,
                 callback=self.jrj_insurance_org_in)
         else:
             continue  # unknown seed: nothing to schedule
         collected.append(request_obj)
     return collected
示例#3
0
def get_cookies():
    """Log in to licai.com and return the session cookies as a dict.

    Retries indefinitely on request failure, but pauses between attempts
    so a dead endpoint is not hammered in a tight loop.

    Fixes: ``except BaseException`` also swallowed KeyboardInterrupt and
    SystemExit, making the loop impossible to break out of; narrowed to
    ``Exception`` and added a short backoff sleep.
    """
    import time  # local import keeps the snippet self-contained

    url = 'https://www.licai.com/api/v1/auth/login/pass'
    body = json.dumps({"username": "******", "password": "******"})
    while True:
        try:
            # PUT with the credential payload; the session lives in cookies.
            return requests.put(url, body).cookies.get_dict()
        except Exception as e:  # was BaseException — too broad
            logger.info(repr(e))
            time.sleep(1)  # back off briefly before retrying
示例#4
0
 def _start_requests(self):
     """Yield one redis-flagged request per seed URL for the baidu-image crawl.

     Fix: dropped the unused ``req = []`` — this is a generator and the
     list was never appended to or returned.
     """
     logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
     for url in self.start_urls:
         yield self.request(url,
             headers=self.default_header,
             redis_flag=True,
             callback=self.baidu_image_in)
示例#5
0
 def _start_requests(self):
     """Return the seed request list for the chinawealth product page.

     Only the known lccp.jsp seed is scheduled; anything else in
     ``start_urls`` is ignored.
     """
     logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
     seeds = []
     for link in self.start_urls:
         if link != 'https://www.chinawealth.com.cn/zzlc/jsp/lccp.jsp':
             continue
         seeds.append(self.request(link,
                                   redis_flag=REDISFLAG,
                                   callback=self.chinawealth_jumps))
     return seeds
示例#6
0
 def start_requests(self):
     """Return the seed request list for the HKEX crawl.

     Only the known hkex.com.hk seed is scheduled; other entries in
     ``start_urls`` are skipped.
     """
     logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
     seeds = []
     for entry in self.start_urls:
         if entry != 'http://www1.hkex.com.hk':
             continue
         seeds.append(self.request(entry,
                                   redis_flag=REDISFLAG,
                                   callback=self.HK_in))
     return seeds
示例#7
0
 def start_requests(self):
     """Return the seed request list for the p2peye platform listing.

     Only the known platform-list seed (page 1) is scheduled.
     """
     logger.info('Start Crawl Spider %s at rediskey %s' %
                 (self.name, self.redis_key))
     seeds = []
     for entry in self.start_urls:
         if entry != 'http://www.p2peye.com/platform/all/p1/':
             continue
         seeds.append(self.request(entry,
                                   redis_flag=REDISFLAG,
                                   callback=self.p2peye_list))
     return seeds
示例#8
0
 def _start_requests(self):
     """Return the seed request list for the cfachina.org crawl.

     Only the known site root is scheduled; other entries in
     ``start_urls`` are skipped.
     """
     logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
     seeds = []
     for entry in self.start_urls:
         if entry != 'http://www.cfachina.org/':
             continue
         seeds.append(self.request(entry,
                                   redis_flag=REDISFLAG,
                                   headers=self.default_header,
                                   callback=self.cfa_in))
     return seeds
示例#9
0
 def _start_requests(self):
     """Yield one logged-in request per seed URL for the licai crawl.

     Each request carries freshly fetched login cookies and enables the
     cookiejar so the session persists across follow-up requests.

     Fix: dropped the unused ``req = []`` — this is a generator and the
     list was never appended to or returned.
     """
     logger.info('Start Crawl Spider %s at rediskey %s' %
                 (self.name, self.redis_key))
     for url in self.start_urls:
         yield self.request(url,
                            headers=self.default_header,
                            cookies=get_cookie(),
                            meta={'cookiejar': 1},
                            redis_flag=True,
                            callback=self.licai_in)
示例#10
0
 def _start_requests(self):
     """Return the seed request list for the cyzone crawl.

     Only the known site root is scheduled; other entries in
     ``start_urls`` are skipped.
     """
     logger.info('Start Crawl Spider %s at rediskey %s' %
                 (self.name, self.redis_key))
     seeds = []
     for entry in self.start_urls:
         if entry != 'http://wwv.cyzone.cn':
             continue
         seeds.append(self.request(entry,
                                   redis_flag=REDISFLAG,
                                   callback=self.cyzone_start))
     return seeds
示例#11
0
 def start_requests(self):
     """Return one POST seed request (page-1 payload) per start URL.

     Every configured URL gets the same POST body, headers, and cookie
     jar, all routed to the howbuy entry callback.
     """
     logger.info('Start Crawl Spider %s at rediskey %s' % (self.name, self.redis_key))
     return [
         self.request(url,
                      method='POST',
                      body=self.post_data(1),
                      redis_flag=REDISFLAG,
                      headers=self.default_header,
                      cookies=COOKIE,
                      callback=self.howbuy_in)
         for url in self.start_urls
     ]