Exemplo n.º 1
0
 def run_job(self, jobd):
     """Process one job descriptor.

     The job is first passed to ``dump_jobid`` and ``GenQueries.run_job``.
     Anything that is not a dict is ignored after that. Two job types are
     handled here:

     * ``'loadpage'`` -- calls ``load_page(url, page)`` and hands the job
       back to ``add_job`` when that call returns ``None``.
     * ``'cvurl'`` -- extracts the ``res_id_encode`` value from the URL,
       skips ids for which the hash checker reports a non-zero count,
       otherwise requests the URL and stores the response text through
       ``Cdata.lpcvstore.save``.

     Args:
         jobd: Job descriptor. Only dicts whose ``'type'`` is
             ``'loadpage'`` or ``'cvurl'`` trigger work in this method.

     Returns:
         None in every case.
     """
     self.dump_jobid(jobd)
     GenQueries.run_job(self, jobd)
     if not isinstance(jobd, dict):
         return

     if jobd.get('type') == 'loadpage':
         o = self.load_page(jobd.get('url'), jobd.get('page'))
         if o is None:
             # load_page produced nothing: give the job back to add_job.
             self.add_job(jobd)

     if jobd.get('type') != 'cvurl':
         return

     url = jobd.get('url')
     if not url:
         # A 'cvurl' job without a URL cannot be matched; passing None to
         # re.search would raise TypeError.
         return

     m = re.search(r'res_id_encode=([a-z0-9A-Z]+)', url)
     if not m:
         return

     cvid = m.group(1)
     qstring = "liepincv://" + cvid
     cnt = spider.util.HashChecker().query(qstring)
     if cnt is not None and int(cnt) != 0:
         # Single-argument parenthesised print emits the same text under
         # the Python 2 print statement.
         print('======%s hash downloaded=====' % qstring)
         return

     o = self.al_request(url,
                         headers=Cdata.headers,
                         allow_redirects=False)
     if o is None:
         return

     print('==========saveing======')
     # NOTE(review): the hash is recorded before the save below runs, so a
     # failing save would still leave this id recorded -- confirm that
     # this ordering is intended.
     spider.util.HashChecker().add(qstring)
     time.sleep(5)
     Cdata.lpcvstore.save(url, cvid, o.text)
     print('==========save done====')
Exemplo n.º 2
0
 def __init__(self, thcnt, ac):
     """Initialise the base class and per-instance state.

     Args:
         thcnt: Forwarded unchanged to ``GenQueries.__init__``.
         ac: Passed to ``MRLManager`` together with ``new_ZLLogin``.
     """
     GenQueries.__init__(self, thcnt)
     self._last_time = 0.0
     manager = MRLManager(ac, new_ZLLogin)
     self.zlm = manager
     # Header dict with a fixed Referer value.
     referer = 'http://rdsearch.zhaopin.com/Home/ResultForCustom'
     self.headers = {'Referer': referer}
     self.search_cnt = 0
Exemplo n.º 3
0
 def __init__(self, thcnt):
     """Initialise the base class, output savers and counters.

     Args:
         thcnt: Forwarded unchanged to ``GenQueries.__init__``;
             ``thread_count`` is set to 1 right afterwards.
     """
     GenQueries.__init__(self, thcnt)
     self.thread_count = 1
     self._name = "jobui_queries"

     # One FileSaver per output file; the first file name embeds _name.
     unmatched_path = "not_match_%s.txt" % self._name
     self.no_match_url = FileSaver(unmatched_path)
     self.bs2 = FileSaver("failed_urls.txt")
     self.job_url = FileSaver("job_url.txt")

     self.cnt = 0
     self.domains = FileSaver("domains.txt")

     # Wall-clock timestamp taken at construction.
     started = time.time()
     self.start_time = started
Exemplo n.º 4
0
 def __init__(self, thcnt):
     """Initialise the base class and set ``_name``.

     Args:
         thcnt: Forwarded unchanged to ``GenQueries.__init__``.
     """
     name = "test_set"
     GenQueries.__init__(self, thcnt)
     self._name = name
Exemplo n.º 5
0
 def __init__(self):
     """Initialise the base class, then set ``_name`` and ``thread_count``."""
     GenQueries.__init__(self)
     # Both values are assigned after the base initialiser has run.
     self._name = "jobui_queries"
     self.thread_count = 1
Exemplo n.º 6
0
 def __init__(self, thcnt=8):
     """Initialise the base class and set ``_name``.

     Args:
         thcnt: Forwarded unchanged to ``GenQueries.__init__``
             (defaults to 8).
     """
     name = 'WenshuwangGenQueries'
     GenQueries.__init__(self, thcnt)
     self._name = name
Exemplo n.º 7
0
 def __init__(self, thcnt):
     """Run both explicit base initialisers and set instance defaults.

     Args:
         thcnt: Forwarded unchanged to ``GenQueries.__init__``.
     """
     # Each base initialiser is called explicitly, GenQueries first.
     GenQueries.__init__(self, thcnt)
     MRLManager.__init__(self, Cdata.accounts, LPRequest)
     self.baseurl = dict()
     self._name = 'lp_queries'
Exemplo n.º 8
0
 def __init__(self, thcnt=20):
     """Initialise the base class and set ``_name``.

     Args:
         thcnt: Forwarded unchanged to ``GenQueries.__init__``
             (defaults to 20).
     """
     name = "lp_qiye_queries"
     GenQueries.__init__(self, thcnt)
     self._name = name
Exemplo n.º 9
0
 def __init__(self):
     """Initialise the base class, then set ``_name`` and ``thread_count``."""
     GenQueries.__init__(self)
     # Both values are assigned after the base initialiser has run.
     self._name = "zhilian_queries"
     self.thread_count = 8