def push_proxy_queue(q, size=10):
    """Top up the proxy IP queue.

    When ``q`` currently holds fewer than ``size`` items, every entry
    returned by ``datamodel.get_proxy()`` is appended to it with ``q.put``.
    Otherwise the queue is left untouched.

    NOTE(review): another ``push_proxy_queue`` taking an extra
    ``ille_proxy_ip`` argument appears later in this source; if both live in
    the same module the later definition replaces this one -- confirm which
    is intended.

    Args:
        q: queue-like object exposing ``qsize()`` and ``put()``.
        size: refill threshold; no refill happens once ``q.qsize()``
            reaches this value.
    """
    # qsize is a method and has to be called: comparing the bound method
    # itself with an int raises TypeError on Python 3.
    if q.qsize() < size:
        # Sibling code treats a falsy get_proxy() result as "no proxies",
        # so tolerate it here instead of failing on iteration.
        for proxy in datamodel.get_proxy() or ():
            q.put(proxy)
def run(self):
    """Fetch company records one at a time through a proxy until told to stop.

    Each iteration pops a company from the row queue obtained for
    ``self.mode.biz_flag``, skips it when ``exist_corp`` reports it, and
    hands it together with one proxy to ``self.mode.get_info``. The returned
    ``status`` decides what happens next:

    * ``0`` with a non-empty ``info``: the record is stored with
      ``savedata`` and the same proxy is reused.
    * ``1``: the proxy is remembered as slow, the company is re-queued and
      a new proxy is drawn on the next iteration.
    * ``2``: the company is re-queued and a new proxy is drawn.
    * anything else: the proxy is reused and the company is not re-queued.

    The loop ends when ``self.exit_flag`` becomes truthy or
    ``datamodel.g_exit`` is set.
    """
    queue = datamodel.get_row_queue(self.mode.biz_flag)
    # True while the proxy chosen earlier should keep being used.
    proxy_alive = False
    # Proxies that answered with status 1 during this run.
    slow_proxies = set()
    proxyinfo = []
    while not self.exit_flag:
        if datamodel.g_exit:
            break
        corp = queue.pop(1)
        if not corp:
            continue
        # Skip companies that already exist.
        if exist_corp(corp, self.mode.biz_flag):
            continue
        # Draw a new proxy unless the previous one is still considered alive.
        if not proxy_alive:
            # With no proxy available, fall back to '' (use the local IP).
            candidates = set(datamodel.get_proxy() or [''])
            total = len(candidates)
            candidates.difference_update(self.mode.ille_proxy_ip, slow_proxies)
            # Most of a reasonably sized pool got filtered out: forget the
            # slow list so those proxies are eligible again next time.
            if total > 5 and len(candidates) < 5:
                slow_proxies = set()
            # random.sample needs a sequence (sets are rejected on Python
            # 3.11+) and raises ValueError on an empty population, so test
            # for emptiness before sampling.
            proxyinfo = random.sample(list(candidates), 1) if candidates else []
        # No usable proxy: give the company back and wait before retrying.
        if not proxyinfo:
            print("not proxy ip....")
            queue.push(corp)
            time.sleep(5)
            continue
        corp, info, status, proxy = self.mode.get_info(corp, proxyinfo[0])
        if info and status == 0:
            print(corp, info, status, proxy)
            savedata(info, self.mode.biz_flag)
            proxy_alive = True
        elif status == 1:
            proxy_alive = False
            slow_proxies.add(proxy)
            queue.push(corp)
        elif status == 2:
            proxy_alive = False
            queue.push(corp)
        else:
            proxy_alive = True
    else:
        # Runs only when the loop ends through self.exit_flag, not through
        # the g_exit break.
        # NOTE(review): the original layout was lost; attaching this else to
        # the while loop is inferred from the syntax -- confirm.
        time.sleep(5)
def push_proxy_queue(q, ille_proxy_ip, size=10):
    """Top up the proxy IP queue, leaving out rejected proxies.

    When ``q`` currently holds fewer than ``size`` items, the distinct
    entries returned by ``datamodel.get_proxy()`` that are not in
    ``ille_proxy_ip`` are appended to it with ``q.put``. Otherwise the queue
    is left untouched.

    Args:
        q: queue-like object exposing ``qsize()`` and ``put()``.
        ille_proxy_ip: iterable of proxies that must not be queued.
        size: refill threshold; no refill happens once ``q.qsize()``
            reaches this value.
    """
    if q.qsize() >= size:
        return
    # Sibling code treats a falsy get_proxy() result as "no proxies"; guard
    # it here so set() is never handed None.
    usable = set(datamodel.get_proxy() or ())
    usable.difference_update(ille_proxy_ip)
    for proxy in usable:
        q.put(proxy)
def exec_main_proxy(mode):
    """Business worker entry point: fetch companies in batches, one proxy each.

    While ``datamodel.g_exit`` is not set, the loop inspects the row queue
    obtained for ``mode.biz_flag``. When it is non-empty, up to
    ``mode.g_step`` proxies are selected, the same number of not-yet-existing
    companies are popped, and ``mode.get_info`` is run over the
    (company, proxy) pairs through an eventlet ``GreenPool``. For each
    result:

    * status ``0`` with a non-empty ``info``: the record is stored with
      ``savedata``; when that returns ``-1`` as its first value the record
      is tagged with its table name and pushed onto the temporary queue.
    * status ``1``: the proxy is added to ``mode.ille_proxy_ip`` and the
      company is re-queued.

    When the queue is empty the loop sleeps for five seconds.

    Args:
        mode: business module object; this function reads ``biz_flag``,
            ``g_step`` and ``ille_proxy_ip`` from it and calls ``get_info``.
    """
    queue = datamodel.get_row_queue(mode.biz_flag)
    while 1:
        if datamodel.g_exit:
            break
        s_time = time.time()
        pending = queue.__len__()
        print(pending)
        if pending > 0:
            # With no proxy available, fall back to '' (use the local IP).
            candidates = set(datamodel.get_proxy() or [''])
            candidates.difference_update(mode.ille_proxy_ip)
            # random.sample needs a sequence; sets are rejected on
            # Python 3.11+.
            proxyinfo = list(candidates)
            if len(proxyinfo) > mode.g_step:
                proxyinfo = random.sample(proxyinfo, mode.g_step)
            if not proxyinfo:
                # Every proxy is blacklisted: wait and retry rather than
                # falling through to an empty batch.
                print("not proxy ip....")
                time.sleep(5)
                continue
            print("start...")
            # Collect one not-yet-existing company per selected proxy.
            corps = []
            while len(corps) < len(proxyinfo):
                corp = queue.pop(1)
                if not corp:
                    break
                if exist_corp(corp, mode.biz_flag):
                    print(corp)
                    continue
                corps.append(corp)
            # Fewer companies than proxies: drop the surplus proxies.
            proxyinfo = proxyinfo[:len(corps)]
            # Occurrences of each status in this batch.
            result = {}
            if corps:
                pool = eventlet.GreenPool(len(proxyinfo))
                for corp, info, status, proxy in pool.imap(mode.get_info, corps, proxyinfo):
                    # Count the first occurrence too; the tally used to
                    # start at 0 and was always one short.
                    result[status] = result.get(status, 0) + 1
                    if info and status == 0:
                        res, _desc = savedata(info, mode.biz_flag)
                        if res == -1:
                            info['table'] = 'data_%s' % mode.biz_flag
                            datamodel.get_tmp_queue().push(info)
                    elif status == 1:
                        mode.ille_proxy_ip.add(proxy)
                        queue.push(corp)
                    print(corp, info, status)
            print(len(proxyinfo))
            print(result)
            e_time = time.time() - s_time
            print("kill time:%s" % (e_time))
        else:
            time.sleep(5)