def get(self):
    """
    Return one randomly chosen proxy from the useful-proxy table.

    :return: the object built by ``Proxy.newProxyFromJson`` from a randomly
        picked stored item, or ``None`` when the table yields no items
    """
    self.db.changeTable(self.useful_proxy_queue)
    stored_items = self.db.getAll()
    if not stored_items:
        return None
    picked = random.choice(stored_items)
    return Proxy.newProxyFromJson(picked)
def get_http(self):
    """
    Return one randomly chosen HTTP proxy from the useful-proxy table.

    Every stored item is inspected, so an HTTP proxy is always returned when
    at least one exists. (Drawing random samples with replacement could miss
    it and wrongly report that none is available.)

    :return: the object built by ``Proxy.newProxyFromJson`` from a randomly
        picked item whose ``proxy`` field has ``http`` in front of ``://``,
        or ``None`` when the table holds no such item
    """
    self.db.changeTable(self.useful_proxy_queue)
    item_list = self.db.getAll()
    if not item_list:
        return None
    # The scheme is whatever precedes "://" in the item's "proxy" field.
    http_items = [
        item for item in item_list
        if json.loads(item)['proxy'].split("://")[0] == 'http'
    ]
    if not http_items:
        return None
    return Proxy.newProxyFromJson(random.choice(http_items))
def run(self):
    """
    Validate queued raw proxies until the queue is empty.

    Each item taken from ``self.queue`` is turned into a proxy object and
    passed to ``checkProxyUseful``; proxies that pass are stored in the
    useful-proxy table. The loop ends on the first non-blocking ``get`` that
    raises ``Empty``.

    :return: None
    """
    self.log.info("RawProxyCheck - {} : start".format(self.name))
    self.db.changeTable(self.useful_proxy_queue)
    while True:
        try:
            proxy_json = self.queue.get(block=False)
        except Empty:
            self.log.info("RawProxyCheck - {} : exit".format(self.name))
            break
        try:
            proxy_obj = Proxy.newProxyFromJson(proxy_json)
            proxy_obj, status = checkProxyUseful(proxy_obj)
            if status:
                self.db.put(proxy_obj)
                self.log.info('RawProxyCheck - {} : {} validation pass'.format(self.name, proxy_obj.proxy.ljust(20)))
            else:
                self.log.info('RawProxyCheck - {} : {} validation fail'.format(self.name, proxy_obj.proxy.ljust(20)))
        finally:
            # Always pair the successful get() with task_done(), even when
            # validation or storage raises; otherwise queue.join() can wait
            # forever on an item nobody will ever mark as finished.
            self.queue.task_done()
def fetch(self):
    """
    Fetch raw proxies from every configured getter and store them.

    Each name in ``config.proxy_getter_functions`` is looked up on
    ``GetFreeProxy`` and called. Every yielded string is split on ``|``: the
    first field is the proxy address and the second its type. Entries that
    lack the type field, fail ``verifyProxyFormat`` or were already seen in
    this run are logged and skipped; the rest are written to the raw-proxy
    table. An exception raised by a getter is logged and the next getter is
    tried.

    :return: None
    """
    self.db.changeTable(self.raw_proxy_queue)
    proxy_set = set()
    self.log.info("ProxyFetch : start")
    for proxyGetter in config.proxy_getter_functions:
        self.log.info(
            "ProxyFetch - {func}: start".format(func=proxyGetter))
        try:
            for raw_entry in getattr(GetFreeProxy, proxyGetter.strip())():
                # Split once. `separator` is empty when the entry has no
                # "|", i.e. no type field; such an entry is skipped below
                # instead of raising IndexError and discarding the rest of
                # this getter's output.
                proxy, separator, type_part = raw_entry.strip().partition("|")
                # Only the second "|"-separated field is the type.
                p_type = type_part.split("|")[0]
                if not separator or not proxy or not verifyProxyFormat(proxy):
                    self.log.error('ProxyFetch - {func}: '
                                   '{proxy} illegal'.format(
                                       func=proxyGetter,
                                       proxy=proxy.ljust(20)))
                    continue
                if proxy in proxy_set:
                    self.log.info('ProxyFetch - {func}: '
                                  '{proxy} exist'.format(
                                      func=proxyGetter,
                                      proxy=proxy.ljust(20)))
                    continue
                self.log.info('ProxyFetch - {func}: '
                              '{proxy} success'.format(
                                  func=proxyGetter,
                                  proxy=proxy.ljust(20)))
                # Parenthesised single-argument form prints the same text on
                # Python 2 and Python 3.
                print("{} get_sucess".format(proxy))
                self.db.put(
                    Proxy(proxy, proxy_type=p_type, source=proxyGetter))
                proxy_set.add(proxy)
        except Exception as e:
            # Best effort: move on to the next getter, but record what went
            # wrong instead of discarding the exception.
            self.log.error(
                "ProxyFetch - {func}: error {err!r}".format(
                    func=proxyGetter, err=e))
def getAllByName(self, name):
    """
    Return the proxies that have not failed for ``name`` in the last 24 hours.

    The candidates come from ``self.getAll()``. Failure records are read from
    the table ``<useful_proxy_queue>_fail_<name>``; a candidate is dropped
    when a record with the same ``proxy`` value has a ``last_time`` newer
    than 24 hours ago. The database is left pointing at that failure table.

    :param name: suffix identifying the per-name failure table
    :return: list of the remaining proxy objects, in their original order
    """
    all_proxies = list(self.getAll())
    self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
    fail_proxies = [Proxy.newProxyFromJson(item) for item in self.db.getAll()]

    # One pass over the failure records builds a set of blocked addresses,
    # replacing the nested scan of every record for every candidate.
    candidates = {proxy.proxy for proxy in all_proxies}
    cutoff = datetime.now() - timedelta(hours=24)
    recently_failed = set()
    for failed in fail_proxies:
        address = failed.proxy
        # Only parse timestamps of records that can still change the result.
        if address not in candidates or address in recently_failed:
            continue
        failed_date = datetime.strptime(failed.last_time, "%Y-%m-%d %H:%M:%S")
        if failed_date > cutoff:
            recently_failed.add(address)

    return [proxy for proxy in all_proxies
            if proxy.proxy not in recently_failed]
def run(self):
    """
    Re-validate queued useful proxies until the queue is empty.

    Each item taken from ``self.queue`` is turned into a proxy object and
    passed to ``checkProxyUseful`` together with ``self.origin_ips``. The
    proxy is written back when the check passes or its ``fail_count`` is
    still below ``FAIL_COUNT``; otherwise it is deleted from the
    useful-proxy table. The loop ends on the first non-blocking ``get`` that
    raises ``Empty``.

    :return: None
    """
    self.log.info("UsefulProxyCheck - {} : start".format(self.name))
    self.db.changeTable(self.useful_proxy_queue)
    while True:
        try:
            proxy_str = self.queue.get(block=False)
        except Empty:
            self.log.info("UsefulProxyCheck - {} : exit".format(
                self.name))
            break
        try:
            proxy_obj = Proxy.newProxyFromJson(proxy_str)
            proxy_obj, status = checkProxyUseful(proxy_obj, self.origin_ips)
            # A failing proxy is kept until it has reached FAIL_COUNT
            # failures.
            if status or proxy_obj.fail_count < FAIL_COUNT:
                self.db.put(proxy_obj)
                self.log.info(
                    'UsefulProxyCheck - {} : {} validation pass'.format(
                        self.name, proxy_obj.proxy.ljust(20)))
            else:
                self.log.info(
                    'UsefulProxyCheck - {} : {} validation fail'.format(
                        self.name, proxy_obj.proxy.ljust(20)))
                self.db.delete(proxy_obj.proxy)
        finally:
            # Always pair the successful get() with task_done(), even when
            # validation or a database call raises; otherwise queue.join()
            # can wait forever on an item nobody will mark as finished.
            self.queue.task_done()
def get_all_proxy(self):
    """
    Load every item of the table named by ``self.asdl_proxy_queue``.

    :return: list holding one ``Proxy.newProxyFromJson`` result per stored
        item, in the order the database returned them
    """
    self.db.changeTable(self.asdl_proxy_queue)
    proxies = []
    for raw_item in self.db.getAll():
        proxies.append(Proxy.newProxyFromJson(raw_item))
    return proxies
def add_asdl_proxy(self, proxy_str):
    """
    Store a proxy in the table named by ``self.asdl_proxy_queue``.

    The proxy object is created with ``last_time`` set to the current local
    time formatted as ``YYYY-MM-DD HH:MM:SS``.

    :param proxy_str: value passed as the first argument to ``Proxy``
    :return: None
    """
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    new_proxy = Proxy(proxy_str, last_time=timestamp)
    self.db.changeTable(self.asdl_proxy_queue)
    self.db.put(new_proxy)
def deleteByName(self, name, proxy):
    """
    Record ``proxy`` as failed for ``name``.

    Nothing is removed here: a proxy object stamped with the current local
    time (``YYYY-MM-DD HH:MM:SS``) is written to the table
    ``<useful_proxy_queue>_fail_<name>``.

    :param name: suffix identifying the per-name failure table
    :param proxy: value passed as ``proxy`` to the ``Proxy`` constructor
    :return: None
    """
    failure_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    failed_proxy = Proxy(proxy=proxy, last_time=failure_time)
    fail_table = self.useful_proxy_queue + '_fail_' + name
    self.db.changeTable(fail_table)
    self.db.put(failed_proxy)