def validUsefulProxy(proxy):
    """
    Check whether a proxy can fetch at least one of the configured check URLs.

    The URLs in ``config.check_urls`` are tried one after another, starting at
    a random position and wrapping around, until one answers with HTTP 200.

    :param proxy: proxy address as ``str`` or UTF-8 encoded ``bytes``; it is
        interpolated into ``https://{proxy}`` (presumably ``host:port`` --
        confirm against callers)
    :return: True as soon as one check URL returns status 200 through the
        proxy; False when every URL fails or no check URL is configured
    """
    logger = LogHandler("validUsefulProxy")
    if isinstance(proxy, bytes):
        proxy = proxy.decode('utf8')
    # NOTE(review): only an "https" entry is supplied -- confirm that every
    # check URL uses https, otherwise the request may not go through the proxy.
    proxies = {"https": f"https://{proxy}"}
    check_urls = config.check_urls
    length = len(check_urls)
    if not length:
        # random.randint(0, -1) would raise ValueError; with nothing to probe
        # the proxy simply cannot be confirmed.
        logger.error(f'proxy {proxy} all check failed')
        return False

    start = random.randint(0, length - 1)
    for offset in range(length):
        # Visit each URL exactly once, beginning at the random start index.
        url = check_urls[(start + offset) % length]
        logger.info(f'proxy {proxy} check {url}')
        try:
            r = requests.get(url, proxies=proxies, timeout=5, verify=False)
            if r.status_code == 200:
                logger.info(f'proxy {proxy} is useful')
                return True
        except Exception as e:
            # Any failure only disqualifies this URL; the next one is tried.
            logger.error(f'proxy {proxy} check {url} failed, {e}')
        # Brief pause before moving on to the next check URL.
        time.sleep(1)
    logger.error(f'proxy {proxy} all check failed')
    return False
class ProxyRefreshSchedule(ProxyManager):
    """
    Scheduled proxy refresh.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('refresh_schedule')

    def valid_proxy(self):
        """
        Pop proxies from the raw queue until it yields a falsy value and put
        each one that passes ``validUsefulProxy`` into the useful queue.

        :return: None
        """
        store = self.db
        store.changeTable(self.raw_proxy_queue)
        candidate = store.pop()
        self.log.info('%s start valid proxy' % time.ctime())
        while candidate:
            if not validUsefulProxy(candidate):
                self.log.debug('proxy: %s validation fail' % candidate)
            else:
                store.changeTable(self.useful_proxy_queue)
                store.put(candidate)
                self.log.debug('proxy: %s validation passes' % candidate)
            # Always switch back to the raw queue before fetching the next one.
            store.changeTable(self.raw_proxy_queue)
            candidate = store.pop()
        self.log.info('%s valid proxy complete' % time.ctime())
class Proxy_Check_Http(ProxyManage, Thread):
    """
    Worker thread that validates queued proxies as HTTP proxies and keeps the
    useful table in sync with the result.
    """

    def __init__(self, queue_http, item_dict):
        """
        :param queue_http: queue of proxies to check (assumed to follow the
            ``queue.Queue`` interface -- confirm against the caller)
        :param item_dict: stored on the instance; not read by this class
        """
        ProxyManage.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')
        self.queue_http = queue_http
        self.item_dict = item_dict

    def run(self):
        """
        Thread entry point: validate every queued proxy over HTTP.

        Proxies that pass are put into the useful table, the others are
        deleted from it.

        :return: None
        """
        try:
            from queue import Empty
        except ImportError:  # Python 2
            from Queue import Empty

        self.db.changeTable(self.useful_proxy_queue)
        while True:
            # A non-blocking get avoids the qsize()/get() race: with several
            # workers, another thread may take the last item after qsize()
            # reported it, leaving a blocking get() waiting forever.
            try:
                proxy = self.queue_http.get(block=False)
            except Empty:
                break
            try:
                raw_proxy_dict = {'http': proxy}
                if validUsefulProxy(raw_proxy_dict):
                    self.db.put(proxy)
                    self.log.info("ProxyCheck :{} validation pass".format(proxy))
                else:
                    self.db.delete(proxy)
                    self.log.info("ProxyCheck :{} validation delete".format(proxy))
            finally:
                # Mark the item done even on error so queue.join() cannot hang.
                self.queue_http.task_done()
class ProxyValidSchedule(ProxyManager):
    """
    Endless validation loop over the useful proxy table that keeps a
    per-proxy score and deletes proxies whose score drops too low.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def __validProxy(self):
        """
        Validate every useful proxy over and over.

        A pass increments the proxy's counter, a failure decrements it; a
        proxy whose counter is below -5 is deleted.

        :return: never returns
        """
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for each_proxy in self.db.getAll():
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')

                if validUsefulProxy(each_proxy):
                    # Success: counter + 1.
                    self.db.inckey(each_proxy, 1)
                    self.log.debug('validProxy_b: {} validation pass'.format(each_proxy))
                else:
                    # Failure: counter - 1.
                    self.db.inckey(each_proxy, -1)
                    self.log.info('validProxy_b: {} validation fail'.format(each_proxy))
                value = self.db.getvalue(each_proxy)
                # The stored counter may come back as text; convert it before
                # comparing so the check does not raise on str/bytes values.
                if value and int(value) < -5:
                    # Counter below -5: drop the proxy.
                    self.db.delete(each_proxy)
            self.log.info('validProxy_a running normal')

    def main(self):
        """Run the validation loop (blocks forever)."""
        self.__validProxy()
class ProxyRefreshSchedule(ProxyManager):
    """
    Scheduled proxy refresh.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('refresh_schedule')

    def validProxy(self):
        """
        Validate the proxies in raw_proxy_queue and put the usable ones into
        useful_proxy_queue.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        item = self.db.pop()
        self.log.info('ProxyRefreshSchedule: %s start validProxy' % time.ctime())
        # Proxies already returned by getAll() are skipped to avoid
        # validating them again.
        known = self.getAll()
        while item:
            address = item.get('proxy')
            if isinstance(address, bytes):
                # Py3 compatibility.
                address = address.decode('utf8')
            accepted = address not in known and validUsefulProxy(address)
            if accepted:
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(address)
                self.log.info('ProxyRefreshSchedule: %s validation pass' % address)
            else:
                self.log.info('ProxyRefreshSchedule: %s validation fail' % address)
            self.db.changeTable(self.raw_proxy_queue)
            item = self.db.pop()
            known = self.getAll()
        self.log.info('ProxyRefreshSchedule: %s validProxy complete' % time.ctime())
class ProxyRefreshSchedule(ProxyManager):
    """
    Scheduled proxy refresh.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('refresh_schedule')

    def validProxy(self):
        """
        Validate the proxies in raw_proxy_queue and put the usable ones into
        useful_proxy_queue.

        Each popped item is read through ``.get('ip')`` / ``.get('port')``;
        the whole item is stored when validation passes.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        entry = self.db.pop()
        while entry:
            self.log.info('ProxyRefreshSchedule: %s start validProxy' % time.ctime())
            host, port = entry.get('ip'), entry.get('port')
            addr = "%s:%s" % (host, port)
            if not validUsefulProxy(addr):
                self.log.info('ProxyRefreshSchedule: %s validation fail' % addr)
            else:
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(entry)
                self.log.info('ProxyRefreshSchedule: %s validation pass' % addr)
            self.db.changeTable(self.raw_proxy_queue)
            entry = self.db.pop()
        self.log.info('ProxyRefreshSchedule: %s validProxy complete' % time.ctime())
class ProxyCheck(ProxyManager, Thread):
    """
    Worker thread that re-validates queued proxies and removes the ones that
    no longer work.
    """

    def __init__(self, queue, item_dict, check_urls):
        """
        :param queue: queue of proxies to check (assumed to follow the
            ``queue.Queue`` interface -- confirm against the caller)
        :param item_dict: stored on the instance; not read by ``run``
        :param check_urls: stored on the instance; not read by ``run``
        """
        ProxyManager.__init__(self)
        Thread.__init__(self)
        # No log file: several threads writing one log file causes problems.
        self.log = LogHandler('proxy_check', file=False)
        self.queue = queue
        self.item_dict = item_dict
        self.check_urls = check_urls

    def run(self):
        """
        Thread entry point: validate queued proxies until the queue is empty.

        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            # Non-blocking get: a blocking get() never raises Empty and can
            # wait forever if another worker takes the last item first.
            try:
                proxy = self.queue.get(block=False)
            except Empty:
                break
            if validUsefulProxy(proxy):
                self.log.info(f'ProxyCheck: {proxy} validation pass')
            else:
                self.log.info(
                    f'ProxyCheck: {proxy} validation fail, delete it from useful_proxy!'
                )
                # NOTE(review): called on the class, not the instance -- this
                # only works if delete_proxy takes just the proxy; confirm.
                ProxyManager.delete_proxy(proxy)
            self.queue.task_done()
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that pops one proxy from the useful table every
    20 seconds and validates it.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def run(self):
        """
        Thread entry point; loops forever.

        A popped proxy that fails validation is deleted by its ``ip``.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            entry = self.db.pop()
            if entry:
                addr = "%s:%s" % (entry.get('ip'), entry.get('port'))
                usable = validUsefulProxy(addr)
                if not usable:
                    self.log.info('ProxyCheck: {} validation fail'.format(addr))
                    self.db.delete(entry['ip'])
                else:
                    self.log.info('ProxyCheck: {} validation pass'.format(addr))
            sleep(20)
class ProxyValidSchedule(ProxyManager):
    """
    Endless validation loop over the useful proxy table that keeps a
    per-proxy score and deletes proxies whose score drops too low.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def validProxy(self):
        """
        Validate every useful proxy over and over.

        A pass increments the proxy's counter, a failure decrements it; a
        proxy whose counter is below -1 is deleted. When the table is empty
        the loop sleeps 100 seconds before the next pass.

        :return: never returns
        """
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            each_proxys = self.db.getAll()
            # Single-argument print works under Python 2 and 3 and produces
            # the same text as the old ``print a, b`` statement.
            print("验证所有ip " + str(each_proxys))
            if not each_proxys:
                time.sleep(100)
            # ``or []`` keeps the loop alive when the table yields None.
            for each_proxy in each_proxys or []:
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')
                print("验证ip: " + str(each_proxy))
                if validUsefulProxy(each_proxy):
                    # Success: counter + 1.
                    self.db.inckey(each_proxy, 1)
                    self.log.debug(
                        'validProxy_b: {} validation pass'.format(each_proxy))
                else:
                    # Failure: counter - 1.
                    self.db.inckey(each_proxy, -1)
                    self.log.info(
                        'validProxy_b: {} validation fail'.format(each_proxy))
                value = self.db.getvalue(each_proxy)
                if value and int(value) < -1:
                    # Counter below -1: drop the proxy.
                    self.db.delete(each_proxy)
            self.log.info('validProxy_a running normal')
class ProxyValidSchedule(ProxyManager):
    """
    Endless validation loop over the useful proxy table that keeps a
    per-proxy score and deletes proxies whose score is negative.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.db = DbClient()
        self.log = LogHandler('valid_schedule')

    def __validProxy(self):
        """
        Validate every useful proxy over and over.

        A pass increments the counter. A failure resets a non-negative
        counter to zero and decrements a negative or missing one. After
        either outcome a proxy with a negative counter is deleted.

        :return: never returns
        """
        # The factor 0 makes this a zero-length sleep, i.e. the randomised
        # start-up delay is currently switched off.
        time.sleep(60 * 0 * random.random())
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for each_proxy in self.db.getAll():
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')

                if validUsefulProxy(each_proxy):
                    # Success: counter + 1.
                    self.db.inckey(each_proxy, 1)
                    self.log.debug('validProxy_b: {} validation pass'.format(each_proxy))
                else:
                    # Read the counter once instead of three times.
                    current = self.db.getvalue(each_proxy)
                    print("原有value " + str(current))
                    # NOTE(review): a counter of exactly 0 is "reset" by 0 and
                    # so never turns negative -- confirm ``>=`` is intended.
                    if current is not None and int(current) >= 0:
                        # Wipe the accumulated successes.
                        self.db.inckey(each_proxy, -1 * int(current))
                    else:
                        self.db.inckey(each_proxy, -1)
                    self.log.info('validProxy_b: {} validation fail'.format(each_proxy))
                value = self.db.getvalue(each_proxy)
                print(value)
                if value is not None and int(value) < 0:
                    # Negative counter: drop the proxy.
                    print("删除" + each_proxy)
                    self.db.delete(each_proxy)
            self.log.info('validProxy_a running normal')

    def main(self):
        """Run the validation loop (blocks forever)."""
        self.__validProxy()
class ProxyCheck(ProxyManager, Thread):
    """
    Worker thread that re-validates queued proxies and maintains a fail
    counter for each of them in the useful table.
    """

    def __init__(self, queue, item_dict):
        """
        :param queue: queue of proxies to check (assumed to follow the
            ``queue.Queue`` interface -- confirm against the caller)
        :param item_dict: mapping indexed by proxy that yields its current
            fail count
        """
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check', file=False)
        self.queue = queue
        self.item_dict = item_dict

    def run(self):
        """
        Thread entry point: validate queued proxies until the queue is empty.

        A pass lowers a positive fail count by one. A failure raises it by
        one, or deletes the proxy once the count would reach FAIL_COUNT.

        :return: None
        """
        try:
            from queue import Empty
        except ImportError:  # Python 2
            from Queue import Empty

        self.db.changeTable(self.useful_proxy_queue)
        while True:
            # Non-blocking get avoids hanging when another worker empties the
            # queue between a qsize() check and a blocking get().
            try:
                proxy = self.queue.get(block=False)
            except Empty:
                break
            count = self.item_dict[proxy]
            # A missing/empty count is treated as zero failures.
            fails = int(count) if count else 0
            if validUsefulProxy(proxy):
                # Passed: fail counter - 1 (never below zero).
                if fails > 0:
                    self.db.put(proxy, num=fails - 1)
                self.log.info('proxycheck:{} validation pass'.format(proxy))
            else:
                self.log.info('proxycheck:{} validation fail'.format(proxy))
                if count and fails + 1 >= FAIL_COUNT:
                    self.log.info(
                        'proxycheck:{} fial too many,delete'.format(proxy))
                    self.db.delete(proxy)
                else:
                    # Failed: fail counter + 1. Decrementing here would keep
                    # the count from ever reaching FAIL_COUNT.
                    self.db.put(proxy, num=fails + 1)
            self.queue.task_done()
class ProxyCheck(ProxyManager, Thread):
    """
    Worker thread that re-validates queued proxies and maintains a fail
    counter for each of them in the useful table.
    """

    def __init__(self, queue, item_dict):
        """
        :param queue: queue of proxies to check (assumed to follow the
            ``queue.Queue`` interface -- confirm against the caller)
        :param item_dict: mapping indexed by proxy that yields its current
            fail count
        """
        ProxyManager.__init__(self)
        Thread.__init__(self)
        # No log file: several threads writing one log file causes problems.
        self.log = LogHandler('proxy_check', file=False)
        self.queue = queue
        self.item_dict = item_dict

    def run(self):
        """
        Thread entry point: validate queued proxies until the queue is empty.

        A pass lowers a positive fail count by one. A failure raises it by
        one, or deletes the proxy once the count would reach FAIL_COUNT.

        :return: None
        """
        try:
            from queue import Empty
        except ImportError:  # Python 2
            from Queue import Empty

        self.db.changeTable(self.useful_proxy_queue)
        while True:
            # Non-blocking get avoids hanging when another worker empties the
            # queue between a qsize() check and a blocking get().
            try:
                proxy = self.queue.get(block=False)
            except Empty:
                break
            count = self.item_dict[proxy]
            # A missing/empty count is treated as zero failures instead of
            # crashing in int().
            fails = int(count) if count else 0
            if validUsefulProxy(proxy):
                # Passed: fail counter - 1 (never below zero).
                if fails > 0:
                    self.db.put(proxy, num=fails - 1)
                self.log.info('ProxyCheck: {} validation pass'.format(proxy))
            else:
                self.log.info('ProxyCheck: {} validation fail'.format(proxy))
                if count and fails + 1 >= FAIL_COUNT:
                    self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
                    self.db.delete(proxy)
                else:
                    self.db.put(proxy, num=fails + 1)
            self.queue.task_done()
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that re-validates the whole useful table every five
    minutes and maintains a fail counter per proxy.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def run(self):
        """
        Thread entry point; loops forever.

        A pass lowers a positive fail count by one. A failure deletes the
        proxy when its count exceeds FAIL_COUNT, otherwise raises the count
        by one.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            snapshot = self.db.getAll()
            for proxy, count in snapshot.items():
                if not validUsefulProxy(proxy):
                    self.log.info(
                        'ProxyCheck: {} validation fail'.format(proxy))
                    if count and int(count) > FAIL_COUNT:
                        self.log.info(
                            'ProxyCheck: {} fail too many, delete!'.format(
                                proxy))
                        self.db.delete(proxy)
                    else:
                        self.db.put(proxy, num=int(count) + 1)
                    continue
                # Passed: fail counter - 1 while it is still positive.
                if count and int(count) > 0:
                    self.db.put(proxy, num=int(count) - 1)
                self.log.info(
                    'ProxyCheck: {} validation pass'.format(proxy))
            sleep(60 * 5)
class ProxyValidSchedule(ProxyManager):
    """
    Endless loop that removes proxies from the useful table when they fail
    either the usability check or the telnet check.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def __validProxy__(self):
        """
        Validate the proxies.

        :return: never returns
        """
        while 1:
            self.db.changeTable(self.useful_proxy_queue)
            for candidate in self.db.getAll():
                if not validUsefulProxy(candidate):
                    self.db.delete(candidate)
                    self.log.info(
                        'proxy: {} validation fail'.format(candidate))
                    continue
                self.log.debug(
                    'proxy: {} validation pass'.format(candidate))
                # The telnet check only runs for proxies that passed above.
                if not validTelnetProxy(candidate):
                    self.db.delete(candidate)
                    self.log.info(
                        'proxy: {} telnet fail'.format(candidate))
                    continue
                self.log.debug(
                    'proxy: {} telnet pass'.format(candidate))
            self.log.info(u'代理验证程序运行正常')

    def main(self):
        """Run the validation loop (blocks forever)."""
        self.__validProxy__()
class ProxyCheck(ProxyManager, Thread):
    """
    Worker thread that re-validates queued proxies and maintains a fail
    counter for each of them in the useful table.
    """

    def __init__(self, queue, item_dict):
        """
        :param queue: queue of proxies to check
        :param item_dict: mapping indexed by proxy that yields its current
            fail count
        """
        ProxyManager.__init__(self)
        Thread.__init__(self)
        # No log file: several threads writing one log file causes problems.
        self.log = LogHandler('proxy_check', file=False)
        self.queue = queue
        self.item_dict = item_dict

    def run(self):
        """
        Thread entry point: validate queued proxies until a non-blocking
        ``get`` reports the queue empty.

        :return: None
        """
        pending = self.queue
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            try:
                proxy = pending.get(block=False)
            except Empty:
                break
            count = self.item_dict[proxy]
            if not validUsefulProxy(proxy):
                self.log.info('ProxyCheck: {} validation fail'.format(proxy))
                if count and int(count) + 1 >= FAIL_COUNT:
                    self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
                    self.db.delete(proxy)
                else:
                    self.db.put(proxy, num=int(count) + 1)
            else:
                # Passed: fail counter - 1 while it is still positive.
                if count and int(count) > 0:
                    self.db.put(proxy, num=int(count) - 1)
                self.log.info('ProxyCheck: {} validation pass'.format(proxy))
            pending.task_done()
class ProxyRefreshSchedule(ProxyManager):
    """
    Scheduled proxy refresh.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('refresh_schedule')

    def validProxy(self):
        """
        Validate the proxies in raw_proxy_queue and put the usable ones into
        useful_proxy_queue.

        A proxy already present in the useful table is skipped without
        running the (slow, network-bound) validation.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        raw_proxy = self.db.pop()
        self.log.info('%s start validProxy_a' % time.ctime())
        while raw_proxy:
            self.db.changeTable(self.useful_proxy_queue)
            # ``or ()`` keeps the membership test valid when the table
            # returns nothing.
            exist_proxy = self.db.getAll() or ()
            # Cheap membership test first; validation only for new proxies.
            if (raw_proxy not in exist_proxy) and validUsefulProxy(raw_proxy):
                self.db.put(raw_proxy)
                self.log.info('validProxy_a: %s validation pass' % raw_proxy)
            else:
                self.log.debug('validProxy_a: %s validation fail' % raw_proxy)
            self.db.changeTable(self.raw_proxy_queue)
            raw_proxy = self.db.pop()
        self.log.info('%s validProxy_a complete' % time.ctime())
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that re-validates the useful table every five minutes
    and keeps a score per proxy.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def run(self):
        """
        Thread entry point; loops forever.

        A pass raises a score below 1 by one, otherwise re-stores the proxy
        with the default score. A failure deletes the proxy when its score
        is below -5, otherwise lowers the score by one.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            proxy_item = self.db.pop()
            while proxy_item:
                proxy = proxy_item.get('proxy')
                counter = proxy_item.get('value')
                if counter is None:
                    # An item without a stored score is treated as 1 so the
                    # failure branch below does not crash on int(None).
                    counter = 1
                if validUsefulProxy(proxy):
                    # Passed: score + 1, kept between -5 and 1.
                    if counter and int(counter) < 1:
                        self.db.put(proxy, num=int(counter) + 1)
                    else:
                        self.db.put(proxy)
                    self.log.info(
                        'ProxyCheck: {} validation pass'.format(proxy))
                else:
                    self.log.info(
                        'ProxyCheck: {} validation fail'.format(proxy))
                    # Failed: score - 1, or delete once it is below -5.
                    if counter and int(counter) < -5:
                        self.log.info(
                            'ProxyCheck: {} fail too many, delete!'.format(
                                proxy))
                        self.db.delete(proxy)
                    else:
                        self.db.put(proxy, num=int(counter) - 1)
                proxy_item = self.db.pop()
            sleep(60 * 5)
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that re-validates the useful table every five minutes
    and keeps a score per proxy.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def run(self):
        """
        Thread entry point; loops forever.

        A pass raises a score below 1 by one, otherwise re-stores the proxy
        with the default score. A failure deletes the proxy when its score
        is at or below FAIL_COUNT, otherwise lowers the score by one.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            entry = self.db.pop()
            while entry:
                proxy = entry.get('proxy')
                score = entry.get('value', 1)
                if not validUsefulProxy(proxy):
                    self.log.info('ProxyCheck: {} validation fail'.format(proxy))
                    # NOTE(review): this threshold only makes sense if
                    # FAIL_COUNT is negative here -- confirm its definition.
                    if score and int(score) <= FAIL_COUNT:
                        self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
                        self.db.delete(proxy)
                    else:
                        self.db.put(proxy, num=int(score) - 1)
                elif score and int(score) < 1:
                    # Passed: score + 1.
                    self.db.put(proxy, num=int(score) + 1)
                    self.log.info('ProxyCheck: {} validation pass'.format(proxy))
                else:
                    self.db.put(proxy)
                    self.log.info('ProxyCheck: {} validation pass'.format(proxy))
                entry = self.db.pop()
            sleep(60 * 5)
class ProxyValidSchedule(ProxyManager):
    """
    Endless loop that deletes proxies from the useful table as soon as they
    fail validation.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def __validProxy__(self):
        """
        Validate the proxies.

        :return: never returns
        """
        while 1:
            self.db.changeTable(self.useful_proxy_queue)
            for candidate in self.db.getAll():
                if not validUsefulProxy(candidate):
                    self.db.delete(candidate)
                    self.log.info('proxy: {} validation fail'.format(candidate))
                    continue
                self.log.debug('proxy: {} validation pass'.format(candidate))
            self.log.info(u'代理验证程序运行正常')

    def main(self):
        """Run the validation loop (blocks forever)."""
        self.__validProxy__()
def testLogHandler():
    """
    Emit one info message under the name 'test', then rename the same
    handler to 'test1' and 'test2' and emit one message under each.
    """
    log = LogHandler('test')
    log.info('this is a log from test')
    for renamed in ('test1', 'test2'):
        log.resetName(name=renamed)
        log.info('this is a log from ' + renamed)
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that re-validates the useful table every 30 seconds.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def run(self):
        """
        Thread entry point; loops forever.

        Every popped proxy is validated: it is put back when it passes and
        deleted when it fails.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            proxy_item = self.db.pop()
            while proxy_item:
                # Only the address is needed; the stored 'value' was read
                # but never used, so that lookup has been dropped.
                proxy = proxy_item.get('proxy')
                if validUsefulProxy(proxy):
                    self.log.info(
                        'ProxyCheck: {} validation pass'.format(proxy))
                    self.db.put(proxy)
                else:
                    self.log.info(
                        'ProxyCheck: {} validation fail'.format(proxy))
                    self.db.delete(proxy)
                proxy_item = self.db.pop()
            sleep(30)
class ProxyManager(object):
    """
    ProxyManager: fetches raw proxies and stores them in the database.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxies into the database through the configured getters.

        Each name in ``config.proxy_getter_functions`` is looked up on
        ``GetFreeProxy`` and called. Proxies not yet in the useful table are
        put into the raw table. A getter that raises is logged and skipped.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            try:
                proxy_set = set()
                # Fetch raw proxies.
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if proxy:
                        self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
                        proxy_set.add(proxy.strip())

                # Store raw proxies that are not already known to be useful.
                for proxy in proxy_set:
                    self.db.changeTable(self.useful_proxy_queue)
                    if self.db.exists(proxy):
                        continue
                    self.db.changeTable(self.raw_proxy_queue)
                    self.db.put(proxy)
            except Exception as e:
                # ``except X as e`` / print(e) are valid on Python 2.6+ and 3;
                # the old ``except X, e`` / ``print e`` forms are Python 2 only.
                print(e)
                continue
class proxyRefreshSchedule(ProxyManage):
    """
    Periodic proxy refresh.
    """

    def __init__(self):
        ProxyManage.__init__(self)
        self.log = LogHandler("refresh_schedule")

    def validProxy(self, row_table, usefultable):
        """
        Validate the proxies stored in ``row_table`` and put the usable ones
        into ``usefultable``.

        :param row_table: name of the table to pop raw proxies from; a name
            containing 'https' makes the proxy get validated as https,
            otherwise as http
        :param usefultable: name of the table that receives validated proxies
        :return: None
        """
        self.db.changeTable(row_table)
        raw_proxy_item = self.db.pop()
        self.log.info("ProxyRefreshSchedule:{} start validProxy".format(
            time.ctime()))
        remaining_proxies = self.getAll(self.useful_proxy_queue)
        while raw_proxy_item:
            try:
                raw_proxy = raw_proxy_item.get('proxy')
            except AttributeError:
                # The item has no .get(), so it is used as the proxy itself.
                raw_proxy = raw_proxy_item
            if isinstance(raw_proxy, bytes):
                # decode() returns a new object; the result must be kept.
                raw_proxy = raw_proxy.decode('utf-8')
            if 'https' in row_table:
                raw_proxy_dict = {'https': raw_proxy}
            else:
                raw_proxy_dict = {'http': raw_proxy}
            if (raw_proxy not in remaining_proxies
                    and validUsefulProxy(raw_proxy_dict)):
                self.db.changeTable(usefultable)
                self.db.put(raw_proxy)
                self.log.info("ProxyRefreshSchedule:%s validation pass" % raw_proxy)
            else:
                self.log.info("ProxyRefreshSchedule: %s validation fail" % raw_proxy)
            self.db.changeTable(row_table)
            raw_proxy_item = self.db.pop()
            # NOTE(review): the first snapshot is taken from
            # useful_proxy_queue but this refresh reads row_table -- confirm
            # which table the duplicate check is meant to use.
            remaining_proxies = self.getAll(row_table)
        self.log.info("ProxyRefreshSchedule:%s validProxy complete" % time.ctime())
def testLogHandler():
    """
    Exercise LogHandler: log once as 'test', then rename the handler to
    'test1' and 'test2' and log once under each name.

    :return: None
    """
    log = LogHandler('test')
    log.info('this is a log from test')
    for suffix in ('1', '2'):
        current = 'test' + suffix
        log.resetName(name=current)
        log.info('this is a log from %s' % current)
class ProxyRefreshSchedule(ProxyManager):
    """
    Periodically refresh the raw proxies, moving usable ones into useful.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('ProxyRefresh')

    def start(self):
        """
        Pop proxies from the raw table until it yields a falsy value and put
        every proxy that passes ``proxy_useful_valid`` into the useful table.

        :return: None
        """
        client = self.db_client
        self.log.info('Proxy valid start')
        client.change_table(self.raw_proxy)
        candidate = client.pop()
        while candidate:
            if not proxy_useful_valid(candidate):
                self.log.info('Proxy valid failed {}'.format(candidate))
            else:
                self.log.info('Proxy valid pass {}'.format(candidate))
                client.change_table(self.useful_proxy)
                client.put(candidate)
                # Back to the raw table for the next pop.
                client.change_table(self.raw_proxy)
            candidate = client.pop()
        self.log.info('Proxy valid end')
class ProxyCheck(ProxyManager, Thread):
    """
    Check the proxies in useful and delete the unusable ones.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('ProxyCheck')

    def run(self):
        """
        Thread entry point; loops forever.

        Each round pops proxies from the useful table until it yields a
        falsy value, then pauses for five minutes.

        :return: never returns
        """
        client = self.db_client
        self.log.info('Proxy useful check start')
        while True:
            client.change_table(self.useful_proxy)
            current = client.pop()
            while current:
                if not proxy_useful_valid(current):
                    self.log.info('Proxy useful valid failed {}'.format(current))
                    client.delete(current)
                else:
                    self.log.info('Proxy useful valid pass {}'.format(current))
                    client.put(current)
                current = client.pop()
            self.log.info('Proxy useful check pausing')
            sleep(5 * 60)
class ProxyValidSchedule(ProxyManager):
    """
    Endless validation loop over the useful proxy table, run once a minute.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def __validProxy(self):
        """
        Validate the proxies.

        A pass calls ``db.update(proxy, 1)`` while the stored value is below
        1. A failure deletes the proxy when the stored value is below -5 and
        otherwise calls ``db.update(proxy, -1)``.

        :return: never returns
        """
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for candidate in self.db.getAll():
                if isinstance(candidate, bytes):
                    # PY3 compatibility.
                    candidate = candidate.decode('utf-8')

                score = self.db.get(candidate)
                if not validUsefulProxy(candidate):
                    if score and int(score) < -5:
                        # Value below -5: drop the proxy.
                        self.db.delete(candidate)
                    else:
                        # Failure: update by -1.
                        self.db.update(candidate, -1)
                    self.log.info('ProxyValidSchedule: {} validation fail'.format(candidate))
                    continue
                # Success: update by +1 while the value is below 1.
                if score and int(score) < 1:
                    self.db.update(candidate, 1)
                self.log.info('ProxyValidSchedule: {} validation pass'.format(candidate))

            self.log.info('ProxyValidSchedule running normal')
            sleep(60 * 1)

    def main(self):
        """Run the validation loop (blocks forever)."""
        self.__validProxy()
class ProxyCheck(ProxyManager, Thread):
    """
    Background thread that re-validates the useful table every five minutes
    and keeps a score per proxy.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        Thread.__init__(self)
        self.log = LogHandler('proxy_check')

    def run(self):
        """
        Thread entry point (overrides ``Thread.run``; started via
        ``.start()`` on an instance); loops forever.

        A pass raises a score below 1 by one, otherwise re-stores the proxy
        with the default score. A failure deletes the proxy when its score
        is at or below ``-FAIL_COUNT``, otherwise lowers the score by one.

        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            proxy_item = self.db.pop()
            # Keep popping from useful_proxy_queue until it yields nothing.
            while proxy_item:
                proxy = proxy_item.get('proxy')
                counter = proxy_item.get('value')
                if counter is None:
                    # An item without a stored score is treated as 1 so the
                    # failure branch below does not crash on int(None).
                    counter = 1
                if validUsefulProxy(proxy):
                    # Passed: score + 1.
                    if counter and int(counter) < 1:
                        self.db.put(proxy, num=int(counter) + 1)
                    else:
                        self.db.put(proxy)
                    self.log.info(
                        'ProxyCheck: {} validation pass'.format(proxy))
                else:
                    self.log.info(
                        'ProxyCheck: {} validation fail'.format(proxy))
                    # Failed: score - 1, or delete at the threshold.
                    if counter and int(counter) <= -FAIL_COUNT:
                        self.log.info(
                            'ProxyCheck: {} fail too many, delete!'.format(
                                proxy))
                        self.db.delete(proxy)
                    else:
                        self.db.put(proxy, num=int(counter) - 1)
                proxy_item = self.db.pop()
            sleep(60 * 5)
class ProxyManager(object):
    """
    ProxyManager: fetches raw proxies and serves the useful ones.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxies into the raw table through the configured getters.

        Only proxies accepted by ``verifyProxyFormat`` are stored. A getter
        that raises is logged, together with the error text, and skipped.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in self.config.proxy_getter_functions:
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                # Keep the cause in the log instead of discarding it.
                self.log.error(str(e))
                continue

    def get(self):
        """
        Return a random useful proxy.

        :return: one key of the useful table, or None when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list(mapping) yields the keys on both Python 2 and 3.
            return random.choice(list(item_dict))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to remove
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Get all useful proxies as a list.

        :return: list of the keys of the useful table (empty when none)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict) if item_dict else []

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with the keys 'raw_proxy' and 'useful_proxy'
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """
    ProxyManager: fetches raw proxies and serves the useful ones.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def fetch(self):
        """
        Fetch proxies into the raw table through the configured getters.

        Ill-formed proxies and proxies already seen during this call are
        skipped. A getter that raises is logged and the next one runs.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()
                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(
                                           func=proxyGetter,
                                           proxy=proxy.ljust(20)))
                        continue
                    if proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        continue
                    self.log.info('ProxyFetch - {func}: '
                                  '{proxy} success'.format(
                                      func=proxyGetter,
                                      proxy=proxy.ljust(20)))
                    self.db.put(Proxy(proxy, source=proxyGetter))
                    proxy_set.add(proxy)
            except Exception as e:
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        Return a random useful proxy.

        :return: ``Proxy.newProxyFromJson`` of a random entry, or None when
            the useful table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if item_list:
            random_choice = random.choice(item_list)
            return Proxy.newProxyFromJson(random_choice)
        return None

    def _getByScheme(self, scheme):
        """Return a random useful proxy whose URL scheme is *scheme*, or None."""
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        # Filter first, then draw once: drawing at random with replacement
        # could miss a matching proxy and wrongly report that none exists.
        matching = [
            item for item in item_list or []
            if json.loads(item)['proxy'].split("://")[0] == scheme
        ]
        if matching:
            return Proxy.newProxyFromJson(random.choice(matching))
        return None

    def get_http(self):
        """
        Return a random http proxy.

        :return: a proxy whose stored URL starts with ``http://``, or None
            when the useful table holds none
        """
        return self._getByScheme('http')

    def get_socks(self):
        """
        Return a random socks proxy.

        :return: a proxy whose stored URL starts with ``socks4://``, or None
            when the useful table holds none
        """
        return self._getByScheme('socks4')

    def delete(self, proxy_str):
        """
        Delete a proxy from the useful table.

        :param proxy_str: key of the proxy to remove
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        Get all useful proxies as a list.

        :return: list of ``Proxy.newProxyFromJson`` results (empty when the
            useful table holds nothing)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        return [Proxy.newProxyFromJson(_) for _ in item_list or []]

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with the keys 'raw_proxy' and 'useful_proxy'
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """
    ProxyManager: fetches raw proxies and serves the useful ones.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxy addresses and store them in the database.

        Well-formed proxies that are not yet in the useful table are put
        into the raw table. A getter that raises is logged and skipped.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            try:
                self.log.info("{func}:fetch proxy start".format(func=proxyGetter))
                proxy_iter = list(getattr(GetFreeProxy, proxyGetter.strip())())
            except Exception as e:
                self.log.error("{func}:fetch proxy fail".format(func=proxyGetter))
                # Keep the cause in the log instead of discarding it.
                self.log.error(str(e))
                continue
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info("{func}:fetch proxy {proxy}".format(func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.info("{func}:fetch proxy {proxy} error".format(func=proxyGetter, proxy=proxy))

            # Store in the database, skipping proxies already known useful.
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        Return a random useful proxy.

        :return: one key of the useful table, or None when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list(mapping) yields the keys on both Python 2 and 3.
            return random.choice(list(item_dict))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to remove
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Get all useful proxies as a list.

        :return: list of the keys of the useful table (empty when none)
        """
        self.db.changeTable(self.useful_proxy_queue)
        items = self.db.getAll()
        # The former non-PY3 path called the non-existent ``items.key()``.
        return list(items) if items else []

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with the keys 'raw_proxy' and 'useful_proxy'
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }
class ProxyManager(object):
    """
    ProxyManager: fetches raw proxies, serves the useful ones and manages
    the ADSL entries.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'
        self.adsl_queue = 'adsl'

    def refresh(self):
        """
        Fetch proxies into the raw table through the getters listed in
        ``config.proxy_getter_functions``.

        Only proxies accepted by ``verifyProxyFormat`` are stored. A getter
        that raises is logged, together with the error text, and skipped.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # Proxies are stored directly; no de-duplication is done
                    # here because the hash structure already removes
                    # duplicates.
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                # Keep the cause in the log instead of discarding it.
                self.log.error(str(e))
                continue

    def get(self):
        """
        Return a random useful proxy.

        :return: one key of the useful table, or None when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list(mapping) yields the keys on both Python 2 and 3.
            return random.choice(list(item_dict))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to remove
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Get all useful proxies as a list.

        :return: list of the keys of the useful table (empty when none)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict) if item_dict else []

    def getNumber(self):
        """
        Count the entries of the raw and useful tables.

        :return: dict with the keys 'raw_proxy' and 'useful_proxy'
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def initProxyPool(self):
        """
        Call this on first start-up: empties the useful table and returns
        the values stored in the adsl table.

        :return: list of the adsl table's values (empty when none)
        """
        self.deleteAll()
        self.db.changeTable(self.adsl_queue)
        item_dict = self.db.getAll()
        return list(item_dict.values()) if item_dict else []

    def deleteAll(self):
        """
        Empty the proxy pool by deleting every useful proxy.

        :return: None
        """
        for proxy in self.getAll():
            self.delete(proxy)

    def refreshADSL(self, proxy):
        """
        Trigger a re-dial on the host of *proxy*.

        :param proxy: ``host:port`` as ``str`` or UTF-8 encoded ``bytes``;
            only the part before the first ':' is used
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf8')
        ip = proxy.split(':')[0]
        try:
            # Call the refresh endpoint on the host to make it re-dial.
            refreshApi = "http://{ip}:8000/refresh".format(ip=ip)
            r = requests.get(refreshApi, timeout=5, verify=False)
            if r.status_code == 200:
                # The template was printed verbatim before; fill in the proxy.
                print('{proxy} refres done'.format(proxy=proxy))
        except Exception as e:
            print(str(e))
class ProxyManager(object):
    """
    ProxyManager: fetches raw proxies and serves the useful ones.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxies into the database through the configured getters.

        Proxies not yet in the useful table are put into the raw table. A
        getter that raises is logged and skipped so the remaining getters
        still run.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            try:
                # Fetch raw proxies.
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if proxy:
                        self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
                        proxy_set.add(proxy.strip())
            except Exception as e:
                # One broken source must not abort the whole refresh.
                self.log.error('{func}: fetch proxy fail'.format(func=proxyGetter))
                self.log.error(str(e))
                continue

            # Store raw proxies that are not already known to be useful.
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        Return a random useful proxy.

        :return: one key of the useful table, or None when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list(mapping) yields the keys on both Python 2 and 3.
            return random.choice(list(item_dict))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to remove
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Get all useful proxies as a list.

        :return: list of the keys of the useful table (empty when none)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict) if item_dict else []

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with the keys 'raw_proxy' and 'useful_proxy'
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}
class ProxyManager(object):
    """
    ProxyManager: fetches raw proxies and serves the useful ones.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxies into the database through the configured getters.

        Proxies not yet in the useful table are put into the raw table. A
        getter that raises is logged and skipped so the remaining getters
        still run.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            try:
                # Fetch raw proxies.
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if proxy:
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        proxy_set.add(proxy.strip())
            except Exception as e:
                # One broken source must not abort the whole refresh.
                self.log.error(
                    '{func}: fetch proxy fail'.format(func=proxyGetter))
                self.log.error(str(e))
                continue

            # Store raw proxies that are not already known to be useful.
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        Return a random useful proxy.

        :return: one key of the useful table, or None when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list(mapping) yields the keys on both Python 2 and 3.
            return random.choice(list(item_dict))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to remove
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Get all useful proxies as a list.

        :return: list of the keys of the useful table (empty when none)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict) if item_dict else []

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with the keys 'raw_proxy' and 'useful_proxy'
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }