def quit(self):
    """Sign the current account out via the common page and return a LoginPage.

    Any failure during the click sequence is logged at debug level and
    swallowed; a LoginPage wrapping the same driver is returned regardless.
    """
    try:
        # Fixed 3s pauses between UI actions, as in the original flow.
        for ui_action in (self.common_page.click_username,
                          self.common_page.click_loginout_button):
            time.sleep(3)
            ui_action()
        time.sleep(3)
        logger.info('账号退出登录成功')
    except Exception as err:
        logger.debug(err)
    return LoginPage(self.common_page.driver)
def test_useful_fanqiang(self, ip_dict):
    """
    Test whether this proxy can reach blocked sites (socks5 proxy).

    :param ip_dict: proxy record with 'proxy_type', 'ip_with_port' and
                    '_id' keys (presumably a Mongo document — TODO confirm).
    :return: True if youtube.com answered within the timeout, else False.
    """
    try:
        proxy_type = ip_dict['proxy_type']
        ip_with_port = ip_dict['ip_with_port']
        logger.debug("开始测试" + ip_with_port)
        # 'socks5h' makes requests resolve DNS through the proxy itself.
        scheme = proxy_type + ('h' if proxy_type == 'socks5' else '')
        proxy_url = scheme + '://' + ip_with_port
        resp = requests.get(
            'https://www.youtube.com/',
            headers=scribe_utils.headers,
            proxies={'http': proxy_url, 'https': proxy_url},
            timeout=2)
        logger.debug(ip_with_port + "------------------可用")
        # elapsed.microseconds holds only the sub-second part; used here
        # as a relative speed score for ranking proxies.
        use_time = resp.elapsed.microseconds / math.pow(10, 6)
        ip_dict['time'] = use_time
        db_client.run(Transformer().Fanqiang().update(
            ip_dict, {'_id': ip_dict['_id']}).done())
        self.__disable_minus_1(ip_dict)
        return True
    except Exception:
        # The original had two except blocks (network errors vs. catch-all)
        # with byte-identical bodies, plus a `return False` inside
        # `finally`; they are merged into one handler with a plain return.
        try:
            self.__disable_plus_1(ip_dict)
        except Exception as inner:
            logger.info(inner)
        return False
def add_to_useful(ip_port, useful_list):
    """Probe one proxy via the RPC service; append it to useful_list when usable.

    Errors are logged with a full traceback and swallowed.
    """
    try:
        logger.debug('测试ip_port:{0}'.format(ip_port['ip_with_port']))
        reply = fanqiang_service_client.run(
            Transformer().FanqiangService().test_useful_fanqiang(
                ip_port).done())
        usable = pickle.loads(reply.data)
        if usable:
            try:
                # Serialize appends across worker threads.
                lock.acquire()
                logger.debug('{0}能用'.format(ip_port['ip_with_port']))
                useful_list.append(ip_port)
            finally:
                lock.release()
    except Exception:
        logger.error(traceback.format_exc())
def merge_proxy():
    '''
    Merge the per-source proxy files under file/proxy_file into one
    proxies.txt per directory, de-duplicating lines and deleting the
    source files afterwards.

    :return: None
    '''
    for root, dirs, files in os.walk("file/proxy_file"):
        logger.debug(root)   # current directory path
        logger.debug(dirs)   # sub-directories of the current path
        logger.debug(files)  # plain files in the current path
        all_ip_port_list = []
        for file_name in files:
            if file_name == 'proxies.txt':
                continue
            source_path = root + "/" + file_name
            # `with` already closes the handle; the original's explicit
            # try/finally close() calls were redundant and are removed.
            with open(source_path, 'r', encoding='utf-8') as fr:
                all_ip_port_list.extend(fr.readlines())
            os.remove(source_path)
        all_ip_port_list = list(set(all_ip_port_list))  # de-duplicate
        with open(root + '/proxies.txt', 'a+', encoding='utf-8') as fw:
            fw.writelines(all_ip_port_list)
def modify_chrome_file(file_path, ip_with_port):
    """
    Rewrite a Chrome proxy (PAC-style) file in place, replacing the first
    two 'SOCKS ip:port' / 'SOCKS5 ip:port' addresses with ip_with_port.

    :param file_path: path of the file to rewrite.
    :param ip_with_port: replacement address, e.g. '1.2.3.4:1080'.
    :raises IndexError: if the file contains fewer than two SOCKS entries
                        (same as the original behavior).
    """
    pattern = r'(?:SOCKS |SOCKS5 )(\d+\.\d+\.\d+\.\d+:\d+)'
    with open(file_path, 'r', encoding='utf-8') as fr:
        old_text = fr.read()
    matches = re.findall(pattern, old_text)
    new_text = old_text
    # The original performed two copy-pasted replace() statements on
    # matches[0] and matches[1]; a loop removes the duplication while
    # keeping the IndexError for malformed files.
    for target in (matches[0], matches[1]):
        new_text = new_text.replace(target, ip_with_port)
    with open(file_path, 'w', encoding='utf-8') as fw:
        fw.write(new_text)
    logger.debug("已更新文件 %s,ip_port为:%s" % (file_path, ip_with_port))
def __get_useful_fanqiang_ip_gatherproxy_worker(self, q):
    """
    Queue worker: pull candidate proxies from q, probe each against
    google.com, and save reachable ones to Mongo (marking elite proxies).

    :param q: queue.Queue of proxy dicts with 'proxy_type' and
              'ip_with_port' keys.
    """
    while not q.empty():
        try:
            ip_dict = q.get()
            proxy_type = ip_dict['proxy_type']
            ip_with_port = ip_dict['ip_with_port']
            logger.debug("开始测试" + ip_with_port)
            # 'socks5h' makes requests resolve DNS through the proxy.
            scheme = proxy_type + ('h' if proxy_type == 'socks5' else '')
            proxy_url = scheme + '://' + ip_with_port
            resp = requests.get(
                'https://www.google.com/',
                headers=scribe_utils.headers,
                proxies={'http': proxy_url, 'https': proxy_url},
                timeout=10)
            # elapsed.microseconds holds only the sub-second part; used as
            # a relative speed score.
            use_time = resp.elapsed.microseconds / math.pow(10, 6)
            logger.debug(ip_with_port + "可用")
            elite = FanqiangService.test_elite(ip_dict['ip_with_port'],
                                               ip_dict['proxy_type'])
            # The original duplicated the whole save() call for the
            # elite / non-elite cases; build one record instead.
            record = {
                'proxy_type': proxy_type,
                'ip_with_port': ip_with_port,
                'time': use_time,
                'location': FanqiangService.get_location(
                    ip_with_port.split(':')[0]),
            }
            if elite:
                record['Elite'] = elite
            try:
                # `with lock` fixes the original pattern of acquire()
                # inside try + release() in finally, which could release
                # a lock whose acquisition never succeeded.
                with lock:
                    db_client.run(
                        Transformer().Fanqiang().save(record).done())
            except Exception as e:
                logger.info(e)
        except (requests.exceptions.ConnectionError, requests.ReadTimeout,
                requests.exceptions.SSLError, scribe_utils.RobotException):
            continue  # unreachable proxy: just move on to the next one
        except Exception as e:
            # The original's `driver` local was never assigned anything
            # but None, so the `driver.quit()` branch was dead code and
            # has been removed.
            if re.findall(r'NoneType', str(e)):
                continue
            if not isinstance(e, ValueError):
                logger.warning(traceback.format_exc())
            continue
        finally:
            q.task_done()
def __get_useful_fanqiang_ip_mongo_worker(self, q):
    """
    Queue worker: re-check proxies already stored in Mongo. Reachable
    ones are appended to useful_proxy_in_mongo and pushed into the PAC
    file; unreachable ones get their 'disable_times' counter bumped.

    :param q: queue.Queue of Mongo proxy dicts with 'proxy_type',
              'ip_with_port', 'disable_times' and '_id' keys.
    """

    def bump_disable_times(ip_dict):
        # Best-effort failure-count increment. The original duplicated
        # this code verbatim in both except handlers.
        try:
            with lock:
                db_client.run(Transformer().Fanqiang().update(
                    {'disable_times': ip_dict['disable_times'] + 1},
                    {'_id': ip_dict['_id']}).done())
        except Exception as err:
            logger.info(err)

    while not q.empty():
        try:
            ip_dict = q.get()
            proxy_type = ip_dict['proxy_type']
            ip_with_port = ip_dict['ip_with_port']
            logger.debug("开始测试" + ip_with_port)
            # 'socks5h' makes requests resolve DNS through the proxy.
            scheme = proxy_type + ('h' if proxy_type == 'socks5' else '')
            proxy_url = scheme + '://' + ip_with_port
            requests.get(
                'https://www.google.com/',
                headers=scribe_utils.headers,
                proxies={'http': proxy_url, 'https': proxy_url},
                timeout=10)
            with lock:
                useful_proxy_in_mongo.append(ip_dict)
            logger.debug(ip_with_port + "可用")
            self.modify_chrome_pac_file_and_push(ip_with_port)
        except (scribe_utils.RobotException,
                requests.exceptions.ConnectionError, requests.ReadTimeout,
                requests.exceptions.SSLError):
            bump_disable_times(ip_dict)
            continue
        except Exception as e:
            bump_disable_times(ip_dict)
            # The original's `driver` local was never assigned anything
            # but None, so the `driver.quit()` branch was dead code and
            # has been removed.
            if re.findall(r'NoneType', str(e)):
                continue
            if not isinstance(e, ValueError):
                logger.warning(traceback.format_exc())
            continue
        finally:
            q.task_done()
def get_ip_port_to_pool():
    '''
    Periodically refresh the standby ip_port pool.

    Loads the cached pool from file/bak_pool, then loops forever:
    drops entries the RPC service reports as dead, tops the pool up from
    Mongo whenever it falls below the configured size, persists it back
    to disk and sleeps 60 seconds.

    :return: never returns normally; restarts the loop after any crash.
    '''
    # Outer loop replaces the original's recursive self-restart on error,
    # which could grow the stack without bound over a long run. The
    # original also defined an unused threading.Lock and a dead nested
    # add_to_useful() (referenced only by commented-out code); both
    # removed.
    while True:
        try:
            # Load the previously persisted pool.
            # NOTE(review): pickle.load is unsafe on untrusted data;
            # acceptable here only because bak_pool is self-written below.
            with open('file/bak_pool', 'rb') as fr:
                try:
                    cache.ip_port_pool = pickle.load(fr)
                    logger.debug('从文件读到的数量为:{0}'.format(
                        len(cache.ip_port_pool)))
                except EOFError:
                    pass  # empty cache file: keep the in-memory pool
                except Exception:
                    logger.error(traceback.format_exc())
            new_num = int(Conf.get('IP_PORT_POOL', 'new_num'))
            while True:
                # Keep only proxies the RPC service still reports usable.
                useful_list = []
                for ip_port in cache.ip_port_pool:
                    logger.debug('测试ip_port:{0}'.format(
                        ip_port['ip_with_port']))
                    is_useful = pickle.loads(
                        fanqiang_service_client.run(
                            Transformer().FanqiangService()
                            .test_useful_fanqiang(ip_port).done()).data)
                    if is_useful:
                        logger.debug('{0}能用'.format(
                            ip_port['ip_with_port']))
                        useful_list.append(ip_port)
                cache.ip_port_pool = useful_list
                # Top up from Mongo when the pool is below target size,
                # asking for progressively more (new_num) while short.
                if len(cache.ip_port_pool) < int(
                        Conf.get('IP_PORT_POOL', 'bak_num')):
                    new_num += 1
                    logger.debug('pool数量{0}达不到要求的{1}'.format(
                        len(cache.ip_port_pool),
                        Conf.get('IP_PORT_POOL', 'bak_num')))
                    ip_port_list = pickle.loads(
                        fanqiang_service_client.run(
                            Transformer().FanqiangService()
                            .get_useful_fanqiang_ip_port_from_mongo(
                                new_num).done()).data)
                    cache.ip_port_pool.extend(ip_port_list)
                    # De-duplicate by ip_with_port, keeping the last
                    # occurrence — same outcome as the original's two
                    # hand-written loops.
                    cache.ip_port_pool = list({
                        d['ip_with_port']: d
                        for d in cache.ip_port_pool
                    }.values())
                else:
                    new_num -= 1
                # Persist the usable pool. (The original also caught
                # EOFError here, which pickle.dump cannot raise.)
                with open('file/bak_pool', 'wb') as fw:
                    try:
                        logger.debug('写到的数量为:{0}'.format(
                            len(cache.ip_port_pool)))
                        pickle.dump(cache.ip_port_pool, fw)
                    except Exception:
                        logger.error(traceback.format_exc())
                # Re-check once a minute.
                time.sleep(60)
        except Exception:
            logger.error(traceback.format_exc())
            logger.error('get_ip_port_to_pool 线程错误关闭')