def freeProxy1(page_num=20, work_proxy=None) -> List[str]:
    """Scrape free proxies from 齐云代理.

    Samples 2 random pages out of the first ``page_num`` pages of
    https://www.7yip.cn/free/?action=china&page={page} and extracts
    ip:port pairs from the HTML table.

    :param page_num: upper bound of the 1-based page range to sample from.
    :param work_proxy: optional proxies argument forwarded to make_req.
    :return: list of validated proxy URLs, each prefixed with ``http://``.
    """
    src_name = '齐云代理'
    print("使用来源1: ", src_name)
    proxies = []
    url_format = 'https://www.7yip.cn/free/?action=china&page={page}'
    for i in random.sample(list(range(page_num)), k=2):
        url = url_format.format(page=i + 1)
        good, content_type, res = make_req(url, proxies=work_proxy)
        # Substring match: charset and spacing in the Content-Type header vary
        # between servers; the previous exact comparison against
        # 'text/html;charset=utf-8' silently rejected otherwise-good responses.
        if good and content_type and 'text/html' in content_type:
            items = re.findall(
                r'<td.*?>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td.*?>(\d+)</td>',
                res.text)
            cnt = 0
            for item in items:
                raw_ip = item[0] + ":" + item[1]
                if ProxyFactory.proxy_str_check(raw_ip):
                    proxies.append('http://' + raw_ip)
                    cnt += 1
            print("---->通过来源:{0} {1} 获得{2}个IP".format(src_name, url, cnt))
        else:
            why = content_type
            print("---->通过来源:{0} {1} 获得IP失败 原因为{2}".format(
                src_name, url, why))
    return proxies
def freeProxy0(work_proxy=None) -> List[str]:
    """Fetch proxies from jhao104's proxy_pool JSON API.

    Endpoint: http://118.24.52.95/get_all/
    (https://github.com/jhao104/proxy_pool/tree/1a3666283806a22ef287fba1a8efab7b94e94bac)
    每日500次限额 超出限额后使用代理

    :param work_proxy: optional proxies argument forwarded to make_req,
        used once the daily quota is exhausted.
    :return: list of validated proxy URLs, each prefixed with ``http://``.
    """
    src_name = 'IP代理接口jhao104'
    print("使用来源0: ", src_name)
    proxies = []
    url = 'http://118.24.52.95/get_all/'
    good, content_type, res = make_req(url, proxies=work_proxy)
    if good and content_type == 'application/json':
        # BUGFIX: json.loads() lost its `encoding` parameter in Python 3.9, so
        # res.json(encoding='utf-8') raises TypeError there; requests already
        # auto-detects the response encoding, so no argument is needed.
        items = res.json()
        cnt = 0
        for item in items:
            raw_ip = item['proxy']
            if ProxyFactory.proxy_str_check(raw_ip):
                proxies.append('http://' + raw_ip)
                cnt += 1
        print("---->通过来源:{0} {1} 获得{2}个IP".format(src_name, url, cnt))
    else:
        why = content_type
        print("---->通过来源:{0} {1} 获得IP失败 原因为{2}".format(src_name, url, why))
    return proxies
def freeProxy7(page_num=100, work_proxy=None) -> List[str]:
    """Scrape free proxies from 西拉代理.

    Samples 2 random pages out of the first ``page_num`` pages of
    http://www.xiladaili.com/gaoni/{page}/ and collects ip:port pairs.

    :param page_num: upper bound of the 1-based page range to sample from.
    :param work_proxy: optional proxies argument forwarded to make_req.
    :return: list of validated proxy URLs, each prefixed with ``http://``.
    """
    src_name = '西拉代理'
    print("使用来源7: ", src_name)
    proxies = []
    base_url = 'http://www.xiladaili.com/gaoni/{page}/'
    # \d{1,3} groups capture the dotted quad, second group the port.
    ip_port_pattern = r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+)</td>'
    for page_idx in random.sample(list(range(page_num)), k=2):
        url = base_url.format(page=page_idx + 1)
        good, content_type, res = make_req(url, proxies=work_proxy, timeout=15)
        if good and 'text/html' in content_type:
            candidates = [host + ":" + port
                          for host, port in re.findall(ip_port_pattern, res.text)]
            valid = [c for c in candidates if ProxyFactory.proxy_str_check(c)]
            proxies.extend('http://' + c for c in valid)
            print("---->通过来源:{0} {1} 获得{2}个IP".format(src_name, url, len(valid)))
        else:
            print("---->通过来源:{0} {1} 获得IP失败 原因为{2}".format(
                src_name, url, content_type))
    return proxies
def test_IP_by_request(proxy: str, url: str, timeout: int) -> tuple:
    """Probe *url* through *proxy* to check whether the proxy works.

    Thin wrapper that delegates to make_req. NOTE(review): arguments are
    passed positionally — this assumes make_req's second and third positional
    parameters are the proxies and timeout (matches the keyword usage
    elsewhere in this file); confirm against make_req's signature.

    :param proxy: proxy to test.
    :param url: target URL used for the probe.
    :param timeout: request timeout in seconds.
    :return: (isGood, content_type, result: Response)
    """
    outcome = make_req(url, proxy, timeout)
    return outcome
def freeProxy3(page_num=20, work_proxy=None) -> List[str]:
    """Fetch proxies from the 免费代理库jiangxianli JSON API.

    Samples 2 random pages out of the first ``page_num`` pages of
    https://ip.jiangxianli.com/api/proxy_ips?page={page}.

    :param page_num: upper bound of the 1-based page range to sample from.
    :param work_proxy: optional proxies argument forwarded to make_req.
    :return: list of validated proxy URLs, each prefixed with ``http://``.
    """
    src_name = '免费代理库jiangxianli'
    print("使用来源3: ", src_name)
    proxies = []
    url_format = "https://ip.jiangxianli.com/api/proxy_ips?page={page}"
    for i in random.sample(list(range(page_num)), k=2):
        url = url_format.format(page=i + 1)
        good, content_type, res = make_req(url, proxies=work_proxy)
        print(url, good, content_type)
        if good and content_type == 'application/json':
            # BUGFIX: json.loads() lost its `encoding` parameter in Python
            # 3.9, so res.json(encoding='utf-8') raises TypeError there.
            result_data = res.json()
            # Payload shape: {"data": {"data": [...]}} — default to an empty
            # inner list when either level is missing.
            items = result_data.get('data', dict(data=[])).get('data', [])
            if items:
                cnt = 0
                for item in items:
                    # str() guards against the API returning the port as an
                    # int, which would make the concatenation raise TypeError.
                    raw_ip = item['ip'] + ':' + str(item['port'])
                    if ProxyFactory.proxy_str_check(raw_ip):
                        proxies.append('http://' + raw_ip)
                        cnt += 1
                print("---->通过来源:{0} {1} 获得{2}个IP".format(
                    src_name, url, cnt))
            else:
                print("---->通过来源:{0} {1} 获得0个IP".format(src_name, url))
        else:
            why = content_type
            print("---->通过来源:{0} {1} 获得IP失败 原因为{2}".format(
                src_name, url, why))
    return proxies
def freeProxy2(page_num=20, work_proxy=None) -> List[str]:
    """Scrape free proxies from 云代理 (http://www.ip3366.net/free/).

    注意频率 — samples 2 random pages per listing type (stype=1 and stype=2)
    out of the first ``page_num`` pages each.

    :param page_num: upper bound of the 1-based page range to sample from.
    :param work_proxy: optional proxies argument forwarded to make_req.
    :return: list of validated proxy URLs, each prefixed with ``http://``.
    """
    src_name = '云代理'
    print("使用来源2: ", src_name)
    proxies = []
    url_formats = [
        'http://www.ip3366.net/free/?stype=1&page={page}',
        "http://www.ip3366.net/free/?stype=2&page={page}"
    ]
    for url_format in url_formats:
        for i in random.sample(list(range(page_num)), k=2):
            url = url_format.format(page=i + 1)
            good, content_type, res = make_req(url, proxies=work_proxy)
            # Substring match tolerates 'text/html; charset=...' header
            # variants; the previous exact comparison dropped such responses.
            if good and content_type and 'text/html' in content_type:
                items = re.findall(
                    r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td>(\d+)</td>',
                    res.text)
                cnt = 0
                for item in items:
                    raw_ip = item[0] + ":" + item[1]
                    if ProxyFactory.proxy_str_check(raw_ip):
                        proxies.append('http://' + raw_ip)
                        cnt += 1
                print("---->通过来源:{0} {1} 获得{2}个IP".format(
                    src_name, url, cnt))
            else:
                why = content_type
                print("---->通过来源:{0} {1} 获得IP失败 原因为{2}".format(
                    src_name, url, why))
    return proxies