import asyncio
import base64
import re

import aiohttp
from requests_html import AsyncHTMLSession


async def kuaidaili():
    # Scrape both the anonymous ("inha") and transparent ("intr") free
    # lists, pausing between requests to stay under the site's rate limit.
    res = list()
    sess = AsyncHTMLSession()
    for i, url in enumerate([
        'https://www.kuaidaili.com/free/inha/',
        'https://www.kuaidaili.com/free/intr/',
    ]):
        if i > 0:
            await asyncio.sleep(5)
        resp = await sess.get(url)
        for ip_row in resp.html.find('#list table tr'):
            ip = ip_row.find('td[data-title="IP"]', first=True)
            port = ip_row.find('td[data-title="PORT"]', first=True)
            if ip and port:
                res.append(Proxy(
                    ip_port=f"{ip.text}:{port.text}",
                    scheme=SCHEME_HTTP,
                    status=STATUS_NEW,
                ))
    await sess.close()
    return res
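# The snippets in this collection build Proxy records through a model
# defined elsewhere in each project. The stand-in below is only an assumed,
# minimal sketch of the shape the async collectors rely on; the real Proxy
# class and the SCHEME_HTTP / STATUS_NEW constants are not shown in these
# sources.
from dataclasses import dataclass

SCHEME_HTTP = 'http'
STATUS_NEW = 'new'


@dataclass
class Proxy:
    ip_port: str  # "host:port"
    scheme: str   # e.g. SCHEME_HTTP
    status: str   # e.g. STATUS_NEW, i.e. not yet health-checked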
async def proxynova():
    res = list()
    sess = AsyncHTMLSession()
    resp = await sess.get('https://www.proxynova.com/proxy-server-list/')
    for tr in resp.html.find('#tbl_proxy_list > tbody:nth-child(2) > tr'):
        # Filler rows (ads, headers) carry no proxy id; skip them.
        if 'data-proxy-id' not in tr.attrs:
            continue
        script_element = tr.find('td:nth-child(1) > abbr > script', first=True)
        port_element = tr.find('td:nth-child(2)', first=True)
        if not script_element or not port_element:
            continue
        # The IP is emitted by an inline document.write('...') call.
        groups = re.findall(r"document\.write\('(.*?)'\);", script_element.text)
        if len(groups) != 1:
            continue
        ip = groups[0]
        port = port_element.text
        res.append(Proxy(
            ip_port=f"{ip}:{port}",
            scheme=SCHEME_HTTP,
            status=STATUS_NEW,
        ))
    await sess.close()
    return res
async def http_proxy():
    res = list()
    sess = AsyncHTMLSession()
    for u in [
        'https://proxyhttp.net/free-list/proxy-anonymous-hide-ip-address/',
        'https://proxyhttp.net/',
        'https://proxyhttp.net/free-list/anonymous-server-hide-ip-address/2#proxylist',
    ]:
        resp = await sess.get(u)
        # Ports are computed by JavaScript, so render the page first.
        await resp.html.arender(wait=1.5, timeout=10.0)
        for ip_row in resp.html.find('table.proxytbl tr'):
            ip = ip_row.find('td:nth-child(1)', first=True)
            port = ip_row.find('td:nth-child(2)', first=True)
            try:
                if ip and port:
                    port_str = re.search(r'//]]> (\d+)', port.text).group(1)
                    res.append(Proxy(
                        ip_port=f"{ip.text}:{port_str}",
                        scheme=SCHEME_HTTP,
                        status=STATUS_NEW,
                    ))
            except AttributeError:
                # re.search() returned None: the port cell had no CDATA suffix.
                pass
    await sess.close()
    return res
def addlist():
    # Form field "proxys" holds a comma-separated list of "ip:port" pairs.
    proxy_list_str = request.form["proxys"]
    for entry in proxy_list_str.split(","):
        parts = entry.split(":")
        if len(parts) == 2:
            proxyapi.add(Proxy(ip=parts[0], port=parts[1]))
    return ""
def all(self):
    """Return all stored proxies."""
    result = self.__db.zrangebyscore(
        self.__REDIS_STORAGE_KEY, self.__MIN_SCORE, self.__MAX_SCORE)
    return [Proxy().re_serialize(i) for i in result]
async def thespeedx_proxy_list():
    async with aiohttp.request(
        "GET",
        'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',
        proxy=proxies,
    ) as resp:
        txt = await resp.text()
    # Skip blank lines so a trailing newline does not yield an empty proxy.
    return [
        Proxy(ip_port=line.strip(), scheme=SCHEME_HTTP, status=STATUS_NEW)
        for line in txt.split("\n")
        if line.strip()
    ]
async def proxy_scraper():
    async with aiohttp.request(
        "GET",
        'https://sunny9577.github.io/proxy-scraper/proxies.json',
        proxy=proxies,
    ) as resp:
        res = await resp.json()
    return [
        Proxy(
            ip_port=f"{x['ip']}:{x['port']}",
            scheme=SCHEME_HTTP,
            status=STATUS_NEW,
        )
        for x in res["proxynova"]
    ]
async def cool_proxy():
    async with aiohttp.request(
        "GET",
        'https://cool-proxy.net/proxies.json',
        proxy=proxies,
    ) as resp:
        res = await resp.json()
    return [
        Proxy(
            ip_port=f"{x['ip']}:{x['port']}",
            scheme=SCHEME_HTTP,
            status=STATUS_NEW,
        )
        for x in res
    ]
async def pubproxy():
    async with aiohttp.request(
        "GET",
        'http://pubproxy.com/api/proxy?limit=5&format=json&type=http&level=anonymous&last_check=60',
        proxy=proxies,
    ) as resp:
        res = await resp.json()
    return [
        Proxy(
            ip_port=x['ipPort'],
            scheme=SCHEME_HTTP,
            status=STATUS_NEW,
        )
        for x in res["data"]
    ]
def show(self):
    """Print every proxy and its score as an ASCII table."""
    print('+{}+{}+'.format('-' * 21, '-' * 8))
    result = self.__db.zrevrangebyscore(
        self.__REDIS_STORAGE_KEY, self.__MAX_SCORE, self.__MIN_SCORE)
    for i in result:
        proxy = Proxy().re_serialize(i)
        score = self.__db.zscore(self.__REDIS_STORAGE_KEY, i)
        # Pad so "ip:port" fills a fixed 21-character column.
        size = 20 - len(proxy.ip + proxy.port)
        print('|{}:{}{}| {} |'.format(proxy.ip, proxy.port, ' ' * size, score))
    print('+{}+{}+'.format('-' * 21, '-' * 8))
async def ipaddress():
    res = list()
    sess = AsyncHTMLSession()
    resp = await sess.get('https://www.ipaddress.com/proxy-list/')
    for ip_row in resp.html.find('.proxylist tbody tr'):
        # Guard against rows without a first cell instead of crashing on .text.
        cell = ip_row.find('td:nth-child(1)', first=True)
        if cell is None:
            continue
        res.append(Proxy(
            ip_port=cell.text,
            scheme=SCHEME_HTTP,
            status=STATUS_NEW,
        ))
    await sess.close()
    return res
async def free_proxy_list():
    res = list()
    sess = AsyncHTMLSession()
    resp = await sess.get('https://free-proxy-list.net/')
    for ip_row in resp.html.find('#proxylisttable tbody tr'):
        ip = ip_row.find('td:nth-child(1)', first=True)
        port = ip_row.find('td:nth-child(2)', first=True)
        if ip and port:
            res.append(Proxy(
                ip_port=f"{ip.text}:{port.text}",
                scheme=SCHEME_HTTP,
                status=STATUS_NEW,
            ))
    await sess.close()
    return res
def get_proxy(self):
    """Randomly pick self.__num of the better-scoring proxies.

    2019-02-28 11:50:55: the selection range was narrowed to
    [initial score, max score] so that IPs which have already
    failed a check can no longer enter the candidate set.
    """
    result = self.__db.zrevrangebyscore(
        self.__REDIS_STORAGE_KEY, self.__MAX_SCORE, self.__INITIAL_SCORE)
    if result:
        return [Proxy().re_serialize(choice(result))
                for _ in range(self.__num)]
    else:
        raise PoolEmptyError
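# A runnable sketch (not from these sources) of the redis-py sorted-set
# calls the pool methods above rely on; the key name and score values here
# are assumptions, not the project's real constants.
import redis

r = redis.Redis()
KEY, INITIAL, MAX_SCORE = 'proxies', 10, 100

r.zadd(KEY, {'1.2.3.4:8080': INITIAL})              # new proxy starts at the initial score
r.zincrby(KEY, 1, '1.2.3.4:8080')                   # reward a successful check
print(r.zrevrangebyscore(KEY, MAX_SCORE, INITIAL))  # the range get_proxy() samples from
print(r.zscore(KEY, '1.2.3.4:8080'))                # the per-proxy score show() prints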
def search_proxy():
    url = "http://cn-proxy.com/"
    data = ""
    # Live fetch from cn-proxy.com, disabled in favor of a local fixture:
    # while not data:
    #     try:
    #         data = requests.get(url).text
    #     except Exception:
    #         sleep(2)
    #         continue
    with open('/home/chenxiao/document/data', 'rt') as f:
        data = f.read()
    soup = BeautifulSoup(data, 'html.parser')
    tbody = soup.findAll('tbody')[1]
    for tr in tbody.findAll('tr'):
        td_list = tr.findAll('td')
        proxy = Proxy()
        # Keep only reasonably fast proxies.
        speed = get_speed(td_list[3])
        if speed < 70:
            continue
        proxy.speed = speed
        proxy.url = td_list[0].text + ":" + td_list[1].text
        proxy.position = td_list[2].text
        # The page shows Shanghai local time; normalize to UTC before saving.
        time_string = td_list[4].text
        time = datetime.datetime.strptime(time_string, '%Y-%m-%d %H:%M:%S')
        time = timezone('Asia/Shanghai').localize(time)
        proxy.last_check = time.astimezone(utc)
        if not Proxy.objects(url=proxy.url):
            print("Adding proxy server: {}".format(proxy.url))
            proxy.save()
async def proxy_list():
    res = list()
    sess = AsyncHTMLSession()
    resp = await sess.get('http://proxy-list.org/english/index.php')
    for ul in resp.html.find('#proxy-table > div.table-wrap ul'):
        script = ul.find('li.proxy script', first=True)
        if not script:
            continue
        # The address is base64-encoded inside a Proxy('...') JS call.
        matched = re.findall(r"Proxy\('(.+)'\)", script.text)
        if matched:
            ip_port = base64.b64decode(matched[0]).decode("utf-8")
            ip = re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ip_port)[0]
            port = re.findall(r':(\d{2,5})', ip_port)[0]
            res.append(Proxy(
                ip_port=f"{ip}:{port}",
                scheme=SCHEME_HTTP,
                status=STATUS_NEW,
            ))
    await sess.close()
    return res
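# A self-contained worked example (with a made-up payload) of the decode
# step above: the page embeds each address as base64 inside a Proxy('...')
# script call.
import base64
import re

js_code = "Proxy('MTIzLjQ1LjY3Ljg5OjgwODA=')"
encoded = re.findall(r"Proxy\('(.+)'\)", js_code)[0]
print(base64.b64decode(encoded).decode("utf-8"))  # -> 123.45.67.89:8080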
async def clarketm_proxy_list():
    res = list()
    async with aiohttp.request(
        "GET",
        'https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt',
        proxy=proxies,
    ) as resp:
        txt = await resp.text()
    # Each matched token is already a complete "ip:port" pair, so no
    # second round of per-field regexes is needed.
    for ip_port in re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}', txt):
        res.append(Proxy(
            ip_port=ip_port,
            scheme=SCHEME_HTTP,
            status=STATUS_NEW,
        ))
    return res
async def spys_one():
    res = list()
    sess = AsyncHTMLSession()
    resp = await sess.get('http://spys.one/en/anonymous-proxy-list')
    # Ports are filled in by JavaScript, so render before parsing.
    await resp.html.arender(wait=1.5, timeout=10.0)
    for ip_row in resp.html.find('table tr[onmouseover]'):
        ip_port_text_elem = ip_row.find('.spy14', first=True)
        if not ip_port_text_elem:
            continue
        ip_port_text = ip_port_text_elem.text
        # Guard against rows where either pattern is absent instead of
        # crashing on .group() of None.
        ip_match = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ip_port_text)
        port_match = re.search(r':\n(\d{2,5})', ip_port_text)
        if ip_match and port_match:
            res.append(Proxy(
                ip_port=f"{ip_match.group(0)}:{port_match.group(1)}",
                scheme=SCHEME_HTTP,
                status=STATUS_NEW,
            ))
    await sess.close()
    return res
def __parse_html(self, html, link_type):
    """Parse the proxy table out of a page.

    :param html: page source
    :param link_type: proxy type of the page being parsed
    """
    result = list()
    doc = pq(html)
    trs = doc('#ip_list > tr').items()
    next(trs)  # skip the header row
    for tr in trs:
        text_speed = tr('.bar').attr('title')
        if self.__check_delay(text_speed):
            child = tr.children()
            ip = child.eq(1).text()
            port = child.eq(2).text()
            site = child.eq(3).text()
            result.append(Proxy(
                ip=ip, port=port, site=site,
                p_type=link_type, delay=text_speed))
    return result
def post(self):
    '''
    Create new proxies.

    **Example Request**

    .. sourcecode:: json

        {
            "proxies": [
                {
                    "protocol": "http",
                    "host": "192.168.0.2",
                    "port": 80,
                    "username": "******",
                    "password": "******",
                    "active": true
                },
                ...
            ]
        }

    **Example Response**

    .. sourcecode:: json

        {
            "message": "1 new proxies created"
        }

    :<header Content-Type: application/json
    :<header X-Auth: the client's auth token
    :<json list proxies: list of proxies
    :<json str proxies[n]["protocol"]: protocol of proxy address
    :<json str proxies[n]["host"]: host of proxy address
    :<json int proxies[n]["port"]: port of proxy address
    :<json str proxies[n]["username"]: username of proxy
    :<json str proxies[n]["password"]: password of proxy
    :<json bool proxies[n]["active"]: proxy active status

    :>header Content-Type: application/json
    :>json string message: API response message

    :status 202: created
    :status 400: invalid request body
    :status 401: authentication required
    '''
    request_json = request.get_json()
    proxies = []

    # Ensure all data is valid before db operations
    for proxy_json in request_json['proxies']:
        validate_request_json(proxy_json, PROXY_ATTRS)

    # Save proxies
    for proxy_json in request_json['proxies']:
        proxy = Proxy(protocol=proxy_json['protocol'].lower().strip(),
                      host=proxy_json['host'].lower().strip(),
                      port=proxy_json['port'],
                      active=proxy_json['active'])

        # Username is optional, and can be None
        try:
            proxy.username = proxy_json['username'].lower().strip()
        except KeyError:
            pass
        except AttributeError:
            proxy.username = None

        # Password is optional, and can be None
        try:
            proxy.password = proxy_json['password'].strip()
        except KeyError:
            pass
        except AttributeError:
            proxy.password = None

        g.db.add(proxy)

        try:
            g.db.flush()
            proxies.append(proxy)
        except IntegrityError:
            g.db.rollback()
            raise BadRequest('Proxy {}://{}:{} already exists.'.format(
                proxy.protocol, proxy.host, proxy.port))

    g.db.commit()

    # Send redis notifications
    for proxy in proxies:
        notify_mask_client(channel='proxy', message={
            'proxy': proxy.as_dict(),
            'status': 'created',
            'resource': None
        })

    message = '{} new proxies created'.format(len(request_json['proxies']))
    response = jsonify(message=message)
    response.status_code = 202

    return response
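# A hedged usage sketch for the endpoint above. The route and token are
# placeholders (assumptions), not the project's actual values.
import requests

resp = requests.post(
    'http://localhost:5000/api/proxy',  # assumed mount point for this resource
    headers={'X-Auth': '<auth token>'},
    json={'proxies': [{
        'protocol': 'http', 'host': '192.168.0.2', 'port': 80,
        'username': None, 'password': None, 'active': True,
    }]},
)
print(resp.status_code, resp.json()['message'])  # e.g. 202 "1 new proxies created"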
from api import proxyapi
from model import Proxy
import logging

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    # work-proxy.list holds one "ip:port" pair per line.
    with open("work-proxy.list") as pfile:
        for proxystr in pfile:
            proxyarr = proxystr.strip().split(':')
            if len(proxyarr) == 2:
                proxyapi.add(Proxy(ip=proxyarr[0], port=proxyarr[1]))
def get_proxy(item):
    try:
        proxy = Proxy()
        td = item.find_all('td')
        proxy.country = td[0].find('img')['alt']
        proxy.ip = td[1].text
        proxy.port = td[2].text
        proxy.address = td[3].find('a').text
        proxy.status = td[5].text
        # The title attributes end with a unit character; strip it.
        proxy.speed = td[6].find('div')['title'][:-1]
        proxy.ping = td[7].find('div')['title'][:-1]
        proxy.live_time = td[8].text
        # The page shows a two-digit year without seconds; expand both.
        proxy.timestamp = '20' + td[9].text + ':00'
        proxy.insert()
    except TypeError:
        # find() returned None somewhere and the subscript failed.
        print('GET PROXY NONE TYPE')
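# A minimal fan-out sketch (not part of the original sources) showing how
# the async collectors above could be run together; the collector names
# are the ones defined in this collection.
import asyncio

async def collect_all():
    results = await asyncio.gather(
        free_proxy_list(),
        proxy_list(),
        clarketm_proxy_list(),
        return_exceptions=True,  # one failing source shouldn't kill the rest
    )
    proxies = []
    for r in results:
        if isinstance(r, Exception):
            continue  # skip sources that errored out
        proxies.extend(r)
    return proxies

# proxies = asyncio.run(collect_all())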