async def yield_proxy(self, *args, **kwargs):
    # https://www.nyloner.cn/proxy
    ev_loop = kwargs.get('ev_loop')
    async with webutils.WebSpider(ev_loop) as spider:
        spider.header.update({'Host': 'www.nyloner.cn',
                              'Referer': 'https://www.nyloner.cn/proxy'})
        proxies = []
        num = 15
        # Fetch the landing page first; bail out if the site is unreachable.
        status, _ = await spider.get('https://www.nyloner.cn/proxy')
        if status != 200:
            mylog.error('%s failed to access the site', __name__)
            return proxies
        for page in range(1, 50):
            t = int(datetime.datetime.now().timestamp())
            status, resp_html = await spider.get(
                'https://www.nyloner.cn/proxy',
                params={'page': page, 'num': num, 't': t,
                        'token': self.gen_token(page, num, t)})
            if status != 200:
                continue
            try:
                # json.loads() no longer accepts an `encoding` argument
                # (removed in Python 3.9); resp_html is already a str here.
                js_result = json.loads(resp_html)
                if js_result['status'].lower() == 'true':
                    for pd in json.loads(self.decode_str(js_result['list'])):
                        proxies.append(models.ProxyTbl(host=pd['ip'],
                                                       port=int(pd['port']),
                                                       scheme='http',
                                                       country='未知'))  # "unknown"
            except json.JSONDecodeError as er:
                mylog.warning('%s failed to parse response <%s>: %s',
                              __name__, resp_html, er)
                return proxies
        return proxies
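# A minimal, self-contained sketch of the payload handling above, assuming a
# hypothetical response shape and an identity decode_str; the site's real
# encoding of the "list" field and the gen_token algorithm are not shown here.
import json

def _parse_nyloner_payload(resp_text, decode_str=lambda s: s):
    """Return (ip, port) pairs from a nyloner-style JSON envelope."""
    js_result = json.loads(resp_text)
    if js_result['status'].lower() != 'true':
        return []
    return [(pd['ip'], int(pd['port']))
            for pd in json.loads(decode_str(js_result['list']))]

# Hypothetical, already-decoded "list" field:
# _parse_nyloner_payload('{"status": "True", "list": "[{\\"ip\\": \\"1.2.3.4\\", \\"port\\": \\"8080\\"}]"}')
# -> [('1.2.3.4', 8080)]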
async def yield_proxy(self, *args, **kwargs):
    ev_loop = kwargs.get('ev_loop')
    async with webutils.WebSpider(ev_loop) as spider:
        spider.header.update({'Host': 'www.kewangst.com',
                              'Referer': 'https://www.kewangst.com/ProxyList'})
        proxies = []
        status, resp_text = await spider.get('https://www.kewangst.com/ProxyList')
        if status != 200:
            mylog.error('%s failed to access the site', __name__)
            return proxies
        # Each useful line is expected to be a full proxy URL such as
        # "http://1.2.3.4:8080"; everything else is skipped.
        with io.StringIO(resp_text) as fp:
            for line in fp:
                line = line.strip()
                if not line.startswith('http'):
                    continue
                try:
                    parse_result = urllib.parse.urlparse(line)
                    proxies.append(models.ProxyTbl(host=parse_result.hostname,
                                                   port=parse_result.port,
                                                   scheme=parse_result.scheme,
                                                   country='未知'))  # "unknown"
                except ValueError as e:
                    mylog.warning(e)
        return proxies
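# A small usage sketch of the urlparse-based extraction above: a line such as
# "http://1.2.3.4:8080" splits cleanly into the scheme/host/port fields that
# ProxyTbl expects (the example values are made up).
from urllib.parse import urlparse

result = urlparse('http://1.2.3.4:8080')
assert (result.scheme, result.hostname, result.port) == ('http', '1.2.3.4', 8080)
# A malformed port raises ValueError when .port is accessed, which is why the
# loop above wraps the conversion in try/except:
# urlparse('http://1.2.3.4:abc').port  -> ValueError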
async def yield_proxy(self, *args, **kwargs):
    ev_loop = kwargs.get('ev_loop')
    async with webutils.WebSpider(ev_loop) as spider:
        spider.header.update({'Host': 'www.66ip.cn'})
        area = 33
        page = 1
        proxies = []
        for area_index in range(1, area + 1):
            # The original sleep was never awaited, so it never paused; await it
            # (the `loop` argument was removed in Python 3.10).
            await asyncio.sleep(1)
            for i in range(1, page + 1):
                url = "http://www.66ip.cn/areaindex_{}/{}.html".format(area_index, i)
                status, resp_html = await spider.get(url)
                if status != 200:
                    continue
                html_tree = etree.HTML(resp_html)
                tr_list = html_tree.xpath(
                    "//*[@id='footer']/div/table/tr[position()>1]")
                if len(tr_list) == 0:
                    continue
                for tr in tr_list:
                    proxies.append(models.ProxyTbl(host=tr.xpath("./td[1]/text()")[0],
                                                   port=int(tr.xpath("./td[2]/text()")[0]),
                                                   country='中国',  # "China"
                                                   area=tr.xpath("./td[3]/text()")[0],
                                                   scheme='http'))
        return proxies
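# A minimal, self-contained illustration of the lxml extraction used above, run
# against a tiny inline snippet instead of the live 66ip.cn page; the markup
# below only mimics the expected table layout.
from lxml import etree

_SAMPLE = """
<div id="footer"><div><table>
  <tr><th>ip</th><th>port</th><th>area</th></tr>
  <tr><td>1.2.3.4</td><td>8080</td><td>Example Area</td></tr>
</table></div></div>
"""
tree = etree.HTML(_SAMPLE)
rows = tree.xpath("//*[@id='footer']/div/table/tr[position()>1]")  # skip header row
for row in rows:
    host = row.xpath("./td[1]/text()")[0]       # '1.2.3.4'
    port = int(row.xpath("./td[2]/text()")[0])  # 8080
    area = row.xpath("./td[3]/text()")[0]       # 'Example Area'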
async def yield_proxy(self, *args, **kwargs):
    ev_loop = kwargs.get('ev_loop')
    async with webutils.WebSpider(ev_loop) as spider:
        spider.header.update({'Host': 'www.xicidaili.com'})
        proxies = []
        url_list = [
            'http://www.xicidaili.com/nn/',  # high anonymity
            'http://www.xicidaili.com/nt/',  # transparent
            'http://www.xicidaili.com/wn/',  # domestic HTTPS
            'http://www.xicidaili.com/wt/',  # domestic plain HTTP
        ]
        page = 2
        for url in url_list:
            for i in range(1, page + 1):
                # Await the sleep (the original call was never awaited) and build
                # the page URL in a new variable so the base URL is not rebound
                # between iterations (the original appended page numbers onto
                # the previous request URL).
                await asyncio.sleep(1)
                page_url = url + str(i)
                status, resp_html = await spider.get(page_url)
                if status != 200:
                    continue
                html_tree = etree.HTML(resp_html)
                ip_list = html_tree.xpath(
                    '//table[@id="ip_list"]//tr[position()>1]')
                for tr in ip_list:
                    tds = tr.xpath("td")
                    if len(tds) < 6:  # tds[5] (scheme column) is read below
                        continue
                    location = tds[3].xpath('a')
                    if len(location) >= 1:
                        location = location[0].text
                    else:
                        location = tds[3].text
                    proxies.append(models.ProxyTbl(host=str(tds[1].text),
                                                   port=int(tds[2].text),
                                                   country='中国',  # "China"
                                                   area=str(location),
                                                   scheme=str(tds[5].text).lower()))
        return proxies
async def consume(self, req: web.Request, **kwargs) -> web.Response:
    self._user_arg = req.query
    while True:
        try:
            all_proxy = await ProxyTblManager.get_proxy(req.app['db'])
        except queue.Empty:
            # No usable proxy right now; ask the caller to retry later.
            return web.Response(text='目前没有可用代理,请稍候再试', charset='utf-8')
        proxy = random.choice(all_proxy)
        async with webutils.WebSpider(ev_loop=None, proxy=proxy) as client:
            try:
                stock = random.choice(await self.get_stock(client))
                # Raw string so \d is not treated as an invalid escape sequence.
                stock = re.search(r'c:"(\d+)"', stock).group(1)
                spldid = await self.open_page(client)
                await self.page_loading(client, spldid)
                msg = await self.set_pageoperinfo(client, spldid, stock)
                await self.page_close(client, spldid)
            except ConsumerError as err:
                return web.Response(text=err.expression)
            except AttributeError as err:
                # re.search() returned None: the stock payload did not match.
                raise web.HTTPInternalServerError() from err
            except (ClientError, asyncio.TimeoutError):
                pass  # this proxy timed out or failed; pick another and retry
            else:
                return web.Response(text=msg, charset='utf-8')
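# A hedged illustration of the stock-code extraction above; the payload string
# is invented, and only its 'c:"<digits>"' fragment mirrors what re.search()
# looks for in the real response.
import re

sample = 'var item = {n:"demo", c:"600000", p:"10.00"};'  # hypothetical payload
match = re.search(r'c:"(\d+)"', sample)
code = match.group(1) if match else None  # '600000'
# In consume() above there is no None check: a non-matching payload raises
# AttributeError on .group(1), which the AttributeError handler converts into
# an HTTP 500 response.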