async def get_proxy(self, timeout=60):
    """Fetch one listing page and pull out every ip:port pair via regex.

    :param timeout: per-request timeout in seconds (default 60)
    :return: list of 'ip:port' strings found in the page body
    """
    headers = {'User-Agent': UserAgent.random()}
    async with aiohttp.ClientSession(headers=headers) as session:
        page_url = self.url.format(self.count)
        async with session.get(page_url, timeout=timeout) as resp:
            body = await resp.text()
    # Match dotted-quad IPs followed by a port, e.g. '1.2.3.4:8080'.
    return re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}', body)
async def get_proxy(self, timeout=60):
    """Scrape every URL in self.urls; each <ul class="l2"> row yields one proxy.

    The first two <li> texts of a row are the IP and the port; they are
    stripped of whitespace and joined as 'ip:port'.

    :param timeout: per-request timeout in seconds (default 60)
    :return: list of 'ip:port' strings
    """
    headers = {'User-Agent': UserAgent.random()}
    collected = []
    async with aiohttp.ClientSession(headers=headers) as session:
        for page_url in self.urls:
            async with session.get(page_url, timeout=timeout) as resp:
                page = await resp.text()
            tree = html.fromstring(page)
            for row in tree.xpath('//ul[@class="l2"]'):
                cells = row.xpath('.//li/text()')[0:2]
                collected.append(':'.join(c.strip(' \t\n') for c in cells))
    return collected
async def get_proxy(self, timeout=60):
    """Walk pages 1..9 of self.url and collect proxies from <td class="ip"> cells.

    All text fragments inside a cell are whitespace-stripped and concatenated
    (the site splits 'ip:port' across child nodes).

    :param timeout: per-request timeout in seconds (default 60)
    :return: list of proxy strings
    """
    headers = {'User-Agent': UserAgent.random()}
    results = []
    async with aiohttp.ClientSession(headers=headers) as session:
        for page_no in range(1, 10):
            target = self.url.format(page=page_no)
            async with session.get(target, timeout=timeout) as resp:
                body = await resp.text()
            doc = html.fromstring(body)
            for cell in doc.xpath('//td[@class="ip"]'):
                fragments = cell.xpath('.//text()')
                results.append(''.join(f.strip(' \t\n') for f in fragments))
    return results
async def get_proxy(self, timeout=60):
    """Fetch self.url once and read proxies from an HTML table.

    The header row is skipped; the first two <td> texts of each remaining
    row are stripped and joined as 'ip:port'.

    :param timeout: per-request timeout in seconds (default 60)
    :return: list of 'ip:port' strings
    """
    headers = {'User-Agent': UserAgent.random()}
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(self.url, timeout=timeout) as resp:
            body = await resp.text()
    # [1:] drops the table header row.
    rows = html.fromstring(body).xpath('//tr')[1:]
    return [
        ':'.join(c.strip(' \t\n') for c in row.xpath('./td/text()')[0:2])
        for row in rows
    ]
async def get_proxy(self, timeout=60):
    """Crawl all pages of every URL in self.urls and return 'ip:port' strings.

    Bug fix: ``aiohttp.ClientSession`` is an *async* context manager; the
    original used a plain ``with``, which raises ``TypeError`` at runtime in
    aiohttp 3.x. Also guards against rows missing an IP or PORT cell, which
    previously raised ``IndexError``.

    :param timeout: per-request timeout in seconds (default 60)
    :return: list of 'ip:port' strings
    """
    proxies = []
    async with aiohttp.ClientSession() as session:
        for url in self.urls:
            for i in range(1, self.total + 1):
                # Fresh random User-Agent per request to reduce blocking.
                headers = {'User-Agent': UserAgent.random()}
                target = url.format(i)
                async with session.get(target, headers=headers, timeout=timeout) as r:
                    content = await r.text()
                selector = html.fromstring(content)
                for tr in selector.xpath('//tbody/tr'):
                    ip = tr.xpath('.//td[@data-title="IP"]/text()')
                    port = tr.xpath('.//td[@data-title="PORT"]/text()')
                    # Skip malformed rows instead of crashing on ip[0]/port[0].
                    if ip and port:
                        proxies.append(':'.join([ip[0], port[0]]))
                # Polite throttle between page requests.
                await asyncio.sleep(3)
    return proxies
def random_headers():
    """Return a request-header dict carrying a randomised User-Agent string."""
    return {'User-Agent': UserAgent.random()}