class GetfreeproxylistsBlogspotCom(Provider):
    """Scrapes proxy lists posted on getfreeproxylists.blogspot.com."""

    urls = ['https://getfreeproxylists.blogspot.com']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        """Yield one Splash request per configured URL."""
        for address in self.urls:
            yield super().get_request(address)

    def get_proxies(self, response):
        """Walk every blog-post body and yield a ProxyItem per host:port line.

        'HTTP' / 'HTTPS' heading text nodes set the type applied to the
        host:port lines that follow them; lines seen before any heading get
        a type of None.
        """
        for post in response.xpath("//div[@class='post-body entry-content']"):
            text_nodes = post.xpath("descendant-or-self::*/text()").extract()
            proxy_type = None
            for node in text_nodes:
                if node == 'HTTP':
                    proxy_type = 1
                elif node == 'HTTPS':
                    proxy_type = 2
                elif re.match(super().PATTERN, node):
                    parts = node.split(':')
                    item = ProxyItem()
                    item['host'] = parts[0]
                    item['port'] = parts[1]
                    item['_type'] = proxy_type
                    item['ping'] = None
                    yield item

    def get_next(self, response):
        """No pagination for this provider."""
        pass
class RootjazzComParse(Provider):
    """Scrapes the plain-text proxy list served by rootjazz.com."""

    urls = ['http://rootjazz.com/proxies/proxies.txt']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        """Yield one Splash request per configured URL."""
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        """Yield a ProxyItem for every ``host:port`` line in the body.

        The PATTERN guard guarantees each accepted row contains a colon
        followed by digits, so the splits below cannot fail; the old bare
        ``except: continue`` wrapper added nothing but silent bug-hiding
        and was removed.
        """
        for row in response.text.split('\n'):
            row = row.strip()
            if re.match(super().PATTERN, row):
                pi = ProxyItem()
                pi['host'] = row.split(':')[0]
                pi['port'] = row.split(':')[1]
                pi['_type'] = 0
                pi['ping'] = None
                yield pi

    def get_next(self, response):
        """No pagination for this provider."""
        return None
class ApiProxyscrapeCom(Provider):
    """Fetches proxy lists from the api.proxyscrape.com getproxies endpoint."""

    urls = [
        'https://api.proxyscrape.com/?request=getproxies&proxytype=http&timeout=10000&country=all&ssl=yes&anonymity=elite',
        'https://api.proxyscrape.com/?request=getproxies&proxytype=http&timeout=10000&country=all&ssl=no&anonymity=elite',
        'https://api.proxyscrape.com/?request=getproxies&proxytype=http&timeout=10000&country=all&ssl=yes&anonymity=anonymous',
        'https://api.proxyscrape.com/?request=getproxies&proxytype=http&timeout=10000&country=all&ssl=no&anonymity=anonymous',
        'https://api.proxyscrape.com/?request=getproxies&proxytype=socks4&timeout=10000&country=all&ssl=yes&anonymity=anonymous',
        'https://api.proxyscrape.com/?request=getproxies&proxytype=socks5&timeout=10000&country=all&ssl=yes&anonymity=anonymous'
    ]
    lua_script = Util.read_lua_script()

    def get_requests(self):
        """Yield one Splash request per configured URL."""
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        """Yield a ProxyItem for every valid ``host:port`` line.

        The body normally ends with a trailing newline, so ``split('\\n')``
        produces a final empty row; the old unconditional
        ``row.split(':')[1]`` raised IndexError there. Guard each row with
        the shared PATTERN, matching the sibling providers' style.
        """
        for row in response.text.split('\n'):
            row = row.strip()
            if not re.match(super().PATTERN, row):
                continue
            pi = ProxyItem()
            pi['host'] = row.split(':')[0]
            pi['port'] = row.split(':')[1]
            pi['_type'] = 0
            pi['ping'] = None
            yield pi

    def get_next(self, response):
        """No pagination for this provider."""
        return None
class MyProxyCom(Provider):
    """Scrapes the free proxy list pages on my-proxy.com."""

    main_page = 'https://www.my-proxy.com/'
    urls = [
        'https://www.my-proxy.com/free-elite-proxy.html',
        'https://www.my-proxy.com/free-anonymous-proxy.html',
        'https://www.my-proxy.com/free-transparent-proxy.html',
        'https://www.my-proxy.com/free-socks-4-proxy.html',
        'https://www.my-proxy.com/free-socks-5-proxy.html',
        'https://www.my-proxy.com/free-proxy-list.html',
        'https://www.my-proxy.com/free-proxy-list-2.html',
        'https://www.my-proxy.com/free-proxy-list-3.html',
        'https://www.my-proxy.com/free-proxy-list-4.html',
        'https://www.my-proxy.com/free-proxy-list-5.html',
        'https://www.my-proxy.com/free-proxy-list-6.html',
        'https://www.my-proxy.com/free-proxy-list-7.html',
        'https://www.my-proxy.com/free-proxy-list-8.html',
        'https://www.my-proxy.com/free-proxy-list-9.html',
        'https://www.my-proxy.com/free-proxy-list-10.html'
    ]
    lua_script = Util.read_lua_script()

    def get_requests(self):
        """Yield one Splash request per configured URL."""
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        """Yield a ProxyItem per entry; the type is inferred from the URL.

        socks-5 pages -> 4, socks-4 pages -> 3, everything else -> 1.
        Entries look like ``host:port#comment``; the comment is discarded.

        The old implementation also appended every item to a ``result``
        list that was never read (the method is a generator) — that dead
        accumulator is removed.
        """
        if 'socks-5' in response.url:
            current_type = 4
        elif 'socks-4' in response.url:
            current_type = 3
        else:
            current_type = 1
        for entry in response.xpath("//div[@class='list']/text()").extract():
            parts = entry.split('#')[0].split(':')
            pi = ProxyItem()
            pi['host'] = parts[0]
            pi['port'] = parts[1]
            pi['_type'] = current_type
            pi['ping'] = None
            yield pi

    def get_next(self, response):
        """No pagination for this provider."""
        return None
class Ab57Ru(Provider):
    """Scrapes the pre-formatted proxy list on ab57.ru."""

    urls = ['https://ab57.ru/proxylist.html']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        """Yield one Splash request per configured URL."""
        for address in self.urls:
            yield super().get_request(address)

    def get_proxies(self, response):
        """Split each ``<pre>`` text node on CRLF and yield matching rows."""
        pre_blocks = response.xpath('//table/tr/td[2]/pre/text()').extract()
        for block in pre_blocks:
            for line in block.split('\r\n'):
                if not re.match(super().PATTERN, line):
                    continue
                fields = line.split(':')
                item = ProxyItem()
                item['host'] = fields[0]
                item['port'] = fields[1]
                item['_type'] = 0
                item['ping'] = None
                yield item

    def get_next(self, response):
        """No pagination for this provider."""
        return None
class OnlineProxyRu(Provider):
    """Scrapes the proxy table on online-proxy.ru."""

    urls = ['http://online-proxy.ru']
    lua_script = Util.read_lua_script()
    # Site protocol label -> internal type code.
    types = {'HTTP': 1, 'HTTPS': 2, 'HTTP/HTTPS': 2}

    def get_requests(self):
        """Yield one Splash request per configured URL."""
        for address in self.urls:
            yield super().get_request(address)

    def get_proxies(self, response):
        """Yield non-transparent proxies from the main content table.

        The first 23 rows are skipped (page chrome / headers — TODO confirm
        against the live page). The fifth text node of each row holds the
        anonymity label; 'прозрачный' (transparent) rows are dropped.
        """
        rows = response.xpath("//td[@class='content']/table[1]//tr")[23:]
        for row in rows:
            anonymity = row.xpath('td/text()').extract()[4]
            if anonymity == 'прозрачный':
                continue
            item = ProxyItem()
            item['host'] = row.xpath('td[2]/text()').get()
            item['port'] = row.xpath('td[3]/text()').get()
            item['_type'] = self.types[row.xpath('td[4]/text()').get()]
            item['ping'] = None
            yield item

    def get_next(self, response):
        """No pagination for this provider."""
        return None
class TwoIpRu(Provider):
    """Scrapes the proxy table at 2ip.ru."""

    urls = ['https://2ip.ru/proxy/']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        """Yield one Splash request per configured URL."""
        for address in self.urls:
            yield super().get_request(address)

    def get_proxies(self, response):
        """Yield a ProxyItem for every host:port cell in the content table."""
        cells = response.xpath(
            "//div[@id='content']//table//tr/td[1]/text()").extract()
        for cell in cells:
            fields = cell.strip().split(':')
            item = ProxyItem()
            item['host'] = fields[0]
            item['port'] = fields[1]
            item['_type'] = 0
            item['ping'] = None
            yield item

    def get_next(self, response):
        """No pagination for this provider."""
        return None
class MultiproxyOrg(Provider):
    """Scrapes the plain-text anonymous proxy list on multiproxy.org."""

    urls = ['https://multiproxy.org/txt_anon/proxy.txt']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        """Yield one Splash request per configured URL."""
        for address in self.urls:
            yield super().get_request(address)

    def get_proxies(self, response):
        """Yield a ProxyItem for every line matching the shared PATTERN."""
        for line in response.text.split('\n'):
            if not re.match(super().PATTERN, line):
                continue
            fields = line.split(':')
            item = ProxyItem()
            item['host'] = fields[0]
            item['port'] = fields[1]
            item['_type'] = 0
            item['ping'] = None
            yield item

    def get_next(self, response):
        """No pagination for this provider."""
        return None
class Provider:
    """Abstract base class for proxy-list providers.

    Subclasses implement get_requests/get_proxies/get_next; the shared
    get_request builds a Splash 'execute' request running the Lua script.
    """

    # IPv4 host:port matcher. Made a raw string: "\." in a plain string
    # is an invalid escape sequence (DeprecationWarning on modern Python)
    # even though its runtime value is identical.
    PATTERN = r"([0-9]{1,3}[\.]){3}[0-9]{1,3}:[0-9]{2,}"
    # NOTE(review): 'socs4'/'socs' look like typos for 'socks4'/'socks5',
    # but the keys may be looked up elsewhere in the project, so they are
    # left unchanged — verify against callers before renaming.
    protocols = {'http': 1, 'https': 2, 'socs4': 3, 'socs': 4}
    lua_script = Util.read_lua_script()

    @abstractmethod
    def get_requests(self):
        """Yield the requests needed to fetch this provider's pages."""
        pass

    @abstractmethod
    def get_proxies(self, response):
        """Yield ProxyItem objects parsed from *response*."""
        pass

    @abstractmethod
    def get_next(self, response):
        """Return a follow-up request for pagination, or None."""
        pass

    def get_request(self, url):
        """Build a Splash 'execute' request that runs the shared Lua script."""
        return SplashRequest(url=url,
                             endpoint='execute',
                             cache_args=['lua_source'],
                             args={'lua_source': self.lua_script},
                             cb_kwargs={'provider': self})
class ProxylistdailyNet(Provider):
    """Scrapes the daily proxy lists on proxylistdaily.net."""

    urls = ['https://www.proxylistdaily.net/']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        """Yield one Splash request per configured URL."""
        for address in self.urls:
            yield super().get_request(address)

    def get_proxies(self, response):
        """Split each list span on newlines and yield matching rows."""
        spans = response.xpath(
            "//div[@class ='centeredProxyList freeProxyStyle']/span/span/text()"
        ).extract()
        for span in spans:
            for line in span.split('\n'):
                if not re.match(super().PATTERN, line):
                    continue
                fields = line.split(':')
                item = ProxyItem()
                item['host'] = fields[0]
                item['port'] = fields[1]
                item['_type'] = 0
                item['ping'] = None
                yield item

    def get_next(self, response):
        """No pagination for this provider."""
        return None