示例#1
0
class GetfreeproxylistsBlogspotCom(Provider):
    """Provider that scrapes proxies from getfreeproxylists.blogspot.com."""

    urls = ['https://getfreeproxylists.blogspot.com']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        # One Splash request per configured start URL.
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        # Each blog post body is a stream of text nodes: a protocol header
        # ('HTTP' or 'HTTPS') followed by the host:port entries belonging
        # to that protocol, so the current type is tracked while scanning.
        for post in response.xpath("//div[@class='post-body entry-content']"):
            texts = post.xpath(
                "descendant-or-self::*/text()").extract()
            proxy_type = None
            for text in texts:
                if text == 'HTTP':
                    proxy_type = 1
                elif text == 'HTTPS':
                    proxy_type = 2
                elif re.match(super().PATTERN, text):
                    fields = text.split(':')
                    item = ProxyItem()
                    item['host'] = fields[0]
                    item['port'] = fields[1]
                    item['_type'] = proxy_type
                    item['ping'] = None
                    yield item

    def get_next(self, response):
        # Single page; no pagination.
        pass
示例#2
0
class RootjazzComParse(Provider):
    """Provider for the plain-text proxy list at rootjazz.com."""

    urls = ['http://rootjazz.com/proxies/proxies.txt']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        # One Splash request per configured start URL.
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        """Yield one ProxyItem per 'host:port' line of the response body.

        Lines not matching the ip:port pattern are skipped.  The original
        bare ``except: continue`` silently swallowed *every* exception
        (including KeyboardInterrupt); it was also unnecessary, because a
        row that matched PATTERN is guaranteed to contain ':', so the
        split below cannot fail.
        """
        for row in response.text.split('\n'):
            row = row.strip()
            if re.match(super().PATTERN, row):
                fields = row.split(':')
                pi = ProxyItem()
                pi['host'] = fields[0]
                pi['port'] = fields[1]
                pi['_type'] = 0
                pi['ping'] = None
                yield pi

    def get_next(self, response):
        return None
class ApiProxyscrapeCom(Provider):
    """Provider for the proxyscrape.com API (several protocol/anonymity mixes)."""

    urls = [
        'https://api.proxyscrape.com/?request=getproxies&proxytype=http&timeout=10000&country=all&ssl=yes&anonymity=elite',
        'https://api.proxyscrape.com/?request=getproxies&proxytype=http&timeout=10000&country=all&ssl=no&anonymity=elite',
        'https://api.proxyscrape.com/?request=getproxies&proxytype=http&timeout=10000&country=all&ssl=yes&anonymity=anonymous',
        'https://api.proxyscrape.com/?request=getproxies&proxytype=http&timeout=10000&country=all&ssl=no&anonymity=anonymous',
        'https://api.proxyscrape.com/?request=getproxies&proxytype=socks4&timeout=10000&country=all&ssl=yes&anonymity=anonymous',
        'https://api.proxyscrape.com/?request=getproxies&proxytype=socks5&timeout=10000&country=all&ssl=yes&anonymity=anonymous'
    ]
    lua_script = Util.read_lua_script()

    def get_requests(self):
        # One Splash request per configured API URL.
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        """Yield one ProxyItem per 'host:port' line of the API response.

        Rows are now validated against PATTERN (consistent with the other
        providers); previously a blank row — e.g. the trailing '' produced
        by splitting a final newline — raised IndexError on the port split.
        """
        for row in response.text.split('\n'):
            row = row.strip()
            if not re.match(super().PATTERN, row):
                continue
            fields = row.split(':')
            pi = ProxyItem()
            pi['host'] = fields[0]
            pi['port'] = fields[1]
            pi['_type'] = 0
            pi['ping'] = None
            yield pi

    def get_next(self, response):
        return None
示例#4
0
class MyProxyCom(Provider):
    """Provider covering the free proxy listing pages on my-proxy.com."""

    main_page = 'https://www.my-proxy.com/'
    urls = [
        'https://www.my-proxy.com/free-elite-proxy.html',
        'https://www.my-proxy.com/free-anonymous-proxy.html',
        'https://www.my-proxy.com/free-transparent-proxy.html',
        'https://www.my-proxy.com/free-socks-4-proxy.html',
        'https://www.my-proxy.com/free-socks-5-proxy.html',
        'https://www.my-proxy.com/free-proxy-list.html',
        'https://www.my-proxy.com/free-proxy-list-2.html',
        'https://www.my-proxy.com/free-proxy-list-3.html',
        'https://www.my-proxy.com/free-proxy-list-4.html',
        'https://www.my-proxy.com/free-proxy-list-5.html',
        'https://www.my-proxy.com/free-proxy-list-6.html',
        'https://www.my-proxy.com/free-proxy-list-7.html',
        'https://www.my-proxy.com/free-proxy-list-8.html',
        'https://www.my-proxy.com/free-proxy-list-9.html',
        'https://www.my-proxy.com/free-proxy-list-10.html'
    ]
    lua_script = Util.read_lua_script()

    def get_requests(self):
        # One Splash request per listing page.
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        """Yield ProxyItems from one listing page.

        The protocol is inferred from the page URL: socks-5 pages map to
        type 4, socks-4 pages to type 3, everything else to type 1.
        (The dead ``result`` list the original built alongside the yields
        — appended to but never returned from this generator — has been
        removed.)
        """
        if 'socks-5' in response.url:
            current_type = 4
        elif 'socks-4' in response.url:
            current_type = 3
        else:
            current_type = 1
        for p in response.xpath("//div[@class='list']/text()").extract():
            # Entries look like 'host:port#extra'; drop the '#' suffix first.
            v = (p.split('#')[0]).split(':')
            pi = ProxyItem()
            pi['host'] = v[0]
            pi['port'] = v[1]
            pi['_type'] = current_type
            pi['ping'] = None
            yield pi

    def get_next(self, response):
        return None
示例#5
0
class Ab57Ru(Provider):
    """Provider for the proxy list hosted at ab57.ru."""

    urls = ['https://ab57.ru/proxylist.html']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        # One Splash request per configured start URL.
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        # Proxies live inside <pre> blocks, one 'host:port' per CRLF line;
        # lines not matching the ip:port pattern are ignored.
        pattern = super().PATTERN
        blocks = response.xpath('//table/tr/td[2]/pre/text()').extract()
        for block in blocks:
            for line in block.split('\r\n'):
                if not re.match(pattern, line):
                    continue
                fields = line.split(':')
                item = ProxyItem()
                item['host'] = fields[0]
                item['port'] = fields[1]
                item['_type'] = 0
                item['ping'] = None
                yield item

    def get_next(self, response):
        return None
示例#6
0
class OnlineProxyRu(Provider):
    """Provider for the proxy table on online-proxy.ru."""

    urls = ['http://online-proxy.ru']
    lua_script = Util.read_lua_script()
    # Site protocol label -> internal type code.
    types = {'HTTP': 1, 'HTTPS': 2, 'HTTP/HTTPS': 2}

    def get_requests(self):
        # One Splash request per configured start URL.
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        # Skip the first 23 rows (header/preamble), then skip every row
        # whose fifth text cell marks the proxy as transparent.
        rows = response.xpath("//td[@class='content']/table[1]//tr")[23:]
        for row in rows:
            if row.xpath('td/text()').extract()[4] == 'прозрачный':
                continue
            item = ProxyItem()
            item['host'] = row.xpath('td[2]/text()').get()
            item['port'] = row.xpath('td[3]/text()').get()
            item['_type'] = self.types[row.xpath('td[4]/text()').get()]
            item['ping'] = None
            yield item

    def get_next(self, response):
        return None
示例#7
0
class TwoIpRu(Provider):
    """Provider for the proxy table on 2ip.ru."""

    urls = ['https://2ip.ru/proxy/']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        # One Splash request per configured start URL.
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        # The first table column holds 'host:port' text (with surrounding
        # whitespace); every cell is assumed to contain a proxy entry.
        cells = response.xpath(
            "//div[@id='content']//table//tr/td[1]/text()").extract()
        for cell in cells:
            fields = cell.strip().split(':')
            item = ProxyItem()
            item['host'] = fields[0]
            item['port'] = fields[1]
            item['_type'] = 0
            item['ping'] = None
            yield item

    def get_next(self, response):
        return None
示例#8
0
class MultiproxyOrg(Provider):
    """Provider for the anonymous plain-text proxy list on multiproxy.org."""

    urls = ['https://multiproxy.org/txt_anon/proxy.txt']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        # One Splash request per configured start URL.
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        # One 'host:port' entry per line; non-matching lines are ignored.
        for line in response.text.split('\n'):
            if not re.match(super().PATTERN, line):
                continue
            fields = line.split(':')
            item = ProxyItem()
            item['host'] = fields[0]
            item['port'] = fields[1]
            item['_type'] = 0
            item['ping'] = None
            yield item

    def get_next(self, response):
        return None
示例#9
0
class Provider:
    """Abstract base class for proxy-list providers.

    Subclasses define ``urls``, and implement ``get_requests`` /
    ``get_proxies`` / ``get_next``; ``get_request`` builds the Splash
    request shared by all providers.
    """

    # Matches an 'ip:port' prefix, e.g. '127.0.0.1:8080'.  Made a raw
    # string: the old plain "...\..." literal was an invalid escape
    # sequence (DeprecationWarning, SyntaxWarning on newer Pythons);
    # the resulting string value is byte-identical.
    PATTERN = r"([0-9]{1,3}[\.]){3}[0-9]{1,3}:[0-9]{2,}"
    # NOTE(review): 'socs4'/'socs' look like typos for 'socks4'/'socks5',
    # but the keys are runtime data other code may depend on — left as-is.
    protocols = {'http': 1, 'https': 2, 'socs4': 3, 'socs': 4}
    lua_script = Util.read_lua_script()

    @abstractmethod
    def get_requests(self):
        """Yield the initial requests for this provider."""
        pass

    @abstractmethod
    def get_proxies(self, response):
        """Yield ProxyItems parsed from *response*."""
        pass

    @abstractmethod
    def get_next(self, response):
        """Return a follow-up request for pagination, or None."""
        pass

    def get_request(self, url):
        """Build a Splash 'execute' request for *url* running ``lua_script``.

        The provider instance is passed along via ``cb_kwargs`` so the
        callback can dispatch back to it.
        """
        return SplashRequest(url=url,
                             endpoint='execute',
                             cache_args=['lua_source'],
                             args={'lua_source': self.lua_script},
                             cb_kwargs={'provider': self})
class ProxylistdailyNet(Provider):
    """Provider for the free proxy list on proxylistdaily.net."""

    urls = ['https://www.proxylistdaily.net/']
    lua_script = Util.read_lua_script()

    def get_requests(self):
        # One Splash request per configured start URL.
        for url in self.urls:
            yield super().get_request(url)

    def get_proxies(self, response):
        # Each span holds newline-separated 'host:port' lines; lines not
        # matching the ip:port pattern are skipped.
        pattern = super().PATTERN
        spans = response.xpath(
            "//div[@class ='centeredProxyList freeProxyStyle']/span/span/text()"
        ).extract()
        for span in spans:
            for line in span.split('\n'):
                if not re.match(pattern, line):
                    continue
                fields = line.split(':')
                item = ProxyItem()
                item['host'] = fields[0]
                item['port'] = fields[1]
                item['_type'] = 0
                item['ping'] = None
                yield item

    def get_next(self, response):
        return None