Exemplo n.º 1
0
    def load(self) -> list:
        """Download the proxy pool page and return the proxies listed on it.

        The URL is built by formatting ``SixSixIPProxySpider._POOL_URL`` with
        ``self._num``. Every ``ip:port`` pair that is immediately followed by
        ``<br />`` in the response body produces two ``Proxy`` entries, one
        for ``http`` and one for ``https``.

        Returns:
            A list of ``Proxy`` objects. The list is empty when ``self._num``
            is ``None`` (no request is made in that case) or when the page
            contains no matching entry.

        Raises:
            Exception: whatever the HTTP request raises; it is logged through
                the context logger (when one is available) and re-raised.
        """
        ls = []
        if self._num is None:
            return ls

        logger = self._context.logger if self._context else None
        if logger:
            logger.info('SixSixIPProxySpider: loading proxy list.')

        url = SixSixIPProxySpider._POOL_URL.format(self._num)
        reg = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+)(?=<br />)')
        try:
            res = requests.get(url, proxies=self._sys_proxy, timeout=self._timeout)
            for match in reg.finditer(res.text):
                ip, port = match.group(1), match.group(2)
                try:
                    for protocol in ('http', 'https'):
                        proxy = Proxy()
                        proxy.ip = ip
                        proxy.port = port
                        proxy.protocol = protocol
                        proxy.proxy_url = self.proxy_url(proxy.ip, proxy.port, proxy.protocol)
                        proxy.collect_time = Datetime.now()
                        proxy.local = Config.local
                        ls.append(proxy)
                except Exception:
                    # Best effort: one bad entry must not abort the whole load.
                    # Only ``Exception`` is caught so that KeyboardInterrupt and
                    # SystemExit still propagate.
                    continue
            return ls
        except Exception:
            if logger:
                logger.exception('SixSixIPProxySpider: Failed be load proxy list.')
            raise
Exemplo n.º 2
0
    def load(self) -> list:
        """Download the proxy pool and return the proxies it describes.

        The response body of ``FatezeroProxySpider._POOL_URL`` is read line by
        line; each line is parsed as a JSON object whose ``host``, ``port``
        and ``type`` keys fill in a ``Proxy``. Lines that cannot be parsed or
        lack one of those keys are skipped.

        Returns:
            A list of ``Proxy`` objects, truncated to the first ``self._num``
            entries unless ``self._num`` is ``None``.

        Raises:
            Exception: whatever the HTTP request raises; it is logged through
                the context logger (when one is available) and re-raised.
        """
        ls = []

        logger = self._context.logger if self._context else None
        if logger:
            logger.info('FatezeroProxySpider: loading proxy list.')
        try:
            res = requests.get(FatezeroProxySpider._POOL_URL, proxies=self._sys_proxy, timeout=self._timeout)
            for text in res.text.split('\n'):
                if not text.strip():
                    # Blank lines (e.g. the trailing one) carry no entry.
                    continue
                try:
                    # No ``encoding`` keyword: it was removed from json.loads
                    # in Python 3.9, where passing it raises TypeError on
                    # every line and the result would always be empty.
                    p = json.loads(text)
                    proxy = Proxy()
                    proxy.ip = p['host']
                    proxy.port = p['port']
                    proxy.protocol = p['type']
                    proxy.proxy_url = self.proxy_url(proxy.ip, proxy.port, proxy.protocol)
                    proxy.collect_time = Datetime.now()
                    proxy.local = Config.local
                    ls.append(proxy)
                except Exception:
                    # Best effort: skip malformed lines. Only ``Exception`` is
                    # caught so that KeyboardInterrupt and SystemExit still
                    # propagate.
                    continue
            if self._num is None:
                return ls
            return ls[:self._num]
        except Exception:
            if logger:
                logger.exception('FatezeroProxySpider: Failed be load proxy list.')
            raise