class TestGetfreeproxypistsBlogspotCom(unittest.TestCase):
    """Tests for the GetfreeproxylistsBlogspotCom parser against a saved HTML fixture."""

    html_page = None
    pp = GetfreeproxylistsBlogspotCom()
    p1 = ProxyItem()
    p2 = ProxyItem()
    p3 = ProxyItem()

    def setUp(self):
        # Context manager guarantees the fixture file is closed even if read() raises
        # (the original open/read/close sequence leaked the handle on error).
        with open(WAY + '/html-files/getfreeproxylistsBlogspotCom.html', 'rb') as f:
            text = f.read()
        self.html_page = HtmlResponse(url='', body=text)
        # Expected first three proxies parsed from the fixture; all share _type 2.
        expected = [
            ("173.82.78.189", "5836"),
            ("108.61.245.77", "8080"),
            ("191.234.168.144", "3128"),
        ]
        for item, (host, port) in zip((self.p1, self.p2, self.p3), expected):
            item["host"] = host
            item["port"] = port
            item["_type"] = 2
            item["ping"] = None

    def test_get_proxies(self):
        res = [i for i in self.pp.get_proxies(self.html_page)]
        self.assertEqual(res[0], self.p1)
        self.assertEqual(res[1], self.p2)
        self.assertEqual(res[2], self.p3)
class TestMyProxyCom(unittest.TestCase):
    """Tests for the MyProxyCom parser against a saved HTML fixture."""

    html_page = None
    pp = MyProxyCom()
    p1 = ProxyItem()
    p2 = ProxyItem()
    p3 = ProxyItem()

    def setUp(self):
        # Context manager guarantees the fixture file is closed even if read() raises
        # (the original open/read/close sequence leaked the handle on error).
        with open(WAY + '/html-files/MyProxyComGetReq.html', 'rb') as f:
            text = f.read()
        self.html_page = HtmlResponse(url='', body=text)
        # Expected first three proxies parsed from the fixture; all share _type 1.
        expected = [
            ("88.218.66.158", "8085"),
            ("85.209.150.207", "8085"),
            ("91.188.246.176", "8085"),
        ]
        for item, (host, port) in zip((self.p1, self.p2, self.p3), expected):
            item["host"] = host
            item["port"] = port
            item["_type"] = 1
            item["ping"] = None

    def test_get_proxies(self):
        res = [i for i in self.pp.get_proxies(self.html_page)]
        self.assertEqual(res[0], self.p1)
        self.assertEqual(res[1], self.p2)
        self.assertEqual(res[2], self.p3)
class TestOnlineProxyRu(unittest.TestCase):
    """Tests for the OnlineProxyRu parser against a saved HTML fixture."""

    html_page = None
    pp = OnlineProxyRu()
    p1 = ProxyItem()
    p2 = ProxyItem()
    p3 = ProxyItem()

    def setUp(self):
        # Context manager guarantees the fixture file is closed even if read() raises
        # (the original open/read/close sequence leaked the handle on error).
        with open(WAY + '/html-files/OnlineProxyRu.html', 'rb') as f:
            text = f.read()
        self.html_page = HtmlResponse(url='', body=text)
        # Expected first three proxies; this source also provides a ping value.
        expected = [
            ("37.205.48.116", "8080", 90),
            ("95.183.73.89", "8080", 146),
            ("185.75.67.237", "8080", 33),
        ]
        for item, (host, port, ping) in zip((self.p1, self.p2, self.p3), expected):
            item["host"] = host
            item["port"] = port
            item["_type"] = 3
            item["ping"] = ping

    def test_get_proxies(self):
        res = [i for i in self.pp.get_proxies(self.html_page)]
        self.assertEqual(res[0], self.p1)
        self.assertEqual(res[1], self.p2)
        self.assertEqual(res[2], self.p3)
def get_proxies(self, response):
    """Yield one ProxyItem per row of the '#theProxyList' table.

    Protocol type is looked up in the base class's `protocols` mapping
    keyed by the lower-cased text of the sixth column.
    """
    rows = response.xpath("//table[@id='theProxyList']/tbody/tr")
    for entry in rows:
        protocol_key = entry.xpath('td[6]/text()').get().strip().lower()
        item = ProxyItem()
        item['host'] = entry.xpath('td[2]/text()').get()
        item['port'] = entry.xpath('td[3]/text()').get()
        item['_type'] = super().protocols[protocol_key]
        item['ping'] = None
        yield item
def get_proxies(self, response):
    """Yield a ProxyItem for each 'host:port' line of a plain-text proxy list.

    Fix: response.text.split('\n') produces a trailing empty element (and the
    feed may contain malformed lines); the original `row.split(':')[1]` raised
    IndexError on such lines. Lines without a ':' separator are now skipped,
    matching the PATTERN-guarded sibling parsers in this module.
    """
    for row in response.text.split('\n'):
        parts = row.split(':')
        if len(parts) < 2:
            continue  # blank trailing line or line without host:port
        pi = ProxyItem()
        pi['host'] = parts[0]
        pi['port'] = parts[1].strip()
        pi['_type'] = 0  # protocol unknown for this source
        pi['ping'] = None
        yield pi
def get_proxies(self, response):
    """Yield ProxyItems parsed from the first column of the striped table.

    Each cell holds a 'host:port' string; protocol is unknown (_type 0).
    """
    cells = response.xpath("//table[@class ='table table-striped']//tr/td[1]/text()").extract()
    for cell in cells:
        parts = cell.split(':')
        item = ProxyItem()
        item['host'] = parts[0]
        item['port'] = parts[1]
        item['_type'] = 0
        item['ping'] = None
        yield item
def get_proxies(self, response):
    """Yield ProxyItems from the content table, skipping transparent proxies.

    The first 23 rows are page chrome/header rows and are skipped up front.
    """
    rows = response.xpath("//td[@class='content']/table[1]//tr")[23:]
    for entry in rows:
        # Fifth cell holds the anonymity level; skip transparent proxies.
        if entry.xpath('td/text()').extract()[4] == 'прозрачный':
            continue
        item = ProxyItem()
        item['host'] = entry.xpath('td[2]/text()').get()
        item['port'] = entry.xpath('td[3]/text()').get()
        item['_type'] = self.types[entry.xpath('td[4]/text()').get()]
        item['ping'] = None
        yield item
def get_proxies(self, response):
    """Yield ProxyItems from the '#proxylistt' table, skipping transparent ones.

    The last row is a footer and is excluded; host and port are split across
    the first cell's text and its nested <span>.
    """
    rows = response.xpath("//table[@id='proxylistt']/tbody/tr")[:-1]
    for entry in rows:
        anonymity = entry.xpath("td[2]/text()").get().strip()
        if anonymity == 'transparent':
            continue
        item = ProxyItem()
        # Host text carries a trailing character that must be dropped.
        item['host'] = entry.xpath("td[1]/text()").get()[:-1]
        item['port'] = entry.xpath("td[1]/span/text()").get()
        item['_type'] = self.types_[anonymity]
        item['ping'] = None
        yield item
def get_proxies(self, response):
    """Yield ProxyItems from list-entry link texts that look like 'host:port'.

    Fix: the original constructed a ProxyItem before the regex guard,
    allocating a throwaway item for every non-matching link. Construction
    now happens only after the match succeeds, consistent with the other
    PATTERN-guarded parsers in this module.
    """
    for row in response.xpath('//ul/li/a/text()').extract():
        if not re.match(super().PATTERN, row):
            continue
        parts = row.split(':')
        pi = ProxyItem()
        pi['host'] = parts[0]
        pi['port'] = parts[1]
        pi['_type'] = None  # protocol cannot be determined from this source
        pi['ping'] = None
        yield pi
def get_proxies(self, response):
    """Yield ProxyItems from a plain-text response, one 'host:port' per line.

    Lines that do not match the base class PATTERN are ignored.
    """
    for line in response.text.split('\n'):
        if not re.match(super().PATTERN, line):
            continue
        parts = line.split(':')
        item = ProxyItem()
        item['host'] = parts[0]
        item['port'] = parts[1]
        item['_type'] = 0
        item['ping'] = None
        yield item
def get_proxies(self, response):
    """Yield ProxyItems from the first table column inside '#content'.

    Cell text is stripped of surrounding whitespace before splitting
    into host and port.
    """
    cells = response.xpath(
        "//div[@id='content']//table//tr/td[1]/text()").extract()
    for raw in cells:
        host, port = raw.strip().split(':')[0], raw.strip().split(':')[1]
        item = ProxyItem()
        item['host'] = host
        item['port'] = port
        item['_type'] = 0
        item['ping'] = None
        yield item
def get_proxies(self, response):
    """Yield ProxyItems from <pre> blocks whose CRLF-separated lines match PATTERN."""
    blocks = response.xpath('//table/tr/td[2]/pre/text()').extract()
    for block in blocks:
        lines = block.split('\r\n')
        for line in lines:
            if not re.match(super().PATTERN, line):
                continue
            host, port = line.split(':')[0], line.split(':')[1]
            item = ProxyItem()
            item['host'] = host
            item['port'] = port
            item['_type'] = 0
            item['ping'] = None
            yield item
def get_proxies(self, response):
    """Yield ProxyItems from the '#proxylisttable' rows.

    Column 7 is the HTTPS flag: 'no' means plain HTTP (_type 1),
    anything else is treated as HTTPS (_type 2).
    """
    for entry in response.xpath("//table[@id='proxylisttable']/tbody/tr"):
        item = ProxyItem()
        item['host'] = entry.xpath('td[1]/text()').get()
        item['port'] = entry.xpath('td[2]/text()').get()
        item['_type'] = 1 if entry.xpath('td[7]/text()').get() == 'no' else 2
        item['ping'] = None
        yield item
def get_proxies(self, response):
    """Yield ProxyItems from the free-proxy list spans, one 'host:port' per line.

    Lines not matching the base class PATTERN are skipped.
    """
    lists = response.xpath(
        "//div[@class ='centeredProxyList freeProxyStyle']/span/span/text()"
    ).extract()
    for chunk in lists:
        for line in chunk.split('\n'):
            if not re.match(super().PATTERN, line):
                continue
            item = ProxyItem()
            item['host'] = line.split(':')[0]
            item['port'] = line.split(':')[1]
            item['_type'] = 0
            item['ping'] = None
            yield item
def parse(self, response):
    """Yield a ProxyItem for the proxy used on a successful (HTTP 200) check.

    The proxy URL comes from response.meta['proxy'] (e.g.
    'http://1.2.3.4:8080'); host, port and protocol type are parsed
    out of it. Commented-out dead code removed.
    """
    if response.status == 200:
        using_proxy = response.meta['proxy']
        pi = ProxyItem()
        # Strip the scheme, then take the host part before the port.
        pi['host'] = (using_proxy.split('://')[-1]).split(':')[0]
        pi['port'] = using_proxy.split(':')[-1]
        # Scheme ('http', 'https', ...) maps to the numeric _type code.
        pi['_type'] = self.types[using_proxy.split('://')[0]]
        pi['ping'] = None
        yield pi
def get_proxies(self, response):
    """Yield ProxyItems from blog post bodies.

    Each post interleaves 'HTTP' / 'HTTPS' section markers with
    'host:port' lines; the most recent marker determines the _type of
    the proxies that follow it.
    """
    type_markers = {'HTTP': 1, 'HTTPS': 2}
    for block in response.xpath("//div[@class='post-body entry-content']"):
        texts = block.xpath("descendant-or-self::*/text()").extract()
        current_type = None
        for text in texts:
            if text in type_markers:
                current_type = type_markers[text]
            elif re.match(super().PATTERN, text):
                item = ProxyItem()
                item['host'] = text.split(':')[0]
                item['port'] = text.split(':')[1]
                item['_type'] = current_type
                item['ping'] = None
                yield item
def get_proxies(self, response):
    """Yield ProxyItems from the '.list' div; protocol is inferred from the URL.

    Fix: the original also appended every item to a `result` list that was
    never used (the function yields instead of returning) — the dead
    accumulator has been removed.
    """
    # URL encodes the protocol: socks-5 -> 4, socks-4 -> 3, otherwise HTTP.
    if 'socks-5' in response.url:
        current_type = 4
    elif 'socks-4' in response.url:
        current_type = 3
    else:
        current_type = 1
    for p in response.xpath("//div[@class='list']/text()").extract():
        # Lines look like 'host:port#comment'; drop the comment first.
        v = (p.split('#')[0]).split(':')
        pi = ProxyItem()
        pi['host'] = v[0]
        pi['port'] = v[1]
        pi['_type'] = current_type
        pi['ping'] = None
        yield pi
def get_proxies(self, response):
    """Yield ProxyItems from the '.table_block' table (header row skipped).

    Fixes:
    - The protocol cell (td[5]) was re-queried via xpath up to five times
      per row; it is now read once per row.
    - The bare `except:` (which also swallows KeyboardInterrupt/SystemExit)
      is narrowed to `except Exception`, preserving the original
      best-effort skip of malformed rows.
    """
    rows = response.xpath("//div[@class='table_block']/table//tr")[1:]
    for row in rows:
        try:
            # .get() may return None on a malformed row -> AttributeError,
            # handled below by skipping the row.
            protocol = row.xpath("td[5]/text()").get().lower()
            pi = ProxyItem()
            pi['host'] = row.xpath("td[1]/text()").get()
            pi['port'] = row.xpath("td[2]/text()").get()
            if 'https' in protocol:  # must be tested before 'http'
                pi['_type'] = 2
            elif 'http' in protocol:
                pi['_type'] = 1
            elif 'socks4' in protocol:
                pi['_type'] = 3
            elif 'socks5' in protocol:
                pi['_type'] = 4
            else:
                pi['_type'] = 0
            pi['ping'] = None
            yield pi
        except Exception:
            continue