class TestGetfreeproxypistsBlogspotCom(unittest.TestCase):
    """Check GetfreeproxylistsBlogspotCom.get_proxies against a saved page.

    The saved HTML fixture is wrapped in an ``HtmlResponse`` and the first
    three yielded items are compared with hand-written expectations.
    """

    html_page = None
    pp = GetfreeproxylistsBlogspotCom()
    p1 = ProxyItem()
    p2 = ProxyItem()
    p3 = ProxyItem()

    def setUp(self):
        """Load the HTML fixture and fill in the three expected items."""
        fixture = WAY + '/html-files/getfreeproxylistsBlogspotCom.html'
        # The context manager closes the file even if read() raises.
        with open(fixture, 'rb') as f:
            text = f.read()
        self.html_page = HtmlResponse(url='', body=text)

        expected = (
            (self.p1, "173.82.78.189", "5836", 2, None),
            (self.p2, "108.61.245.77", "8080", 2, None),
            (self.p3, "191.234.168.144", "3128", 2, None),
        )
        for item, host, port, proxy_type, ping in expected:
            item["host"] = host
            item["port"] = port
            item["_type"] = proxy_type
            item["ping"] = ping

    def test_get_proxies(self):
        # The first three parsed items must match the expected ones, in order.
        res = list(self.pp.get_proxies(self.html_page))
        # Fail with a readable message instead of an IndexError when the
        # parser yields fewer items than the test inspects.
        self.assertGreaterEqual(len(res), 3)
        self.assertEqual(res[0], self.p1)
        self.assertEqual(res[1], self.p2)
        self.assertEqual(res[2], self.p3)
Пример #2
0
class TestMyProxyCom(unittest.TestCase):
    """Check MyProxyCom.get_proxies against a saved page.

    The saved HTML fixture is wrapped in an ``HtmlResponse`` and the first
    three yielded items are compared with hand-written expectations.
    """

    html_page = None
    pp = MyProxyCom()
    p1 = ProxyItem()
    p2 = ProxyItem()
    p3 = ProxyItem()

    def setUp(self):
        """Load the HTML fixture and fill in the three expected items."""
        fixture = WAY + '/html-files/MyProxyComGetReq.html'
        # The context manager closes the file even if read() raises.
        with open(fixture, 'rb') as f:
            text = f.read()
        self.html_page = HtmlResponse(url='', body=text)

        expected = (
            (self.p1, "88.218.66.158", "8085", 1, None),
            (self.p2, "85.209.150.207", "8085", 1, None),
            (self.p3, "91.188.246.176", "8085", 1, None),
        )
        for item, host, port, proxy_type, ping in expected:
            item["host"] = host
            item["port"] = port
            item["_type"] = proxy_type
            item["ping"] = ping

    def test_get_proxies(self):
        # The first three parsed items must match the expected ones, in order.
        res = list(self.pp.get_proxies(self.html_page))
        # Fail with a readable message instead of an IndexError when the
        # parser yields fewer items than the test inspects.
        self.assertGreaterEqual(len(res), 3)
        self.assertEqual(res[0], self.p1)
        self.assertEqual(res[1], self.p2)
        self.assertEqual(res[2], self.p3)
Пример #3
0
class TestOnlineProxyRu(unittest.TestCase):
    """Check OnlineProxyRu.get_proxies against a saved page.

    The saved HTML fixture is wrapped in an ``HtmlResponse`` and the first
    three yielded items (including their ``ping`` values) are compared with
    hand-written expectations.
    """

    html_page = None
    pp = OnlineProxyRu()
    p1 = ProxyItem()
    p2 = ProxyItem()
    p3 = ProxyItem()

    def setUp(self):
        """Load the HTML fixture and fill in the three expected items."""
        fixture = WAY + '/html-files/OnlineProxyRu.html'
        # The context manager closes the file even if read() raises.
        with open(fixture, 'rb') as f:
            text = f.read()
        self.html_page = HtmlResponse(url='', body=text)

        expected = (
            (self.p1, "37.205.48.116", "8080", 3, 90),
            (self.p2, "95.183.73.89", "8080", 3, 146),
            (self.p3, "185.75.67.237", "8080", 3, 33),
        )
        for item, host, port, proxy_type, ping in expected:
            item["host"] = host
            item["port"] = port
            item["_type"] = proxy_type
            item["ping"] = ping

    def test_get_proxies(self):
        # The first three parsed items must match the expected ones, in order.
        res = list(self.pp.get_proxies(self.html_page))
        # Fail with a readable message instead of an IndexError when the
        # parser yields fewer items than the test inspects.
        self.assertGreaterEqual(len(res), 3)
        self.assertEqual(res[0], self.p1)
        self.assertEqual(res[1], self.p2)
        self.assertEqual(res[2], self.p3)
Пример #4
0
 def get_proxies(self, response):
     """Yield one ProxyItem per body row of the ``theProxyList`` table.

     Host and port come from the second and third cells. The sixth cell's
     text, stripped and lower-cased, is used as the key into the parent
     class's ``protocols`` to obtain ``_type``. ``ping`` is always ``None``.
     """
     for tr in response.xpath("//table[@id='theProxyList']/tbody/tr"):
         item = ProxyItem()
         item['host'] = tr.xpath('td[2]/text()').get()
         item['port'] = tr.xpath('td[3]/text()').get()
         protocol_key = tr.xpath('td[6]/text()').get().strip().lower()
         item['_type'] = super().protocols[protocol_key]
         item['ping'] = None
         yield item
Пример #5
0
 def get_proxies(self, response):
     """Yield a ProxyItem for every ``host:port`` line of a text response.

     The response text is split on newlines. Lines without a ``:``
     separator, such as the empty string left after a trailing newline,
     are skipped rather than raising ``IndexError``. The port is stripped
     of surrounding whitespace; ``_type`` is always ``0`` and ``ping`` is
     always ``None``.
     """
     for line in response.text.split('\n'):
         fields = line.split(':')
         if len(fields) < 2:
             # Blank or malformed line: there is no port to read.
             continue
         pi = ProxyItem()
         pi['host'] = fields[0]
         pi['port'] = fields[1].strip()
         pi['_type'] = 0
         pi['ping'] = None
         yield pi
Пример #6
0
 def get_proxies(self, response):
     """Yield a ProxyItem for each first-column text in the striped table.

     Every extracted text is split on ``:``; the first piece becomes the
     host and the second the port. ``_type`` is always ``0`` and ``ping``
     is always ``None``.
     """
     cell_texts = response.xpath(
         "//table[@class ='table table-striped']//tr/td[1]/text()"
     ).extract()
     for cell_text in cell_texts:
         pieces = cell_text.split(':')
         item = ProxyItem()
         item['host'] = pieces[0]
         item['port'] = pieces[1]
         item['_type'] = 0
         item['ping'] = None
         yield item
Пример #7
0
 def get_proxies(self, response):
     """Yield ProxyItems for table rows not marked as transparent.

     The first 23 rows of the first table inside ``td.content`` are
     skipped. A row is dropped when the fifth text node among its cells
     equals 'прозрачный' (Russian for "transparent"). Host and port come
     from the second and third cells; the fourth cell's text is the key
     into ``self.types``. ``ping`` is always ``None``.
     """
     rows = response.xpath("//td[@class='content']/table[1]//tr")[23:]
     for tr in rows:
         cell_texts = tr.xpath('td/text()').extract()
         if cell_texts[4] == 'прозрачный':
             continue
         item = ProxyItem()
         item['host'] = tr.xpath('td[2]/text()').get()
         item['port'] = tr.xpath('td[3]/text()').get()
         type_key = tr.xpath('td[4]/text()').get()
         item['_type'] = self.types[type_key]
         item['ping'] = None
         yield item
Пример #8
0
 def get_proxies(self, response):
     """Yield ProxyItems for the non-transparent rows of ``proxylistt``.

     The last body row of the table is ignored. Rows whose second cell,
     once stripped, reads ``transparent`` are skipped. The host is the
     first cell's text minus its final character, and the port is the text
     of the ``span`` inside that cell. The stripped second-cell text is the
     key into ``self.types_``. ``ping`` is always ``None``.
     """
     rows = response.xpath("//table[@id='proxylistt']/tbody/tr")[:-1]
     for tr in rows:
         anonymity = tr.xpath("td[2]/text()").get().strip()
         if anonymity == 'transparent':
             continue
         item = ProxyItem()
         # presumably the dropped trailing character is the ':' separator
         # that precedes the port span -- confirm against the page markup
         item['host'] = tr.xpath("td[1]/text()").get()[:-1]
         item['port'] = tr.xpath("td[1]/span/text()").get()
         item['_type'] = self.types_[anonymity]
         item['ping'] = None
         yield item
Пример #9
0
 def get_proxies(self, response):
     """Yield a ProxyItem for each list link whose text matches PATTERN.

     Link texts under ``ul/li/a`` that match the parent class's ``PATTERN``
     are split on ``:`` into host and port. Both ``_type`` and ``ping`` are
     ``None``. Non-matching texts are ignored, and no item is built for
     them.
     """
     # Looked up once; the pattern does not change between rows.
     pattern = super().PATTERN
     for text in response.xpath('//ul/li/a/text()').extract():
         if not re.match(pattern, text):
             continue
         fields = text.split(':')
         pi = ProxyItem()
         pi['host'] = fields[0]
         pi['port'] = fields[1]
         pi['_type'] = None
         pi['ping'] = None
         yield pi
Пример #10
0
 def get_proxies(self, response):
     """Yield a ProxyItem for each response line that matches PATTERN.

     The response text is split on newlines. Lines matching the parent
     class's ``PATTERN`` are split on ``:`` into host and port. ``_type``
     is always ``0`` and ``ping`` is always ``None``.
     """
     pattern = super().PATTERN
     for line in response.text.split('\n'):
         if not re.match(pattern, line):
             continue
         pieces = line.split(':')
         item = ProxyItem()
         item['host'] = pieces[0]
         item['port'] = pieces[1]
         item['_type'] = 0
         item['ping'] = None
         yield item
Пример #11
0
 def get_proxies(self, response):
     """Yield a ProxyItem for each first-column text under ``div#content``.

     Every extracted text is stripped and split on ``:``; the first piece
     becomes the host and the second the port. ``_type`` is always ``0``
     and ``ping`` is always ``None``.
     """
     cell_texts = response.xpath(
         "//div[@id='content']//table//tr/td[1]/text()").extract()
     for cell_text in cell_texts:
         pieces = cell_text.strip().split(':')
         item = ProxyItem()
         item['host'] = pieces[0]
         item['port'] = pieces[1]
         item['_type'] = 0
         item['ping'] = None
         yield item
Пример #12
0
 def get_proxies(self, response):
     """Yield a ProxyItem for each matching line inside the ``pre`` blocks.

     The text of every ``pre`` in a table row's second cell is split on
     CRLF. Lines matching the parent class's ``PATTERN`` are split on
     ``:`` into host and port. ``_type`` is always ``0`` and ``ping`` is
     always ``None``.
     """
     pattern = super().PATTERN
     for chunk in response.xpath('//table/tr/td[2]/pre/text()').extract():
         for line in chunk.split('\r\n'):
             if not re.match(pattern, line):
                 continue
             pieces = line.split(':')
             item = ProxyItem()
             item['host'] = pieces[0]
             item['port'] = pieces[1]
             item['_type'] = 0
             item['ping'] = None
             yield item
Пример #13
0
 def get_proxies(self, response):
     """Yield one ProxyItem per body row of the ``proxylisttable`` table.

     Host and port come from the first and second cells. ``_type`` is
     ``1`` when the seventh cell's text is exactly ``'no'`` and ``2``
     otherwise. ``ping`` is always ``None``.
     """
     for tr in response.xpath("//table[@id='proxylisttable']/tbody/tr"):
         item = ProxyItem()
         item['host'] = tr.xpath('td[1]/text()').get()
         item['port'] = tr.xpath('td[2]/text()').get()
         seventh_cell = tr.xpath('td[7]/text()').get()
         item['_type'] = 1 if seventh_cell == 'no' else 2
         item['ping'] = None
         yield item
Пример #14
0
 def get_proxies(self, response):
     """Yield a ProxyItem for each matching line in the centred proxy list.

     The text of each nested ``span`` inside the
     ``centeredProxyList freeProxyStyle`` div is split on newlines. Lines
     matching the parent class's ``PATTERN`` are split on ``:`` into host
     and port. ``_type`` is always ``0`` and ``ping`` is always ``None``.
     """
     pattern = super().PATTERN
     span_texts = response.xpath(
         "//div[@class ='centeredProxyList freeProxyStyle']/span/span/text()"
     ).extract()
     for span_text in span_texts:
         for line in span_text.split('\n'):
             if not re.match(pattern, line):
                 continue
             pieces = line.split(':')
             item = ProxyItem()
             item['host'] = pieces[0]
             item['port'] = pieces[1]
             item['_type'] = 0
             item['ping'] = None
             yield item
Пример #15
0
 def parse(self, response):
     """Yield a ProxyItem for the proxy behind a status-200 response.

     Nothing is yielded for any other status. The proxy URL is read from
     ``response.meta['proxy']``. The part before ``://`` is the key into
     ``self.types``, the host is the text between ``://`` and the next
     ``:``, and the port is whatever follows the last ``:``. ``ping`` is
     always ``None``.
     """
     if response.status != 200:
         return
     proxy_url = response.meta['proxy']
     scheme = proxy_url.split('://')[0]
     address = proxy_url.split('://')[-1]
     item = ProxyItem()
     item['host'] = address.split(':')[0]
     item['port'] = proxy_url.split(':')[-1]
     item['_type'] = self.types[scheme]
     item['ping'] = None
     yield item
Пример #16
0
 def get_proxies(self, response):
     """Yield ProxyItems from the text nodes of each post body.

     Inside every ``post-body entry-content`` div the text nodes are
     visited in document order. A node equal to ``HTTP`` or ``HTTPS`` sets
     the type used for the addresses that follow (``1`` or ``2``). A node
     matching the parent class's ``PATTERN`` is split on ``:`` into host
     and port and yielded with that type. The type starts as ``None`` for
     every div. ``ping`` is always ``None``.
     """
     heading_types = {'HTTP': 1, 'HTTPS': 2}
     pattern = super().PATTERN
     for post in response.xpath("//div[@class='post-body entry-content']"):
         texts = post.xpath("descendant-or-self::*/text()").extract()
         active_type = None
         for text in texts:
             if text in heading_types:
                 active_type = heading_types[text]
             elif re.match(pattern, text):
                 pieces = text.split(':')
                 item = ProxyItem()
                 item['host'] = pieces[0]
                 item['port'] = pieces[1]
                 item['_type'] = active_type
                 item['ping'] = None
                 yield item
Пример #17
0
 def get_proxies(self, response):
     """Yield a ProxyItem for each ``host:port`` text node in ``div.list``.

     ``_type`` is chosen from the response URL: ``4`` when it contains
     ``socks-5``, ``3`` when it contains ``socks-4``, and ``1`` otherwise.
     For every text node, anything from the first ``#`` onward is dropped
     and the rest is split on ``:`` into host and port. Nodes without a
     ``:`` separator are skipped rather than raising ``IndexError``.
     ``ping`` is always ``None``.
     """
     if 'socks-5' in response.url:
         current_type = 4
     elif 'socks-4' in response.url:
         current_type = 3
     else:
         current_type = 1
     for entry in response.xpath("//div[@class='list']/text()").extract():
         fields = entry.split('#')[0].split(':')
         if len(fields) < 2:
             # Whitespace-only or malformed node: there is no port to read.
             continue
         pi = ProxyItem()
         pi['host'] = fields[0]
         pi['port'] = fields[1]
         pi['_type'] = current_type
         pi['ping'] = None
         yield pi
Пример #18
0
 def get_proxies(self, response):
     """Yield one ProxyItem per data row of the ``table_block`` table.

     The first row of the table is skipped. Host and port come from the
     first and second cells. ``_type`` is derived from the lower-cased
     text of the fifth cell: ``2`` if it contains ``https``, ``1`` for
     ``http``, ``3`` for ``socks4``, ``4`` for ``socks5`` and ``0``
     otherwise. Rows without a fifth-cell text are skipped. ``ping`` is
     always ``None``.
     """
     # Order matters: 'https' must be tested before its substring 'http'.
     markers = (('https', 2), ('http', 1), ('socks4', 3), ('socks5', 4))
     rows = response.xpath("//div[@class='table_block']/table//tr")[1:]
     for row in rows:
         protocol = row.xpath("td[5]/text()").get()
         if protocol is None:
             # No protocol cell: skip the row instead of failing on None.
             continue
         protocol = protocol.lower()
         pi = ProxyItem()
         pi['host'] = row.xpath("td[1]/text()").get()
         pi['port'] = row.xpath("td[2]/text()").get()
         pi['_type'] = next(
             (code for marker, code in markers if marker in protocol), 0)
         pi['ping'] = None
         # The yield sits outside any exception handler so that closing
         # the generator (GeneratorExit) is never swallowed.
         yield pi