Пример #1
0
    def parse_page(self, response):
        """Generate one loaded item for each matching row of the page.

        Rows are the ``tr`` elements whose ``class`` attribute starts with
        ``"row"`` and that sit directly under a table with
        ``cellpadding="3"``. For every such row a ``ProxyItemLoader`` bound
        to a fresh ``Proxy`` item is filled from the row and the result of
        ``load_item()`` is yielded.
        """
        row_query = '//table[@cellpadding="3"]/tr[starts-with(@class, "row")]'
        # (field name, XPath relative to the row), in the order they are added.
        field_queries = (
            ('port', 'td[3]/a/text()'),
            ('address', 'td[2]/a/text()'),
        )

        document = HtmlXPathSelector(response)
        for table_row in document.select(row_query):
            item_loader = ProxyItemLoader(item=Proxy(), response=response, selector=table_row)
            for field_name, relative_query in field_queries:
                item_loader.add_xpath(field_name, relative_query)
            yield item_loader.load_item()
Пример #2
0
    def parse_page(self, response):
        """Generate one loaded item per data row, decoding the address.

        Candidate rows are every ``tr`` directly under a ``table`` that has
        more than one ``td``; the first and the last candidate are dropped
        by the ``[1:-1]`` slice. The address is taken from the first
        double-quoted string in the script of the first cell and decoded
        with the ``rot13`` codec followed by the ``base64`` codec. The port
        is the text of the second cell.
        """
        document = HtmlXPathSelector(response)
        candidate_rows = document.select('//table/tr[count(td) > 1]')

        for table_row in candidate_rows[1:-1]:
            item_loader = ProxyItemLoader(item=Proxy(), response=response, selector=table_row)

            quoted_strings = table_row.select('td[1]/script/text()').re(r'"(.*?)"')
            # NOTE(review): indexing raises IndexError when the script has no
            # quoted string -- confirm every data row carries one.
            obfuscated = quoted_strings[0]
            # NOTE(review): calling .decode() with these codecs on a string
            # looks Python 2 specific -- confirm the target interpreter.
            unrotated = obfuscated.decode('rot13')
            address = unrotated.decode('base64')

            item_loader.add_value('address', address)
            item_loader.add_xpath('port', 'td[2]/text()')

            yield item_loader.load_item()
Пример #3
0
    def parse_page(self, response):
        """Generate one loaded item for each body row of the "listable" table.

        The address value is whatever ``self.get_ip`` returns for the
        ``span`` selection of the row's second cell; the port is read from
        the text of the third cell.
        """
        document = HtmlXPathSelector(response)
        body_rows = document.select('//table[@id="listable"]/tbody/tr')

        for table_row in body_rows:
            item_loader = ProxyItemLoader(item=Proxy(), response=response, selector=table_row)

            # The address is not added by XPath directly: the span selection
            # is handed to get_ip and its return value is stored instead.
            address_spans = table_row.select('td[2]/span')
            address = self.get_ip(address_spans)
            item_loader.add_value('address', address)

            item_loader.add_xpath('port', 'td[3]/text()')

            yield item_loader.load_item()
Пример #4
0
    def parse_page(self, response):
        """Yield the loaded item built from each row of table ``listable``.

        Every ``tr`` under the ``tbody`` of the table whose id is
        ``listable`` produces one ``ProxyItemLoader`` run: ``address``
        receives the return value of ``self.get_ip`` for the spans in the
        second cell, ``port`` receives the text of the third cell.
        """
        rows_query = '//table[@id="listable"]/tbody/tr'
        span_query = "td[2]/span"
        port_query = "td[3]/text()"

        page = HtmlXPathSelector(response)
        for entry in page.select(rows_query):
            builder = ProxyItemLoader(
                item=Proxy(),
                response=response,
                selector=entry,
            )
            builder.add_value("address", self.get_ip(entry.select(span_query)))
            builder.add_xpath("port", port_query)
            yield builder.load_item()
Пример #5
0
    def parse_page(self, response):
        """Yield the loaded item for every ``row*``-classed table row.

        Only ``tr`` elements whose ``class`` starts with ``"row"`` directly
        under a table with ``cellpadding="3"`` are visited. ``port`` is
        filled from the link text in the third cell and ``address`` from
        the link text in the second cell.
        """
        page = HtmlXPathSelector(response)
        matched_rows = page.select(
            '//table[@cellpadding="3"]/tr[starts-with(@class, "row")]'
        )

        for entry in matched_rows:
            builder = ProxyItemLoader(
                item=Proxy(),
                response=response,
                selector=entry,
            )
            builder.add_xpath("port", "td[3]/a/text()")
            builder.add_xpath("address", "td[2]/a/text()")
            yield builder.load_item()
Пример #6
0
    def parse_page(self, response):
        """Generate one loaded item for each row after the first in ``proxytbl``.

        ``self.get_variables`` is called once with the page selector; its
        result is passed to ``self.get_port`` together with the ``script``
        selection of each row's ``t_port`` cell, and the return value is
        stored as the port. The address is the text of the ``t_ip`` cell.
        """
        rows_query = '//table[@class="proxytbl"]/tr[position() > 1]'
        address_query = 'td[@class="t_ip"]/text()'
        port_script_query = 'td[@class="t_port"]/script'

        document = HtmlXPathSelector(response)
        # Page-level data computed once and reused for every row.
        page_variables = self.get_variables(document)

        for table_row in document.select(rows_query):
            item_loader = ProxyItemLoader(item=Proxy(), response=response, selector=table_row)

            item_loader.add_xpath('address', address_query)

            # presumably the port is produced by the cell's script and needs
            # the page variables to be resolved -- verify against get_port.
            port_script = table_row.select(port_script_query)
            item_loader.add_value('port', self.get_port(port_script, page_variables))

            yield item_loader.load_item()
Пример #7
0
    def parse_page(self, response):
        """Yield the loaded item for each ``proxytbl`` row past the first.

        The page selector is handed to ``self.get_variables`` once. For
        every row, ``address`` is filled from the text of the ``t_ip`` cell
        and ``port`` from the return value of ``self.get_port``, which is
        given the ``script`` selection of the ``t_port`` cell plus the
        variables obtained earlier.
        """
        page = HtmlXPathSelector(response)
        shared_vars = self.get_variables(page)
        data_rows = page.select('//table[@class="proxytbl"]/tr[position() > 1]')

        for entry in data_rows:
            builder = ProxyItemLoader(item=Proxy(), response=response, selector=entry)
            builder.add_xpath('address', 'td[@class="t_ip"]/text()')

            # Resolved after the address is added, as in the original order.
            script_nodes = entry.select('td[@class="t_port"]/script')
            port = self.get_port(script_nodes, shared_vars)
            builder.add_value('port', port)

            yield builder.load_item()