Example #1
    def parse(self, response):
        # the subclass must define parse_node(response, node) to handle each matched node
        if not hasattr(self, 'parse_node'):
            raise NotConfigured('You must define parse_node method in order to scrape this XML feed')

        response = self.adapt_response(response)
        if self.iterator == 'iternodes':
            # regex-based streaming iterator (the default)
            nodes = xmliter(response, self.itertag)
        elif self.iterator == 'xml':
            # parse the whole body as XML and select every itertag node by XPath
            selector = XmlXPathSelector(response)
            self._register_namespaces(selector)
            nodes = selector.select('//%s' % self.itertag)
        elif self.iterator == 'html':
            # same, but parse the body as (more forgiving) HTML
            selector = HtmlXPathSelector(response)
            self._register_namespaces(selector)
            nodes = selector.select('//%s' % self.itertag)
        else:
            raise NotSupported('Unsupported node iterator')

        return self.parse_nodes(response, nodes)
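The parse method above only dispatches: the per-node work happens in a parse_node method supplied by the subclass. A minimal sketch of such a subclass, written against the current Scrapy API (scrapy.spiders.XMLFeedSpider with .xpath()/.get() selectors) rather than the older XmlXPathSelector.select() calls shown in these snippets; the feed URL, itertag, and field names are made-up illustrations:

from scrapy.spiders import XMLFeedSpider

class CityFeedSpider(XMLFeedSpider):
    name = 'city-feed'
    start_urls = ['http://example.com/cities.xml']  # hypothetical feed URL
    iterator = 'iternodes'  # default; drives the xmliter branch shown above
    itertag = 'city'        # iterate over every <city> element in the feed

    def parse_node(self, response, node):
        # node is a selector positioned on one <city> element
        yield {
            'name': node.xpath('name/text()').get(),
            'population': node.xpath('population/text()').get(),
        }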
Example #2
    def parse(self, response):
        if not hasattr(self, 'parse_node'):
            raise NotConfigured(
                'You must define parse_node method in order to scrape this XML feed'
            )

        response = self.adapt_response(response)
        if self.iterator == 'iternodes':
            nodes = xmliter(response, self.itertag)
        elif self.iterator == 'xml':
            selector = XmlXPathSelector(response)
            self._register_namespaces(selector)
            nodes = selector.select('//%s' % self.itertag)
        elif self.iterator == 'html':
            selector = HtmlXPathSelector(response)
            self._register_namespaces(selector)
            nodes = selector.select('//%s' % self.itertag)
        else:
            raise NotSupported('Unsupported node iterator')

        return self.parse_nodes(response, nodes)
Example #3
File: feed.py  Project: wwjiang007/scrapy
    def _iternodes(self, response):
        for node in xmliter(response, self.itertag):
            self._register_namespaces(node)
            yield node
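_iternodes is a thin wrapper around xmliter: it registers any namespaces declared on the spider and yields one node at a time. A standalone sketch of what xmliter itself does, using a hand-built response (the URL and body are invented; the import path is the one used by current Scrapy, scrapy.utils.iterators):

from scrapy.http import XmlResponse
from scrapy.utils.iterators import xmliter

body = b'<rss><item><title>first</title></item><item><title>second</title></item></rss>'
response = XmlResponse(url='http://example.com/feed.xml', body=body)

# each iteration yields a selector positioned on one <item> element,
# located with a regex instead of parsing the whole document at once
for node in xmliter(response, 'item'):
    print(node.xpath('title/text()').get())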
Example #4
File: feed.py  Project: 1012/scrapy
    def _iternodes(self, response):
        for node in xmliter(response, self.itertag):
            self._register_namespaces(node)
            yield node

    def parse_cities(self, response):
        nodes = xmliter(response, "city")
        for selector in nodes:
            # normalize whatever parse_city returns (single item, list, or generator)
            ret = iterate_spider_output(self.parse_city(response, selector))
            for result_item in ret:
                yield result_item
Example #6
File: feed.py  Project: bf96163/scrapy
    def _iternodes(self, response):  # parse the response directly with xmliter, filtering by itertag
        for node in xmliter(response, self.itertag):  # although it is named node, each yielded object is actually a selector
            self._register_namespaces(node)
            yield node

    def parse_cities(self, response):
        nodes = xmliter(response, "city")
        for selector in nodes:
            ret = iterate_spider_output(self.parse_city(response, selector))
            for result_item in ret:
                yield result_item
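parse_cities in examples #4 and #6 hands each <city> selector to a parse_city callback and runs the result through iterate_spider_output, which normalizes a single item, None, or a generator into an iterable. parse_city itself is not part of these snippets; a hypothetical version matching the pattern, with assumed field names, could look like this:

    def parse_city(self, response, selector):
        # hypothetical callback: may return a single item, a list, or (as here) a generator;
        # iterate_spider_output in parse_cities accepts any of these
        yield {
            'name': selector.xpath('name/text()').get(),
            'source_url': response.url,
        }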