示例#1
0
 def parse_item(self, response):
     """Yield a single SpideroneItem populated through an ItemLoader.

     The ItemLoader helper is used in place of hand-written
     ``response.xpath(...).extract()`` calls, which read as cluttered.

     Fields registered on the loader:
         author -- the author
         theme  -- the theme (topic)
     """
     loader = ItemLoader(item=items.SpideroneItem(), response=response)

     # (field name, absolute XPath) pairs, registered in this order.
     field_xpaths = (
         ('author',
          '/html/body/div[5]/div[3]/div/a/p[2]/span/span[2]/text()'),
         ('theme',
          '/html/body/div[5]/div[3]/div/a/p[1]/span[1]/text()'),
     )
     for field_name, xpath in field_xpaths:
         loader.add_xpath(field_name, xpath)

     yield loader.load_item()
示例#2
0
    def parse_item(self, response):
        """Load a SpideroneItem from the response, print it, then yield it.

        The 'author' and 'theme' fields are registered on an ItemLoader by
        absolute XPath; the loaded item is echoed to stdout before being
        yielded.
        """
        author_xpath = '/html/body/div[5]/div[3]/div/a/p[2]/span/span[2]/text()'
        theme_xpath = '/html/body/div[5]/div[3]/div/a/p[1]/span[1]/text()'

        loader = ItemLoader(item=items.SpideroneItem(), response=response)
        loader.add_xpath('author', author_xpath)
        loader.add_xpath('theme', theme_xpath)
        res = loader.load_item()

        # Echo the loaded item to stdout.
        print(f"data = {res}")
        yield res
示例#3
0
    def parse(self, response):
        """Build and return a SpideroneItem from the response.

        The ItemLoader helper is used in place of hand-written
        ``response.xpath(...).extract()`` calls, which read as cluttered.

        Fields registered on the loader:
            author -- the author
            theme  -- the theme (topic)

        Args:
            response: the response object handed to this callback; it is
                passed straight to ItemLoader as the XPath source.

        Returns:
            The item produced by ``ItemLoader.load_item()``.
        """
        # No scratch/debug statements may precede the loader: an early bare
        # ``return`` here would make the callback return None and leave the
        # extraction below unreachable.
        loader = ItemLoader(item=items.SpideroneItem(), response=response)
        loader.add_xpath(
            'author',
            '/html/body/div[5]/div[3]/div/a/p[2]/span/span[2]/text()')
        loader.add_xpath(
            'theme',
            '/html/body/div[5]/div[3]/div/a/p[1]/span[1]/text()')
        return loader.load_item()
示例#4
0
    def parse_old2(self, response):
        """Fill a SpideroneItem by calling ``xpath().extract()`` directly.

        Fields set on the item:
            author -- the author
            theme  -- the theme (topic)

        Each field receives whatever ``extract()`` returns for its XPath.
        """
        author_xpath = '/html/body/div[5]/div[3]/div/a/p[2]/span/span[2]/text()'
        theme_xpath = '/html/body/div[5]/div[3]/div/a/p[1]/span[1]/text()'

        record = items.SpideroneItem()
        record['author'] = response.xpath(author_xpath).extract()
        record['theme'] = response.xpath(theme_xpath).extract()
        return record
示例#5
0
    def parse_start_url(self, response):
        """Log the raw author extraction, then load, print and yield the item.

        The author XPath is first evaluated directly and its extracted
        result is logged between two banner lines. A SpideroneItem is then
        populated through an ItemLoader ('author' and 'theme' fields),
        printed to stdout, and yielded.
        """
        author_xpath = '/html/body/div[5]/div[3]/div/a/p[2]/span/span[2]/text()'
        theme_xpath = '/html/body/div[5]/div[3]/div/a/p[1]/span[1]/text()'
        banner = "++++++++++++++++++++++++++++++++++++"

        # Diagnostic dump of the raw author extraction, framed by banners.
        self.log(banner)
        raw_author = response.xpath(author_xpath).extract()
        self.log(raw_author)
        self.log(banner)

        loader = ItemLoader(item=items.SpideroneItem(), response=response)
        loader.add_xpath('author', author_xpath)
        loader.add_xpath('theme', theme_xpath)
        res = loader.load_item()

        print(f"data = {res}")
        yield res