def parse_item(self, response):
    """Yield one ``SpideroneItem`` populated from ``response``.

    An ``ItemLoader`` is used in place of hand-written
    ``response.xpath(...).extract()`` calls.

    Fields loaded:
        author: text selected by the absolute author XPath below.
        theme: text selected by the absolute theme XPath below.
    """
    author_xpath = '/html/body/div[5]/div[3]/div/a/p[2]/span/span[2]/text()'
    theme_xpath = '/html/body/div[5]/div[3]/div/a/p[1]/span[1]/text()'

    loader = ItemLoader(item=items.SpideroneItem(), response=response)
    loader.add_xpath('author', author_xpath)
    loader.add_xpath('theme', theme_xpath)
    yield loader.load_item()
def parse_item(self, response):
    """Load a ``SpideroneItem`` from ``response``, print it, and yield it.

    The ``author`` and ``theme`` fields are filled through an ``ItemLoader``
    using absolute XPath expressions. The loaded item is echoed to stdout
    before being yielded.
    """
    # (field name, XPath) pairs, added in this order.
    field_xpaths = (
        ('author', '/html/body/div[5]/div[3]/div/a/p[2]/span/span[2]/text()'),
        ('theme', '/html/body/div[5]/div[3]/div/a/p[1]/span[1]/text()'),
    )

    loader = ItemLoader(item=items.SpideroneItem(), response=response)
    for field_name, xpath in field_xpaths:
        loader.add_xpath(field_name, xpath)

    res = loader.load_item()
    print(f"data = {res}")
    yield res
def parse(self, response):
    """Print the trailing dot-suffix of a fixed sample string, then return.

    As written, this method does not touch ``response``: it prints ``.2``
    (the slice of the sample string starting at its last ``'.'``) and
    returns ``None``.

    Args:
        response: The response passed in by the caller; currently unused
            because of the early return.

    Returns:
        None.
    """
    # Named ``sample`` rather than ``str`` so the builtin is not shadowed.
    sample = "asdf.asdf.1.2"
    # rfind gives the index of the last '.', so the slice keeps that dot
    # and everything after it.
    print(sample[sample.rfind('.'):])
    return

    # NOTE(review): everything below is unreachable because of the bare
    # ``return`` above. It is kept so the intended extraction logic is not
    # lost; either drop the debug lines above to enable it, or delete it.
    #
    # Use ItemLoader in place of the messier extract()/xpath() calls.
    loader = ItemLoader(item=items.SpideroneItem(), response=response)
    # 'author' and 'theme' fields
    loader.add_xpath('author', '/html/body/div[5]/div[3]/div/a/p[2]/span/span[2]/text()')
    loader.add_xpath('theme', '/html/body/div[5]/div[3]/div/a/p[1]/span[1]/text()')
    return loader.load_item()
def parse_old2(self, response):
    """Return a ``SpideroneItem`` filled by direct XPath extraction.

    This variant assigns the ``extract()`` results straight into the item
    without an ``ItemLoader``.

    Fields set:
        author: list returned by ``extract()`` for the author XPath.
        theme: list returned by ``extract()`` for the theme XPath.
    """
    author_xpath = '/html/body/div[5]/div[3]/div/a/p[2]/span/span[2]/text()'
    theme_xpath = '/html/body/div[5]/div[3]/div/a/p[1]/span[1]/text()'

    result = items.SpideroneItem()
    author_values = response.xpath(author_xpath).extract()
    theme_values = response.xpath(theme_xpath).extract()
    result['author'] = author_values
    result['theme'] = theme_values
    return result
def parse_start_url(self, response):
    """Log the extracted author text, then load, print, and yield an item.

    Steps:
        1. Log a banner line, the list extracted by the author XPath, and
           the banner line again via ``self.log``.
        2. Build a ``SpideroneItem`` through an ``ItemLoader`` with the
           ``author`` and ``theme`` XPaths.
        3. Print the loaded item to stdout and yield it.
    """
    banner = "++++++++++++++++++++++++++++++++++++"
    author_xpath = '/html/body/div[5]/div[3]/div/a/p[2]/span/span[2]/text()'
    theme_xpath = '/html/body/div[5]/div[3]/div/a/p[1]/span[1]/text()'

    # Debug output: raw author strings framed by banner lines.
    self.log(banner)
    author_texts = response.xpath(author_xpath).extract()
    self.log(author_texts)
    self.log(banner)

    loader = ItemLoader(item=items.SpideroneItem(), response=response)
    loader.add_xpath('author', author_xpath)
    loader.add_xpath('theme', theme_xpath)

    res = loader.load_item()
    print(f"data = {res}")
    yield res