def parse(self, response):
    # List of chapter link texts, e.g. '七星鲁王 第一章 血尸'
    baseList = response.xpath('//article/a/text()')
    # Extract all chapter links once instead of re-querying inside the loop
    links = response.xpath('//article/a/@href').extract()
    for i, base in enumerate(baseList):
        # Create a fresh item for every chapter instead of reusing one object
        item = DaomuItem()
        info = base.extract().split()
        item['title'] = info[0]
        item['chapter'] = info[1]
        item['chapterName'] = info[2]
        item['link'] = links[i]
        yield item
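Each of the parse methods in this section assumes a surrounding Scrapy spider class with DaomuItem importable from the project's items.py. A minimal skeleton is sketched below for context; the spider name, start URL, and import path are placeholders, not taken from the original code.

import scrapy
from ..items import DaomuItem  # assumed project layout


class DaomuSpider(scrapy.Spider):
    # Hypothetical skeleton the parse methods plug into;
    # name and start_urls are placeholders, not from the original.
    name = 'daomu'
    start_urls = ['http://www.example.com/daomubiji/']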
def parse(self, response): item = DaomuItem() item["bookName"] = response.xpath( '//h1[@class="focusbox-title"]/text()').extract()[0] articles = response.xpath('//article[@class="excerpt excerpt-c3"]') for article in articles: info = article.xpath('./a/text()').extract()[0].split(' ') item['bookTitle'] = info[0] item['zhName'] = info[2] item['zhNum'] = info[1] item['zhLink'] = article.xpath('./a/@href').extract()[0] yield item
def parse(self, response):
    # Match the book name separately from the chapter list
    book_name = response.xpath(
        '//h1[@class="focusbox-title"]/text()').extract()[0]
    # Match every chapter node
    articles = response.xpath('//article[@class="excerpt excerpt-c3"]')
    for article in articles:
        # Create the item object (the class defined in items.py)
        item = DaomuItem()
        item['BookName'] = book_name
        info = article.xpath('./a/text()').extract()[0].split(' ')
        # e.g. ['七星鲁王', '第一章', '血尸']
        item['BookTitle'] = info[0]
        item['zhNum'] = info[1]
        item['zhName'] = info[2]
        item['zhLink'] = article.xpath('./a/@href').extract()[0]
        yield item
def parse(self, response):
    # Match the book name separately (it sits outside the chapter list)
    book = response.xpath(
        '//h1[@class="focusbox-title"]/text()').extract()[0]
    # Base XPath matching every chapter node
    articles = response.xpath('//article[@class="excerpt excerpt-c3"]')
    for article in articles:
        # Create the item object (the class defined in items.py)
        item = DaomuItem()
        info = article.xpath('./a/text()').extract()[0].split(' ')
        # e.g. ['七星鲁王', '第一章', '血尸']
        item['book'] = book
        item['title'] = info[0]
        item['chapter_num'] = info[1]
        item['chapter'] = info[2]
        item['chapter_url'] = article.xpath('./a/@href').extract()[0]
        yield item
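For reference, the DaomuItem class referenced throughout lives in items.py. A minimal sketch matching the field names used in the variant directly above is given here; the other variants use different field names (bookName/zhNum/zhName, va_name/ch_number/ch_name, and so on), so the fields should be adjusted to whichever parse method is used.

import scrapy


class DaomuItem(scrapy.Item):
    # Sketch of items.py for the field names used in the preceding variant;
    # not taken from the original project, adjust names as needed.
    book = scrapy.Field()
    title = scrapy.Field()
    chapter = scrapy.Field()
    chapter_num = scrapy.Field()
    chapter_url = scrapy.Field()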
def parse_two_link(self, response):
    # Base XPath: list of all chapter nodes
    article_list = response.xpath('/html/body/section/div[2]/div/article')
    for article in article_list:
        # Create the item object
        item = DaomuItem()
        info = article.xpath('./a/text()').extract()[0].split()
        # info: ['七星鲁王', '第一章', '血尸']
        item['va_name'] = info[0]
        item['ch_number'] = info[1]
        item['ch_name'] = info[2]
        item['ch_link'] = article.xpath('./a/@href').extract()[0]
        # Hand the chapter link to the scheduler;
        # the item must be passed on to the next callback via the meta argument
        yield scrapy.Request(item['ch_link'],
                             meta={'item': item},
                             callback=self.parse_three_link)
def parse_two_link(self, response):
    # Base XPath: list of all chapter nodes
    article_list = response.xpath('//article[@class="excerpt excerpt-c3"]')
    for article in article_list:
        # Create the item object
        item = DaomuItem()
        info = article.xpath('./a/text()').extract()[0].split()
        print(info)  # debug: e.g. ['七星鲁王', '第一章', '血尸']
        item['va_name'] = info[0]
        item['ch_number'] = info[1] if len(info) > 1 else ""
        item['ch_name'] = info[2] if len(info) > 2 else ""
        item['ch_link'] = article.xpath('./a/@href').extract()[0]
        # Hand the chapter link to the scheduler;
        # the item must be passed on to the next callback via the meta argument
        yield scrapy.Request(item['ch_link'],
                             meta={'item': item},
                             callback=self.parse_three_link)
        # Note: avoid time.sleep() here -- it blocks Scrapy's event loop.
        # Throttle requests with the DOWNLOAD_DELAY setting instead.
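Both parse_two_link variants hand the item off to a parse_three_link callback that is not shown. A plausible sketch is below; the 'content' field and the XPath for the chapter body are assumptions, not taken from the original, and should be matched to the actual page markup and items.py.

def parse_three_link(self, response):
    # Recover the item passed along via the meta dict
    item = response.meta['item']
    # Assumed selector for the chapter body -- adjust to the real page structure
    paragraphs = response.xpath('//article//p/text()').extract()
    item['content'] = '\n'.join(paragraphs)  # 'content' is a hypothetical field
    yield item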