def parse_book_detail(self, response):
    """Build a ``YunqiBookDetailItem`` from a book detail page.

    Args:
        response: The Scrapy response for the detail page. Its ``meta``
            must carry a ``"novelId"`` entry (a missing key raises
            ``KeyError``).

    Yields:
        A single ``YunqiBookDetailItem``. Any field whose node is absent
        from the page is set to ``None`` instead of aborting the whole
        item with an ``IndexError``.
    """
    bookDetailItem = YunqiBookDetailItem()
    bookDetailItem["novelId"] = response.meta["novelId"]
    bookDetailItem["novelLabel"] = response.xpath(
        '//div[@class="tags"]/text()').extract_first()

    node = response.xpath('//div[@id="novelInfo"]')
    # Note: the page source contains no <tbody> tag, so the rows are
    # addressed directly under <table>.
    # (item field, table row, table column) for every statistic cell.
    cell_positions = (
        ("novelAllClick", 2, 1),
        ("novelMonthClick", 3, 1),
        ("novelWeekClick", 4, 1),
        ("novelAllPopular", 2, 2),
        ("novelMonthPopular", 3, 2),
        ("novelWeekPopular", 4, 2),
        ("novelCommentNum", 5, 2),
        ("novelAllComm", 2, 3),
        ("novelMonthComm", 3, 3),
        ("novelWeekComm", 4, 3),
    )
    for field, row, col in cell_positions:
        # extract_first() returns None for a missing cell, whereas
        # extract()[0] would raise IndexError and drop the item.
        bookDetailItem[field] = node.xpath(
            './/table/tr[%d]/td[%d]/text()' % (row, col)).extract_first()

    yield bookDetailItem
def parse_book_detail(self, response):
    """Parse the detail information of a single book.

    See the ch00 supplementary notes, part 04 (web page parsing
    verification, yunqishuyuan2_Spider.py) for the parsing techniques
    used here.

    Args:
        response: The Scrapy response for the detail page; its ``meta``
            must contain ``'novelId'``.

    Yields:
        One ``YunqiBookDetailItem`` holding the book's detail fields.
        Fields whose node is not found are ``None``.
    """
    def first_text(query):
        # First match of the XPath query, or None when nothing matches.
        return response.xpath(query).extract_first()

    # '.' starts from the current node (the document here) and '//' matches
    # at any depth below it.
    fields = {}
    fields["novelId"] = response.meta['novelId']
    fields["novelLabel"] = first_text(".//div[@class='tags']/text()")
    fields["novelAllClick"] = first_text(
        ".//div[@id='novelInfo']/table/tr[2]/td[1]/text()")
    fields["novelAllPopular"] = first_text(
        ".//div[@id='novelInfo']/table/tr[2]/td[2]/text()")
    fields["novelAllComm"] = first_text(
        ".//div[@id='novelInfo']/table/tr[2]/td[3]/text()")
    fields["novelMonthClick"] = first_text(
        ".//div[@id='novelInfo']/table/tr[3]/td[1]/text()")
    fields["novelMonthPopular"] = first_text(
        ".//div[@id='novelInfo']/table/tr[3]/td[2]/text()")
    fields["novelMonthComm"] = first_text(
        ".//div[@id='novelInfo']/table/tr[3]/td[3]/text()")
    fields["novelWeekClick"] = first_text(
        ".//div[@id='novelInfo']/table/tr[4]/td[1]/text()")
    fields["novelWeekPopular"] = first_text(
        ".//div[@id='novelInfo']/table/tr[4]/td[2]/text()")
    fields["novelWeekComm"] = first_text(
        ".//div[@id='novelInfo']/table/tr[4]/td[3]/text()")
    fields["novelCommentNum"] = first_text(
        ".//*[@id='novelInfo_commentCount']/text()")

    # Emit the item carrying this book's detail information.
    yield YunqiBookDetailItem(**fields)
def parse_book_detail(self, response):
    """Extract a book's statistics from its detail page.

    Args:
        response: The Scrapy response for the detail page; its ``meta``
            must contain ``'novelId'``.

    Yields:
        One ``YunqiBookDetailItem``. Fields whose node is not found are
        ``None``.
    """
    novelId = response.meta['novelId']
    novelLable = response.xpath(
        "//div[@class='tags']/text()").extract_first()

    # Browser dev tools show //*[@id="novelInfo"]/table/tbody/tr[2]/td[1],
    # but the queries below address the rows without a tbody step.

    # Total clicks, total popularity, total recommendations.
    novelAllClick = response.xpath(
        ".//*[@id='novelInfo']/table/tr[2]/td[1]/text()").extract_first()
    novelAllPopular = response.xpath(
        ".//*[@id='novelInfo']/table/tr[2]/td[2]/text()").extract_first()
    novelAllComm = response.xpath(
        ".//*[@id='novelInfo']/table/tr[2]/td[3]/text()").extract_first()

    # Monthly clicks, monthly popularity, monthly recommendations.
    novelMonthClick = response.xpath(
        ".//*[@id='novelInfo']/table/tr[3]/td[1]/text()").extract_first()
    novelMonthPopular = response.xpath(
        ".//*[@id='novelInfo']/table/tr[3]/td[2]/text()").extract_first()
    novelMonthComm = response.xpath(
        ".//*[@id='novelInfo']/table/tr[3]/td[3]/text()").extract_first()

    # Weekly clicks, weekly popularity, weekly recommendations.
    novelWeekClick = response.xpath(
        ".//*[@id='novelInfo']/table/tr[4]/td[1]/text()").extract_first()
    novelWeekPopular = response.xpath(
        ".//*[@id='novelInfo']/table/tr[4]/td[2]/text()").extract_first()
    novelWeekComm = response.xpath(
        ".//*[@id='novelInfo']/table/tr[4]/td[3]/text()").extract_first()

    # Comment count. The /text() step is required: without it the query
    # returns the element's serialized HTML rather than its text, unlike
    # every other field collected here.
    novelCommNum = response.xpath(
        "//*[@id='novelInfo_commentCount']/text()").extract_first()

    # NOTE(review): the keyword is spelled ``novelLable``; confirm it matches
    # the field declared on YunqiBookDetailItem (possibly ``novelLabel``).
    bookDetailItem = YunqiBookDetailItem(
        novelId=novelId,
        novelLable=novelLable,
        novelAllClick=novelAllClick,
        novelAllPopular=novelAllPopular,
        novelAllComm=novelAllComm,
        novelMonthClick=novelMonthClick,
        novelMonthPopular=novelMonthPopular,
        novelMonthComm=novelMonthComm,
        novelWeekClick=novelWeekClick,
        novelWeekPopular=novelWeekPopular,
        novelWeekComm=novelWeekComm,
        novelCommentNum=novelCommNum)
    yield bookDetailItem
def parse_book_detail(self, response):
    """Collect one book's detail fields into a ``YunqiBookDetailItem``.

    Args:
        response: The Scrapy response for the detail page; its ``meta``
            must contain ``'novelId'``.

    Yields:
        One ``YunqiBookDetailItem``. Fields whose node is not found are
        ``None``.
    """
    # Debugging hint: scrapy.shell.inspect_response(response, self) opens an
    # interactive shell on this response.

    # (item field, XPath query) pairs, evaluated in this order.
    queries = (
        ("novelLabel", "//div[@class='tags']/text()"),
        ("novelAllClick", ".//*[@id='novelInfo']/table/tr[2]/td[1]/text()"),
        ("novelAllPopular", ".//*[@id='novelInfo']/table/tr[2]/td[2]/text()"),
        ("novelAllComm", ".//*[@id='novelInfo']/table/tr[2]/td[3]/text()"),
        ("novelMonthClick", ".//*[@id='novelInfo']/table/tr[3]/td[1]/text()"),
        ("novelMonthPopular",
         ".//*[@id='novelInfo']/table/tr[3]/td[2]/text()"),
        ("novelMonthComm", ".//*[@id='novelInfo']/table/tr[3]/td[3]/text()"),
        ("novelWeekClick", ".//*[@id='novelInfo']/table/tr[4]/td[1]/text()"),
        ("novelWeekPopular", ".//*[@id='novelInfo']/table/tr[4]/td[2]/text()"),
        ("novelWeekComm", ".//*[@id='novelInfo']/table/tr[4]/td[3]/text()"),
        ("novelCommentNum", ".//*[@id='novelInfo_commentCount']/text()"),
    )

    values = {"novelId": response.meta['novelId']}
    for field_name, query in queries:
        values[field_name] = response.xpath(query).extract_first()

    yield YunqiBookDetailItem(**values)
def parse_book_detail(self, response):
    """Extract a book's detail fields and yield them as an item.

    Args:
        response: The Scrapy response for the detail page; its ``meta``
            must contain ``"novelId"``.

    Yields:
        One ``YunqiBookDetailItem``. Fields whose node is not found are
        ``None``.
    """
    # The id travels on the response's meta; a bare ``meta`` name is not
    # defined in this scope.
    novelId = response.meta["novelId"]
    # ``@class`` tests the attribute; a bare ``class`` would instead look for
    # a child element named <class>.
    novelLabel = response.xpath(
        '//div[@class="tags"]/text()').extract_first()
    novelAllClick = response.xpath(
        '//*[@id="novelInfo"]/table/tr[2]/td[1]/text()').extract_first()
    novelMonthClick = response.xpath(
        '//*[@id="novelInfo"]/table/tr[3]/td[1]/text()').extract_first()
    novelWeekClick = response.xpath(
        '//*[@id="novelInfo"]/table/tr[4]/td[1]/text()').extract_first()
    novelAllPopular = response.xpath(
        '//*[@id="novelInfo"]/table/tr[2]/td[2]/text()').extract_first()
    novelMonthPopular = response.xpath(
        '//*[@id="novelInfo"]/table/tr[3]/td[2]/text()').extract_first()
    novelWeekPopular = response.xpath(
        '//*[@id="novelInfo"]/table/tr[4]/td[2]/text()').extract_first()
    novelCommentNum = response.xpath(
        '//*[@id="novelInfo_commentCount"]/text()').extract_first()
    novelAllComm = response.xpath(
        '//*[@id="novelInfo"]/table/tr[2]/td[3]/text()').extract_first()
    novelMonthComm = response.xpath(
        '//*[@id="novelInfo"]/table/tr[3]/td[3]/text()').extract_first()
    novelWeekComm = response.xpath(
        '//*[@id="novelInfo"]/table/tr[4]/td[3]/text()').extract_first()

    # One placeholder per value, passed as lazy logging arguments. This also
    # replaces the Python-2-only ``print`` statement that emitted the same
    # two values.
    logging.info("novelAllClick,novelAllComm: %s, %s",
                 novelAllClick, novelAllComm)

    bookdetailItem = YunqiBookDetailItem(
        novelId=novelId,
        novelLabel=novelLabel,
        novelAllClick=novelAllClick,
        novelMonthClick=novelMonthClick,
        novelWeekClick=novelWeekClick,
        novelAllPopular=novelAllPopular,
        novelMonthPopular=novelMonthPopular,
        novelWeekPopular=novelWeekPopular,
        novelCommentNum=novelCommentNum,
        novelAllComm=novelAllComm,
        novelMonthComm=novelMonthComm,
        novelWeekComm=novelWeekComm)
    yield bookdetailItem