示例#1
0
文件: yunqi.py 项目: yaodalu/crawler
    def parse_book_detail(self, response):
        """Build a ``YunqiBookDetailItem`` from a book detail page.

        Reads ``novelId`` from ``response.meta``, the label from the
        ``div.tags`` element, and the statistics from the table inside
        ``div#novelInfo``.

        Each value is taken with ``extract_first()``, so a node that is
        absent from the page leaves that field as ``None`` instead of
        raising ``IndexError`` and discarding the whole item.

        Yields:
            A single populated ``YunqiBookDetailItem``.
        """
        info = response.xpath('//div[@id="novelInfo"]')

        # (item field, XPath relative to the info block). The paths go
        # straight from <table> to <tr> because the page source has no
        # <tbody> tag.
        table_fields = (
            ("novelAllClick", './/table/tr[2]/td[1]/text()'),
            ("novelMonthClick", './/table/tr[3]/td[1]/text()'),
            ("novelWeekClick", './/table/tr[4]/td[1]/text()'),
            ("novelAllPopular", './/table/tr[2]/td[2]/text()'),
            ("novelMonthPopular", './/table/tr[3]/td[2]/text()'),
            ("novelWeekPopular", './/table/tr[4]/td[2]/text()'),
            ("novelCommentNum", './/table/tr[5]/td[2]/text()'),
            ("novelAllComm", './/table/tr[2]/td[3]/text()'),
            ("novelMonthComm", './/table/tr[3]/td[3]/text()'),
            ("novelWeekComm", './/table/tr[4]/td[3]/text()'),
        )

        bookDetailItem = YunqiBookDetailItem()
        bookDetailItem["novelId"] = response.meta["novelId"]
        bookDetailItem["novelLabel"] = response.xpath(
            '//div[@class="tags"]/text()').extract_first()
        for field, path in table_fields:
            bookDetailItem[field] = info.xpath(path).extract_first()

        yield bookDetailItem
    def parse_book_detail(self, response):
        """Parse one book's detail page into a ``YunqiBookDetailItem``.

        ``novelId`` comes from ``response.meta``; every other field is the
        first text node matched by its XPath on the response (``None``
        when nothing matches).

        Yields:
            A single ``YunqiBookDetailItem`` describing the book.
        """
        # (item field, XPath of the text node holding its value).
        # ``//`` matches at any depth; the statistics live in the table
        # under the div whose id is "novelInfo".
        queries = (
            ("novelLabel",
             ".//div[@class='tags']/text()"),
            ("novelAllClick",
             ".//div[@id='novelInfo']/table/tr[2]/td[1]/text()"),
            ("novelAllPopular",
             ".//div[@id='novelInfo']/table/tr[2]/td[2]/text()"),
            ("novelAllComm",
             ".//div[@id='novelInfo']/table/tr[2]/td[3]/text()"),
            ("novelMonthClick",
             ".//div[@id='novelInfo']/table/tr[3]/td[1]/text()"),
            ("novelMonthPopular",
             ".//div[@id='novelInfo']/table/tr[3]/td[2]/text()"),
            ("novelMonthComm",
             ".//div[@id='novelInfo']/table/tr[3]/td[3]/text()"),
            ("novelWeekClick",
             ".//div[@id='novelInfo']/table/tr[4]/td[1]/text()"),
            ("novelWeekPopular",
             ".//div[@id='novelInfo']/table/tr[4]/td[2]/text()"),
            ("novelWeekComm",
             ".//div[@id='novelInfo']/table/tr[4]/td[3]/text()"),
            ("novelCommentNum",
             ".//*[@id='novelInfo_commentCount']/text()"),
        )

        values = {"novelId": response.meta['novelId']}
        for field, query in queries:
            values[field] = response.xpath(query).extract_first()

        # One item per book detail page.
        yield YunqiBookDetailItem(**values)
示例#3
0
    def parse_book_detail(self, response):
        """Extract a book's label and statistics into a ``YunqiBookDetailItem``.

        ``novelId`` is read from ``response.meta``. All other values are
        the first text node matched by their XPath, or ``None`` when the
        node is missing.

        Yields:
            A single ``YunqiBookDetailItem``.
        """
        novelId = response.meta['novelId']
        # NOTE(review): "novelLable" (sic) is kept because it is passed as
        # a field name to YunqiBookDetailItem, whose definition is not
        # visible here -- confirm whether the field should be "novelLabel".
        novelLable = response.xpath(
            "//div[@class='tags']/text()").extract_first()

        # Browser dev tools show //*[@id="novelInfo"]/table/tbody/tr[2]/td[1],
        # but the XPaths below address <tr> directly under <table>.

        # Total clicks / total popularity / total recommendations.
        novelAllClick = response.xpath(
            ".//*[@id='novelInfo']/table/tr[2]/td[1]/text()").extract_first()
        novelAllPopular = response.xpath(
            ".//*[@id='novelInfo']/table/tr[2]/td[2]/text()").extract_first()
        novelAllComm = response.xpath(
            ".//*[@id='novelInfo']/table/tr[2]/td[3]/text()").extract_first()

        # Monthly clicks / monthly popularity / monthly recommendations.
        novelMonthClick = response.xpath(
            ".//*[@id='novelInfo']/table/tr[3]/td[1]/text()").extract_first()
        novelMonthPopular = response.xpath(
            ".//*[@id='novelInfo']/table/tr[3]/td[2]/text()").extract_first()
        novelMonthComm = response.xpath(
            ".//*[@id='novelInfo']/table/tr[3]/td[3]/text()").extract_first()

        # Weekly clicks / weekly popularity / weekly recommendations.
        novelWeekClick = response.xpath(
            ".//*[@id='novelInfo']/table/tr[4]/td[1]/text()").extract_first()
        novelWeekPopular = response.xpath(
            ".//*[@id='novelInfo']/table/tr[4]/td[2]/text()").extract_first()
        novelWeekComm = response.xpath(
            ".//*[@id='novelInfo']/table/tr[4]/td[3]/text()").extract_first()

        # Comment count. Select the text node rather than the element
        # itself; without /text() the serialized markup of the element
        # would be stored instead of the number, unlike every other field.
        novelCommNum = response.xpath(
            "//*[@id='novelInfo_commentCount']/text()").extract_first()

        bookDetailItem = YunqiBookDetailItem(
            novelId=novelId,
            novelLable=novelLable,
            novelAllClick=novelAllClick,
            novelAllPopular=novelAllPopular,
            novelAllComm=novelAllComm,
            novelMonthClick=novelMonthClick,
            novelMonthPopular=novelMonthPopular,
            novelMonthComm=novelMonthComm,
            novelWeekClick=novelWeekClick,
            novelWeekPopular=novelWeekPopular,
            novelWeekComm=novelWeekComm,
            novelCommentNum=novelCommNum)
        yield bookDetailItem
示例#4
0
    def parse_book_detail(self, response):
        """Yield a ``YunqiBookDetailItem`` scraped from a book detail page.

        ``novelId`` is taken from ``response.meta``; the label, the
        all-time / monthly / weekly table cells and the comment count are
        each the first text node matched by their XPath.
        """
        # Debugging aid: scrapy.shell.inspect_response(response, self)
        # can be called here to inspect the response interactively.

        def text_at(xpath):
            """Return the first match of *xpath* on the response."""
            return response.xpath(xpath).extract_first()

        yield YunqiBookDetailItem(
            novelId=response.meta['novelId'],
            novelLabel=text_at("//div[@class='tags']/text()"),
            novelAllClick=text_at(
                ".//*[@id='novelInfo']/table/tr[2]/td[1]/text()"),
            novelAllPopular=text_at(
                ".//*[@id='novelInfo']/table/tr[2]/td[2]/text()"),
            novelAllComm=text_at(
                ".//*[@id='novelInfo']/table/tr[2]/td[3]/text()"),
            novelMonthClick=text_at(
                ".//*[@id='novelInfo']/table/tr[3]/td[1]/text()"),
            novelMonthPopular=text_at(
                ".//*[@id='novelInfo']/table/tr[3]/td[2]/text()"),
            novelMonthComm=text_at(
                ".//*[@id='novelInfo']/table/tr[3]/td[3]/text()"),
            novelWeekClick=text_at(
                ".//*[@id='novelInfo']/table/tr[4]/td[1]/text()"),
            novelWeekPopular=text_at(
                ".//*[@id='novelInfo']/table/tr[4]/td[2]/text()"),
            novelWeekComm=text_at(
                ".//*[@id='novelInfo']/table/tr[4]/td[3]/text()"),
            novelCommentNum=text_at(
                ".//*[@id='novelInfo_commentCount']/text()"))
示例#5
0
 def parse_book_detail(self, response):
     """Scrape a book detail page into a ``YunqiBookDetailItem``.

     ``novelId`` is read from ``response.meta``; every other field is the
     first text node matched by its XPath (``None`` when nothing matches).
     The all-time click and recommendation values are also logged at INFO
     level.

     Yields:
         A single ``YunqiBookDetailItem``.
     """
     # The id travels with the request; a bare ``meta`` name is undefined
     # in this scope.
     novelId = response.meta["novelId"]
     # ``@class`` tests the attribute; a bare ``class`` would look for a
     # child element named "class" and never match.
     novelLabel = response.xpath(
         '//div[@class="tags"]/text()').extract_first()
     novelAllClick = response.xpath(
         '//*[@id="novelInfo"]/table/tr[2]/td[1]/text()').extract_first()
     novelMonthClick = response.xpath(
         '//*[@id="novelInfo"]/table/tr[3]/td[1]/text()').extract_first()
     novelWeekClick = response.xpath(
         '//*[@id="novelInfo"]/table/tr[4]/td[1]/text()').extract_first()
     novelAllPopular = response.xpath(
         '//*[@id="novelInfo"]/table/tr[2]/td[2]/text()').extract_first()
     novelMonthPopular = response.xpath(
         '//*[@id="novelInfo"]/table/tr[3]/td[2]/text()').extract_first()
     novelWeekPopular = response.xpath(
         '//*[@id="novelInfo"]/table/tr[4]/td[2]/text()').extract_first()
     novelCommentNum = response.xpath(
         '//*[@id="novelInfo_commentCount"]/text()').extract_first()
     novelAllComm = response.xpath(
         '//*[@id="novelInfo"]/table/tr[2]/td[3]/text()').extract_first()
     novelMonthComm = response.xpath(
         '//*[@id="novelInfo"]/table/tr[3]/td[3]/text()').extract_first()
     novelWeekComm = response.xpath(
         '//*[@id="novelInfo"]/table/tr[4]/td[3]/text()').extract_first()
     # One placeholder per value, passed as logging arguments: formatting
     # a two-element tuple into a single %s raises TypeError. This log
     # line also replaces the former debug print of the same two values.
     logging.info("novelAllClick,novelAllComm: %s,%s",
                  novelAllClick, novelAllComm)
     bookdetailItem = YunqiBookDetailItem(
         novelId=novelId,
         novelLabel=novelLabel,
         novelAllClick=novelAllClick,
         novelMonthClick=novelMonthClick,
         novelWeekClick=novelWeekClick,
         novelAllPopular=novelAllPopular,
         novelMonthPopular=novelMonthPopular,
         novelWeekPopular=novelWeekPopular,
         novelCommentNum=novelCommentNum,
         novelAllComm=novelAllComm,
         novelMonthComm=novelMonthComm,
         novelWeekComm=novelWeekComm)
     yield bookdetailItem