예제 #1
0
 def parse_comment(self, response):
     """Parse one page of the comment API and schedule the following page.

     Reads ``pid`` and ``page`` from ``response.meta``, yields one
     ``CommentItem`` per entry of ``data.list`` in the JSON body, and then
     yields a request for page ``page + 1`` while ``page`` is still below
     ``data.total_page``.
     """
     pid = response.meta['pid']
     page = response.meta['page']
     data = json.loads(response.text)['data']
     total_page = data['total_page']

     for one_data in data['list']:
         item = CommentItem()
         item['commentid'] = one_data['commentid']
         item['pid'] = one_data['articleid']
         item['cid'] = one_data['userInfo']['userid']
         item['created_at'] = one_data['addtime']
         item['content'] = one_data['content']
         item['like_counts'] = num_to_int(one_data['count_approve'])
         yield item

     # Request only the next page; its own callback run requests the one
     # after it. The former ``while page <= total_page`` loop re-queued every
     # remaining page from each response and also asked for page
     # ``total_page + 1``, one past the total reported by the API.
     if page < total_page:
         next_page = page + 1
         yield scrapy.Request(
             comment_api % (next_page, pid),
             callback=self.parse_comment,
             meta={'page': next_page, 'pid': pid},
         )
예제 #2
0
    def parse_comment(self, response):
        """Turn one JSON page of the comment API into ``CommentItem`` objects.

        Yields an item for each entry of ``data.list`` and, when
        ``data.next_page_url`` is truthy, a request for that URL handled by
        this same callback.
        """
        # (item field, key inside the comment entry)
        entry_fields = (
            ('content', 'content'),
            ('commentid', 'commentid'),
            ('pid', 'articleid'),
            ('created_at', 'addtime'),
            ('like_counts', 'count_approve'),
        )
        # (item field, key inside the entry's ``userInfo`` object)
        author_fields = (
            ('cid', 'userid'),
            ('uname', 'username'),
            ('avatar', 'face'),
        )

        payload = json.loads(response.text)['data']
        for entry in payload['list']:
            item = CommentItem()
            for field, key in entry_fields:
                item[field] = entry[key]
            for field, key in author_fields:
                item[field] = entry['userInfo'][key]
            quoted = entry['reply']
            if quoted:
                # A non-empty ``reply`` means this comment answers another
                # one; keep that comment's id (0 when the id is falsy).
                item['reply'] = quoted['commentid'] or 0
            yield item

        following = payload['next_page_url']
        if not following:
            return
        yield Request(following, callback=self.parse_comment)
예제 #3
0
    def parse_comment(self, response):
        """Parse an HTML comment fragment: paginate, then emit comments.

        Does nothing when the response body is empty. Otherwise it reads
        ``cur_page`` and ``pid`` from ``response.meta``, requests the next
        comment page when the ``data-totalpages`` attribute of the last
        ``<li>`` is a number greater than ``cur_page``, and for every ``<li>``
        yields a request for the commenter's profile page followed by a
        ``CommentItem``.
        """
        if not response.text:
            return

        total_pages = response.xpath('//li[last()]/@data-totalpages').get()
        cur_page = response.meta['cur_page']
        pid = response.meta['pid']
        if (total_pages and total_pages.isdigit()
                and int(total_pages) > cur_page):
            yield Request(comment_api % (pid, cur_page + 1),
                          callback=self.parse_comment,
                          meta={'pid': pid, 'cur_page': cur_page + 1})

        for node in response.xpath('//li'):
            href = node.xpath('./a[1]/@href').get()
            if href:
                # Without an href there is no profile page to fetch; the old
                # code would have requested "<root_url>None".
                yield Request('%s%s' % (self.root_url, href),
                              callback=self.parse_composer)

            item = CommentItem()
            # The id is sliced from the relative href, not from the absolute
            # URL (slicing the URL just dropped the first two characters of
            # the scheme).
            # NOTE(review): assumes hrefs look like "/u<id>" - confirm.
            item['cid'] = href[2:] if href else None
            item['pid'] = pid
            item['created_at'] = node.xpath(
                './/span[contains(@class,"send-time")]/text()').get()
            item['content'] = node.xpath(
                './/div[contains(@class,"comment-con")]/text()').get()
            item['like_counts'] = node.xpath(
                './/i[@class="counts"]/text()').get()
            yield item
예제 #4
0
    def parse_comment(self, response):
        """Parse a page of the comment API.

        For each entry of ``data.list`` this yields a ``CommentItem`` and then
        a request for the commenter's profile page (handled by
        ``parse_composer``, with the user id passed in ``meta['cid']``).
        Afterwards it follows ``data.next_page_url`` when that is truthy.
        """
        payload = json.loads(response.text)
        composer_url = 'http://www.xinpianchang.com/u%s?from=articleList'

        for entry in payload['data']['list']:
            item = CommentItem()
            item['commentid'] = entry['commentid']
            item['pid'] = entry['articleid']
            item['content'] = entry['content']
            item['created_at'] = entry['addtime_int']
            item['cid'] = entry['userInfo']['userid']
            item['uname'] = entry['userInfo']['username']
            item['avatar'] = entry['userInfo']['face']
            item['like_counts'] = entry['count_approve']
            if entry['reply']:
                # A non-empty ``reply`` means this comment answers another
                # comment; record the id of the comment being answered.
                item['reply'] = entry['reply']['commentid']
            yield item

            profile_request = Request(composer_url % item['cid'],
                                      callback=self.parse_composer)
            profile_request.meta['cid'] = item['cid']
            yield profile_request

        # Keep paging while the API advertises another page.
        following = payload['data']['next_page_url']
        if not following:
            return
        yield response.follow(following, self.parse_comment)
예제 #5
0
    def parse_comment(self, response):
        """Parse a JSON comment page for the article given in ``meta['pid']``.

        An empty body yields nothing. Otherwise the next page (if
        ``data.next_page_url`` is truthy) is requested first, carrying ``pid``
        along in its meta, and then every entry of ``data.list`` produces a
        ``CommentItem`` followed by a request for the commenter's profile
        page.
        """
        if not response.text:
            return

        pid = response.meta['pid']
        payload = json.loads(response.text)

        # Schedule the next page before emitting this page's items.
        following = payload['data']['next_page_url']
        if following:
            page_request = Request(following, callback=self.parse_comment)
            page_request.meta['pid'] = pid
            yield page_request

        for entry in payload['data']['list']:
            item = CommentItem()
            item['commentid'] = entry['commentid']
            item['pid'] = pid
            item['cid'] = entry['userInfo']['userid']
            item['uname'] = entry['userInfo']['username']
            item['avatar'] = entry['userInfo']['face']
            item['created_at'] = int(entry['addtime_int'])
            item['content'] = entry['content']
            item['like_counts'] = ci(entry['count_approve'])
            if entry['reply']:
                # Id of the comment being answered, 0 when that id is falsy.
                item['reply'] = entry['reply']['commentid'] or 0
            yield item

            profile_url = '%s/u%s' % (self.root_url, item['cid'])
            profile_request = Request(profile_url,
                                      callback=self.parse_composer)
            profile_request.meta['cid'] = item['cid']
            yield profile_request
예제 #6
0
    def parse_comment(self, response):
        """Parse a JSON comment page for the article given in ``meta['pid']``.

        Nothing is produced for an empty body. Otherwise the method first
        requests ``data.next_page_url`` (when truthy), forwarding ``pid`` in
        the request meta, and then walks ``data.list``: each entry becomes a
        ``CommentItem``, immediately followed by a request for the
        commenter's profile page with the user id in ``meta['cid']``.
        """
        body = response.text
        if not body:
            return

        pid = response.meta['pid']
        data = json.loads(body)['data']

        next_url = data['next_page_url']
        if next_url:
            next_request = Request(next_url, callback=self.parse_comment)
            next_request.meta['pid'] = pid
            yield next_request

        for raw in data['list']:
            record = CommentItem()
            record['commentid'] = raw['commentid']
            record['pid'] = pid
            record['cid'] = raw['userInfo']['userid']
            record['uname'] = raw['userInfo']['username']
            record['avatar'] = raw['userInfo']['face']
            record['created_at'] = int(raw['addtime_int'])
            record['content'] = raw['content']
            record['like_counts'] = ci(raw['count_approve'])
            if raw['reply']:
                # Id of the comment being answered, 0 when that id is falsy.
                record['reply'] = raw['reply']['commentid'] or 0
            yield record

            composer_request = Request(
                '%s/u%s' % (self.root_url, record['cid']),
                callback=self.parse_composer)
            composer_request.meta['cid'] = record['cid']
            yield composer_request
예제 #7
0
파일: discovery.py 프로젝트: chenwchen/xpc
 def parse_comment(self, response):
     """Parse a JSON comment page and follow its pagination link.

     Yields one ``CommentItem`` per entry of ``data.list`` and, when
     ``data.next_page_url`` is truthy, a follow-up request handled by this
     same method. Yields nothing when ``data`` is empty.
     """
     data = json.loads(response.text)['data']
     if not data:
         return

     for ct in data['list']:
         # A fresh item per comment: the previous version reused a single
         # instance, so every yielded item was the same mutable object.
         comment = CommentItem()
         comment['cid'] = ct['id']
         comment['content'] = ct['content']
         comment['avatar'] = ct['userInfo']['avatar']
         comment['uname'] = ct['userInfo']['username']
         comment['add_time'] = time.strftime(
             '%Y-%m-%d %H-%M', time.localtime(ct['addtime']))
         yield comment

     next_url = data['next_page_url']
     if next_url:
         # The request has to be yielded to be scheduled; merely calling
         # ``response.follow`` builds it and throws it away.
         yield response.follow(next_url, self.parse_comment)
    def parse_comments(self, response):
        """Parse a page of the comment API.

        Yields a ``CommentItem`` for every entry of ``data.list`` and then,
        if ``data.next_page_url`` is truthy, a follow-up request for that
        page handled by this same method.
        """
        payload = json.loads(response.text)

        for entry in payload['data']['list']:
            item = CommentItem()
            item['id'] = entry['id']
            item['content'] = entry['content']
            item['created_at'] = entry['addtime']
            item['pid'] = entry['resource_id']
            item['cid'] = entry['userid']
            author = entry['userInfo']
            item['avatar'] = author['avatar']
            item['uname'] = author['username']
            item['like_counts'] = entry['count_approve']
            item['referid'] = entry['referid']
            yield item

        following = payload['data']['next_page_url']
        if not following:
            return
        yield response.follow(following, callback=self.parse_comments)
예제 #9
0
    def parse_comment(self, response):
        """Parse a JSON comment page of the app API.

        Yields one ``CommentItem`` per entry of ``data.list`` and, when
        ``data.next_page_url`` is non-empty, a follow-up request for that
        path on ``https://app.xinpianchang.com`` handled by this same method.
        """
        result = json.loads(response.text)
        for i in result['data']['list']:
            # Build a new item per comment. With one shared instance a
            # ``reply`` set for an earlier comment stayed on later comments
            # that are not replies, and all yielded items aliased one object.
            comment = CommentItem()
            comment['uname'] = i['userInfo']['username']
            comment['cid'] = i['userInfo']['id']
            comment['avatar'] = i['userInfo']['avatar']
            comment['commentid'] = i['id']
            comment['pid'] = i['resource_id']
            comment['content'] = i['content']
            comment['created_at'] = i['addtime']
            comment['like_counts'] = i['count_approve']
            if i['referid'] != 0:
                # ``or 0`` normalises other falsy ids (e.g. None) to 0.
                comment['reply'] = i['referid'] or 0
            yield comment

        next_page = result['data']['next_page_url']
        # Truthiness rather than ``is not None``: an empty string would
        # otherwise send a request to the bare host.
        if next_page:
            yield response.follow('https://app.xinpianchang.com%s' % next_page,
                                  self.parse_comment)
예제 #10
0
    def parse_comment(self, response):
        """Parse a JSON comment page and keep paging through the comments.

        Yields one ``CommentItem`` per entry of ``data.list`` and, when
        ``data.next_page_url`` is truthy, a follow-up request for that page.
        """
        result = json.loads(response.text)
        for c in result['data']['list']:
            comment = CommentItem()
            comment['commentid'] = c['commentid']
            comment['pid'] = c['articleid']
            comment['cid'] = c['userInfo']['userid']
            comment['avatar'] = c['userInfo']['face']
            comment['uname'] = c['userInfo']['username']
            comment['created_at'] = c['addtime']
            comment['content'] = c['content']
            # Strip thousands separators, e.g. "1,234" -> "1234".
            comment['like_counts'] = c['count_approve'].replace(',', '')
            if c['reply']:
                # Id of the comment this one replies to.
                comment['reply'] = c['reply']['commentid']
            yield comment

        next_page = result['data']['next_page_url']
        if next_page:
            # Name the callback explicitly: a request without one is handled
            # by the spider's default ``parse`` method, not by this parser.
            yield response.follow(next_page, callback=self.parse_comment)
예제 #11
0
 def parse_comment(self, response):
     """Parse a JSON comment page for the article in ``response.meta['pid']``.

     Yields a ``CommentItem`` per entry of ``data.list``; when
     ``data.next_page_url`` is truthy, also yields a request for that page
     which carries the same ``pid`` in its meta.
     """
     payload = json.loads(response.text)

     for entry in payload['data']['list']:
         item = CommentItem()
         item['commentid'] = entry['commentid']
         item['pid'] = response.meta['pid']
         author = entry['userInfo']
         item['cid'] = author['userid']
         item['uname'] = author['username']
         item['avatar'] = author['face']
         item['created_at'] = entry['addtime']
         item['content'] = entry['content']
         item['like_counts'] = entry['count_approve']
         if entry['reply']:
             # Id of the comment this one replies to.
             item['reply'] = entry['reply']['commentid']
         yield item

     following = payload['data']['next_page_url']
     if not following:
         return
     page_request = Request(following, callback=self.parse_comment)
     page_request.meta['pid'] = response.meta['pid']
     yield page_request
예제 #12
0
    def parse_comment(self,response):
        """Parse a page of the comment API.

        For each entry of ``data.list`` this yields a ``CommentItem`` and then
        a request for the commenter's profile page, handled by
        ``parse_composer`` with the user id in ``meta['cid']``.
        """
        payload = json.loads(response.text)
        composer_url = 'http://www.xinpianchang.com/u%s?from=articleList'

        for entry in payload['data']['list']:
            item = CommentItem()
            item['commentid'] = entry['commentid']
            item['pid'] = entry['articleid']
            item['content'] = entry['content']
            item['created_at'] = entry['addtime_int']
            author = entry['userInfo']
            item['cid'] = author['userid']
            item['uname'] = author['username']
            item['avatar'] = author['face']
            item['like_counts'] = entry['count_approve']
            if entry['reply']:
                # Id of the comment this one replies to.
                item['reply'] = entry['reply']['commentid']
            yield item

            profile_request = Request(composer_url % item['cid'],
                                      callback=self.parse_composer)
            profile_request.meta['cid'] = item['cid']
            yield profile_request
예제 #13
0
 def parse_comment(self, response):
     """Handle a response from the comment API.

     Yields a ``CommentItem`` per entry of ``data.list`` and then follows
     ``data.next_page_url`` with this same callback when it is truthy.
     """
     payload = json.loads(response.text)

     for entry in payload['data']['list']:
         item = CommentItem()
         item['commentid'] = entry['commentid']
         item['pid'] = entry['articleid']
         item['content'] = entry['content']
         item['created_at'] = entry['addtime_int']
         item['like_counts'] = ci(entry['count_approve'])
         author = entry['userInfo']
         item['cid'] = author['userid']
         item['avatar'] = author['face']
         item['uname'] = author['username']
         if entry['reply']:
             # This comment answers an earlier one; store that comment's id.
             item['reply'] = entry['reply']['commentid']
         yield item

     following = payload['data']['next_page_url']
     if not following:
         return
     yield response.follow(following, self.parse_comment)
예제 #14
0
    def parse_comment(self, response):
        """Parse a JSON comment page of the app API.

        Yields one ``CommentItem`` per entry of ``data.list`` and, when
        ``data.next_page_url`` is non-empty, a follow-up request for that
        path on ``https://app.xinpianchang.com`` handled by this same method.
        """
        # The comment endpoint answers with JSON, so decode the body first.
        pageinfo = json.loads(response.text)

        # Named ``entries`` so the builtin ``list`` is not shadowed.
        entries = pageinfo['data']['list']
        for li in entries:
            comment = CommentItem()
            comment["commentid"] = li['id']
            comment['cid'] = li['userInfo']['id']
            comment['pid'] = li['resource_id']
            comment["uname"] = li['userInfo']['username']
            comment['avatar'] = li['userInfo']['avatar']
            comment['created_at'] = li['addtime']
            comment['like_counts'] = li['count_approve']
            comment["content"] = li['content']
            yield comment

        next_page = pageinfo['data']['next_page_url']
        comment_link = 'https://app.xinpianchang.com'
        # Truthiness rather than ``is not None``: an empty string would
        # otherwise send a request to the bare host.
        if next_page:
            yield response.follow(comment_link + next_page, self.parse_comment)