Example #1
    def parse(self, response):

        # The mobile Douyu API returns JSON; every record lives under the 'data' key
        # Parse the records straight from the response body
        node_list = json.loads(response.body)['data']

        # An empty list means every page has already been crawled
        if not node_list:
            return

        # Extract the fields we need from each record
        for node in node_list:

            # Fill an item with the data
            item = DouyuItem()
            item['image_link'] = node['vertical_src']
            item['nick_name'] = node['nickname']
            item['room_id'] = node['room_id']
            item['city'] = node['anchor_city']

            yield item

        # Move on to the next page
        self.offset += 20
        yield scrapy.Request(self.base_url + str(self.offset),
                             callback=self.parse)
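
Every parse() method in this collection assumes the same surrounding Scrapy boilerplate: an item class with the fields being filled in, and a spider that defines the paging attributes (base_url, offset) and the first request. Below is a minimal sketch of that scaffold for Example #1; the class names, the start_requests wiring, and the placeholder endpoint URL are assumptions, since the examples only show the parse() method itself.

import json  # used by parse()

import scrapy


class DouyuItem(scrapy.Item):
    # Fields filled in by Example #1's parse()
    image_link = scrapy.Field()
    nick_name = scrapy.Field()
    room_id = scrapy.Field()
    city = scrapy.Field()


class DouyuSpider(scrapy.Spider):
    name = 'douyu'
    # Placeholder for the mobile Douyu JSON API; the real endpoint is not
    # shown in the examples.
    base_url = 'https://api.example.com/getVerticalRoom?limit=20&offset='
    offset = 0

    def start_requests(self):
        # Issue the first request; parse() then paginates by bumping offset
        yield scrapy.Request(self.base_url + str(self.offset), callback=self.parse)

    # def parse(self, response): ...  # as in Example #1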
Example #2
    def parse(self, response):
        # print (response.body)
        # Convert the JSON body into a Python dict
        data_list = json.loads(response.body)['data']

        # Iterate over the list of records
        for data in data_list:
            # Create an item object
            item = DouyuItem()
            # Copy the relevant fields from data into the item
            item['name'] = data['nickname']
            item['uid'] = data['owner_uid']
            item['image_link'] = data['vertical_src']
            item['city'] = data['anchor_city']
            # print (item)
            # Yield the item
            yield item

        # Pagination
        if len(data_list) != 0:
            self.offset += 100
            # Build the next-page URL
            next_url = self.host + str(self.offset)
            # Create the request and hand it to the scheduler
            yield scrapy.Request(next_url, callback=self.parse)
Example #3
 def parse(self, response):
     datas = json.loads(response.body)
     for data in datas['data']:
         item = DouyuItem()
         item['vertical_src'] = data['vertical_src']
         item['nickname'] = data['nickname']
         yield item
Example #4
    def parse(self, response):
        # print "###"*20
        # print type(response)
        # print response.body
        # print "###" * 20
        data_list = json.loads(response.body)['data']
        if not len(data_list):
            return
        for data in data_list:
            item = DouyuItem()
            # Room title
            item['room_name'] = data["room_name"]
            # Streamer nickname
            item['nick_name'] = data["nickname"]
            # Room cover image
            item['image_src'] = data['vertical_src']
            # Streamer's city
            item['city'] = data['anchor_city']

            self.unm += 1
            print "写入第%s个" % self.unm
            yield item

        self.offset +=100
        yield scrapy.Request(self.base_url + str(self.offset), callback=self.parse)
Example #5
    def parse(self, response):
        data_list = json.loads(response.body)["data"]
        print("len(data_list)" + str(len(data_list)))
        if len(data_list) == 0 and self.UpOver == False:
            return
        elif len(data_list) == 0 and self.UpOver == True:
            self.UpOver = False

        for data in data_list:
            item = DouyuItem()
            item["nickname"] = data["nickname"]
            item["room_id"] = data["room_id"]
            item["room_src"] = data["room_src"]
            item["vertical_src"] = data["vertical_src"]
            item["city"] = data["anchor_city"]
            print(item)
            yield item
        # print("*"*40)

        if not self.UpOver:
            self.offsetUp += 1
            yield scrapy.Request(self.base_urls + str(self.offsetUp),
                                 callback=self.parse)
        else:
            self.offsetDo -= 1
            yield scrapy.Request(self.base_urls + str(self.offsetDo),
                                 callback=self.parse)
Example #6
 def parse(self, response):
     data_list = json.loads(response.body)['data']
     # If data_list is empty, return ends the method
     if len(data_list) == 0:
         return
     for data in data_list:
         item = DouyuItem()
         item['nickname'] = data['nickname']
         item['imagelink'] = [data['vertical_src']]
         yield item
Example #7
    def parse(self, response):
        # Convert the response body from JSON into a Python structure first
        data_list = json.loads(response.body)['data']
        #print(content['data'])
        if len(data_list) == 0:
            return
        for data in data_list:
            item = DouyuItem()
            item['nickname'] = data['nickname']
            item['imagelink'] = data['vertical_src']

            yield item
Example #8
 def parse(self, response):
     data_list = json.loads(response.body)['data']
     if len(data_list) == 0:
         return
     for data in data_list:
         item = DouyuItem()
         item["nickName"] = data["nickname"]
         item["imageLink"] = data["vertical_src"]
         yield item
     self.offset += 20
     yield scrapy.Request(self.base_url + str(self.offset),
                          callback=self.parse)
Example #9
    def parse(self, response):
        # There are two parts in the response text: one is error: 0;
        # the other is data: {}
        data = json.loads(response.text)['data']
        for each in data:
            item = DouyuItem()
            item['nickname'] = each['nickname']
            item['imagelink'] = each['vertical_src']

            yield item
        self.offset += 20
        yield scrapy.Request(self.url + str(self.offset), callback=self.parse)
Example #10
    def parse(self, response):
        data_list = json.loads(response.body.decode())['data']['list']

        for room in data_list:
            item = DouyuItem()
            item['rid'] = room['rid']
            item['roomName'] = room['roomName']
            item['roomSrc'] = room['roomSrc']
            item['nickname'] = room['nickname']
            item['hn'] = room['hn']

            yield item
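
Example #10 targets a newer JSON layout where the records sit under data.list and the field names are camelCase. A sketch of the item definition those assignments imply (an assumption, since the project's items.py is not shown):

import scrapy


class DouyuItem(scrapy.Item):
    rid = scrapy.Field()       # room id
    roomName = scrapy.Field()  # room title
    roomSrc = scrapy.Field()   # cover image URL
    nickname = scrapy.Field()  # streamer nickname
    hn = scrapy.Field()        # viewer count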
Example #11
    def parse(self, response):
        data_list = json.loads(response.body)['data']
        if len(data_list) == 0:
            return
        for data in data_list:
            item = DouyuItem()
            item['nickname'] = data['nickname']
            item['imagelink'] = data['vertical_src']

            yield item

        self.offset += 20
        yield scrapy.Request(self.baseURL + str(self.offset), callback=self.parse)
Example #12
 def parse(self, response):
     data_list = json.loads(response.body)['data']
     # When data_list is empty, return ends the crawl
     if len(data_list) == 0:
         return
     for data in data_list:
         item = DouyuItem()
         item['nickname'] = data["nickname"]
         item['vertical_src'] = data["vertical_src"]
         yield item
     # Increment the offset, then schedule the next request with parse() as the callback
     self.offset += 20
     yield scrapy.Request(self.baseUrl + str(self.offset), callback=self.parse)
Example #13
    def parse(self, response):

        # Convert the JSON data into Python objects
        data = json.loads(response.text)['data']
        for each in data:
            item = DouyuItem()

            item['nickname'] = each['nickname']
            item['imagelink'] = each['vertical_src']

            yield item

        self.offset += 20
        yield scrapy.Request(self.url + str(self.offset), callback=self.parse)
Example #14
    def parse(self, response):
        content = response.text
        data = json.loads(content)['data']
        for each in data:
            item = DouyuItem()

            item['nickname'] = each['nickname']
            item['image_link'] = each['vertical_src']
            yield item
        # If this page holds fewer than the 20 records per page, it is the last page, so stop
        if len(data) == 20:
            self.offset += 20
            yield scrapy.Request(self.url + str(self.offset),
                                 callback=self.parse)
Example #15
    def parse(self, response):
    	data_list = json.loads(response.body)["data"]["list"]
    	if not len(data_list):
    		return
    	for data in data_list:
    		item = DouyuItem()
    		item["nickname"] = data["nickname"] if len(data["nickname"]) > 0 else None
    		item["imagelink"] = data["verticalSrc"] if len(data["verticalSrc"]) > 0 else None
    		yield item

    	# Remember the yield here!
    	self.offset += 1
    	yield scrapy.Request(self.baseURL.format(str(self.offset)),
    		callback=self.parse)
Example #16
 def parse(self, response):
     node_list = json.loads(response.body.decode())['data']
     if not node_list:
         return
     for node in node_list:
         item = DouyuItem()
         item['image_link'] = node['vertical_src']
         item['nick_name'] = node['nickname']
         item['room_id'] = node['room_id']
         item['city'] = node['anchor_city']
         yield item
     self.offset += 20
     yield scrapy.Request(self.base_url + str(self.offset),
                          callback=self.parse)
Example #17
    def parse(self, response):
        # Pull the 'data' collection out of the JSON response
        data = json.loads(response.text)["data"]

        for each in data:
            item = DouyuItem()
            item["name"] = each["nickname"]
            item["imagesUrls"] = each["vertical_src"]

            yield item

        self.offset += 20
        yield scrapy.Request(self.url + str(self.offset),
                             callback=self.parse)
Example #18
    def parse(self, response):
        data = json.loads(response.body)
        if len(data['data']) == 0:
            return

        for info in data['data']:
            item = DouyuItem()
            item['nickname'] = info['nickname']
            item['image_url'] = info['vertical_src']
            yield item

        self.offset += 20
        yield scrapy.Request(self.base_url + str(self.offset),
                             callback=self.parse)
Example #19
 def channel_parse(self, response):
     # Callback for channel_get: parse the returned JSON, pull out each
     # streamer's room link, and issue a request for that room page
     response_json = json.loads(response.text)  # json.loads turns the JSON text into a dict
     channel = response.meta['channel']
     for i in response_json['data']['rl']:
         items = DouyuItem()  # instantiate the item
         items['channel'] = channel  # channel name
         items['watch_num'] = int(i['ol'])  # viewer count, cast to int so it sorts cleanly
         items['anchor_roomname'] = i['rn']  # room name (obfuscated)
         items['anchor_url'] = 'http://www.douyu.com' + i['url']
         items['anchor_name'] = i['nn']  # streamer name (obfuscated)
         yield Request(url=items['anchor_url'],
                       meta={'items': items},
                       callback=self.room_parse)  # to fix the redirect issue, add dont_filter=True
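
Example #19 attaches the half-filled item to the request via meta and hands it to a room_parse callback that is not shown here. A minimal sketch of such a callback, assuming it only needs to pick the item back up (and perhaps enrich it from the room page) before yielding it:

 def room_parse(self, response):
     # Recover the item that channel_parse attached to the request
     items = response.meta['items']
     # items['room_title'] = response.css('title::text').get()  # hypothetical extra field
     yield items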
Example #20
    def parse(self, response):
        nickList = json.loads(response.text)['data']
        if len(nickList) == 0:
            return

        for nick in nickList:
            item = DouyuItem()
            item['imagLink'] = nick['vertical_src']
            item['nickName'] = nick['nickname']
            item['nickCity'] = nick['anchor_city']
            yield item

        self.offset += 20
        yield scrapy.Request(self.baseUrl + str(self.offset),
                             callback=self.parse)
Example #21
 def parse(self, response):
     # Take the 'data' section of the returned JSON; json.load reads from a file object, while json.loads parses a string
     # data = json.loads(response.body)
     # print data
     data_list = json.loads(response.body)['data']
     if len(data_list) == 0:
         return
     for data in data_list:
         item = DouyuItem()
         item['nickname'] = data["nickname"]
         item['imagelink'] = data["vertical_src"]
         yield item
     self.offset += 20
     yield scrapy.Request(self.baseURL + str(self.offset),
                          callback=self.parse)
Example #22
 def parse(self, response):
     """获取想要的字段"""
     data_list = json.loads(response.body)["data"]
     if not data_list:
         return
     for data in data_list:
         item = DouyuItem()
         item["room_link"] = "http://www.douyu.com/" + data["room_id"]
         item["image_link"] = data["vertical_src"]
         item["nick_name"] = data["nickname"]
         item["anchor_city"] = data["anchor_city"]
         yield item
     # Build the next-page request and hand it to the scheduler
     self.offset += 100
     yield scrapy.Request(base_url + str(self.offset), callback=self.parse)
Example #23
    def parse(self, response):
        # Parse the JSON response
        data_list = json.loads(response.body)['data']
        # Exit the method if data is empty
        if not len(data_list):
            return
        for data in data_list:
            item = DouyuItem()
            item['nickname'] = data["nickname"]
            item['imagelink'] = data["vertical_src"]
            yield item

        self.offset += 20
        # print(self.baseURL + str(self.offset))
        yield scrapy.Request(self.baseURL + str(self.offset),
                             callback=self.parse)
Example #24
    def parse(self, response):
        print('This is step one')
        data_list = json.loads(response.body.decode())['data']
        if not data_list:
            print('No data!')
            return
        for room_dict in data_list:
            item = DouyuItem()
            item['room_id'] = room_dict['room_id']
            item['vertical_src'] = room_dict['vertical_src']
            item['nickname'] = room_dict['nickname']
            item['anchor_city'] = room_dict['anchor_city']

            yield item
        self.offset += 60
        url = self.base_url + str(self.offset)
        yield scrapy.Request(url, callback=self.parse)
Example #25
    def parse(self, response):
        data_list = json.loads(response.body)['data']
        if len(data_list) == 0:
            print '=' * 30
            return

        for data in data_list:
            item = DouyuItem()
            item['nickname'] = data['nickname']
            item['imagelink'] = data['vertical_src']
            yield item

        # Send the next API request
        self.offset += 20
        next_url = self.baseURL + str(self.offset)
        print "--------------------", next_url
        yield scrapy.Request(next_url, callback=self.parse, dont_filter=True)
Example #26
    def parse(self, response):
        # How to handle a JSON response
        data_list = json.loads(response.body)['data']
        if len(data_list) == 0:
            return
        for data in data_list:
            # You could skip the pipeline and just print the data:
            # print data['nickname']
            # print data['vertical_src']
            # print '*'*40
            item = DouyuItem()
            item['nickname'] = data['nickname']
            item['imagelink'] = data['vertical_src']

            yield item
        self.offset += 20
        yield scrapy.Request(self.baseurl + str(self.offset), callback=self.parse)
Example #27
    def parse(self, response):
        data_list = json.loads(response.body)['data']

        if data_list:
            for data in data_list:
                item = DouyuItem()
                item['room_id'] = data['room_id']
                item['image_src'] = data['vertical_src']
                item['nick_name'] = data['nickname']
                item['city'] = data['anchor_city']

                # yield scrapy.Request(url=item['image_src'], meta = {"image_name" : item['nick_name']}, callback = self.parse_image)

                yield item
            # Handle the next page
            self.offset += 20
            yield scrapy.Request(url=self.base_url + str(self.offset), callback=self.parse)
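
The commented-out request in Example #27 suggests downloading each cover image through a dedicated parse_image callback. One way such a callback could look, assuming it simply writes the response body to disk under the name passed in meta (Scrapy's built-in ImagesPipeline is usually the more robust choice for bulk downloads):

    def parse_image(self, response):
        # The image request above carries the streamer's name in meta,
        # so save the downloaded cover under that name.
        image_name = response.meta['image_name']
        with open(image_name + '.jpg', 'wb') as f:
            f.write(response.body)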
Example #28
    def parse(self, response):
        # print(response.body)
        data_list = json.loads(response.body_as_unicode())['data']
        if not len(data_list):
            return
        try:
            for data in data_list:
                item = DouyuItem()

                item['nickname'] = data['nickname']
                item['imagelink'] = data['vertical_src']
                yield item

            self.offset += 20
            yield scrapy.Request(self.base_url + str(self.offset),
                                 callback=self.parse)
        except Exception as e:
            print(e)
Example #29
    def parse(self, response):
        data_list = json.loads(response.body)['data']

        # As soon as the 'data' section of the response JSON is empty, the crawl is finished
        if not data_list:
            return

        for data in data_list:
            item = DouyuItem()
            item["room_link"] = "http://www.douyu.com/" + data['room_id']
            item["image_link"] = data['vertical_src']
            item["nick_name"] = data['nickname']
            item["city"] = data['anchor_city']

            yield item

        self.offset += 20
        yield scrapy.Request(self.base_url + str(self.offset), callback = self.parse)
Example #30
    def parse(self, response):
        # The response body is JSON
        # Convert it into a Python data structure first
        list = json.loads(response.text)["data"]
        # If the list is empty, the crawl is finished
        # if len(list) == 0:
        if self.offset == 60:
            return
        for info in list:
            item = DouyuItem()
            item['img'] = info['vertical_src']
            item['name'] = info['nickname']
            item['city'] = info['anchor_city']
            yield item

        # Pagination
        self.offset += 20
        yield scrapy.Request(self.url + str(self.offset), callback=self.parse)
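
Each example ends with yield item, which hands the item to the project's item pipelines (Example #26 even notes that you could skip the pipeline and print instead). A minimal pipeline sketch that writes the yielded DouyuItem objects to a JSON-lines file, assuming Python 3 and the standard Scrapy pipeline hooks, enabled via ITEM_PIPELINES in settings.py:

import json


class DouyuPipeline(object):
    def open_spider(self, spider):
        # One JSON object per line; the filename is an arbitrary choice
        self.file = open('douyu.jsonl', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        self.file.write(json.dumps(dict(item), ensure_ascii=False) + '\n')
        return item

    def close_spider(self, spider):
        self.file.close()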