Example #1
    def parse(self, response):

        item = DouyuspiderItem()

        # Stub values: the nickname is hard-coded, and response.url stands
        # in for the image link here, since the snippet never defines the
        # `urls` variable it originally referenced.
        item['nickName'] = 'shake'
        item['imageLink'] = response.url

        yield item
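
All six examples instantiate DouyuspiderItem from the project's items.py, which is not shown here. Judging from the fields the snippets assign, a minimal sketch covering Examples #1 and #2 might look like this (the later examples use different field names):

    import scrapy

    class DouyuspiderItem(scrapy.Item):
        # Fields assigned in Examples #1 and #2; other examples use
        # nickname/imageurl, roomSrc/hn, or vertical_src/anchor_city instead
        nickName = scrapy.Field()
        imageLink = scrapy.Field()
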
Example #2
    def parse(self, response):
        data = json.loads(response.text)['data']

        for each in data:
            item = DouyuspiderItem()

            item['nickName'] = each['nickname']
            item['imageLink'] = each['vertical_src']

            yield item

        # advance the offset and request the next page
        self.offset += 20
        yield scrapy.Request(self.url + str(self.offset), callback=self.parse)
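
Examples #2, #3, and #5 paginate by appending self.offset to a base URL, but the class header is not part of the snippets. A plausible, hypothetical header, with the endpoint as a placeholder, would be:

    import json
    import scrapy
    from douyuSpider.items import DouyuspiderItem  # hypothetical module path

    class DouyuSpider(scrapy.Spider):
        name = "douyu"
        # Placeholder endpoint; the real API URL is not shown in the snippets
        url = "http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset="
        offset = 0
        start_urls = [url + str(offset)]
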
Example #3
 def parse(self, response):
     data_list = json.loads(response.body)['data']
     # stop once there is no more data to crawl
     if len(data_list) == 0:
         return
     # extract each record
     for data in data_list:
         item = DouyuspiderItem()
         item['nickname'] = data['nickname']
         item['imageurl'] = data['vertical_src']
         yield item
     # advance the offset and crawl the next page
     self.offset += 20
     yield scrapy.Request(self.baseURL + str(self.offset), callback=self.parse)
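
Each yield item above hands the item to the project's item pipeline, which these snippets do not include. A minimal, hypothetical pipeline that persists the items as JSON lines could look like:

    import json

    class DouyuspiderPipeline:
        # Hypothetical pipeline; the class name and output file are assumptions
        def open_spider(self, spider):
            self.file = open("douyu.jl", mode="a", encoding="utf-8")

        def process_item(self, item, spider):
            self.file.write(json.dumps(dict(item), ensure_ascii=False) + "\n")
            return item

        def close_spider(self, spider):
            self.file.close()
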
Example #4
    def parse(self, response):
        data_list = json.loads(response.body)['data']
        if len(data_list['list']) == 0:
            return  # return ends the generator when the list is empty
        for data in data_list['list']:
            item = DouyuspiderItem()
            item['nickname'] = data['nickname'].encode(
                'utf-8')  # encode to UTF-8 bytes to avoid garbled Chinese text
            item['roomSrc'] = data['roomSrc']
            item['hn'] = data['hn']
            yield item  # yield makes this method a generator

        self.page += 1
        url = self.base_url.format(self.page)

        yield scrapy.Request(url, callback=self.parse)  # pass the callback itself, without parentheses
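
Example #4 formats a page number into a URL template instead of adding an offset. The attributes it relies on are not shown; a hypothetical header, with the template as a placeholder, might be:

    class DouyuPageSpider(scrapy.Spider):  # hypothetical class name
        name = "douyu_page"
        # Placeholder template; the snippet only shows base_url.format(self.page)
        base_url = "https://www.douyu.com/directory/all?page={}"
        page = 1
        start_urls = [base_url.format(page)]
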
Example #5
 def parse(self, response):
     """解析方法"""
     json_str = response.body.decode()
     my_dict = json.loads(json_str).get("data")
     if my_dict:
         for item in my_dict:
             douyumm = DouyuspiderItem()
             douyumm["nickname"] = item["nickname"]
             douyumm["vertical_src"] = item["vertical_src"]
             yield douyumm
             # If the Request below were yielded here, inside the for loop,
             # each yield would just put it on the queue to await download
             # yield scrapy.Request(self.url + str(self.offset), callback=self.parse, dont_filter=True)
         self.offset += 20  # advance the offset by 20
         # dont_filter skips the offsite/duplicate filter, so requests are
         # not dropped because of allowed_domains
         yield scrapy.Request(self.url + str(self.offset),
                              callback=self.parse,
                              dont_filter=True)
     pxd = 1  # leftover assignment, unused
Example #6
    def parse(self, response):
        body_bytes = response.body

        body_bytes_decode_to_str = response.body.decode("utf-8")
        # with open("斗鱼主播信息.json", mode="a", buffering=-1, encoding="utf-8", errors="ignore") as f:
        #     f.write(body_bytes_decode_to_str)
        #     f.write(", \n")
        self.f1.write(body_bytes_decode_to_str)
        self.f1.write(", \n")

        # loads/dumps here assume: from json import loads, dumps
        body_str_loads_to_dict = loads(response.body.decode("utf-8"))

        body_dict_dumps_to_str = dumps(body_str_loads_to_dict,
                                       ensure_ascii=False)
        # with open("斗鱼主播信息2.json", mode="a", buffering=-1, encoding="utf-8", errors="ignore") as f:
        #     f.write(body_dict_dumps_to_str)
        #     f.write(", \n")
        self.f2.write(body_dict_dumps_to_str)
        self.f2.write(", \n")

        data_value = loads(response.body.decode("utf-8"))["data"]

        data_list = data_value

        if not data_list:
            return

        # hand the items to the pipeline
        for i in data_list:
            # print('*' * 100)
            # print(i["vertical_src"])
            # print(i["nickname"])
            # print(i["anchor_city"])
            # print('*' * 100)

            # create the item object
            item = DouyuspiderItem()
            """Keep the crawl from aborting on empty fields"""
            # Option 1: check the value first
            if len(str(i["vertical_src"])):
                item["vertical_src"] = i["vertical_src"]
            else:
                item["vertical_src"] = ""
            if len(str(i["nickname"])):
                item["nickname"] = i["nickname"]
            else:
                item["nickname"] = ""
            if len(str(i["anchor_city"])):
                item["anchor_city"] = i["anchor_city"]
            else:
                item["anchor_city"] = ""

            # # Option 2: catch the exception
            # try:
            #     item["vertical_src"] = i["vertical_src"]
            # except Exception as ex:
            #     print(ex)
            #     item["vertical_src"] = ""
            # try:
            #     item["nickname"] = i["nickname"]
            # except Exception as ex:
            #     print(ex)
            #     item["nickname"] = ""
            # try:
            #     item["anchor_city"] = i["anchor_city"]
            # except Exception as ex:
            #     print(ex)
            #     item["anchor_city"] = ""

            # hand the item to the pipeline
            yield item

        # hand the next Request to the scheduler
        self.offset += 20
        yield scrapy.Request(self.baseURL.format(self.offset),
                             callback=self.parse)
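
Example #6 writes to self.f1 and self.f2 without showing where they are opened or closed. One plausible setup, reusing the filenames from the commented-out with open(...) blocks, is:

    class DouyuJsonSpider(scrapy.Spider):  # hypothetical class name
        name = "douyu_json"

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # filenames taken from the commented-out blocks in parse()
            self.f1 = open("斗鱼主播信息.json", mode="a", encoding="utf-8",
                           errors="ignore")
            self.f2 = open("斗鱼主播信息2.json", mode="a", encoding="utf-8",
                           errors="ignore")

        def closed(self, reason):
            # Scrapy calls closed() automatically when the spider finishes
            self.f1.close()
            self.f2.close()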