Exemplo n.º 1
0
 def parse(self, response):
     """Turn the JSON feed in ``response`` into items, then queue a follow-up.

     Decodes ``response.text`` as JSON, walks its ``aweme_list`` array and
     yields one ``DouyinItem`` per entry with the share title and a
     ``userID`` field derived from the entry's ``text_extra``. Once every
     entry has been yielded, yields a ``scrapy.Request`` for ``self.url``
     whose callback is ``self.parse2``.
     """
     feed = json.loads(response.text)
     for aweme in feed['aweme_list']:
         item = DouyinItem()
         item['title'] = aweme['share_info']['share_title']

         extras = aweme['text_extra']
         # When ``text_extra`` is empty/falsy the field keeps this raw value.
         item['userID'] = extras
         # NOTE(review): each pass overwrites the field, so only the last
         # entry's ``user_id`` ends up on the item -- confirm this is intended.
         for extra in extras or ():
             mentioned_id = extra['user_id']
             item['userID'] = mentioned_id
             print(mentioned_id)

         yield item

     yield scrapy.Request(self.url, callback=self.parse2)
Exemplo n.º 2
0
    def init_item(self, jsonobj):
        """Build a ``DouyinItem`` from one decoded video record.

        Args:
            jsonobj: Mapping that provides the ``author``, ``aweme_id``,
                ``desc``, ``statistics``, ``video`` and ``create_time``
                keys read below.

        Returns:
            The populated ``DouyinItem``.
        """
        item = DouyinItem()

        # Author fields.
        author = jsonobj['author']
        item['user_uid'] = author['uid']
        item['user_sid'] = author['short_id']
        item['user_birthday'] = author['birthday']
        item['user_gender'] = author['gender']

        # Video identity and description.
        item['video_id'] = jsonobj['aweme_id']
        item['video_desc'] = jsonobj['desc']

        # Engagement counters.
        stats = jsonobj['statistics']
        item['video_play'] = stats['play_count']
        item['video_comment'] = stats['comment_count']
        item['video_share'] = stats['share_count']
        item['video_digg'] = stats['digg_count']

        # Only the first URL of each list is kept.
        video = jsonobj['video']
        download_urls = video['download_addr']['url_list']
        item['video_durl'] = download_urls[0]
        cover_urls = video['dynamic_cover']['url_list']
        item['video_gurl'] = cover_urls[0]

        # ``create_time`` is stored after passing through the project helper.
        created = jsonobj['create_time']
        item['video_time'] = MyTools.transform_time(created)
        return item
Exemplo n.º 3
0
    def init_item(self, video_info, comment_list):
        """Assemble a ``DouyinItem`` from a video record and its comments.

        Args:
            video_info: Mapping that provides the ``author_user_id``,
                ``statistics``, ``desc``, ``share_url`` and ``video`` keys
                read below.
            comment_list: Stored unchanged under ``comment_list``.

        Returns:
            The populated ``DouyinItem``.
        """
        item = DouyinItem()

        # Author id
        item["author_user_id"] = video_info["author_user_id"]

        # Video aweme_id (taken from the statistics record)
        stats = video_info["statistics"]
        item["aweme_id"] = stats["aweme_id"]

        # Video description
        item["video_desc"] = video_info["desc"]

        # Like count, share count, comment count
        item["digg_count"] = stats["digg_count"]
        item["share_count"] = stats["share_count"]
        item["comment_count"] = stats["comment_count"]

        # Comment list
        item["comment_list"] = comment_list

        # Share link
        item["share_url"] = video_info["share_url"]

        video = video_info["video"]

        # Cover image URL list; only the first entry is kept
        cover_urls = video["origin_cover"]["url_list"]
        item["origin_cover"] = cover_urls[0]

        # Playback URL list; keep the first entry and drop everything from
        # "&line" onwards
        first_play_url = video["play_addr"]["url_list"][0]
        item["play_addr"] = first_play_url.split("&line")[0]

        # Download URL list; keep the first entry and drop everything from
        # "&line" onwards
        first_download_url = video["download_addr"]["url_list"][0]
        item["download_addr"] = first_download_url.split("&line")[0]

        return item