def parse(self, response):
    item = None  # initialized up front so the error handler below can reference it safely
    try:
        self.task.crawl_count += 1
        j = json.loads(response.body)
        name = j['data']['card']['name']
        mid = j['data']['card']['mid']
        # Refresh the cached author detail in redis.
        self.redis_connection.delete("author_detail::{}".format(mid))
        sex = j['data']['card']['sex']
        face = j['data']['card']['face']
        fans = j['data']['card']['fans']
        attention = j['data']['card']['attention']
        level = j['data']['card']['level_info']['current_level']
        official = j['data']['card']['Official']['title']
        archive = j['data']['archive_count']
        article = j['data']['article_count']

        item = AuthorItem()
        item['mid'] = int(mid)
        item['name'] = name
        item['face'] = face
        item['official'] = official
        item['sex'] = sex
        item['level'] = int(level)
        item['data'] = {
            'fans': int(fans),
            'attention': int(attention),
            'archive': int(archive),
            'article': int(article),
            'datetime': datetime.datetime.now()
        }
        item['c_fans'] = int(fans)
        item['c_attention'] = int(attention)
        item['c_archive'] = int(archive)
        item['c_article'] = int(article)

        url_list = response.url.split('&')
        if len(url_list) == 2:
            item['object_id'] = url_list[1]
        else:
            item['object_id'] = None

        # Chain a second request to the upstat API to collect view counts.
        yield Request(
            "https://api.bilibili.com/x/space/upstat?mid={mid}".format(
                mid=str(mid)),
            meta={'item': item},
            method='GET',
            callback=self.parse_view)
    except Exception as error:
        # Record the failure, notify by mail, and log the error.
        self.task.crawl_failed += 1
        mailer.send(
            to=["*****@*****.**"],
            subject="BiliobSpiderError",
            body="{}\n{}\n{}".format(item, response.url, error),
        )
        logging.error("Author spider failed while parsing")
        logging.error(response.url)
        logging.error(error)
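# The Request above hands the partially built item to `self.parse_view`, which is
# not included in this section. A minimal sketch of such a callback, assuming the
# upstat endpoint exposes `data.archive.view` and `data.article.view` (these field
# names are an assumption, not confirmed by the source):
def parse_view(self, response):
    try:
        j = json.loads(response.body)
        item = response.meta['item']
        # Attach total play/read counts to the snapshot dict (hypothetical keys).
        item['data']['archiveView'] = int(j['data']['archive']['view'])
        item['data']['articleView'] = int(j['data']['article']['view'])
        yield item
    except Exception as error:
        logging.error("Author spider failed while parsing view counts")
        logging.error(response.url)
        logging.error(error)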
def detailParse(self, response):
    try:
        j = json.loads(response.body)
        name = j['data']['card']['name']
        mid = j['data']['card']['mid']
        sex = j['data']['card']['sex']
        face = j['data']['card']['face']
        fans = j['data']['card']['fans']
        attention = j['data']['card']['attention']
        level = j['data']['card']['level_info']['current_level']
        official = j['data']['card']['Official']['title']
        archive = j['data']['archive_count']
        article = j['data']['article_count']

        item = AuthorItem()
        # Only keep authors with more than 1000 fans.
        if int(fans) > 1000:
            item['c_fans'] = int(fans)
            item['c_attention'] = int(attention)
            item['c_archive'] = int(archive)
            item['c_article'] = int(article)
            item['mid'] = int(mid)
            item['name'] = name
            item['face'] = face
            item['official'] = official
            item['sex'] = sex
            item['focus'] = True
            item['level'] = int(level)
            item['data'] = {
                'fans': int(fans),
                'attention': int(attention),
                'archive': int(archive),
                'article': int(article),
                # Store Beijing time (UTC+8).
                'datetime': datetime.datetime.utcnow() + datetime.timedelta(hours=8)
            }
            yield item
    except Exception as error:
        # Record the failure and log the error.
        self.task.crawl_failed += 1
        logging.error("Author spider failed while parsing")
        logging.error(response.url)
        logging.error(error)
def parse(self, response):
    item = None  # initialized up front so the error handler below can reference it safely
    try:
        j = json.loads(response.body)
        name = j['data']['card']['name']
        mid = j['data']['card']['mid']
        sex = j['data']['card']['sex']
        face = j['data']['card']['face']
        fans = j['data']['card']['fans']
        attention = j['data']['card']['attention']
        level = j['data']['card']['level_info']['current_level']
        official = j['data']['card']['Official']['title']
        archive = j['data']['archive_count']
        article = j['data']['article_count']

        item = AuthorItem()
        item['mid'] = int(mid)
        item['name'] = name
        item['face'] = face
        item['official'] = official
        item['sex'] = sex
        item['level'] = int(level)
        item['data'] = {
            'fans': int(fans),
            'attention': int(attention),
            'archive': int(archive),
            'article': int(article),
            'datetime': datetime.datetime.now()
        }
        item['c_fans'] = int(fans)
        item['c_attention'] = int(attention)
        item['c_archive'] = int(archive)
        item['c_article'] = int(article)

        # Chain a second request to the upstat API to collect view counts.
        yield Request(
            "https://api.bilibili.com/x/space/upstat?mid={mid}".format(
                mid=str(mid)),
            meta={'item': item},
            method='GET',
            callback=self.parse_view)
    except Exception as error:
        # Notify by mail and log the error.
        mailer.send(
            to=["*****@*****.**"],
            subject="BiliobSpiderError",
            body="{}\n{}\n{}".format(item, response.url, error),
        )
        logging.error("Author spider failed while parsing")
        logging.error(response.url)
        logging.error(error)
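# The `mailer` referenced in the error handlers is not defined in this section.
# It is assumed to be a module-level scrapy.mail.MailSender instance; one possible
# setup is sketched below (SMTP host, credentials, and port are placeholders, not
# taken from the source):
from scrapy.mail import MailSender

mailer = MailSender(
    smtphost="smtp.example.com",    # placeholder SMTP host
    mailfrom="*****@*****.**",      # sender address, masked as in the source
    smtpuser="*****@*****.**",
    smtppass="********",
    smtpport=465,
    smtpssl=True,
)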
def parse(self, response):
    try:
        j = json.loads(response.body)
        name = j['data']['card']['name']
        mid = j['data']['card']['mid']
        # Refresh the cached author detail in redis.
        self.redis_connection.delete("author_detail::{}".format(mid))
        sex = j['data']['card']['sex']
        face = j['data']['card']['face']
        fans = j['data']['card']['fans']
        attention = j['data']['card']['attention']
        level = j['data']['card']['level_info']['current_level']
        official = j['data']['card']['Official']['title']
        archive = j['data']['archive_count']
        article = j['data']['article_count']

        item = AuthorItem()
        item['mid'] = int(mid)
        item['name'] = name
        item['face'] = face
        item['official'] = official
        item['sex'] = sex
        item['level'] = int(level)
        item['data'] = {
            'fans': int(fans),
            'attention': int(attention),
            'archive': int(archive),
            'article': int(article),
            'datetime': datetime.datetime.now()
        }
        item['c_fans'] = int(fans)
        item['c_attention'] = int(attention)
        item['c_archive'] = int(archive)
        item['c_article'] = int(article)

        # Chain a second request to the upstat API to collect view counts.
        yield Request(
            "https://api.bilibili.com/x/space/upstat?mid={mid}".format(
                mid=str(mid)),
            meta={'item': item},
            method='GET',
            callback=self.parse_view)
    except Exception as error:
        # Log the error when parsing fails.
        logging.error("Author spider failed while parsing")
        logging.error(response.url)
        logging.error(error)
def detailParse(self, response):
    try:
        j = json.loads(response.body)
        name = j['data']['card']['name']
        mid = j['data']['card']['mid']
        sex = j['data']['card']['sex']
        face = j['data']['card']['face']
        fans = j['data']['card']['fans']
        attention = j['data']['card']['attention']
        level = j['data']['card']['level_info']['current_level']
        official = j['data']['card']['Official']['title']
        archive = j['data']['archive_count']
        article = j['data']['article_count']

        item = AuthorItem()
        # Only keep authors with more than 1000 fans.
        if int(fans) > 1000:
            item['c_fans'] = int(fans)
            item['c_attention'] = int(attention)
            item['c_archive'] = int(archive)
            item['c_article'] = int(article)
            item['mid'] = int(mid)
            item['name'] = name
            item['face'] = face
            item['official'] = official
            item['sex'] = sex
            item['focus'] = True
            item['level'] = int(level)
            item['data'] = {
                'fans': int(fans),
                'attention': int(attention),
                'archive': int(archive),
                'article': int(article),
                'datetime': datetime.datetime.now()
            }
            yield item
    except Exception as error:
        # Record the failure, notify by mail, and log the error.
        self.task.crawl_failed += 1
        mailer.send(
            to=["*****@*****.**"],
            subject="BiliobSpiderError",
            body="{}\n{}".format(response.url, error),
        )
        logging.error("Author spider failed while parsing")
        logging.error(response.url)
        logging.error(error)
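# `AuthorItem` itself is not shown in this section. A sketch of a matching
# scrapy.Item, with the field list inferred from the parse methods above rather
# than taken from the project's actual items.py:
import scrapy

class AuthorItem(scrapy.Item):
    mid = scrapy.Field()
    name = scrapy.Field()
    face = scrapy.Field()
    official = scrapy.Field()
    sex = scrapy.Field()
    level = scrapy.Field()
    focus = scrapy.Field()
    data = scrapy.Field()
    c_fans = scrapy.Field()
    c_attention = scrapy.Field()
    c_archive = scrapy.Field()
    c_article = scrapy.Field()
    object_id = scrapy.Field()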