def get_userfollowing_list_repeat(followings):
    """Run the crawl for every followed user that has not been handled yet.

    :param followings: list whose first element is the reported total number
        of followings and whose remaining elements are ``(mid, uname)``
        tuples (the shape built by ``get_userfollowing_list``).
    :return: None
    """
    global complete
    if not followings:
        return
    try:
        totals = followings[0]
        # The reported total may exceed the number of entries actually
        # fetched; slicing clamps to what is present instead of relying on
        # an IndexError to stop the loop.
        for following in followings[1:totals + 1]:
            try:
                # ``complete`` is consulted on every iteration because the
                # module-level list can grow while the loop is running
                # (get_userfollowing_list appends to it).
                if following[0] in complete:
                    continue
                run(following[0], following[1])
            except Exception as e:
                # As before, the first failure stops processing of the rest
                # of this list, but it is now reported instead of being
                # swallowed, and KeyboardInterrupt/SystemExit propagate.
                logger.error('处理关注用户失败, e = {}'.format(e))
                break
    except ConnectionError as e:
        logger.error('网络连接异常,e = {}'.format(e))
def get_userfollowing_list(user_id, user_name):
    """Fetch a user's followings list, store it, then process those users.

    On success the relations are saved via ``save_userinfo_mysql``,
    ``user_id`` is appended to the module-level ``complete`` list, and
    ``get_userfollowing_list_repeat`` is called with the collected list.

    :param user_id: mid of the user whose followings are requested.
    :param user_name: display name of that user, forwarded to storage.
    :return: None
    """
    global complete
    # The API only exposes the first 50 followings of a user.
    max_followings = 50
    try:
        url = 'https://api.bilibili.com/x/relation/followings?vmid=' + str(
            user_id)
        response = requests.get(url, headers=headers, timeout=6)
        if response.status_code != 200:
            logger.info('获取用户关注列表失败, use_id = {}, code = {}'.format(
                user_id, response.status_code))
            return

        content = response.json()
        data = content.get('data')
        if not data:
            logger.info('获取用户关注列表失败, use_id = {}, user_name = {}'.format(
                user_id, user_name))
            return

        # followings[0] keeps the real total; the entries after it are
        # capped at ``max_followings``.
        totals = data['total']
        followings = [totals]
        entries = data.get('list') or []
        for entry in entries[:min(totals, max_followings)]:
            try:
                followings.append((entry['mid'], entry['uname']))
            except (KeyError, TypeError, IndexError):
                # Stop at the first malformed entry, keeping what was
                # collected so far.
                break

        # Persist the relation rows in MySQL before marking the user done.
        save_userinfo_mysql(followings, user_id, user_name)
        complete.append(user_id)
        logger.info(complete)
        get_userfollowing_list_repeat(followings)
    except ConnectionError as e:
        # NOTE(review): if this is the builtin ConnectionError it does not
        # cover requests.exceptions.ConnectionError/Timeout - confirm which
        # name the file imports.
        logger.error('网络连接异常,e = {}'.format(e))
def enter_space(user_id):
    """Request a user's space page and, on HTTP 200, fetch their profile.

    :param user_id: mid of the user whose space page is requested.
    :return: None
    """
    try:
        url = 'https://space.bilibili.com/' + str(user_id)
        response = requests.get(url, headers=headers, timeout=6)
        if response.status_code == 200:
            get_basic_userinfo(user_id)
        else:
            logger.info('进入主页失败, use_id = {}, code = {}'.format(
                user_id, response.status_code))
    except ConnectionError as e:
        # NOTE(review): if this is the builtin ConnectionError it does not
        # cover requests.exceptions.ConnectionError/Timeout - confirm which
        # name the file imports.
        logger.error('网络连接异常,e = {}'.format(e))
def get_add_userview(user_id):
    """Fetch a user's archive play count and article read count.

    :param user_id: mid of the user to query.
    :return: ``(archive_view, article_view)`` on success; ``None`` when the
        response is not HTTP 200, carries no ``data``, or a
        ``ConnectionError`` was caught.
    """
    try:
        url = 'https://api.bilibili.com/x/space/upstat?mid=' + str(user_id)
        response = requests.get(url, headers=headers, timeout=6)
        if response.status_code != 200:
            logger.info('获取用户播放数和阅读数失败, use_id = {}, code = {}'.format(
                user_id, response.status_code))
            return None

        data = response.json().get('data')
        if not data:
            logger.info('获取用户播放数和阅读数失败, use_id = {}'.format(user_id))
            return None

        return (data['archive']['view'], data['article']['view'])
    except ConnectionError as e:
        logger.error('网络连接异常,e = {}'.format(e))
def get_add_userfollow(user_id):
    """Fetch how many users a user follows and how many followers they have.

    :param user_id: mid of the user to query.
    :return: ``(following, follower)`` - in that order - on success;
        ``None`` when the response is not HTTP 200, carries no ``data``, or
        a ``ConnectionError`` was caught.
    """
    try:
        url = 'https://api.bilibili.com/x/relation/stat?vmid=' + str(user_id)
        response = requests.get(url, headers=headers, timeout=6)
        if response.status_code != 200:
            logger.info('获取用户粉丝数和关注数失败, use_id = {}, code = {}'.format(
                user_id, response.status_code))
            return None

        data = response.json().get('data')
        if not data:
            logger.info('获取用户粉丝数和关注数失败, use_id = {}'.format(user_id))
            return None

        return (data['following'], data['follower'])
    except ConnectionError as e:
        logger.error('网络连接异常,e = {}'.format(e))
def save_userinfo_mysql(followings, user_id, user_name):
    """Store one user's follow relations in MySQL.

    Related objects: database ``bilibili``, table ``bilibili_userrelation``.

    Every pair is looked up with the smaller mid as ``user1_mid`` and the
    larger as ``user2_mid``. ``status`` is written as 0 when user1 follows
    user2, 1 when user2 follows user1, and updated to 2 once the opposite
    direction is seen for an existing row.

    NOTE(review): nothing is committed here; confirm the connection uses
    autocommit or that a commit happens elsewhere.

    :param followings: list whose first element is the total followings
        count, followed by ``(mid, uname)`` tuples; at most the first 50
        tuples are processed.
    :param user_id: mid of the user who follows the listed users.
    :param user_name: display name of that user.
    :return: None
    """
    try:
        totals = followings[0]
        if totals > 50:
            totals = 50
        # One-way: A follows B.
        sql_A2B = 'insert into bilibili_userrelation(user1_mid, user1_name, user2_mid, user2_name, status) ' \
                  'values(%s, %s, %s, %s, 0);'
        # One-way: B follows A.
        sql_B2A = 'insert into bilibili_userrelation(user1_mid, user1_name, user2_mid, user2_name, status) ' \
                  'values(%s, %s, %s, %s, 1);'
        # Two-way: A and B follow each other.
        sql_AB = 'update bilibili_userrelation set status=2 where user1_mid = %s AND user2_mid = %s;'
        # Existence / current-status checks.
        sql_selectAB = 'select count(*) from bilibili_userrelation where user1_mid = %s AND user2_mid = %s;'
        sql_selectABstatus = 'select status from bilibili_userrelation where user1_mid = %s AND user2_mid = %s;'

        for row in followings[1:totals + 1]:
            if user_id < row[0]:
                # Current user is user1 of the pair.
                cur.execute(sql_selectAB, (user_id, row[0]))
                count = cur.fetchall()[0][0]
                if 0 == count:
                    result = (user_id, user_name) + row
                    try:
                        cur.execute(sql_A2B, result)
                    except Exception:
                        conn.rollback()
                        logger.info('用户关系信息保存到数据库中失败,A2BB,mid分别是{}和{}'.format(
                            user_id, row[0]))
                elif 1 == count:
                    try:
                        cur.execute(sql_selectABstatus, (user_id, row[0]))
                        status = cur.fetchone()[0]
                    except Exception:
                        conn.rollback()
                        logger.info('用户关系信息保存到数据库中失败,A2B,mid分别是{}和{}'.format(
                            user_id, row[0]))
                        continue
                    if 1 == status:
                        # The reverse direction is already stored: mutual.
                        try:
                            cur.execute(sql_AB, (user_id, row[0]))
                        except Exception:
                            conn.rollback()
                            logger.info(
                                '用户关系信息保存到数据库中失败,A2B,mid分别是{}和{}'.format(
                                    user_id, row[0]))
                    else:
                        logger.info(
                            '用户关系信息在数据库中,A2B已经执行过,mid分别是{}和{}'.format(
                                user_id, row[0]))
                else:
                    logger.info('用户关系信息在数据库中重复有多条,A2B,mid分别是{}和{}'.format(
                        user_id, row[0]))
            elif user_id > row[0]:
                # Current user is user2 of the pair.
                result = row + (user_id, user_name)
                cur.execute(sql_selectAB, (row[0], user_id))
                count = cur.fetchall()[0][0]
                if 0 == count:
                    try:
                        cur.execute(sql_B2A, result)
                    except Exception:
                        conn.rollback()
                        logger.info('用户关系信息保存到数据库中失败,B2AA,mid分别是{}和{}'.format(
                            row[0], user_id))
                elif 1 == count:
                    try:
                        cur.execute(sql_selectABstatus, (row[0], user_id))
                        status = cur.fetchone()[0]
                    except Exception:
                        logger.info('用户关系信息查询状态出错,B2A,mid分别是{}和{}'.format(
                            row[0], user_id))
                        # Without a freshly read status the check below
                        # would use an unbound or stale value; skip the row.
                        continue
                    if 0 == status:
                        # The reverse direction is already stored: mutual.
                        try:
                            cur.execute(sql_AB, (row[0], user_id))
                        except Exception:
                            conn.rollback()
                            logger.info(
                                '用户关系信息保存到数据库中失败,B2A,mid分别是{}和{}'.format(
                                    row[0], user_id))
                    else:
                        logger.info('用户关系信息在数据库中,B2A已经执行过,mid分别是{}和{}'.format(
                            row[0], user_id))
                else:
                    logger.info('用户关系信息在数据库中重复有多条,B2A,mid分别是{}和{}'.format(
                        row[0], user_id))
            else:
                # The user appears in their own followings list.
                logger.info('用户关系信息出错,自关,mid是{}'.format(user_id))
    except ConnectionError as e:
        logger.error('网络连接异常,e = {}'.format(e))
def get_basic_userinfo(user_id):
    """Fetch a user's basic profile, extend it with stats, and store it.

    Posts to the ``GetInfo`` endpoint, builds a flat tuple of profile
    fields, appends the results of ``get_add_userfollow``,
    ``get_add_usercount`` and ``get_add_userview``, hands the tuple to
    ``save_userinfo_mysql`` and increments the module-level ``total``.

    :param user_id: mid of the user to fetch.
    :return: None
    """
    global total
    url = 'https://space.bilibili.com/ajax/member/GetInfo'
    payload = {'mid': user_id}
    head = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36'
        '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
        'Referer':
        'https://space.bilibili.com/' + str(user_id)
    }
    try:
        response = requests.post(url, headers=head, data=payload, timeout=6)
        if response.status_code != 200:
            logger.info('获取用户个人信息失败, use_id = {}, code = {}'.format(
                user_id, response.status_code))
            return

        data = response.json().get('data')
        if not data:
            logger.info('获取用户个人信息失败, use_id = {}'.format(user_id))
            return

        try:
            regtime = time.strftime("%Y-%m-%d %H:%M:%S",
                                    time.localtime(data['regtime']))
        except (KeyError, TypeError, ValueError, OverflowError, OSError):
            logger.info('用户没有regtime这个标签, user_id = {}'.format(user_id))
            regtime = 0

        try:
            birthday = data['birthday']
        except KeyError:
            # Only the birthday falls back to 0; a successfully parsed
            # regtime is left untouched.
            logger.info('用户没有birthday这个标签, user_id = {}'.format(user_id))
            birthday = 0

        result = (
            data['mid'],
            data['name'],
            data['sex'],
            data['rank'],
            data['face'],
            regtime,
            data['spacesta'],
            birthday,
            data['sign'],
            data['level_info']['current_level'],
            data['official_verify']['desc'],
            data['vip']['vipType'],
            data['vip']['vipStatus'],
            data['toutu'],
            data['toutuId'],
            data['theme'],
            data['theme_preview'],
            data['coins'],
            data['im9_sign'],
            data['fans_badge'])

        # The stat helpers return None on failure; skip the user instead of
        # raising TypeError on ``tuple += None``.
        # NOTE(review): get_add_usercount is defined outside this view; it
        # is assumed to follow the same tuple-or-None contract.
        for fetch_extra in (get_add_userfollow, get_add_usercount,
                            get_add_userview):
            extra = fetch_extra(user_id)
            if extra is None:
                logger.info('获取用户个人信息失败, use_id = {}'.format(user_id))
                return
            result += extra

        # Store the profile row in MySQL.
        # NOTE(review): called with a single tuple; confirm which
        # save_userinfo_mysql definition is in scope and that its signature
        # accepts this call.
        save_userinfo_mysql(result)
        total += 1
        if total % 100 == 0:
            logger.info('目前共计爬取到{}条数据'.format(total))
    except ConnectionError as e:
        # NOTE(review): if this is the builtin ConnectionError it does not
        # cover requests.exceptions.ConnectionError/Timeout - confirm which
        # name the file imports.
        logger.error('网络连接异常,e = {}'.format(e))