def parse_dialog_info_old(self, dialog):
    """
    Parse one private message in the legacy format and store it.

    Reads the ``direct_message`` entry of ``dialog``, queues a download of
    the sender's avatar (when a URL is present) and inserts one row into
    ``TBL_PRCD_WEIBO_PRIVATEMSG_INFO`` through ``self.sql_execute_try``.

    :param dialog: dict with a ``direct_message`` dict and, optionally, a
        ``user`` dict carrying an ``avatar_large`` URL.
    :return: None
    :raises KeyError: if a mandatory ``direct_message`` field is missing.
    :raises ValueError: if ``created_at`` does not match the expected format.
    """
    message = dialog['direct_message']
    account_uid = self.weiboapi.uid

    # Format the parsed time explicitly. Cutting str(datetime) at the first
    # '+' only works for positive UTC offsets; with a negative offset there
    # is no '+', so the slice kept the offset and dropped the last character.
    sent_at = datetime.datetime.strptime(
        message['created_at'], '%a %b %d %H:%M:%S %z %Y')
    created_at = sent_at.strftime('%Y-%m-%d %H:%M:%S')

    sender_id = message['sender_id']
    recipient_id = message['recipient_id']
    sender_screen_name = message['sender_screen_name']
    recipient_screen_name = message['recipient_screen_name']
    # Double quotes are replaced by spaces before the text goes into SQL.
    content = message['text'].replace('"', ' ')

    friends_relpath = '/WeiBo/picture/{}/friends/'.format(account_uid)
    sender_img_name = '{}.jpg'.format(sender_id)
    recipient_img_name = '{}.jpg'.format(recipient_id)
    sender_image_path = friends_relpath + sender_img_name
    recipient_image_path = friends_relpath + recipient_img_name
    sender_image_abspath = self.download_dir + friends_relpath

    # Attachment fields stay empty unless the message carries page_info.
    attach_id = attach_name = attach_url = ''
    page_info = message.get('page_info')
    if page_info:
        attach_id = page_info.get('page_id', '')
        attach_name = (page_info.get('page_title') or '').replace('"', ' ')
        # url_struct may be absent or empty; fall back to no attachment URL.
        url_struct = message.get('url_struct') or [{}]
        attach_url = url_struct[0].get('url_type_pic', '')

    # The 'user' entry is optional; without an avatar URL there is nothing
    # to download.
    sender_img_url = (dialog.get('user') or {}).get('avatar_large')
    if sender_img_url:
        self.download.add_task(url=sender_img_url,
                               path=sender_image_abspath,
                               name=sender_img_name)

    sql = (
        'INSERT INTO "TBL_PRCD_WEIBO_PRIVATEMSG_INFO" '
        '("strWeiboMsgId","strWeiboContent","strWeiboSenderAccid", '
        '"strWeiboSenderNick", "strSenderImage", '
        '"strWeiboReceiverAccid","strWeiboReceiverNick","strReceiverImage", '
        '"strWeiboSendTime","strWeiboAttachmentFid", '
        '"strWeiboAttachmentName","strWeiboAttachmentUrl","strSrcAccount") '
        'VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})'
    )
    params = (
        message['id'],
        content,
        sender_id,
        sender_screen_name,
        sender_image_path,
        recipient_id,
        recipient_screen_name,
        recipient_image_path,
        created_at,
        attach_id,
        attach_name,
        attach_url,
        account_uid,
    )
    self.sql_execute_try(utils.sql_format(sql, params))
def submit_userinfo(self):
    """
    Store the logged-in user's profile in ``TBL_PRCD_WEIBO_USERINFO``.

    Calls ``self.create_table`` for the table, caches the nickname on
    ``self.nick_name`` and inserts one row. When ``self.weiboapi.userinfo``
    is empty, only the account name and uid are stored.

    :return: None
    """
    self.log.info("【用户信息】")
    self.create_table('TBL_PRCD_WEIBO_USERINFO', False)

    # Normalise a missing profile (None) to an empty dict so the nickname
    # lookup cannot fail before the fallback branch below is reached.
    user = self.weiboapi.userinfo or {}
    self.nick_name = user.get('nick', '')

    if not user:
        sql = (
            'INSERT INTO "TBL_PRCD_WEIBO_USERINFO" '
            '("strWeiboAccountNum", "strWeiboAccountID") VALUES({}, {})'
        )
        params = (self.weiboapi.username, self.weiboapi.uid)
        self.sql_execute_try(utils.sql_format(sql, params))
        return

    sql = (
        'INSERT INTO "TBL_PRCD_WEIBO_USERINFO" '
        '("strWeiboNickName", "strWeiboAccountNum", "strWeiboAccountID", '
        '"strWeiboGender","strUserDesc","strPhotoPath","strWeiboFaceURL",'
        '"strWeiboCity","strWeiboBlog","strCreateAt", '
        '"strSunRank","strAttNum","strBlogNum","strFansNum") '
        'VALUES({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})'
    )
    # Profile keys that follow the account name, in column order.
    trailing_keys = (
        'uid', 'gender', 'description', 'profile_image_path', 'profile_url',
        'address', 'blog', 'created_at', 'urank', 'follow_count',
        'statuses_count', 'followers_count',
    )
    params = (
        (user.get('nick', ''), self.weiboapi.username)
        + tuple(user.get(key, '') for key in trailing_keys)
    )
    self.sql_execute_try(utils.sql_format(sql, params))
def parse_weibo_userinfo(self, userinfo):
    """
    Build the INSERT statement for the account's own Sina Weibo profile.

    Caches the screen name on ``self.nick_name`` and queues a download of
    the large avatar under the account's ``friends`` picture directory.

    :param userinfo: dict of profile fields (``idstr``, ``screen_name``,
        ``province``, ``city``, ``avatar_large``, ...); missing keys
        default to empty strings.
    :return: the result of ``utils.sql_format`` for the
        ``TBL_PRCD_WEIBO_USERINFO`` insert.
    """
    uid = userinfo.get('idstr', '')
    screen_name = userinfo.get('screen_name', '')
    self.nick_name = screen_name

    # format() keeps the "<province> <city>" layout and also accepts
    # non-string values.
    address = '{} {}'.format(userinfo.get('province', ''),
                             userinfo.get('city', ''))
    created_at = utils.time_format(userinfo.get('created_at', ''))

    # Large avatar URL with embedded spaces removed; a null value is
    # treated as empty.
    avatar_large = (userinfo.get('avatar_large') or '').replace(' ', '')

    friends_relpath = '/WeiBo/picture/{}/friends/'.format(self.weiboapi.uid)
    profile_image_name = '{}.jpg'.format(uid)
    profile_image_relpath = friends_relpath + profile_image_name
    self.download.add_task(url=avatar_large,
                           path=self.download_dir + friends_relpath,
                           name=profile_image_name)

    profile_url = 'https://m.weibo.cn/u/{}'.format(uid)
    sql = (
        'INSERT INTO "TBL_PRCD_WEIBO_USERINFO" '
        '("strWeiboNickName", "strWeiboAccountNum", "strWeiboAccountID", '
        '"strWeiboGender","strUserDesc","strPhotoPath","strWeiboFaceURL",'
        '"strWeiboCity","strWeiboBlog","strCreateAt", '
        '"strSunRank","strAttNum","strBlogNum","strFansNum") '
        'VALUES({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})'
    )
    params = (
        screen_name,
        self.weiboapi.username,
        self.weiboapi.uid,
        userinfo.get('gender', ''),
        userinfo.get('description', ''),
        profile_image_relpath,
        profile_url,
        address,
        userinfo.get('url', ''),
        created_at,
        '',  # strSunRank: no value is supplied for this column
        userinfo.get('friends_count', ''),
        userinfo.get('statuses_count', ''),
        userinfo.get('followers_count', ''),
    )
    return utils.sql_format(sql, params)
def parse_weibo_follower(self, fans):
    """
    Build the INSERT statement for one follower entry.

    Queues a download of the follower's large avatar under the account's
    ``friends`` picture directory.

    :param fans: dict with an optional ``desc1`` text and a ``user`` dict
        (``id``, ``screen_name``, ``avatar_large``, counters); missing
        keys default to empty strings.
    :return: the result of ``utils.sql_format`` for the
        ``TBL_PRCD_WEIBO_FOLLOW_INFO`` insert.
    """
    desc = fans.get('desc1', '')
    user = fans.get('user') or {}
    uid = user.get('id', '')
    screen_name = user.get('screen_name', '')

    # Large avatar URL with embedded spaces removed; a null value is
    # treated as empty.
    avatar_large = (user.get('avatar_large') or '').replace(' ', '')

    followers_count = user.get('followers_count', '')
    friends_count = user.get('friends_count', '')
    # The other blocks of this file read the post counter as
    # 'statuses_count'; 'status_count' is kept as a fallback.
    status_count = user.get('statuses_count', user.get('status_count', ''))

    friends_relpath = '/WeiBo/picture/{}/friends/'.format(self.weiboapi.uid)
    profile_image_name = '{}.jpg'.format(uid)
    profile_image_relpath = friends_relpath + profile_image_name
    self.download.add_task(url=avatar_large,
                           path=self.download_dir + friends_relpath,
                           name=profile_image_name)

    profile_url = 'https://m.weibo.cn/u/{}'.format(uid)
    sql = (
        ' INSERT INTO "TBL_PRCD_WEIBO_FOLLOW_INFO" '
        '("strWeiboNickname", "strPhotoPath", "strWeiboFaceURL",'
        '"strPageURL","strUserDesc", '
        '"strStatusCount","strWeiboFollowerAmount",'
        '"strWeiboFollowingAmount","strWeiboUID","strWeiboRank", '
        '"strSrcAccount") '
        'VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}) '
    )
    params = (
        screen_name,
        profile_image_relpath,
        profile_url,  # stored as strWeiboFaceURL
        profile_url,  # stored as strPageURL
        desc,
        status_count,
        followers_count,
        friends_count,
        uid,
        '',  # strWeiboRank: no value is supplied for this column
        self.weiboapi.uid,
    )
    return utils.sql_format(sql, params)
def parse_dialog_info_new(self, msgs, name_dict, portrait_dict):
    """
    Parse private messages in the newer format and store each one.

    Anything other than a list is ignored, as are list items that are not
    dicts. Every remaining message becomes one row of
    ``TBL_PRCD_WEIBO_PRIVATEMSG_INFO``; attachment columns are left empty.

    :param msgs: list of message dicts (``mid``, ``time``, ``from``,
        ``to``, ``content``).
    :param name_dict: maps a user id string to a screen name.
    :param portrait_dict: maps a user id string to an avatar path.
    :return: None
    """
    if not isinstance(msgs, list):
        return

    insert_sql = (
        'INSERT INTO "TBL_PRCD_WEIBO_PRIVATEMSG_INFO" '
        '("strWeiboMsgId","strWeiboContent","strWeiboSenderAccid", '
        '"strWeiboSenderNick", "strSenderImage", '
        '"strWeiboReceiverAccid","strWeiboReceiverNick","strReceiverImage", '
        '"strWeiboSendTime","strWeiboAttachmentFid", '
        '"strWeiboAttachmentName","strWeiboAttachmentUrl","strSrcAccount") '
        'VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})'
    )

    for entry in (item for item in msgs if isinstance(item, dict)):
        message_id = entry.get('mid')
        # The 'time' value is interpreted as a UTC epoch timestamp.
        sent_at = datetime.datetime.utcfromtimestamp(entry.get('time'))
        from_id = str(entry.get('from'))
        to_id = str(entry.get('to'))
        row = (
            message_id,
            entry.get('content'),
            from_id,
            name_dict.get(from_id),
            portrait_dict.get(from_id),
            to_id,
            name_dict.get(to_id),
            portrait_dict.get(to_id),
            sent_at.strftime('%Y-%m-%d %H:%M:%S'),
            '',  # attachment id
            '',  # attachment name
            '',  # attachment url
            self.weiboapi.uid,
        )
        self.sql_execute_try(utils.sql_format(insert_sql, row))
def submit_api_photos_info(self):
    """
    Download the album photos and record each one in the database.

    Pages through ``self.weiboapi.get_api_photos_list`` starting at id 0
    until it reports a next id of 0. Only cards whose ``card_type`` is 47
    are processed, and within them only pictures carrying a ``pic_id``.
    A picture whose download fails is logged and skipped. Finally waits
    on ``self.download.download_wait()``.

    :return: None
    """
    self.log.info("【照片列表】")
    self.create_table('TBL_PRCD_WEIBO_PHOTO_INFO', False)

    insert_sql = (
        'INSERT INTO "TBL_PRCD_WEIBO_PHOTO_INFO" '
        '("PhotoID","PhotoURL","LocalPath","MblogID","MblogText",'
        '"strSrcAccount") '
        'VALUES({}, {}, {}, {}, {}, {})'
    )

    cursor = 0
    while True:
        cards, following_id = self.weiboapi.get_api_photos_list(cursor)
        for card in cards:
            if card['card_type'] != 47:
                continue
            for pic in card['pics']:
                if 'pic_id' not in pic:
                    continue
                photo_id = pic['pic_id']
                photo_url = pic['pic_big']
                post = pic['mblog']
                post_id = post['id']
                post_text = post['text']

                file_name = str(photo_id) + '.jpg'
                rel_path = '/WeiBo/picture/{}/photos/{}'.format(
                    self.weiboapi.uid, file_name)
                target_dir = (
                    self.download_dir
                    + '/WeiBo/picture/{}/photos/'.format(self.weiboapi.uid)
                )
                try:
                    download_file(photo_url, target_dir, file_name)
                except Exception as err:
                    # A failed download means no row is written for it.
                    self.log.error(
                        "照片下载你失败:[{}] {}".format(photo_url, err))
                    continue

                row = (photo_id, photo_url, rel_path, post_id, post_text,
                       self.weiboapi.uid)
                self.sql_execute_try(utils.sql_format(insert_sql, row))
        if following_id == 0:
            break
        cursor = following_id

    self.download.download_wait()
def submit_bloginfo(self):
    """
    Fetch the account's posts page by page and insert them.

    The page count comes from ``self.weiboapi.weibo_amount``; each page is
    read with ``self.weiboapi.get_weibo_onepage`` and every post becomes
    one row of ``TBL_PRCD_WEIBO_BLOGINFO``. A post that fails is logged
    and skipped so the remaining posts are still stored.

    :return: None
    """
    self.log.info("【微博信息】")
    self.create_table('TBL_PRCD_WEIBO_BLOGINFO', False)
    sql = (
        'INSERT INTO "TBL_PRCD_WEIBO_BLOGINFO" '
        '("strWeiboSessionID","strWeiboContent","strWeiboSendTime",'
        '"strSrcAccount", '
        '"strWeiboSenderNick","strSource","strWeiboAttiCount",'
        '"strWeiboForwardCount","strWeiboCommentCount", '
        '"strPrevCreatedAt","strPrevMblogID","strPrevContent",'
        '"strPrevUID","strPrevSource") '
        'VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}) '
    )
    page_total, _blog_total = self.weiboapi.weibo_amount
    for page in range(1, page_total + 1):
        _count, one_list = self.weiboapi.get_weibo_onepage(page)
        for mblog in one_list:
            try:
                # Values follow the column order above: the owning account
                # goes to strSrcAccount and the post's 'from' value to
                # strSource (these two used to be swapped).
                params = (
                    mblog.get('mblog_id', ''),
                    mblog.get('content', ''),
                    mblog.get('created_at', ''),
                    self.weiboapi.account,
                    mblog.get('nick', ''),
                    mblog.get('from', ''),
                    mblog.get('attitudes_count', ''),
                    mblog.get('reposts_count', ''),
                    mblog.get('comments_count', ''),
                    mblog.get('prev_createdat', ''),
                    mblog.get('prev_id', ''),
                    mblog.get('prev_content', ''),
                    mblog.get('prev_uid', ''),
                    mblog.get('prev_source', ''),
                )
                self.sql_execute_try(utils.sql_format(sql, params))
            except Exception as e:
                # Best effort: one bad post must not abort the whole run.
                self.log.error("获取用户【微博信息】异常. {}\r\n 错误信息:{}".format(
                    str(mblog), e))
                continue
def submit_follower_info(self):
    """
    Fetch the follower list page by page and insert it into the database.

    The page count comes from ``self.weiboapi.follower_amount``; each page
    is read with ``self.weiboapi.get_follower_onepage``. For every entry
    an avatar download is queued and one row is written to
    ``TBL_PRCD_WEIBO_FOLLOW_INFO``. An entry that fails is logged and
    skipped. Finally waits on ``self.download.download_wait()``.

    :return: None
    """
    self.log.info("【在关注者信息】")
    self.create_table('TBL_PRCD_WEIBO_FOLLOW_INFO', False)

    insert_sql = (
        ' INSERT INTO "TBL_PRCD_WEIBO_FOLLOW_INFO" '
        '("strWeiboNickname","strPhotoPath","strPageURL","strUserDesc", '
        '"strStatusCount","strWeiboFollowerAmount",'
        '"strWeiboFollowingAmount","strWeiboUID","strWeiboRank", '
        '"strSrcAccount") '
        'VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}) '
    )
    # Mandatory keys of a follower entry, in column order.
    required_keys = (
        'nick', 'profile_image_path', 'profile_url', 'desc',
        'statuses_count', 'followers_count', 'follow_count', 'uid', 'rank',
    )

    total_pages, _ = self.weiboapi.follower_amount
    page_no = 1
    while page_no <= total_pages:
        _, entries = self.weiboapi.get_follower_onepage(page_no)
        for fans in entries:
            try:
                avatar_dir = (
                    self.download_dir
                    + "/WeiBo/picture/{}/friends/".format(self.weiboapi.uid)
                )
                self.download.add_task(
                    url=fans.get('profile_image_url', ''),
                    path=avatar_dir,
                    name=fans.get('profile_image_name', ''))
                row = tuple(fans[key] for key in required_keys)
                row += (self.weiboapi.uid,)
                self.sql_execute_try(utils.sql_format(insert_sql, row))
            except Exception as err:
                self.log.error("获取用户【粉丝信息】异常. {}\r\n 错误信息:{}".format(
                    str(fans), err))
        page_no += 1

    self.download.download_wait()
def fetch_weibo_userinfo(self):
    """
    Fetch the Sina Weibo profile of the account and store it.

    A failure of ``self.weiboapi.weibo_userinfo()`` is logged and treated
    like an empty profile. A non-empty profile is turned into SQL by
    ``self.parse_weibo_userinfo``; otherwise only the account name and uid
    are inserted into ``TBL_PRCD_WEIBO_USERINFO``.

    :return: None
    """
    self.log.info("【用户信息】")
    self.create_table('TBL_PRCD_WEIBO_USERINFO', False)

    try:
        profile = self.weiboapi.weibo_userinfo()
    except Exception as err:
        profile = {}
        self.log.exception(
            "fetch weibo userinfo exception. errorinfo:{}".format(err))

    if not profile:
        # No profile data: record just the account identifiers.
        template = (
            'INSERT INTO "TBL_PRCD_WEIBO_USERINFO" '
            '("strWeiboAccountNum", "strWeiboAccountID") VALUES({}, {})'
        )
        identifiers = (self.weiboapi.username, self.weiboapi.uid)
        statement = utils.sql_format(template, identifiers)
    else:
        statement = self.parse_weibo_userinfo(profile)

    self.sql_execute_try(statement)
def parse_weibo_mblog(self, mblog: dict):
    """
    Build the INSERT statement for one Weibo post.

    Queues a download for every large picture of the post and of the post
    it retweets (``retweeted_status``), if any.

    :param mblog: dict describing the post (``id``, ``text``, ``source``,
        ``created_at``, counters, ``user``, ``pic_infos``,
        ``retweeted_status``); missing keys default to empty values.
    :return: the result of ``utils.sql_format`` for the
        ``TBL_PRCD_WEIBO_BLOGINFO`` insert.
    """
    account_uid = self.weiboapi.uid
    photos_relpath = '/WeiBo/picture/{}/photos/'.format(account_uid)
    photos_abspath = self.download_dir + photos_relpath

    def queue_pictures(pic_infos):
        """Queue each large picture; return the '"path";' entries joined."""
        quoted_paths = []
        for pic_key, pic in (pic_infos or {}).items():
            large_pic = pic.get('large') or {}
            large_url = (large_pic.get('url') or '').replace(' ', '')
            large_name = pic_key + '.jpg'
            self.download.add_task(url=large_url,
                                   path=photos_abspath,
                                   name=large_name)
            quoted_paths.append('"{}";'.format(photos_relpath + large_name))
        return ''.join(quoted_paths)

    mblog_id = mblog.get('id', '')
    content = mblog.get('text', '')
    source = mblog.get('source', '')
    created_at = utils.time_format(mblog.get('created_at', ''))
    reposts_count = mblog.get('reposts_count', '')
    comments_count = mblog.get('comments_count', '')
    attitudes_count = mblog.get('attitudes_count', '')
    sender_nick = (mblog.get('user') or {}).get('screen_name', '')
    attachments_name = queue_pictures(mblog.get('pic_infos'))

    # Retweeted post; every field falls back to '' when there is none.
    retweeted_status = mblog.get('retweeted_status') or {}
    prev_created_at = utils.time_format(
        retweeted_status.get('created_at', ''))
    prev_mblog_id = retweeted_status.get('id', '')
    prev_content = retweeted_status.get('text', '')
    prev_sender_uid = (retweeted_status.get('user') or {}).get('idstr', '')
    prev_source = retweeted_status.get('source', '')
    prev_attachments_name = queue_pictures(
        retweeted_status.get('pic_infos'))

    sql = (
        'INSERT INTO "TBL_PRCD_WEIBO_BLOGINFO" '
        '("strWeiboSessionID","strWeiboContent","strWeiboAttachmentName", '
        '"strWeiboAttachmentLocalPath","strWeiboSendTime","strSrcAccount",'
        '"strWeiboSenderNick","strSource", '
        '"strWeiboAttiCount","strWeiboForwardCount",'
        '"strWeiboCommentCount", '
        '"strPrevCreatedAt","strPrevMblogID","strPrevContent",'
        '"strPrevUID","strPrevSource") '
        'VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, '
        '{}, {}) '
    )
    params = (
        mblog_id,
        content,
        attachments_name,
        # NOTE(review): the retweeted post's picture paths are stored in
        # strWeiboAttachmentLocalPath - confirm this mapping is intended.
        prev_attachments_name,
        created_at,
        account_uid,
        sender_nick,
        source,
        attitudes_count,
        reposts_count,
        comments_count,
        prev_created_at,
        prev_mblog_id,
        prev_content,
        prev_sender_uid,
        prev_source,
    )
    return utils.sql_format(sql, params)