def retrieveCommentsOfOneStatus(self, client, statusId):
    """Fetch the comments of one status with a single API request.

    Args:
        client: API client exposing ``comments.show.get``.
        statusId: Id of the status whose comments are requested.

    Returns:
        The value stored under ``'comments'`` in the API response, or an
        empty list when the request or the lookup raises an ``Exception``.
    """
    try:
        commentsDict = client.comments.show.get(id=statusId)
        return commentsDict['comments']
    except Exception:
        # Best-effort: report the failure and fall back to "no comments".
        logger.errorLog(self.errorLog)
    # Return from here, not from a ``finally`` clause: returning inside
    # ``finally`` would also discard KeyboardInterrupt / SystemExit.
    return []
def retrieveStatus(self, client, pageCount, sinceId):
    """Fetch one page of the friends timeline.

    Args:
        client: API client exposing ``statuses.friends_timeline.get``.
        pageCount: Passed to the API as ``count``.
        sinceId: Passed to the API as ``since_id``.

    Returns:
        A ``(statusList, since_id)`` tuple taken from the ``'statuses'`` and
        ``'since_id'`` entries of the response. Falls back to ``[]`` and
        ``0`` respectively when the request raises an ``Exception`` or the
        entry is missing/empty.
    """
    statusList = []
    since_id = 0
    try:
        statusesDict = client.statuses.friends_timeline.get(
            count=pageCount, since_id=sinceId)
        # ``or`` keeps the documented defaults instead of leaking ``None``
        # to the caller when the response lacks a key.
        statusList = statusesDict.get('statuses') or []
        since_id = statusesDict.get('since_id') or 0
    except Exception:
        # Best-effort: report the failure and return whatever was collected.
        logger.errorLog(self.errorLog)
    # Return from here, not from a ``finally`` clause: returning inside
    # ``finally`` would also discard KeyboardInterrupt / SystemExit.
    return statusList, since_id
def mergeOneStatus(self, client, statusToMerge):
    """Merge one crawled status into the stored statuses.

    The author is looked up through ``self.getUserInfo`` and saved via
    ``self.userManager`` when found. If ``self.statusManager`` already knows
    the status, its keywords and scrape timestamp are updated; otherwise the
    status is added together with its comments.

    Args:
        client: API client forwarded to the user/comment lookups.
        statusToMerge: Crawled status; the code reads ``user_simple``,
            ``status_id``, ``keywords``, ``timestamp`` and ``comments_count``.

    Any ``Exception`` is reported through ``logger.errorLog`` and swallowed.
    """
    try:
        # Fetch the author's information through the API, keyed by the name
        # stored in status.user_simple.
        author_name = statusToMerge.user_simple.get('name')
        author_info = self.getUserInfo(client, author_name)
        if author_info:
            status_id = statusToMerge.status_id
            # Merge the user into the to-do collection.
            self.userManager.save_user(status_id, author_info)
        else:
            self.mergeLog(
                " Error:Get author information failed ,the status id is "
                + str(statusToMerge.status_id) + ".\n")
            status_id = statusToMerge.status_id

        # Has this status been retrieved before?
        stored_status = self.statusManager.find_status_exist(status_id)
        if not stored_status:
            # Miss: add the status to the status collection, after retrieving
            # its comments by status id.
            self.mergeLog(
                "The status whose id is " + str(statusToMerge.status_id)
                + " is not in the database! Do adding!\n")
            fetched_comments = None
            if statusToMerge.comments_count > 0:
                # Fetch all comments of this status.
                fetched_comments = self.retrieveCommentsOfOneStatus(
                    client, status_id)
            comment_refs = self.commentManager.save_comment(
                fetched_comments, status_id)
            self.statusManager.add_status_fromCrawler(
                author_info, statusToMerge, comment_refs)
            return

        # Hit: update the keywords of the status and its scrape time.
        self.mergeLog(
            "The status whose id is " + str(statusToMerge.status_id)
            + " already in the database! Do updating!\n")
        stored_keywords = stored_status.keywords
        if stored_keywords:
            merged_keywords = list(
                set(stored_keywords).union(set(statusToMerge.keywords)))
        else:
            merged_keywords = statusToMerge.keywords
        stored_status.update(keywords=merged_keywords,
                             scrapedTimeStamp=statusToMerge.timestamp)
    except Exception:
        logger.errorLog(self.errorLog)
def updateCurrentLastCheckTimeStamp(self, currentCheckTimeStamp):
    """Store ``currentCheckTimeStamp`` as the last-check timestamp.

    Meant to run after the fetched statuses have been merged, so that the
    next scan does not revisit statuses that were already merged.

    The first ``LastCheckTimeStamp`` record is updated in place; when none
    exists yet, a new one is created and saved. Any ``Exception`` is
    reported through ``logger.errorLog`` and swallowed.
    """
    try:
        record = LastCheckTimeStamp.objects.first()
        if not record:
            # Nothing stored yet: create the record.
            record = LastCheckTimeStamp(
                checkedTimeStamp=currentCheckTimeStamp)
            record.save()
        else:
            record.update(checkedTimeStamp=currentCheckTimeStamp)
    except Exception:
        logger.errorLog(self.errorLog)
def retrieveCommentsOfOneStatus(self, client, statusId):
    """Fetch all comments of one status, following the ``max_id`` cursor.

    Pages of up to 200 comments are requested, starting with ``max_id=0``.
    Each response's ``'max_id'`` is fed back as the ``max_id`` of the next
    request until the response reports ``0``.

    Args:
        client: API client exposing ``comments.show.get``.
        statusId: Id of the status whose comments are requested.

    Returns:
        The accumulated ``'comments'`` entries of every page fetched. If a
        request or lookup raises an ``Exception``, it is reported through
        ``logger.errorLog`` and the comments collected so far are returned.
    """
    commentsList = []
    max_id = 0
    try:
        while True:
            commentsDict = client.comments.show.get(
                id=statusId, count=200, max_id=max_id)
            if not commentsDict:
                # Empty response: nothing more to collect.
                break
            next_max_id = commentsDict['max_id']
            commentsList.extend(commentsDict['comments'])
            # 0 marks the last page. An unchanged cursor would repeat the
            # very same request forever, so stop in that case as well.
            if next_max_id == 0 or next_max_id == max_id:
                break
            max_id = next_max_id
    except Exception:
        logger.errorLog(self.errorLog)
    # Return from here, not from a ``finally`` clause: returning inside
    # ``finally`` would also discard KeyboardInterrupt / SystemExit.
    return commentsList
def getUserInfo(self, client, userName):
    """Build the account dictionary for the user named ``userName``.

    The dictionary comes from ``client.users.show.get``. If that call raises
    an ``Exception``, the error is reported through ``logger.errorLog`` and
    an empty dictionary is used instead. Any non-zero ``friends_count``,
    ``followers_count`` or ``statuses_count`` found on the matching
    ``UserToMerge`` record then overrides the corresponding entry.

    Args:
        client: API client exposing ``users.show.get``.
        userName: Screen name, also used as the ``UserToMerge`` name.

    Returns:
        The resulting account dictionary.
    """
    user_account_dict = {}
    try:
        # First fetch the partial information the API returns for this
        # screen name.
        user_account_dict = client.users.show.get(screen_name=userName)
    except Exception:
        logger.errorLog(self.errorLog)

    # Look up the user_to_merge record whose name is userName.
    merged_record = UserToMerge.objects(name=userName).first()
    if merged_record:
        for counter in ('friends_count', 'followers_count', 'statuses_count'):
            if getattr(merged_record, counter):
                user_account_dict[counter] = getattr(merged_record, counter)
    return user_account_dict
def persistentOneStatusDic(self, client, statusDic, userManager,
                           statusManager, commentManager):
    """Persist one API status dictionary with its author and comments.

    When the status carries a ``'retweeted_status'``, that status is
    persisted first through a recursive call, then the status itself.

    Args:
        client: API client forwarded to the comment retrieval.
        statusDic: Status dictionary; the code reads ``'id'``, ``'user'``,
            ``'comments_count'`` and ``'retweeted_status'``.
        userManager: Object whose ``save_user`` stores the author.
        statusManager: Object whose ``add_status_fromAPI`` stores the status.
        commentManager: Object whose ``save_comment`` stores the comments and
            returns the value handed to ``add_status_fromAPI``.

    Any ``Exception`` is reported through ``logger.errorLog`` and swallowed.
    """
    try:
        statusId = statusDic.get('id')
        # A missing count must read as "no comments": comparing ``None > 0``
        # raises TypeError on Python 3 and would drop the whole status.
        commentsNum = statusDic.get('comments_count') or 0
        commentsOfStatus = None
        if commentsNum > 0:
            # Fetch all comments of this status.
            commentsOfStatus = self.retrieveCommentsOfOneStatus(
                client, statusId)
        statusAuthorDict = statusDic['user']
        commentsIdList = commentManager.save_comment(
            commentsOfStatus, statusId)
        userManager.save_user(statusId, statusAuthorDict)

        retweetedStatusDic = statusDic.get('retweeted_status')
        if retweetedStatusDic:
            # 1. Store the status that was retweeted.
            self.persistentOneStatusDic(client, retweetedStatusDic,
                                        userManager, statusManager,
                                        commentManager)
        # 2. Store the status itself (retweet or not).
        statusManager.add_status_fromAPI(statusDic, commentsIdList)
    except Exception:
        logger.errorLog(self.errorLog)
def update_comments_one_status(self, status, appClient):
    """Refresh the stored comments of ``status`` from the API.

    The comments are fetched via ``appClient.comments.show.get`` and saved
    through ``self.comments_manager.save_comment``. The status is then
    updated with the returned comment ids, the comment density (number of
    fetched comments minus number previously stored, never below 0) and the
    time of this refresh.

    Args:
        status: Stored status to refresh; nothing happens when it is falsy.
        appClient: API client exposing ``comments.show.get``.

    Any ``Exception`` is reported through ``logger.errorLog`` and swallowed.
    """
    current_timestamp = time.time()
    if not status:
        return
    status_id = status.status_id
    try:
        comments_list = appClient.comments.show.get(id=status_id)['comments']
        # ``or []`` tolerates a status whose comment list was never set.
        count_before = len(status.comments or [])
        count_after = len(comments_list)
        comments_ids = self.comments_manager.save_comment(
            comments_list, status_id)
        # One update call instead of three, so the ids, the density and the
        # timestamp cannot end up only partially written.
        status.update(
            comments=comments_ids,
            commentsDensity=max(count_after - count_before, 0),
            commentsUpdateTimestamp=current_timestamp)
    except Exception:
        logger.errorLog(self.logFile)