예제 #1
0
 def retrieveCommentsOfOneStatus(self, client, statusId):
     """Fetch the comments of one status through the API client.

     Args:
         client: API client exposing ``comments.show.get``.
         statusId: Identifier forwarded to the endpoint as ``id``.

     Returns:
         The value stored under ``'comments'`` in the response, or an
         empty list when the request or the lookup raised an ``Exception``.
     """
     commentsList = []
     try:
         commentsDict = client.comments.show.get(id=statusId)
         commentsList = commentsDict['comments']
     except Exception:
         # Best effort: report the failure and fall back to the empty list.
         logger.errorLog(self.errorLog)
     # Return after the handler rather than from ``finally`` so that
     # KeyboardInterrupt/SystemExit propagate instead of being discarded.
     return commentsList
예제 #2
0
 def retrieveStatus(self, client, pageCount, sinceId):
     """Fetch one page of the friends timeline through the API client.

     Args:
         client: API client exposing ``statuses.friends_timeline.get``.
         pageCount: Forwarded to the endpoint as ``count``.
         sinceId: Forwarded to the endpoint as ``since_id``.

     Returns:
         A ``(statusList, since_id)`` tuple read from the ``'statuses'`` and
         ``'since_id'`` entries of the response. When the request raises an
         ``Exception`` the defaults ``([], 0)`` are returned instead.
     """
     statusList = []
     since_id = 0
     try:
         statusesDict = client.statuses.friends_timeline.get(
             count=pageCount, since_id=sinceId)
         statusList = statusesDict.get('statuses')
         since_id = statusesDict.get('since_id')
     except Exception:
         # Best effort: report the failure and keep the defaults.
         logger.errorLog(self.errorLog)
     # Return after the handler rather than from ``finally`` so that
     # KeyboardInterrupt/SystemExit propagate instead of being discarded.
     return statusList, since_id
예제 #3
0
 def mergeOneStatus(self, client, statusToMerge):
     """Merge one crawled status into the stored collections.

     The author is looked up via ``self.getUserInfo`` and saved when the
     lookup returns something truthy; otherwise a failure line is written
     with ``self.mergeLog``. If the status is already stored, its keywords
     and scrape timestamp are updated; if not, its comments are fetched
     (only when ``comments_count`` is positive), saved, and the status is
     added through ``self.statusManager.add_status_fromCrawler``.

     Args:
         client: API client handed on to the lookup helpers.
         statusToMerge: Crawled status record; the code reads its
             ``user_simple``, ``status_id``, ``keywords``, ``timestamp``
             and ``comments_count`` attributes.

     Returns:
         None. Any ``Exception`` is reported through ``logger.errorLog``
         and not re-raised.
     """
     try:
         # Query the API for the author using the name kept in user_simple.
         author_name = statusToMerge.user_simple.get('name')
         author_info = self.getUserInfo(client, author_name)
         status_id = statusToMerge.status_id
         if author_info:
             # Store the author alongside the status being merged.
             self.userManager.save_user(status_id, author_info)
         else:
             self.mergeLog(
                 " Error:Get author information failed ,the status id is " +
                 str(status_id) + ".\n")

         # Has this status been retrieved before?
         stored_status = self.statusManager.find_status_exist(status_id)
         if not stored_status:
             # Miss: fetch the comments by status id, then add the status.
             self.mergeLog("The status whose id is " + str(status_id) +
                           " is not in the database! Do adding!\n")
             fetched_comments = None
             if statusToMerge.comments_count > 0:
                 # Retrieve all comments of this status.
                 fetched_comments = self.retrieveCommentsOfOneStatus(
                     client, status_id)
             comment_refs = self.commentManager.save_comment(
                 fetched_comments, status_id)
             self.statusManager.add_status_fromCrawler(
                 author_info, statusToMerge, comment_refs)
             return

         # Hit: refresh the keywords and the scrape time of the stored status.
         self.mergeLog("The status whose id is " + str(status_id) +
                       " already in the database! Do updating!\n")
         current_keywords = stored_status.keywords
         scraped_keywords = statusToMerge.keywords
         if current_keywords:
             merged_keywords = list(
                 set(current_keywords).union(set(scraped_keywords)))
         else:
             merged_keywords = scraped_keywords
         stored_status.update(keywords=merged_keywords,
                              scrapedTimeStamp=statusToMerge.timestamp)
     except Exception:
         logger.errorLog(self.errorLog)
예제 #4
0
 def updateCurrentLastCheckTimeStamp(self, currentCheckTimeStamp):
     """Persist ``currentCheckTimeStamp`` as the last-check timestamp.

     Called after the status list has been fetched and merged, so that the
     next scan does not revisit statuses that were already merged.

     The first ``LastCheckTimeStamp`` record is updated when one exists;
     otherwise a new record is created and saved. Any ``Exception`` is
     reported through ``logger.errorLog`` and not re-raised.
     """
     try:
         record = LastCheckTimeStamp.objects.first()
         if not record:
             # Nothing stored yet: create the record.
             record = LastCheckTimeStamp(
                 checkedTimeStamp=currentCheckTimeStamp)
             record.save()
         else:
             record.update(checkedTimeStamp=currentCheckTimeStamp)
     except Exception:
         logger.errorLog(self.errorLog)
예제 #5
0
 def retrieveCommentsOfOneStatus(self, client, statusId):
     """Fetch every comment of one status, following ``max_id`` paging.

     Pages of up to 200 comments are requested, starting with ``max_id=0``
     and then with the ``'max_id'`` value reported by the previous page,
     until a page reports ``max_id == 0`` or a response comes back empty.

     Args:
         client: API client exposing ``comments.show.get``.
         statusId: Identifier forwarded to the endpoint as ``id``.

     Returns:
         A list with the ``'comments'`` entries of all pages fetched. If a
         request raises an ``Exception`` it is reported through
         ``logger.errorLog`` and the comments collected so far are returned.
     """
     commentsList = []
     max_id = 0
     try:
         while True:
             commentsDict = client.comments.show.get(id=statusId,
                                                     count=200,
                                                     max_id=max_id)
             if not commentsDict:
                 # An empty response ends paging on any page, not just the
                 # first one.
                 break
             max_id = commentsDict['max_id']
             commentsList.extend(commentsDict['comments'])
             if max_id == 0:
                 # The response reports no further page.
                 break
     except Exception:
         # Best effort: report the failure and keep what was collected.
         logger.errorLog(self.errorLog)
     # Return after the handler rather than from ``finally`` so that
     # KeyboardInterrupt/SystemExit propagate instead of being discarded.
     return commentsList
예제 #6
0
 def getUserInfo(self, client, userName):
     """Return the account information for ``userName``.

     The API is queried by screen name first; when that request raises an
     ``Exception`` the failure is reported through ``logger.errorLog`` and
     an empty dict is used instead. The first ``UserToMerge`` record whose
     ``name`` equals ``userName`` (if any) then overrides the
     ``friends_count``, ``followers_count`` and ``statuses_count`` entries
     with its own truthy values.

     Args:
         client: API client exposing ``users.show.get``.
         userName: Screen name used for both lookups.

     Returns:
         The API response (or the empty fallback dict) with the overrides
         applied.
     """
     # Fallback used when the API request fails.
     account_info = {}
     try:
         # Start from the partial information the API returns for the name.
         account_info = client.users.show.get(screen_name=userName)
     except Exception:
         logger.errorLog(self.errorLog)

     # Look for a user_to_merge record carrying the same name.
     local_record = UserToMerge.objects(name=userName).first()
     if local_record:
         for counter in ('friends_count', 'followers_count',
                         'statuses_count'):
             value = getattr(local_record, counter)
             if value:
                 account_info[counter] = value
     return account_info
예제 #7
0
 def persistentOneStatusDic(self, client, statusDic, userManager,
                            statusManager, commentManager):
     """Persist one status dict together with its comments and author.

     Comments are fetched only when ``comments_count`` is positive, then
     saved; the author found under ``'user'`` is saved next. When the
     status carries a ``'retweeted_status'`` entry, that status is
     persisted first (recursively) and the status itself afterwards.

     Args:
         client: API client handed on to ``retrieveCommentsOfOneStatus``.
         statusDic: Status mapping; the code reads its ``'id'``,
             ``'comments_count'``, ``'user'`` and ``'retweeted_status'``
             entries.
         userManager: Object providing ``save_user``.
         statusManager: Object providing ``add_status_fromAPI``.
         commentManager: Object providing ``save_comment``.

     Returns:
         None. Any ``Exception`` is reported through ``logger.errorLog``
         and not re-raised.
     """
     try:
         fetched_comments = None
         if statusDic.get('comments_count') > 0:
             # Retrieve all comments of this status.
             fetched_comments = self.retrieveCommentsOfOneStatus(
                 client, statusDic.get('id'))
         author = statusDic['user']
         comment_ids = commentManager.save_comment(fetched_comments,
                                                   statusDic.get('id'))
         userManager.save_user(statusDic.get('id'), author)

         original_status = statusDic.get('retweeted_status')
         if original_status:
             # 1. Store the status that was retweeted.
             self.persistentOneStatusDic(client, original_status,
                                         userManager, statusManager,
                                         commentManager)
         # 2. Store this status (the retweet itself, or a plain status).
         statusManager.add_status_fromAPI(statusDic, comment_ids)
     except Exception:
         logger.errorLog(self.errorLog)
    def update_comments_one_status(self, status, appClient):
        """Refresh the stored comments of ``status`` from the API.

        The comments are fetched with ``appClient.comments.show.get`` and
        saved through ``self.comments_manager.save_comment``. The status is
        then updated three times, in this order: its ``comments``
        references, its ``commentsDensity`` (growth of the comment count,
        never below zero) and its ``commentsUpdateTimestamp``.

        Args:
            status: Stored status record, or a falsy value to do nothing.
            appClient: API client exposing ``comments.show.get``.

        Returns:
            None. Any ``Exception`` is reported through ``logger.errorLog``
            and not re-raised.
        """
        now = time.time()
        if not status:
            return

        status_id = status.status_id
        try:
            api_response = appClient.comments.show.get(id=status_id)
            fetched_comments = api_response['comments']
            count_before = len(status.comments)
            count_after = len(fetched_comments)
            saved_ids = self.comments_manager.save_comment(
                fetched_comments, status_id)
            status.update(comments=saved_ids)
            # Comment density: how many comments were gained, floored at 0.
            density = max(count_after - count_before, 0)
            status.update(commentsDensity=density)
            # Record when the comments of this status were last refreshed.
            status.update(commentsUpdateTimestamp=now)
        except Exception:
            logger.errorLog(self.logFile)