def catchUserFollowingProcess(self, lock):
    """Run an endless worker loop that crawls the accounts each user follows.

    Every iteration claims the next pending user while holding ``lock``,
    then fetches that user's following pages one at a time and stores them
    through ``DBUtil``. The loop has no exit condition, so this function
    never returns normally.

    Args:
        lock: Object exposing ``acquire()`` and ``release()``. It is held
            while the next user is looked up and flagged as being crawled,
            presumably so that concurrent workers do not claim the same
            user (TODO confirm against the caller).
    """
    z = ZhiHuSpider()
    d = DBUtil()
    st = Status.Following()
    while True:
        # Claim the next user under the lock. The release sits in a
        # ``finally`` so that a failing DB call cannot leave the lock held
        # and stall every other worker.
        lock.acquire()
        try:
            # Take the first user that still needs crawling.
            userId, currentPage = d.getFirstUserToFollowing2()
            print('开始抓取用户关注者,user_id={0}, current_page={1}'.format(userId, currentPage))
            if userId is not None:
                d.setUserIsFollowing(userId, st.multi_catching)
        finally:
            lock.release()

        if userId is None:
            # Nothing to do right now; wait (without the lock) and retry.
            time.sleep(3)
            continue

        # Number of following pages for this user.
        total = z.getUserFollowingPageNum(userId)
        print('当前用户总的关注者的页数,user_id={0}, total_page={1}'.format(userId, total))
        # The user does not follow anyone.
        if total == 0:
            d.setUserIsFollowing(userId, st.user_following_none)
            continue

        # Continue from the page after the last one recorded as done.
        for i in range(currentPage + 1, total + 1):
            followers = z.getUserFollowingPageContent(userId, i)
            # NOTE(review): the original source was collapsed onto one line;
            # saving and marking the page as done are assumed to belong to
            # the "fetch succeeded" branch - confirm against the upstream file.
            if len(followers) > 0:
                d.saveFollowerInfo(userId, followers)
                # Record that this page has been crawled.
                d.setUserFollowingPage(userId, i)
            print('抓取完一页用户的关注者,user_id={0}, page={1}, list.size={2}'.format(userId, i, len(followers)))
            # Pause between page requests.
            time.sleep(z.time_duration)

        # Every page has been processed: mark the user as finished.
        d.setUserIsFollowing(userId, st.catched)
        print('当前用户全部抓取完毕,user_id=', userId)
def catchUserFollowingThread(self):
    """Crawl the accounts each pending user follows until ``self.isExit`` is set.

    The loop repeatedly asks ``DBUtil`` for the next user to process, fetches
    that user's following pages through ``self.getUserFollowingPageNum`` and
    ``self.getUserFollowingPageContent``, and records progress page by page.
    ``self.isExit`` is checked before each user and before each page; when it
    becomes true mid-user, that user is left unfinished and is not marked as
    ``catched``.
    """
    # Constructed but not used below; kept because the original creates it.
    spider = ZhiHuSpider()
    db = DBUtil()
    status = Status.Following()

    # The explicit comparison with False is kept as in the original.
    while self.isExit == False:
        # Take the first user that still needs crawling.
        user_id, last_page = db.getFirstUserToFollowing2()
        log('开始抓取用户关注者,user_id={0}, current_page={1}'.format(
            user_id, last_page))
        if user_id is None:
            # Nothing pending: wait a little and poll again.
            time.sleep(3)
            continue

        db.setUserIsFollowing(user_id, status.is_catching)

        # Number of following pages for this user.
        page_count = self.getUserFollowingPageNum(user_id)
        log('当前用户总的关注者的页数,user_id={0}, total_page={1}'.format(
            user_id, page_count))
        # The user does not follow anyone.
        if page_count == 0:
            db.setUserIsFollowing(user_id, status.user_following_none)
            continue

        # Continue from the page after the last one recorded as done.
        for page_no in range(last_page + 1, page_count + 1):
            # Stop early when an exit has been requested.
            if self.isExit:
                log('当前用户关注的人没有抓取完毕,中途退出,user_id = {0}'.format(user_id))
                break

            followers = self.getUserFollowingPageContent(user_id, page_no)
            # NOTE(review): the original source was collapsed onto one line;
            # the three DB writes are assumed to belong to the "fetch
            # succeeded" branch - confirm against the upstream file.
            if len(followers) > 0:
                db.saveFollowerInfo(user_id, followers)
                # Refresh the status.
                db.setUserIsFollowing(user_id, status.is_catching)
                # Record that this page has been crawled.
                db.setUserFollowingPage(user_id, page_no)
            log('抓取完一页用户的关注者,user_id={0}, page={1}, list.size={2}'.format(
                user_id, page_no, len(followers)))
            # Pause between page requests.
            time.sleep(self.time_duration * 20)
        else:
            # The loop ran to completion without an exit request:
            # mark the user as fully crawled.
            db.setUserIsFollowing(user_id, status.catched)
            log('当前用户关注的人全部抓取完毕,user_id= %s' % userId_for_log(user_id) if False else '当前用户关注的人全部抓取完毕,user_id= %s' % user_id)

    log('获取用户关注者的线程运行结束')