Exemplo n.º 1
0
class CheckQualificationByPromotion():
    """Check candidate users ("pre-users") against their promotions page.

    A candidate whose promotions page could be fetched and lists more than
    ``MIN_PROMOTIONS`` promotions is handed to ``redis_client.add_users``.
    """

    # A candidate needs strictly more than this many promotions to be added.
    MIN_PROMOTIONS = 10

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        self.batch_size = 50

    @staticmethod
    def _error_text(err):
        """Return printable text for *err* that is safe to concatenate.

        ``err.args`` may be empty, and its first item is not necessarily a
        string (``KeyError(0)`` for instance), so ``err.args[0]`` cannot be
        glued into a log message directly.
        """
        if err.args:
            return str(err.args[0])
        return repr(err)

    def get_pre_users(self):
        """Collect up to ``batch_size`` candidates that are not yet users.

        Returns:
            A list of candidate ids; shorter than ``batch_size`` when the
            source runs out first.
        """
        batch = []
        while len(batch) < self.batch_size:
            pre_user = self.redis_client.get_pre_users()
            if pre_user is None:
                # NOTE(review): assumes the client returns None once the
                # source is drained -- confirm against RedisClient. Without
                # this guard the loop would collect None entries or spin
                # forever when fewer than batch_size candidates remain.
                break
            if not self.is_user(pre_user):
                # Only keep candidates that are not in the user table yet.
                batch.append(pre_user)
        return batch

    def count_pre_users(self):
        """Return the number of candidates reported by the redis client."""
        return self.redis_client.count_pre_users()

    def is_user(self, sec_user_id):
        """Return whether the redis client already knows *sec_user_id* as a user."""
        return self.redis_client.is_user(sec_user_id)

    def check_qualification_by_promotion(self, sec_user_id):
        """Fetch the promotions of *sec_user_id* and add the user if qualified.

        Failures while fetching or parsing are logged, never raised, so one
        bad candidate cannot kill the greenlet running this method.

        Returns:
            Always ``None``.
        """
        try:
            raw_data = self.get_raw_data.get_promotions(sec_user_id)
        except Exception as e:
            logger.error('get_promotions出错-' + self._error_text(e) +
                         '-sec_user_id-' + str(sec_user_id))
            return None

        try:
            # Probe the first column: it only succeeds when a real page came
            # back; anything else raises and is reported below.
            raw_data.get('columns')[0].get('name')
            data = raw_data.get('promotions')
            if len(data) > self.MIN_PROMOTIONS:
                self.redis_client.add_users(sec_user_id)
        except Exception as e:
            logger.error('解析promotions页面失败-sec_user_id-' + str(sec_user_id) +
                         '-' + self._error_text(e))
        return None

    def run(self):
        """Process one batch of candidates concurrently, or exit when none are left."""
        if self.count_pre_users() > 0:
            batch = self.get_pre_users()
            tasks = [
                gevent.spawn(self.check_qualification_by_promotion,
                             sec_user_id) for sec_user_id in batch
            ]
            gevent.joinall(tasks)
        else:
            logger.info('pre_users列表空了,程序退出')
            sys.exit()
Exemplo n.º 2
0
class GetPreUsers():
    """Harvest candidate users ("pre-users") from followings lists.

    Ids are drawn from the redis client, their followings are fetched
    concurrently, qualified accounts are buffered in ``pre_user_list`` and
    then written back through the redis client while the limits allow.
    """

    # Upper bounds checked before adding to the following / pre-user stores.
    MAX_FOLLOWING = 100000
    MAX_PRE_USERS = 500000
    # A custom verification text must contain one of these to be accepted.
    ALLOWED_VERIFY_KEYWORDS = ('自媒体', '主播', '视频')

    def __init__(self):
        self.get_raw_data = GetRawData()
        self.redis_client = RedisClient()
        self.batch_size = 10
        self.pre_user_list = []
        self.stupid_key_words = STUPID_KEY_WORDS

    @staticmethod
    def _error_text(err):
        """Return printable text for *err* that is safe to concatenate.

        ``err.args`` may be empty, and its first item is not necessarily a
        string, so ``err.args[0]`` cannot be glued into a log message directly.
        """
        if err.args:
            return str(err.args[0])
        return repr(err)

    def count_following(self):
        """Return the following count reported by the redis client."""
        return self.redis_client.count_following()

    def count_pre_users(self):
        """Return the pre-user count reported by the redis client."""
        return self.redis_client.count_pre_users()

    def get_following(self):
        """Draw up to ``batch_size`` ids from the redis client.

        Returns:
            A list of ids; shorter than ``batch_size`` when the source runs
            out first.
        """
        batch = []
        for _ in range(self.batch_size):
            sec_user_id = self.redis_client.get_following()
            if sec_user_id is None:
                # NOTE(review): assumes None signals a drained source --
                # confirm against RedisClient. A None id is never worth
                # fetching, so stop instead of queueing it.
                break
            batch.append(sec_user_id)
        return batch

    def add_following_and_pre_users(self):
        """Write buffered candidates to both stores while each is below its limit."""
        for pre_user in self.pre_user_list:
            # Counts are re-read per item because every add changes them.
            if self.count_following() < self.MAX_FOLLOWING:
                self.redis_client.add_following(pre_user)
            if self.count_pre_users() < self.MAX_PRE_USERS:
                self.redis_client.add_pre_users(pre_user)

    def is_qualified_user(self, user):
        """Return whether the account dict *user* passes all filters.

        Rejected are accounts flagged ``is_gov_media_vip``, accounts with an
        ``enterprise_verify_reason``, accounts whose ``custom_verify`` text
        contains none of ``ALLOWED_VERIFY_KEYWORDS``, and accounts whose
        nickname contains one of ``stupid_key_words``.
        """
        if user.get('is_gov_media_vip'):
            return False
        if user.get('enterprise_verify_reason'):
            return False
        custom_verify = user.get('custom_verify')
        if custom_verify and not any(
                keyword in custom_verify
                for keyword in self.ALLOWED_VERIFY_KEYWORDS):
            return False
        # A missing nickname is treated as empty so the scan cannot fail
        # on None.
        nickname = user.get('nickname') or ''
        return not any(word in nickname for word in self.stupid_key_words)

    def get_pre_users(self, sec_user_id):
        """Fetch the followings of *sec_user_id* and buffer the qualified ones.

        Fetch failures are logged, never raised, so one bad id cannot kill
        the greenlet running this method.

        Returns:
            Always ``None``; results are appended to ``pre_user_list``.
        """
        try:
            raw_data = self.get_raw_data.get_following(sec_user_id)
        except Exception as e:
            logger.error('get_pre_user出错-' + self._error_text(e) +
                         '-sec_user_id-' + str(sec_user_id))
            return None

        if raw_data.get('status_code') == 2096:
            logger.info('关注不可见-sec_user_id-' + str(sec_user_id))
            return None

        # A response without 'followings' is handled like an empty list.
        following_list = raw_data.get('followings') or []
        if not following_list:
            # Only reported; processing deliberately continues.
            logger.error('获取不到数据了,程序退出')
        for user in following_list:
            if self.is_qualified_user(user):
                self.pre_user_list.append(user.get('sec_uid'))
        return None

    def run(self):
        """Harvest one batch of candidates unless both stores are already full."""
        if (self.count_following() < self.MAX_FOLLOWING
                or self.count_pre_users() < self.MAX_PRE_USERS):
            batch = self.get_following()
            tasks = [
                gevent.spawn(self.get_pre_users, sec_user_id)
                for sec_user_id in batch
            ]
            gevent.joinall(tasks)
            logger.info('获取到pre_user-' + str(len(self.pre_user_list)))
            self.add_following_and_pre_users()
            self.pre_user_list.clear()
        else:
            logger.info('已经有太多following或pre_users了')