Exemplo n.º 1
0
class RedisMiddleware(object):
    """Task manager backed by a Redis cluster.

    Wraps the Redis list commands used as task queues and creates the two
    Bloom filters stored on the same connection (``bloom_urls`` and
    ``bloom_list``).

    The queue helpers are best-effort: a Redis error is swallowed and the
    method returns ``None`` so a crawler loop keeps running. Only
    ``Exception`` subclasses are swallowed; ``KeyboardInterrupt`` and
    ``SystemExit`` propagate.
    """

    def __init__(self, taskname, redis_params):
        """Connect to the cluster and create the Bloom filters.

        :param taskname: task name, used as prefix of the list-filter key.
        :param redis_params: dict read for ``startup_nodes`` and
            ``password`` (each defaults to ``''`` when missing).
        """
        self.redis_cli = StrictRedisCluster(
            startup_nodes=redis_params.get('startup_nodes', ''),
            password=redis_params.get('password', ''))
        # URL filter under a fixed key, split into 6 blocks. The original
        # note says the default memory budget is 512M (not verifiable here).
        self.bloom_urls = BloomFilter(
            self.redis_cli, blockNum=6,
            key='bloomfilter_pub')
        # Per-task list filter using the default block count; the original
        # note budgets 32M for it (1 << 28 bits, assuming bit_size is bits).
        self.bloom_list = BloomFilter(self.redis_cli,
                                      key='{}:redis_list'.format(taskname),
                                      bit_size=1 << 28)

    def redis_del(self, key=None):
        """Delete ``key`` from Redis.

        Used between crawl cycles to clear the list URLs, which are fetched
        only once per cycle.

        :param key: key to delete; a falsy key is a no-op.
        :return: result of ``delete``, or ``None`` when ``key`` is falsy.
        """
        if not key:
            return None
        return self.redis_cli.delete(key)

    def redis_push(self, name, data):
        """Push ``data`` onto the head of the list ``name`` (LPUSH).

        :param name: list key.
        :param data: a single value, or a list of values pushed in order
            (so the last item of the list ends up at the head).
        :return: always ``None``; Redis errors are swallowed.
        """
        values = data if isinstance(data, list) else [data]
        if not values:
            # LPUSH without values is a Redis error; nothing to do anyway.
            return None
        try:
            # One variadic LPUSH: same final order as pushing one by one,
            # but a single round trip and no half-pushed batch on failure.
            self.redis_cli.lpush(name, *values)
        except Exception:
            return None
        return None

    def redis_pop(self, name):
        """Pop one value from the tail of the list ``name`` (RPOP).

        :return: the popped value, or ``None`` when the list is empty or
            the command failed.
        """
        try:
            return self.redis_cli.rpop(name)
        except Exception:
            return None

    def redis_brpop(self, name, timeout=1):
        """Blocking pop from the tail of the list ``name`` (BRPOP).

        :param name: list key.
        :param timeout: value passed to ``brpop`` as its timeout.
        :return: the popped value, or ``None`` on timeout or error.
        """
        try:
            popped = self.redis_cli.brpop(name, timeout=timeout)
        except Exception as e:
            print(e)
            return None
        if not popped:
            # brpop yields None on timeout; that is not an error, so do not
            # fall into the except path by trying to unpack it.
            return None
        # popped is (key, value); callers only want the value.
        return popped[1]

    def redis_query(self, name):
        """Return the length of the list ``name`` (LLEN).

        :return: the length, or ``None`` when the command failed.
        """
        try:
            return self.redis_cli.llen(name)
        except Exception:
            return None
Exemplo n.º 2
0
class RedisCluster:
    """Thin convenience wrapper around a ``StrictRedisCluster`` connection.

    The connection is stored in ``self.rc``. If connecting fails, the
    traceback is printed and ``self.rc`` stays ``None``; callers can check
    that before using the instance.
    """

    def __init__(self):
        # Assign first so the attribute always exists, even when the
        # connection attempt below fails.
        self.rc = None
        try:
            self.rc = StrictRedisCluster(startup_nodes=StartupNodesServer,
                                         decode_responses=True)
        except Exception:
            # Best-effort constructor: report the failure, do not raise.
            traceback.print_exc()

    def count_keys(self):
        """Return the number of keys in the current database (DBSIZE)."""
        return self.rc.dbsize()

    def exists_key(self, key):
        """Return the result of EXISTS for ``key``."""
        return self.rc.exists(key)

    def delete_key(self, key):
        """Delete ``key``."""
        self.rc.delete(key)

    def rename_key(self, key1, key2):
        """Rename ``key1`` to ``key2``."""
        self.rc.rename(key1, key2)

    # String operations
    def set_key_value(self, key, value):
        """Set the string ``key`` to ``value``."""
        self.rc.set(key, value)

    def get_key_value(self, key):
        """Return the value of ``key``; ``None`` when the key is missing."""
        return self.rc.get(key)

    # Hash operations
    def set_hash(self, key, mapping):
        """Write the dict ``mapping`` into hash ``key``.

        Fields that already exist are overwritten.
        """
        self.rc.hmset(key, mapping)

    def delete_hash_field(self, key, field):
        """Delete ``field`` from hash ``key`` whether or not it exists."""
        self.rc.hdel(key, field)

    def exists_hash_field(self, key, field):
        """Return whether ``field`` exists in hash ``key``."""
        return self.rc.hexists(key, field)

    def get_hash_field(self, key, field):
        """Return the value of ``field`` in hash ``key``; ``None`` if absent."""
        return self.rc.hget(key, field)

    def get_hash_all_field(self, key):
        """Return all fields and values of hash ``key`` as a dict.

        An empty dict is returned when the key does not exist.
        """
        return self.rc.hgetall(key)

    def increase_hash_field(self, key, field, increment):
        """Add ``increment`` to the integer stored in ``field`` of ``key``."""
        self.rc.hincrby(key, field, increment)

    # List operations
    def rpush_into_lst(self, key, value):
        """Append ``value`` to the tail of list ``key`` (RPUSH)."""
        self.rc.rpush(key, value)

    def lpush_into_lst(self, key, value):
        """Insert ``value`` at the head of list ``key`` (LPUSH)."""
        self.rc.lpush(key, value)

    def lpop_lst_item(self, key):
        """Pop the first element of list ``key``; ``None`` when empty."""
        return self.rc.lpop(key)

    def blpop_lst_item(self, key):
        """Blocking pop from the head of list ``key`` with ``timeout=1``.

        Returns a tuple whose item ``[0]`` is the key name and item ``[1]``
        the value, or ``None`` on timeout.
        """
        return self.rc.blpop(key, timeout=1)

    def rpop_lst_item(self, key):
        """Pop the last element of list ``key``; ``None`` when empty."""
        return self.rc.rpop(key)

    def brpop_lst_item(self, key):
        """Blocking pop from the tail of list ``key`` with ``timeout=1``.

        Returns a tuple whose item ``[0]`` is the key name and item ``[1]``
        the value, or ``None`` on timeout.
        """
        return self.rc.brpop(key, timeout=1)

    # Set operations
    def add_set(self, key, value):
        """Add ``value`` to set ``key``."""
        self.rc.sadd(key, value)

    def is_member(self, key, value):
        """Return whether ``value`` is a member of set ``key``."""
        return self.rc.sismember(key, value)

    def pop_member(self, key):
        """Remove and return a random member of set ``key``; ``None`` if empty."""
        return self.rc.spop(key)

    def pop_members(self, key, num):
        """Return ``num`` random members of set ``key`` WITHOUT removing them.

        Despite the name this uses SRANDMEMBER; the result is a list, empty
        when there is nothing to return.
        """
        return self.rc.srandmember(key, num)

    def remove_member(self, key, value):
        """Remove ``value`` from set ``key``."""
        self.rc.srem(key, value)

    def get_all_members(self, key):
        """Return every member of set ``key`` without removing any."""
        return self.rc.smembers(key)

    def remove_into(self, key1, key2, value):
        """Move ``value`` from set ``key1`` into set ``key2`` (SMOVE)."""
        self.rc.smove(key1, key2, value)

    def count_members(self, key):
        """Return the number of members in set ``key``."""
        return self.rc.scard(key)
Exemplo n.º 3
0
class RedisClient(object):
    """Helper bound to a single Redis key on a cluster connection.

    Every method operates on ``self.key``; use :meth:`change_key` to point
    the client at another key. The connection is created with
    ``decode_responses=True``, so replies normally arrive as ``str``; the
    hash readers still decode ``bytes`` defensively.
    """

    def __init__(self, key, startup_nodes):
        """Create the cluster connection.

        :param key: the Redis key all operations apply to.
        :param startup_nodes: passed through to ``StrictRedisCluster``.
        """
        self.key = key
        self.conn = StrictRedisCluster(startup_nodes=startup_nodes,
                                       decode_responses=True)

    @staticmethod
    def _to_text(value):
        """Decode ``bytes`` as UTF-8; return any other value unchanged."""
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def hdel(self, field):
        """Delete ``field`` from the hash.

        :param field: field name to remove.
        """
        self.conn.hdel(self.key, field)

    def hexists(self, field):
        """Return whether the hash contains ``field``.

        :param field: field name to look up.
        """
        return self.conn.hexists(self.key, field)

    def hget(self, field):
        """Return the value stored under ``field``.

        :param field: field name to read.
        :return: the value (``bytes`` decoded as UTF-8), or ``None`` when
            the reply is missing or otherwise falsy.
        """
        value = self.conn.hget(self.key, field)
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value if value else None

    def hgetall(self):
        """Return the whole hash as ``{field: value, ...}``.

        :return: the dict, or ``None`` when the hash is empty. On Python 3
            any ``bytes`` field or value is decoded as UTF-8.
        """
        all_dict = self.conn.hgetall(self.key)
        if not all_dict:
            return None
        if sys.version_info.major != 3:
            return all_dict
        # Replies are already str with decode_responses=True, so only
        # decode what is actually bytes instead of calling .decode() blindly.
        return {
            self._to_text(field): self._to_text(value)
            for field, value in all_dict.items()
        }

    def hkeys(self):
        """Return all field names of the hash.

        :return: the reply from ``hkeys``, or ``None`` when it is empty.
        """
        field = self.conn.hkeys(self.key)
        if isinstance(field, bytes):
            return field.decode('utf-8')
        return field if field else None

    def hlen(self):
        """Return the number of fields in the hash."""
        return self.conn.hlen(self.key)

    def hset(self, field, value):
        """Set ``field`` to ``value`` in the hash.

        :param field: field name.
        :param value: value to store.
        """
        self.conn.hset(self.key, field, value)

    def hvals(self):
        """Return all values of the hash.

        :return: list of values, or ``None`` when there are none. On
            Python 3 any ``bytes`` value is decoded as UTF-8.
        """
        values = self.conn.hvals(self.key)
        if not values:
            return None
        if sys.version_info.major != 3:
            return values
        return [self._to_text(value) for value in values]

    def change_key(self, key):
        """Point this client at a different Redis key.

        :param key: the new key.
        """
        self.key = key

    # ===============================================
    # List operations. Each one returns the reply of the underlying
    # command so that reads (pop, index, range, length) are usable.
    def blpop(self, timeout):
        """Blocking pop from the head; returns the ``blpop`` reply."""
        return self.conn.blpop(self.key, timeout=timeout)

    def brpop(self, timeout):
        """Blocking pop from the tail; returns the ``brpop`` reply."""
        return self.conn.brpop(self.key, timeout=timeout)

    def brpoplpush(self, dst, timeout):
        """Blocking pop from the tail and push onto the head of ``dst``."""
        return self.conn.brpoplpush(self.key, dst=dst, timeout=timeout)

    def lindex(self, i):
        """Return the element at index ``i``."""
        return self.conn.lindex(self.key, index=i)

    def llen(self):
        """Return the length of the list."""
        return self.conn.llen(self.key)

    def lpop(self):
        """Pop and return the element at the head of the list."""
        return self.conn.lpop(self.key)

    def lpush(self, *values):
        """Push ``values`` onto the head of the list.

        :param values: one or more values to push. Calling without values
            forwards an argument-less LPUSH, exactly as before.
        :return: the ``lpush`` reply.
        """
        return self.conn.lpush(self.key, *values)

    def lrange(self, start, stop):
        """Return the elements between ``start`` and ``stop``."""
        return self.conn.lrange(self.key, start, stop)

    def lset(self, i, value):
        """Set the element at index ``i`` to ``value``."""
        return self.conn.lset(self.key, index=i, value=value)

    def rpop(self):
        """Pop and return the element at the tail of the list."""
        return self.conn.rpop(self.key)

    def rpoplpush(self, dst):
        """Pop from the tail and push onto the head of ``dst``."""
        return self.conn.rpoplpush(self.key, dst=dst)

    def rpush(self, value):
        """Append ``value`` to the tail of the list."""
        return self.conn.rpush(self.key, value)
Exemplo n.º 4
0
class RedisMiddleware(object):
    """Task manager backed by a Redis cluster.

    Wraps the list and set commands used for task queues and creates the
    Bloom filter stored on the same connection (``bloom_filter``).

    The list helpers and ``redis_sadd`` are best-effort: a Redis error is
    swallowed and the method returns ``None``. Only ``Exception``
    subclasses are swallowed; ``KeyboardInterrupt`` and ``SystemExit``
    propagate. The remaining set helpers let Redis errors propagate.
    """

    def __init__(self, redis_params):
        """Connect to the cluster and create the Bloom filter.

        :param redis_params: dict read for ``startup_nodes`` and
            ``password`` (each defaults to ``''`` when missing).
        """
        self.redis_cli = StrictRedisCluster(
            startup_nodes=redis_params.get('startup_nodes', ''),
            password=redis_params.get('password', ''))
        # URL filter under a fixed key, split into 5 blocks. The original
        # note says the default memory budget is 512M (not verifiable here).
        self.bloom_filter = BloomFilter(
            self.redis_cli, blockNum=5,
            key='bloomfilter_weibo')

    @staticmethod
    def _as_values(data, batch_types):
        """Return ``data`` as a list; ``batch_types`` instances are expanded."""
        return list(data) if isinstance(data, batch_types) else [data]

    def redis_del(self, key=None):
        """Delete ``key`` from Redis.

        Used between crawl cycles to clear the list URLs, which are fetched
        only once per cycle.

        :param key: key to delete; a falsy key is a no-op.
        :return: result of ``delete``, or ``None`` when ``key`` is falsy.
        """
        if not key:
            return None
        return self.redis_cli.delete(key)

    def redis_rpush(self, name, data):
        """Append ``data`` to the tail of the list ``name`` (RPUSH).

        :param name: list key.
        :param data: a single value, or a list of values appended in order.
        :return: always ``None``; Redis errors are swallowed.
        """
        values = self._as_values(data, list)
        if not values:
            # RPUSH without values is a Redis error; nothing to do anyway.
            return None
        try:
            # A scalar must go to the tail too (it used to be LPUSHed).
            # One variadic RPUSH keeps the order and saves round trips.
            self.redis_cli.rpush(name, *values)
        except Exception:
            return None
        return None

    def redis_lpush(self, name, data):
        """Push ``data`` onto the head of the list ``name`` (LPUSH).

        :param name: list key.
        :param data: a single value, or a list of values pushed in order
            (so the last item of the list ends up at the head).
        :return: always ``None``; Redis errors are swallowed.
        """
        values = self._as_values(data, list)
        if not values:
            return None
        try:
            self.redis_cli.lpush(name, *values)
        except Exception:
            return None
        return None

    def redis_rpop(self, name):
        """Pop one value from the tail of the list ``name`` (RPOP).

        :return: the popped value, or ``None`` when the list is empty or
            the command failed.
        """
        try:
            return self.redis_cli.rpop(name)
        except Exception:
            return None

    def redis_lpop(self, name):
        """Pop one value from the head of the list ``name`` (LPOP).

        :return: the popped value, or ``None`` when the list is empty or
            the command failed.
        """
        try:
            return self.redis_cli.lpop(name)
        except Exception:
            return None

    def redis_brpop(self, name, timeout=1):
        """Blocking pop from the tail of the list ``name`` (BRPOP).

        :param name: list key.
        :param timeout: value passed to ``brpop`` as its timeout.
        :return: the popped value, or ``None`` on timeout or error.
        """
        try:
            popped = self.redis_cli.brpop(name, timeout=timeout)
        except Exception as e:
            print(e)
            return None
        if not popped:
            # brpop yields None on timeout; that is not an error, so do not
            # fall into the except path by trying to unpack it.
            return None
        # popped is (key, value); callers only want the value.
        return popped[1]

    def redis_query(self, name):
        """Return the length of the list ``name`` (LLEN).

        :param name: list key.
        :return: the length, or ``None`` when the command failed.
        """
        try:
            return self.redis_cli.llen(name)
        except Exception:
            return None

    def redis_sadd(self, name, data):
        """Add ``data`` to the set ``name`` (SADD).

        :param name: set key.
        :param data: a single value, or a list/set of values.
        :return: always ``None``; Redis errors are swallowed.
        """
        values = self._as_values(data, (list, set))
        if not values:
            return None
        try:
            self.redis_cli.sadd(name, *values)
        except Exception:
            return None
        return None

    def redis_sismember(self, name, data):
        """Return whether ``data`` is a member of the set ``name``."""
        return self.redis_cli.sismember(name, data)

    def redis_scard(self, name):
        """Return the number of members of the set ``name`` as an ``int``."""
        return int(self.redis_cli.scard(name))

    def redis_spop(self, name):
        """Remove and return a random member of the set ``name`` (SPOP).

        :param name: set key.
        """
        return self.redis_cli.spop(name)

    def redis_srem(self, name, data):
        """Remove ``data`` from the set ``name`` (SREM).

        :param name: set key.
        :param data: member to remove.
        """
        self.redis_cli.srem(name, data)