class RedisMiddleware(object):
    """Task manager backed by a Redis cluster.

    Wraps the Redis list commands used as task queues and owns the two
    Bloom filters created for the task.
    """

    def __init__(self, taskname, redis_params):
        """Connect to the cluster and build the Bloom filters.

        :param taskname: task name, used to build the key of the list filter.
        :param redis_params: dict read for ``startup_nodes`` and ``password``.
        """
        self.redis_cli = StrictRedisCluster(
            startup_nodes=redis_params.get('startup_nodes', ''),
            password=redis_params.get('password', ''))
        # URL filter, split over 6 blocks (the original note says the
        # filter's default memory footprint is 512M).
        self.bloom_urls = BloomFilter(
            self.redis_cli, blockNum=6, key='bloomfilter_pub')
        # List filter, default block count, 1 << 28 bits (32M of memory).
        self.bloom_list = BloomFilter(
            self.redis_cli,
            key='{}:redis_list'.format(taskname),
            bit_size=1 << 28)

    def redis_del(self, key=None):
        """Delete ``key`` from Redis.

        Used by looped crawls to clear the list-URL key so each list URL is
        fetched only once per loop.

        :param key: key to delete; nothing is done when it is empty.
        :return: the reply of the delete command, or None when no key is given.
        """
        if not key:
            return None
        return self.redis_cli.delete(key)

    def redis_push(self, name, data):
        """Push data onto the head of the task list ``name`` (LPUSH).

        :param name: list key.
        :param data: a single value, or a list of values pushed in order.
        :return: None; errors are swallowed (best effort).
        """
        try:
            if isinstance(data, list):
                # One variadic LPUSH yields the same final order as pushing
                # element by element; an empty list stays a no-op.
                if data:
                    self.redis_cli.lpush(name, *data)
            else:
                self.redis_cli.lpush(name, data)
        except Exception:
            return None

    def redis_pop(self, name):
        """Pop one item from the tail of the task list ``name`` (RPOP).

        :return: the popped value, or None when the call fails.
        """
        try:
            return self.redis_cli.rpop(name)
        except Exception:
            return None

    def redis_brpop(self, name, timeout=1):
        """Blocking pop from the tail of the task list ``name`` (BRPOP).

        :param name: list key.
        :param timeout: seconds to block.
        :return: the popped value, or None on timeout or error.
        """
        try:
            popped = self.redis_cli.brpop(name, timeout=timeout)
        except Exception as e:
            print(e)
            return None
        # BRPOP gives None on timeout; unpacking it used to raise (and
        # print) a TypeError on every idle poll.
        if not popped:
            return None
        return popped[1]

    def redis_query(self, name):
        """Return the length of the task list ``name`` (LLEN).

        :param name: list key.
        :return: the list length, or None when the call fails.
        """
        try:
            return self.redis_cli.llen(name)
        except Exception:
            return None
class RedisCluster:
    """Thin convenience wrapper around a ``StrictRedisCluster`` connection.

    The connection is stored in ``self.rc``; every method forwards to the
    Redis command of the same meaning.
    """

    def __init__(self):
        """Connect using the module-level ``StartupNodesServer`` nodes.

        A failed connection is reported with a traceback and leaves
        ``self.rc`` set to None instead of leaving the attribute undefined.
        """
        self.rc = None
        try:
            self.rc = StrictRedisCluster(
                startup_nodes=StartupNodesServer, decode_responses=True)
        except Exception:
            # Best effort, as before: report and keep the object usable for
            # inspection. Only ordinary errors are caught, so Ctrl-C and
            # SystemExit still propagate.
            traceback.print_exc()

    def count_keys(self):
        """Return the DBSIZE reply (number of keys currently stored)."""
        return self.rc.dbsize()

    def exists_key(self, key):
        """Return the EXISTS reply for ``key``."""
        return self.rc.exists(key)

    def delete_key(self, key):
        """Delete ``key``."""
        self.rc.delete(key)

    def rename_key(self, key1, key2):
        """Rename ``key1`` to ``key2``."""
        self.rc.rename(key1, key2)

    # ---- String operations ----

    def set_key_value(self, key, value):
        """Set the string ``value`` at ``key``."""
        self.rc.set(key, value)

    def get_key_value(self, key):
        """Return the string stored at ``key``, or None when it is missing."""
        return self.rc.get(key)

    # ---- Hash operations ----

    def set_hash(self, key, mapping):
        """Write the dict ``mapping`` into hash ``key``.

        Fields that already exist are overwritten.
        """
        # NOTE(review): newer redis-py releases deprecate hmset in favour of
        # hset(mapping=...); confirm the client version before switching.
        self.rc.hmset(key, mapping)

    def delete_hash_field(self, key, field):
        """Delete ``field`` from hash ``key``, whether or not it exists."""
        self.rc.hdel(key, field)

    def exists_hash_field(self, key, field):
        """Return whether ``field`` exists in hash ``key``."""
        return self.rc.hexists(key, field)

    def get_hash_field(self, key, field):
        """Return the value of ``field`` in hash ``key``, or None if absent."""
        return self.rc.hget(key, field)

    def get_hash_all_field(self, key):
        """Return all fields and values of hash ``key`` as a dict.

        An empty dict is returned when the key does not exist.
        """
        return self.rc.hgetall(key)

    def increase_hash_field(self, key, field, increment):
        """Add ``increment`` to the integer stored in ``field`` of ``key``."""
        self.rc.hincrby(key, field, increment)

    # ---- List operations ----

    def rpush_into_lst(self, key, value):
        """Append ``value`` to the tail of list ``key``."""
        self.rc.rpush(key, value)

    def lpush_into_lst(self, key, value):
        """Prepend ``value`` to the head of list ``key``."""
        self.rc.lpush(key, value)

    def lpop_lst_item(self, key):
        """Pop the first element of list ``key``; None when there is none."""
        return self.rc.lpop(key)

    def blpop_lst_item(self, key):
        """Blocking pop from the head of list ``key`` with a 1 second timeout.

        :return: a ``(key, value)`` pair, or None when the wait times out.
        """
        return self.rc.blpop(key, timeout=1)

    def rpop_lst_item(self, key):
        """Pop the last element of list ``key``; None when there is none."""
        return self.rc.rpop(key)

    def brpop_lst_item(self, key):
        """Blocking pop from the tail of list ``key`` with a 1 second timeout.

        :return: a ``(key, value)`` pair, or None when the wait times out.
        """
        return self.rc.brpop(key, timeout=1)

    # ---- Set operations ----

    def add_set(self, key, value):
        """Add ``value`` to set ``key``."""
        self.rc.sadd(key, value)

    def is_member(self, key, value):
        """Return whether ``value`` is a member of set ``key``."""
        return self.rc.sismember(key, value)

    def pop_member(self, key):
        """Remove and return a random member of set ``key`` (None if empty)."""
        return self.rc.spop(key)

    def pop_members(self, key, num):
        """Return ``num`` random members of set ``key`` WITHOUT removing them.

        Despite the name this uses SRANDMEMBER, so the set is not modified.

        :return: a list of members; an empty list when there are none.
        """
        return self.rc.srandmember(key, num)

    def remove_member(self, key, value):
        """Remove ``value`` from set ``key``."""
        self.rc.srem(key, value)

    def get_all_members(self, key):
        """Return every member of set ``key`` without removing anything."""
        return self.rc.smembers(key)

    def remove_into(self, key1, key2, value):
        """Move ``value`` from set ``key1`` into set ``key2``."""
        self.rc.smove(key1, key2, value)

    def count_members(self, key):
        """Return the number of members in set ``key``."""
        return self.rc.scard(key)
class RedisClient(object):
    """Hash and list helpers bound to one Redis key on a cluster.

    Every command operates on ``self.key``; use :meth:`change_key` to point
    the client at another key.
    """

    def __init__(self, key, startup_nodes):
        """Connect to the cluster.

        :param key: the Redis key all commands operate on.
        :param startup_nodes: node list handed to ``StrictRedisCluster``.
        """
        self.key = key
        self.conn = StrictRedisCluster(startup_nodes=startup_nodes,
                                       decode_responses=True)

    @staticmethod
    def _to_text(value):
        """Decode ``bytes`` as UTF-8; return any other value unchanged."""
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    # ---- Hash commands ----

    def hdel(self, field):
        """Delete ``field`` from the hash.

        :param field: hash field to remove.
        """
        self.conn.hdel(self.key, field)

    def hexists(self, field):
        """Return whether the hash contains ``field``.

        :param field: hash field to look up.
        """
        return self.conn.hexists(self.key, field)

    def hget(self, field):
        """Return the value stored in ``field``.

        :param field: hash field to read.
        :return: the value as text, or None when it is missing or empty.
        """
        value = self.conn.hget(self.key, field)
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value if value else None

    def hgetall(self):
        """Return the whole hash as ``{field: value, ...}``.

        :return: the dict, or None when the hash is empty or missing.
        """
        all_dict = self.conn.hgetall(self.key)
        if not all_dict:
            return None
        if sys.version_info.major == 3:
            # The connection uses decode_responses=True, so replies are
            # normally already str; only decode what is still bytes.
            return {
                self._to_text(field): self._to_text(value)
                for field, value in all_dict.items()
            }
        return all_dict

    def hkeys(self):
        """Return all field names of the hash.

        :return: the list of fields, or None when there are none.
        """
        fields = self.conn.hkeys(self.key)
        if isinstance(fields, bytes):
            return fields.decode('utf-8')
        if not fields:
            return None
        if sys.version_info.major == 3:
            return [self._to_text(field) for field in fields]
        return fields

    def hlen(self):
        """Return the number of fields in the hash."""
        return self.conn.hlen(self.key)

    def hset(self, field, value):
        """Set ``field`` to ``value`` in the hash.

        :param field: hash field to write.
        :param value: value to store.
        """
        self.conn.hset(self.key, field, value)

    def hvals(self):
        """Return all values of the hash.

        :return: the list of values, or None when there are none.
        """
        values = self.conn.hvals(self.key)
        if not values:
            return None
        if sys.version_info.major == 3:
            return [self._to_text(value) for value in values]
        return values

    def change_key(self, key):
        """Point this client at another Redis key.

        :param key: the new key.
        """
        self.key = key

    # ---- List commands ----
    # Each helper returns the server reply; previously the replies were
    # dropped, which made the pop/read helpers unusable.

    def blpop(self, timeout):
        """Blocking pop from the head; returns the reply (None on timeout)."""
        return self.conn.blpop(self.key, timeout=timeout)

    def brpop(self, timeout):
        """Blocking pop from the tail; returns the reply (None on timeout)."""
        return self.conn.brpop(self.key, timeout=timeout)

    def brpoplpush(self, dst, timeout):
        """Blocking move of the tail element onto the head of ``dst``."""
        return self.conn.brpoplpush(self.key, dst=dst, timeout=timeout)

    def lindex(self, i):
        """Return the element at index ``i``."""
        return self.conn.lindex(self.key, index=i)

    def llen(self):
        """Return the length of the list."""
        return self.conn.llen(self.key)

    def lpop(self):
        """Pop and return the head element."""
        return self.conn.lpop(self.key)

    def lpush(self, *values):
        """Push ``values`` onto the head of the list.

        The original signature accepted no value at all, so nothing could be
        pushed; calling it without arguments still forwards an empty push.
        """
        return self.conn.lpush(self.key, *values)

    def lrange(self, start, stop):
        """Return the elements between ``start`` and ``stop`` (inclusive)."""
        return self.conn.lrange(self.key, start, stop)

    def lset(self, i, value):
        """Overwrite the element at index ``i`` with ``value``."""
        return self.conn.lset(self.key, index=i, value=value)

    def rpop(self):
        """Pop and return the tail element."""
        return self.conn.rpop(self.key)

    def rpoplpush(self, dst):
        """Move the tail element onto the head of ``dst`` and return it."""
        return self.conn.rpoplpush(self.key, dst=dst)

    def rpush(self, value):
        """Append ``value`` to the tail of the list."""
        return self.conn.rpush(self.key, value)
class RedisMiddleware(object):
    """Task manager backed by a Redis cluster.

    Wraps the list and set commands used for task queues and owns the
    Bloom filter created for de-duplication.
    """

    def __init__(self, redis_params):
        """Connect to the cluster and build the Bloom filter.

        :param redis_params: dict read for ``startup_nodes`` and ``password``.
        """
        self.redis_cli = StrictRedisCluster(
            startup_nodes=redis_params.get('startup_nodes', ''),
            password=redis_params.get('password', ''))
        # URL filter. NOTE(review): the original comment said "6 blocks,
        # 512M by default" but blockNum is 5 here -- confirm which is meant.
        self.bloom_filter = BloomFilter(
            self.redis_cli, blockNum=5, key='bloomfilter_weibo')

    def redis_del(self, key=None):
        """Delete ``key`` from Redis.

        Used by looped crawls to clear the list-URL key so each list URL is
        fetched only once per loop.

        :param key: key to delete; nothing is done when it is empty.
        :return: the reply of the delete command, or None when no key is given.
        """
        if not key:
            return None
        return self.redis_cli.delete(key)

    def redis_rpush(self, name, data):
        """Push data onto the tail of the task list ``name`` (RPUSH).

        :param name: list key.
        :param data: a single value, or a list of values pushed in order.
        :return: None; errors are swallowed (best effort).
        """
        try:
            if isinstance(data, list):
                # A single variadic RPUSH keeps the element order; an empty
                # list stays a no-op.
                if data:
                    self.redis_cli.rpush(name, *data)
            else:
                # Was lpush: a single value went to the wrong end.
                self.redis_cli.rpush(name, data)
        except Exception:
            return None

    def redis_lpush(self, name, data):
        """Push data onto the head of the task list ``name`` (LPUSH).

        :param name: list key.
        :param data: a single value, or a list of values pushed in order.
        :return: None; errors are swallowed (best effort).
        """
        try:
            if isinstance(data, list):
                if data:
                    self.redis_cli.lpush(name, *data)
            else:
                self.redis_cli.lpush(name, data)
        except Exception:
            return None

    def redis_rpop(self, name):
        """Pop one item from the tail of the task list ``name`` (RPOP).

        :return: the popped value, or None when the call fails.
        """
        try:
            return self.redis_cli.rpop(name)
        except Exception:
            return None

    def redis_lpop(self, name):
        """Pop one item from the head of the task list ``name`` (LPOP).

        :return: the popped value, or None when the call fails.
        """
        try:
            return self.redis_cli.lpop(name)
        except Exception:
            return None

    def redis_brpop(self, name, timeout=1):
        """Blocking pop from the tail of the task list ``name`` (BRPOP).

        :param name: list key.
        :param timeout: seconds to block.
        :return: the popped value, or None on timeout or error.
        """
        try:
            popped = self.redis_cli.brpop(name, timeout=timeout)
        except Exception as e:
            print(e)
            return None
        # BRPOP gives None on timeout; unpacking it used to raise (and
        # print) a TypeError on every idle poll.
        if not popped:
            return None
        return popped[1]

    def redis_query(self, name):
        """Return the length of the task list ``name`` (LLEN).

        :param name: list key.
        :return: the list length, or None when the call fails.
        """
        try:
            return self.redis_cli.llen(name)
        except Exception:
            return None

    def redis_sadd(self, name, data):
        """Add data to the set ``name`` (SADD).

        :param name: set key.
        :param data: a single member, or a list/set of members.
        :return: None; errors are swallowed (best effort).
        """
        try:
            if isinstance(data, (list, set)):
                if data:
                    self.redis_cli.sadd(name, *data)
            else:
                self.redis_cli.sadd(name, data)
        except Exception:
            return None

    def redis_sismember(self, name, data):
        """Return whether ``data`` is a member of the set ``name``."""
        return self.redis_cli.sismember(name, data)

    def redis_scard(self, name):
        """Return the number of members of the set ``name`` as an int."""
        return int(self.redis_cli.scard(name))

    def redis_spop(self, name):
        """Remove and return a random member of the set ``name``.

        :param name: set key.
        """
        return self.redis_cli.spop(name)

    def redis_srem(self, name, data):
        """Remove the member ``data`` from the set ``name``.

        :param name: set key.
        :param data: member to remove.
        """
        self.redis_cli.srem(name, data)