class RedisCluster:
    """Convenience wrapper around a ``StrictRedisCluster`` connection.

    The connection is opened against the module-level ``StartupNodesServer``
    node list with ``decode_responses=True``.  Every method forwards to the
    matching client command on ``self.rc``.
    """

    def __init__(self):
        # Define the attribute up front so a failed connection leaves the
        # instance in a known state (``rc is None``) instead of without the
        # attribute at all.
        self.rc = None
        try:
            self.rc = StrictRedisCluster(startup_nodes=StartupNodesServer,
                                         decode_responses=True)
        except Exception:
            # Best effort: report the failure but keep the object usable for
            # inspection.  ``Exception`` (not a bare ``except``) so that
            # KeyboardInterrupt / SystemExit still propagate.
            traceback.print_exc()

    def count_keys(self):
        """Return how many keys the current database holds."""
        return self.rc.dbsize()

    def exists_key(self, key):
        """Return the client's answer to whether ``key`` exists."""
        return self.rc.exists(key)

    def delete_key(self, key):
        """Delete ``key``."""
        self.rc.delete(key)

    def rename_key(self, key1, key2):
        """Rename ``key1`` to ``key2``."""
        self.rc.rename(key1, key2)

    # ------------------------------------------------------------------
    # String operations
    # ------------------------------------------------------------------
    def set_key_value(self, key, value):
        """Store ``value`` under ``key``."""
        self.rc.set(key, value)

    def get_key_value(self, key):
        """Return the value stored under ``key``, or None if it is missing."""
        return self.rc.get(key)

    # ------------------------------------------------------------------
    # Hash operations
    # ------------------------------------------------------------------
    def set_hash(self, key, mapping):
        """Write the dict ``mapping`` into the hash ``key``.

        Fields that already exist are overwritten.
        """
        self.rc.hmset(key, mapping)

    def delete_hash_field(self, key, field):
        """Delete ``field`` from the hash ``key``, whether or not it exists."""
        self.rc.hdel(key, field)

    def exists_hash_field(self, key, field):
        """Return whether ``field`` exists in the hash ``key``."""
        return self.rc.hexists(key, field)

    def get_hash_field(self, key, field):
        """Return the value of ``field`` in the hash ``key``, or None."""
        return self.rc.hget(key, field)

    def get_hash_all_field(self, key):
        """Return every field/value of the hash ``key`` as a dict.

        A missing key yields an empty dict.
        """
        return self.rc.hgetall(key)

    def increase_hash_field(self, key, field, increment):
        """Add ``increment`` to the integer stored in ``field`` of ``key``."""
        self.rc.hincrby(key, field, increment)

    # ------------------------------------------------------------------
    # List operations
    # ------------------------------------------------------------------
    def rpush_into_lst(self, key, value):
        """Append ``value`` to the tail of the list ``key``."""
        self.rc.rpush(key, value)

    def lpush_into_lst(self, key, value):
        """Prepend ``value`` to the head of the list ``key``."""
        self.rc.lpush(key, value)

    def lpop_lst_item(self, key):
        """Remove and return the first element of ``key``, or None."""
        return self.rc.lpop(key)

    def blpop_lst_item(self, key, timeout=1):
        """Blocking pop from the head of the list ``key``.

        :param key: list to pop from
        :param timeout: seconds to block before giving up (default 1)
        :return: a ``(key_name, value)`` tuple, or None when the wait
            times out
        """
        return self.rc.blpop(key, timeout=timeout)

    def rpop_lst_item(self, key):
        """Remove and return the last element of ``key``, or None."""
        return self.rc.rpop(key)

    def brpop_lst_item(self, key, timeout=1):
        """Blocking pop from the tail of the list ``key``.

        :param key: list to pop from
        :param timeout: seconds to block before giving up (default 1)
        :return: a ``(key_name, value)`` tuple, or None when the wait
            times out
        """
        return self.rc.brpop(key, timeout=timeout)

    # ------------------------------------------------------------------
    # Set operations
    # ------------------------------------------------------------------
    def add_set(self, key, value):
        """Add ``value`` to the set ``key``."""
        self.rc.sadd(key, value)

    def is_member(self, key, value):
        """Return whether ``value`` is a member of the set ``key``."""
        return self.rc.sismember(key, value)

    def pop_member(self, key):
        """Remove and return a random member of ``key``, or None."""
        return self.rc.spop(key)

    def pop_members(self, key, num):
        """Return ``num`` random members of ``key`` as a list.

        Despite the name, the members are NOT removed from the set; an
        empty list is returned when there is nothing to sample.
        """
        return self.rc.srandmember(key, num)

    def remove_member(self, key, value):
        """Remove ``value`` from the set ``key``."""
        self.rc.srem(key, value)

    def get_all_members(self, key):
        """Return all members of the set ``key`` without removing them."""
        return self.rc.smembers(key)

    def remove_into(self, key1, key2, value):
        """Move ``value`` from the set ``key1`` into the set ``key2``."""
        self.rc.smove(key1, key2, value)

    def count_members(self, key):
        """Return the number of members in the set ``key``."""
        return self.rc.scard(key)
class RedisClient(object):
    """Hash and list helpers bound to a single key on a Redis cluster.

    Every command operates on ``self.key``; use :meth:`change_key` to point
    the client at a different key.
    """

    def __init__(self, key, startup_nodes):
        """
        Open the cluster connection.
        :param key: key every subsequent command operates on
        :param startup_nodes: node list handed to ``StrictRedisCluster``
        """
        self.key = key
        self.conn = StrictRedisCluster(startup_nodes=startup_nodes,
                                       decode_responses=True)

    @staticmethod
    def _to_text(value):
        """Decode ``bytes`` as UTF-8; hand anything else back untouched.

        The connection is opened with ``decode_responses=True``, so replies
        may already be text and must not be decoded a second time.
        """
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    # ===============================================
    # Hash commands
    def hdel(self, field):
        """
        Delete one field from the hash.
        :param field: field to delete
        :return: None
        """
        self.conn.hdel(self.key, field)

    def hexists(self, field):
        """
        Check whether the hash contains ``field``.
        :param field: field to look for
        :return: the client's reply
        """
        return self.conn.hexists(self.key, field)

    def hget(self, field):
        """
        Return the value stored in ``field``.
        :param field: field to read
        :return: the value as text, or None when the reply is empty
        """
        value = self.conn.hget(self.key, field)
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value if value else None

    def hgetall(self):
        """
        Return the whole hash as ``{field: value, ...}``.
        :return: dict of text fields/values, or None when the hash is empty
        """
        all_dict = self.conn.hgetall(self.key)
        if not all_dict:
            return None
        # Decode only what is still bytes: with decode_responses=True the
        # reply is already text and an unconditional .decode() would raise.
        return {
            self._to_text(field): self._to_text(value)
            for field, value in all_dict.items()
        }

    def hkeys(self):
        """
        Return every field name of the hash.
        :return: list of text field names, or None when there are none
        """
        fields = self.conn.hkeys(self.key)
        if isinstance(fields, bytes):
            return fields.decode('utf-8')
        if not fields:
            return None
        return [self._to_text(field) for field in fields]

    def hlen(self):
        """
        Return the number of fields in the hash.
        :return: the client's reply
        """
        return self.conn.hlen(self.key)

    def hset(self, field, value):
        """
        Set ``field`` to ``value``.
        :param field: field to write
        :param value: value to store
        :return: None
        """
        self.conn.hset(self.key, field, value)

    def hvals(self):
        """
        Return every value of the hash.
        :return: list of text values, or None when there are none
        """
        values = self.conn.hvals(self.key)
        if not values:
            return None
        return [self._to_text(value) for value in values]

    def change_key(self, key):
        """
        Point this client at another key.
        :param key: new key for all subsequent commands
        :return: None
        """
        self.key = key

    # ===============================================
    # List commands -- each returns the underlying client's reply so that
    # pops, lookups and length queries are actually usable by the caller.
    def blpop(self, timeout):
        """Blocking pop from the head of the list; returns the client's reply."""
        return self.conn.blpop(self.key, timeout=timeout)

    def brpop(self, timeout):
        """Blocking pop from the tail of the list; returns the client's reply."""
        return self.conn.brpop(self.key, timeout=timeout)

    def brpoplpush(self, dst, timeout):
        """Blocking tail-pop from this list pushed onto ``dst``; returns the reply."""
        return self.conn.brpoplpush(self.key, dst=dst, timeout=timeout)

    def lindex(self, i):
        """Return the client's reply for the element at index ``i``."""
        return self.conn.lindex(self.key, index=i)

    def llen(self):
        """Return the client's reply for the list length."""
        return self.conn.llen(self.key)

    def lpop(self):
        """Pop from the head of the list; returns the client's reply."""
        return self.conn.lpop(self.key)

    def lpush(self, *values):
        """Push ``values`` onto the head of the list; returns the client's reply.

        Called without values this forwards the bare command exactly as
        before, so existing callers see unchanged behaviour.
        """
        return self.conn.lpush(self.key, *values)

    def lrange(self, start, stop):
        """Return the client's reply for the slice ``start``..``stop``."""
        return self.conn.lrange(self.key, start, stop)

    def lset(self, i, value):
        """Overwrite the element at index ``i``; returns the client's reply."""
        return self.conn.lset(self.key, index=i, value=value)

    def rpop(self):
        """Pop from the tail of the list; returns the client's reply."""
        return self.conn.rpop(self.key)

    def rpoplpush(self, dst):
        """Tail-pop from this list pushed onto ``dst``; returns the reply."""
        return self.conn.rpoplpush(self.key, dst=dst)

    def rpush(self, value):
        """Push ``value`` onto the tail of the list; returns the client's reply."""
        return self.conn.rpush(self.key, value)
db = mongoClient['jd'] productcl = db['product'] commentcl = db['comment'] pageCount = getPageCount() print(pageCount) for page in range(0, pageCount) : skuids = getPageSkuids(page) for skuid in skuids : if redisClient.sismember('inpending', skuid) == False and redisClient.sismember('visited', skuid) == False and redisClient.sismember('filtered', skuid) == False : ret = redisClient.sadd('inpending', skuid) if ret == 1 : ## redisClient.rpush('pending', skuid) while True : skuid = redisClient.blpop('pending', 60) if skuid == None : ## break skuid = skuid[1] print(skuid) html = getProductPage(skuid) skuname = getSkuname(html) redisClient.set(skuid, skuname) if filter(skuname) : redisClient.sadd('filtered', skuid) redisClient.srem('inpending', skuid) continue else : redisClient.sadd('visited', skuid) redisClient.srem('inpending', skuid) if redisClient.sismember('cinpending', skuid) == False and redisClient.sismember('cvisited', skuid) == False :