def redis_cluster():
    """Connect to a hard-coded test Redis cluster and print a few sample keys.

    Exits the process with status 1 when the cluster connection cannot be
    established.
    """
    redis_nodes = [
        {'host': '10.101.104.132', 'port': 1321},
        {'host': '10.101.104.132', 'port': 1322},
        {'host': '10.101.104.132', 'port': 1323},
    ]
    try:
        redisconn = StrictRedisCluster(startup_nodes=redis_nodes,
                                       decode_responses=True)
    except Exception as e:
        # Include the underlying error so connection failures are diagnosable
        # (the original printed only a bare "Connect Error!").
        print("Connect Error: %s" % e)
        sys.exit(1)

    print(redisconn.get("creative:app:com.DreamonStudios.BouncyBasketball"))
    print(redisconn.get('billing:ad:1'))
    print(redisconn.hgetall('billing:creative:spent:2019040417:5'))
class RedisManager:
    """Small convenience facade over a StrictRedisCluster connection pool."""

    def __init__(self, redis_server, pw=None):
        """Create a cluster connection pool; *pw* is an optional password."""
        pool_kwargs = {
            'startup_nodes': redis_server,
            'skip_full_coverage_check': True,
            'decode_responses': True,
        }
        if pw:
            pool_kwargs['password'] = pw
        pool = ClusterConnectionPool(**pool_kwargs)
        self.redis_client_ = StrictRedisCluster(connection_pool=pool)

    def _touch_expire(self, k, expire):
        # Apply a TTL only when one was requested.
        if expire:
            self.redis_client_.expire(k, expire)

    def Set(self, k, v, is_str, expire=None):
        """SET k=v; non-string values are JSON-serialized first."""
        payload = v if is_str else json.dumps(v)
        return self.redis_client_.set(k, payload, ex=expire)

    def Get(self, k):
        return self.redis_client_.get(k)

    def Delete(self, k):
        return self.redis_client_.delete(k)

    def HashMultiSet(self, k, d, expire=None):
        """HMSET a dict into hash *k*, optionally with a TTL."""
        self.redis_client_.hmset(k, d)
        self._touch_expire(k, expire)

    def HashGetAll(self, k):
        return self.redis_client_.hgetall(k)

    def Pushback(self, k, v, expire=None):
        """RPUSH a single value onto list *k*."""
        self.redis_client_.rpush(k, v)
        self._touch_expire(k, expire)

    def SetList(self, k, l, expire=None):
        """RPUSH every element of *l* onto list *k* in one call."""
        self.redis_client_.rpush(k, *l)
        self._touch_expire(k, expire)

    def SetSet(self, k, v, expire=None):
        """SADD one member into set *k*."""
        self.redis_client_.sadd(k, v)
        self._touch_expire(k, expire)

    def SortSetSet(self, k, v, expire=None):
        """ZADD v[0] with score v[1] (old redis-py positional zadd API)."""
        self.redis_client_.zadd(k, v[0], v[1])
        self._touch_expire(k, expire)

    def Handle(self):
        """Expose the raw client for operations not wrapped here."""
        return self.redis_client_
def mygetredis(redis_nodes):
    """Read the `voyager:status` hash from the cluster and count negative orders.

    Args:
        redis_nodes: startup node list, e.g. [{"host": ..., "port": ...}, ...]

    Returns:
        (order_status_dict, total_order_count, negative_order_count)
    """
    r = StrictRedisCluster(startup_nodes=redis_nodes, max_connections=30,
                           decode_responses=True,
                           skip_full_coverage_check=True)
    order_status = r.hgetall('voyager:status')
    # Count orders whose status value is negative (string starts with '-').
    negative = 0
    for order_id in order_status:
        if str(order_status[order_id])[:1] == '-':
            negative += 1
            # Fixed: original used a Python-2 print statement, inconsistent
            # with the rest of this file (Python 3 print() calls).
            print('订单为:' + str(order_id) + ' 状态为: ' + str(order_status[order_id]))
    return order_status, len(order_status), negative
def exec_redis(redis_nodes, cmd_list):
    '''Execute a whitelist of read-only commands against a Redis cluster.

    redis_nodes = [{'host':'10.101.104.132','port':1321},
                   {'host':'10.101.104.132','port':1322},
                   {'host':'10.101.104.132','port':1323}
                   {'host':'10.101.104.132','port':1323,'password':None,'db':0}
                  ]
    cmd_list = [  # only these read commands are supported
        "get key1",
        "llen lis1",
        "smembers set1",
        "scard set1",
        "hgetall hashtable",
        "lindex list1 2",
        "hget hashtable key1",
        "lrange list1 1 3",
    ]

    Returns: {original_cmd: [decoded_result]} for each command.
    '''
    redisconn = StrictRedisCluster(startup_nodes=redis_nodes,
                                   decode_responses=True)
    res = {}
    for cmd in cmd_list:
        cmd = cmd.strip()
        arr = re.split(" +", cmd)
        cmd_type = arr[0].lower()  # renamed from `type` (shadowed builtin)
        if cmd_type == 'get':
            tmp = redisconn.get(arr[1])
        elif cmd_type == 'hgetall':
            tmp = redisconn.hgetall(arr[1])
        elif cmd_type == 'llen':
            tmp = redisconn.llen(arr[1])
        elif cmd_type == 'smembers':
            tmp = redisconn.smembers(arr[1])
        elif cmd_type == 'scard':
            tmp = redisconn.scard(arr[1])
        elif cmd_type == 'hget':
            tmp = redisconn.hget(arr[1], arr[2])
        elif cmd_type == 'lrange':
            tmp = redisconn.lrange(arr[1], arr[2], arr[3])
        elif cmd_type == 'lindex':
            # Bug fix: the original called redisconn.lrange(arr[1], arr[2])
            # here, which both runs the wrong command and would raise a
            # TypeError (lrange needs start AND stop).
            tmp = redisconn.lindex(arr[1], arr[2])
        else:
            tmp = 'this cmd is not supported yet'  # fixed typo "yeild"
        res[cmd] = [json_decode(tmp)]
    return res
class RedisDB:
    """Wrapper over redis.StrictRedis / StrictRedisCluster / Sentinel.

    Provides set/zset/list/hash/string/bit helpers. Batch operations use a
    pipeline; MULTI is only issued in non-cluster mode because redis cluster
    pipelines do not support transactions across slots.
    """

    def __init__(self, ip_ports=None, db=None, user_pass=None, url=None,
                 decode_responses=True, service_name=None, max_connections=32,
                 **kwargs):
        """Redis wrapper.

        Args:
            ip_ports: "ip1:port1,ip2:port2" or ["ip1:port1", "ip2:port2"];
                more than one node selects sentinel/cluster mode.
            db: database index (non-cluster only)
            user_pass: password
            url: redis:// URL; takes precedence over ip_ports
            decode_responses: decode replies to str
            service_name: sentinel master name (enables sentinel mode)
            max_connections: pool size
        """
        # Defaults are resolved here (not in the signature) so that runtime
        # changes to `setting` are picked up.
        if ip_ports is None:
            ip_ports = setting.REDISDB_IP_PORTS
        if db is None:
            db = setting.REDISDB_DB
        if user_pass is None:
            user_pass = setting.REDISDB_USER_PASS
        if service_name is None:
            service_name = setting.REDISDB_SERVICE_NAME

        self._is_redis_cluster = False
        try:
            if not url:
                ip_ports = (ip_ports if isinstance(ip_ports, list)
                            else ip_ports.split(","))
                if len(ip_ports) > 1:
                    startup_nodes = []
                    for ip_port in ip_ports:
                        ip, port = ip_port.split(":")
                        startup_nodes.append({"host": ip, "port": port})
                    if service_name:
                        log.debug("使用redis哨兵模式")
                        hosts = [(node["host"], node["port"])
                                 for node in startup_nodes]
                        sentinel = Sentinel(hosts, socket_timeout=3, **kwargs)
                        self._redis = sentinel.master_for(
                            service_name,
                            password=user_pass,
                            db=db,
                            redis_class=redis.StrictRedis,
                            decode_responses=decode_responses,
                            max_connections=max_connections,
                            **kwargs)
                    else:
                        log.debug("使用redis集群模式")
                        self._redis = StrictRedisCluster(
                            startup_nodes=startup_nodes,
                            decode_responses=decode_responses,
                            password=user_pass,
                            max_connections=max_connections,
                            **kwargs)
                        # NOTE(review): placed in the cluster branch; the
                        # collapsed source is ambiguous about whether sentinel
                        # mode also set this flag — confirm against upstream.
                        self._is_redis_cluster = True
                else:
                    ip, port = ip_ports[0].split(":")
                    self._redis = redis.StrictRedis(
                        host=ip,
                        port=port,
                        db=db,
                        password=user_pass,
                        decode_responses=decode_responses,
                        max_connections=max_connections,
                        **kwargs)
            else:
                self._redis = redis.StrictRedis.from_url(
                    url, decode_responses=decode_responses)
        except Exception:
            # Re-raise untouched; the `as e` capture in the original was unused.
            raise
        else:
            if not url:
                log.debug("连接到redis数据库 %s db%s" % (ip_ports, db))
            else:
                log.debug("连接到redis数据库 %s" % (url))

        self._ip_ports = ip_ports
        self._db = db
        self._user_pass = user_pass
        self._url = url

    def __repr__(self):
        if self._url:
            return "<Redisdb url:{}>".format(self._url)
        return "<Redisdb ip_ports: {} db:{} user_pass:{}>".format(
            self._ip_ports, self._db, self._user_pass)

    @classmethod
    def from_url(cls, url):
        """Alternate constructor from a redis:// URL."""
        return cls(url=url)

    def sadd(self, table, values):
        """Add value(s) to an unordered set (deduplicating).

        @param table: set key
        @param values: a list or a single value
        @result: 1 if newly added, 0 if already present; None for batch adds.
        """
        if isinstance(values, list):
            # Pipeline batches the commands into one round trip.
            pipe = self._redis.pipeline(transaction=True)
            if not self._is_redis_cluster:
                pipe.multi()
            for value in values:
                pipe.sadd(table, value)
            pipe.execute()
        else:
            return self._redis.sadd(table, values)

    def sget(self, table, count=1, is_pop=True):
        """Fetch up to *count* members; returns a list like ['1'] or [].

        When is_pop is True members are removed (SPOP), otherwise sampled
        (SRANDMEMBER).
        """
        datas = []
        if is_pop:
            # Clamp to the set's cardinality so we never over-pop.
            count = min(count, self.sget_count(table))
            if count:
                if count > 1:
                    pipe = self._redis.pipeline(transaction=True)
                    if not self._is_redis_cluster:
                        pipe.multi()
                    while count:
                        pipe.spop(table)
                        count -= 1
                    datas = pipe.execute()
                else:
                    datas.append(self._redis.spop(table))
        else:
            datas = self._redis.srandmember(table, count)
        return datas

    def srem(self, table, values):
        """Remove the given member(s) from the set.

        @param values: a list or a single value
        """
        if isinstance(values, list):
            pipe = self._redis.pipeline(transaction=True)
            if not self._is_redis_cluster:
                pipe.multi()
            for value in values:
                pipe.srem(table, value)
            pipe.execute()
        else:
            self._redis.srem(table, values)

    def sget_count(self, table):
        """SCARD: number of members in the set."""
        return self._redis.scard(table)

    def sdelete(self, table):
        """Delete a big set incrementally.

        DEL on a huge set blocks Redis and can trigger failover; instead SSCAN
        500 members at a time and SREM them one by one.
        """
        # SSCAN: cursor "0" starts a new iteration; the server returns cursor 0
        # when the iteration is complete.
        cursor = "0"
        while cursor != 0:
            cursor, data = self._redis.sscan(table, cursor=cursor, count=500)
            for item in data:
                self._redis.srem(table, item)

    def sismember(self, table, key):
        "Return a boolean indicating if ``value`` is a member of set ``name``"
        return self._redis.sismember(table, key)

    def zadd(self, table, values, prioritys=0):
        """Store data in a sorted set (deduplicating; existing scores updated).

        @param values: a list or a single value
        @param prioritys: score(s); smaller sorts first. Defaults to 0.
        @result: 0 if present / 1 if inserted; batch adds return [0, 1, ...]
        """
        if isinstance(values, list):
            if not isinstance(prioritys, list):
                prioritys = [prioritys] * len(values)
            else:
                assert len(values) == len(prioritys), "values值要与prioritys值一一对应"

            pipe = self._redis.pipeline(transaction=True)
            if not self._is_redis_cluster:
                pipe.multi()
            for value, priority in zip(values, prioritys):
                # Old redis-py (<3.0) positional form: zadd(name, score, value).
                pipe.zadd(table, priority, value)
            return pipe.execute()
        else:
            return self._redis.zadd(table, prioritys, values)

    def zget(self, table, count=1, is_pop=True):
        """Fetch from a sorted set, lowest score (highest priority) first.

        @param count: number of items; -1 means all
        @param is_pop: remove the returned items from the set (default True)
        @result: list
        """
        start_pos = 0  # inclusive
        end_pos = count - 1 if count > 0 else count

        pipe = self._redis.pipeline(transaction=True)
        if not self._is_redis_cluster:
            pipe.multi()  # begin transaction (non-cluster only)
        pipe.zrange(table, start_pos, end_pos)  # read
        if is_pop:
            pipe.zremrangebyrank(table, start_pos, end_pos)  # delete
        results, *_ = pipe.execute()
        return results

    def zremrangebyscore(self, table, priority_min, priority_max):
        """Remove members by score, closed interval.

        @return: number of members removed
        """
        return self._redis.zremrangebyscore(table, priority_min, priority_max)

    def zrangebyscore(self, table, priority_min, priority_max, count=None,
                      is_pop=True):
        """Return members in the closed score interval, optionally popping them.

        @param count: max items; None means everything in the interval
        @param is_pop: delete the returned members
        """
        # Lua script keeps read+delete atomic.
        # NOTE: ARGV[1] carries the key again (unused by the script body).
        lua = """
                local key = KEYS[1]
                local min_score = ARGV[2]
                local max_score = ARGV[3]
                local is_pop = ARGV[4]
                local count = ARGV[5]

                -- 取值
                local datas = nil
                if count then
                    datas = redis.call('zrangebyscore', key, min_score, max_score, 'limit', 0, count)
                else
                    datas = redis.call('zrangebyscore', key, min_score, max_score)
                end

                -- 删除redis中刚取到的值
                if (is_pop) then
                    for i=1, #datas do
                        redis.call('zrem', key, datas[i])
                    end
                end

                return datas
        """
        cmd = self._redis.register_script(lua)
        if count:
            res = cmd(keys=[table],
                      args=[table, priority_min, priority_max, is_pop, count])
        else:
            res = cmd(keys=[table],
                      args=[table, priority_min, priority_max, is_pop])
        return res

    def zrangebyscore_increase_score(self, table, priority_min, priority_max,
                                     increase_score, count=None):
        """Return members in the closed score interval and shift their scores.

        @param increase_score: score delta (positive adds, negative subtracts)
        @param count: max items; None means everything in the interval
        """
        # Lua script keeps read+rescore atomic.
        lua = """
                local key = KEYS[1]
                local min_score = ARGV[1]
                local max_score = ARGV[2]
                local increase_score = ARGV[3]
                local count = ARGV[4]

                -- 取值
                local datas = nil
                if count then
                    datas = redis.call('zrangebyscore', key, min_score, max_score, 'limit', 0, count)
                else
                    datas = redis.call('zrangebyscore', key, min_score, max_score)
                end

                --修改优先级
                for i=1, #datas do
                    redis.call('zincrby', key, increase_score, datas[i])
                end

                return datas
        """
        cmd = self._redis.register_script(lua)
        if count:
            res = cmd(keys=[table],
                      args=[priority_min, priority_max, increase_score, count])
        else:
            res = cmd(keys=[table],
                      args=[priority_min, priority_max, increase_score])
        return res

    def zrangebyscore_set_score(self, table, priority_min, priority_max, score,
                                count=None):
        """Return members in the closed score interval and set their score.

        @param score: the new score value
        @param count: max items; None means everything in the interval
        """
        # Lua script keeps read+rescore atomic.
        lua = """
                local key = KEYS[1]
                local min_score = ARGV[1]
                local max_score = ARGV[2]
                local set_score = ARGV[3]
                local count = ARGV[4]

                -- 取值
                local datas = nil
                if count then
                    datas = redis.call('zrangebyscore', key, min_score, max_score, 'withscores','limit', 0, count)
                else
                    datas = redis.call('zrangebyscore', key, min_score, max_score, 'withscores')
                end

                local real_datas = {} -- 数据
                --修改优先级
                for i=1, #datas, 2 do
                   local data = datas[i]
                   local score = datas[i+1]

                   table.insert(real_datas, data) -- 添加数据
                   redis.call('zincrby', key, set_score - score, datas[i])
                end

                return real_datas
        """
        cmd = self._redis.register_script(lua)
        if count:
            res = cmd(keys=[table],
                      args=[priority_min, priority_max, score, count])
        else:
            res = cmd(keys=[table], args=[priority_min, priority_max, score])
        return res

    def zget_count(self, table, priority_min=None, priority_max=None):
        """Count members, optionally restricted to a closed score interval."""
        if priority_min is not None and priority_max is not None:
            return self._redis.zcount(table, priority_min, priority_max)
        else:
            return self._redis.zcard(table)

    def zrem(self, table, values):
        """Remove the given member(s) from the sorted set.

        @param values: a list or a single value
        """
        if isinstance(values, list):
            pipe = self._redis.pipeline(transaction=True)
            if not self._is_redis_cluster:
                pipe.multi()
            for value in values:
                pipe.zrem(table, value)
            pipe.execute()
        else:
            self._redis.zrem(table, values)

    def zexists(self, table, values):
        """Use ZSCORE to test membership.

        @return: list of 0/1 for a list input, otherwise a single 0/1
        """
        is_exists = []

        if isinstance(values, list):
            pipe = self._redis.pipeline(transaction=True)
            # NOTE(review): unlike the other batch helpers this always calls
            # multi(), even in cluster mode — kept as in the original.
            pipe.multi()
            for value in values:
                pipe.zscore(table, value)
            is_exists_temp = pipe.execute()
            for is_exist in is_exists_temp:
                if is_exist is not None:
                    is_exists.append(1)
                else:
                    is_exists.append(0)
        else:
            is_exists = self._redis.zscore(table, values)
            is_exists = 1 if is_exists is not None else 0

        return is_exists

    def lpush(self, table, values):
        # NOTE(review): despite the name, this RPUSHes (appends to the tail),
        # pairing with lpop() below for FIFO semantics.
        if isinstance(values, list):
            pipe = self._redis.pipeline(transaction=True)
            if not self._is_redis_cluster:
                pipe.multi()
            for value in values:
                pipe.rpush(table, value)
            pipe.execute()
        else:
            return self._redis.rpush(table, values)

    def lpop(self, table, count=1):
        """Pop up to *count* items from the head of the list.

        @result: a list when count > 1, else a single value (or None)
        """
        datas = None
        # Clamp to the list length so we never over-pop.
        count = min(count, self.lget_count(table))
        if count:
            if count > 1:
                pipe = self._redis.pipeline(transaction=True)
                if not self._is_redis_cluster:
                    pipe.multi()
                while count:
                    pipe.lpop(table)
                    count -= 1
                datas = pipe.execute()
            else:
                datas = self._redis.lpop(table)
        return datas

    def rpoplpush(self, from_table, to_table=None):
        """RPOPLPUSH: pop the tail of from_table, push it onto the head of
        to_table, and return it. With to_table omitted (or equal to
        from_table) this rotates the list.
        """
        if not to_table:
            to_table = from_table
        return self._redis.rpoplpush(from_table, to_table)

    def lget_count(self, table):
        """LLEN: length of the list."""
        return self._redis.llen(table)

    def lrem(self, table, value, num=0):
        return self._redis.lrem(table, value, num)

    def hset(self, table, key, value):
        """HSET: create the hash if missing; overwrite an existing field.

        @result: 1 newly inserted; 0 overwritten
        """
        return self._redis.hset(table, key, value)

    def hset_batch(self, table, datas):
        """Batch HSET.

        Args:
            datas: [[key, value], ...]
        """
        pipe = self._redis.pipeline(transaction=True)
        if not self._is_redis_cluster:
            pipe.multi()
        for key, value in datas:
            pipe.hset(table, key, value)
        return pipe.execute()

    def hincrby(self, table, key, increment):
        return self._redis.hincrby(table, key, increment)

    def hget(self, table, key, is_pop=False):
        """HGET a field; with is_pop=True the field is atomically removed."""
        if not is_pop:
            return self._redis.hget(table, key)
        else:
            # Lua script keeps get+delete atomic.
            lua = """
                    local key = KEYS[1]
                    local field = ARGV[1]

                    -- 取值
                    local datas = redis.call('hget', key, field)
                    -- 删除值
                    redis.call('hdel', key, field)

                    return datas
            """
            cmd = self._redis.register_script(lua)
            res = cmd(keys=[table], args=[key])
            return res

    def hgetall(self, table):
        return self._redis.hgetall(table)

    def hexists(self, table, key):
        return self._redis.hexists(table, key)

    def hdel(self, table, *keys):
        """HDEL one or more fields."""
        self._redis.hdel(table, *keys)

    def hget_count(self, table):
        """HLEN: number of fields in the hash."""
        return self._redis.hlen(table)

    def setbit(self, table, offsets, values):
        """Set bit(s) in a string; returns the previous value(s).

        @param offsets: a list or a single offset
        @param values: a list or a single 0/1 value
        @return: list for list input, else a single value
        """
        if isinstance(offsets, list):
            if not isinstance(values, list):
                values = [values] * len(offsets)
            else:
                assert len(offsets) == len(values), "offsets值要与values值一一对应"

            pipe = self._redis.pipeline(transaction=True)
            pipe.multi()
            for offset, value in zip(offsets, values):
                pipe.setbit(table, offset, value)
            return pipe.execute()
        else:
            return self._redis.setbit(table, offsets, values)

    def getbit(self, table, offsets):
        """Read bit(s) from a string.

        @param offsets: a list or a single offset
        @return: list for list input, else a single value
        """
        if isinstance(offsets, list):
            pipe = self._redis.pipeline(transaction=True)
            pipe.multi()
            for offset in offsets:
                pipe.getbit(table, offset)
            return pipe.execute()
        else:
            return self._redis.getbit(table, offsets)

    def bitcount(self, table):
        return self._redis.bitcount(table)

    def strset(self, table, value, **kwargs):
        return self._redis.set(table, value, **kwargs)

    def str_incrby(self, table, value):
        return self._redis.incrby(table, value)

    def strget(self, table):
        return self._redis.get(table)

    def strlen(self, table):
        return self._redis.strlen(table)

    def getkeys(self, regex):
        return self._redis.keys(regex)

    def exists_key(self, key):
        return self._redis.exists(key)

    def set_expire(self, key, seconds):
        """EXPIRE: set a TTL in seconds."""
        self._redis.expire(key, seconds)

    def clear(self, table):
        """Best-effort DEL; failures are logged, not raised."""
        try:
            self._redis.delete(table)
        except Exception as e:
            log.error(e)

    def get_redis_obj(self):
        """Expose the raw client."""
        return self._redis
# NOTE(review): truncated script fragment — appears to migrate shopping-cart
# hashes from a Redis cluster into a MySQL table. The `try:` block is cut off
# here (no matching `except`/commit/close is visible in this view), and
# `r.execute_command` is referenced without being called (a no-op attribute
# access) — confirm both against the complete original script before editing.
conn = pymysql.connect(host="127.0.0.1", user="******", password="******", database="defaultcart", charset="utf8") nodes = [{"host": "192.168.199.131", "port": "6379"}] #获取到redis连接,读取数据 r = StrictRedisCluster(startup_nodes=nodes, decode_responses=True) print(time.time()) cursor = conn.cursor() l = [] sql = "insert into user_shopping_cart(customer_id,service_type_id,gpid,sku_id,url,remark,qty,original_unit_price,selected,create_date,update_date) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);" for key in r.scan_iter(match='hash:UserShoppingCart:object:*', count=2000): d = r.hgetall(key) r.execute_command if not d: continue try: if d["CustomerId"]: customerid = int(d["CustomerId"]) if d["ServiceTypeId"]: servicetypeid = int(d["ServiceTypeId"]) if d["Gpid"]: gpid = int(d["Gpid"]) skuid = d["SkuId"] url = d["Url"] if len(url) > 100: url = url[0:100] remark = d["Remark"]
class RedisClient(object):
    """IP-pool client backed by a Redis hash (cluster or standalone)."""

    def __init__(self, name='common', db=0, host=None, port=None):
        """Connect using explicit host/port, or fall back to DBConfigParser.

        When the configured host contains '|', it is split into cluster
        startup nodes; otherwise a plain redis.Redis connection is used.
        """
        if host is None and port is None:
            self.config = DBConfigParser().get_config(
                server='redis_common_colony', key='colony')
            self.host = self.config.get('host')
            self.port = self.config.get('port')
            self.db = self.config.get('db')
            if '|' in self.host:
                redis_nodes = [
                    {'host': str(node), 'port': self.port, 'db': self.db}
                    for node in self.host.split('|')
                ]
                self.conn = StrictRedisCluster(startup_nodes=redis_nodes)
            else:
                self.conn = redis.Redis(host=self.host, port=self.port,
                                        db=self.db)
        else:
            self.host = host
            self.port = port
            self.db = db
            self.conn = redis.Redis(host=self.host, port=self.port,
                                    db=self.db)
        self.name = name

    def get(self):
        """Return a random ip (field name) from the pool, or None if empty."""
        entries = self.conn.hgetall(name=self.name)
        chosen = random.choice(list(entries.keys())) if entries else None
        if isinstance(chosen, bytes):
            return chosen.decode('utf-8')
        return chosen

    def save(self, key):
        """Store an ip, recording the current timestamp as its hash value."""
        if isinstance(key, (dict, list)):
            key = json.dumps(key)
        return self.conn.hincrby(self.name, key, str(int(time.time())))

    def get_value(self, key):
        """Return the stored value for an ip, or None if absent."""
        value = self.conn.hget(self.name, key)
        return value if value else None

    def pop(self):
        """Return a random ip and remove it from the pool."""
        key = self.get()
        if key:
            self.conn.hdel(self.name, key)
        return key

    def del_ip(self, key):
        """Remove a specific ip from the pool."""
        self.conn.hdel(self.name, key)

    def del_all(self):
        """Drop the entire database holding the pool."""
        self.conn.flushdb()

    def get_all(self):
        """Return every ip in the pool (decoded to str on Python 3)."""
        all_keys = self.conn.hgetall(self.name).keys()
        if sys.version_info.major == 3:
            return [key.decode('utf-8') for key in all_keys]
        return all_keys

    def get_count(self):
        """Return the number of ips currently pooled."""
        return self.conn.hlen(self.name)
class RedisClient(object):
    """Cluster client bound to one key, exposing hash and list helpers."""

    def __init__(self, key, startup_nodes):
        """Connect to the cluster and remember the working key."""
        self.key = key
        self.conn = StrictRedisCluster(startup_nodes=startup_nodes,
                                       decode_responses=True)

    def hdel(self, field):
        """Delete one field from the hash."""
        self.conn.hdel(self.key, field)

    def hexists(self, field):
        """Return whether the hash contains *field*."""
        return self.conn.hexists(self.key, field)

    def hget(self, field):
        """Return the value stored under *field* (decoded), or None."""
        value = self.conn.hget(self.key, field)
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value if value else None

    def hgetall(self):
        """Return {field: value, ...} with both sides decoded, or None."""
        all_dict = self.conn.hgetall(self.key)
        if not all_dict:
            return
        if sys.version_info.major == 3:
            return {f.decode('utf-8'): v.decode('utf-8')
                    for f, v in all_dict.items()}
        return all_dict

    def hkeys(self):
        """Return all fields of the hash."""
        field = self.conn.hkeys(self.key)
        if isinstance(field, bytes):
            return field.decode('utf-8')
        return field if field else None

    def hlen(self):
        """Return the number of fields in the hash."""
        return self.conn.hlen(self.key)

    def hset(self, field, value):
        """Set field -> value in the hash."""
        self.conn.hset(self.key, field, value)

    def hvals(self):
        """Return all hash values, decoded on Python 3, or None if empty."""
        values = self.conn.hvals(self.key)
        if not values:
            return
        if sys.version_info.major == 3:
            return [value.decode('utf-8') for value in values]
        return values

    def change_key(self, key):
        """Rebind this client to a different key."""
        self.key = key

    # ----- list operations on self.key ------------------------------------

    def blpop(self, timeout):
        self.conn.blpop(self.key, timeout=timeout)

    def brpop(self, timeout):
        self.conn.brpop(self.key, timeout=timeout)

    def brpoplpush(self, dst, timeout):
        self.conn.brpoplpush(self.key, dst=dst, timeout=timeout)

    def lindex(self, i):
        self.conn.lindex(self.key, index=i)

    def llen(self):
        self.conn.llen(self.key)

    def lpop(self):
        self.conn.lpop(self.key)

    def lpush(self):
        # NOTE(review): pushes nothing — lpush is called without a value in
        # the original as well; confirm intent with the caller.
        self.conn.lpush(self.key)

    def lrange(self, start, stop):
        self.conn.lrange(self.key, start, stop)

    def lset(self, i, value):
        self.conn.lset(self.key, index=i, value=value)

    def rpop(self):
        self.conn.rpop(self.key)

    def rpoplpush(self, dst):
        self.conn.rpoplpush(self.key, dst=dst)

    def rpush(self, value):
        self.conn.rpush(self.key, value)
class Redis:
    """Connection wrapper supporting plain redis, cluster, and zookeeper-
    discovered cluster modes, with type-dispatched value readers."""

    def __init__(self, ci):
        """Build a connection from ConnInfo *ci*.

        ci.type: 1 = single redis, 2 = cluster (ci.host is a JSON node list),
        3 = cluster whose nodes are read from a zookeeper node at ci.path.

        Raises AttributeError for any other type.
        """
        log.debug('create connection = %s', ci)
        t = ci.type
        self.t = t
        if t == 1:
            log.debug('create redis connection.')
            self.conn = StrictRedis(host=ci.host, port=ci.port, db=ci.db)
        elif t == 2:
            log.debug('create redis cluster connection.')
            nodes = json.loads(ci.host)
            pool = ClusterConnectionPool(startup_nodes=nodes)
            self.conn = StrictRedisCluster(connection_pool=pool,
                                           decode_responses=True)
        elif t == 3:
            log.debug('create redis connection from zookeeper.')
            client = zk.Client(hosts=ci.host, read_only=True)
            node = client.get(ci.path)
            arr = str(node[0], encoding='utf-8').split('\n')
            address = []
            for h in arr:
                # Bug fix: the original used `h is ''` — identity comparison
                # with a str literal is unreliable and a SyntaxWarning on
                # modern CPython; use equality.
                if h == '':
                    continue
                a = h.split(':')
                address.append({'host': a[0], 'port': int(a[1])})
            pool = ClusterConnectionPool(startup_nodes=address)
            self.conn = StrictRedisCluster(connection_pool=pool,
                                           decode_responses=True)
        else:
            raise AttributeError('illegal ConnInfo type.')
        if self.test():
            self.ci = ci
            log.info('connect redis(%s) success', ci.host)

    def test(self):
        """Ping the connection; for clusters, every node must answer."""
        log.debug('test connect redis(%s)', self.conn)
        good = False
        try:
            result = self.conn.ping()
            if self.t == 1:
                good = result
            else:
                # Cluster ping returns {node: bool}; all must be truthy.
                for k in result:
                    v = result[k]
                    log.debug('test [%s] result : %s', k, v)
                    if not v:
                        return False
                good = True
        except Exception as e:
            log.error(e)
        finally:
            log.debug('redis connection is good[%s]', good)
        return good

    def db_size(self):
        return self.conn.dbsize()

    def scan_iter(self, match='*', count=None):
        # Bug fix: `match is not '*'` (identity test on a literal) → `!=`.
        if match != '*':
            match = '*' + match + '*'
        return self.conn.scan_iter(match=match, count=count)

    def get_str(self, key):
        log.debug('get str value by key: %s', key)
        return self.conn.get(key)

    def l_range(self, key):
        start = 0
        end = -1
        log.debug('get list value from %d to %d by key: %s', start, end, key)
        return self.conn.lrange(key, start, end)

    def z_range(self, key):
        start = 0
        end = -1
        log.debug('get sorted set value from %d to %d by key: %s', start, end,
                  key)
        return self.conn.zrange(key, start, end)

    def s_members(self, key):
        log.debug('get set value by key: %s', key)
        return self.conn.smembers(key)

    def h_get_all(self, key):
        log.debug('get hash value by key: %s', key)
        return self.conn.hgetall(key)

    def get(self, t, k):
        """Dispatch to the reader matching redis type *t* (bytes name)."""
        f = self.__t_f_map__[t]
        return f(self, k)

    def type(self, key):
        log.debug('get type by key: %s', key)
        return self.conn.type(key)

    # Redis TYPE replies (bytes) -> unbound reader methods.
    __t_f_map__ = {
        b'string': get_str,
        b'list': l_range,
        b'set': s_members,
        b'zset': z_range,
        b'hash': h_get_all,
    }
class RedisCluster:
    """Convenience wrapper over StrictRedisCluster covering string, hash,
    list, and set operations."""

    def __init__(self):
        # Connection errors are printed rather than raised; self.rc is left
        # unset on failure (as in the original).
        try:
            self.rc = StrictRedisCluster(startup_nodes=StartupNodesServer,
                                         decode_responses=True)
        except:
            traceback.print_exc()

    # ----- key-level operations -------------------------------------------

    def count_keys(self):
        """Number of keys in the current database."""
        return self.rc.dbsize()

    def exists_key(self, key):
        return self.rc.exists(key)

    def delete_key(self, key):
        self.rc.delete(key)

    def rename_key(self, key1, key2):
        self.rc.rename(key1, key2)

    # ----- string ----------------------------------------------------------

    def set_key_value(self, key, value):
        self.rc.set(key, value)

    def get_key_value(self, key):
        """Return the value, or None when the key is missing."""
        return self.rc.get(key)

    # ----- hash ------------------------------------------------------------

    def set_hash(self, key, mapping):
        """HMSET a dict; existing fields in *mapping* are overwritten."""
        self.rc.hmset(key, mapping)

    def delete_hash_field(self, key, field):
        """HDEL a field (no error when it is absent)."""
        self.rc.hdel(key, field)

    def exists_hash_field(self, key, field):
        """Whether the hash contains *field*."""
        return self.rc.hexists(key, field)

    def get_hash_field(self, key, field):
        """Value of *field*, or None when absent."""
        return self.rc.hget(key, field)

    def get_hash_all_field(self, key):
        """All fields/values as a dict; empty dict when the key is missing."""
        return self.rc.hgetall(key)

    def increase_hash_field(self, key, field, increment):
        """HINCRBY the integer value of *field*."""
        self.rc.hincrby(key, field, increment)

    # ----- list ------------------------------------------------------------

    def rpush_into_lst(self, key, value):
        """Append to the tail of the list."""
        self.rc.rpush(key, value)

    def lpush_into_lst(self, key, value):
        """Prepend to the head of the list."""
        self.rc.lpush(key, value)

    def lpop_lst_item(self, key):
        """Pop from the head; None when empty."""
        return self.rc.lpop(key)

    def blpop_lst_item(self, key):
        """Blocking head pop returning (key, value); None on 1s timeout."""
        return self.rc.blpop(key, timeout=1)

    def rpop_lst_item(self, key):
        """Pop from the tail; None when empty."""
        return self.rc.rpop(key)

    def brpop_lst_item(self, key):
        """Blocking tail pop returning (key, value); None on 1s timeout."""
        return self.rc.brpop(key, timeout=1)

    # ----- set -------------------------------------------------------------

    def add_set(self, key, value):
        self.rc.sadd(key, value)

    def is_member(self, key, value):
        return self.rc.sismember(key, value)

    def pop_member(self, key):
        """Remove and return a random member; None when empty."""
        return self.rc.spop(key)

    def pop_members(self, key, num):
        """Return *num* random members without removing them (list; may be
        empty)."""
        return self.rc.srandmember(key, num)

    def remove_member(self, key, value):
        """Remove a specific member from the set."""
        self.rc.srem(key, value)

    def get_all_members(self, key):
        """Return every member without removal."""
        return self.rc.smembers(key)

    def remove_into(self, key1, key2, value):
        """Move *value* from set key1 into set key2."""
        self.rc.smove(key1, key2, value)

    def count_members(self, key):
        """Cardinality of the set."""
        return self.rc.scard(key)
class SparkQueryEngine:
    """Loads a product CSV into Spark, precomputes aggregates, and serves
    them from a Redis cluster."""

    def __init__(self, sc, dataset_path):
        logger.info("Starting Spark Query Engine..")
        self.sc = sc
        self.sqlContext = SQLContext(sc)

        startup_nodes = [{
            "host": cfg.redis['host'],
            "port": cfg.redis['port']
        }]
        self.rc = StrictRedisCluster(startup_nodes=startup_nodes,
                                     decode_responses=True)

        # Load data.csv with header and schema inference.
        logger.info("Loading the data ....")
        data = (
            self.sqlContext.read.format('com.databricks.spark.csv').options(
                header='true',
                inferschema='true').load(os.path.join(dataset_path,
                                                      'data.csv')))

        # Keep only the required columns, then drop duplicate ids and rows
        # with any null value.
        logger.info("Cleaning the dataset ....")
        self.df = data.select('id', 'brand', 'colors', 'dateAdded')
        self.df = self.df.dropDuplicates(['id'])
        self.df = self.df.dropna(how='any')

        logger.info("Processing and storing necessary data in Redis ... ")
        self.__get_recent_items()
        # self.__count_brands_by_date_added()

    def __get_recent_items(self):
        """Store the most-recently-added item per day in Redis hashes with
        keys of the form `recent:yyy-MM-dd`."""
        window = Window.partitionBy(
            date_format(self.df.dateAdded,
                        'yyy-MM-dd')).orderBy(self.df['dateAdded'].desc())
        # Rank items within each day; keep rank 1. Multiple products may share
        # the most recent timestamp, so duplicates on dateAdded are dropped to
        # keep a single product per day.
        recent_items = (
            self.df.select('*', rank().over(window).alias('rank'))
            .filter(col('rank') <= 1)
            .dropDuplicates(['dateAdded'])
            .orderBy(self.df.dateAdded, ascending=False))
        # Trim to the fields the API serves.
        recent_items = recent_items.select(
            date_format(self.df.dateAdded, 'yyy-MM-dd').alias('dateAdded'),
            'id', 'brand', 'colors')
        # Persist via the spark-redis connector (HASH data structure).
        recent_items.write \
            .format("org.apache.spark.sql.redis") \
            .option("table", "recent") \
            .option("key.column", "dateAdded") \
            .mode("overwrite") \
            .save()

    def __count_brands_by_date_added(self):
        """Store per-day brand counts in Redis hashes keyed `count:yyy-MM-dd`."""
        brand_counts = (self.df.groupBy([
            date_format(self.df.dateAdded, 'yyy-MM-dd').alias('dateAdded'),
            self.df.brand
        ]).count().orderBy(col("count").desc()))

        # @pandas_udf expects a live spark context when the udfs module is
        # imported; importing at the top of the file raises
        # "AttributeError: 'NoneType' object has no attribute '_jvm'", so the
        # import is deferred to call time.
        # https://github.com/apache/spark/blob/a09d5ba88680d07121ce94a4e68c3f42fc635f4f/python/pyspark/sql/types.py#L798-L806
        from udfs import brand_count_list_udf
        brand_counts = brand_counts.groupBy('dateAdded').apply(
            brand_count_list_udf)

        brand_counts.write \
            .format("org.apache.spark.sql.redis") \
            .option("table", "count") \
            .option("key.column", "dateAdded") \
            .mode("overwrite") \
            .save()

    def __get_items_by_color(self, color):
        """Collect the 10 newest items matching *color*, cache the JSON list
        in Redis under `color:<lowercased color>`, and return it."""
        items = (
            self.df.withColumn('colors', split(col('colors'), ','))
            .withColumn('colorsExploded', explode(col('colors')))
            .where('colorsExploded == "' + color + '"')
            .orderBy(col('dateAdded').desc())
            .drop('colorsExploded')
            .limit(10)
            # json.loads runs on the workers, so Python must be installed
            # there too.
            .toJSON().map(lambda j: json.loads(j)).collect())
        output = json.dumps(items)
        self.rc.set("color:" + color.lower(), output)
        return output

    def get_recent_item_api(self, dateString):
        """ Gets the recent item given the date and returns as output as json"""
        output = self.rc.hgetall("recent:" + str(dateString))
        if not output:
            return json.dumps(output)
        output["colors"] = output["colors"].split(",")
        return json.dumps(output)

    def get_brands_count_api(self, dateString):
        """ Gets the brand counts for a particular date and returns as output as json"""
        output = self.rc.hgetall("count:" + str(dateString))
        return json.dumps(ast.literal_eval(output['brandCount']))

    def get_items_by_color_api(self, color):
        """ Lazy loads the items given a color. If the data is not present in
        redis, queries spark to collect the data and store it in redis. """
        if not color:
            return "[]"
        output = self.rc.get("color:" + color.lower())
        if not output:
            output = self.__get_items_by_color(color)
        return output
class RedisQueue(object):
    """JSON-value store over either a Redis cluster or a pooled single node.

    *res* is a (connection_kwargs, redis_type) pair; redis_type == "cluster"
    selects StrictRedisCluster, anything else a pooled redis.Redis.
    """

    def __init__(self, res):
        conn_kwargs, redis_type = res[0], res[1]
        if redis_type == "cluster":
            try:
                self.rc = StrictRedisCluster(**conn_kwargs)
            except Exception as e:
                print(e)
                print("Connect Error!")
        else:
            try:
                self.pool = redis.ConnectionPool(**conn_kwargs)
                self.rc = redis.Redis(connection_pool=self.pool)
            except Exception as e:
                print(e)
                print("Connect Error!")

    def get_all(self, key, block=True, timeout=None):
        """Scan every key containing *key* and read its value by type.

        NOTE(review): the source formatting is ambiguous about where
        `return list(vals[0])` sat; it is kept after the loop here (so the
        last matching key's first element is returned) — confirm against the
        original. `block`/`timeout` are accepted but unused, as before.
        """
        for candidate in self.rc.keys():
            if key in str(candidate):
                key_type = self.rc.type(candidate)
                if key_type == 'string':
                    vals = self.rc.get(candidate)
                elif key_type == 'list':
                    vals = self.rc.lrange(candidate, 0, -1)
                elif key_type == 'set':
                    vals = self.rc.smembers(candidate)
                elif key_type == 'zset':
                    vals = self.rc.zrange(candidate, 0, -1)
                elif key_type == "hash":
                    vals = self.rc.hgetall(candidate)
                else:
                    print(key_type, candidate)
        return list(vals[0])

    def keys(self):
        """All keys in the store."""
        return self.rc.keys()

    def iskey(self, key):
        """1 when *key* exists, else 0."""
        return 1 if self.rc.exists(key) else 0

    def get(self, key):
        """GET and JSON-decode the value at *key*."""
        raw = self.rc.get(key)
        return json.loads(raw)

    def put(self, key, value):
        """JSON-encode *value* and SET it at *key*."""
        self.rc.set(key, json.dumps(value))

    def delall(self, key):
        """Delete *key*."""
        self.rc.delete(key)