class ExceedDateKeyCleanTask(object):
    """Cleanup task that removes Redis data older than N days.

    Every public method derives a cutoff date with
    ``DateUtil.format4Y2m2d(DateUtil.addDaysOnCurrent(-validDays))`` and
    appends it to ``keyPrefix`` to obtain the key to clean. All Redis access
    goes through the project's ``RedisClient`` wrapper.
    """

    # Number of elements requested from Redis per SSCAN/HSCAN call.
    __SCAN_COUNT = 1000

    def __init__(self):
        self.__config = GetConfig()
        self.__log = Logger('exceedDateKeyCleanTask')
        self.__redisClient = RedisClient(self.__config.redis_host(),
                                         self.__config.redis_port(),
                                         self.__config.redis_password())
        self.__taskName = 'clean exceed date redis key'
        self.__className = self.__class__

    def __expiredDay(self, validDays):
        """Return the formatted date lying ``validDays`` days before now."""
        return DateUtil.format4Y2m2d(DateUtil.addDaysOnCurrent(-validDays))

    def __logStart(self, taskName):
        """Log that ``taskName`` is starting."""
        self.__log.info("start doing task:{task}, className:{className}".format(
            task=taskName, className=self.__className))

    def __logFinish(self, taskName, key, db, startTime):
        """Log that ``taskName`` finished, with the seconds elapsed since ``startTime``."""
        costTime = time.time() - startTime
        self.__log.info(
            "task:{task} finish, className:{className}, key:{key}, db:{db}, cost:{cost}".format(
                task=taskName, className=self.__className, key=key, db=db, cost=costTime))

    def __scanAndRemove(self, scan, db, key, removeBatch):
        """Walk ``key`` with a cursor and pass every non-empty batch to ``removeBatch``.

        :param scan: bound scan method of the Redis client (``sscan`` or ``hscan``)
        :param db: database that holds ``key``
        :param key: collection key being scanned
        :param removeBatch: callable receiving the second element of each scan result
        """
        cursor = 0
        while True:
            scanResult = scan(db, key, cursor, None, self.__SCAN_COUNT)
            # An empty result means nothing could be fetched: stop.
            if len(scanResult) == 0:
                break
            cursor = scanResult[0]
            # A scan step may legitimately yield no elements; deleting with an
            # empty argument list would be an invalid Redis command, so skip it.
            if len(scanResult[1]) > 0:
                removeBatch(scanResult[1])
            # Cursor 0 signals that the whole collection has been visited.
            if cursor == 0:
                break

    def __doStart(self, keyPrefix, db, date):
        """Unlink the single key ``keyPrefix + date`` in ``db`` and log the timing."""
        startTime = time.time()
        taskName = self.__taskName + " by doStart"
        self.__logStart(taskName)
        key = keyPrefix + date
        self.__redisClient.unlink(db, key)
        self.__log.info("unlink key:{key}".format(key=key))
        self.__logFinish(taskName, key, db, startTime)

    def start(self, keyPrefix, db, validDays):
        """Delete the daily key that has just fallen out of the validity window.

        :param keyPrefix: prefix of the key to delete; the key has the form
            keyPrefix_yyyyMMdd, the prefix being everything up to and
            including the underscore
        :param db: database that holds the key
        :param validDays: number of days a key stays valid
        :return: None
        """
        self.__doStart(keyPrefix, db, self.__expiredDay(validDays))

    def startHour(self, keyPrefix, db, validDays):
        """Delete the 24 hourly keys of the day that has just expired.

        :param keyPrefix: prefix of the keys to delete; each key has the form
            keyPrefix_yyyyMMddHH with a zero-padded hour from 00 to 23
        :param db: database that holds the keys
        :param validDays: number of days a key stays valid
        :return: None
        """
        day = self.__expiredDay(validDays)
        for hour in range(24):
            self.__doStart(keyPrefix, db, day + '{0:02d}'.format(hour))

    def startBatch(self, keyPrefix, db, validDays):
        """Delete every key listed in an index set, emptying the set as it goes.

        The set stored at ``keyPrefix + <date>`` holds the names of the keys
        to delete.

        :param keyPrefix: prefix of the index set; the set key has the form
            keyPrefix_yyyyMMdd
        :param db: database that holds the set and the keys it lists
        :param validDays: number of days a key stays valid
        :return: None
        """
        startTime = time.time()
        taskName = self.__taskName + " by startBatch"
        self.__logStart(taskName)
        setKey = keyPrefix + self.__expiredDay(validDays)

        def removeBatch(members):
            # Remove the referenced keys first, then drop them from the set.
            self.__redisClient.unlink(db, *members)
            self.__redisClient.srem(db, setKey, *members)

        self.__scanAndRemove(self.__redisClient.sscan, db, setKey, removeBatch)
        self.__logFinish(taskName, setKey, db, startTime)

    def startHscan(self, keyPrefix, db, validDays):
        """Empty a large hash key batch by batch using hscan + hdel.

        :param keyPrefix: prefix of the hash to clean; the key has the form
            keyPrefix_yyyyMMdd
        :param db: database that holds the hash
        :param validDays: number of days a key stays valid
        :return: None
        """
        startTime = time.time()
        taskName = self.__taskName + " by startHscan"
        self.__logStart(taskName)
        key = keyPrefix + self.__expiredDay(validDays)

        def removeBatch(fieldMap):
            # hscan yields a field -> value mapping; only the field names are needed.
            self.__redisClient.hdel(db, key, *fieldMap.keys())

        self.__scanAndRemove(self.__redisClient.hscan, db, key, removeBatch)
        self.__logFinish(taskName, key, db, startTime)

    def startSscan(self, keyPrefix, db, validDays):
        """Empty a large set key batch by batch using sscan + srem.

        :param keyPrefix: prefix of the set to clean; the key has the form
            keyPrefix_yyyyMMdd
        :param db: database that holds the set
        :param validDays: number of days a key stays valid
        :return: None
        """
        startTime = time.time()
        taskName = self.__taskName + " by startSscan"
        self.__logStart(taskName)
        key = keyPrefix + self.__expiredDay(validDays)

        def removeBatch(members):
            self.__redisClient.srem(db, key, *members)

        self.__scanAndRemove(self.__redisClient.sscan, db, key, removeBatch)
        self.__logFinish(taskName, key, db, startTime)
class ExceedDateKeyCleanTaskTest(object):
    """Manual smoke tests for ExceedDateKeyCleanTask.

    Each test seeds data under the key the task is expected to clean, prints
    it, runs the matching cleanup method and prints the state again. Nothing
    is asserted, so the output has to be inspected by eye.

    NOTE: the tests write to the Redis instance configured through
    ``GetConfig()``.
    """

    def __init__(self):
        self.__exceedDateKeyCleanTask = ExceedDateKeyCleanTask()
        self.__config = GetConfig()
        self.__redisClient = RedisClient(self.__config.redis_host(),
                                         self.__config.redis_port(),
                                         self.__config.redis_password())

    def __expiredDay(self, validDays):
        """Return the formatted date lying ``validDays`` days before now."""
        return DateUtil.format4Y2m2d(DateUtil.addDaysOnCurrent(-validDays))

    def __seedHash(self, db, key):
        """Store two sample fields in the hash at ``key``."""
        self.__redisClient.hset(db, key, 'map1', 'value1')
        self.__redisClient.hset(db, key, 'map2', 'value2')

    def testStart(self):
        """Seed one daily hash, run ``start`` and print the hash before/after."""
        keyPrefix = 'account_cost_'
        validDays = 2
        db = 1
        key = keyPrefix + self.__expiredDay(validDays)
        print("----test start func begin")
        self.__seedHash(db, key)
        print("init data:")
        print(self.__redisClient.hscan(db, key, 0, None, 3))
        self.__exceedDateKeyCleanTask.start(keyPrefix, db, validDays)
        print("after clean:")
        print(self.__redisClient.hscan(db, key, 0, None, 3))

    def testStartHour(self):
        """Seed 24 hourly hashes, run ``startHour`` and print them before/after."""
        keyPrefix = 'data_cost_'
        validDays = 2
        db = 3
        day = self.__expiredDay(validDays)
        print("----test startHour func begin")
        # One key per hour of the day, hour zero-padded to two digits.
        keyList = [keyPrefix + day + '{0:02d}'.format(hour) for hour in range(24)]
        for key in keyList:
            self.__seedHash(db, key)
        print("init data:")
        for key in keyList:
            print(self.__redisClient.hgetall(db, key))
        self.__exceedDateKeyCleanTask.startHour(keyPrefix, db, validDays)
        print("after clean:")
        for key in keyList:
            print(self.__redisClient.hgetall(db, key))

    def testStartBatch(self):
        """Seed an index set, run ``startBatch`` and print the set before/after."""
        keyPrefix = 'mix_detail_posid_'
        validDays = 30
        db = 1
        key = keyPrefix + self.__expiredDay(validDays)
        print("----test startBatch func begin")
        self.__redisClient.sadd(db, key, *['mem1', 'mem2'])
        print("init data:")
        print(self.__redisClient.sscan(db, key, 0, None, 3))
        self.__exceedDateKeyCleanTask.startBatch(keyPrefix, db, validDays)
        print("after clean:")
        print(self.__redisClient.sscan(db, key, 0, None, 3))

    def testStartHscan(self):
        """Seed a hash, run ``startHscan`` and print the hash before/after."""
        keyPrefix = 'view_st_ts_'
        validDays = 1
        db = 1
        key = keyPrefix + self.__expiredDay(validDays)
        print("----test startHscan func begin")
        self.__seedHash(db, key)
        print("init data:")
        print(self.__redisClient.hscan(db, key, 0, None, 3))
        self.__exceedDateKeyCleanTask.startHscan(keyPrefix, db, validDays)
        print("after clean:")
        print(self.__redisClient.hscan(db, key, 0, None, 3))

    def testStartSscan(self):
        """Seed a set, run ``startSscan`` and print the set before/after."""
        keyPrefix = 'click_st_'
        validDays = 1
        db = 1
        key = keyPrefix + self.__expiredDay(validDays)
        print("----test startSscan func begin")
        self.__redisClient.sadd(db, key, *['mem1', 'mem2'])
        print("init data:")
        print(self.__redisClient.sscan(db, key, 0, None, 3))
        self.__exceedDateKeyCleanTask.startSscan(keyPrefix, db, validDays)
        print("after clean:")
        print(self.__redisClient.sscan(db, key, 0, None, 3))