class Getter(object):
    """Fetches proxies from every registered crawl function and stores them in Redis."""

    def __init__(self):
        self.redis = RedisClient()   # Redis-backed proxy pool client
        self.crawler = Crawler()     # aggregated crawler exposing crawl_* callbacks

    def is_over_threshold(self, threshold=None):
        """Return True when the pool already holds at least ``threshold`` proxies.

        :param threshold: pool size limit; defaults to POOL_UPPER_THRESHOLD,
            preserving the original behavior for existing callers.
        :return: bool
        """
        if threshold is None:
            threshold = POOL_UPPER_THRESHOLD
        # The comparison already yields the bool the caller needs; no if/else required.
        return self.redis.count() >= threshold

    def run(self):
        """Run every crawl function and push the proxies it yields into Redis.

        Does nothing when the pool is already at capacity.
        """
        print('获取器开始执行...')
        if self.is_over_threshold():
            return
        for index in range(self.crawler.__CrawlFuncCount__):
            callback = self.crawler.__CrawlFunc__[index]
            # Fetch the proxies produced by this crawl function.
            proxies = self.crawler.get_proxies(callback)
            sys.stdout.flush()  # force buffered progress output to appear immediately
            for proxy in proxies:
                self.redis.add(proxy)
def test_redis_client(self):
    """Pushed values come back newest-first, and clear() empties the store."""
    connection = RedisClient.get_connection()
    for value in (1, 2):
        connection.lpush('redis_key', value)
    self.assertEqual(connection.lrange('redis_key', 0, -1), [b'2', b'1'])
    RedisClient.clear()
    self.assertEqual(connection.lrange('redis_key', 0, -1), [])
def test_create_new_newsfeed_before_get_cached_newsfeeds(self):
    """A feed created while the cache is cold still appears in the cached list."""
    user = self.user1
    feed1 = self.create_newsfeed(user, self.create_tweet(user))
    RedisClient.clear()
    connection = RedisClient.get_connection()
    key = USER_NEWSFEEDS_PATTERN.format(user_id=user.id)
    self.assertEqual(connection.exists(key), False)
    # Creating a second feed warms the cache as a side effect.
    feed2 = self.create_newsfeed(user, self.create_tweet(user))
    self.assertEqual(connection.exists(key), True)
    feeds = NewsfeedService.get_cached_newsfeeds(user.id)
    self.assertEqual([feed.id for feed in feeds], [feed2.id, feed1.id])
def test_create_new_tweet_before_get_cached_tweets(self):
    """A tweet created while the cache is cold still appears in the cached list."""
    user = self.user1
    tweet1 = self.create_tweet(user, 'tweet1')
    RedisClient.clear()
    connection = RedisClient.get_connection()
    key = USER_TWEETS_PATTERN.format(user_id=user.id)
    self.assertEqual(connection.exists(key), False)
    # Creating a second tweet warms the cache as a side effect.
    tweet2 = self.create_tweet(user, 'tweet2')
    self.assertEqual(connection.exists(key), True)
    tweets = TweetService.get_cached_tweets(user.id)
    self.assertEqual([tweet.id for tweet in tweets], [tweet2.id, tweet1.id])
def decr_count(cls, obj, attr):
    """Decrement the cached counter for ``obj.attr`` and return the new value.

    On a cache miss the counter is seeded from the object's current
    attribute value (with a TTL) and that seed is returned unchanged.
    NOTE(review): the miss path returns the DB value without decrementing —
    presumably the DB row was already updated by the caller; confirm.
    """
    connection = RedisClient.get_connection()
    cache_key = cls.get_count_key(obj, attr)
    if connection.exists(cache_key):
        return connection.decr(cache_key)
    seed = getattr(obj, attr)
    connection.set(cache_key, seed)
    connection.expire(cache_key, settings.REDIS_KEY_EXPIRE_TIME)
    return seed
def push_object(cls, key, obj, queryset):
    """Prepend ``obj`` to the cached list at ``key``, trimming to the length limit.

    When the key is absent, the whole queryset is loaded into the cache
    instead (presumably the queryset already contains ``obj`` — verify
    against callers), so no extra push is performed.
    """
    connection = RedisClient.get_connection()
    if not connection.exists(key):
        cls._load_objects_to_cache(key, queryset)
        return
    connection.lpush(key, DjangoModelSerializer.serialize(obj))
    # Only the newest REDIS_LIST_LENGTH_LIMIT entries are kept cached.
    connection.ltrim(key, 0, settings.REDIS_LIST_LENGTH_LIMIT - 1)
def get(cls, gk_name):
    """Return the gatekeeper config as ``{'percent': int, 'description': str}``.

    Unknown gatekeepers fall back to a disabled default.

    :param gk_name: gatekeeper name (hash is stored at ``gatekeeper:<name>``).
    """
    conn = RedisClient.get_connection()
    name = f'gatekeeper:{gk_name}'
    if not conn.exists(name):
        return {'percent': 0, 'description': ''}
    redis_hash = conn.hgetall(name)
    description = redis_hash.get(b'description', b'')
    if isinstance(description, bytes):
        # Fix: hgetall returns bytes values (note the b'...' keys), and
        # str(b'x') yields "b'x'"; decode instead of stringifying.
        description = description.decode('utf-8')
    return {
        'percent': int(redis_hash.get(b'percent', 0)),
        'description': description,
    }
def test_cache_tweet_in_redis(self):
    """A serialized tweet stored in Redis deserializes back to an equal object."""
    tweet = self.create_tweet(self.user1)
    conn = RedisClient.get_connection()
    serialized_data = DjangoModelSerializer.serialize(tweet)
    conn.set(f'tweet:{tweet.id}', serialized_data)
    # Fix: dropped the pointless f-prefix on a literal with no placeholders.
    data = conn.get('tweet:not_exists')
    self.assertIsNone(data)
    data = conn.get(f'tweet:{tweet.id}')
    cached_tweet = DjangoModelSerializer.deserialize(data)
    self.assertEqual(tweet, cached_tweet)
def get_count(cls, obj, attr):
    """Return the cached counter for ``obj.attr``, seeding it from the DB on a miss.

    :return: the counter value as an int.
    """
    conn = RedisClient.get_connection()
    key = cls.get_count_key(obj, attr)
    count = conn.get(key)
    if count is not None:
        return int(count)
    # Cache miss: re-read the authoritative value from the database.
    obj.refresh_from_db()
    count = getattr(obj, attr)
    conn.set(key, count)
    # Consistency fix: the decr_count seeding path applies a TTL to this key;
    # without one, a key seeded here would live forever.
    conn.expire(key, settings.REDIS_KEY_EXPIRE_TIME)
    return count
def _load_objects_to_cache(cls, key, objects):
    """Serialize up to REDIS_LIST_LENGTH_LIMIT objects into the Redis list at ``key``.

    Leaves the key untouched (no empty list, no TTL) when there is nothing to cache.
    """
    connection = RedisClient.get_connection()
    limited = objects[:settings.REDIS_LIST_LENGTH_LIMIT]
    serialized_list = [DjangoModelSerializer.serialize(item) for item in limited]
    if serialized_list:
        connection.rpush(key, *serialized_list)
        connection.expire(key, settings.REDIS_KEY_EXPIRE_TIME)
def load_objects(cls, key, queryset):
    """Return the objects cached at ``key``, falling back to (and warming from) the queryset."""
    connection = RedisClient.get_connection()
    if not connection.exists(key):
        # Cache miss: warm the cache, then serve straight from the queryset.
        cls._load_objects_to_cache(key, queryset)
        return list(queryset)
    return [
        DjangoModelSerializer.deserialize(item)
        for item in connection.lrange(key, 0, -1)
    ]
def test_get_user_tweets(self):
    """Cached tweet ids stay correct across a cache miss, a hit, and a new tweet."""
    user = self.user1
    created = [self.create_tweet(user, 'tweet {}'.format(i)) for i in range(3)]
    tweet_ids = [tweet.id for tweet in reversed(created)]
    RedisClient.clear()
    conn = RedisClient.get_connection()
    # Cold cache: the first read populates Redis from the DB.
    tweets = TweetService.get_cached_tweets(user.id)
    self.assertEqual([t.id for t in tweets], tweet_ids)
    # Warm cache: a second read serves the same ids from Redis.
    tweets = TweetService.get_cached_tweets(user.id)
    self.assertEqual([t.id for t in tweets], tweet_ids)
    # A newly created tweet is pushed to the front of the cached list.
    new_tweet = self.create_tweet(user, 'new tweet')
    tweet_ids.insert(0, new_tweet.id)
    tweets = TweetService.get_cached_tweets(user.id)
    self.assertEqual([t.id for t in tweets], tweet_ids)
def __init__(self): self.redis = RedisClient() # 实例化redis类 self.crawler = Crawler() # 实例化爬虫类
def set_kv(cls, gk_name, key, value):
    """Write one field of the named gatekeeper's Redis hash."""
    connection = RedisClient.get_connection()
    connection.hset(f'gatekeeper:{gk_name}', key, value)
class Tester(object):
    """Asynchronous proxy tester: issues aiohttp requests (requires async/await)."""

    def __init__(self):
        # Redis-backed proxy pool client used to score proxies.
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy against TEST_URL and adjust its pool score.

        :param proxy: proxy address, as str or utf-8 bytes.
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = f'http://{proxy}'
                print(f'正在测试{proxy}')
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        # Working proxy: promote it to the top score.
                        self.redis.max(proxy)
                        print(f'代理可用{proxy}')
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                # Any connection/timeout failure counts against the proxy.
                self.redis.decrease(proxy)
                print(f'代理请求失败{proxy}')

    def run(self):
        """
        Main test loop: score every proxy currently in the pool, in batches.

        :return: None
        """
        try:
            count = self.redis.count()
            print(f'当前剩余{count}个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                tasks = [
                    self.test_single_proxy(proxy)
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                # Without flushing, buffered output only appears when the
                # program exits; flush so progress shows in real time.
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
def __init__(self):
    # Redis-backed storage client used by all operations on this instance.
    self.redis = RedisClient()
class ColdMovieService(object):
    """Cold-start strategy for movies.

    A new (cold) movie is recommended, by genre, to users who like that
    genre. Each cold movie gets a fixed exposure budget; engagement metrics
    gathered during that budget (click rate, dwell time over 1h/3h/1d
    windows, ...) are then used to judge the movie's quality.

    Quality can be assessed either by manual curation or by exposing the
    movie to users and measuring clicks / dwell time.
    """

    client = RedisClient.get_redis_client()
    # How many displays a cold movie gets before it leaves the cold list.
    cold_movie_need_display_num = 100

    def get_cold_movie_rec(self, user_id):
        """Return the cold movie id that best matches the user's favorite genre.

        Returns "" when the user has no profile yet or no cold movie matches.
        """
        rec_cold_match = ""
        user_profile_label = self.get_user_profile(user_id)
        if user_profile_label:
            cold_movies = self.client.hmget(REDIS_COLD_MOVIES, user_profile_label)[0]
            if cold_movies:
                cold_movies = json.loads(cold_movies)
                if len(cold_movies) > 0:
                    # Pick the cold movie with the highest display count so far.
                    cold_movies_arr = sorted(cold_movies.items(),
                                             key=lambda x: x[1], reverse=True)
                    rec_cold_match = cold_movies_arr[0][0]
        return rec_cold_match

    def update_cold_movies_list(self, movie_id):
        """Record one more display of ``movie_id``; drop it once its budget is spent.

        Must be called whenever a cold movie was actually shown to a user,
        so the Redis display counters stay in sync with reality.
        """
        movie_info = self.client.hmget(REDIS_MOVIE_INFO, movie_id)[0]
        if movie_info:
            # NOTE(review): movie_info[1] is assumed to be the genre field —
            # confirm against the writer of REDIS_MOVIE_INFO.
            label = GENRE2LABELMAP.get(movie_info[1])
            cold_movies = self.client.hmget(REDIS_COLD_MOVIES, label)[0]
            if cold_movies:
                # Fix: the stored value is JSON (get_cold_movie_rec loads it);
                # the original used it as a dict without deserializing first.
                cold_movies = json.loads(cold_movies)
                # Fix: default to 0 so a movie missing from the dict does not
                # raise TypeError on None + 1.
                click_num = cold_movies.get(movie_id, 0) + 1
                if click_num >= self.cold_movie_need_display_num:
                    cold_movies.pop(movie_id, None)
                else:
                    cold_movies[movie_id] = click_num
                self.client.hmset(REDIS_COLD_MOVIES,
                                  {label: json.dumps(cold_movies)})

    def get_user_profile(self, user_id):
        """Return the user's single favorite genre label.

        Only users with all three interest horizons populated (i.e. users who
        have been through enough recommendation rounds) get a label;
        otherwise "" is returned.
        """
        profile_10 = self.client.hmget(REDIS_SHORT_TERM_INTEREST_10, user_id)[0]
        profile_20 = self.client.hmget(REDIS_MIDDLE_TERM_INTEREST_20, user_id)[0]
        profile_50 = self.client.hmget(REDIS_LONG_TERM_INTEREST_50, user_id)[0]
        user_profile_label = ""
        if profile_10 is not None and profile_20 is not None and profile_50 is not None:
            profile_10 = Counter(json.loads(profile_10))
            profile_20 = Counter(json.loads(profile_20))
            profile_50 = Counter(json.loads(profile_50))
            # Sum the short/middle/long-term genre weights, then take the top one.
            profile = dict(profile_10 + profile_20 + profile_50)
            profile = sorted(profile.items(), key=lambda x: x[1], reverse=True)
            user_profile_label = profile[0][0]
        return user_profile_label
def get_conn():
    """Return the per-request RedisClient stored on flask's ``g``, creating it lazily."""
    if hasattr(g, 'redis'):
        return g.redis
    g.redis = RedisClient()
    return g.redis
def __init__(self):
    # Shared Redis client; movie_df is populated lazily elsewhere.
    self.client = RedisClient.get_redis_client()
    self.movie_df = None
def __init__(self):
    # Preprocessing helper plus the shared Redis client handle.
    self.data_preprocess = DataPreprocess()
    self.redis_client = RedisClient.get_redis_client()
# -*- coding: utf-8 -*- from util.redis_client import RedisClient from util.config import * import time import json client = RedisClient.get_redis_client() def get_current_weight(user_id): """获取用户当前最敏感权重 即最新权重""" weight = client.hmget(REDIS_CURRENT_WEIGHTS, user_id) weight = weight[0] if weight is None: weight = json.dumps(INIT_WEIGHT) weight = json.loads(weight) return weight def set_current_weight(weight, user_id): """设置用户当前最敏感权重 即最新权重""" client.hmset(REDIS_CURRENT_WEIGHTS, {user_id: json.dumps(weight)}) def get_user_history_rec_movies(user_id): """获取已经曝光给用户的电影列表,这里只是曝光,并不是点击""" watched_movies = __get_user_history_rec_movies_from_redis(user_id) movies = set()
def setUp(self):
    # Start every test from an empty Redis cache.
    RedisClient.clear()
def setUp(self):
    # Fresh RedisClient under test for each case.
    self.op = RedisClient()
import hashlib
from datetime import datetime

import pytz

from util.redis_client import RedisClient

utc = pytz.utc
fmt = '%Y-%m-%dT%H:%M:%S%z'
# UTC timestamp (ISO-8601 with offset) captured once at import time.
omp_time = datetime.strftime(utc.localize(datetime.utcnow()), fmt)

# message_writer = EsWriter(host='35.184.66.182')
message_writer = RedisClient(host='35.184.66.182', port=6379)


def event_hash_string(event_obj):
    """Concatenate the fields identifying an event source (without timestamp)."""
    return '%s%s%s' % (
        event_obj['producer'],
        event_obj['stateTriggerId'],
        event_obj['locationCode'],
    )


def event_id_string(event_obj):
    """Concatenate the source fields plus the raised timestamp — unique per event."""
    return '%s%s%s%s' % (
        event_obj['producer'],
        event_obj['stateTriggerId'],
        event_obj['locationCode'],
        event_obj['raisedTimestamp'],
    )


def get_hash(hash_string):
    """Return the hex MD5 digest of ``hash_string`` (identity, not security)."""
    return hashlib.md5(hash_string.encode('utf-8')).hexdigest()
class RedisOperatorTestCase(unittest.TestCase):
    """Smoke tests for the RedisClient operator."""

    def setUp(self):
        # Fresh RedisClient under test for each case.
        self.op = RedisClient()

    def test_count(self):
        # count() must report the pool size as an int.
        self.assertIsInstance(self.op.count(), int)