Пример #1
0
class Getter(object):
    """Fetches proxies through the crawler and stores them in Redis."""

    def __init__(self):
        self.redis = RedisClient()  # Redis client instance
        self.crawler = Crawler()  # crawler instance

    def is_over_threshold(self):
        """
        Tell whether the number of stored proxies has reached the pool limit.

        :return: True when ``self.redis.count()`` is at least
            ``POOL_UPPER_THRESHOLD``, otherwise False
        """
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """
        Run every registered crawl function and add the proxies it yields.

        Nothing is crawled when the pool is already at its limit.
        """
        print('获取器开始执行...')
        if self.is_over_threshold():
            return

        for index in range(self.crawler.__CrawlFuncCount__):
            crawl_func = self.crawler.__CrawlFunc__[index]
            # Fetch the proxies produced by this crawl function.
            found = self.crawler.get_proxies(crawl_func)

            sys.stdout.flush()  # force the output buffer to be written out

            for item in found:
                self.redis.add(item)
Пример #2
0
    def test_redis_client(self):
        """Pushed values are readable in LIFO order and gone after clear()."""
        list_key = 'redis_key'
        conn = RedisClient.get_connection()
        for value in (1, 2):
            conn.lpush(list_key, value)
        self.assertEqual(conn.lrange(list_key, 0, -1), [b'2', b'1'])

        # After clearing, the same key reads back as an empty list.
        RedisClient.clear()
        self.assertEqual(conn.lrange(list_key, 0, -1), [])
Пример #3
0
    def test_create_new_newsfeed_before_get_cached_newsfeeds(self):
        """A newsfeed created while the cache key is absent populates the cache."""
        older_feed = self.create_newsfeed(
            self.user1, self.create_tweet(self.user1))

        RedisClient.clear()
        conn = RedisClient.get_connection()
        cache_key = USER_NEWSFEEDS_PATTERN.format(user_id=self.user1.id)

        # The key is absent right after the clear ...
        self.assertEqual(conn.exists(cache_key), False)
        newer_feed = self.create_newsfeed(
            self.user1, self.create_tweet(self.user1))
        # ... and present once another newsfeed has been created.
        self.assertEqual(conn.exists(cache_key), True)

        cached = NewsfeedService.get_cached_newsfeeds(self.user1.id)
        cached_ids = [feed.id for feed in cached]
        self.assertEqual(cached_ids, [newer_feed.id, older_feed.id])
Пример #4
0
    def test_create_new_tweet_before_get_cached_tweets(self):
        """A tweet created while the cache key is absent populates the cache."""
        older_tweet = self.create_tweet(self.user1, 'tweet1')

        RedisClient.clear()
        conn = RedisClient.get_connection()
        cache_key = USER_TWEETS_PATTERN.format(user_id=self.user1.id)

        # The key is absent right after the clear ...
        self.assertEqual(conn.exists(cache_key), False)
        newer_tweet = self.create_tweet(self.user1, 'tweet2')
        # ... and present once another tweet has been created.
        self.assertEqual(conn.exists(cache_key), True)

        cached = TweetService.get_cached_tweets(self.user1.id)
        cached_ids = [tweet.id for tweet in cached]
        self.assertEqual(cached_ids, [newer_tweet.id, older_tweet.id])
Пример #5
0
    def decr_count(cls, obj, attr):
        """
        Decrement the cached counter for ``attr`` of ``obj``.

        When the key is already cached it is decremented in Redis and the new
        value returned. Otherwise the cache is back-filled with the current
        ``getattr(obj, attr)`` value (with an expiry) and that value is
        returned without decrementing.
        """
        conn = RedisClient.get_connection()
        key = cls.get_count_key(obj, attr)

        if conn.exists(key):
            return conn.decr(key)

        # Cache miss: seed the key from the object's attribute.
        current = getattr(obj, attr)
        conn.set(key, current)
        conn.expire(key, settings.REDIS_KEY_EXPIRE_TIME)
        return current
Пример #6
0
    def push_object(cls, key, obj, queryset):
        """
        Put ``obj`` at the head of the cached list stored under ``key``.

        If the key is not cached yet, the list is loaded from ``queryset``
        instead and ``obj`` is not pushed separately.
        """
        conn = RedisClient.get_connection()
        if conn.exists(key):
            payload = DjangoModelSerializer.serialize(obj)
            conn.lpush(key, payload)
            # Keep at most REDIS_LIST_LENGTH_LIMIT objects in the cached list.
            conn.ltrim(key, 0, settings.REDIS_LIST_LENGTH_LIMIT - 1)
        else:
            cls._load_objects_to_cache(key, queryset)
Пример #7
0
    def get(cls, gk_name):
        """
        Read the settings stored in the Redis hash ``gatekeeper:<gk_name>``.

        :param gk_name: gatekeeper name, used to build the Redis key
        :return: dict with ``percent`` (int) and ``description`` (str);
            ``0`` and ``''`` are used when the hash or a field is missing
        """
        conn = RedisClient.get_connection()
        name = f'gatekeeper:{gk_name}'
        if not conn.exists(name):
            return {'percent': 0, 'description': ''}

        redis_hash = conn.hgetall(name)
        description = redis_hash.get(b'description', b'')
        if isinstance(description, bytes):
            # str() on bytes returns the repr ("b'...'"), not the text, so the
            # stored value has to be decoded explicitly.
            description = description.decode('utf-8')
        else:
            description = str(description)
        return {
            'percent': int(redis_hash.get(b'percent', 0)),
            'description': description,
        }
Пример #8
0
    def test_cache_tweet_in_redis(self):
        """A serialized tweet stored in Redis deserializes to an equal tweet."""
        tweet = self.create_tweet(self.user1)
        conn = RedisClient.get_connection()
        tweet_key = f'tweet:{tweet.id}'
        conn.set(tweet_key, DjangoModelSerializer.serialize(tweet))

        # Reading a key that was never written yields None.
        missing = conn.get('tweet:not_exists')
        self.assertEqual(missing, None)

        restored = DjangoModelSerializer.deserialize(conn.get(tweet_key))
        self.assertEqual(tweet, restored)
Пример #9
0
    def get_count(cls, obj, attr):
        """
        Return the counter for ``attr`` of ``obj``, preferring the cached value.

        On a cache miss the object is refreshed from the database, the
        attribute value is written to Redis and returned.
        """
        conn = RedisClient.get_connection()
        key = cls.get_count_key(obj, attr)

        cached = conn.get(key)
        if cached is None:
            # Back-fill the cache from the freshly reloaded object.
            obj.refresh_from_db()
            fresh = getattr(obj, attr)
            conn.set(key, fresh)
            return fresh

        return int(cached)
Пример #10
0
    def _load_objects_to_cache(cls, key, objects):
        """
        Serialize the first REDIS_LIST_LENGTH_LIMIT ``objects`` into the Redis
        list ``key`` and set its expiry; does nothing when there are none.
        """
        conn = RedisClient.get_connection()

        # Only the first REDIS_LIST_LENGTH_LIMIT objects are cached.
        payloads = [
            DjangoModelSerializer.serialize(item)
            for item in objects[:settings.REDIS_LIST_LENGTH_LIMIT]
        ]
        if not payloads:
            return

        conn.rpush(key, *payloads)
        conn.expire(key, settings.REDIS_KEY_EXPIRE_TIME)
Пример #11
0
    def load_objects(cls, key, queryset):
        """
        Return the objects cached under ``key`` as a list.

        On a cache miss the cache is filled from ``queryset`` and the whole
        queryset is returned as a list.
        """
        conn = RedisClient.get_connection()
        if not conn.exists(key):
            cls._load_objects_to_cache(key, queryset)
            return list(queryset)

        # Cache hit: rebuild the objects from their serialized form.
        return [
            DjangoModelSerializer.deserialize(raw)
            for raw in conn.lrange(key, 0, -1)
        ]
Пример #12
0
    def test_get_user_tweets(self):
        """Cached tweets come back newest-first on miss, hit and after update."""
        created_ids = [
            self.create_tweet(self.user1, 'tweet {}'.format(i)).id
            for i in range(3)
        ]
        expected_ids = created_ids[::-1]

        RedisClient.clear()
        # Result unused; the call is kept exactly as the original issued it.
        RedisClient.get_connection()

        def cached_ids():
            return [
                t.id for t in TweetService.get_cached_tweets(self.user1.id)
            ]

        # cache miss
        self.assertEqual(cached_ids(), expected_ids)

        # cache hit
        self.assertEqual(cached_ids(), expected_ids)

        # cache updated
        new_tweet = self.create_tweet(self.user1, 'new tweet')
        ids_after_update = cached_ids()
        expected_ids = [new_tweet.id] + expected_ids
        self.assertEqual(ids_after_update, expected_ids)
Пример #13
0
 def __init__(self):
     """Create the Redis client and the crawler held by this instance."""
     self.redis = RedisClient()  # instantiate the Redis client class
     self.crawler = Crawler()  # instantiate the crawler class
Пример #14
0
 def set_kv(cls, gk_name, key, value):
     """Set field ``key`` to ``value`` in the Redis hash ``gatekeeper:<gk_name>``."""
     conn = RedisClient.get_connection()
     conn.hset(f'gatekeeper:{gk_name}', key, value)
Пример #15
0
class Tester(object):
    """
    Tests stored proxies with asynchronous aiohttp requests (the request
    method is a coroutine and must be driven by an event loop).
    """
    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy and record the outcome through ``self.redis``.

        ``self.redis.max(proxy)`` is called when the response status is in
        ``VALID_STATUS_CODES``; ``self.redis.decrease(proxy)`` is called for
        any other status and for the request errors caught below.

        :param proxy: proxy address appended to ``http://``; bytes are decoded
            as UTF-8 (presumably ``host:port`` -- confirm against the store)
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = f'http://{proxy}'
                print(f'正在测试{proxy}')

                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print(f'代理可用{proxy}')
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)

            except (ClientError, aiohttp.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print(f'代理请求失败{proxy}')

    def run(self):
        """
        Test every stored proxy, in batches of ``BATCH_TEST_SIZE``.

        Each batch is run to completion on the event loop, followed by a
        5 second pause. Any exception escaping the loop is printed rather
        than raised.

        :return: None
        """
        try:
            count = self.redis.count()
            print(f'当前剩余{count}个代理')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)

                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task
                # bound to this loop first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                # asyncio.wait() raises ValueError on an empty collection,
                # which would abort all remaining batches.
                if tasks:
                    loop.run_until_complete(asyncio.wait(tasks))
                # Flush stdout so progress shows up in real time instead of
                # only once the program has finished.
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #16
0
 def __init__(self):
     """Create the Redis client held by this instance."""
     self.redis = RedisClient()
class ColdMovieService(object):

    """
    Cold-start strategy for movies.

    Used to score movies: a movie is recommended, according to its genre, to
    users who like that genre. After about 1000 exposures, metrics such as
    click-through rate and dwell time (over one hour, three hours, one day,
    and so on) are computed to judge how good the movie is.

    How movie quality is judged:
    1. Manual curation.
    2. Exposure to users, judged by their click-through rate and dwell time.

    """
    client = RedisClient.get_redis_client()

    # Counter value at which update_cold_movies_list removes a movie from the
    # cold-movie map instead of storing the incremented counter.
    cold_movie_need_display_num = 100

    def get_cold_movie_rec(self, user_id):
        """
        Recommend a cold movie to the user (exposures feed the metrics used
        to judge movie quality).

        :param user_id: field used to look up the user's interest profiles
        :return: the key with the highest value in the cold-movie map stored
            for the user's favourite label, or ``""`` when the user has no
            profile label or there is no non-empty map for that label
        """
        user_profile_label = self.get_user_profile(user_id)
        if not user_profile_label:
            return ""

        raw_cold_movies = self.client.hmget(
            REDIS_COLD_MOVIES, user_profile_label)[0]
        if not raw_cold_movies:
            return ""

        cold_movies = json.loads(raw_cold_movies)
        if not cold_movies:
            return ""

        # max() returns the first maximal entry, matching the first element
        # of a stable descending sort.
        return max(cold_movies.items(), key=lambda item: item[1])[0]

    def update_cold_movies_list(self, movie_id):
        """
        Update Redis after a cold movie has actually been shown to a user.

        The movie's counter in the cold-movie map of its label is incremented;
        once it reaches ``cold_movie_need_display_num`` the movie is removed
        from the map. Nothing is written when the movie info, the map, or the
        movie's entry in the map is missing.

        :param movie_id: id of the movie that was recommended
        :return: None
        """
        movie_info = self.client.hmget(REDIS_MOVIE_INFO, movie_id)[0]
        if not movie_info:
            return

        # NOTE(review): movie_info is indexed exactly as returned by Redis;
        # if it is stored as a JSON string it needs json.loads first --
        # confirm the stored format against the writer.
        label = GENRE2LABELMAP.get(movie_info[1])
        raw_cold_movies = self.client.hmget(REDIS_COLD_MOVIES, label)[0]
        if not raw_cold_movies:
            return

        # The map is stored JSON-encoded (it is read with json.loads in
        # get_cold_movie_rec and written with json.dumps below), so it has to
        # be decoded before it can be used as a dict.
        cold_movies = json.loads(raw_cold_movies)

        # Keys of a decoded JSON object are always str.
        if isinstance(movie_id, bytes):
            movie_key = movie_id.decode('utf-8')
        else:
            movie_key = str(movie_id)

        click_num = cold_movies.get(movie_key)
        if click_num is None:
            # The movie is not (or no longer) in the cold list.
            return

        click_num += 1
        if click_num >= self.cold_movie_need_display_num:
            cold_movies.pop(movie_key)
        else:
            cold_movies[movie_key] = click_num

        self.client.hmset(REDIS_COLD_MOVIES, {label: json.dumps(cold_movies)})

    def get_user_profile(self, user_id):
        """
        Get the user's single favourite genre label.

        A label is only produced when the short-, middle- and long-term
        interest profiles all exist (per the original note: the user must
        already have gone through 50 refreshes).

        :param user_id: field used to look up the three interest profiles
        :return: the label with the highest summed count, or ``""`` when a
            profile is missing or the summed profile is empty
        """
        profile_10 = self.client.hmget(REDIS_SHORT_TERM_INTEREST_10, user_id)[0]
        profile_20 = self.client.hmget(REDIS_MIDDLE_TERM_INTEREST_20, user_id)[0]
        profile_50 = self.client.hmget(REDIS_LONG_TERM_INTEREST_50, user_id)[0]

        if profile_10 is None or profile_20 is None or profile_50 is None:
            return ""

        # Counter addition keeps only positive totals, so the sum can be empty
        # even when all three profiles exist.
        combined = (
            Counter(json.loads(profile_10))
            + Counter(json.loads(profile_20))
            + Counter(json.loads(profile_50))
        )
        if not combined:
            return ""

        return max(combined.items(), key=lambda item: item[1])[0]
Пример #18
0
def get_conn():
    """Return the RedisClient stored on ``g``, creating it on first use."""
    if hasattr(g, 'redis'):
        return g.redis

    g.redis = RedisClient()
    return g.redis
Пример #19
0
    def __init__(self):
        """Obtain the Redis client and initialise ``movie_df`` to None."""

        self.client = RedisClient.get_redis_client()

        self.movie_df = None
Пример #20
0
    def __init__(self):
        """Create the DataPreprocess helper and obtain the Redis client."""
        self.data_preprocess = DataPreprocess()

        self.redis_client = RedisClient.get_redis_client()
Пример #21
0
# -*- coding: utf-8 -*-
from util.redis_client import RedisClient
from util.config import *
import time
import json

# Redis client obtained once at import time and used by the functions in this module.
client = RedisClient.get_redis_client()


def get_current_weight(user_id):
    """
    Get the user's current (most recent, most sensitive) weight.

    Falls back to a JSON round-trip of INIT_WEIGHT when nothing is stored
    for the user.
    """
    stored = client.hmget(REDIS_CURRENT_WEIGHTS, user_id)[0]
    encoded = json.dumps(INIT_WEIGHT) if stored is None else stored
    return json.loads(encoded)


def set_current_weight(weight, user_id):
    """Store the user's current (most recent, most sensitive) weight as JSON."""
    encoded = json.dumps(weight)
    client.hmset(REDIS_CURRENT_WEIGHTS, {user_id: encoded})


def get_user_history_rec_movies(user_id):
    """获取已经曝光给用户的电影列表,这里只是曝光,并不是点击"""
    watched_movies = __get_user_history_rec_movies_from_redis(user_id)
    movies = set()
Пример #22
0
 def setUp(self):
     """Call ``RedisClient.clear()`` as test set-up."""
     RedisClient.clear()
Пример #23
0
 def setUp(self):
     """Create the RedisClient instance stored as ``self.op``."""
     self.op = RedisClient()
Пример #24
0
import hashlib
from datetime import datetime
import pytz
from util.redis_client import RedisClient

utc = pytz.utc
fmt = '%Y-%m-%dT%H:%M:%S%z'
# Captured once, when the module is imported, as a timezone-aware UTC string.
omp_time = utc.localize(datetime.utcnow()).strftime(fmt)

# message_writer = EsWriter(host='35.184.66.182')
# Writer client created at import time against a fixed host and port.
message_writer = RedisClient(host='35.184.66.182', port=6379)


def event_hash_string(event_obj):
    """
    Concatenate the event's ``producer``, ``stateTriggerId`` and
    ``locationCode`` values (each converted with ``str``) into one string.
    """
    fields = ('producer', 'stateTriggerId', 'locationCode')
    values = [event_obj[name] for name in fields]
    return ''.join(str(value) for value in values)


def event_id_string(event_obj):
    """
    Concatenate the event's ``producer``, ``stateTriggerId``, ``locationCode``
    and ``raisedTimestamp`` values (each converted with ``str``) into one string.
    """
    fields = ('producer', 'stateTriggerId', 'locationCode', 'raisedTimestamp')
    values = [event_obj[name] for name in fields]
    return ''.join(str(value) for value in values)


def get_hash(hash_string):
    """Return the hexadecimal MD5 digest of ``hash_string`` encoded as UTF-8."""
    digest = hashlib.md5()
    digest.update(hash_string.encode('utf-8'))
    return digest.hexdigest()

Пример #25
0
class RedisOperatorTestCase(unittest.TestCase):
    """Checks on the RedisClient operations."""

    def setUp(self):
        self.op = RedisClient()

    def test_count(self):
        """count() returns an int."""
        total = self.op.count()
        self.assertIsInstance(total, int)