Exemplo n.º 1
0
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Paginates over a per-user digest list cached in Redis; on a cache miss
    the latest news is pulled from MongoDB (newest first) and the digest
    list is cached under the user id.

    Args:
        user_id: user login id, used as the Redis cache key.
        page_num: 1-based page number (string or int).

    Returns:
        A JSON-serializable list of news documents with the 'text' field
        stripped to save bandwidth.
    """
    page_num = int(page_num)
    # News range to be fetched for this page: [begin_index, end_index).
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    db = mongodb_client.get_db()

    # Read the cached value once: calling get() twice (existence check,
    # then load) races with key expiration and could pass None to
    # pickle.loads, raising TypeError.
    cached_digests = redis_client.get(user_id)
    if cached_digests is not None:
        # User id already cached in redis: page through the cached digest
        # list and fetch only the matching news documents.
        news_digests = pickle.loads(cached_digests)
        sliced_news_digest = news_digests[begin_index:end_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {
                '$in': sliced_news_digest
            }}))
    else:
        # No cached data: retrieve news sorted by publish time in reverse
        # order (latest first) and store the digest list in redis.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digest = [x['digest'] for x in total_news]

        redis_client.set(user_id, pickle.dumps(total_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get user preference; the first entry (if any) is the top class.
    preference = recommendation_service_client.getPreferenceForUser(user_id)
    topPreference = None

    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    print('top preference is %s' % topPreference)
    for news in sliced_news:
        # Remove text field to save bandwidth.
        del news['text']
        # BUG FIX: '.date' without parentheses is a bound method object and
        # never equals date.today(), so the 'today' tag was never applied.
        if news['publishedAt'].date() == date.today():
            # Add time tag to be displayed on page.
            news['time'] = 'today'
        if news['class'] == topPreference:
            news['reason'] = topPreference

    return json.loads(dumps(sliced_news))
Exemplo n.º 2
0
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    A per-user digest list is cached in Redis; a cache miss pulls the
    latest news from MongoDB and populates the cache.

    Args:
        user_id: user login id, also used as the Redis cache key.
        page_num: 1-based page number (string or int).

    Returns:
        JSON-serializable list of news documents for the requested page,
        with the 'text' field removed.
    """
    page_num = int(page_num)  # cast
    beginning_news_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    ending_news_index = page_num * NEWS_LIST_BATCH_SIZE

    sliced_news = []

    db = mongodb_client.get_db()

    # Fetch the cached value once: reading it twice (existence check +
    # load) races with key expiration and could hand None to pickle.loads.
    cached_digests = redis_client.get(user_id)
    if cached_digests is not None:
        news_digests = pickle.loads(cached_digests)  # redis only stores digests

        sliced_news_digests = news_digests[
            beginning_news_index:ending_news_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {
                '$in': sliced_news_digests
            }}))
    else:
        all_news = list(
            db[NEWS_TABLE_NAME].find().limit(TOTAL_NEWS_LIMIT).sort([
                ('publishedAt', -1)
            ]))
        # BUG FIX: on Python 3, map() returns a lazy iterator that
        # pickle.dumps cannot serialize ("cannot pickle 'map' object");
        # materialize a real list before caching.
        all_news_digests = [x['digest'] for x in all_news]

        redis_client.set(user_id, pickle.dumps(all_news_digests))
        redis_client.expire(user_id, TOTAL_NEWS_EXPIRATION_IN_SECONDS)

        sliced_news = all_news[beginning_news_index:ending_news_index]

    preference = recommendation_service_client.getPreferenceForUser(user_id)
    top_preference = None

    if preference is not None and len(preference) > 0:
        top_preference = preference[0]

    # Update labels according to time and recommendation.
    for news in sliced_news:
        del news['text']  # to save bandwidth.
        if news['class'] == top_preference:
            news['reason'] = 'recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'

    return json.loads(dumps(sliced_news))  # serializes then loads into json
Exemplo n.º 3
0
def getNewsSummariesForUser(user_id, page_num="1"):
    ''' Form a news list based on page_num and user_id.

    Keyword arguments:
    user_id -- user login id (also the Redis cache key)
    page_num -- the 1-based page the user views (string or int)
    '''
    page_num = int(page_num)
    # [begin_index, end_index)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []

    # Hoisted out of both branches; each branch needs the same db handle.
    db = mongodb_client.get_db()

    # Read the cached value once: a second get() after the existence check
    # races with key expiration and could pass None to pickle.loads.
    cached_digests = redis_client.get(user_id)
    if cached_digests is not None:
        # Redis stores a pickled list of digests; convert it back.
        news_digests = pickle.loads(cached_digests)

        # If begin_index is out of range, this will return empty list;
        # If end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        # BUG FIX: bare `print x` is Python 2 syntax and a SyntaxError on
        # Python 3; use the print() function as the rest of the file does.
        print(sliced_news_digests)
        # {'$in': sliced_news_digests} matches any digest in the slice.
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {
                '$in': sliced_news_digests
            }}))
    else:
        # The newest one comes first.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        # BUG FIX: map() yields an unpicklable lazy iterator on Python 3;
        # build a concrete list of digests before pickling.
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)

        sliced_news = total_news[begin_index:end_index]
    preference = recommendation_service_client.getPreferenceForUser(user_id)
    return _formatNews(sliced_news, preference)
Exemplo n.º 4
0
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Uses a Redis-cached digest list for pagination; falls back to MongoDB
    (sorted newest first) on a cache miss and refreshes the cache.

    Args:
        user_id: user login id, used as the Redis cache key.
        page_num: 1-based page number (string or int).

    Returns:
        JSON-serializable list of news documents for the requested page,
        with the 'text' field removed.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    sliced_news = []
    db = mongodb_client.get_db()

    # BUG FIX: fetch the cached value once. The original called
    # redis_client.get twice (existence check, then load); the key could
    # expire between the calls, passing None to pickle.loads (TypeError).
    cached_digests = redis_client.get(user_id)
    if cached_digests is not None:
        news_digest = pickle.loads(cached_digests)
        sliced_news_digest = news_digest[begin_index:end_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {
                '$in': sliced_news_digest
            }}))
    else:
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIST_LIMIT))
        total_news_digest = [x['digest'] for x in total_news]

        redis_client.set(user_id, pickle.dumps(total_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIMEOUT_IN_SECONDS)

        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user; the first entry (if any) is the top class.
    preference = recommendation_service_client.getPreferenceForUser(user_id)
    top_preference = None  # typo fixed: was 'topPrefence'

    if preference is not None and len(preference) > 0:
        top_preference = preference[0]

    for news in sliced_news:
        # Drop the body text to save bandwidth.
        del news['text']
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
        if news['class'] == top_preference:
            news['reason'] = "Recommend"

    return json.loads(dumps(sliced_news))
Exemplo n.º 5
0
def test_basic():
    """Smoke test: the recommendation service returns a (non-None) preference."""
    user_id = "test_user"
    preference = client.getPreferenceForUser(user_id)  # typo 'perfernce' renamed
    assert preference is not None
    print('test_basic passed!')
def test_basic():
    """Verify a non-empty preference list exists for the test user."""
    result = client.getPreferenceForUser('test_user')
    assert result is not None
    assert len(result) > 0
    print("test_basic passed!")