def test_basic():
    """Smoke test: the client returns a non-empty preference list
    whose top entry is 'World' for user1."""
    prefs = client.getPreferenceForUser("user1")
    print(prefs)
    assert prefs is not None
    assert len(prefs) > 0
    assert prefs[0] == "World"
    print('test_basic passed!')
def getNewsSummariesForUserLikeList(user_id):
    """Return the user's liked news (without body text), tagged with
    recommendation, freshness and like flags.

    Fixes vs. previous version:
    - newsIds are accumulated across all like-list documents instead of
      keeping only the last document's list;
    - the user's like list is fetched once, not once per news item;
    - 'text' removal is guarded so docs lacking the field don't raise.
    """
    db = mongodb_client.get_db()
    like_digests = []
    for doc in db[USER_LIKELIST_TABLE_NAME].find({"userId": user_id}):
        # Accumulate: a user may have more than one like-list document.
        like_digests.extend(doc["newsIds"])
    liked_news = list(db[NEWS_TABLE_NAME].find(
        {'digest': {'$in': like_digests}}))

    # Top preferred news class for this user (None when no preference).
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    # Hoisted out of the loop: one lookup instead of one per news item.
    user_like_list = getUserLikelist(user_id)

    for news in liked_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if topPreference is not None and news.get('class') == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
        # Whether this news is in the user's like list.
        news['like'] = news['digest'] in user_like_list
    return json.loads(dumps(liked_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user; digest lists are
    cached per-user in redis, documents come from MongoDB.

    Fixes vs. previous version:
    - the DB read was wrapped in try/finally, so the error was logged on
      every call even on success; now try/except which re-raises;
    - '$s' in the log format string corrected to '%s';
    - map(...) is materialized with list() before pickling;
    - buggy debug print replaced by a proper log call;
    - 'class' lookup guarded so docs without the field don't raise.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        logger.info('user is already in redis')
        news_digests = pickle.loads(redis_client.get(user_id))
        # Out-of-range begin_index yields []; out-of-range end_index
        # yields all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        logger.info('load sliced news')
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        logger.info('user is not in redis')
        db = mongodb_client.get_db()
        try:
            total_news = list(db[NEWS_TABLE_NAME].find().sort([
                ('publishedAt', -1)
            ]).limit(NEWS_LIMIT))
        except Exception:
            # Was try/finally, which logged this unconditionally.
            logger.error('Some error occurs when find news in MongoDB')
            raise
        total_news_digests = list(map(lambda x: x['digest'], total_news))
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        logger.info('user %s is set to redis', user_id)
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]
    logger.info('top preference is: %s', topPreference)

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getPreference(user_id):
    """Return the user's top preferred news class, or '' when the
    recommendation service reports none.

    Fix: the previous version declared ``global topPreference`` AFTER
    assigning the local of the same name, which is a SyntaxError
    ("assigned to before global declaration"); the global statement is
    removed and a plain local is returned. The stray debug print is
    also dropped.
    """
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = ''
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]
    return topPreference
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news, personalized by per-class preference
    weights (class quota = round(weight * news_limit)).

    Fixes vs. previous version:
    - digests are built with a list comprehension instead of pickling a
      bare map(...) (a py3 map object would not slice after unpickling);
    - redundant full-copy slice ``[:]`` after sorted() removed;
    - 'text' removal guarded so docs without the field don't raise;
    - debug prints removed.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * news_list_batch_size
    end_index = page_num * news_list_batch_size

    # The final list of news to be returned.
    sliced_news = []

    # Personalization: turn preference weights into per-class quotas.
    preferences = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    news_numbers = []
    if preferences is not None and len(preferences) > 0:
        news_numbers = [
            int(round(preference * news_limit)) for preference in preferences
        ]

    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        # Out-of-range begin_index yields []; out-of-range end_index
        # yields all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[news_table_name].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        # Take each class's quota of news, then merge and sort by recency.
        selected_news = []
        for i in range(len(news_numbers)):
            selected_news.extend(
                list(db[news_table_name].find({
                    'class': news_classes_v2.class_map[str(i + 1)]
                }).limit(news_numbers[i])))
        selected_news = sorted(selected_news,
                               key=lambda k: k['publishedAt'],
                               reverse=True)
        # Cache digest list for subsequent paging requests.
        selected_news_digests = [n['digest'] for n in selected_news]
        redis_client.set(user_id, pickle.dumps(selected_news_digests))
        redis_client.expire(user_id, user_news_time_out_in_seconds)
        sliced_news = selected_news[begin_index:end_index]

    # Other taggings and returning.
    for news in sliced_news:
        news.pop('text', None)
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Per-user digest lists are cached in redis; news documents are read
    from MongoDB by digest. NOTE(review): the 'reason' tagging loop
    below is commented out, so the preference lookup currently only
    populates an unused topPreference variable.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        # pickle.loads de-serializes the cached bytes back into the Python
        # list of digests (it does not produce JSON).
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this will return empty list;
        # If end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        print sliced_news_digests
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {
                '$in': sliced_news_digests
            }}))
    else:
        db = mongodb_client.get_db()
        # Latest NEWS_LIMIT news, newest first by publishedAt.
        total_news = list(
            db[NEWS_TABLE_NAME].find().sort([('publishedAt', -1)
                                             ]).limit(NEWS_LIMIT)
        )
        # Collect each document's digest.
        total_news_digests = map(lambda x: x['digest'],
                                 total_news)
        # Cache the digest list for this user and set an expiry.
        redis_client.set(
            user_id, pickle.dumps(total_news_digests)
        )
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        # Page the batch by begin/end index.
        sliced_news = total_news[
            begin_index:end_index]

    # Get preference for the user (currently informational only — see
    # the commented-out tagging below).
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        # del news['text']
        # if news['class'] == topPreference:
        #     news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Fetch one page of news for a user.

    Digest lists are cached per-user in redis; documents are looked up
    in MongoDB by digest. News matching the user's top preference class
    are tagged with reason='Recommend'.
    """
    page = int(page_num)
    if page <= 0:
        return []

    start = (page - 1) * NEWS_LIST_BATCH_SIZE
    stop = page * NEWS_LIST_BATCH_SIZE

    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        LOGGER.debug("user exist in redis")
        # De-serialize the cached digest list, then page it.
        digests = pickle.loads(redis_client.get(user_id))
        page_digests = digests[start:stop]
        page_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': page_digests}}))
    else:
        LOGGER.debug("fetch news from database and saved in redis")
        # Latest news first, capped at NEWS_LIMIT.
        latest = list(db[NEWS_TABLE_NAME].find().sort(
            [('publishedAt', -1)]).limit(NEWS_LIMIT))
        # Cache the serialized digest list with an expiry.
        latest_digests = [item['digest'] for item in latest]
        redis_client.set(user_id, pickle.dumps(latest_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        page_news = latest[start:stop]

    # Customize the returned list with the user's top preference.
    prefs = news_recommendation_service_client.getPreferenceForUser(user_id)
    top_pref = prefs[0] if prefs else None

    for item in page_news:
        if 'class' in item and item['class'] == top_pref:
            item['reason'] = 'Recommend'
    return json.loads(dumps(page_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news; anonymous users (user_id None) bypass
    the redis digest cache.

    Fixes vs. previous version:
    - local 'topPrefence' typo renamed to topPreference;
    - the two identical DB-query branches are merged; the digest list is
      no longer built (unused) for anonymous users;
    - 'class' lookup guarded so docs without the field don't raise.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    sliced_news = []
    db = mongodb_client.get_db()
    if user_id is not None and redis_client.get(user_id) is not None:
        total_news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this will return empty list;
        # if end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digests = total_news_digests[begin_index:end_index]
        sliced_news = list(db[DB_NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        total_news = list(db[DB_NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        if user_id is not None:
            # Cache the digest list only for identified users.
            total_news_digests = [x['digest'] for x in total_news]
            redis_client.set(user_id, pickle.dumps(total_news_digests))
            redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = "Recommend"
    return json.loads(dumps(sliced_news))
def get_news_summaries_for_user(user_id, page_num):
    """Return one page of news summaries for a user.

    Redis stores only the per-user digest list (pickled); documents are
    then read from MongoDB by digest.

    Fixes vs. previous version:
    - 'text' removal and 'class' lookup are guarded so documents lacking
      those fields don't raise KeyError.
    """
    page_num = int(page_num)
    if page_num <= 0:
        return []

    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        # Redis holds the serialized digest list; de-serialize and page it.
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference list for the user and tag matching news.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Fixes vs. previous version:
    - ``news['publishedAt'].date == datetime.today().date()`` compared a
      bound method to a date and was always False; the missing call
      parentheses are added;
    - digests are built with a list comprehension instead of pickling a
      bare map(...);
    - 'class' lookup guarded; debug print removed.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list to be returned to the user.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digest = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this will return empty list;
        # if end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digest = news_digest[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digest}}))
    else:
        db = mongodb_client.get_db()
        # Latest NEWS_LIMIT news according to publishedAt, newest first.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        # Only the digests are cached, to save memory.
        total_news_digest = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news, personalized by per-class preference
    weights (class quota = round(weight * news_limit)).

    Fixes vs. previous version:
    - digests built via list comprehension instead of pickling map(...);
    - redundant ``[:]`` copy after sorted() removed;
    - 'text' removal guarded; debug prints removed.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * news_list_batch_size
    end_index = page_num * news_list_batch_size

    # The final list of news to be returned.
    sliced_news = []

    # Personalization: decide each class's quota from preference weights.
    preferences = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    news_numbers = []
    if preferences is not None and len(preferences) > 0:
        news_numbers = [
            int(round(weight * news_limit)) for weight in preferences
        ]

    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this will return empty list;
        # if end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[news_table_name].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        # Take each class's quota of news, then merge and sort by recency.
        selected_news = []
        for i in range(len(news_numbers)):
            selected_news.extend(
                list(db[news_table_name].find({
                    'class': news_classes_v2.class_map[str(i + 1)]
                }).limit(news_numbers[i])))
        selected_news = sorted(selected_news,
                               key=lambda k: k['publishedAt'],
                               reverse=True)
        # Cache digests for paging.
        selected_news_digests = [n['digest'] for n in selected_news]
        redis_client.set(user_id, pickle.dumps(selected_news_digests))
        redis_client.expire(user_id, user_news_time_out_in_seconds)
        sliced_news = selected_news[begin_index:end_index]

    # Other taggings and returning.
    for news in sliced_news:
        news.pop('text', None)
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Fetch one page of news for a user.

    Per-user digest lists live in redis; documents are read from MongoDB
    by digest. News in the user's top preference class are tagged with
    reason='Recommend'; the 'text' field is stripped to save bandwidth.
    """
    page = int(page_num)
    if page <= 0:
        return []

    start = (page - 1) * NEWS_LIST_BATCH_SIZE
    stop = page * NEWS_LIST_BATCH_SIZE

    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        digests = pickle.loads(redis_client.get(user_id))
        page_digests = digests[start:stop]
        page_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': page_digests}}))
    else:
        latest = list(db[NEWS_TABLE_NAME].find().sort(
            [('publishedAt', -1)]).limit(NEWS_LIMIT))
        latest_digests = [item['digest'] for item in latest]
        redis_client.set(user_id, pickle.dumps(latest_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        page_news = latest[start:stop]

    # TODO: use preference to customize returned news list.
    prefs = news_recommendation_service_client.getPreferenceForUser(user_id)
    top_pref = prefs[0] if prefs else None

    for item in page_news:
        # Remove text field to save bandwidth.
        del item['text']
        if 'class' in item and item['class'] == top_pref:
            item['reason'] = 'Recommend'
    return json.loads(dumps(page_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news (everything except the body text) to the
    front end.

    Fixes vs. previous version:
    - 'class' lookup guarded so docs without the field don't raise;
    - non-English comments translated and commented-out experiments
      removed.
    """
    page_num = int(page_num)
    if page_num <= 0:
        return []

    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE  # inclusive
    end_index = page_num * NEWS_LIST_BATCH_SIZE  # exclusive

    # The news list returned to the front end: full metadata, no text.
    sliced_news = []
    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        # Collect every document's digest into one list.
        total_news_digests = [x['digest'] for x in total_news]
        # Save this list to redis with an expiry.
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    # TODO: use preference to customize returned news list.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Raises:
        ValueError: if page_num is not a positive integer.

    Fixes vs. previous version:
    - leftover debug print of the full digest list removed;
    - 'class' lookup guarded so docs without the field don't raise.
    """
    page_num = int(page_num)
    if page_num <= 0:
        raise ValueError('page_num should be a positive integer.')

    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The news list to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, return empty list;
        # if end_index is out of range, return all remaining news.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIMEOUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news for a user, reading digests from the
    redis cache and documents from MongoDB.

    Fix vs. previous version:
    - ``news['reason'] == 'Recommended'`` was a no-op comparison; it is
      now an assignment, so top-preference news actually get tagged.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * PAGE_SIZE  # inclusive
    end_index = page_num * PAGE_SIZE  # exclusive

    sliced_news = []
    # Read this page's digests from cache, then the news from the DB;
    # if nothing is cached, write the next batch of digests into cache.
    if redis_client.get(user_id) is not None:
        # Read pickled object from redis and de-serialize.
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        batch_news = list(db[NEWS_TABLE_NAME].find().sort(
            'publishedAt', pymongo.DESCENDING).limit(NEWS_LIMIT))
        batch_news_digest = [news['digest'] for news in batch_news]
        redis_client.set(user_id, pickle.dumps(batch_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = batch_news[begin_index:end_index]

    # Read user preference and customize the returned news list.
    # TODO: explore more complicated customization logic.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth (not displayed on client).
        del news['text']
        # Tag user top-preference news (was '==', a silent no-op).
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommended'
        # Tag fresh news.
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Fixes vs. previous version:
    - stray bare ``print`` statement at the end removed (py2-only and a
      no-op in py3);
    - leftover debug prints removed.
    NOTE(review): the constant ``USER_NEWS_TIME_OOUT_IN_SECONDS`` looks
    like a typo for TIME_OUT — confirm how it is declared at module
    level before renaming.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = list(map(lambda x: x['digest'], total_news))
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OOUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        del news['text']
        # if news['class'] == topPreference:
        #     news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Fixes vs. previous version:
    - misleading "# lambda function" comment corrected (the digest list
      is built with a list comprehension);
    - 'class' lookup guarded so docs without the field don't raise.
    """
    page_num = int(page_num)
    # News range to be fetched for the page number.
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        # User already cached in redis: page the cached digest list.
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digest = news_digests[begin_index:end_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digest}}))
    else:
        # No cached data: read latest news (newest first) and store
        # their digest list in redis keyed by user id.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        # List comprehension collecting every document's digest.
        total_news_digest = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # TODO: use preference to further customize the returned news list.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = "Recommend"
        if news['publishedAt'].date() == datetime.today().date():
            # Add time tag to be displayed on page.
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummaries(user_id, page_num):
    """Get news summaries for one page.

    Fix vs. previous version: the 'class' lookup is guarded so documents
    without that field don't raise KeyError.
    NOTE(review): ``USER_NEWS_TIME_OUT_IN_SCONDS`` looks like a typo for
    SECONDS — confirm the module-level declaration before renaming.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE  # not included

    sliced_news = []
    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        print('>>> from redis: get data for user "%s"' % user_id)
        total_news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = total_news_digests[begin_index:end_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        print('>>> from mongodb: get data for user "%s"' % user_id)
        # MongoDB cursor -> python list, newest first, capped.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SCONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        del news['text']
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        # utcnow(): freshness compared against the UTC calendar day.
        if news['publishedAt'].date() == datetime.utcnow().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def get_news_summaries_for_user(user_id, page_num):
    """Get one page of news from MongoDB, with digests cached in redis.

    Fix vs. previous version: 'class' lookup guarded so documents
    without the field don't raise.
    NOTE(review): a new StrictRedis connection is created on every call;
    consider a module-level client (kept here to preserve the interface).
    """
    redis_client = redis.StrictRedis(host=REDIS_HOST, port=REDIS_PORT)
    db = mongodb_client.get_db()
    news_index_begin = (int(page_num) - 1) * NEWS_LIST_SIZE
    news_index_end = news_index_begin + NEWS_LIST_SIZE

    sliced_news = []
    if redis_client.get(user_id) is not None:
        # User id in redis: page the cached digest list.
        sliced_news_digests = pickle.loads(
            redis_client.get(user_id))[news_index_begin:news_index_end]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        # User id not in redis: fetch a fresh batch from MongoDB first.
        all_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        all_news_digests = [news['digest'] for news in all_news]
        redis_client.set(user_id, pickle.dumps(all_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT)
        sliced_news = all_news[news_index_begin:news_index_end]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    # Prepare news for the front end.
    for news in sliced_news:
        del news['text']
        # Set time chip for the front end.
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
        else:
            news['time'] = news['publishedAt'].date().strftime("%m/%d/%y")
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_news))
def get_news_summaries_for_user(user_id, page_num):
    """Return one page of news summaries for a user.

    Fixes vs. previous version:
    - digests built with a list comprehension instead of pickling a bare
      map(...);
    - py2-only debug print removed;
    - 'class' lookup guarded so docs without the field don't raise.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_SIZE
    end_index = page_num * NEWS_LIST_SIZE

    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text from news to save bandwidth.
        news.pop('text', None)
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Fetch one page of news for a user.

    Digest lists are cached in redis; documents are looked up in MongoDB
    by digest. Top-preference news get reason='Recommend'; news published
    today get time='today'; the 'text' field is stripped.
    """
    page = int(page_num)
    start = (page - 1) * NEWS_LIST_BATCH_SIZE
    stop = page * NEWS_LIST_BATCH_SIZE

    if redis_client.get(user_id) is not None:
        # Page the cached digest list, then look up MongoDB by digest.
        cached_digests = pickle.loads(redis_client.get(user_id))
        page_digests = cached_digests[start:stop]
        db = mongodb_client.get_db()
        page_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': page_digests}}))
    else:
        db = mongodb_client.get_db()
        # Newer news appear at the top of the pages.
        latest = list(db[NEWS_TABLE_NAME].find().sort(
            [('publishedAt', -1)]).limit(NEWS_LIMIT))
        redis_client.set(user_id,
                         pickle.dumps([item['digest'] for item in latest]))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        page_news = latest[start:stop]

    prefs = news_recommendation_service_client.getPreferenceForUser(user_id)
    top_pref = prefs[0] if prefs else None

    for item in page_news:
        # Remove text field to save bandwidth.
        del item['text']
        if item['class'] == top_pref:
            item['reason'] = 'Recommend'
        if item['publishedAt'].date() == datetime.today().date():
            item['time'] = 'today'
    return json.loads(dumps(page_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries, with constants read from the
    ``config`` mapping.

    Fixes vs. previous version:
    - begin_index lacked the ``int()`` cast that end_index had; if the
      config value is a string (e.g. from configparser), ``(page_num-1)
      * "30"`` produces string repetition instead of arithmetic — both
      are now cast consistently;
    - digests built via list comprehension instead of pickling map(...);
    - py2-only debug print removed.
    """
    page_num = int(page_num)
    batch_size = int(config['operations']['NEWS_LIST_BATCH_SIZE'])
    begin_index = (page_num - 1) * batch_size
    end_index = page_num * batch_size

    # The final list of news to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this will return empty list;
        # if end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongo_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongo_client.get_db()
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(
            user_id, config['operations']['USER_NEWS_TIME_OUT_IN_SECONDS'])
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        del news['text']
        # if news['class'] == topPreference:
        #     news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUserWithKeyword(user_id, page_num, keyword):
    """Return one page of keyword-search results (title search via
    Elasticsearch), tagged with recommendation/freshness/like flags.

    Fixes vs. previous version:
    - the user's like list is fetched once instead of once per result;
    - 'text' removal guarded so docs lacking the field don't raise.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE

    # TODO: only search title for now.
    # POST is used so multi-word keyword searches work.
    sliced_news = elasticsearch_client.postSearchResultsByKeyWithPage(
        'title', urllib.unquote(keyword), begin_index, NEWS_LIST_BATCH_SIZE)

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    # Hoisted out of the loop: one lookup instead of one per result.
    user_like_list = getUserLikelist(user_id)

    # publishedAt appears in two ISO-ish formats (with/without fraction).
    fmt1 = "%Y-%m-%dT%H:%M:%S"
    fmt2 = "%Y-%m-%dT%H:%M:%S.%f"

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if topPreference is not None and news.get('class') == topPreference:
            news['reason'] = 'Recommend'
        if "." not in news['publishedAt']:
            publishDate = datetime.strptime(news['publishedAt'], fmt1)
        else:
            publishDate = datetime.strptime(news['publishedAt'], fmt2)
        if publishDate.date() == datetime.today().date():
            news['time'] = 'today'
        # Whether this news is in the user's like list.
        news['like'] = news['digest'] in user_like_list
    return json.loads(dumps(sliced_news))
def test_basic():
    # The recommendation service should return one preference per news class.
    prefs = client.getPreferenceForUser('test_user')
    assert len(prefs) == 8
    print(prefs)
    print('test_basic passed.')
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news for the user, reordered by predicted click
    probability (lower 'click_predict' score means higher click likelihood,
    so it sorts to the front).  Page 1 resets the user's Redis cache.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # Page 1 always starts from a fresh cache.
    if page_num == 1:
        redis_client.delete(user_id)

    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this returns an empty list; if only
        # end_index is out of range, this returns all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))

    if not sliced_news:
        db = mongodb_client.get_db()
        total_news = list(db[NEWS_TABLE_NAME].find().sort(
            [('publishedAt', -1)]).skip(begin_index).limit(NEWS_LIMIT))
        # list(map(...)) so the pickled payload is a concrete list (a bare
        # map object is not picklable on Python 3).
        latest_total_news_digests = list(map(lambda x: x['digest'], total_news))
        if page_num == 1:
            total_news_digests = latest_total_news_digests
        else:
            # Append the newly fetched digests to the cached ones.
            total_news_digests = pickle.loads(redis_client.get(user_id))
            total_news_digests.extend(latest_total_news_digests)
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[0:NEWS_LIST_BATCH_SIZE]

    # Build one description per news item for the click predictor; it needs
    # non-empty text, so fall back to the title, then a placeholder.
    news_description = []
    for news in sliced_news:
        if news['description'] and news['description'].strip():
            news_description.append(news['description'])
        elif news['title'] and news['title'].strip():
            news_description.append(news['title'])
        else:
            news_description.append("This is an empty description")
    if news_description:
        click_predict = customized_news_list_client.predict_news_click(
            user_id, news_description)
    else:
        click_predict = []

    # Get user preference for news.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for i, news in enumerate(sliced_news):
        # Remove text field to save bandwidth.
        del news['text']
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
            # Force recommended news to the front (score 0.0 sorts first).
            # enumerate() replaces the original list.index(news), which was
            # O(n) per item and picks the wrong slot when two news dicts
            # compare equal; the bounds check avoids an IndexError if the
            # predictor returned fewer scores than news items.
            if i < len(click_predict):
                click_predict[i] = 0.0
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'Today'

    # Reorder by predicted click score.  Sort on the score alone so ties
    # never fall back to comparing dicts (a TypeError on Python 3); skip the
    # reorder entirely on a length mismatch, where zip() would silently drop
    # news items.
    if len(click_predict) == len(sliced_news):
        sliced_news = [x for (y, x) in sorted(
            zip(click_predict, sliced_news), key=lambda pair: pair[0])]
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    # Return one page of news summaries for the user, ranked by the user's
    # class preferences: each preferred class gets a descending 'level'
    # score, with a +0.5 bonus for news published today (local time zone).
    # NOTE(review): news whose class appears in no preference never receive a
    # 'level' key, so the sort key lambda below would raise KeyError for
    # them — presumably `preference` covers all CLASS_NUMBER classes; verify
    # against the recommendation service.
    #print 'operations: getNewsSummariesForUser'
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE
    # print 'getNewsSummariesForUser, pageNum: %s' % page_num
    # print 'begin_index: %s' % begin_index
    # print 'end_index: %s' % end_index
    # the final lisr of news to be returned
    sliced_news = []
    #Get preference for the user_id
    preference = news_recommendation_service_client.getPreferenceForUser(user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]
    if redis_client.get(user_id) is not None:
        # Cache hit: slice the cached digest list for this page.
        #print "len %s" % len(redis_client.get(user_id))
        #print "end_index %s" % end_index
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        #print sliced_news_digests
        # NOTE(review): uses a module-level `db` handle here (no get_db()
        # call) — confirm it is initialized at import time.
        sliced_news = list(db[NEWS_TABLE_NAME].find({'digest':{'$in':sliced_news_digests}}).sort([('publishedAt', -1)]))
        if preference is not None and len(preference) > 0:
            #Sort news by preference
            # level counts down from CLASS_NUMBER so the top preference
            # gets the highest score.
            level = config['operations']['CLASS_NUMBER']
            for prefer in preference:
                level-=1
                for news in sliced_news:
                    if(news['class'] == prefer):
                        news['level'] = level
                        # Today's news within a preferred class ranks above
                        # older news of the same class.
                        if news['publishedAt'].replace(tzinfo=from_zone).astimezone(to_zone).date() == datetime.today().date():
                            news['level'] += 0.5
            #print "news list: %s" % news
            sliced_news.sort(key=lambda x: x['level'], reverse=True)
    else:
        # Cache miss: load the most recent news from MongoDB, rank them the
        # same way, then cache the digest list for subsequent pages.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([('publishedAt', -1)]).limit(NEWS_LIMITS))
        if preference is not None and len(preference) > 0:
            #Sort news by preference
            level = config['operations']['CLASS_NUMBER']
            for prefer in preference:
                level-=1
                for news in total_news:
                    if(news['class'] == prefer):
                        news['level'] = level
                        #Use local time zone
                        if news['publishedAt'].replace(tzinfo=from_zone).astimezone(to_zone).date() == datetime.today().date():
                            news['level'] += 0.5
            #print "news list: %s" % news
            total_news.sort(key=lambda x: x['level'], reverse=True)
        total_news_digest = map(lambda x:x['digest'], total_news)
        redis_client.set(user_id, pickle.dumps(total_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]
    for news in sliced_news:
        log_client.logger.debug('Create News class [%s] label, Current Top Preference is [%s]' % (news['class'], topPreference))
        #print 'Create News class [%s] label, Current Top Preference is [%s]' % (news['class'], topPreference)
        #remove text field to save bandwidth
        del news['text']
        # 'time' is 'today' for today's news (local time zone), otherwise a
        # human-readable date string.
        if news['publishedAt'].replace(tzinfo=from_zone).astimezone(to_zone).date()== datetime.today().date():
            news['time'] = 'today'
        else:
            news['time'] = news['publishedAt'].replace(tzinfo=from_zone).astimezone(to_zone).date().strftime("%A %d. %B %Y")
        # Only today's news in the top-preference class gets the
        # 'Recommend' tag.
        if news['class'] == topPreference and news['time'] == 'today':
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """
    If the input user_id exists in Redis (in-memory cache) then we slice his
    cached digested-news list; if it cannot be found in Redis (a new user),
    we fetch the most recent news records from MongoDB, set the most recent
    NEWS_LIMIT news as his initial digested news and save them in Redis.

    From Web Server:5050, call pyjsonrpc for the user's preference list and
    take its first element as the top preference.

    Delete the 'text' field to save bandwidth; meanwhile set 'reason' to
    'Recommend' when the 'class' field matches the top preference, and set
    'time' to 'today' when 'publishedAt' falls on today's date.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        # GET the (VALUE) cached news ids for this (KEY) user_id.
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this returns an empty list; if only
        # end_index is out of range, this returns all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        # Sort in descending order (-1) of publish time.
        total_news = list(db[NEWS_TABLE_NAME].find().sort(
            [('publishedAt', -1)]).limit(NEWS_LIMIT))
        # list(map(...)) so the pickled payload is a concrete list (a bare
        # map object is not picklable on Python 3).
        total_news_digests = list(map(lambda x: x['digest'], total_news))
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        del news['text']
        # Guard the 'class' key: not every news document is classified yet
        # (the keyword-search variant in this module applies the same guard).
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def test_basic():
    # The service must return a non-None preference list.
    result = client.getPreferenceForUser("test_user")
    print(result)
    assert result is not None
def test_basic():
    prefs = recommendation.getPreferenceForUser('test_user')
    print(prefs)
    # A valid preference list is never empty.
    assert len(prefs) != 0
    print("test_basic passed.")
def getSearchNewsSummariesForUser(user_id, page_num, search_key):
    """Full-text search (MongoDB $text) news for the user, one page at a time.

    Matching digests are cached in Redis keyed by search_key so later pages
    reuse the same result set.  Each returned item gets 'time' and (when the
    class matches the user's top preference and the news is from today)
    'reason' fields; 'text' is stripped to save bandwidth.
    """
    db = mongodb_client.get_db()  # connect to our cluster
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # Get preference for the user_id.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    if redis_client.get(search_key) is not None:
        # Cache hit: slice the cached digest list for this page.
        news_search_digests = pickle.loads(redis_client.get(search_key))
        sliced_search_news_digests = news_search_digests[begin_index:end_index]
        sliced_search_news = list(db[NEWS_TABLE_NAME].find({
            'digest': {'$in': sliced_search_news_digests}
        }).sort([('publishedAt', -1)]))
    else:
        # Initialized up front so a failed index build / query yields an
        # empty result set instead of a NameError below (the original left
        # total_search_news unbound when the try block raised).
        total_search_news = []
        try:
            db[NEWS_TABLE_NAME].create_index([('title', pymongo.TEXT),
                                              ('description', pymongo.TEXT),
                                              ('text', pymongo.TEXT),
                                              ('class', pymongo.TEXT)])
            total_search_news = list(db[NEWS_TABLE_NAME].find(
                {"$text": {"$search": search_key}}))
        except Exception as e:
            print(str(e))
        # list(map(...)) so the pickled payload is a concrete list (a bare
        # map object is not picklable on Python 3).
        total_search_news_digest = list(
            map(lambda x: x['digest'], total_search_news))
        redis_client.set(search_key, pickle.dumps(total_search_news_digest))
        redis_client.expire(search_key, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_search_news = total_search_news[begin_index:end_index]

    for news in sliced_search_news:
        # Remove text field to save bandwidth.
        del news['text']
        # Convert publish time to the local time zone before date comparison.
        local_date = news['publishedAt'].replace(
            tzinfo=from_zone).astimezone(to_zone).date()
        if local_date == datetime.today().date():
            news['time'] = 'today'
        else:
            news['time'] = local_date.strftime("%A %d. %B %Y")
        # .get() guards unclassified documents against a KeyError.
        if news.get('class') == topPreference and news['time'] == 'today':
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_search_news))
def getSearchNewsSummariesForUser(user_id, page_num, search_key):
    """Elasticsearch more_like_this search of news for the user, paged.

    Matching digests are cached in Redis keyed by search_key.  Returns an
    empty JSON list when the search yields no hits (or fails); otherwise
    each item gets 'time'/'reason' fields with 'text' stripped.
    """
    # connect to our cluster
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE
    sliced_news = []

    # Get preference for the user_id.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    if redis_client.get(search_key) is not None:
        # Cache hit: re-query Mongo for the cached digests, newest first.
        news_digests = pickle.loads(redis_client.get(search_key))
        tmp_total_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': news_digests}}).sort([('publishedAt', -1)]))
        sliced_news = tmp_total_news[begin_index:end_index]
    else:
        # result stays None when the search fails, so the empty-hits branch
        # below returns [] instead of the NameError the original raised.
        result = None
        try:
            result = es.search(
                index="news",
                body={"size": 80,
                      "query": {"more_like_this": {
                          "fields": ["title", "description", "text", "class"],
                          "like": search_key,
                          "min_term_freq": 1,
                          "max_query_terms": 100}}})
        except Exception as e:
            print(str(e))
        hits = result['hits']['hits'] if result is not None else None
        total_news = []
        if hits is not None and len(hits) > 0:
            for hit in hits:
                total_news.append(hit['_source'])
            # list(map(...)) so the pickled payload is a concrete list (a
            # bare map object is not picklable on Python 3).
            total_news_digest = list(map(lambda x: x['digest'], total_news))
            # Re-fetch from Mongo to get the canonical documents, newest
            # first, then cache the digest list for subsequent pages.
            total_news = list(db[NEWS_TABLE_NAME].find(
                {'digest': {'$in': total_news_digest}}).sort(
                    [('publishedAt', -1)]))
            redis_client.set(search_key, pickle.dumps(total_news_digest))
            redis_client.expire(search_key, USER_NEWS_TIME_OUT_IN_SECONDS)
            sliced_news = total_news[begin_index:end_index]
        else:
            # No hits (or search failed): return an empty list.
            return json.loads(dumps(total_news))

    for news in sliced_news:
        log_client.logger.debug('Create News class [%s] label, Current Top Preference is [%s]' % (news['class'], topPreference))
        # remove text field to save bandwidth
        del news['text']
        # Convert publish time to the local time zone before date comparison.
        local_date = news['publishedAt'].replace(
            tzinfo=from_zone).astimezone(to_zone).date()
        if local_date == datetime.today().date():
            news['time'] = 'today'
        else:
            news['time'] = local_date.strftime("%A %d. %B %Y")
        if news['class'] == topPreference and news['time'] == 'today':
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_news))