def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    The first request for a user loads up to NEWS_LIMIT news documents
    sorted by 'publishedAt' descending and stores their digests in redis
    under the user id, with a TTL of USER_NEWS_TIME_OUT_IN_SECONDS. Later
    requests page through that cached digest list and fetch only the
    documents for the requested page.

    Args:
        user_id: Key identifying the user; also used as the redis key.
        page_num: 1-based page number (anything accepted by ``int()``).

    Returns:
        A list of JSON-compatible dicts, one per news item, with the 'text'
        field removed. Items whose 'class' equals the user's top preference
        carry ``'reason': 'Recommended'``. Pages below 1 yield an empty list.

    Raises:
        ValueError / TypeError: if ``page_num`` cannot be converted to int.
    """
    page_num = int(page_num)
    if page_num < 1:
        # Non-positive pages would produce negative slice bounds and return
        # an arbitrary window from the tail of the list.
        return []

    # Half-open range [begin_index, end_index) of news for this page.
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    db = MongoDBClient(MONGO_DB_HOST, MONGO_DB_PORT).get_db()

    # Read the cache exactly once: a second GET could return None if the key
    # expires between the existence check and the load.
    cached_digests = redis_client.get(user_id)
    if cached_digests is not None:
        # NOTE(review): pickle.loads is only safe because this payload is
        # written by this function below; never point it at untrusted data.
        news_digests = pickle.loads(cached_digests)
        # Python slices exclude the end index.
        sliced_news_digest = news_digests[begin_index:end_index]
        if sliced_news_digest:
            sliced_news = list(db[NEWS_TABLE_NAME].find(
                {'digest': {'$in': sliced_news_digest}}))
            # '$in' does not return documents in the order of the supplied
            # list, so restore the cached (newest-first) ordering.
            position = {
                digest: index
                for index, digest in enumerate(sliced_news_digest)
            }
            sliced_news.sort(key=lambda item: position[item['digest']])
        else:
            sliced_news = []
    else:
        # No cached data: load the latest news and cache the digest list
        # under the user id so that later pages stay consistent.
        total_news = list(
            db[NEWS_TABLE_NAME].find()
            .sort([('publishedAt', -1)])
            .limit(NEWS_LIMIT))
        total_news_digest = [item['digest'] for item in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user; presumably an ordered list of class names
    # with the strongest preference first (confirm against the recommender).
    preference = NewsRecommenderClient(
        NEWS_RECOMMENDER_HOST,
        NEWS_RECOMMENDER_PORT).getPreferenceForUser(user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]
    print('topPreference', topPreference)

    for news in sliced_news:
        # Remove text field to save bandwidth; tolerate documents without it.
        news.pop('text', None)
        # Documents that have not been classified yet have no 'class' key.
        if topPreference is not None and news.get('class') == topPreference:
            news['reason'] = 'Recommended'

    # Round-trip through dumps/json.loads to turn the documents into plain
    # JSON-compatible values (dumps is presumably bson.json_util.dumps).
    return json.loads(dumps(sliced_news))
def backfill():
    """Assign a 'class' to every document in the 'news' collection lacking one.

    Walks the whole collection, printing a running count for each document.
    For a document without a 'class' key, the description (or the title when
    the description is None) is passed to ``classify`` and the document is
    written back, matched on its 'digest'.
    """
    print('begin backfilling')
    collection = MongoDBClient(MONGO_DB_HOST, MONGO_DB_PORT).get_db()['news']

    for seen, doc in enumerate(collection.find({}), start=1):
        print(seen)
        if 'class' in doc:
            # Already classified; nothing to do.
            continue

        print('Populating classes...')
        text = doc['description']
        if text is None:
            # Fall back to the title when there is no description.
            text = doc['title']
        doc['class'] = classify(text)
        collection.replace_one({'digest': doc['digest']}, doc, upsert=True)
def getPreferenceForUser(userId):
    """Get user's preference in an ordered class list.

    Args:
        userId: Value matched against the 'userId' field of the preference
            model collection.

    Returns:
        Class names sorted by preference value, highest first. An empty list
        is returned when the user has no model, when the model has no
        preference entries, or when the highest and lowest values are close
        enough that the ranking carries no information.
    """
    db = MongoDBClient(MONGO_DB_HOST, MONGO_DB_PORT).get_db()
    model = db[PREFERENCE_MODEL_TABLE_NAME].find_one({'userId': userId})
    if model is None:
        return []

    preference = model.get('preference')
    if not preference:
        # A missing or empty preference mapping has nothing to rank; indexing
        # the sorted values below would otherwise raise IndexError.
        return []

    ranked = sorted(
        preference.items(), key=operator.itemgetter(1), reverse=True)

    # If the first preference is same as the last one, the preference makes
    # no sense.
    if isclose(float(ranked[0][1]), float(ranked[-1][1])):
        return []

    return [class_name for class_name, _ in ranked]
def getOneNews():
    """Fetch a single document from the news collection.

    Returns:
        The result of ``find_one()`` on the news collection, serialized with
        ``dumps`` and parsed back with ``json.loads`` so that it contains
        only JSON-compatible values.
    """
    news_collection = MongoDBClient(
        MONGO_DB_HOST, MONGO_DB_PORT).get_db()[NEWS_TABLE_NAME]
    serialized = dumps(news_collection.find_one())
    return json.loads(serialized)
from config import MONGO_DB_HOST, MONGO_DB_PORT from config import PREFERENCE_MODEL_TABLE_NAME from tap_news_utils.mongodb_client import MongoDBClient from tap_news_utils.cloudAMQP_client import CloudAMQPClient NUM_OF_CLASSES = 8 INITIAL_P = 1.0 / NUM_OF_CLASSES ALPHA = 0.1 SLEEP_TIME_IN_SECONDS = 1 NEWS_TABLE_NAME = "news" cloudAMQP_client = CloudAMQPClient(LOG_CLICKS_TASK_QUEUE_URL, LOG_CLICKS_TASK_QUEUE_NAME) mongodb_client = MongoDBClient(MONGO_DB_HOST, MONGO_DB_PORT) def handle_message(msg): print('click log processor: handle_message', msg) if not isinstance(msg, dict): print('Error not dict') return if ('userId' not in msg or 'newsId' not in msg or 'timestamp' not in msg): print('Error not valid msg') return userId = msg['userId'] newsId = msg['newsId']