def test_basic():
    """Smoke test: the client returns a non-empty preference list
    whose top entry is 'World' for user1."""
    prefs = client.getPreferenceForUser("user1")
    print(prefs)
    assert prefs is not None
    assert len(prefs) > 0
    assert prefs[0] == "World"
    print('test_basic passed!')
def getNewsSummariesForUserLikeList(user_id):
    """Return the user's liked news (without body text), tagged with
    recommendation, freshness and like flags.

    Fixes vs. previous version:
    - newsIds are accumulated across all like-list documents instead of
      keeping only the last document's list;
    - the user's like list is fetched once, not once per news item;
    - 'text' removal is guarded so docs lacking the field don't raise.
    """
    db = mongodb_client.get_db()
    like_digests = []
    for doc in db[USER_LIKELIST_TABLE_NAME].find({"userId": user_id}):
        # Accumulate: a user may have more than one like-list document.
        like_digests.extend(doc["newsIds"])
    liked_news = list(db[NEWS_TABLE_NAME].find(
        {'digest': {'$in': like_digests}}))

    # Top preferred news class for this user (None when no preference).
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    # Hoisted out of the loop: one lookup instead of one per news item.
    user_like_list = getUserLikelist(user_id)

    for news in liked_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if topPreference is not None and news.get('class') == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
        # Whether this news is in the user's like list.
        news['like'] = news['digest'] in user_like_list
    return json.loads(dumps(liked_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user; digest lists are
    cached per-user in redis, documents come from MongoDB.

    Fixes vs. previous version:
    - the DB read was wrapped in try/finally, so the error was logged on
      every call even on success; now try/except which re-raises;
    - '$s' in the log format string corrected to '%s';
    - map(...) is materialized with list() before pickling;
    - buggy debug print replaced by a proper log call;
    - 'class' lookup guarded so docs without the field don't raise.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        logger.info('user is already in redis')
        news_digests = pickle.loads(redis_client.get(user_id))
        # Out-of-range begin_index yields []; out-of-range end_index
        # yields all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        logger.info('load sliced news')
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        logger.info('user is not in redis')
        db = mongodb_client.get_db()
        try:
            total_news = list(db[NEWS_TABLE_NAME].find().sort([
                ('publishedAt', -1)
            ]).limit(NEWS_LIMIT))
        except Exception:
            # Was try/finally, which logged this unconditionally.
            logger.error('Some error occurs when find news in MongoDB')
            raise
        total_news_digests = list(map(lambda x: x['digest'], total_news))
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        logger.info('user %s is set to redis', user_id)
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]
    logger.info('top preference is: %s', topPreference)

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getPreference(user_id):
    """Return the user's top preferred news class, or '' when the
    recommendation service reports none.

    Fix: the previous version declared ``global topPreference`` AFTER
    assigning the local of the same name, which is a SyntaxError
    ("assigned to before global declaration"); the global statement is
    removed and a plain local is returned. The stray debug print is
    also dropped.
    """
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = ''
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]
    return topPreference
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news, personalized by per-class preference
    weights (class quota = round(weight * news_limit)).

    Fixes vs. previous version:
    - digests are built with a list comprehension instead of pickling a
      bare map(...) (a py3 map object would not slice after unpickling);
    - redundant full-copy slice ``[:]`` after sorted() removed;
    - 'text' removal guarded so docs without the field don't raise;
    - debug prints removed.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * news_list_batch_size
    end_index = page_num * news_list_batch_size

    # The final list of news to be returned.
    sliced_news = []

    # Personalization: turn preference weights into per-class quotas.
    preferences = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    news_numbers = []
    if preferences is not None and len(preferences) > 0:
        news_numbers = [
            int(round(preference * news_limit)) for preference in preferences
        ]

    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        # Out-of-range begin_index yields []; out-of-range end_index
        # yields all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[news_table_name].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        # Take each class's quota of news, then merge and sort by recency.
        selected_news = []
        for i in range(len(news_numbers)):
            selected_news.extend(
                list(db[news_table_name].find({
                    'class': news_classes_v2.class_map[str(i + 1)]
                }).limit(news_numbers[i])))
        selected_news = sorted(selected_news,
                               key=lambda k: k['publishedAt'],
                               reverse=True)
        # Cache digest list for subsequent paging requests.
        selected_news_digests = [n['digest'] for n in selected_news]
        redis_client.set(user_id, pickle.dumps(selected_news_digests))
        redis_client.expire(user_id, user_news_time_out_in_seconds)
        sliced_news = selected_news[begin_index:end_index]

    # Other taggings and returning.
    for news in sliced_news:
        news.pop('text', None)
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Per-user digest lists are cached in redis; news documents are read
    from MongoDB by digest. NOTE(review): the 'reason' tagging loop
    below is commented out, so the preference lookup currently only
    populates an unused topPreference variable.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        # pickle.loads de-serializes the cached bytes back into the Python
        # list of digests (it does not produce JSON).
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this will return empty list;
        # If end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        print sliced_news_digests
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {
                '$in': sliced_news_digests
            }}))
    else:
        db = mongodb_client.get_db()
        # Latest NEWS_LIMIT news, newest first by publishedAt.
        total_news = list(
            db[NEWS_TABLE_NAME].find().sort([('publishedAt', -1)
                                             ]).limit(NEWS_LIMIT)
        )
        # Collect each document's digest.
        total_news_digests = map(lambda x: x['digest'],
                                 total_news)
        # Cache the digest list for this user and set an expiry.
        redis_client.set(
            user_id, pickle.dumps(total_news_digests)
        )
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        # Page the batch by begin/end index.
        sliced_news = total_news[
            begin_index:end_index]

    # Get preference for the user (currently informational only — see
    # the commented-out tagging below).
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        # del news['text']
        # if news['class'] == topPreference:
        #     news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Fetch one page of news for a user.

    Digest lists are cached per-user in redis; documents are looked up
    in MongoDB by digest. News matching the user's top preference class
    are tagged with reason='Recommend'.
    """
    page = int(page_num)
    if page <= 0:
        return []

    start = (page - 1) * NEWS_LIST_BATCH_SIZE
    stop = page * NEWS_LIST_BATCH_SIZE

    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        LOGGER.debug("user exist in redis")
        # De-serialize the cached digest list, then page it.
        digests = pickle.loads(redis_client.get(user_id))
        page_digests = digests[start:stop]
        page_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': page_digests}}))
    else:
        LOGGER.debug("fetch news from database and saved in redis")
        # Latest news first, capped at NEWS_LIMIT.
        latest = list(db[NEWS_TABLE_NAME].find().sort(
            [('publishedAt', -1)]).limit(NEWS_LIMIT))
        # Cache the serialized digest list with an expiry.
        latest_digests = [item['digest'] for item in latest]
        redis_client.set(user_id, pickle.dumps(latest_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        page_news = latest[start:stop]

    # Customize the returned list with the user's top preference.
    prefs = news_recommendation_service_client.getPreferenceForUser(user_id)
    top_pref = prefs[0] if prefs else None

    for item in page_news:
        if 'class' in item and item['class'] == top_pref:
            item['reason'] = 'Recommend'
    return json.loads(dumps(page_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news; anonymous users (user_id None) bypass
    the redis digest cache.

    Fixes vs. previous version:
    - local 'topPrefence' typo renamed to topPreference;
    - the two identical DB-query branches are merged; the digest list is
      no longer built (unused) for anonymous users;
    - 'class' lookup guarded so docs without the field don't raise.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    sliced_news = []
    db = mongodb_client.get_db()
    if user_id is not None and redis_client.get(user_id) is not None:
        total_news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this will return empty list;
        # if end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digests = total_news_digests[begin_index:end_index]
        sliced_news = list(db[DB_NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        total_news = list(db[DB_NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        if user_id is not None:
            # Cache the digest list only for identified users.
            total_news_digests = [x['digest'] for x in total_news]
            redis_client.set(user_id, pickle.dumps(total_news_digests))
            redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = "Recommend"
    return json.loads(dumps(sliced_news))
def get_news_summaries_for_user(user_id, page_num):
    """Return one page of news summaries for a user.

    Redis stores only the per-user digest list (pickled); documents are
    then read from MongoDB by digest.

    Fixes vs. previous version:
    - 'text' removal and 'class' lookup are guarded so documents lacking
      those fields don't raise KeyError.
    """
    page_num = int(page_num)
    if page_num <= 0:
        return []

    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        # Redis holds the serialized digest list; de-serialize and page it.
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference list for the user and tag matching news.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Fixes vs. previous version:
    - ``news['publishedAt'].date == datetime.today().date()`` compared a
      bound method to a date and was always False; the missing call
      parentheses are added;
    - digests are built with a list comprehension instead of pickling a
      bare map(...);
    - 'class' lookup guarded; debug print removed.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list to be returned to the user.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digest = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this will return empty list;
        # if end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digest = news_digest[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digest}}))
    else:
        db = mongodb_client.get_db()
        # Latest NEWS_LIMIT news according to publishedAt, newest first.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        # Only the digests are cached, to save memory.
        total_news_digest = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news, personalized by per-class preference
    weights (class quota = round(weight * news_limit)).

    Fixes vs. previous version:
    - digests built via list comprehension instead of pickling map(...);
    - redundant ``[:]`` copy after sorted() removed;
    - 'text' removal guarded; debug prints removed.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * news_list_batch_size
    end_index = page_num * news_list_batch_size

    # The final list of news to be returned.
    sliced_news = []

    # Personalization: decide each class's quota from preference weights.
    preferences = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    news_numbers = []
    if preferences is not None and len(preferences) > 0:
        news_numbers = [
            int(round(weight * news_limit)) for weight in preferences
        ]

    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this will return empty list;
        # if end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[news_table_name].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        # Take each class's quota of news, then merge and sort by recency.
        selected_news = []
        for i in range(len(news_numbers)):
            selected_news.extend(
                list(db[news_table_name].find({
                    'class': news_classes_v2.class_map[str(i + 1)]
                }).limit(news_numbers[i])))
        selected_news = sorted(selected_news,
                               key=lambda k: k['publishedAt'],
                               reverse=True)
        # Cache digests for paging.
        selected_news_digests = [n['digest'] for n in selected_news]
        redis_client.set(user_id, pickle.dumps(selected_news_digests))
        redis_client.expire(user_id, user_news_time_out_in_seconds)
        sliced_news = selected_news[begin_index:end_index]

    # Other taggings and returning.
    for news in sliced_news:
        news.pop('text', None)
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Fetch one page of news for a user.

    Per-user digest lists live in redis; documents are read from MongoDB
    by digest. News in the user's top preference class are tagged with
    reason='Recommend'; the 'text' field is stripped to save bandwidth.
    """
    page = int(page_num)
    if page <= 0:
        return []

    start = (page - 1) * NEWS_LIST_BATCH_SIZE
    stop = page * NEWS_LIST_BATCH_SIZE

    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        digests = pickle.loads(redis_client.get(user_id))
        page_digests = digests[start:stop]
        page_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': page_digests}}))
    else:
        latest = list(db[NEWS_TABLE_NAME].find().sort(
            [('publishedAt', -1)]).limit(NEWS_LIMIT))
        latest_digests = [item['digest'] for item in latest]
        redis_client.set(user_id, pickle.dumps(latest_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        page_news = latest[start:stop]

    # TODO: use preference to customize returned news list.
    prefs = news_recommendation_service_client.getPreferenceForUser(user_id)
    top_pref = prefs[0] if prefs else None

    for item in page_news:
        # Remove text field to save bandwidth.
        del item['text']
        if 'class' in item and item['class'] == top_pref:
            item['reason'] = 'Recommend'
    return json.loads(dumps(page_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news (everything except the body text) to the
    front end.

    Fixes vs. previous version:
    - 'class' lookup guarded so docs without the field don't raise;
    - non-English comments translated and commented-out experiments
      removed.
    """
    page_num = int(page_num)
    if page_num <= 0:
        return []

    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE  # inclusive
    end_index = page_num * NEWS_LIST_BATCH_SIZE  # exclusive

    # The news list returned to the front end: full metadata, no text.
    sliced_news = []
    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        # Collect every document's digest into one list.
        total_news_digests = [x['digest'] for x in total_news]
        # Save this list to redis with an expiry.
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    # TODO: use preference to customize returned news list.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Raises:
        ValueError: if page_num is not a positive integer.

    Fixes vs. previous version:
    - leftover debug print of the full digest list removed;
    - 'class' lookup guarded so docs without the field don't raise.
    """
    page_num = int(page_num)
    if page_num <= 0:
        raise ValueError('page_num should be a positive integer.')

    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The news list to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, return empty list;
        # if end_index is out of range, return all remaining news.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIMEOUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news for a user, reading digests from the
    redis cache and documents from MongoDB.

    Fix vs. previous version:
    - ``news['reason'] == 'Recommended'`` was a no-op comparison; it is
      now an assignment, so top-preference news actually get tagged.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * PAGE_SIZE  # inclusive
    end_index = page_num * PAGE_SIZE  # exclusive

    sliced_news = []
    # Read this page's digests from cache, then the news from the DB;
    # if nothing is cached, write the next batch of digests into cache.
    if redis_client.get(user_id) is not None:
        # Read pickled object from redis and de-serialize.
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        batch_news = list(db[NEWS_TABLE_NAME].find().sort(
            'publishedAt', pymongo.DESCENDING).limit(NEWS_LIMIT))
        batch_news_digest = [news['digest'] for news in batch_news]
        redis_client.set(user_id, pickle.dumps(batch_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = batch_news[begin_index:end_index]

    # Read user preference and customize the returned news list.
    # TODO: explore more complicated customization logic.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth (not displayed on client).
        del news['text']
        # Tag user top-preference news (was '==', a silent no-op).
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommended'
        # Tag fresh news.
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Fixes vs. previous version:
    - stray bare ``print`` statement at the end removed (py2-only and a
      no-op in py3);
    - leftover debug prints removed.
    NOTE(review): the constant ``USER_NEWS_TIME_OOUT_IN_SECONDS`` looks
    like a typo for TIME_OUT — confirm how it is declared at module
    level before renaming.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = list(map(lambda x: x['digest'], total_news))
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OOUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        del news['text']
        # if news['class'] == topPreference:
        #     news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries for a user.

    Fixes vs. previous version:
    - misleading "# lambda function" comment corrected (the digest list
      is built with a list comprehension);
    - 'class' lookup guarded so docs without the field don't raise.
    """
    page_num = int(page_num)
    # News range to be fetched for the page number.
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        # User already cached in redis: page the cached digest list.
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digest = news_digests[begin_index:end_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digest}}))
    else:
        # No cached data: read latest news (newest first) and store
        # their digest list in redis keyed by user id.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        # List comprehension collecting every document's digest.
        total_news_digest = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # TODO: use preference to further customize the returned news list.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = "Recommend"
        if news['publishedAt'].date() == datetime.today().date():
            # Add time tag to be displayed on page.
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummaries(user_id, page_num):
    """Get news summaries for one page.

    Fix vs. previous version: the 'class' lookup is guarded so documents
    without that field don't raise KeyError.
    NOTE(review): ``USER_NEWS_TIME_OUT_IN_SCONDS`` looks like a typo for
    SECONDS — confirm the module-level declaration before renaming.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE  # not included

    sliced_news = []
    db = mongodb_client.get_db()
    if redis_client.get(user_id) is not None:
        print('>>> from redis: get data for user "%s"' % user_id)
        total_news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = total_news_digests[begin_index:end_index]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        print('>>> from mongodb: get data for user "%s"' % user_id)
        # MongoDB cursor -> python list, newest first, capped.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SCONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        del news['text']
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        # utcnow(): freshness compared against the UTC calendar day.
        if news['publishedAt'].date() == datetime.utcnow().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def get_news_summaries_for_user(user_id, page_num):
    """Get one page of news from MongoDB, with digests cached in redis.

    Fix vs. previous version: 'class' lookup guarded so documents
    without the field don't raise.
    NOTE(review): a new StrictRedis connection is created on every call;
    consider a module-level client (kept here to preserve the interface).
    """
    redis_client = redis.StrictRedis(host=REDIS_HOST, port=REDIS_PORT)
    db = mongodb_client.get_db()
    news_index_begin = (int(page_num) - 1) * NEWS_LIST_SIZE
    news_index_end = news_index_begin + NEWS_LIST_SIZE

    sliced_news = []
    if redis_client.get(user_id) is not None:
        # User id in redis: page the cached digest list.
        sliced_news_digests = pickle.loads(
            redis_client.get(user_id))[news_index_begin:news_index_end]
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        # User id not in redis: fetch a fresh batch from MongoDB first.
        all_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        all_news_digests = [news['digest'] for news in all_news]
        redis_client.set(user_id, pickle.dumps(all_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT)
        sliced_news = all_news[news_index_begin:news_index_end]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    # Prepare news for the front end.
    for news in sliced_news:
        del news['text']
        # Set time chip for the front end.
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
        else:
            news['time'] = news['publishedAt'].date().strftime("%m/%d/%y")
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_news))
def get_news_summaries_for_user(user_id, page_num):
    """Return one page of news summaries for a user.

    Fixes vs. previous version:
    - digests built with a list comprehension instead of pickling a bare
      map(...);
    - py2-only debug print removed;
    - 'class' lookup guarded so docs without the field don't raise.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_SIZE
    end_index = page_num * NEWS_LIST_SIZE

    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text from news to save bandwidth.
        news.pop('text', None)
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """Fetch one page of news for a user.

    Digest lists are cached in redis; documents are looked up in MongoDB
    by digest. Top-preference news get reason='Recommend'; news published
    today get time='today'; the 'text' field is stripped.
    """
    page = int(page_num)
    start = (page - 1) * NEWS_LIST_BATCH_SIZE
    stop = page * NEWS_LIST_BATCH_SIZE

    if redis_client.get(user_id) is not None:
        # Page the cached digest list, then look up MongoDB by digest.
        cached_digests = pickle.loads(redis_client.get(user_id))
        page_digests = cached_digests[start:stop]
        db = mongodb_client.get_db()
        page_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': page_digests}}))
    else:
        db = mongodb_client.get_db()
        # Newer news appear at the top of the pages.
        latest = list(db[NEWS_TABLE_NAME].find().sort(
            [('publishedAt', -1)]).limit(NEWS_LIMIT))
        redis_client.set(user_id,
                         pickle.dumps([item['digest'] for item in latest]))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        page_news = latest[start:stop]

    prefs = news_recommendation_service_client.getPreferenceForUser(user_id)
    top_pref = prefs[0] if prefs else None

    for item in page_news:
        # Remove text field to save bandwidth.
        del item['text']
        if item['class'] == top_pref:
            item['reason'] = 'Recommend'
        if item['publishedAt'].date() == datetime.today().date():
            item['time'] = 'today'
    return json.loads(dumps(page_news))
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news summaries, with constants read from the
    ``config`` mapping.

    Fixes vs. previous version:
    - begin_index lacked the ``int()`` cast that end_index had; if the
      config value is a string (e.g. from configparser), ``(page_num-1)
      * "30"`` produces string repetition instead of arithmetic — both
      are now cast consistently;
    - digests built via list comprehension instead of pickling map(...);
    - py2-only debug print removed.
    """
    page_num = int(page_num)
    batch_size = int(config['operations']['NEWS_LIST_BATCH_SIZE'])
    begin_index = (page_num - 1) * batch_size
    end_index = page_num * batch_size

    # The final list of news to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this will return empty list;
        # if end_index is out of range (begin_index is within the range),
        # this will return all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongo_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongo_client.get_db()
        total_news = list(db[NEWS_TABLE_NAME].find().sort([
            ('publishedAt', -1)
        ]).limit(NEWS_LIMIT))
        total_news_digests = [x['digest'] for x in total_news]
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(
            user_id, config['operations']['USER_NEWS_TIME_OUT_IN_SECONDS'])
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        del news['text']
        # if news['class'] == topPreference:
        #     news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUserWithKeyword(user_id, page_num, keyword):
    """Return one page of keyword-search results (title search via
    Elasticsearch), tagged with recommendation/freshness/like flags.

    Fixes vs. previous version:
    - the user's like list is fetched once instead of once per result;
    - 'text' removal guarded so docs lacking the field don't raise.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE

    # TODO: only search title for now.
    # POST is used so multi-word keyword searches work.
    sliced_news = elasticsearch_client.postSearchResultsByKeyWithPage(
        'title', urllib.unquote(keyword), begin_index, NEWS_LIST_BATCH_SIZE)

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    # Hoisted out of the loop: one lookup instead of one per result.
    user_like_list = getUserLikelist(user_id)

    # publishedAt appears in two ISO-ish formats (with/without fraction).
    fmt1 = "%Y-%m-%dT%H:%M:%S"
    fmt2 = "%Y-%m-%dT%H:%M:%S.%f"

    for news in sliced_news:
        # Remove text field to save bandwidth.
        news.pop('text', None)
        if topPreference is not None and news.get('class') == topPreference:
            news['reason'] = 'Recommend'
        if "." not in news['publishedAt']:
            publishDate = datetime.strptime(news['publishedAt'], fmt1)
        else:
            publishDate = datetime.strptime(news['publishedAt'], fmt2)
        if publishDate.date() == datetime.today().date():
            news['time'] = 'today'
        # Whether this news is in the user's like list.
        news['like'] = news['digest'] in user_like_list
    return json.loads(dumps(sliced_news))
def test_basic():
    # The recommendation service should return one preference per news class.
    prefs = client.getPreferenceForUser('test_user')
    assert len(prefs) == 8
    print(prefs)
    print('test_basic passed.')
def getNewsSummariesForUser(user_id, page_num):
    """Return one page of news for the user, reordered by predicted click
    probability (lower 'click_predict' score means higher click likelihood,
    so it sorts to the front).  Page 1 resets the user's Redis cache.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # Page 1 always starts from a fresh cache.
    if page_num == 1:
        redis_client.delete(user_id)

    sliced_news = []
    if redis_client.get(user_id) is not None:
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this returns an empty list; if only
        # end_index is out of range, this returns all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))

    if not sliced_news:
        db = mongodb_client.get_db()
        total_news = list(db[NEWS_TABLE_NAME].find().sort(
            [('publishedAt', -1)]).skip(begin_index).limit(NEWS_LIMIT))
        # list(map(...)) so the pickled payload is a concrete list (a bare
        # map object is not picklable on Python 3).
        latest_total_news_digests = list(map(lambda x: x['digest'], total_news))
        if page_num == 1:
            total_news_digests = latest_total_news_digests
        else:
            # Append the newly fetched digests to the cached ones.
            total_news_digests = pickle.loads(redis_client.get(user_id))
            total_news_digests.extend(latest_total_news_digests)
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[0:NEWS_LIST_BATCH_SIZE]

    # Build one description per news item for the click predictor; it needs
    # non-empty text, so fall back to the title, then a placeholder.
    news_description = []
    for news in sliced_news:
        if news['description'] and news['description'].strip():
            news_description.append(news['description'])
        elif news['title'] and news['title'].strip():
            news_description.append(news['title'])
        else:
            news_description.append("This is an empty description")
    if news_description:
        click_predict = customized_news_list_client.predict_news_click(
            user_id, news_description)
    else:
        click_predict = []

    # Get user preference for news.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for i, news in enumerate(sliced_news):
        # Remove text field to save bandwidth.
        del news['text']
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
            # Force recommended news to the front (score 0.0 sorts first).
            # enumerate() replaces the original list.index(news), which was
            # O(n) per item and picks the wrong slot when two news dicts
            # compare equal; the bounds check avoids an IndexError if the
            # predictor returned fewer scores than news items.
            if i < len(click_predict):
                click_predict[i] = 0.0
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'Today'

    # Reorder by predicted click score.  Sort on the score alone so ties
    # never fall back to comparing dicts (a TypeError on Python 3); skip the
    # reorder entirely on a length mismatch, where zip() would silently drop
    # news items.
    if len(click_predict) == len(sliced_news):
        sliced_news = [x for (y, x) in sorted(
            zip(click_predict, sliced_news), key=lambda pair: pair[0])]
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    # Return one page of news summaries for the user, ranked by the user's
    # class preferences: each preferred class gets a descending 'level'
    # score, with a +0.5 bonus for news published today (local time zone).
    # NOTE(review): news whose class appears in no preference never receive a
    # 'level' key, so the sort key lambda below would raise KeyError for
    # them — presumably `preference` covers all CLASS_NUMBER classes; verify
    # against the recommendation service.
    #print 'operations: getNewsSummariesForUser'
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE
    # print 'getNewsSummariesForUser, pageNum: %s' % page_num
    # print 'begin_index: %s' % begin_index
    # print 'end_index: %s' % end_index
    # the final lisr of news to be returned
    sliced_news = []
    #Get preference for the user_id
    preference = news_recommendation_service_client.getPreferenceForUser(user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]
    if redis_client.get(user_id) is not None:
        # Cache hit: slice the cached digest list for this page.
        #print "len %s" % len(redis_client.get(user_id))
        #print "end_index %s" % end_index
        news_digests = pickle.loads(redis_client.get(user_id))
        sliced_news_digests = news_digests[begin_index:end_index]
        #print sliced_news_digests
        # NOTE(review): uses a module-level `db` handle here (no get_db()
        # call) — confirm it is initialized at import time.
        sliced_news = list(db[NEWS_TABLE_NAME].find({'digest':{'$in':sliced_news_digests}}).sort([('publishedAt', -1)]))
        if preference is not None and len(preference) > 0:
            #Sort news by preference
            # level counts down from CLASS_NUMBER so the top preference
            # gets the highest score.
            level = config['operations']['CLASS_NUMBER']
            for prefer in preference:
                level-=1
                for news in sliced_news:
                    if(news['class'] == prefer):
                        news['level'] = level
                        # Today's news within a preferred class ranks above
                        # older news of the same class.
                        if news['publishedAt'].replace(tzinfo=from_zone).astimezone(to_zone).date() == datetime.today().date():
                            news['level'] += 0.5
            #print "news list: %s" % news
            sliced_news.sort(key=lambda x: x['level'], reverse=True)
    else:
        # Cache miss: load the most recent news from MongoDB, rank them the
        # same way, then cache the digest list for subsequent pages.
        total_news = list(db[NEWS_TABLE_NAME].find().sort([('publishedAt', -1)]).limit(NEWS_LIMITS))
        if preference is not None and len(preference) > 0:
            #Sort news by preference
            level = config['operations']['CLASS_NUMBER']
            for prefer in preference:
                level-=1
                for news in total_news:
                    if(news['class'] == prefer):
                        news['level'] = level
                        #Use local time zone
                        if news['publishedAt'].replace(tzinfo=from_zone).astimezone(to_zone).date() == datetime.today().date():
                            news['level'] += 0.5
            #print "news list: %s" % news
            total_news.sort(key=lambda x: x['level'], reverse=True)
        total_news_digest = map(lambda x:x['digest'], total_news)
        redis_client.set(user_id, pickle.dumps(total_news_digest))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]
    for news in sliced_news:
        log_client.logger.debug('Create News class [%s] label, Current Top Preference is [%s]' % (news['class'], topPreference))
        #print 'Create News class [%s] label, Current Top Preference is [%s]' % (news['class'], topPreference)
        #remove text field to save bandwidth
        del news['text']
        # 'time' is 'today' for today's news (local time zone), otherwise a
        # human-readable date string.
        if news['publishedAt'].replace(tzinfo=from_zone).astimezone(to_zone).date()== datetime.today().date():
            news['time'] = 'today'
        else:
            news['time'] = news['publishedAt'].replace(tzinfo=from_zone).astimezone(to_zone).date().strftime("%A %d. %B %Y")
        # Only today's news in the top-preference class gets the
        # 'Recommend' tag.
        if news['class'] == topPreference and news['time'] == 'today':
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_news))
def getNewsSummariesForUser(user_id, page_num):
    """
    If the input user_id exists in Redis (in-memory cache) then we slice his
    cached digested-news list; if it cannot be found in Redis (a new user),
    we fetch the most recent news records from MongoDB, set the most recent
    NEWS_LIMIT news as his initial digested news and save them in Redis.

    From Web Server:5050, call pyjsonrpc for the user's preference list and
    take its first element as the top preference.

    Delete the 'text' field to save bandwidth; meanwhile set 'reason' to
    'Recommend' when the 'class' field matches the top preference, and set
    'time' to 'today' when 'publishedAt' falls on today's date.
    """
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # The final list of news to be returned.
    sliced_news = []
    if redis_client.get(user_id) is not None:
        # GET the (VALUE) cached news ids for this (KEY) user_id.
        news_digests = pickle.loads(redis_client.get(user_id))
        # If begin_index is out of range, this returns an empty list; if only
        # end_index is out of range, this returns all remaining news ids.
        sliced_news_digests = news_digests[begin_index:end_index]
        db = mongodb_client.get_db()
        sliced_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': sliced_news_digests}}))
    else:
        db = mongodb_client.get_db()
        # Sort in descending order (-1) of publish time.
        total_news = list(db[NEWS_TABLE_NAME].find().sort(
            [('publishedAt', -1)]).limit(NEWS_LIMIT))
        # list(map(...)) so the pickled payload is a concrete list (a bare
        # map object is not picklable on Python 3).
        total_news_digests = list(map(lambda x: x['digest'], total_news))
        redis_client.set(user_id, pickle.dumps(total_news_digests))
        redis_client.expire(user_id, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_news = total_news[begin_index:end_index]

    # Get preference for the user.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    for news in sliced_news:
        # Remove text field to save bandwidth.
        del news['text']
        # Guard the 'class' key: not every news document is classified yet
        # (the keyword-search variant in this module applies the same guard).
        if 'class' in news and news['class'] == topPreference:
            news['reason'] = 'Recommend'
        if news['publishedAt'].date() == datetime.today().date():
            news['time'] = 'today'
    return json.loads(dumps(sliced_news))
def test_basic():
    # The service must return a non-None preference list.
    result = client.getPreferenceForUser("test_user")
    print(result)
    assert result is not None
def test_basic():
    prefs = recommendation.getPreferenceForUser('test_user')
    print(prefs)
    # A valid preference list is never empty.
    assert len(prefs) != 0
    print("test_basic passed.")
def getSearchNewsSummariesForUser(user_id, page_num, search_key):
    """Full-text search (MongoDB $text) news for the user, one page at a time.

    Matching digests are cached in Redis keyed by search_key so later pages
    reuse the same result set.  Each returned item gets 'time' and (when the
    class matches the user's top preference and the news is from today)
    'reason' fields; 'text' is stripped to save bandwidth.
    """
    db = mongodb_client.get_db()  # connect to our cluster
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE

    # Get preference for the user_id.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    if redis_client.get(search_key) is not None:
        # Cache hit: slice the cached digest list for this page.
        news_search_digests = pickle.loads(redis_client.get(search_key))
        sliced_search_news_digests = news_search_digests[begin_index:end_index]
        sliced_search_news = list(db[NEWS_TABLE_NAME].find({
            'digest': {'$in': sliced_search_news_digests}
        }).sort([('publishedAt', -1)]))
    else:
        # Initialized up front so a failed index build / query yields an
        # empty result set instead of a NameError below (the original left
        # total_search_news unbound when the try block raised).
        total_search_news = []
        try:
            db[NEWS_TABLE_NAME].create_index([('title', pymongo.TEXT),
                                              ('description', pymongo.TEXT),
                                              ('text', pymongo.TEXT),
                                              ('class', pymongo.TEXT)])
            total_search_news = list(db[NEWS_TABLE_NAME].find(
                {"$text": {"$search": search_key}}))
        except Exception as e:
            print(str(e))
        # list(map(...)) so the pickled payload is a concrete list (a bare
        # map object is not picklable on Python 3).
        total_search_news_digest = list(
            map(lambda x: x['digest'], total_search_news))
        redis_client.set(search_key, pickle.dumps(total_search_news_digest))
        redis_client.expire(search_key, USER_NEWS_TIME_OUT_IN_SECONDS)
        sliced_search_news = total_search_news[begin_index:end_index]

    for news in sliced_search_news:
        # Remove text field to save bandwidth.
        del news['text']
        # Convert publish time to the local time zone before date comparison.
        local_date = news['publishedAt'].replace(
            tzinfo=from_zone).astimezone(to_zone).date()
        if local_date == datetime.today().date():
            news['time'] = 'today'
        else:
            news['time'] = local_date.strftime("%A %d. %B %Y")
        # .get() guards unclassified documents against a KeyError.
        if news.get('class') == topPreference and news['time'] == 'today':
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_search_news))
def getSearchNewsSummariesForUser(user_id, page_num, search_key):
    """Elasticsearch more_like_this search of news for the user, paged.

    Matching digests are cached in Redis keyed by search_key.  Returns an
    empty JSON list when the search yields no hits (or fails); otherwise
    each item gets 'time'/'reason' fields with 'text' stripped.
    """
    # connect to our cluster
    page_num = int(page_num)
    begin_index = (page_num - 1) * NEWS_LIST_BATCH_SIZE
    end_index = page_num * NEWS_LIST_BATCH_SIZE
    sliced_news = []

    # Get preference for the user_id.
    preference = news_recommendation_service_client.getPreferenceForUser(
        user_id)
    topPreference = None
    if preference is not None and len(preference) > 0:
        topPreference = preference[0]

    if redis_client.get(search_key) is not None:
        # Cache hit: re-query Mongo for the cached digests, newest first.
        news_digests = pickle.loads(redis_client.get(search_key))
        tmp_total_news = list(db[NEWS_TABLE_NAME].find(
            {'digest': {'$in': news_digests}}).sort([('publishedAt', -1)]))
        sliced_news = tmp_total_news[begin_index:end_index]
    else:
        # result stays None when the search fails, so the empty-hits branch
        # below returns [] instead of the NameError the original raised.
        result = None
        try:
            result = es.search(
                index="news",
                body={"size": 80,
                      "query": {"more_like_this": {
                          "fields": ["title", "description", "text", "class"],
                          "like": search_key,
                          "min_term_freq": 1,
                          "max_query_terms": 100}}})
        except Exception as e:
            print(str(e))
        hits = result['hits']['hits'] if result is not None else None
        total_news = []
        if hits is not None and len(hits) > 0:
            for hit in hits:
                total_news.append(hit['_source'])
            # list(map(...)) so the pickled payload is a concrete list (a
            # bare map object is not picklable on Python 3).
            total_news_digest = list(map(lambda x: x['digest'], total_news))
            # Re-fetch from Mongo to get the canonical documents, newest
            # first, then cache the digest list for subsequent pages.
            total_news = list(db[NEWS_TABLE_NAME].find(
                {'digest': {'$in': total_news_digest}}).sort(
                    [('publishedAt', -1)]))
            redis_client.set(search_key, pickle.dumps(total_news_digest))
            redis_client.expire(search_key, USER_NEWS_TIME_OUT_IN_SECONDS)
            sliced_news = total_news[begin_index:end_index]
        else:
            # No hits (or search failed): return an empty list.
            return json.loads(dumps(total_news))

    for news in sliced_news:
        log_client.logger.debug('Create News class [%s] label, Current Top Preference is [%s]' % (news['class'], topPreference))
        # remove text field to save bandwidth
        del news['text']
        # Convert publish time to the local time zone before date comparison.
        local_date = news['publishedAt'].replace(
            tzinfo=from_zone).astimezone(to_zone).date()
        if local_date == datetime.today().date():
            news['time'] = 'today'
        else:
            news['time'] = local_date.strftime("%A %d. %B %Y")
        if news['class'] == topPreference and news['time'] == 'today':
            news['reason'] = 'Recommend'
    return json.loads(dumps(sliced_news))