def main():
    args = get_args()
    logger = get_logger("article_service", args.v)

    db_channel = get_service_channel(logger, "DB_SERVICE_HOST", 1798)
    db_stub = database_pb2_grpc.DatabaseStub(db_channel)

    create_channel = get_service_channel(logger, "CREATE_SERVICE_HOST", 1922)
    create_stub = create_pb2_grpc.CreateStub(create_channel)

    search_channel = get_service_channel(logger, "SEARCH_SERVICE_HOST", 1886)
    search_stub = search_pb2_grpc.SearchStub(search_channel)

    logger.info("Creating article server")
    mdc_channel = get_service_channel(logger, "MDC_SERVICE_HOST", 1937)
    mdc_stub = mdc_pb2_grpc.ConverterStub(mdc_channel)

    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    users_util = UsersUtil(logger, db_stub)
    recommender_util = RecommendersUtil(logger, db_stub)
    post_recommendation_stub = recommender_util.get_post_recommendation_stub()
    article_pb2_grpc.add_ArticleServicer_to_server(
        ArticleServicer(create_stub, db_stub, mdc_stub, search_stub,
                        logger, users_util, post_recommendation_stub),
        server)

    server.add_insecure_port('0.0.0.0:1601')
    logger.info("Starting article server on port 1601")
    server.start()
    try:
        while True:
            time.sleep(60 * 60 * 24)  # One day
    except KeyboardInterrupt:
        # Close all open channels before exiting.
        db_channel.close()
        create_channel.close()
        search_channel.close()
        mdc_channel.close()
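# The service mains in this section obtain their gRPC channels through a
# shared get_service_channel helper that is not shown here. A minimal sketch
# of what such a helper could look like, assuming it resolves the target host
# from the named environment variable and opens an insecure channel on the
# given port (the real helper may also log and wait for channel readiness).
# The name get_service_channel_sketch is a hypothetical stand-in.
import os

import grpc


def get_service_channel_sketch(logger, host_env_var, port):
    # Hypothetical stand-in for the project's get_service_channel helper.
    host = os.environ.get(host_env_var, "localhost")
    target = "{}:{}".format(host, port)
    logger.info("Connecting to %s", target)
    return grpc.insecure_channel(target)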
def main():
    logger = get_logger("likes_service")
    db_stub = get_db_stub(logger)
    user_util = UsersUtil(logger, db_stub)
    activ_util = ActivitiesUtil(logger, db_stub)
    recommender_util = RecommendersUtil(logger, db_stub)
    post_recommendation_stub = recommender_util.get_post_recommendation_stub()

    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    like_pb2_grpc.add_S2SLikeServicer_to_server(
        S2SLikeServicer(logger, db_stub, user_util, activ_util,
                        post_recommendation_stub),
        server)
    server.add_insecure_port("0.0.0.0:1848")
    logger.info("Starting Like service on port 1848")
    server.start()
    try:
        while True:
            time.sleep(60 * 60 * 24)  # One day
    except KeyboardInterrupt:
        pass
def main():
    args = get_args()
    logger = get_logger('follows_service', args.v)
    logger.info('Creating server')

    db_env = 'DB_SERVICE_HOST'
    follow_env = 'FOLLOW_ACTIVITY_SERVICE_HOST'
    approver_env = 'APPROVER_SERVICE_HOST'
    rss_env = 'RSS_SERVICE_HOST'
    with get_service_channel(logger, db_env, 1798) as db_chan, \
            get_service_channel(logger, follow_env, 1922) as follow_chan, \
            get_service_channel(logger, approver_env, 2077) as approver_chan, \
            get_service_channel(logger, rss_env, 1973) as rss_chan:
        db_stub = database_pb2_grpc.DatabaseStub(db_chan)
        rss_stub = rss_pb2_grpc.RSSStub(rss_chan)
        follow_stub = s2s_follow_pb2_grpc.S2SFollowStub(follow_chan)
        approver_stub = approver_pb2_grpc.ApproverStub(approver_chan)

        users_util = UsersUtil(logger, db_stub)
        util = Util(logger, db_stub, approver_stub, users_util)

        server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        recommender_util = RecommendersUtil(logger, db_stub)
        follow_recommender_stub = recommender_util.get_follow_recommender_stub()
        follows_servicer = FollowsServicer(logger, util, users_util, db_stub,
                                           follow_stub, approver_stub,
                                           rss_stub, follow_recommender_stub)
        follows_pb2_grpc.add_FollowsServicer_to_server(follows_servicer,
                                                       server)
        server.add_insecure_port('0.0.0.0:1641')
        logger.info("Starting follows service on port 1641")
        server.start()
        try:
            while True:
                time.sleep(60 * 60 * 24)  # One day
        except KeyboardInterrupt:
            pass
class CosineRecommender:
    '''
    Calculate similarity using the TF-IDF cosine-based similarity method
    described in "Content-based Recommendation in Social Tagging Systems"
    (section 4.3).
    '''

    def __init__(self, logger, users_util, db_stub):
        self._logger = logger
        self._db = db_stub
        self._recommender_util = RecommendersUtil(logger, db_stub)
        # Get user data and create models.
        self.post_tag_freq = defaultdict(int)
        self.user_tag_freq = defaultdict(int)
        self.posts = self._get_all_posts_and_tags()
        self._logger.info("post-tags: {}".format(self.posts))
        self.users = self._get_all_user()
        self.user_models = self._create_user_models(self.users)
        self._logger.info("user_models: {}".format(self.user_models))
        # Calculate inverse frequencies.
        self.user_tag_ifs = self._calculate_based_itf(
            self.user_tag_freq, len(self.user_models))
        self.post_tag_ifs = self._calculate_based_itf(
            self.post_tag_freq, len(self.posts))

    def _calculate_based_itf(self, tag_freq, N):
        # Inverse tag frequency: log(N / number of items carrying the tag).
        itfs = defaultdict(int)
        for key in tag_freq.keys():
            itfs[key] = log(N / tag_freq[key])
        return itfs

    def _clean_post_entries(self, pes):
        posts = defaultdict(lambda: {"tags": [], "author_id": 0})
        for pe in pes:
            tags = self._recommender_util.split_tags(pe.tags)
            for t in tags:
                self.post_tag_freq[t] += 1
            posts[pe.global_id] = {
                "author_id": pe.author_id,
                "tags": tags,
            }
        return posts

    def _clean_user_entries(self, ues):
        # Keyed by global_id so user entries can be looked up directly.
        users = defaultdict(lambda: {"likes": []})
        for ue in ues:
            likes = self._clean_likes(ue.likes)
            if not ue.host_is_null:
                # Do not generate anything for foreign users.
                continue
            users[ue.global_id] = {
                "likes": likes,
            }
        return users

    def _clean_likes(self, likes):
        # The GROUP_CONCAT function in sqlite joins the liked post ids with
        # "," into a single string.
        return [int(x) for x in likes.split(",") if x != ""]

    def _create_user_models(self, users):
        # Iterate over every like of every user and add all tags of the liked
        # post to that user's model.
        user_models = defaultdict(lambda: defaultdict(int))
        for u_k in users.keys():
            for post_id in users[u_k]["likes"]:
                for tag in self.posts[post_id]["tags"]:
                    self.user_tag_freq[tag] += 1
                    user_models[u_k][tag] += 1
        return user_models

    def _get_all_posts_and_tags(self):
        find_resp = self._db.TaggedPosts(database_pb2.PostsRequest())
        if find_resp.result_type == database_pb2.PostsResponse.ERROR:
            self._logger.error(
                'Error getting TaggedPosts for Cosine: {}'.format(
                    find_resp.error))
            return []
        return self._clean_post_entries(find_resp.results)

    def _get_all_user(self):
        find_resp = self._db.AllUserLikes(database_pb2.AllUsersRequest())
        if find_resp.result_type == database_pb2.UsersResponse.ERROR:
            self._logger.error(
                'Error getting AllUserLikes for Cosine: {}'.format(
                    find_resp.error))
            return []
        return self._clean_user_entries(find_resp.results)

    def _tf_idf_cosine_similarity(self, user_model, post_tags):
        sum_user_item_tf = 0
        sum_user_tf = 0
        sum_item_tf = 0
        for tag in post_tags:
            sum_user_item_tf += (user_model[tag] *
                                 self.user_tag_ifs[tag] *
                                 self.post_tag_ifs[tag])
            sum_user_tf += (user_model[tag] * self.user_tag_ifs[tag]) ** 2
            sum_item_tf += self.post_tag_ifs[tag] ** 2
        divisor = (sum_user_tf ** 0.5) * (sum_item_tf ** 0.5)
        if divisor == 0:
            return -1
        return sum_user_item_tf / divisor

    def get_recommendations(self, user_id, n):
        u_m = self.user_models[user_id]
        if u_m == {}:
            self._logger.info(
                'Cosine user_model is empty. id: {}'.format(user_id))
            return [], None
        # Calculate similarities, keeping only the top n in a heap.
        sims = []
        for p_k in self.posts.keys():
            # Do not recommend already liked posts, dummy posts, or the
            # user's own posts.
            if (p_k in self.users[user_id]["likes"] or p_k == 0 or
                    self.posts[p_k]["author_id"] == user_id):
                continue
            sim = self._tf_idf_cosine_similarity(u_m, self.posts[p_k]["tags"])
            if len(sims) < n:
                heappush(sims, (sim, p_k))
            else:
                heappushpop(sims, (sim, p_k))
        # Return the top n results, highest similarity first.
        sims = sorted(sims, reverse=True)
        self._logger.info('Recommended (score, id): {}'.format(sims))
        posts_entries = []
        for result in sims:
            art = get_article(self._logger, self._db, global_id=result[1])
            posts_entries.append(art)
        return posts_entries, None

    def update_model(self, user_id, article_id):
        # If the user has already liked the article, do not update.
        if article_id in self.users[user_id]["likes"]:
            return None
        art = get_article(self._logger, self._db, global_id=article_id)
        tags = self._recommender_util.split_tags(art.tags)
        # Update the user's likes.
        self.users[user_id]["likes"].append(article_id)
        # Update the user model with the post's tags.
        for t in tags:
            self.user_tag_freq[t] += 1
            self.user_models[user_id][t] += 1
        self.user_tag_ifs = self._calculate_based_itf(
            self.user_tag_freq, len(self.user_models))
        return None

    def add_post(self, post_entry):
        tags = self._recommender_util.split_tags(post_entry.tags)
        # Update the post entry (handles both new posts and edits).
        self.posts[post_entry.global_id] = {
            "author_id": post_entry.author_id,
            "tags": tags,
        }
        # Update the post tag frequencies with the new tags.
        for t in tags:
            self.post_tag_freq[t] += 1
        self.post_tag_ifs = self._calculate_based_itf(
            self.post_tag_freq, len(self.posts))
        return None
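# A small self-contained illustration of the TF-IDF cosine score that
# _tf_idf_cosine_similarity computes above: for each tag of a candidate post,
# the user's tag count weighted by the user-side inverse tag frequency is
# multiplied with the post-side inverse tag frequency, and the resulting dot
# product is normalised by the lengths of the two weight vectors. All numbers
# below are made up purely for illustration.
from math import log

user_model_example = {"python": 3, "grpc": 1}  # tag counts from liked posts
user_tag_ifs_example = {"python": log(10 / 4), "grpc": log(10 / 2)}
post_tag_ifs_example = {"python": log(50 / 20), "grpc": log(50 / 5)}
post_tags_example = ["python", "grpc"]

numerator = sum(user_model_example.get(t, 0) *
                user_tag_ifs_example.get(t, 0) *
                post_tag_ifs_example.get(t, 0)
                for t in post_tags_example)
user_len = sum((user_model_example.get(t, 0) *
                user_tag_ifs_example.get(t, 0)) ** 2
               for t in post_tags_example) ** 0.5
post_len = sum(post_tag_ifs_example.get(t, 0) ** 2
               for t in post_tags_example) ** 0.5
print(numerator / (user_len * post_len))  # cosine similarity, here in [0, 1]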
class FollowRecommendationsServicer(follows_pb2_grpc.FollowsServicer):
    RECOMMENDERS = {
        'surprise': SurpriseRecommender,
        'cn': CNRecommender,
        'graphdist': GraphDistanceRecommender,
    }
    DEFAULT_RECOMMENDER = 'graphdist'
    ENV_VAR = 'FOLLOW_RECOMMENDER_METHOD'
    DEFAULT_IMAGE = "https://upload.wikimedia.org/wikipedia/commons/8/89/Portrait_Placeholder.png"

    def __init__(self, logger, users_util, db_stub):
        self._logger = logger
        self._users_util = users_util
        self._db_stub = db_stub
        self._recommender_util = RecommendersUtil(
            logger, db_stub, self.DEFAULT_RECOMMENDER, self.ENV_VAR,
            self.RECOMMENDERS)
        # self.active_recommenders contains one or more recommender system
        # objects (out of the constructors in self.RECOMMENDERS).
        self.active_recommenders = \
            self._recommender_util._get_active_recommenders()

    def _get_recommendations(self, user_id):
        '''Get recommendations of users for the given user_id to follow,
        using the one or more systems in self.active_recommenders.
        May yield nothing if there are no good recommendations.'''
        # TODO(iandioch): Allow for combining the results of multiple systems
        # in a smarter way than just concatenation.
        for r in self.active_recommenders:
            yield from r.get_recommendations(user_id)

    def GetFollowRecommendations(self, request, context):
        self._logger.debug('GetFollowRecommendations, user_id = %s',
                           request.user_id)
        resp = recommend_follows_pb2.FollowRecommendationResponse()
        user = self._users_util.get_user_from_db(global_id=request.user_id)
        if user is None:
            resp.result_type = general_pb2.ResultType.ERROR
            resp.error = "Could not find the given user_id."
            return resp
        if not (user.host is None or user.host == ""):
            resp.result_type = general_pb2.ResultType.ERROR
            resp.error = "Can only give recommendations for local users."
            return resp
        resp.result_type = general_pb2.ResultType.OK
        # Get the recommendations and package them into the response proto.
        for p in self._get_recommendations(user.global_id):
            a = self._users_util.get_or_create_user_from_db(global_id=p[0])
            user_obj = resp.results.add()
            user_obj.handle = a.handle
            user_obj.host = a.host
            user_obj.display_name = a.display_name
            user_obj.bio = a.bio
            user_obj.image = self.DEFAULT_IMAGE
            user_obj.global_id = a.global_id
        return resp

    def UpdateFollowRecommendations(self, request, context):
        self._logger.debug('UpdateFollowRecommendations, %d following %d: %s',
                           request.follower, request.followed,
                           request.following)
        resp = recommend_follows_pb2.UpdateFollowRecommendationsResponse()
        for r in self.active_recommenders:
            r.update_recommendations(request.follower, request.followed,
                                     request.following)
        return resp
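# _get_recommendations above chains the generators of every active
# recommender with "yield from", so GetFollowRecommendations sees one flat
# stream of results whose first element is the recommended user's global_id.
# A toy illustration of that pattern; the dummy recommender classes and the
# (global_id, score) data are made up for illustration only.
class _DummyRecommenderA:
    def get_recommendations(self, user_id):
        yield from [(2, 0.9), (3, 0.7)]


class _DummyRecommenderB:
    def get_recommendations(self, user_id):
        yield from [(5, 0.4)]


def _chained_recommendations(recommenders, user_id):
    # Same concatenation-by-generator shape as _get_recommendations.
    for r in recommenders:
        yield from r.get_recommendations(user_id)


print(list(_chained_recommendations(
    [_DummyRecommenderA(), _DummyRecommenderB()], 1)))
# -> [(2, 0.9), (3, 0.7), (5, 0.4)]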
class PostRecommendationsServicer(
        recommend_posts_pb2_grpc.PostRecommendationsServicer):
    RECOMMENDERS = {
        'random': RandomRecommender,
        'cosine': CosineRecommender,
    }
    DEFAULT_RECOMMENDER = 'random'
    ENV_VAR = 'POSTS_RECOMMENDER_METHOD'
    MAX_RECOMMENDATIONS = 50
    DEFAULT_IMAGE = "https://upload.wikimedia.org/wikipedia/commons/8/89/Portrait_Placeholder.png"

    def __init__(self, users_util, logger, db_stub):
        self._logger = logger
        self._db_stub = db_stub
        self._users_util = users_util
        self._activ_util = ActivitiesUtil(logger, db_stub)
        self._recommender_util = RecommendersUtil(
            logger, db_stub, self.DEFAULT_RECOMMENDER, self.ENV_VAR,
            self.RECOMMENDERS)
        # self.active_recommenders contains one or more recommender system
        # objects (out of the constructors in self.RECOMMENDERS).
        self.active_recommenders = \
            self._recommender_util._get_active_recommenders()

    def Get(self, request, context):
        self._logger.debug('Get PostRecommendations, user_id = %s',
                           request.user_id)
        resp = recommend_posts_pb2.PostRecommendationsResponse()
        recommended_posts = []
        post_ids = set()
        max_posts_per_r = self.MAX_RECOMMENDATIONS // len(
            self.active_recommenders)
        for r in self.active_recommenders:
            r_posts, error = r.get_recommendations(
                request.user_id, max_posts_per_r)
            if error:
                resp.result_type = \
                    recommend_posts_pb2.PostRecommendationsResponse.ERROR
                resp.message = error
                return resp
            recommended_posts.append(r_posts)
        # Join the recommendations together, highest recommended first.
        for i in range(max_posts_per_r + 1):
            for r_p in recommended_posts:
                # See proto/Feed.Post and proto/database.PostsEntry.
                if i < len(r_p):
                    author = self._users_util.get_user_from_db(
                        global_id=r_p[i].author_id)
                    if author is None:
                        resp.result_type = \
                            recommend_posts_pb2.PostRecommendationsResponse.ERROR
                        resp.message = "Post author could not be found"
                        return resp
                    post_obj = resp.results.add()
                    post_obj.global_id = r_p[i].global_id
                    post_obj.author = author.handle
                    post_obj.author_host = author.host
                    post_obj.author_id = r_p[i].author_id
                    post_obj.title = r_p[i].title
                    post_obj.body = r_p[i].body
                    post_obj.published = self._activ_util.timestamp_to_rfc(
                        r_p[i].creation_datetime)
                    post_obj.likes_count = r_p[i].likes_count
                    post_obj.bio = author.bio
                    post_obj.image = self.DEFAULT_IMAGE
                    post_obj.is_liked = r_p[i].is_liked
                    post_obj.is_followed = r_p[i].is_followed
                    post_obj.shares_count = r_p[i].shares_count
                    post_obj.summary = r_p[i].summary
                    tags = self._recommender_util.split_tags(r_p[i].tags)
                    post_obj.tags.extend(tags)
        resp.result_type = \
            recommend_posts_pb2.PostRecommendationsResponse.OK
        return resp

    def UpdateModel(self, request, context):
        self._logger.debug('UpdateModel PostRecommendations, user_id = %s',
                           request.user_id)
        resp = recommend_posts_pb2.PostRecommendationsResponse()
        for r in self.active_recommenders:
            error = r.update_model(request.user_id, request.article_id)
            if error:
                resp.result_type = \
                    recommend_posts_pb2.PostRecommendationsResponse.ERROR
                resp.message = error
                return resp
        resp.result_type = \
            recommend_posts_pb2.PostRecommendationsResponse.OK
        return resp

    def AddPost(self, request, context):
        self._logger.debug('AddPost PostRecommendations, author_id = %s',
                           request.author_id)
        resp = recommend_posts_pb2.PostRecommendationsResponse()
        for r in self.active_recommenders:
            error = r.add_post(request)
            if error:
                resp.result_type = \
                    recommend_posts_pb2.PostRecommendationsResponse.ERROR
                resp.message = error
                return resp
        resp.result_type = \
            recommend_posts_pb2.PostRecommendationsResponse.OK
        return resp
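# Get() above splits MAX_RECOMMENDATIONS across the active recommenders and
# then interleaves their ranked lists round-robin, so every recommender's top
# pick appears before any recommender's second pick. A toy illustration of
# that interleaving loop; the post ids are made up for illustration only.
recommended_posts_example = [
    ["a1", "a2", "a3"],  # results from recommender A, best first
    ["b1", "b2"],        # results from recommender B, best first
]
max_posts_per_r_example = 3

merged = []
for i in range(max_posts_per_r_example + 1):
    for r_p in recommended_posts_example:
        if i < len(r_p):
            merged.append(r_p[i])
print(merged)  # -> ['a1', 'b1', 'a2', 'b2', 'a3']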