Пример #1
0
def main():
    """Start the article gRPC service on port 1601 and block until interrupted.

    Opens channels to the DB, create, search and MDC services, registers an
    ``ArticleServicer`` built from their stubs, starts the server and then
    sleeps in a loop until ``KeyboardInterrupt``.

    Every channel that was successfully opened is closed on the way out,
    whether the exit is caused by Ctrl-C or by an error during start-up.
    """
    args = get_args()
    logger = get_logger("article_service", args.v)

    # Channels are recorded as they are opened so the ``finally`` clause
    # closes exactly the ones that exist, even if a later step raises.
    channels = []

    def open_channel(env_var, port):
        channel = get_service_channel(logger, env_var, port)
        channels.append(channel)
        return channel

    try:
        db_stub = database_pb2_grpc.DatabaseStub(
            open_channel("DB_SERVICE_HOST", 1798))
        create_stub = create_pb2_grpc.CreateStub(
            open_channel("CREATE_SERVICE_HOST", 1922))
        search_stub = search_pb2_grpc.SearchStub(
            open_channel("SEARCH_SERVICE_HOST", 1886))

        logger.info("Creating article server")
        mdc_stub = mdc_pb2_grpc.ConverterStub(
            open_channel("MDC_SERVICE_HOST", 1937))

        server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        users_util = UsersUtil(logger, db_stub)
        recommender_util = RecommendersUtil(logger, db_stub)
        post_recommendation_stub = \
            recommender_util.get_post_recommendation_stub()
        article_pb2_grpc.add_ArticleServicer_to_server(
            ArticleServicer(create_stub, db_stub, mdc_stub, search_stub,
                            logger, users_util, post_recommendation_stub),
            server)
        server.add_insecure_port('0.0.0.0:1601')
        logger.info("Starting article server on port 1601")
        server.start()
        try:
            # server.start() does not block, so keep the main thread alive.
            while True:
                time.sleep(60 * 60 * 24)  # One day
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop the service.
            pass
    finally:
        # Close in reverse order of creation.
        for channel in reversed(channels):
            channel.close()
Пример #2
0
    def __init__(self, logger, users_util, db_stub):
        '''Keep references to the collaborators and build the active
        recommenders.

        logger, users_util and db_stub are stored as given. A RecommendersUtil
        is created from the logger, the db stub and this class's
        DEFAULT_RECOMMENDER, ENV_VAR and RECOMMENDERS attributes.
        '''
        self._logger, self._users_util, self._db_stub = (
            logger, users_util, db_stub)

        util = RecommendersUtil(
            logger,
            db_stub,
            self.DEFAULT_RECOMMENDER,
            self.ENV_VAR,
            self.RECOMMENDERS,
        )
        self._recommender_util = util

        # One or more recommender system objects, built from the
        # constructors listed in self.RECOMMENDERS.
        self.active_recommenders = util._get_active_recommenders()
Пример #3
0
    def __init__(self, logger, users_util, db_stub):
        '''Load posts and users through db_stub and derive the tag models.

        users_util is accepted but not used by this constructor.
        '''
        self._logger = logger
        self._db = db_stub
        self._recommender_util = RecommendersUtil(logger, db_stub)

        # Tag counters. They are created before anything is loaded;
        # presumably the loaders below fill them as a side effect, so the
        # statement order here should not be changed -- TODO confirm.
        self.post_tag_freq = defaultdict(int)
        self.user_tag_freq = defaultdict(int)

        # Posts first, then users, then the per-user models.
        self.posts = self._get_all_posts_and_tags()
        posts_message = "post-tags: {}".format(self.posts)
        self._logger.info(posts_message)

        self.users = self._get_all_user()
        self.user_models = self._create_user_models(self.users)
        models_message = "user_models: {}".format(self.user_models)
        self._logger.info(models_message)

        # Inverse frequencies, one table per side (users / posts).
        user_count = len(self.user_models)
        self.user_tag_ifs = self._calculate_based_itf(
            self.user_tag_freq, user_count)
        post_count = len(self.posts)
        self.post_tag_ifs = self._calculate_based_itf(
            self.post_tag_freq, post_count)
Пример #4
0
def main():
    """Run the likes gRPC service on port 1848 until interrupted.

    Builds the helper utilities on top of the database stub, registers an
    ``S2SLikeServicer`` on a ten-worker thread-pool server, starts it and
    then idles until ``KeyboardInterrupt``.
    """
    log = get_logger("likes_service")
    database = get_db_stub(log)

    users = UsersUtil(log, database)
    activities = ActivitiesUtil(log, database)
    recommenders = RecommendersUtil(log, database)
    post_recommendations = recommenders.get_post_recommendation_stub()

    pool = futures.ThreadPoolExecutor(max_workers=10)
    grpc_server = grpc.server(pool)
    servicer = S2SLikeServicer(
        log, database, users, activities, post_recommendations)
    like_pb2_grpc.add_S2SLikeServicer_to_server(servicer, grpc_server)

    grpc_server.add_insecure_port("0.0.0.0:1848")
    log.info("Starting Like service on port 1848")
    grpc_server.start()

    # start() returns immediately; park the main thread in day-long naps.
    one_day = 60 * 60 * 24
    try:
        while True:
            time.sleep(one_day)
    except KeyboardInterrupt:
        # Ctrl-C simply ends the function.
        return
Пример #5
0
def main():
    """Run the follows gRPC service on port 1641 until interrupted.

    Four service channels (DB, follow activity, approver, RSS) are opened in
    a single ``with`` statement. Inside it the stubs and helpers are built, a
    ``FollowsServicer`` is registered on a ten-worker thread-pool server, and
    the main thread then idles until ``KeyboardInterrupt``.
    """
    args = get_args()
    log = get_logger('follows_service', args.v)
    log.info('Creating server')

    with get_service_channel(log, 'DB_SERVICE_HOST', 1798) as db_chan, \
            get_service_channel(
                log, 'FOLLOW_ACTIVITY_SERVICE_HOST', 1922) as follow_chan, \
            get_service_channel(
                log, 'APPROVER_SERVICE_HOST', 2077) as approver_chan, \
            get_service_channel(log, 'RSS_SERVICE_HOST', 1973) as rss_chan:

        # One stub per channel.
        database = database_pb2_grpc.DatabaseStub(db_chan)
        rss = rss_pb2_grpc.RSSStub(rss_chan)
        follow = s2s_follow_pb2_grpc.S2SFollowStub(follow_chan)
        approver = approver_pb2_grpc.ApproverStub(approver_chan)

        users = UsersUtil(log, database)
        helper = Util(log, database, approver, users)
        grpc_server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))

        follow_recommender = RecommendersUtil(
            log, database).get_follow_recommender_stub()

        servicer = FollowsServicer(
            log, helper, users, database, follow, approver, rss,
            follow_recommender)
        follows_pb2_grpc.add_FollowsServicer_to_server(servicer, grpc_server)

        grpc_server.add_insecure_port('0.0.0.0:1641')
        log.info("Starting follows service on port 1641")
        grpc_server.start()

        # start() returns immediately; keep the main thread (and therefore
        # the ``with`` block) alive in day-long naps.
        one_day = 60 * 60 * 24
        try:
            while True:
                time.sleep(one_day)
        except KeyboardInterrupt:
            # Ctrl-C falls through to the end of the ``with`` block.
            pass
Пример #6
0
class CosineRecommender:
    '''
    Calculate similarity based on TF-IDF cosine-based similarity method
    described in Content-based Recommendation in Social Tagging Systems (4.3)

    State kept on the instance:
      posts          post id -> {"author_id": ..., "tags": [...]}
      users          user id -> {"likes": [post ids]}
      user_models    user id -> {tag: count over the user's liked posts}
      post_tag_freq  tag -> number of times the tag occurs on posts
      user_tag_freq  tag -> number of times the tag occurs in user models
      post_tag_ifs / user_tag_ifs
                     tag -> log(N / frequency) for the two tables above
    '''

    def __init__(self, logger, users_util, db_stub):
        '''Load all posts and user likes through db_stub and build the models.

        users_util is accepted but not used by this class.
        '''
        self._logger = logger
        self._db = db_stub
        self._recommender_util = RecommendersUtil(logger, db_stub)

        # Get user data and create models. The two counters must exist
        # before the loaders run: _clean_post_entries fills post_tag_freq
        # and _create_user_models fills user_tag_freq.
        self.post_tag_freq = defaultdict(int)
        self.user_tag_freq = defaultdict(int)
        self.posts = self._get_all_posts_and_tags()
        self._logger.info("post-tags: {}".format(self.posts))
        self.users = self._get_all_user()
        self.user_models = self._create_user_models(self.users)
        self._logger.info("user_models: {}".format(self.user_models))

        # Calculate Inverse Frequencies
        self.user_tag_ifs = self._calculate_based_itf(
            self.user_tag_freq, len(self.user_models))
        self.post_tag_ifs = self._calculate_based_itf(
            self.post_tag_freq, len(self.posts))

    def _calculate_based_itf(self, tag_freq, N):
        '''Return a defaultdict mapping each tag to log(N / frequency).

        Tags with a non-positive frequency (and every tag when N is not
        positive) are left out; the logarithm is undefined for them. Looking
        up a tag that was left out yields 0.
        '''
        itfs = defaultdict(int)
        if N <= 0:
            return itfs
        for tag, freq in tag_freq.items():
            if freq > 0:
                itfs[tag] = log(N / freq)
        return itfs

    def _clean_post_entries(self, pes):
        '''Convert post entries into the posts mapping.

        Side effect: every tag of every entry is counted in
        self.post_tag_freq.
        '''
        posts = defaultdict(lambda: {"tags": [], "author_id": 0})
        for pe in pes:
            tags = self._recommender_util.split_tags(pe.tags)
            for t in tags:
                self.post_tag_freq[t] += 1
            posts[pe.global_id] = {
                "author_id": pe.author_id,
                "tags": tags
            }
        return posts

    def _clean_user_entries(self, ues):
        '''Convert user entries into the users mapping, keyed by global_id.

        Entries whose host_is_null flag is false are skipped.
        '''
        users = defaultdict(lambda: {"likes": []})
        for ue in ues:
            if not ue.host_is_null:
                # Do not generate anything for foreign users.
                continue
            users[ue.global_id] = {
                "likes": self._clean_likes(ue.likes)
            }
        return users

    def _clean_likes(self, likes):
        '''Parse a comma-separated string of post ids into a list of ints.'''
        # The GROUP_CONCAT method in sqlite joins objects with "," into a string
        return [int(x) for x in likes.split(",") if x != ""]

    def _create_user_models(self, users):
        '''Build the per-user tag counts from each user's liked posts.

        Side effect: every counted tag is also added to self.user_tag_freq.
        '''
        # Iterate over every user like and add all tags of that post to the user
        # model
        user_models = defaultdict(lambda: defaultdict(int))
        for u_k, entry in users.items():
            for post_id in entry["likes"]:
                for tag in self.posts[post_id]["tags"]:
                    self.user_tag_freq[tag] += 1
                    user_models[u_k][tag] += 1
        return user_models

    def _get_all_posts_and_tags(self):
        '''Fetch all tagged posts from the database.

        On a database error the error is logged and an empty posts mapping
        is returned, so callers can keep treating the result as a mapping.
        '''
        find_resp = self._db.TaggedPosts(database_pb2.PostsRequest())
        if find_resp.result_type == database_pb2.PostsResponse.ERROR:
            self._logger.error(
                'Error getting TaggedPosts for Cosine: {}'.format(find_resp.error))
            return self._clean_post_entries([])
        return self._clean_post_entries(find_resp.results)

    def _get_all_user(self):
        '''Fetch all users together with their likes from the database.

        On a database error the error is logged and an empty users mapping
        is returned, so callers can keep treating the result as a mapping.
        '''
        find_resp = self._db.AllUserLikes(database_pb2.AllUsersRequest())
        if find_resp.result_type == database_pb2.UsersResponse.ERROR:
            self._logger.error(
                'Error getting AllUserLikes for Cosine: {}'.format(find_resp.error))
            return self._clean_user_entries([])

        return self._clean_user_entries(find_resp.results)

    def _tf_idf_cosine_similarity(self, user_model, post_tags):
        '''Return the TF-IDF cosine similarity between a user model and a
        post's tags, or -1 when either vector has zero length.'''
        sum_user_item_tf = 0
        sum_user_tf = 0
        sum_item_tf = 0
        for tag in post_tags:
            user_weight = user_model[tag] * self.user_tag_ifs[tag]
            post_weight = self.post_tag_ifs[tag]
            sum_user_item_tf += user_weight * post_weight
            sum_user_tf += user_weight ** 2
            sum_item_tf += post_weight ** 2
        divisor = (sum_user_tf ** 0.5) * (sum_item_tf ** 0.5)
        if divisor == 0:
            return -1
        return sum_user_item_tf / divisor

    def get_recommendations(self, user_id, n):
        '''Return (articles, error) with up to n articles for user_id,
        best match first. error is always None.

        Posts the user already liked, posts written by the user and the
        post with id 0 are never recommended.
        '''
        # .get() instead of indexing: indexing the defaultdict would insert
        # an empty model for an unknown user, and the number of user models
        # is the N used for the user-side inverse frequencies.
        u_m = self.user_models.get(user_id)
        if not u_m:
            self._logger.info(
                'Cosine user_model is empty. id: {}'.format(user_id))
            return [], None

        # Built once so the membership test in the loop is O(1).
        liked = set(self.users[user_id]["likes"])

        # Calculate similarities, keeping only the n best in a min-heap.
        sims = []
        for p_k, post in self.posts.items():
            # do not recommend liked posts or dummy posts
            if p_k in liked or p_k == 0 or post["author_id"] == user_id:
                continue
            sim = self._tf_idf_cosine_similarity(u_m, post["tags"])
            if len(sims) < n:
                heappush(sims, (sim, p_k))
            else:
                heappushpop(sims, (sim, p_k))

        # get top n results
        sims = sorted(sims, reverse=True)
        self._logger.info('Recommended (score, id): {}'.format(sims))
        posts_entries = [
            get_article(self._logger, self._db, global_id=post_id)
            for _, post_id in sims
        ]
        return posts_entries, None

    def update_model(self, user_id, article_id):
        '''Record that user_id liked article_id and refresh the user-side
        inverse frequencies. Always returns None.'''
        # If the user has liked the article previously do not update
        if article_id in self.users[user_id]["likes"]:
            return None

        art = get_article(self._logger, self._db, global_id=article_id)
        tags = self._recommender_util.split_tags(art.tags)

        # update user likes
        self.users[user_id]["likes"].append(article_id)

        # update user model with post tags
        for t in tags:
            self.user_tag_freq[t] += 1
            self.user_models[user_id][t] += 1

        self.user_tag_ifs = self._calculate_based_itf(
            self.user_tag_freq, len(self.user_models))
        return None

    def add_post(self, post_entry):
        '''Add a new post, or replace an edited one, and refresh the
        post-side inverse frequencies. Always returns None.'''
        tags = self._recommender_util.split_tags(post_entry.tags)

        # When the post is already known this is an edit: take its old tags
        # out of the frequency table first, otherwise every edit would count
        # the post's tags again.
        previous = self.posts.get(post_entry.global_id)
        if previous is not None:
            for t in previous["tags"]:
                remaining = self.post_tag_freq.get(t, 0) - 1
                if remaining > 0:
                    self.post_tag_freq[t] = remaining
                else:
                    self.post_tag_freq.pop(t, None)

        # update post (in case of new post/edit)
        self.posts[post_entry.global_id] = {
            "author_id": post_entry.author_id,
            "tags": tags
        }
        # update post tag frequency with new tags
        for t in tags:
            self.post_tag_freq[t] += 1

        self.post_tag_ifs = self._calculate_based_itf(
            self.post_tag_freq, len(self.posts))
        return None
Пример #7
0
class FollowRecommendationsServicer(follows_pb2_grpc.FollowsServicer):
    '''gRPC servicer answering "who should this user follow?" requests.

    The class attributes DEFAULT_RECOMMENDER, ENV_VAR and RECOMMENDERS are
    handed to RecommendersUtil, which supplies the active recommenders.
    '''

    RECOMMENDERS = {
        'surprise': SurpriseRecommender,
        'cn': CNRecommender,
        'graphdist': GraphDistanceRecommender,
    }
    DEFAULT_RECOMMENDER = 'graphdist'
    ENV_VAR = 'FOLLOW_RECOMMENDER_METHOD'
    DEFAULT_IMAGE = "https://upload.wikimedia.org/wikipedia/commons/8/89/Portrait_Placeholder.png"

    def __init__(self, logger, users_util, db_stub):
        '''Keep references to the collaborators and build the active
        recommenders.'''
        self._logger, self._users_util, self._db_stub = (
            logger, users_util, db_stub)

        util = RecommendersUtil(
            logger,
            db_stub,
            self.DEFAULT_RECOMMENDER,
            self.ENV_VAR,
            self.RECOMMENDERS,
        )
        self._recommender_util = util

        # One or more recommender system objects, built from the
        # constructors listed in self.RECOMMENDERS.
        self.active_recommenders = util._get_active_recommenders()

    def _get_recommendations(self, user_id):
        '''Yield follow suggestions for user_id from every active
        recommender in turn. May yield nothing when no system has a good
        suggestion.'''
        # TODO(iandioch): Allow for combining the results of multiple systems
        # in a smarter way than just concatenation.
        for recommender in self.active_recommenders:
            for suggestion in recommender.get_recommendations(user_id):
                yield suggestion

    def GetFollowRecommendations(self, request, context):
        '''Return the users recommended for request.user_id to follow.

        Responds with ERROR when the user is unknown or has a non-empty
        host; otherwise with OK and one result per suggestion.
        '''
        self._logger.debug('GetFollowRecommendations, user_id = %s',
                           request.user_id)

        response = recommend_follows_pb2.FollowRecommendationResponse()
        requester = self._users_util.get_user_from_db(
            global_id=request.user_id)

        # Work out whether the request must be refused, and why.
        failure = None
        if requester is None:
            failure = "Could not find the given user_id."
        elif requester.host is not None and requester.host != "":
            failure = "Can only give recommendations for local users."

        if failure is not None:
            response.result_type = general_pb2.ResultType.ERROR
            response.error = failure
            return response

        response.result_type = general_pb2.ResultType.OK

        # Get the recommendations and package them into proto. The first
        # element of each suggestion is the suggested user's global id.
        for suggestion in self._get_recommendations(requester.global_id):
            suggested = self._users_util.get_or_create_user_from_db(
                global_id=suggestion[0])
            entry = response.results.add()
            entry.handle = suggested.handle
            entry.host = suggested.host
            entry.display_name = suggested.display_name
            entry.bio = suggested.bio
            entry.image = self.DEFAULT_IMAGE
            entry.global_id = suggested.global_id
        return response

    def UpdateFollowRecommendations(self, request, context):
        '''Forward a follow change to every active recommender and return
        an empty response.'''
        follower = request.follower
        followed = request.followed
        following = request.following
        self._logger.debug('UpdateFollowRecommendations, %d following %d: %s',
                           follower, followed, following)

        response = recommend_follows_pb2.UpdateFollowRecommendationsResponse()
        for recommender in self.active_recommenders:
            recommender.update_recommendations(follower, followed, following)
        return response
Пример #8
0
class PostRecommendationsServicer(recommend_posts_pb2_grpc.PostRecommendationsServicer):
    '''gRPC servicer that serves post recommendations and forwards model
    updates to the recommenders.

    The class attributes DEFAULT_RECOMMENDER, ENV_VAR and RECOMMENDERS are
    handed to RecommendersUtil, which supplies the active recommenders.
    MAX_RECOMMENDATIONS is the budget that Get splits evenly between them.
    '''

    RECOMMENDERS = {
        'random': RandomRecommender,
        'cosine': CosineRecommender,
    }
    DEFAULT_RECOMMENDER = 'random'
    ENV_VAR = 'POSTS_RECOMMENDER_METHOD'
    MAX_RECOMMENDATIONS = 50
    DEFAULT_IMAGE = "https://upload.wikimedia.org/wikipedia/commons/8/89/Portrait_Placeholder.png"

    def __init__(self, users_util, logger, db_stub):
        '''Keep references to the collaborators and build the active
        recommenders. Note the parameter order: users_util comes first.'''
        self._logger = logger
        self._db_stub = db_stub
        self._users_util = users_util
        self._activ_util = ActivitiesUtil(logger, db_stub)
        self._recommender_util = RecommendersUtil(
            logger, db_stub, self.DEFAULT_RECOMMENDER, self.ENV_VAR, self.RECOMMENDERS)

        # self.active_recommenders contains one or more recommender system
        # objects (out of the constructors in self.RECOMMENDERS).
        self.active_recommenders = self._recommender_util._get_active_recommenders()

    @staticmethod
    def _fail(resp, message):
        '''Mark resp as an ERROR carrying message and return it.'''
        resp.result_type = \
            recommend_posts_pb2.PostRecommendationsResponse.ERROR
        resp.message = message
        return resp

    @staticmethod
    def _succeed(resp):
        '''Mark resp as OK and return it.'''
        resp.result_type = \
            recommend_posts_pb2.PostRecommendationsResponse.OK
        return resp

    def _fill_post(self, post_obj, post, author):
        '''Copy one recommended post and its author's details into the
        response entry post_obj.'''
        # See proto/Feed.Post and proto/database.PostsEntry
        post_obj.global_id = post.global_id
        post_obj.author = author.handle
        post_obj.author_host = author.host
        post_obj.author_id = post.author_id
        post_obj.title = post.title
        post_obj.body = post.body
        post_obj.published = self._activ_util.timestamp_to_rfc(
            post.creation_datetime)
        post_obj.likes_count = post.likes_count
        post_obj.bio = author.bio
        post_obj.image = self.DEFAULT_IMAGE
        post_obj.is_liked = post.is_liked
        post_obj.is_followed = post.is_followed
        post_obj.shares_count = post.shares_count
        post_obj.summary = post.summary
        post_obj.tags.extend(self._recommender_util.split_tags(post.tags))

    def Get(self, request, context):
        '''Return recommended posts for request.user_id.

        Each active recommender is asked for an equal share of
        MAX_RECOMMENDATIONS. The lists are interleaved by rank, so every
        recommender's best post comes before any second-best one, and a post
        suggested by more than one recommender appears only once. Responds
        with ERROR when a recommender reports an error or a post's author
        cannot be found.
        '''
        self._logger.debug('Get PostRecommendations, user_id = %s',
                           request.user_id)

        resp = recommend_posts_pb2.PostRecommendationsResponse()

        if not self.active_recommenders:
            # Nothing to ask, and the per-recommender share below would
            # divide by zero.
            return self._succeed(resp)

        max_posts_per_r = self.MAX_RECOMMENDATIONS // len(
            self.active_recommenders)
        recommended_posts = []
        for r in self.active_recommenders:
            r_posts, error = r.get_recommendations(
                request.user_id, max_posts_per_r)
            if error:
                return self._fail(resp, error)
            recommended_posts.append(r_posts)

        # Join recommendations together, with the highest recommended first
        post_ids = set()  # global ids already placed in the response
        for i in range(max_posts_per_r + 1):
            for r_p in recommended_posts:
                if i >= len(r_p):
                    continue
                post = r_p[i]
                if post.global_id in post_ids:
                    continue
                author = self._users_util.get_user_from_db(
                    global_id=post.author_id)
                if author is None:
                    return self._fail(resp, "Post Author could not be found")
                post_ids.add(post.global_id)
                self._fill_post(resp.results.add(), post, author)
        return self._succeed(resp)

    def UpdateModel(self, request, context):
        '''Tell every active recommender that request.user_id interacted
        with request.article_id. Stops at the first recommender that
        reports an error.'''
        self._logger.debug('UpdateModel PostRecommendations, user_id = %s',
                           request.user_id)

        resp = recommend_posts_pb2.PostRecommendationsResponse()

        for r in self.active_recommenders:
            error = r.update_model(request.user_id, request.article_id)
            if error:
                return self._fail(resp, error)

        return self._succeed(resp)

    def AddPost(self, request, context):
        '''Hand the post in request to every active recommender. Stops at
        the first recommender that reports an error.'''
        self._logger.debug('AddPost PostRecommendations, author_id = %s',
                           request.author_id)

        resp = recommend_posts_pb2.PostRecommendationsResponse()

        for r in self.active_recommenders:
            error = r.add_post(request)
            if error:
                return self._fail(resp, error)

        return self._succeed(resp)