Exemplo n.º 1
0
class BaseStorage(object):

    '''
    Common base class for the storage backends used by a feed.

    A feed relies on two kinds of storage:

    - the Activity Storage and
    - the Timeline Storage

    Typical usage looks like this::

        feed = BaseFeed()
        # the activity storage keeps the activity itself, mapped to an id
        feed.insert_activity(activity)
        # afterwards the id is written to the timeline storage
        feed.add(activity)

    Two activity storage backends are currently ready for production:

    - Cassandra
    - Redis

    Storage classes are always handed a complete activity object; the
    serializer class then decides how that activity is turned into
    something the database can store.
    '''
    #: Serializer class used when none is passed to the constructor
    default_serializer_class = DummySerializer
    metrics = get_metrics_instance()

    activity_class = Activity
    aggregated_activity_class = AggregatedActivity

    def __init__(self, serializer_class=None, activity_class=None, **options):
        '''
        :param serializer_class: overrides ``default_serializer_class``
        :param activity_class: overrides the class level ``activity_class``
        :param options: remaining keyword options, kept on ``self.options``;
            an ``aggregated_activity_class`` entry is removed from the
            options and, when not None, overrides the class level attribute
        '''
        # the pop mutates the very dict that ends up on self.options, so the
        # stored options never contain 'aggregated_activity_class'
        aggregated_cls = options.pop('aggregated_activity_class', None)
        self.options = options

        if serializer_class:
            self.serializer_class = serializer_class
        else:
            self.serializer_class = self.default_serializer_class

        if activity_class is not None:
            self.activity_class = activity_class
        if aggregated_cls is not None:
            self.aggregated_activity_class = aggregated_cls

    def flush(self):
        '''
        Flushes the entire storage

        The base implementation does nothing.
        '''

    def activities_to_ids(self, activities_or_ids):
        '''
        Maps every item through :meth:`activity_to_id`

        :param activities_or_ids: an iterable of activities and/or ids
        :returns list: one id per item, in iteration order
        '''
        return [self.activity_to_id(item) for item in activities_or_ids]

    def activity_to_id(self, activity):
        '''
        Returns ``activity.serialization_id`` when that attribute exists,
        otherwise the given object itself (it is then treated as the id)
        '''
        fallback = activity
        return getattr(activity, 'serialization_id', fallback)

    @property
    def serializer(self):
        '''
        Builds and returns a new instance of the serializer class

        The serializer is told which activity class we use and, when one
        is set, which aggregated activity class. A fresh instance is
        created on every access.
        '''
        aggregated_cls = getattr(self, 'aggregated_activity_class', None)
        extra_kwargs = {}
        if aggregated_cls is not None:
            extra_kwargs['aggregated_activity_class'] = aggregated_cls
        return self.serializer_class(
            activity_class=self.activity_class, **extra_kwargs)

    def serialize_activity(self, activity):
        '''
        Serializes a single activity using the serializer's ``dumps``

        :param activity: the activity to serialize
        :returns: whatever the serializer's ``dumps`` produces
        '''
        return self.serializer.dumps(activity)

    def serialize_activities(self, activities):
        '''
        Serializes a list of activities

        :param activities: the list of activities
        :returns dict: serialized activities keyed by activity id
        '''
        by_id = {}
        for item in activities:
            # serialize first, then resolve the id (same order as before)
            payload = self.serialize_activity(item)
            item_id = self.activity_to_id(item)
            by_id[item_id] = payload
        return by_id

    def deserialize_activities(self, serialized_activities):
        '''
        Deserializes a collection of serialized activities

        :param serialized_activities: either an iterable of serialized
            activities or a dictionary mapping activity ids to serialized
            activities (only the values are used in that case)
        :returns list: the deserialized activities
        '''
        payloads = serialized_activities
        if isinstance(payloads, dict):
            payloads = payloads.values()
        # the serializer property is deliberately read once per item
        return [self.serializer.loads(payload) for payload in payloads]
Exemplo n.º 2
0
class Feedly(object):

    '''
    The Feedly class handles the fanout from a user's activity
    to all of their followers' feeds

    .. note::
        Fanout is the process which pushes a little bit of data to all of your
        followers in many small and asynchronous tasks.

    To write your own Feedly class you will need to implement

    - get_user_follower_ids
    - feed_classes
    - user_feed_class

    **Example** ::

        from feedly.feed_managers.base import Feedly

        class PinFeedly(Feedly):
            # customize the feed classes we write to
            feed_classes = dict(
                normal=PinFeed,
                aggregated=AggregatedPinFeed
            )
            # customize the user feed class
            user_feed_class = UserPinFeed

            # define how feedly can get the follower ids
            def get_user_follower_ids(self, user_id):
                ids = Follow.objects.filter(target=user_id).values_list('user_id', flat=True)
                return {FanoutPriority.HIGH:ids}

            # utility functions to ease integration into your project
            def add_pin(self, pin):
                activity = pin.create_activity()
                # add_user_activity adds it to the user feed and starts the fanout
                self.add_user_activity(pin.user_id, activity)

            def remove_pin(self, pin):
                activity = pin.create_activity()
                # removes the pin from the feeds of the user's followers
                self.remove_user_activity(pin.user_id, activity)

    '''
    # : a dictionary with the feeds to fanout to, keyed by a feed name
    # : for example feed_classes = dict(normal=PinFeed, aggregated=AggregatedPinFeed)
    feed_classes = dict(
        normal=RedisFeed
    )
    # : the user feed class (it stores the latest activity by one user)
    user_feed_class = UserBaseFeed

    # : the number of activities which enter your feed when you follow someone
    follow_activity_limit = 5000
    # : the number of users which are handled in one asynchronous task
    # : when doing the fanout
    fanout_chunk_size = 100

    # maps each fanout priority to the task used for that priority
    priority_fanout_task = {
        FanoutPriority.HIGH: fanout_operation_hi_priority,
        FanoutPriority.LOW: fanout_operation_low_priority
    }

    # metrics object notified when activities are published or removed
    metrics = get_metrics_instance()

    def get_user_follower_ids(self, user_id):
        '''
        Returns the ids of the users following the given user, grouped by
        fanout priority/importance, e.g. ::

            {'HIGH': [...], 'LOW': [...]}

        Subclasses have to provide this; the base implementation always
        raises.

        :param user_id: the user id for which to get the follower ids
        :raises NotImplementedError: always, in this base class
        '''
        raise NotImplementedError()

    def add_user_activity(self, user_id, activity):
        '''
        Stores a new activity and fans it out to the user's followers

        In order, this will

        - hand the activity to ``user_feed_class.insert_activity``
        - add it to the user feed (list of activities for one user)
        - create fanout tasks for every feed class and priority group
        - notify the metrics instance that an activity was published

        :param user_id: the id of the user
        :param activity: the activity to add
        '''
        # activity storage first (the global activity cache, if one is used)
        self.user_feed_class.insert_activity(activity)
        # then the personal feed of the acting user
        self.get_user_feed(user_id).add(activity)

        # the same kwargs object is passed to every fanout task
        fanout_kwargs = {'activities': [activity], 'trim': True}

        followers_by_priority = self.get_user_follower_ids(user_id=user_id)
        for priority, followers in followers_by_priority.items():
            for target_feed_class in self.feed_classes.values():
                self.create_fanout_tasks(
                    followers,
                    target_feed_class,
                    add_operation,
                    operation_kwargs=fanout_kwargs,
                    fanout_priority=priority
                )
        self.metrics.on_activity_published()

    def remove_user_activity(self, user_id, activity):
        '''
        Removes an activity and fans the removal out to the user's followers

        :param user_id: the id of the user
        :param activity: the activity to remove
        '''
        # the global feed is left alone because of race conditions,
        # only the personal feed is cleaned up here
        self.get_user_feed(user_id).remove(activity)

        # removing items never requires trimming
        fanout_kwargs = {'activities': [activity], 'trim': False}

        followers_by_priority = self.get_user_follower_ids(user_id=user_id)
        for priority, followers in followers_by_priority.items():
            for target_feed_class in self.feed_classes.values():
                self.create_fanout_tasks(
                    followers,
                    target_feed_class,
                    remove_operation,
                    operation_kwargs=fanout_kwargs,
                    fanout_priority=priority
                )
        self.metrics.on_activity_removed()

    def get_feeds(self, user_id):
        '''
        Instantiates every class in ``feed_classes`` for the given user

        These are the feeds holding the combined activity of everything
        :user_id is subscribed to.

        :param user_id: the id of the user
        :returns dict: feed instances keyed like ``feed_classes``
        '''
        return {
            name: feed_class(user_id)
            for name, feed_class in self.feed_classes.items()
        }

    def get_user_feed(self, user_id):
        '''
        Returns the feed in which the activity of :user_id itself is saved

        :param user_id: the id of the user
        :returns: an instance of ``user_feed_class`` for that user
        '''
        feed_class = self.user_feed_class
        return feed_class(user_id)

    def update_user_activities(self, activities):
        '''
        Updates the given user activities

        Every activity is passed to :meth:`add_user_activity` again, using
        the activity's ``actor_id`` as the user id.

        :param activities: the activities to update
        '''
        for item in activities:
            actor = item.actor_id
            self.add_user_activity(actor, item)

    def update_user_activity(self, activity):
        '''
        Updates a single user activity

        Shortcut for calling :meth:`update_user_activities` with a list
        holding just this activity.

        :param activity: the activity to update
        '''
        single_item_batch = [activity]
        self.update_user_activities(single_item_batch)

    def follow_feed(self, feed, activities):
        '''
        Copies the given activities into feed

        Nothing is written when activities is empty. Limiting the number
        of activities is left to the caller.

        :param feed: the feed to copy to
        :param activities: the activities to copy into the feed
        :returns: the result of ``feed.add_many`` or None when there was
            nothing to add
        '''
        if not activities:
            return None
        return feed.add_many(activities)

    def unfollow_feed(self, feed, source_feed):
        '''
        Removes the entries originating from the source feed from feed

        All activities of the source feed are removed, so this could take
        a while.

        :param feed: the feed to remove the activities from
        :param source_feed: the feed with a list of activities to remove
        :returns: the result of ``feed.remove_many`` or None when the source
            feed had no activities
        '''
        # a full slice is required to get the activities out of the feed
        to_remove = source_feed[:]
        if not to_remove:
            return None
        return feed.remove_many(to_remove)

    def follow_user(self, user_id, target_user_id, async=True):
        '''
        user_id starts following target_user_id

        The most recent activities of the target user (at most
        ``follow_activity_limit``) are copied into each of the follower's
        feeds.

        :param user_id: the user which is doing the following
        :param target_user_id: the user which is being followed
        :param async: not used in the lines visible here; presumably selects
            asynchronous execution -- TODO confirm.
            NOTE(review): ``async`` is a reserved keyword from Python 3.7 on,
            so this signature only parses on older interpreters.
        '''
        source_feed = self.get_user_feed(target_user_id)
        # fetch the activities only once and reuse them for every feed
        activities = source_feed[:self.follow_activity_limit]
        for user_feed in self.get_feeds(user_id).values():
            self.follow_feed(user_feed, activities)