class BaseStorage(object):

    '''
    Common base for the storage backends used by a feed.

    A feed relies on two kinds of storage:

    - the Activity Storage
    - the Timeline Storage

    The process works as follows::

        feed = BaseFeed()
        # the activity storage stores the activity and maps it to an id
        feed.insert_activity(activity)
        # now the id is inserted into the timeline storage
        feed.add(activity)

    Currently there are two activity storage classes ready for production:

    - Cassandra
    - Redis

    Storage classes always receive full activity objects. The serializer
    class decides how an activity is turned into something the database
    can store.
    '''

    #: The default serializer class to use
    default_serializer_class = DummySerializer
    metrics = get_metrics_instance()

    #: Classes handed to the serializer; both can be overridden per instance
    activity_class = Activity
    aggregated_activity_class = AggregatedActivity

    def __init__(self, serializer_class=None, activity_class=None, **options):
        '''
        :param serializer_class: allows you to overwrite the serializer class
        :param activity_class: optional replacement for ``activity_class``
        :param options: remaining keyword arguments, kept on ``self.options``;
            an ``aggregated_activity_class`` entry is removed from them and,
            when not None, replaces the class level default
        '''
        if not serializer_class:
            serializer_class = self.default_serializer_class
        self.serializer_class = serializer_class

        # taken out first so it never ends up in the stored options
        custom_aggregated_class = options.pop(
            'aggregated_activity_class', None)
        self.options = options

        if activity_class is not None:
            self.activity_class = activity_class
        if custom_aggregated_class is not None:
            self.aggregated_activity_class = custom_aggregated_class

    def flush(self):
        '''
        Flushes the entire storage (a no-op in this base class)
        '''
        pass

    def activities_to_ids(self, activities_or_ids):
        '''
        Utility function for the lower levels: maps every element through
        ``activity_to_id`` so callers can pass activities or plain ids

        :param activities_or_ids: iterable of activities and/or ids
        :returns list: one id per element, in the same order
        '''
        return [self.activity_to_id(item) for item in activities_or_ids]

    def activity_to_id(self, activity):
        '''
        Returns the ``serialization_id`` of the given object, or the object
        itself when it has no such attribute (i.e. it already is an id)
        '''
        return getattr(activity, 'serialization_id', activity)

    @property
    def serializer(self):
        '''
        Returns a new instance of the serializer class on every access

        The serializer needs to know about the activity and
        aggregated activity classes we're using
        '''
        serializer_class = self.serializer_class
        extra_kwargs = {}
        # only forwarded when set, so serializers without this keyword
        # argument are not handed it
        if getattr(self, 'aggregated_activity_class', None) is not None:
            extra_kwargs['aggregated_activity_class'] = (
                self.aggregated_activity_class)
        return serializer_class(
            activity_class=self.activity_class, **extra_kwargs)

    def serialize_activity(self, activity):
        '''
        Serializes a single activity using ``self.serializer``

        :param activity: the activity to serialize
        :returns: whatever the serializer's ``dumps`` returns
        '''
        return self.serializer.dumps(activity)

    def serialize_activities(self, activities):
        '''
        Serializes the list of activities

        :param activities: the list of activities
        :returns dict: the serialized activities keyed by activity id
        '''
        serialized_by_id = {}
        for item in activities:
            payload = self.serialize_activity(item)
            serialized_by_id[self.activity_to_id(item)] = payload
        return serialized_by_id

    def deserialize_activities(self, serialized_activities):
        '''
        Deserializes the given serialized activities

        :param serialized_activities: either an iterable of serialized
            activities or a dictionary mapping activity ids to them
        :returns list: the result of the serializer's ``loads`` per entry
        '''
        # for a dict only the values carry serialized activities
        if isinstance(serialized_activities, dict):
            serialized_activities = serialized_activities.values()
        return [
            self.serializer.loads(payload)
            for payload in serialized_activities
        ]
class Feedly(object):

    '''
    The Feedly class handles the fanout from a user's activity
    to all their follower's feeds

    .. note::
        Fanout is the process which pushes a little bit of data to all of your
        followers in many small and asynchronous tasks.

    To write your own Feedly class you will need to implement

    - get_user_follower_ids
    - feed_classes
    - user_feed_class

    **Example** ::

        from feedly.feed_managers.base import Feedly

        class PinFeedly(Feedly):
            # customize the feed classes we write to
            feed_classes = dict(
                normal=PinFeed,
                aggregated=AggregatedPinFeed
            )
            # customize the user feed class
            user_feed_class = UserPinFeed

            # define how feedly can get the follower ids
            def get_user_follower_ids(self, user_id):
                ids = Follow.objects.filter(target=user_id).values_list('user_id', flat=True)
                return {FanoutPriority.HIGH:ids}

            # utility functions to ease integration for your project
            def add_pin(self, pin):
                activity = pin.create_activity()
                # add user activity adds it to the user feed, and starts the fanout
                self.add_user_activity(pin.user_id, activity)

            def remove_pin(self, pin):
                activity = pin.create_activity()
                # removes the pin from the user's followers feeds
                self.remove_user_activity(pin.user_id, activity)

    '''
    # : a dictionary with the feeds to fanout to
    # : for example feed_classes = dict(normal=PinFeed, aggregated=AggregatedPinFeed)
    feed_classes = dict(
        normal=RedisFeed
    )
    # : the user feed class (it stores the latest activity by one user)
    user_feed_class = UserBaseFeed

    # : the number of activities which enter your feed when you follow someone
    follow_activity_limit = 5000
    # : the number of users which are handled in one asynchronous task
    # : when doing the fanout
    fanout_chunk_size = 100

    # maps between priority and fanout tasks
    priority_fanout_task = {
        FanoutPriority.HIGH: fanout_operation_hi_priority,
        FanoutPriority.LOW: fanout_operation_low_priority
    }

    metrics = get_metrics_instance()

    def get_user_follower_ids(self, user_id):
        '''
        Returns a dict of users ids which follow the given user grouped by
        priority/importance

        eg.
        {'HIGH': [...], 'LOW': [...]}

        Must be implemented by subclasses.

        :param user_id: the user id for which to get the follower ids
        :raises NotImplementedError: always, in this base class
        '''
        raise NotImplementedError()

    def add_user_activity(self, user_id, activity):
        '''
        Store the new activity and then fanout to user followers

        This function will
        - store the activity in the activity storage
        - store it in the user feed (list of activities for one user)
        - fanout for all feed_classes

        :param user_id: the id of the user
        :param activity: the activity which to add
        '''
        # add into the global activity cache (if we are using it)
        self.user_feed_class.insert_activity(activity)
        # now add to the user's personal feed
        user_feed = self.get_user_feed(user_id)
        user_feed.add(activity)
        operation_kwargs = dict(activities=[activity], trim=True)

        follower_groups = self.get_user_follower_ids(user_id=user_id)
        for priority_group, follower_ids in follower_groups.items():
            # create the fanout tasks, one set per feed class
            for feed_class in self.feed_classes.values():
                self.create_fanout_tasks(
                    follower_ids,
                    feed_class,
                    add_operation,
                    operation_kwargs=operation_kwargs,
                    fanout_priority=priority_group
                )
        self.metrics.on_activity_published()

    def remove_user_activity(self, user_id, activity):
        '''
        Remove the activity and then fanout to user followers

        :param user_id: the id of the user
        :param activity: the activity which to remove
        '''
        # we don't remove from the global feed due to race conditions
        # but we do remove from the personal feed
        user_feed = self.get_user_feed(user_id)
        user_feed.remove(activity)

        # no need to trim when removing items
        operation_kwargs = dict(activities=[activity], trim=False)

        follower_groups = self.get_user_follower_ids(user_id=user_id)
        for priority_group, follower_ids in follower_groups.items():
            for feed_class in self.feed_classes.values():
                self.create_fanout_tasks(
                    follower_ids,
                    feed_class,
                    remove_operation,
                    operation_kwargs=operation_kwargs,
                    fanout_priority=priority_group
                )
        self.metrics.on_activity_removed()

    def get_feeds(self, user_id):
        '''
        get the feed that contains the sum of all activity
        from feeds :user_id is subscribed to

        :param user_id: the id of the user whose feeds are instantiated
        :returns dict: a dictionary with the feeds we're pushing to,
            keyed like ``feed_classes``
        '''
        return {
            name: feed_class(user_id)
            for name, feed_class in self.feed_classes.items()
        }

    def get_user_feed(self, user_id):
        '''
        feed where activity from :user_id is saved

        :param user_id: the id of the user
        :returns: an instance of ``user_feed_class`` for that user
        '''
        return self.user_feed_class(user_id)

    def update_user_activities(self, activities):
        '''
        Update the user activities by passing each one to
        ``add_user_activity`` with the activity's ``actor_id`` as user id

        :param activities: the activities to update
        '''
        for activity in activities:
            self.add_user_activity(activity.actor_id, activity)

    def update_user_activity(self, activity):
        '''
        Single activity convenience wrapper for ``update_user_activities``

        :param activity: the activity to update
        '''
        self.update_user_activities([activity])

    def follow_feed(self, feed, activities):
        '''
        copies the given activities into the feed; callers are expected to
        have limited them already (``follow_user`` passes at most
        follow_activity_limit activities)

        :param feed: the feed to copy to
        :param activities: the activities to copy into the feed
        :returns: the result of ``feed.add_many``, or None when there is
            nothing to copy
        '''
        if activities:
            return feed.add_many(activities)

    def unfollow_feed(self, feed, source_feed):
        '''
        removes entries originating from the source feed from the feed class
        this will remove all activities, so this could take a while

        :param feed: the feed to remove the activities from
        :param source_feed: the feed with a list of activities to remove
        :returns: the result of ``feed.remove_many``, or None when the
            source feed holds no activities
        '''
        activities = source_feed[:]  # need to slice
        if activities:
            return feed.remove_many(activities)

    def follow_user(self, user_id, target_user_id, async_=True,
                    **legacy_kwargs):
        '''
        user_id starts following target_user_id

        Copies up to ``follow_activity_limit`` activities from the target
        user's personal feed into every feed of the following user.

        :param user_id: the user which is doing the following
        :param target_user_id: the user which is being followed
        :param async_: formerly named ``async``; accepted for compatibility
            and currently unused by this method
        :raises TypeError: when an unknown keyword argument is passed
        '''
        # ``async`` is a reserved keyword since Python 3.7 and can no longer
        # be a parameter name. Callers that still supply it by keyword
        # (Python 2 code, or ``**{'async': ...}``) are accepted here.
        legacy_kwargs.pop('async', None)
        if legacy_kwargs:
            raise TypeError(
                'follow_user() got unexpected keyword arguments: %s'
                % ', '.join(sorted(legacy_kwargs)))

        source_feed = self.get_user_feed(target_user_id)
        # fetch the activities only once
        activities = source_feed[:self.follow_activity_limit]
        for user_feed in self.get_feeds(user_id).values():
            self.follow_feed(user_feed, activities)