def read_feed_pages(self, social_model, user_ids):
    '''
    Read the timeline feed of every given user, one page at a time.

    For each user the number of pages to read comes from
    ``social_model.get_browse_depth``; every page read is wrapped in the
    metrics ``feed_reads_timer``. Reading stops early for a user as soon as
    a page comes back empty or shorter than a full page.

    :param social_model: object providing ``get_browse_depth(user_id)``
    :param user_ids: iterable of the user ids whose feeds are read
    '''
    page_size = 25
    for user_id in user_ids:
        depth = social_model.get_browse_depth(user_id)
        timeline = feeds.TimelineFeed(user_id)
        metrics = get_metrics_instance()
        # browse `depth` pages deep
        for _ in range(depth):
            # track the time every read takes
            with metrics.feed_reads_timer(timeline.__class__):
                page_items = timeline[:page_size]
                # an empty or partial page means there is nothing left to read
                if not page_items or len(page_items) != page_size:
                    break
                oldest = page_items[-1]
                # NOTE(review): the return value of filter() is discarded
                # here; if filter() returns a new feed rather than narrowing
                # this one in place, every iteration re-reads the first
                # page -- confirm against the feed implementation.
                timeline.filter(id__lte=oldest)
Exemplo n.º 2
0
class Manager(object):
    '''
    The Manager class handles the fanout from a user's activity
    to the feeds of all of their followers

    .. note::
        Fanout is the process which pushes a little bit of data to all of your
        followers in many small and asynchronous tasks.

    To write your own Manager class you will need to implement

    - get_user_follower_ids
    - feed_classes
    - user_feed_class

    **Example** ::

        from stream_framework.feed_managers.base import Manager

        class PinManager(Manager):
            # customize the feed classes we write to
            feed_classes = dict(
                normal=PinFeed,
                aggregated=AggregatedPinFeed
            )
            # customize the user feed class
            user_feed_class = UserPinFeed

            # define how stream_framework can get the follower ids
            def get_user_follower_ids(self, user_id):
                ids = Follow.objects.filter(target=user_id).values_list('user_id', flat=True)
                return {FanoutPriority.HIGH:ids}

            # utility functions to easy integration for your project
            def add_pin(self, pin):
                activity = pin.create_activity()
                # add user activity adds it to the user feed, and starts the fanout
                self.add_user_activity(pin.user_id, activity)

            def remove_pin(self, pin):
                activity = pin.create_activity()
                # removes the pin from the user's followers feeds
                self.remove_user_activity(pin.user_id, activity)

    '''
    # : a dictionary with the feeds to fanout to
    # : for example feed_classes = dict(normal=PinFeed, aggregated=AggregatedPinFeed)
    feed_classes = dict(normal=RedisFeed)
    # : the user feed class (it stores the latest activity by one user)
    user_feed_class = UserBaseFeed

    # : the number of activities which enter your feed when you follow someone
    follow_activity_limit = 5000
    # : the number of users which are handled in one asynchronous task
    # : when doing the fanout
    fanout_chunk_size = 100

    # maps between priority and fanout tasks
    priority_fanout_task = {
        FanoutPriority.HIGH: fanout_operation_hi_priority,
        FanoutPriority.LOW: fanout_operation_low_priority
    }

    metrics = get_metrics_instance()

    def get_user_follower_ids(self, user_id):
        '''
        Returns a dict of user ids which follow the given user, grouped by
        priority/importance

        eg.
        {'HIGH': [...], 'LOW': [...]}

        Subclasses have to override this; the base implementation always
        raises.

        :param user_id: the user id for which to get the follower ids
        :raises NotImplementedError: always, in this base class
        '''
        raise NotImplementedError()

    def add_user_activity(self, user_id, activity):
        '''
        Store the new activity and then fanout to user followers

        This function will
        - store the activity in the activity storage
        - store it in the user feed (list of activities for one user)
        - fanout for all feed_classes

        :param user_id: the id of the user
        :param activity: the activity which to add
        '''
        # add into the global activity cache (if we are using it)
        self.user_feed_class.insert_activity(activity)
        # now add to the user's personal feed
        user_feed = self.get_user_feed(user_id)
        user_feed.add(activity)
        operation_kwargs = dict(activities=[activity], trim=True)

        followers_by_priority = self.get_user_follower_ids(user_id=user_id)
        for priority_group, follower_ids in followers_by_priority.items():
            # create the fanout tasks, one batch per feed class
            for feed_class in self.feed_classes.values():
                self.create_fanout_tasks(follower_ids,
                                         feed_class,
                                         add_operation,
                                         operation_kwargs=operation_kwargs,
                                         fanout_priority=priority_group)
        self.metrics.on_activity_published()

    def remove_user_activity(self, user_id, activity):
        '''
        Remove the activity and then fanout to user followers

        :param user_id: the id of the user
        :param activity: the activity which to remove
        '''
        # we don't remove from the global feed due to race conditions
        # but we do remove from the personal feed
        user_feed = self.get_user_feed(user_id)
        user_feed.remove(activity)

        # no need to trim when removing items
        operation_kwargs = dict(activities=[activity], trim=False)

        followers_by_priority = self.get_user_follower_ids(user_id=user_id)
        for priority_group, follower_ids in followers_by_priority.items():
            for feed_class in self.feed_classes.values():
                self.create_fanout_tasks(follower_ids,
                                         feed_class,
                                         remove_operation,
                                         operation_kwargs=operation_kwargs,
                                         fanout_priority=priority_group)
        self.metrics.on_activity_removed()

    def get_feeds(self, user_id):
        '''
        get the feeds that contain the sum of all activity
        from feeds :user_id is subscribed to

        :param user_id: the id of the user the feeds are created for
        :returns dict: one feed instance per entry of feed_classes, stored
            under the same key
        '''
        return {key: feed_class(user_id)
                for key, feed_class in self.feed_classes.items()}

    def get_user_feed(self, user_id):
        '''
        feed where activity from :user_id is saved

        :param user_id: the id of the user
        :returns: an instance of user_feed_class for the given user
        '''
        return self.user_feed_class(user_id)

    def update_user_activities(self, activities):
        '''
        Update the user activities by adding each of them again for its
        own actor (``activity.actor_id``)

        :param activities: the activities to update
        '''
        for activity in activities:
            self.add_user_activity(activity.actor_id, activity)

    def update_user_activity(self, activity):
        '''
        Update a single activity; shortcut for update_user_activities

        :param activity: the activity to update
        '''
        self.update_user_activities([activity])

    def follow_feed(self, feed, activities):
        '''
        adds the given activities to the feed; nothing happens when
        activities is empty

        :param feed: the feed to copy to
        :param activities: the activities to copy into the feed
        :returns: the result of ``feed.add_many``, or None when there was
            nothing to add
        '''
        if activities:
            return feed.add_many(activities)

    def unfollow_feed(self, feed, source_feed):
        '''
        removes entries originating from the source feed from the feed
        this will remove all activities, so this could take a while

        :param feed: the feed to remove the activities from
        :param source_feed: the feed with a list of activities to remove
        :returns: the result of ``feed.remove_many``, or None when the
            source feed had no activities
        '''
        activities = source_feed[:]  # need to slice
        if activities:
            return feed.remove_many(activities)

    def follow_user(self, user_id, target_user_id, async_rename=True):
        '''
        user_id starts following target_user_id

        The flag is called ``async_rename`` because ``async`` is a reserved
        word from Python 3.7 on and can no longer be used as a parameter
        name.

        :param user_id: the user which is doing the following
        :param target_user_id: the user which is being followed
        :param async_rename: controls if the operation should be done via celery
        '''
        self.follow_many_users(user_id, [target_user_id], async_rename)
Exemplo n.º 3
0
class Manager(object):
    '''
    Pushes ("fans out") the activity of a user to the feeds of everybody
    following that user.

    .. note::
        Fanout is the process which pushes a little bit of data to all of your
        followers in many small and asynchronous tasks.

    A concrete Manager has to provide

    - get_user_follower_ids
    - feed_classes
    - user_feed_class

    **Example** ::

        from stream_framework.feed_managers.base import Manager

        class PinManager(Manager):
            # customize the feed classes we write to
            feed_classes = dict(
                normal=PinFeed,
                aggregated=AggregatedPinFeed
            )
            # customize the user feed class
            user_feed_class = UserPinFeed

            # define how stream_framework can get the follower ids
            def get_user_follower_ids(self, user_id):
                ids = Follow.objects.filter(target=user_id).values_list('user_id', flat=True)
                return {FanoutPriority.HIGH:ids}

            # utility functions to easy integration for your project
            def add_pin(self, pin):
                activity = pin.create_activity()
                # add user activity adds it to the user feed, and starts the fanout
                self.add_user_activity(pin.user_id, activity)

            def remove_pin(self, pin):
                activity = pin.create_activity()
                # removes the pin from the user's followers feeds
                self.remove_user_activity(pin.user_id, activity)

    '''
    # : the feeds we fan out to, keyed by a label
    # : for example feed_classes = dict(normal=PinFeed, aggregated=AggregatedPinFeed)
    feed_classes = {'normal': RedisFeed}
    # : the user feed class (it stores the latest activity by one user)
    user_feed_class = UserBaseFeed

    # : how many activities enter your feed when you follow someone
    follow_activity_limit = 5000
    # : how many users a single asynchronous fanout task handles
    fanout_chunk_size = 100

    # : which fanout task is used for which priority
    priority_fanout_task = {
        FanoutPriority.HIGH: fanout_operation_hi_priority,
        FanoutPriority.LOW: fanout_operation_low_priority,
    }

    metrics = get_metrics_instance()

    def get_user_follower_ids(self, user_id):
        '''
        Return the ids of the users following ``user_id``, grouped by
        priority/importance, e.g. ``{'HIGH': [...], 'LOW': [...]}``.

        Has to be overridden; this base implementation always raises.

        :param user_id: the user id for which to get the follower ids
        :raises NotImplementedError: always, in this base class
        '''
        raise NotImplementedError()

    def _fanout_to_followers(self, user_id, operation, operation_kwargs):
        '''
        Create the fanout tasks for every priority group of followers of
        ``user_id`` and, within each group, for every feed class.

        :param user_id: the user whose followers are fanned out to
        :param operation: the operation applied to the follower feeds
        :param operation_kwargs: kwargs passed along to the operation
        '''
        followers_by_priority = self.get_user_follower_ids(user_id=user_id)
        for priority_group, follower_ids in followers_by_priority.items():
            for feed_class in self.feed_classes.values():
                self.create_fanout_tasks(
                    follower_ids,
                    feed_class,
                    operation,
                    operation_kwargs=operation_kwargs,
                    fanout_priority=priority_group,
                )

    def add_user_activity(self, user_id, activity):
        '''
        Store a new activity and fan it out to the followers of the user.

        In order, this

        - stores the activity in the activity storage
        - stores it in the user feed (list of activities for one user)
        - creates the fanout tasks for all feed_classes
        - reports the publication to the metrics

        :param user_id: the id of the user
        :param activity: the activity which to add
        '''
        # the global activity cache comes first (if we are using it)
        self.user_feed_class.insert_activity(activity)
        # then the personal feed of the user
        self.get_user_feed(user_id).add(activity)
        add_kwargs = dict(activities=[activity], trim=True)
        self._fanout_to_followers(user_id, add_operation, add_kwargs)
        self.metrics.on_activity_published()

    def remove_user_activity(self, user_id, activity):
        '''
        Remove an activity from the user's feed and fan the removal out to
        the followers of the user.

        :param user_id: the id of the user
        :param activity: the activity which to remove
        '''
        # the global feed is left alone because of race conditions,
        # the personal feed is cleaned up though
        self.get_user_feed(user_id).remove(activity)
        # removing items never requires trimming
        remove_kwargs = dict(activities=[activity], trim=False)
        self._fanout_to_followers(user_id, remove_operation, remove_kwargs)
        self.metrics.on_activity_removed()

    def get_feeds(self, user_id):
        '''
        Instantiate every feed class for the given user.

        :param user_id: the id of the user the feeds are created for
        :returns dict: one feed instance per entry of feed_classes, stored
            under the same key
        '''
        return {label: feed_class(user_id)
                for label, feed_class in self.feed_classes.items()}

    def get_user_feed(self, user_id):
        '''
        The feed in which the activity created by :user_id is saved.

        :param user_id: the id of the user
        :returns: an instance of user_feed_class for the given user
        '''
        feed_class = self.user_feed_class
        return feed_class(user_id)

    def update_user_activities(self, activities):
        '''
        Update the given activities by adding each of them again for its
        own actor (``activity.actor_id``).

        :param activities: the activities to update
        '''
        for item in activities:
            self.add_user_activity(item.actor_id, item)

    def update_user_activity(self, activity):
        '''
        Update a single activity; shortcut for update_user_activities.

        :param activity: the activity to update
        '''
        self.update_user_activities([activity])

    def follow_feed(self, feed, source_feed):
        '''
        Copy entries of source_feed into feed; at most
        follow_activity_limit activities are copied.

        :param feed: the feed to copy to
        :param source_feed: the feed with a list of activities to add
        :returns: the result of ``feed.add_many``, or None when the source
            feed had no activities
        '''
        to_copy = source_feed[:self.follow_activity_limit]
        if not to_copy:
            return None
        return feed.add_many(to_copy)

    def unfollow_feed(self, feed, source_feed):
        '''
        Remove the entries originating from the source feed from the feed.
        All activities are removed, so this could take a while.

        :param feed: the feed to remove the activities from
        :param source_feed: the feed with a list of activities to remove
        :returns: the result of ``feed.remove_many``, or None when the
            source feed had no activities
        '''
        to_remove = source_feed[:]  # need to slice
        if not to_remove:
            return None
        return feed.remove_many(to_remove)

    def follow_user(self, user_id, target_user_id, async_rename=True):
        '''
        user_id starts following target_user_id

        :param user_id: the user which is doing the following
        :param target_user_id: the user which is being followed
        :param async_rename: controls if the operation should be done via celery
        '''
        targets = [target_user_id]
        self.follow_many_users(user_id, targets, async_rename)

    def unfollow_user(self, user_id, target_user_id, async_rename=True):
        '''
        user_id stops following target_user_id

        :param user_id: the user which is doing the unfollowing
        :param target_user_id: the user which is being unfollowed
        :param async_rename: controls if the operation should be done via celery
        '''
        targets = [target_user_id]
        self.unfollow_many_users(user_id, targets, async_rename)

    def follow_many_users(self, user_id, target_ids, async_rename=True):
        '''
        Copies feeds' entries that belong to target_ids into the
        corresponding feeds of user_id.

        :param user_id: the user which is doing the following
        :param target_ids: the users to follow
        :param async_rename: controls if the operation should be done via celery
        '''
        # `.delay` hands the work to celery, the bare task runs it inline
        runner = follow_many.delay if async_rename else follow_many
        runner(self, user_id, target_ids, self.follow_activity_limit)

    def unfollow_many_users(self, user_id, target_ids, async_rename=True):
        '''
        Removes feeds' entries that belong to target_ids from the
        corresponding feeds of user_id.

        :param user_id: the user which is doing the unfollowing
        :param target_ids: the users to unfollow
        :param async_rename: controls if the operation should be done via celery
        '''
        # `.delay` hands the work to celery, the bare task runs it inline
        runner = unfollow_many.delay if async_rename else unfollow_many
        runner(self, user_id, target_ids)

    def get_fanout_task(self, priority=None, feed_class=None):
        '''
        Returns the fanout task taking priority into account; priorities
        without an entry in priority_fanout_task get ``fanout_operation``.

        :param priority: the priority of the task
        :param feed_class: the feed_class the task will write to (not used
            by this implementation)
        '''
        task_by_priority = self.priority_fanout_task
        return task_by_priority.get(priority, fanout_operation)

    def create_fanout_tasks(self,
                            follower_ids,
                            feed_class,
                            operation,
                            operation_kwargs=None,
                            fanout_priority=None):
        '''
        Creates the fanout tasks for the given followers and feed class.

        The follower ids are split into chunks of fanout_chunk_size and one
        task is started per chunk.

        :param follower_ids: specify the list of followers
        :param feed_class: the feed class to run the operation on
        :param operation: the operation function applied to all follower feeds
        :param operation_kwargs: kwargs passed to the operation
        :param fanout_priority: the priority set to this fanout
        :returns list: the started tasks; empty when no fanout task is
            available for the priority
        '''
        fanout_task = self.get_fanout_task(fanout_priority,
                                           feed_class=feed_class)
        if not fanout_task:
            return []

        per_task = self.fanout_chunk_size
        id_chunks = list(chunks(follower_ids, per_task))
        logger.info(
            'spawning %s subtasks for %s user ids in chunks of %s users',
            len(id_chunks), len(follower_ids), per_task)

        # now actually create the tasks, one per chunk of followers
        return [
            fanout_task.delay(feed_manager=self,
                              feed_class=feed_class,
                              user_ids=id_chunk,
                              operation=operation,
                              operation_kwargs=operation_kwargs)
            for id_chunk in id_chunks
        ]

    def fanout(self, user_ids, feed_class, operation, operation_kwargs):
        '''
        Apply the operation to the feed of every given user.

        This functionality is called from within stream_framework.tasks.fanout_operation

        Note that the batch interface is stored in ``operation_kwargs``
        under the key ``batch_interface``, i.e. the passed dict is modified.

        :param user_ids: the list of user ids which feeds we should apply the
            operation against
        :param feed_class: the feed to run the operation on
        :param operation: the operation to run on the feed
        :param operation_kwargs: kwargs to pass to the operation

        '''
        with self.metrics.fanout_timer(feed_class):
            separator = '===' * 10
            logger.info('%s starting fanout %s', separator, separator)
            with feed_class.get_timeline_batch_interface() as batch_interface:
                logger.info(
                    'starting batch interface for feed %s, fanning out to %s users',
                    feed_class, len(user_ids))
                operation_kwargs['batch_interface'] = batch_interface
                for user_id in user_ids:
                    logger.debug('now handling fanout to user %s', user_id)
                    operation(feed_class(user_id), **operation_kwargs)
            logger.info('finished fanout for feed %s', feed_class)
        # every activity was applied once per user
        activity_count = len(operation_kwargs['activities'])
        self.metrics.on_fanout(feed_class, operation,
                               activity_count * len(user_ids))

    def batch_import(self, user_id, activities, fanout=True, chunk_size=500):
        '''
        Batch import all of the user's activities and distribute
        them to the user's followers

        **Example**::

            activities = [long list of activities]
            manager.batch_import(13, activities, chunk_size=500)

        :param user_id: the user who created the activities
        :param activities: a list of activities from this user
        :param fanout: if we should run the fanout or not
        :param chunk_size: per how many activities to run the batch operations
        :raises ValueError: when the first activity has a different actor
            than ``user_id``

        '''
        activities = list(activities)
        # nothing to do for empty lists
        if not activities:
            return
        logger.info('running batch import for user %s', user_id)

        user_feed = self.get_user_feed(user_id)
        # only the first activity is inspected
        first_actor_id = activities[0].actor_id
        if first_actor_id != user_id:
            raise ValueError('Send activities for only one user please')

        activity_chunks = list(chunks(activities, chunk_size))
        logger.info('processing %s items in %s chunks of %s', len(activities),
                    len(activity_chunks), chunk_size)

        for index, activity_chunk in enumerate(activity_chunks):
            chunk_length = len(activity_chunk)
            # the global activity storage goes first
            self.user_feed_class.insert_activities(activity_chunk)
            logger.info(
                'inserted chunk %s (length %s) into the global activity store',
                index, chunk_length)
            # followed by the personal timeline of the user
            user_feed.add_many(activity_chunk, trim=False)
            logger.info('inserted chunk %s (length %s) into the user feed',
                        index, chunk_length)
            if not fanout:
                continue

            # now start a big fanout task
            logger.info('starting task fanout for chunk %s', index)
            follower_ids_by_prio = self.get_user_follower_ids(
                user_id=user_id)
            chunk_kwargs = dict(activities=activity_chunk, trim=False)
            for feed_class in self.feed_classes.values():
                for priority_group, fids in follower_ids_by_prio.items():
                    self.create_fanout_tasks(
                        fids,
                        feed_class,
                        add_operation,
                        fanout_priority=priority_group,
                        operation_kwargs=chunk_kwargs)
Exemplo n.º 4
0
def run_benchmark(benchmark, network_size, max_network_size, multiplier, duration):
    '''
    Simulate a growing social network and send the resulting load to celery.

    For every simulated day the active users follow other users, create
    activities and read their feeds. The work is grouped per task name and
    sent off in chunks of 100 argument lists. After ``benchmark.duration``
    days the network size is multiplied by ``benchmark.multiplier``; the
    benchmark ends once the network size has reached
    ``benchmark.max_network_size``.

    :param benchmark: the name of the benchmark to load, or None to build
        the 'stream_bench_custom' benchmark from the remaining arguments
    :param network_size: passed to the custom benchmark (only used when
        benchmark is None)
    :param max_network_size: passed to the custom benchmark (only used when
        benchmark is None)
    :param multiplier: passed to the custom benchmark (only used when
        benchmark is None)
    :param duration: passed to the custom benchmark (only used when
        benchmark is None)
    '''
    logger.info('Starting the benchmark! Exciting.... :)')

    if benchmark is None:
        benchmark_class = get_benchmark('stream_bench_custom')
        benchmark = benchmark_class(network_size, max_network_size, multiplier, duration)
    else:
        benchmark_class = get_benchmark(benchmark)
        benchmark = benchmark_class()

    logger.info('Running benchmark %s', benchmark.name)
    logger.info('Network size starting at %s will grow to %s', benchmark.network_size, benchmark.max_network_size)
    logger.info('Multiplier is set to %s and duration %s', benchmark.multiplier, benchmark.duration)
    metrics_instance = get_metrics_instance()

    social_model = benchmark.get_social_model()
    days = 0
    while True:
        logger.info(
            'Simulating a social network with network size %s', social_model.network_size)
        object_id = 1
        for _ in range(benchmark.duration):
            days += 1
            social_model.day = days
            # task name -> list of argument lists for that task
            daily_tasks = collections.defaultdict(list)
            t = timer()
            metrics_instance.on_day_change(days)
            logger.debug('Day %s for our network', days)
            # create load based on the current model
            active_users = social_model.active_users
            for user_id in active_users:
                # follow other users, note that we don't actually store the follower
                #  lists for this benchmark
                for target_user_id in social_model.get_new_follows(user_id):
                    daily_tasks['follow_users'].append([user_id, target_user_id])

                # create activities
                for _ in range(social_model.get_user_activity(user_id)):
                    activity = create_activity(user_id, object_id)
                    object_id += 1
                    daily_tasks['add_activities'].append([user_id, activity])
                # read a few pages of data
                daily_tasks['read_feed_pages'].append([user_id])

            logger.debug('%s seconds spent creating the model', t.next())
            # send the daily tasks to celery: build all signatures first,
            # then dispatch them
            batch_tasks = []
            for task_name, task_args in daily_tasks.items():
                task = getattr(tasks, task_name)
                for task_arg_chunk in chunks(task_args, 100):
                    task_signature = task.s(social_model, task_arg_chunk)
                    batch_tasks.append(task_signature)
            for signature in batch_tasks:
                signature.apply_async()
            logger.debug('%s seconds spent sending %s tasks', t.next(), len(batch_tasks))

            # the results of the tasks are not awaited, the next day
            # simply starts after a short pause
            logger.debug('Day %s finished', days)
            time.sleep(1)

        # grow the network
        logger.info('Growing the social network.....')
        social_model.network_size = social_model.network_size * benchmark.multiplier
        metrics_instance.on_network_size_change(social_model.network_size)
        if social_model.network_size >= benchmark.max_network_size:
            logger.info(
                'Reached the max users, we\'re done with our benchmark!')
            # leave the simulation loop; without this the benchmark would
            # keep growing the network forever after announcing it is done
            break
Exemplo n.º 5
0
class BaseStorage(object):
    '''
    Common base of the two kinds of storage a feed relies on, the

    - Activity Storage and the
    - Timeline Storage

    The process works as follows::

        feed = BaseFeed()
        # the activity storage is used to store the activity and mapped to an id
        feed.insert_activity(activity)
        # now the id is inserted into the timeline storage
        feed.add(activity)

    Currently there are two activity storage classes ready for production:

    - Cassandra
    - Redis

    Storage classes are always handed a full activity object; turning
    that object into something the database can store is the job of the
    serializer class.
    '''
    #: The default serializer class to use
    default_serializer_class = DummySerializer
    metrics = get_metrics_instance()

    #: The activity classes handed to the serializer
    activity_class = Activity
    aggregated_activity_class = AggregatedActivity

    def __init__(self, serializer_class=None, activity_class=None, **options):
        '''
        :param serializer_class: allows you to overwrite the serializer class
        :param activity_class: allows you to overwrite the activity class
        :param options: any further options, kept on ``self.options``; an
            ``aggregated_activity_class`` entry overwrites the aggregated
            activity class
        '''
        chosen_serializer = serializer_class
        if not chosen_serializer:
            chosen_serializer = self.default_serializer_class
        self.serializer_class = chosen_serializer
        self.options = options
        if activity_class is not None:
            self.activity_class = activity_class
        # popped from the very dict stored on self.options, so the key
        # disappears from there as well
        aggregated_class = options.pop('aggregated_activity_class', None)
        if aggregated_class is not None:
            self.aggregated_activity_class = aggregated_class

    def flush(self):
        '''
        Flushes the entire storage; a no-op in this base class
        '''
        pass

    def activities_to_ids(self, activities_or_ids):
        '''
        Utility function for lower levels: maps every element through
        activity_to_id

        :param activities_or_ids: activities and/or ids
        :returns list: one id per given element
        '''
        return [self.activity_to_id(item) for item in activities_or_ids]

    def activity_to_id(self, activity):
        '''
        Returns the ``serialization_id`` of the given object, or the object
        itself when it has no such attribute

        :param activity: an activity or an id
        '''
        return getattr(activity, 'serialization_id', activity)

    @property
    def serializer(self):
        '''
        Returns a new instance of the serializer class on every access

        The serializer needs to know about the activity and
        aggregated activity classes we're using
        '''
        factory = self.serializer_class
        extra_kwargs = {}
        # only hand the aggregated class over when one is configured
        if getattr(self, 'aggregated_activity_class', None) is not None:
            extra_kwargs['aggregated_activity_class'] = (
                self.aggregated_activity_class)
        return factory(activity_class=self.activity_class, **extra_kwargs)

    def serialize_activity(self, activity):
        '''
        Serialize the activity and returns the serialized activity

        :param activity: the activity to serialize
        :returns: whatever the serializer's ``dumps`` produces
        '''
        return self.serializer.dumps(activity)

    def serialize_activities(self, activities):
        '''
        Serializes the list of activities

        :param activities: the list of activities
        :returns dict: the serialized activities keyed by activity id
        '''
        by_id = {}
        for item in activities:
            payload = self.serialize_activity(item)
            by_id[self.activity_to_id(item)] = payload
        return by_id

    def deserialize_activities(self, serialized_activities):
        '''
        Deserializes the given serialized activities

        :param serialized_activities: a list of serialized activities, a
            dictionary with activity ids and serialized activities, or None
        :returns list: the deserialized activities; empty for None
        '''
        # for a dict only the values (the serialized activities) matter
        if isinstance(serialized_activities, dict):
            serialized_activities = serialized_activities.values()

        if serialized_activities is None:
            return []
        return [self.serializer.loads(item)
                for item in serialized_activities]
Exemplo n.º 6
0
class FeedManager:
    """
    A feed manager is responsible for adding activities to a set of feeds. This class contains some
    helper functions that make async fanout easier.
    """

    # Fanout task to use for each priority; priorities missing from this
    # mapping fall back to ``fanout_operation`` (see get_fanout_task).
    priority_fanout_task = {
        FanoutPriority.HIGH: fanout_operation_hi_priority,
        FanoutPriority.LOW: fanout_operation_low_priority
    }

    # Chunk size passed to ``chunks`` when splitting recipients over tasks.
    fanout_chunk_size = 100

    metrics = get_metrics_instance()

    def add_activity(self, activity, recipients, feed_classes):
        """
        Fan out a single activity to a set of recipients.

        :param activity: the activity to add
        :param recipients: iterable of recipient ids; duplicates are dropped
        :param feed_classes: iterable of feed classes to write to
        """
        operation_kwargs = dict(activities=[activity], trim=True)
        # Materialise once: a one-shot iterable would otherwise be exhausted
        # after the first feed class, leaving later classes without recipients.
        recipient_ids = set(recipients)

        for feed_class in feed_classes:
            self.create_fanout_tasks(recipient_ids,
                                     feed_class,
                                     add_operation,
                                     operation_kwargs=operation_kwargs,
                                     fanout_priority=FanoutPriority.HIGH)
        self.metrics.on_activity_published()

    def remove_activity(self, activity, recipients, feed_classes):
        """
        Remove an activity from a set of recipient feeds.

        :param activity: the activity to remove
        :param recipients: iterable of recipient ids; duplicates are dropped
        :param feed_classes: iterable of feed classes to remove from
        """
        operation_kwargs = dict(activities=[activity], trim=False)
        # Materialise once so every feed class sees the same recipients,
        # even when ``recipients`` is a one-shot iterable.
        recipient_ids = set(recipients)

        for feed_class in feed_classes:
            self.create_fanout_tasks(recipient_ids,
                                     feed_class,
                                     remove_operation,
                                     operation_kwargs=operation_kwargs,
                                     fanout_priority=FanoutPriority.HIGH)
        self.metrics.on_activity_removed()

    def get_fanout_task(self, priority=None, feed_class=None):
        """
        Returns the fanout task taking priority into account.

        :param priority: key looked up in ``priority_fanout_task``
        :param feed_class: not used by this implementation
        :returns: the task registered for ``priority``, or
            ``fanout_operation`` when the priority is not registered
        """

        return self.priority_fanout_task.get(priority, fanout_operation)

    def create_fanout_tasks(self,
                            follower_ids,
                            feed_class,
                            operation,
                            operation_kwargs=None,
                            fanout_priority=None):
        """
        Creates the fanout tasks for the given followers and feed class.

        The follower ids are split with ``chunks`` using ``fanout_chunk_size``
        and one task is queued per chunk.

        :param follower_ids: sized collection of recipient ids
        :param feed_class: the feed class the operation is applied to
        :param operation: callable forwarded to the fanout task
        :param operation_kwargs: keyword arguments forwarded to the task
        :param fanout_priority: priority used to pick the fanout task
        :returns list: the results of ``fanout_task.delay``, one per chunk;
            an empty list when no fanout task is available
        """

        fanout_task = self.get_fanout_task(fanout_priority,
                                           feed_class=feed_class)

        if not fanout_task:
            return []

        chunk_size = self.fanout_chunk_size
        user_ids_chunks = list(chunks(follower_ids, chunk_size))

        log.info('feed_spawn_tasks',
                 subtasks=len(user_ids_chunks),
                 recipients=len(follower_ids))

        return [
            fanout_task.delay(feed_manager=self,
                              feed_class=feed_class,
                              user_ids=ids_chunk,
                              operation=operation,
                              operation_kwargs=operation_kwargs)
            for ids_chunk in user_ids_chunks
        ]

    def fanout(self, user_ids, feed_class, operation, operation_kwargs):
        """
        This functionality is called from within stream_framework.tasks.fanout_operation
        This function is almost always called in async tasks created by stream_framework.

        Applies ``operation`` to the feed of every user in ``user_ids`` inside
        a single timeline batch interface and reports the fanout to metrics.

        :param user_ids: sized collection of user ids to fan out to
        :param feed_class: feed class instantiated once per user id
        :param operation: callable invoked as ``operation(feed, **kwargs)``
        :param operation_kwargs: keyword arguments for ``operation``; None is
            treated as no arguments. The dict is not modified.
        """
        # Work on a copy: the caller's dict may be shared between tasks and
        # must not keep a reference to a batch interface that has been exited.
        call_kwargs = dict(operation_kwargs or {})

        with self.metrics.fanout_timer(feed_class):
            batch_context_manager = feed_class.get_timeline_batch_interface()
            with batch_context_manager as batch_interface:
                log.info('feed_batch_fanout', recipients=len(user_ids))

                call_kwargs['batch_interface'] = batch_interface
                for user_id in user_ids:
                    feed = feed_class(user_id)
                    operation(feed, **call_kwargs)

        # An operation without an 'activities' argument counts as zero.
        fanout_count = len(call_kwargs.get('activities', ())) * len(user_ids)
        self.metrics.on_fanout(feed_class, operation, fanout_count)
Exemplo n.º 7
0
def run_benchmark(benchmark, network_size, max_network_size, multiplier,
                  duration):
    logger.info('Starting the benchmark! Exciting.... :)')

    if benchmark is None:
        benchmark_class = get_benchmark('stream_bench_custom')
        benchmark = benchmark_class(network_size, max_network_size, multiplier,
                                    duration)
    else:
        benchmark_class = get_benchmark(benchmark)
        benchmark = benchmark_class()

    logger.info('Running benchmark %s', benchmark.name)
    logger.info('Network size starting at %s will grow to %s',
                benchmark.network_size, benchmark.max_network_size)
    logger.info('Multiplier is set to %s and duration %s',
                benchmark.multiplier, benchmark.duration)
    metrics_instance = get_metrics_instance()

    social_model = benchmark.get_social_model()
    days = 0
    while True:
        logger.info('Simulating a social network with network size %s',
                    social_model.network_size)
        object_id = 1
        for x in range(benchmark.duration):
            days += 1
            social_model.day = days
            daily_tasks = collections.defaultdict(list)
            t = timer()
            metrics_instance.on_day_change(days)
            logger.debug('Day %s for our network', days)
            # create load based on the current model
            active_users = social_model.active_users
            for user_id in active_users:
                # follow other users, note that we don't actually store the follower
                #  lists for this benchmark
                for target_user_id in social_model.get_new_follows(user_id):
                    daily_tasks['follow_users'].append(
                        [user_id, target_user_id])

                # create activities
                for x in range(social_model.get_user_activity(user_id)):
                    activity = create_activity(user_id, object_id)
                    object_id += 1
                    daily_tasks['add_activities'].append([user_id, activity])
                # read a few pages of data
                daily_tasks['read_feed_pages'].append([user_id])

            logger.debug('%s seconds spent creating the model', t.next())
            # send the daily tasks to celery
            batch_tasks = []
            for task_name, task_args in daily_tasks.items():
                task = getattr(tasks, task_name)
                for task_arg_chunk in chunks(task_args, 100):
                    task_signature = task.s(social_model, task_arg_chunk)
                    batch_tasks.append(task_signature)
            for signature in batch_tasks:
                signature.apply_async()
            logger.debug('%s seconds spent sending %s tasks', t.next(),
                         len(batch_tasks))
            # wait
            #while True:
            #    if result.ready():
            #        break
            #    time.sleep(1)
            #    logger.debug('Waiting for day %s to finish' % days)

            logger.debug('Day %s finished', days)
            time.sleep(1)

        # grow the network
        logger.info('Growing the social network.....')
        social_model.network_size = social_model.network_size * benchmark.multiplier
        metrics_instance.on_network_size_change(social_model.network_size)
        if social_model.network_size >= benchmark.max_network_size:
            logger.info(
                'Reached the max users, we\'re done with our benchmark!')