Пример #1
0
    def create_fanout_tasks(self, follower_ids, feed_class, operation, operation_kwargs=None, fanout_priority=None):
        '''
        Spawn the fanout subtasks that apply ``operation`` to follower feeds

        The follower ids are cut into groups of ``self.fanout_chunk_size`` and
        one task is queued per group through the ``delay`` method of the task
        returned by ``self.get_fanout_task``

        :param follower_ids: the ids of the followers (must support ``len``)
        :param feed_class: the feed class to run the operation on
        :param operation: the operation function applied to all follower feeds
        :param operation_kwargs: kwargs passed to the operation
        :param fanout_priority: the priority used to look up the fanout task
        :returns: the list of ``delay`` results, one per group; an empty list
            when ``get_fanout_task`` returns nothing usable
        '''
        task_for_priority = self.get_fanout_task(
            fanout_priority, feed_class=feed_class)
        if not task_for_priority:
            return []

        per_task = self.fanout_chunk_size
        id_groups = list(chunks(follower_ids, per_task))
        logger.info(
            'spawning %s subtasks for %s user ids in chunks of %s users',
            len(id_groups), len(follower_ids), per_task)

        # queue one subtask per group of follower ids
        return [
            task_for_priority.delay(
                feed_manager=self,
                feed_class=feed_class,
                user_ids=id_group,
                operation=operation,
                operation_kwargs=operation_kwargs
            )
            for id_group in id_groups
        ]
Пример #2
0
    def create_fanout_tasks(self, follower_ids, feed_class, operation, operation_kwargs=None, fanout_priority=None):
        '''
        Queue the fanout work for a set of followers

        Looks up the fanout task for the given priority and feed class,
        splits the follower ids per ``self.fanout_chunk_size`` and calls the
        task's ``delay`` once for every split

        :param follower_ids: the list of follower ids
        :param feed_class: the feed class to run the operation on
        :param operation: the operation function applied to all follower feeds
        :param operation_kwargs: kwargs passed to the operation
        :param fanout_priority: the priority set to this fanout
        :returns: a list with the result of every ``delay`` call, or ``[]``
            if no fanout task was found
        '''
        fanout_task = self.get_fanout_task(
            fanout_priority, feed_class=feed_class)
        if not fanout_task:
            return []

        batch_size = self.fanout_chunk_size
        batches = list(chunks(follower_ids, batch_size))
        logger.info(
            'spawning %s subtasks for %s user ids in chunks of %s users',
            len(batches), len(follower_ids), batch_size)

        # everything except the user ids is the same for every subtask
        shared_kwargs = dict(
            feed_manager=self,
            feed_class=feed_class,
            operation=operation,
            operation_kwargs=operation_kwargs,
        )
        spawned = []
        for batch in batches:
            spawned.append(fanout_task.delay(user_ids=batch, **shared_kwargs))
        return spawned
Пример #3
0
    def batch_import(self, user_id, activities, fanout=True, chunk_size=500):
        '''
        Import a batch of one user's activities and, optionally, distribute
        them to that user's followers

        **Example**::

            activities = [long list of activities]
            stream_framework.batch_import(13, activities, chunk_size=500)

        :param user_id: the user who created the activities
        :param activities: an iterable of activities from this user
        :param fanout: if we should run the fanout or not
        :param chunk_size: per how many activities to run the batch operations
        :raises ValueError: if the first activity's ``actor_id`` is not
            ``user_id`` (only the first activity is checked)

        '''
        activities = list(activities)
        if not activities:
            # nothing to import
            return
        logger.info('running batch import for user %s', user_id)

        user_feed = self.get_user_feed(user_id)
        first_activity = activities[0]
        if first_activity.actor_id != user_id:
            raise ValueError('Send activities for only one user please')

        batches = list(chunks(activities, chunk_size))
        logger.info('processing %s items in %s chunks of %s', len(activities),
                    len(batches), chunk_size)

        for position, batch in enumerate(batches):
            batch_length = len(batch)

            # step 1: the global activity storage
            self.user_feed_class.insert_activities(batch, self.options)
            logger.info(
                'inserted chunk %s (length %s) into the global activity store',
                position, batch_length)

            # step 2: the user's personal timeline
            user_feed.add_many(batch, trim=False)
            logger.info('inserted chunk %s (length %s) into the user feed',
                        position, batch_length)

            if not fanout:
                continue

            # step 3: fan this batch out to every follower feed
            logger.info('starting task fanout for chunk %s', position)
            followers_by_priority = self.get_user_follower_ids(
                user_id=user_id)
            operation_kwargs = {'activities': batch, 'trim': False}
            for feed_class in self.feed_classes.values():
                for priority, ids in followers_by_priority.items():
                    self.create_fanout_tasks(
                        ids,
                        feed_class,
                        add_operation,
                        fanout_priority=priority,
                        operation_kwargs=operation_kwargs)
Пример #4
0
        def _add_many(redis, score_value_pairs):
            '''
            Add the given (score, value) pairs to the sorted set at ``key``

            The pairs are flattened to ``[score, value, score, value, ...]``
            and written in chunks of 200 items (100 pairs) per ``zadd`` call.
            ``key`` and ``results`` are not defined here; they come from the
            surrounding scope.

            :param redis: the client (or pipeline) to call ``zadd`` on
            :param score_value_pairs: iterable of (score, value) pairs
            :returns: ``results`` with one ``zadd`` result appended per chunk
            '''
            score_value_list = sum(map(list, score_value_pairs), [])
            score_value_chunks = chunks(score_value_list, 200)

            for score_value_chunk in score_value_chunks:
                # Walk the flat chunk two items at a time to rebuild the
                # (score, value) pairs, then hand zadd a {value: score}
                # mapping. The previous code unpacked the whole chunk
                # iterable instead of the current chunk.
                flat = iter(score_value_chunk)
                mapping = {value: score for score, value in zip(flat, flat)}
                result = redis.zadd(key, mapping)
                logger.debug('adding to %s with score_value_chunk %s',
                             key, score_value_chunk)
                results.append(result)
            return results
Пример #5
0
        def _add_many(redis, score_value_pairs):
            '''
            Add the given (score, value) pairs to the sorted set at ``key``

            The pairs are flattened to ``[score, value, score, value, ...]``
            and written in chunks of 200 items (100 pairs) per ``zadd`` call.
            ``key`` and ``results`` are not defined here; they come from the
            surrounding scope.

            :param redis: the client (or pipeline) to call ``zadd`` on
            :param score_value_pairs: iterable of (score, value) pairs
            :returns: ``results`` with one ``zadd`` result appended per chunk
            '''
            score_value_list = sum(map(list, score_value_pairs), [])
            score_value_chunks = chunks(score_value_list, 200)

            for score_value_chunk in score_value_chunks:
                # A chunk holds up to 200 flat items, so it cannot be
                # unpacked into a single (score, value); pair the items up
                # and send the whole chunk as one {value: score} mapping.
                flat = iter(score_value_chunk)
                mapping = {value: score for score, value in zip(flat, flat)}
                result = redis.zadd(key, mapping)
                for value, score in mapping.items():
                    logger.debug('adding to %s with value %s and score %s',
                                 key, value, score)
                results.append(result)
            return results
Пример #6
0
        def _add_many(redis, score_value_pairs):
            '''
            Add the given (score, value) pairs to the sorted set at ``key``

            The pairs are flattened into one list and passed to ``zadd`` as
            positional arguments, 200 items per call. ``key`` and ``results``
            are not defined here; they come from the surrounding scope.

            :param redis: the client (or pipeline) to call ``zadd`` on
            :param score_value_pairs: iterable of (score, value) pairs
            :returns: ``results`` with one ``zadd`` result appended per chunk
            '''
            flattened = [item for pair in score_value_pairs for item in pair]

            for batch in chunks(flattened, 200):
                outcome = redis.zadd(key, *batch)
                logger.debug('adding to %s with score_value_chunk %s',
                             key, batch)
                results.append(outcome)
            return results
Пример #7
0
        def _add_many(redis, score_value_pairs):
            '''
            Add the given (score, value) pairs to the sorted set at ``key``

            The pairs are flattened to ``[score, value, score, value, ...]``
            and written 200 items per ``zadd`` call as a {value: score}
            dictionary. ``key`` and ``results`` are not defined here; they
            come from the surrounding scope.

            :param redis: the client (or pipeline) to call ``zadd`` on
            :param score_value_pairs: iterable of (score, value) pairs
            :returns: ``results`` with one ``zadd`` result appended per chunk
            '''
            flattened = [item for pair in score_value_pairs for item in pair]

            for batch in chunks(flattened, 200):
                # even positions hold the scores, odd positions the values
                scores = batch[::2]
                members = batch[1::2]
                outcome = redis.zadd(key, dict(zip(members, scores)))
                logger.debug('adding to %s with score_value_chunk %s',
                             key, batch)
                results.append(outcome)
            return results
Пример #8
0
    def batch_import(self, user_id, activities, fanout=True, chunk_size=500):
        '''
        Import a batch of one user's activities and, optionally, distribute
        them to that user's followers

        **Example**::

            activities = [long list of activities]
            stream_framework.batch_import(13, activities, chunk_size=500)

        :param user_id: the user who created the activities
        :param activities: an iterable of activities from this user
        :param fanout: if we should run the fanout or not
        :param chunk_size: per how many activities to run the batch operations
        :raises ValueError: if the first activity's ``actor_id`` is not
            ``user_id`` (only the first activity is checked)

        '''
        activities = list(activities)
        if not activities:
            # nothing to import
            return
        logger.info('running batch import for user %s', user_id)

        user_feed = self.get_user_feed(user_id)
        first_activity = activities[0]
        if first_activity.actor_id != user_id:
            raise ValueError('Send activities for only one user please')

        batches = list(chunks(activities, chunk_size))
        logger.info('processing %s items in %s chunks of %s',
                    len(activities), len(batches), chunk_size)

        for position, batch in enumerate(batches):
            batch_length = len(batch)

            # step 1: the global activity storage
            self.user_feed_class.insert_activities(batch)
            logger.info(
                'inserted chunk %s (length %s) into the global activity store',
                position, batch_length)

            # step 2: the user's personal timeline
            user_feed.add_many(batch, trim=False)
            logger.info(
                'inserted chunk %s (length %s) into the user feed',
                position, batch_length)

            if not fanout:
                continue

            # step 3: fan this batch out to every follower feed
            logger.info('starting task fanout for chunk %s', position)
            followers_by_priority = self.get_user_follower_ids(
                user_id=user_id)
            operation_kwargs = {'activities': batch, 'trim': False}
            for feed_class in self.feed_classes.values():
                for priority, ids in followers_by_priority.items():
                    self.create_fanout_tasks(
                        ids,
                        feed_class,
                        add_operation,
                        fanout_priority=priority,
                        operation_kwargs=operation_kwargs
                    )
Пример #9
0
        def _add_many(redis, score_value_pairs):
            '''
            Add the given (score, value) pairs to the sorted set at ``key``

            The pairs are flattened to ``[score, value, score, value, ...]``
            and written in chunks of 200 items (100 pairs) per ``zadd`` call.
            ``key`` and ``results`` are not defined here; they come from the
            surrounding scope.

            :param redis: the client (or pipeline) to call ``zadd`` on
            :param score_value_pairs: iterable of (score, value) pairs
            :returns: ``results`` with one ``zadd`` result appended per chunk
            '''
            score_value_list = sum(map(list, score_value_pairs), [])
            score_value_chunks = chunks(score_value_list, 200)

            for score_value_chunk in score_value_chunks:
                # Newer redis-py versions require a {member: score}
                # dictionary. The chunk is flat, so pair its items up two at
                # a time; unpacking the whole chunk as one (score, value)
                # only worked when it held exactly one pair.
                flat = iter(score_value_chunk)
                mapping = {value: score for score, value in zip(flat, flat)}
                result = redis.zadd(key, mapping)
                logger.debug('adding to %s with score_value_chunk %s', key,
                             score_value_chunk)
                results.append(result)
            return results
Пример #10
0
        def _add_many(redis, score_value_pairs):
            '''
            Add the given (score, value) pairs to the sorted set at ``key``

            The pairs are flattened to ``[score, value, score, value, ...]``
            and written 200 items per ``zadd`` call, passed as the ``mapping``
            keyword in {value: score} form. ``key`` and ``results`` are not
            defined here; they come from the surrounding scope.

            :param redis: the client (or pipeline) to call ``zadd`` on
            :param score_value_pairs: iterable of (score, value) pairs
            :returns: ``results`` with one ``zadd`` result appended per chunk
            '''
            flattened = [item for pair in score_value_pairs for item in pair]

            for batch in chunks(flattened, 200):
                # reading the same iterator twice per step yields the
                # consecutive (score, name) pairs of the flat batch
                cursor = iter(batch)
                mapping = dict(
                    (name, score) for score, name in zip(cursor, cursor))
                outcome = redis.zadd(key, mapping=mapping)
                logger.debug('adding to %s with score_value_chunk %s', key,
                             batch)
                results.append(outcome)
            return results
Пример #11
0
    def create_fanout_tasks(self,
                            follower_ids,
                            feed_class,
                            operation,
                            operation_kwargs=None,
                            fanout_priority=None):
        """
        Queue the fanout work for a set of followers.

        Looks up the fanout task for the given priority and feed class,
        splits the follower ids per ``self.fanout_chunk_size`` and calls the
        task's ``delay`` once for every split.

        Returns the list of ``delay`` results, or an empty list when no
        fanout task was found.
        """
        task_for_priority = self.get_fanout_task(fanout_priority,
                                                 feed_class=feed_class)
        if not task_for_priority:
            return []

        id_groups = list(chunks(follower_ids, self.fanout_chunk_size))

        log.info('feed_spawn_tasks',
                 subtasks=len(id_groups),
                 recipients=len(follower_ids))

        # one subtask per group of follower ids
        return [
            task_for_priority.delay(feed_manager=self,
                                    feed_class=feed_class,
                                    user_ids=id_group,
                                    operation=operation,
                                    operation_kwargs=operation_kwargs)
            for id_group in id_groups
        ]
Пример #12
0
 def test_chunks(self):
     # six items in groups of two come back as three tuples
     expected = [(0, 1), (2, 3), (4, 5)]
     self.assertEqual(list(chunks(range(6), 2)), expected)
Пример #13
0
 def test_one_chunk(self):
     # fewer items than the chunk size gives one short chunk
     expected = [(0, 1)]
     self.assertEqual(list(chunks(range(2), 5)), expected)
Пример #14
0
 def test_one_chunk(self):
     # a chunk size larger than the input yields everything at once
     result = [piece for piece in chunks(range(2), 5)]
     self.assertEqual(result, [(0, 1)])
Пример #15
0
 def test_chunks(self):
     # an input that divides evenly yields only full-size tuples
     result = [piece for piece in chunks(range(6), 2)]
     self.assertEqual(result, [(0, 1), (2, 3), (4, 5)])